summaryrefslogtreecommitdiffstats
path: root/vendor/dissimilar/src
diff options
context:
space:
mode:
authorDaniel Baumann <daniel.baumann@progress-linux.org>2024-04-17 12:02:58 +0000
committerDaniel Baumann <daniel.baumann@progress-linux.org>2024-04-17 12:02:58 +0000
commit698f8c2f01ea549d77d7dc3338a12e04c11057b9 (patch)
tree173a775858bd501c378080a10dca74132f05bc50 /vendor/dissimilar/src
parentInitial commit. (diff)
downloadrustc-698f8c2f01ea549d77d7dc3338a12e04c11057b9.tar.xz
rustc-698f8c2f01ea549d77d7dc3338a12e04c11057b9.zip
Adding upstream version 1.64.0+dfsg1.upstream/1.64.0+dfsg1
Signed-off-by: Daniel Baumann <daniel.baumann@progress-linux.org>
Diffstat (limited to 'vendor/dissimilar/src')
-rw-r--r--vendor/dissimilar/src/find.rs232
-rw-r--r--vendor/dissimilar/src/lib.rs932
-rw-r--r--vendor/dissimilar/src/range.rs148
-rw-r--r--vendor/dissimilar/src/tests.rs580
4 files changed, 1892 insertions, 0 deletions
diff --git a/vendor/dissimilar/src/find.rs b/vendor/dissimilar/src/find.rs
new file mode 100644
index 000000000..90ca2c6c5
--- /dev/null
+++ b/vendor/dissimilar/src/find.rs
@@ -0,0 +1,232 @@
+// The strstr implementation in this file is extracted from the Rust standard
+// library's str::find. The algorithm works for arbitrary &[u8] haystack and
+// needle but is only exposed by the standard library on UTF-8 strings.
+//
+// https://github.com/rust-lang/rust/blob/1.40.0/src/libcore/str/pattern.rs
+//
+// ---
+//
+// This is the Two-Way search algorithm, which was introduced in the paper:
+// Crochemore, M., Perrin, D., 1991, Two-way string-matching, Journal of the ACM 38(3):651-675.
+//
+// Here's some background information.
+//
+// A *word* is a string of symbols. The *length* of a word should be a familiar
+// notion, and here we denote it for any word x by |x|. (We also allow for the
+// possibility of the *empty word*, a word of length zero.)
+//
+// If x is any non-empty word, then an integer p with 0 < p <= |x| is said to be
+// a *period* for x iff for all i with 0 <= i <= |x| - p - 1, we have x[i] ==
+// x[i+p]. For example, both 1 and 2 are periods for the string "aa". As another
+// example, the only period of the string "abcd" is 4.
+//
+// We denote by period(x) the *smallest* period of x (provided that x is
+// non-empty). This is always well-defined since every non-empty word x has at
+// least one period, |x|. We sometimes call this *the period* of x.
+//
+// If u, v and x are words such that x = uv, where uv is the concatenation of u
+// and v, then we say that (u, v) is a *factorization* of x.
+//
+// Let (u, v) be a factorization for a word x. Then if w is a non-empty word
+// such that both of the following hold
+//
+// - either w is a suffix of u or u is a suffix of w
+// - either w is a prefix of v or v is a prefix of w
+//
+// then w is said to be a *repetition* for the factorization (u, v).
+//
+// Just to unpack this, there are four possibilities here. Let w = "abc". Then
+// we might have:
+//
+// - w is a suffix of u and w is a prefix of v. ex: ("lolabc", "abcde")
+// - w is a suffix of u and v is a prefix of w. ex: ("lolabc", "ab")
+// - u is a suffix of w and w is a prefix of v. ex: ("bc", "abchi")
+// - u is a suffix of w and v is a prefix of w. ex: ("bc", "a")
+//
+// Note that the word vu is a repetition for any factorization (u,v) of x = uv,
+// so every factorization has at least one repetition.
+//
+// If x is a string and (u, v) is a factorization for x, then a *local period*
+// for (u, v) is an integer r such that there is some word w such that |w| = r
+// and w is a repetition for (u, v).
+//
+// We denote by local_period(u, v) the smallest local period of (u, v). We
+// sometimes call this *the local period* of (u, v). Provided that x = uv is
+// non-empty, this is well-defined (because every factorization has at least
+// one repetition, as noted above).
+//
+// It can be proven that the following is an equivalent definition of a local
+// period for a factorization (u, v): any positive integer r such that x[i] ==
+// x[i+r] for all i such that |u| - r <= i <= |u| - 1 and such that both x[i]
+// and x[i+r] are defined (i.e., i >= 0 and i + r < |x|).
+//
+// Using the above reformulation, it is easy to prove that
+//
+// 1 <= local_period(u, v) <= period(uv)
+//
+// A factorization (u, v) of x such that local_period(u,v) = period(x) is called
+// a *critical factorization*.
+//
+// The algorithm hinges on the following theorem, which is stated without proof:
+//
+// **Critical Factorization Theorem** Any word x has at least one critical
+// factorization (u, v) such that |u| < period(x).
+//
+// The purpose of maximal_suffix is to find such a critical factorization.
+//
+// If the period is short, compute another factorization x = u' v' to use for
+// reverse search, chosen instead so that |v'| < period(x).
+
+use std::cmp;
+use std::usize;
+
+pub fn find(haystack: &[u8], needle: &[u8]) -> Option<usize> { // Two-Way search for `needle` in `haystack`: Some(offset of the match) or None.
+ assert!(!needle.is_empty()); // Panics when `needle` is empty.
+
+ // crit_pos: critical factorization index (the larger of the two orderings' results is kept)
+ let (crit_pos_false, period_false) = maximal_suffix(needle, false);
+ let (crit_pos_true, period_true) = maximal_suffix(needle, true);
+ let (crit_pos, mut period) = if crit_pos_false > crit_pos_true {
+ (crit_pos_false, period_false)
+ } else {
+ (crit_pos_true, period_true)
+ };
+
+ // Byteset is an extension (not part of the two way algorithm); it is a
+ // 64-bit "fingerprint" where each set bit j corresponds to a (byte & 63) ==
+ // j present in the needle.
+ let byteset;
+ // Index into needle before which we have already matched.
+ let mut memory;
+
+ // A particularly readable explanation of what's going on here can be found
+ // in Crochemore and Rytter's book "Text Algorithms", ch 13. Specifically
+ // see the code for "Algorithm CP" on p. 323.
+ //
+ // What's going on is we have some critical factorization (u, v) of the
+ // needle, and we want to determine whether u is a suffix of &v[..period].
+ // If it is, we use "Algorithm CP1". Otherwise we use "Algorithm CP2", which
+ // is optimized for when the period of the needle is large.
+ let long_period = needle[..crit_pos] != needle[period..period + crit_pos];
+ if long_period {
+ // Long period case -- we have an approximation to the actual period,
+ // and don't use the `memory` shortcut.
+ //
+ // Approximate the period by lower bound max(|u|, |v|) + 1.
+ period = cmp::max(crit_pos, needle.len() - crit_pos) + 1;
+ byteset = byteset_create(needle);
+ // Dummy value to signify that the period is long.
+ memory = usize::MAX;
+ } else {
+ // Short period case -- the period is exact.
+ byteset = byteset_create(&needle[..period]);
+ memory = 0;
+ }
+
+ // One of the main ideas of Two-Way is that we factorize the needle into two
+ // halves, (u, v), and begin trying to find v in the haystack by scanning
+ // left to right. If v matches, we try to match u by scanning right to left.
+ // How far we can jump when we encounter a mismatch is all based on the fact
+ // that (u, v) is a critical factorization for the needle.
+ let mut position = 0; // Candidate start of the needle within the haystack.
+ let needle_last = needle.len() - 1;
+ 'search: loop {
+ // Check that we have room to search in. position + needle_last cannot
+ // overflow if we assume slices are bounded by isize's range.
+ let tail_byte = *haystack.get(position + needle_last)?; // Out of range: returns None, ending the search.
+
+ // Quickly skip by large portions unrelated to our substring.
+ if !byteset_contains(byteset, tail_byte) {
+ position += needle.len();
+ if !long_period {
+ memory = 0;
+ }
+ continue 'search;
+ }
+
+ // See if the right part of the needle matches.
+ let start = if long_period {
+ crit_pos
+ } else {
+ cmp::max(crit_pos, memory)
+ };
+ for i in start..needle.len() {
+ if needle[i] != haystack[position + i] {
+ position += i - crit_pos + 1; // Shift just past the mismatch, measured from crit_pos.
+ if !long_period {
+ memory = 0;
+ }
+ continue 'search;
+ }
+ }
+
+ // See if the left part of the needle matches.
+ let start = if long_period { 0 } else { memory };
+ for i in (start..crit_pos).rev() {
+ if needle[i] != haystack[position + i] {
+ position += period;
+ if !long_period {
+ memory = needle.len() - period; // Prefix length recorded as already matched after the shift.
+ }
+ continue 'search;
+ }
+ }
+
+ // We have found a match!
+ return Some(position);
+ }
+}
+
+fn byteset_create(bytes: &[u8]) -> u64 { // Builds a 64-bit mask with bit (b & 0x3f) set for each byte b in `bytes`.
+ bytes.iter().fold(0, |a, &b| (1 << (b & 0x3f)) | a)
+}
+
+fn byteset_contains(byteset: u64, byte: u8) -> bool { // Tests bit (byte & 0x3f); bytes with equal low six bits share a bit, so `true` is only a "maybe".
+ (byteset >> ((byte & 0x3f) as usize)) & 1 != 0
+}
+
+// Compute the maximal suffix of `arr` under the chosen byte ordering.
+//
+// The maximal suffix is a possible critical factorization (u, v) of `arr`.
+//
+// Returns (`i`, `p`) where `i` is the starting index of v and `p` is the
+// period of v.
+//
+// `order_greater` determines if lexical order is `<` or `>`. Both
+// orders must be computed -- the ordering with the largest `i` gives
+// a critical factorization.
+//
+// For long period cases, the resulting period is not exact (it is too short).
+fn maximal_suffix(arr: &[u8], order_greater: bool) -> (usize, usize) {
+ let mut left = 0; // Corresponds to i in the paper
+ let mut right = 1; // Corresponds to j in the paper
+ let mut offset = 0; // Corresponds to k in the paper, but starting at 0
+ // to match 0-based indexing.
+ let mut period = 1; // Corresponds to p in the paper
+
+ while let Some(&a) = arr.get(right + offset) { // Ends once right + offset is past the end of `arr`.
+ // `left` will be inbounds when `right` is.
+ let b = arr[left + offset];
+ if (a < b && !order_greater) || (a > b && order_greater) {
+ // Suffix is smaller, period is entire prefix so far.
+ right += offset + 1;
+ offset = 0;
+ period = right - left;
+ } else if a == b {
+ // Advance through repetition of the current period.
+ if offset + 1 == period {
+ right += offset + 1;
+ offset = 0;
+ } else {
+ offset += 1;
+ }
+ } else {
+ // Suffix is larger, start over from current location.
+ left = right;
+ right += 1;
+ offset = 0;
+ period = 1;
+ }
+ }
+ (left, period) // (start index of the chosen suffix, its computed period)
+}
diff --git a/vendor/dissimilar/src/lib.rs b/vendor/dissimilar/src/lib.rs
new file mode 100644
index 000000000..8ce9faad3
--- /dev/null
+++ b/vendor/dissimilar/src/lib.rs
@@ -0,0 +1,932 @@
+//! [![github]](https://github.com/dtolnay/dissimilar)&ensp;[![crates-io]](https://crates.io/crates/dissimilar)&ensp;[![docs-rs]](https://docs.rs/dissimilar)
+//!
+//! [github]: https://img.shields.io/badge/github-8da0cb?style=for-the-badge&labelColor=555555&logo=github
+//! [crates-io]: https://img.shields.io/badge/crates.io-fc8d62?style=for-the-badge&labelColor=555555&logo=rust
+//! [docs-rs]: https://img.shields.io/badge/docs.rs-66c2a5?style=for-the-badge&labelColor=555555&logoColor=white&logo=
+//!
+//! <br>
+//!
+//! ## Diff library with semantic cleanup, based on Google's diff-match-patch
+//!
+//! This library is a port of the Diff component of [Diff Match Patch] to Rust.
+//! The diff implementation is based on [Myers' diff algorithm] but includes
+//! some [semantic cleanups] to increase human readability by factoring out
+//! commonalities which are likely to be coincidental.
+//!
+//! Diff Match Patch was originally built in 2006 to power Google Docs.
+//!
+//! # Interface
+//!
+//! Here is the entire API of the Rust implementation. It operates on borrowed
+//! strings and the return value of the diff algorithm is a vector of chunks
+//! pointing into slices of those input strings.
+//!
+//! ```
+//! pub enum Chunk<'a> {
+//! Equal(&'a str),
+//! Delete(&'a str),
+//! Insert(&'a str),
+//! }
+//!
+//! # const IGNORE: &str = stringify! {
+//! pub fn diff(text1: &str, text2: &str) -> Vec<Chunk>;
+//! # };
+//! ```
+//!
+//! [Diff Match Patch]: https://github.com/google/diff-match-patch
+//! [Myers' diff algorithm]: https://neil.fraser.name/writing/diff/myers.pdf
+//! [semantic cleanups]: https://neil.fraser.name/writing/diff/
+
+#![doc(html_root_url = "https://docs.rs/dissimilar/1.0.4")]
+#![allow(
+ clippy::blocks_in_if_conditions,
+ clippy::cast_possible_wrap,
+ clippy::cast_sign_loss,
+ clippy::cloned_instead_of_copied, // https://github.com/rust-lang/rust-clippy/issues/7127
+ clippy::collapsible_else_if,
+ clippy::comparison_chain,
+ clippy::match_same_arms,
+ clippy::module_name_repetitions,
+ clippy::must_use_candidate,
+ clippy::new_without_default,
+ clippy::octal_escapes,
+ clippy::shadow_unrelated,
+ clippy::similar_names,
+ clippy::too_many_lines,
+ clippy::unseparated_literal_suffix,
+ unused_parens, // false positive on Some(&(mut diff)) pattern
+)]
+
+mod find;
+mod range;
+
+#[cfg(test)]
+mod tests;
+
+use crate::range::{bytes, str, Range};
+use std::cmp;
+use std::collections::VecDeque;
+use std::fmt::{self, Debug};
+
+#[derive(Copy, Clone, PartialEq, Eq)]
+pub enum Chunk<'a> { // One element of the Vec returned by `diff`; each variant borrows a slice of an input string.
+ Equal(&'a str), // NOTE(review): presumably text common to both inputs -- the Diff -> Chunk conversion is not in this view.
+ Delete(&'a str), // NOTE(review): presumably text only in text1 (mirrors Diff::Delete).
+ Insert(&'a str), // NOTE(review): presumably text only in text2 (mirrors Diff::Insert).
+}
+
+#[derive(Copy, Clone)]
+enum Diff<'a, 'b> { // Internal edit record; 'a ranges point into text1, 'b ranges into text2.
+ Equal(Range<'a>, Range<'b>), // Matching content, located once in each text.
+ Delete(Range<'a>), // A span of text1.
+ Insert(Range<'b>), // A span of text2.
+}
+
+impl<'tmp, 'a: 'tmp, 'b: 'tmp> Diff<'a, 'b> { // Helpers for reading and nudging the range(s) held by a Diff.
+ fn text(&self) -> Range<'tmp> { // The variant's range; for Equal this is the first (text1-side) range.
+ match *self {
+ Diff::Equal(range, _) | Diff::Delete(range) | Diff::Insert(range) => range,
+ }
+ }
+
+ fn grow_left(&mut self, increment: usize) { // Extends each range leftwards: offset down, len up by `increment`.
+ self.for_each(|range| {
+ range.offset -= increment;
+ range.len += increment;
+ });
+ }
+
+ fn grow_right(&mut self, increment: usize) { // Extends each range rightwards: len up by `increment`.
+ self.for_each(|range| range.len += increment);
+ }
+
+ fn shift_left(&mut self, increment: usize) { // Moves each range left by `increment`, keeping its len.
+ self.for_each(|range| range.offset -= increment);
+ }
+
+ fn shift_right(&mut self, increment: usize) { // Moves each range right by `increment`, keeping its len.
+ self.for_each(|range| range.offset += increment);
+ }
+
+ fn for_each(&mut self, f: impl Fn(&mut Range)) { // Applies `f` to every range in this variant (both ranges for Equal).
+ match self {
+ Diff::Equal(range1, range2) => {
+ f(range1);
+ f(range2);
+ }
+ Diff::Delete(range) => f(range),
+ Diff::Insert(range) => f(range),
+ }
+ }
+}
+
+pub fn diff<'a>(text1: &'a str, text2: &'a str) -> Vec<Chunk<'a>> { // Public entry point: diffs two strings and returns the resulting chunks.
+ let text1 = Range::new(text1, ..);
+ let text2 = Range::new(text2, ..);
+ let mut solution = main(text1, text2); // Byte-wise diff; `main` leaves solution.utf8 == false.
+ cleanup_char_boundary(&mut solution); // Realigns ranges to char boundaries and sets solution.utf8.
+ cleanup_semantic(&mut solution);
+ cleanup_merge(&mut solution);
+ solution.diffs.into_iter().map(Chunk::from).collect()
+}
+
+struct Solution<'a, 'b> { // Working state handed from `main` through the cleanup passes.
+ text1: Range<'a>, // The whole first range as given to `main`.
+ text2: Range<'b>, // The whole second range as given to `main`.
+ diffs: Vec<Diff<'a, 'b>>, // The edit list being built and refined.
+ utf8: bool, // Set false by `main`, true by cleanup_char_boundary; cleanup_merge picks char- vs byte-wise helpers from it.
+}
+
+fn main<'a, 'b>(mut text1: Range<'a>, mut text2: Range<'b>) -> Solution<'a, 'b> { // Diffs two ranges: strip shared prefix/suffix, diff the middle, reassemble, merge.
+ let whole1 = text1; // Untrimmed inputs, stored in the returned Solution.
+ let whole2 = text2;
+
+ // Trim off common prefix.
+ let common_prefix_len = common_prefix_bytes(text1, text2);
+ let common_prefix = Diff::Equal(
+ text1.substring(..common_prefix_len),
+ text2.substring(..common_prefix_len),
+ );
+ text1 = text1.substring(common_prefix_len..);
+ text2 = text2.substring(common_prefix_len..);
+
+ // Trim off common suffix.
+ let common_suffix_len = common_suffix_bytes(text1, text2);
+ let common_suffix = Diff::Equal(
+ text1.substring(text1.len - common_suffix_len..),
+ text2.substring(text2.len - common_suffix_len..),
+ );
+ text1 = text1.substring(..text1.len - common_suffix_len);
+ text2 = text2.substring(..text2.len - common_suffix_len);
+
+ // Compute the diff on the middle block.
+ let mut solution = Solution {
+ text1: whole1,
+ text2: whole2,
+ diffs: compute(text1, text2),
+ utf8: false, // Prefix/suffix were trimmed with the *_bytes helpers.
+ };
+
+ // Restore the prefix and suffix.
+ if common_prefix_len > 0 {
+ solution.diffs.insert(0, common_prefix);
+ }
+ if common_suffix_len > 0 {
+ solution.diffs.push(common_suffix);
+ }
+
+ cleanup_merge(&mut solution);
+
+ solution
+}
+
+// Find the differences between two texts. Assumes that the texts do not have
+// any common prefix or suffix (`main` trims both before calling this).
+fn compute<'a, 'b>(text1: Range<'a>, text2: Range<'b>) -> Vec<Diff<'a, 'b>> {
+ match (text1.is_empty(), text2.is_empty()) { // Trivial cases: one or both sides empty.
+ (true, true) => return Vec::new(),
+ (true, false) => return vec![Diff::Insert(text2)],
+ (false, true) => return vec![Diff::Delete(text1)],
+ (false, false) => {}
+ }
+
+ // Check for entire shorter text inside the longer text.
+ if text1.len > text2.len {
+ if let Some(i) = text1.find(text2) {
+ return vec![
+ Diff::Delete(text1.substring(..i)),
+ Diff::Equal(text1.substring(i..i + text2.len), text2),
+ Diff::Delete(text1.substring(i + text2.len..)),
+ ];
+ }
+ } else {
+ if let Some(i) = text2.find(text1) {
+ return vec![
+ Diff::Insert(text2.substring(..i)),
+ Diff::Equal(text1, text2.substring(i..i + text1.len)),
+ Diff::Insert(text2.substring(i + text1.len..)),
+ ];
+ }
+ }
+
+ if text1.len == 1 || text2.len == 1 {
+ // Single character string.
+ // After the previous check, the character can't be an equality.
+ return vec![Diff::Delete(text1), Diff::Insert(text2)];
+ }
+
+ bisect(text1, text2) // General case: split at the middle snake and recurse.
+}
+
+// Find the 'middle snake' of a diff, split the problem in two and return the
+// recursively constructed diff.
+//
+// See Myers 1986 paper: An O(ND) Difference Algorithm and Its Variations.
+fn bisect<'a, 'b>(text1: Range<'a>, text2: Range<'b>) -> Vec<Diff<'a, 'b>> {
+ let max_d = (text1.len + text2.len + 1) / 2; // Half the combined length, rounded up; bounds the d loop.
+ let v_offset = max_d; // Index of diagonal k = 0 within v1/v2.
+ let v_len = 2 * max_d;
+ let mut v1 = vec![-1isize; v_len]; // Forward path: x reached per diagonal; -1 means unset.
+ let mut v2 = vec![-1isize; v_len]; // Reverse path, same layout.
+ v1[v_offset + 1] = 0;
+ v2[v_offset + 1] = 0;
+ let delta = text1.len as isize - text2.len as isize;
+ // If the total number of characters is odd, then the front path will
+ // collide with the reverse path.
+ let front = delta % 2 != 0;
+ // Offsets for start and end of k loop.
+ // Prevents mapping of space beyond the grid.
+ let mut k1start = 0;
+ let mut k1end = 0;
+ let mut k2start = 0;
+ let mut k2end = 0;
+ for d in 0..max_d as isize {
+ // Walk the front path one step.
+ let mut k1 = -d + k1start;
+ while k1 <= d - k1end {
+ let k1_offset = (v_offset as isize + k1) as usize;
+ let mut x1 = if k1 == -d || (k1 != d && v1[k1_offset - 1] < v1[k1_offset + 1]) {
+ v1[k1_offset + 1]
+ } else {
+ v1[k1_offset - 1] + 1
+ } as usize;
+ let mut y1 = (x1 as isize - k1) as usize;
+ if let (Some(s1), Some(s2)) = (text1.get(x1..), text2.get(y1..)) { // Follow the diagonal while bytes match.
+ let advance = common_prefix_bytes(s1, s2);
+ x1 += advance;
+ y1 += advance;
+ }
+ v1[k1_offset] = x1 as isize;
+ if x1 > text1.len {
+ // Ran off the right of the graph.
+ k1end += 2;
+ } else if y1 > text2.len {
+ // Ran off the bottom of the graph.
+ k1start += 2;
+ } else if front {
+ let k2_offset = v_offset as isize + delta - k1;
+ if k2_offset >= 0 && k2_offset < v_len as isize && v2[k2_offset as usize] != -1 {
+ // Mirror x2 onto top-left coordinate system.
+ let x2 = text1.len as isize - v2[k2_offset as usize];
+ if x1 as isize >= x2 {
+ // Overlap detected.
+ return bisect_split(text1, text2, x1, y1);
+ }
+ }
+ }
+ k1 += 2;
+ }
+
+ // Walk the reverse path one step.
+ let mut k2 = -d + k2start;
+ while k2 <= d - k2end {
+ let k2_offset = (v_offset as isize + k2) as usize;
+ let mut x2 = if k2 == -d || (k2 != d && v2[k2_offset - 1] < v2[k2_offset + 1]) {
+ v2[k2_offset + 1]
+ } else {
+ v2[k2_offset - 1] + 1
+ } as usize;
+ let mut y2 = (x2 as isize - k2) as usize;
+ if x2 < text1.len && y2 < text2.len { // Follow the diagonal backwards while trailing bytes match.
+ let advance = common_suffix_bytes(
+ text1.substring(..text1.len - x2),
+ text2.substring(..text2.len - y2),
+ );
+ x2 += advance;
+ y2 += advance;
+ }
+ v2[k2_offset] = x2 as isize;
+ if x2 > text1.len {
+ // Ran off the left of the graph.
+ k2end += 2;
+ } else if y2 > text2.len {
+ // Ran off the top of the graph.
+ k2start += 2;
+ } else if !front {
+ let k1_offset = v_offset as isize + delta - k2;
+ if k1_offset >= 0 && k1_offset < v_len as isize && v1[k1_offset as usize] != -1 {
+ let x1 = v1[k1_offset as usize] as usize;
+ let y1 = v_offset + x1 - k1_offset as usize; // y = x - k, with k = k1_offset - v_offset.
+ // Mirror x2 onto top-left coordinate system.
+ x2 = text1.len - x2;
+ if x1 >= x2 {
+ // Overlap detected.
+ return bisect_split(text1, text2, x1, y1);
+ }
+ }
+ }
+ k2 += 2;
+ }
+ }
+ // Number of diffs equals number of characters, no commonality at all.
+ vec![Diff::Delete(text1), Diff::Insert(text2)]
+}
+
+// Given the location of the 'middle snake' (x in text1, y in text2), split
+// both texts there, diff each half with `main`, and concatenate the results.
+fn bisect_split<'a, 'b>(
+ text1: Range<'a>,
+ text2: Range<'b>,
+ x: usize,
+ y: usize,
+) -> Vec<Diff<'a, 'b>> {
+ let (text1a, text1b) = text1.split_at(x);
+ let (text2a, text2b) = text2.split_at(y);
+
+ // Compute both diffs serially.
+ let mut diffs = main(text1a, text2a).diffs;
+ diffs.extend(main(text1b, text2b).diffs);
+
+ diffs
+}
+
+// Determine the length of the common prefix of two strings, comparing char by char.
+fn common_prefix(text1: Range, text2: Range) -> usize {
+ for ((i, ch1), ch2) in text1.char_indices().zip(text2.chars()) {
+ if ch1 != ch2 {
+ return i; // Index in text1 of the first differing char.
+ }
+ }
+ cmp::min(text1.len, text2.len) // No mismatch: the shorter text is entirely a prefix of the other.
+}
+
+// Determine the length of the common suffix of two strings, comparing char by char from the end.
+fn common_suffix(text1: Range, text2: Range) -> usize {
+ for ((i, ch1), ch2) in text1.char_indices().rev().zip(text2.chars().rev()) {
+ if ch1 != ch2 {
+ return text1.len - i - ch1.len_utf8(); // Length of text1 after the differing char.
+ }
+ }
+ cmp::min(text1.len, text2.len) // No mismatch: the shorter text is entirely a suffix of the other.
+}
+
+fn common_prefix_bytes(text1: Range, text2: Range) -> usize { // Number of leading bytes the two ranges share.
+ for (i, (b1, b2)) in text1.bytes().zip(text2.bytes()).enumerate() {
+ if b1 != b2 {
+ return i;
+ }
+ }
+ cmp::min(text1.len, text2.len) // No mismatch within the shorter range.
+}
+
+fn common_suffix_bytes(text1: Range, text2: Range) -> usize { // Number of trailing bytes the two ranges share.
+ for (i, (b1, b2)) in text1.bytes().rev().zip(text2.bytes().rev()).enumerate() {
+ if b1 != b2 {
+ return i;
+ }
+ }
+ cmp::min(text1.len, text2.len) // No mismatch within the shorter range.
+}
+
+// Determine if the suffix of one string is the prefix of another.
+//
+// Returns the length of the longest suffix of the first string that is also a
+// prefix of the second string (0 if there is none).
+fn common_overlap(mut text1: Range, mut text2: Range) -> usize {
+ // Eliminate the null case.
+ if text1.is_empty() || text2.is_empty() {
+ return 0;
+ }
+ // Truncate the longer string.
+ if text1.len > text2.len {
+ text1 = text1.substring(text1.len - text2.len..);
+ } else if text1.len < text2.len {
+ text2 = text2.substring(..text1.len);
+ }
+ // Quick check for the worst case.
+ if bytes(text1) == bytes(text2) {
+ return text1.len;
+ }
+
+ // Start by looking for a single character match
+ // and increase length until no match is found.
+ // Performance analysis: https://neil.fraser.name/news/2010/11/04/
+ let mut best = 0; // Longest overlap confirmed so far.
+ let mut length = 1; // Candidate overlap length being tested.
+ loop {
+ let pattern = text1.substring(text1.len - length..);
+ let found = match text2.find(pattern) {
+ Some(found) => found,
+ None => return best,
+ };
+ length += found; // An overlap must be at least this long for `pattern` to sit at its end.
+ if found == 0
+ || bytes(text1.substring(text1.len - length..)) == bytes(text2.substring(..length))
+ {
+ best = length;
+ length += 1;
+ }
+ }
+}
+
+fn cleanup_char_boundary(solution: &mut Solution) { // Moves every range edge onto a char boundary, drops diffs left empty, then sets solution.utf8.
+ fn boundary_down(doc: &str, pos: usize) -> usize { // Distance `pos` must move backwards to reach a char boundary of `doc`.
+ let mut adjust = 0;
+ while !doc.is_char_boundary(pos - adjust) {
+ adjust += 1;
+ }
+ adjust
+ }
+
+ fn boundary_up(doc: &str, pos: usize) -> usize { // Distance `pos` must move forwards to reach a char boundary of `doc`.
+ let mut adjust = 0;
+ while !doc.is_char_boundary(pos + adjust) {
+ adjust += 1;
+ }
+ adjust
+ }
+
+ fn skip_overlap<'a>(prev: &Range<'a>, range: &mut Range<'a>) { // Trims from the front of `range` whatever lies before the end of `prev`.
+ let prev_end = prev.offset + prev.len;
+ if prev_end > range.offset {
+ let delta = cmp::min(prev_end - range.offset, range.len);
+ range.offset += delta;
+ range.len -= delta;
+ }
+ }
+
+ let mut read = 0; // Next diff to examine.
+ let mut retain = 0; // Next slot to write a kept diff into (in-place compaction).
+ let mut last_delete = Range::empty();
+ let mut last_insert = Range::empty();
+ while let Some(&(mut diff)) = solution.diffs.get(read) {
+ read += 1;
+ match &mut diff {
+ Diff::Equal(range1, range2) => { // Equalities shrink inwards to char boundaries.
+ let adjust = boundary_up(range1.doc, range1.offset);
+ // If the whole range is sub-character, skip it.
+ if range1.len <= adjust {
+ continue;
+ }
+ range1.offset += adjust;
+ range1.len -= adjust;
+ range2.offset += adjust;
+ range2.len -= adjust;
+ let adjust = boundary_down(range1.doc, range1.offset + range1.len);
+ range1.len -= adjust;
+ range2.len -= adjust;
+ last_delete = Range::empty();
+ last_insert = Range::empty();
+ }
+ Diff::Delete(range) => { // Deletions grow outwards to char boundaries.
+ skip_overlap(&last_delete, range);
+ if range.len == 0 {
+ continue;
+ }
+ let adjust = boundary_down(range.doc, range.offset);
+ range.offset -= adjust;
+ range.len += adjust;
+ let adjust = boundary_up(range.doc, range.offset + range.len);
+ range.len += adjust;
+ last_delete = *range;
+ }
+ Diff::Insert(range) => { // Insertions grow outwards to char boundaries.
+ skip_overlap(&last_insert, range);
+ if range.len == 0 {
+ continue;
+ }
+ let adjust = boundary_down(range.doc, range.offset);
+ range.offset -= adjust;
+ range.len += adjust;
+ let adjust = boundary_up(range.doc, range.offset + range.len);
+ range.len += adjust;
+ last_insert = *range;
+ }
+ }
+ solution.diffs[retain] = diff;
+ retain += 1;
+ }
+
+ solution.diffs.truncate(retain);
+ solution.utf8 = true;
+}
+
+// Reduce the number of edits by eliminating semantically trivial equalities.
+fn cleanup_semantic(solution: &mut Solution) {
+ let mut diffs = &mut solution.diffs;
+ if diffs.is_empty() {
+ return;
+ }
+
+ let mut changes = false; // Whether any equality was replaced below.
+ let mut equalities = VecDeque::new(); // Double-ended queue of equalities.
+ let mut last_equality = None; // The ranges of the most recently pushed equality, if still a candidate.
+ let mut pointer = 0;
+ // Number of characters that changed prior to the equality.
+ let mut len_insertions1 = 0;
+ let mut len_deletions1 = 0;
+ // Number of characters that changed after the equality.
+ let mut len_insertions2 = 0;
+ let mut len_deletions2 = 0;
+ while let Some(&this_diff) = diffs.get(pointer) {
+ match this_diff {
+ Diff::Equal(text1, text2) => {
+ equalities.push_back(pointer);
+ len_insertions1 = len_insertions2;
+ len_deletions1 = len_deletions2;
+ len_insertions2 = 0;
+ len_deletions2 = 0;
+ last_equality = Some((text1, text2));
+ pointer += 1;
+ continue;
+ }
+ Diff::Delete(text) => len_deletions2 += text.len,
+ Diff::Insert(text) => len_insertions2 += text.len,
+ }
+ // Eliminate an equality that is smaller or equal to the edits on both
+ // sides of it.
+ if last_equality.map_or(false, |(last_equality, _)| {
+ last_equality.len <= cmp::max(len_insertions1, len_deletions1)
+ && last_equality.len <= cmp::max(len_insertions2, len_deletions2)
+ }) {
+ // Jump back to offending equality.
+ pointer = equalities.pop_back().unwrap();
+
+ // Replace equality with a delete.
+ diffs[pointer] = Diff::Delete(last_equality.unwrap().0);
+ // Insert a corresponding insert.
+ diffs.insert(pointer + 1, Diff::Insert(last_equality.unwrap().1));
+
+ len_insertions1 = 0; // Reset the counters.
+ len_insertions2 = 0;
+ len_deletions1 = 0;
+ len_deletions2 = 0;
+ last_equality = None;
+ changes = true;
+
+ // Throw away the previous equality (it needs to be reevaluated).
+ equalities.pop_back();
+ if let Some(back) = equalities.back() {
+ // There is a safe equality we can fall back to.
+ pointer = *back;
+ } else {
+ // There are no previous equalities, jump back to the start.
+ pointer = 0;
+ continue;
+ }
+ }
+ pointer += 1;
+ }
+
+ // Normalize the diff.
+ if changes {
+ cleanup_merge(solution);
+ }
+ cleanup_semantic_lossless(solution);
+ diffs = &mut solution.diffs; // Re-borrow after the passes above took `solution`.
+
+ // Find any overlaps between deletions and insertions.
+ // e.g: <del>abcxxx</del><ins>xxxdef</ins>
+ // -> <del>abc</del>xxx<ins>def</ins>
+ // e.g: <del>xxxabc</del><ins>defxxx</ins>
+ // -> <ins>def</ins>xxx<del>abc</del>
+ // Only extract an overlap if it is as big as the edit ahead or behind it.
+ let mut pointer = 1;
+ while let Some(&this_diff) = diffs.get(pointer) {
+ let prev_diff = diffs[pointer - 1];
+ if let (Diff::Delete(deletion), Diff::Insert(insertion)) = (prev_diff, this_diff) {
+ let overlap_len1 = common_overlap(deletion, insertion);
+ let overlap_len2 = common_overlap(insertion, deletion);
+ let overlap_min = cmp::min(deletion.len, insertion.len);
+ if overlap_len1 >= overlap_len2 && 2 * overlap_len1 >= overlap_min {
+ // Overlap found. Insert an equality and trim the surrounding edits.
+ diffs.insert(
+ pointer,
+ Diff::Equal(
+ deletion.substring(deletion.len - overlap_len1..deletion.len),
+ insertion.substring(..overlap_len1),
+ ),
+ );
+ diffs[pointer - 1] =
+ Diff::Delete(deletion.substring(..deletion.len - overlap_len1));
+ diffs[pointer + 1] = Diff::Insert(insertion.substring(overlap_len1..));
+ } else if overlap_len1 < overlap_len2 && 2 * overlap_len2 >= overlap_min {
+ // Reverse overlap found.
+ // Insert an equality and swap and trim the surrounding edits.
+ diffs.insert(
+ pointer,
+ Diff::Equal(
+ deletion.substring(..overlap_len2),
+ insertion.substring(insertion.len - overlap_len2..insertion.len),
+ ),
+ );
+ diffs[pointer - 1] =
+ Diff::Insert(insertion.substring(..insertion.len - overlap_len2));
+ diffs[pointer + 1] = Diff::Delete(deletion.substring(overlap_len2..));
+ }
+ pointer += 1; // Extra step for every Delete/Insert pair, on top of the one below.
+ }
+ pointer += 1;
+ }
+}
+
+// Look for single edits surrounded on both sides by equalities which can be
+// shifted sideways to align the edit to a word boundary.
+//
+// e.g: The c<ins>at c</ins>ame. -> The <ins>cat </ins>came.
+fn cleanup_semantic_lossless(solution: &mut Solution) {
+ let diffs = &mut solution.diffs;
+ let mut pointer = 1; // Index of the candidate edit; its neighbours are pointer - 1 and pointer + 1.
+ while let Some(&next_diff) = diffs.get(pointer + 1) {
+ let prev_diff = diffs[pointer - 1];
+ if let (
+ Diff::Equal(mut prev_equal1, mut prev_equal2),
+ Diff::Equal(mut next_equal1, mut next_equal2),
+ ) = (prev_diff, next_diff)
+ {
+ // This is a single edit surrounded by equalities.
+ let mut edit = diffs[pointer];
+
+ // First, shift the edit as far left as possible.
+ let common_offset = common_suffix(prev_equal1, edit.text());
+ let original_prev_len = prev_equal1.len; // Compared at the end to detect whether anything moved.
+ prev_equal1.len -= common_offset;
+ prev_equal2.len -= common_offset;
+ edit.shift_left(common_offset);
+ next_equal1.offset -= common_offset;
+ next_equal1.len += common_offset;
+ next_equal2.offset -= common_offset;
+ next_equal2.len += common_offset;
+
+ // Second, step character by character right, looking for the best fit.
+ let mut best_prev_equal = (prev_equal1, prev_equal2);
+ let mut best_edit = edit;
+ let mut best_next_equal = (next_equal1, next_equal2);
+ let mut best_score = cleanup_semantic_score(prev_equal1, edit.text())
+ + cleanup_semantic_score(edit.text(), next_equal1);
+ while !edit.text().is_empty()
+ && !next_equal1.is_empty()
+ && edit.text().chars().next().unwrap() == next_equal1.chars().next().unwrap()
+ {
+ let increment = edit.text().chars().next().unwrap().len_utf8(); // Step by one whole char.
+ prev_equal1.len += increment;
+ prev_equal2.len += increment;
+ edit.shift_right(increment);
+ next_equal1.offset += increment;
+ next_equal1.len -= increment;
+ next_equal2.offset += increment;
+ next_equal2.len -= increment;
+ let score = cleanup_semantic_score(prev_equal1, edit.text())
+ + cleanup_semantic_score(edit.text(), next_equal1);
+ // The >= encourages trailing rather than leading whitespace on edits.
+ if score >= best_score {
+ best_score = score;
+ best_prev_equal = (prev_equal1, prev_equal2);
+ best_edit = edit;
+ best_next_equal = (next_equal1, next_equal2);
+ }
+ }
+
+ if original_prev_len != best_prev_equal.0.len {
+ // We have an improvement, save it back to the diff.
+ if best_next_equal.0.is_empty() {
+ diffs.remove(pointer + 1);
+ } else {
+ diffs[pointer + 1] = Diff::Equal(best_next_equal.0, best_next_equal.1);
+ }
+ diffs[pointer] = best_edit;
+ if best_prev_equal.0.is_empty() {
+ diffs.remove(pointer - 1);
+ pointer -= 1; // The edit moved down one slot after the removal.
+ } else {
+ diffs[pointer - 1] = Diff::Equal(best_prev_equal.0, best_prev_equal.1);
+ }
+ }
+ }
+ pointer += 1;
+ }
+}
+
+// Given two strings, compute a score representing whether the internal boundary
+// (between the end of `one` and the start of `two`) falls on logical boundaries.
+//
+// Scores range from 6 (best) to 0 (worst).
+fn cleanup_semantic_score(one: Range, two: Range) -> usize {
+ if one.is_empty() || two.is_empty() {
+ // Edges are the best.
+ return 6;
+ }
+
+ // Each port of this function behaves slightly differently due to subtle
+ // differences in each language's definition of things like 'whitespace'.
+ // Since this function's purpose is largely cosmetic, the choice has been
+ // made to use each language's native features rather than force total
+ // conformity.
+ let char1 = one.chars().next_back().unwrap(); // Last char before the boundary.
+ let char2 = two.chars().next().unwrap(); // First char after the boundary.
+ let non_alphanumeric1 = !char1.is_ascii_alphanumeric();
+ let non_alphanumeric2 = !char2.is_ascii_alphanumeric();
+ let whitespace1 = non_alphanumeric1 && char1.is_ascii_whitespace();
+ let whitespace2 = non_alphanumeric2 && char2.is_ascii_whitespace();
+ let line_break1 = whitespace1 && char1.is_control();
+ let line_break2 = whitespace2 && char2.is_control();
+ let blank_line1 = line_break1 && (one.ends_with("\n\n") || one.ends_with("\n\r\n"));
+ let blank_line2 = line_break2 && (two.starts_with("\n\n") || two.starts_with("\r\n\r\n"));
+
+ if blank_line1 || blank_line2 {
+ // Five points for blank lines.
+ 5
+ } else if line_break1 || line_break2 {
+ // Four points for line breaks.
+ 4
+ } else if non_alphanumeric1 && !whitespace1 && whitespace2 {
+ // Three points for end of sentences.
+ 3
+ } else if whitespace1 || whitespace2 {
+ // Two points for whitespace.
+ 2
+ } else if non_alphanumeric1 || non_alphanumeric2 {
+ // One point for non-alphanumeric.
+ 1
+ } else {
+ 0 // Both neighbours are ASCII alphanumerics.
+ }
+}
+
+ // Reorder and merge like edit sections. Merge equalities. Any edit section can
+ // move as long as it doesn't cross an equality. The two passes below repeat until the shift pass changes nothing.
+ fn cleanup_merge(solution: &mut Solution) {
+ let diffs = &mut solution.diffs;
+ let common_prefix = if solution.utf8 { // Select the helper pair that matches `solution.utf8`.
+ common_prefix
+ } else {
+ common_prefix_bytes
+ };
+ let common_suffix = if solution.utf8 {
+ common_suffix
+ } else {
+ common_suffix_bytes
+ };
+ // Each iteration of the loop is one merge pass followed by one shift pass.
+ loop {
+ if diffs.is_empty() {
+ return;
+ }
+ // First pass: merge each run of edits that sits between two equalities.
+ diffs.push(Diff::Equal(
+ solution.text1.substring(solution.text1.len..),
+ solution.text2.substring(solution.text2.len..),
+ )); // Add a dummy (empty) equality at the end so that a trailing run of edits is flushed too.
+ let mut pointer = 0;
+ let mut count_delete = 0;
+ let mut count_insert = 0;
+ let mut text_delete = Range::empty();
+ let mut text_insert = Range::empty();
+ while let Some(&this_diff) = diffs.get(pointer) {
+ match this_diff {
+ Diff::Insert(text) => {
+ count_insert += 1;
+ if text_insert.is_empty() {
+ text_insert = text;
+ } else {
+ text_insert.len += text.len; // Only `len` grows: relies on `text` being contiguous with the run so far.
+ }
+ }
+ Diff::Delete(text) => {
+ count_delete += 1;
+ if text_delete.is_empty() {
+ text_delete = text;
+ } else {
+ text_delete.len += text.len; // Same contiguity assumption as for insertions.
+ }
+ }
+ Diff::Equal(text, _) => {
+ let count_both = count_delete + count_insert;
+ if count_both > 1 {
+ let both_types = count_delete != 0 && count_insert != 0;
+ // Delete the offending records: the `count_both` edits directly before `pointer`.
+ diffs.splice(pointer - count_both..pointer, None);
+ pointer -= count_both;
+ if both_types {
+ // Factor out any common prefix of the merged insertion and deletion.
+ let common_length = common_prefix(text_insert, text_delete);
+ if common_length != 0 {
+ if pointer > 0 {
+ match &mut diffs[pointer - 1] {
+ Diff::Equal(this_diff1, this_diff2) => {
+ this_diff1.len += common_length;
+ this_diff2.len += common_length;
+ }
+ _ => unreachable!(
+ "previous diff should have been an equality"
+ ),
+ }
+ } else {
+ diffs.insert(
+ pointer,
+ Diff::Equal(
+ text_delete.substring(..common_length),
+ text_insert.substring(..common_length),
+ ),
+ );
+ pointer += 1;
+ }
+ text_insert = text_insert.substring(common_length..);
+ text_delete = text_delete.substring(common_length..);
+ }
+ // Factor out any common suffix into the equality at `pointer` (the one being visited).
+ let common_length = common_suffix(text_insert, text_delete);
+ if common_length != 0 {
+ diffs[pointer].grow_left(common_length);
+ text_insert.len -= common_length;
+ text_delete.len -= common_length;
+ }
+ }
+ // Insert the merged records: the deletion first, then the insertion; empty ones are dropped.
+ if !text_delete.is_empty() {
+ diffs.insert(pointer, Diff::Delete(text_delete));
+ pointer += 1;
+ }
+ if !text_insert.is_empty() {
+ diffs.insert(pointer, Diff::Insert(text_insert));
+ pointer += 1;
+ }
+ } else if pointer > 0 { // At most one edit was pending, so no edits need merging here.
+ if let Some(Diff::Equal(prev_equal1, prev_equal2)) =
+ diffs.get_mut(pointer - 1)
+ {
+ // Merge this equality with the previous one.
+ prev_equal1.len += text.len;
+ prev_equal2.len += text.len;
+ diffs.remove(pointer);
+ pointer -= 1;
+ }
+ }
+ count_insert = 0;
+ count_delete = 0;
+ text_delete = Range::empty();
+ text_insert = Range::empty();
+ }
+ }
+ pointer += 1;
+ }
+ if diffs.last().unwrap().text().is_empty() {
+ diffs.pop(); // Remove the dummy entry at the end, unless it is no longer empty.
+ }
+
+ // Second pass: look for single edits surrounded on both sides by equalities
+ // which can be shifted sideways to eliminate an equality.
+ // e.g: A<ins>BA</ins>C -> <ins>AB</ins>AC
+ let mut changes = false;
+ let mut pointer = 1;
+ // Intentionally ignore the first and last element (don't need checking).
+ while let Some(&next_diff) = diffs.get(pointer + 1) {
+ let prev_diff = diffs[pointer - 1];
+ let this_diff = diffs[pointer];
+ if let (Diff::Equal(prev_diff, _), Diff::Equal(next_diff, _)) = (prev_diff, next_diff) {
+ // This is a single edit surrounded by equalities.
+ if this_diff.text().ends_with(prev_diff) {
+ // Shift the edit over the previous equality.
+ diffs[pointer].shift_left(prev_diff.len);
+ diffs[pointer + 1].grow_left(prev_diff.len);
+ diffs.remove(pointer - 1); // Delete prev_diff.
+ changes = true;
+ } else if this_diff.text().starts_with(next_diff) {
+ // Shift the edit over the next equality.
+ diffs[pointer - 1].grow_right(next_diff.len);
+ diffs[pointer].shift_right(next_diff.len);
+ diffs.remove(pointer + 1); // Delete next_diff.
+ changes = true;
+ }
+ }
+ pointer += 1;
+ }
+ // If shifts were made, the diff needs reordering and another shift sweep.
+ if !changes {
+ return;
+ }
+ }
+ }
+
+ impl Debug for Chunk<'_> {
+     fn fmt(&self, formatter: &mut fmt::Formatter) -> fmt::Result {
+         let mut render = |name: &str, text: &str| write!(formatter, "{}({:?})", name, text);
+         match *self {
+             Chunk::Equal(text) => render("Equal", text),
+             Chunk::Delete(text) => render("Delete", text),
+             Chunk::Insert(text) => render("Insert", text),
+         }
+     }
+ }
+
+ impl Debug for Diff<'_, '_> {
+     fn fmt(&self, formatter: &mut fmt::Formatter) -> fmt::Result {
+         let (name, range) = match *self {
+             Diff::Equal(range, _) => ("Equal", range),
+             Diff::Delete(range) => ("Delete", range),
+             Diff::Insert(range) => ("Insert", range),
+         };
+         // Decode lossily rather than assuming the bytes form valid UTF-8.
+         write!(formatter, "{}({:?})", name, String::from_utf8_lossy(bytes(range)))
+     }
+ }
+
+ impl<'a> From<Diff<'a, 'a>> for Chunk<'a> {
+     fn from(diff: Diff<'a, 'a>) -> Self {
+         match diff {
+             Diff::Equal(kept, _) => Chunk::Equal(str(kept)), // The second range is ignored.
+             Diff::Delete(removed) => Chunk::Delete(str(removed)),
+             Diff::Insert(added) => Chunk::Insert(str(added)),
+         }
+     }
+ }
diff --git a/vendor/dissimilar/src/range.rs b/vendor/dissimilar/src/range.rs
new file mode 100644
index 000000000..565a94c06
--- /dev/null
+++ b/vendor/dissimilar/src/range.rs
@@ -0,0 +1,148 @@
+use crate::find::find;
+use std::fmt::Debug;
+use std::ops::{self, RangeFrom, RangeFull, RangeTo};
+use std::str::{CharIndices, Chars};
+
+ #[derive(Copy, Clone)]
+ pub struct Range<'a> { // A window of `len` bytes starting at byte `offset` of `doc`.
+ pub doc: &'a str, // The whole document the range points into.
+ pub offset: usize, // Byte index into `doc` at which the range starts.
+ pub len: usize, // Number of bytes the range covers.
+ }
+
+ impl<'a> Range<'a> {
+     // A zero-length range over an empty document.
+     pub fn empty() -> Self {
+         Range::new("", ..)
+     }
+
+     // Selects `bounds` out of the whole of `doc`; panics if they do not fit.
+     pub fn new(doc: &'a str, bounds: impl RangeBounds) -> Self {
+         let (offset, len) = bounds.index(doc.len());
+         Self { doc, offset, len }
+     }
+
+     pub fn is_empty(&self) -> bool {
+         self.len == 0
+     }
+
+     // Narrows to `bounds`, relative to this range's start; panics if they do not fit.
+     pub fn substring(&self, bounds: impl RangeBounds) -> Self {
+         let (skip, len) = bounds.index(self.len);
+         let (doc, offset) = (self.doc, self.offset + skip);
+         Self { doc, offset, len }
+     }
+
+     // Non-panicking `substring`: `None` when `bounds` do not fit.
+     pub fn get(&self, bounds: impl RangeBounds) -> Option<Self> {
+         let (skip, len) = bounds.try_index(self.len)?;
+         let (doc, offset) = (self.doc, self.offset + skip);
+         Some(Self { doc, offset, len })
+     }
+
+     pub fn split_at(&self, mid: usize) -> (Self, Self) {
+         let head = self.substring(..mid);
+         let tail = self.substring(mid..);
+         (head, tail)
+     }
+
+     pub fn chars(&self) -> Chars<'a> {
+         let text: &'a str = str(*self);
+         text.chars()
+     }
+
+     pub fn char_indices(&self) -> CharIndices<'a> {
+         let text: &'a str = str(*self);
+         text.char_indices()
+     }
+
+     pub fn bytes(&self) -> impl Iterator<Item = u8> + DoubleEndedIterator + ExactSizeIterator + 'a {
+         bytes(*self).iter().copied()
+     }
+
+     pub fn starts_with(&self, prefix: impl AsRef<[u8]>) -> bool {
+         let haystack = bytes(*self);
+         haystack.starts_with(prefix.as_ref())
+     }
+
+     pub fn ends_with(&self, suffix: impl AsRef<[u8]>) -> bool {
+         let haystack = bytes(*self);
+         haystack.ends_with(suffix.as_ref())
+     }
+
+     pub fn find(&self, needle: impl AsRef<[u8]>) -> Option<usize> {
+         find(bytes(*self), needle.as_ref())
+     }
+ }
+
+ // Returns the text covered by `range`. Panics if the range is out of bounds
+ // or does not fall on UTF-8 character boundaries of `range.doc`.
+ pub fn str(range: Range) -> &str {
+     let end = range.offset + range.len;
+     // `debug_assertions` is the cfg that rustc sets for debug builds; a bare
+     // `debug` cfg is never set, which left this diagnostic unreachable.
+     if cfg!(debug_assertions) && range.doc.get(range.offset..end).is_none() {
+         eprintln!(
+             "doc={:?} offset={} len={}",
+             range.doc, range.offset, range.len
+         );
+     }
+     &range.doc[range.offset..end]
+ }
+
+ pub fn bytes(Range { doc, offset, len }: Range) -> &[u8] {
+     &doc.as_bytes()[offset..offset + len]
+ }
+
+ impl AsRef<[u8]> for Range<'_> {
+     fn as_ref(&self) -> &[u8] {
+         &self.doc.as_bytes()[self.offset..self.offset + self.len]
+     }
+ }
+
+ pub trait RangeBounds: Sized + Clone + Debug {
+     // Resolves the bounds against length `len`: `(offset, len)`, or `None` if out of range.
+     fn try_index(self, len: usize) -> Option<(usize, usize)>;
+     // Panicking variant of `try_index`.
+     fn index(self, len: usize) -> (usize, usize) {
+         self.clone()
+             .try_index(len)
+             .unwrap_or_else(|| panic!("index out of range, index={:?}, len={}", self, len))
+     }
+ }
+
+ impl RangeBounds for ops::Range<usize> {
+     fn try_index(self, len: usize) -> Option<(usize, usize)> {
+         // Reject inverted ranges and ranges that run past the end.
+         if self.start > self.end || self.end > len {
+             return None;
+         }
+         Some((self.start, self.end - self.start))
+     }
+ }
+
+ impl RangeBounds for RangeFrom<usize> {
+     fn try_index(self, len: usize) -> Option<(usize, usize)> {
+         // `checked_sub` gives `None` exactly when `start` lies beyond `len`;
+         // otherwise it is the number of elements from `start` to the end.
+         let rest = len.checked_sub(self.start)?;
+
+         Some((self.start, rest))
+     }
+ }
+
+ impl RangeBounds for RangeTo<usize> {
+     fn try_index(self, len: usize) -> Option<(usize, usize)> {
+         // A prefix always starts at zero; it only has to end within `len`.
+         if self.end > len {
+             return None;
+         }
+         Some((0, self.end))
+     }
+ }
+
+ impl RangeBounds for RangeFull {
+     fn try_index(self, len: usize) -> Option<(usize, usize)> {
+         (0..len).try_index(len) // `..` selects the same elements as `0..len`.
+     }
+ }
diff --git a/vendor/dissimilar/src/tests.rs b/vendor/dissimilar/src/tests.rs
new file mode 100644
index 000000000..450d7f7e4
--- /dev/null
+++ b/vendor/dissimilar/src/tests.rs
@@ -0,0 +1,580 @@
+use super::*;
+
+ macro_rules! diff_list { // Builds a `Solution` from `Kind("text")` items; no items gives an empty one.
+ () => {
+ Solution {
+ text1: Range::empty(),
+ text2: Range::empty(),
+ diffs: Vec::new(),
+ utf8: true,
+ }
+ };
+ ($($kind:ident($text:literal)),+ $(,)?) => {{
+ macro_rules! text1 { // What one item contributes to text1: deletions and equalities only.
+ (Insert, $s:literal) => { "" };
+ (Delete, $s:literal) => { $s };
+ (Equal, $s:literal) => { $s };
+ }
+ macro_rules! text2 { // What one item contributes to text2: insertions and equalities only.
+ (Insert, $s:literal) => { $s };
+ (Delete, $s:literal) => { "" };
+ (Equal, $s:literal) => { $s };
+ }
+ let text1 = concat!($(text1!($kind, $text)),*);
+ let text2 = concat!($(text2!($kind, $text)),*);
+ let (_i, _j) = (&mut 0, &mut 0); // Running byte offsets into text1 and text2, advanced by `range`.
+ macro_rules! range { // Turns one item into a `Diff` over the next bytes of the text(s) it belongs to.
+ (Insert, $s:literal) => {
+ Diff::Insert(range(text2, _j, $s))
+ };
+ (Delete, $s:literal) => {
+ Diff::Delete(range(text1, _i, $s))
+ };
+ (Equal, $s:literal) => {
+ Diff::Equal(range(text1, _i, $s), range(text2, _j, $s))
+ };
+ }
+ Solution {
+ text1: Range::new(text1, ..),
+ text2: Range::new(text2, ..),
+ diffs: vec![$(range!($kind, $text)),*],
+ utf8: true,
+ }
+ }};
+ }
+
+ // Carves the next `text.len()` bytes of `doc` off at `*offset` and advances
+ // the cursor past them, so consecutive calls yield back-to-back ranges.
+ fn range<'a>(doc: &'a str, offset: &mut usize, text: &str) -> Range<'a> {
+     let len = text.len();
+     let next = *offset + len;
+     // Advance the cursor, keeping its previous position for this range.
+     let offset = std::mem::replace(offset, next);
+     Range { doc, offset, len }
+ }
+
+ macro_rules! assert_diffs { // Asserts that `$solution.diffs` matches the expected chunks; `$msg` names the case.
+ ([$($kind:ident($text:literal)),* $(,)?], $solution:ident, $msg:expr $(,)?) => {
+ let expected = &[$(Chunk::$kind($text)),*];
+ assert!(
+ same_diffs(expected, &$solution.diffs),
+ concat!($msg, "\nexpected={:#?}\nactual={:#?}"), // `concat!` only accepts literals, so `$msg` must be one.
+ expected, $solution.diffs,
+ );
+ };
+ }
+
+ fn same_diffs(expected: &[Chunk], actual: &[Diff]) -> bool {
+     if expected.len() != actual.len() {
+         return false; // A missing or surplus diff is a mismatch.
+     }
+     expected.iter().zip(actual).all(|pair| match pair {
+         (Chunk::Insert(want), Diff::Insert(got)) => *want == str(*got),
+         (Chunk::Delete(want), Diff::Delete(got)) => *want == str(*got),
+         (Chunk::Equal(want), Diff::Equal(a, b)) => *want == str(*a) && *want == str(*b),
+         _ => false,
+     })
+ }
+
+ #[test]
+ fn test_common_prefix() {
+     let cases = [
+         ("abc", "xyz", 0, "Null case"),
+         ("1234abcdef", "1234xyz", 4, "Non-null case"),
+         ("1234", "1234xyz", 4, "Whole case"),
+     ];
+     for &(left, right, expected, msg) in &cases {
+         let (text1, text2) = (Range::new(left, ..), Range::new(right, ..));
+         // Exercise both variants, mirroring test_common_suffix.
+         assert_eq!(expected, common_prefix(text1, text2), "{}", msg);
+         assert_eq!(expected, common_prefix_bytes(text1, text2), "{}", msg);
+     }
+ }
+
+ #[test]
+ fn test_common_suffix() {
+     // (text1, text2, expected shared suffix length, case name)
+     let cases = [
+         ("abc", "xyz", 0, "Null case"),
+         ("abcdef1234", "xyz1234", 4, "Non-null case"),
+         ("1234", "xyz1234", 4, "Whole case"),
+     ];
+
+     for &(left, right, expected, msg) in &cases {
+         let text1 = Range::new(left, ..);
+         let text2 = Range::new(right, ..);
+         // Both variants are expected to report the same length.
+         assert_eq!(expected, common_suffix(text1, text2), "{}", msg);
+         assert_eq!(expected, common_suffix_bytes(text1, text2), "{}", msg);
+     }
+ }
+
+ #[test]
+ fn test_common_overlap() {
+     // Each case lists text1, text2, the expected overlap length, and the
+     // name reported if the assertion fails.
+     let cases = [
+         // Nothing can overlap with an empty first text.
+         ("", "abcd", 0, "Null case"),
+         // All of text1 is a prefix of text2.
+         ("abc", "abcd", 3, "Whole case"),
+         // The texts share no characters at all.
+         ("123456", "abcd", 0, "No overlap"),
+         // The tail of text1 matches the head of text2.
+         ("123456xxx", "xxxabcd", 3, "Overlap"),
+         // Some overly clever languages (C#) may treat ligatures as equal to
+         // their component letters. E.g. U+FB01 == 'fi'
+         ("fi", "\u{fb01}i", 0, "Unicode"),
+     ];
+
+     for &(left, right, expected, msg) in &cases {
+         let text1 = Range::new(left, ..);
+         let text2 = Range::new(right, ..);
+         assert_eq!(expected, common_overlap(text1, text2), "{}", msg);
+     }
+ }
+
+ #[test]
+ fn test_cleanup_merge() {
+ let mut solution = diff_list![];
+ cleanup_merge(&mut solution);
+ assert_diffs!([], solution, "Null case");
+ // Input with nothing to merge or shift must come back unchanged.
+ let mut solution = diff_list![Equal("a"), Delete("b"), Insert("c")];
+ cleanup_merge(&mut solution);
+ assert_diffs!(
+ [Equal("a"), Delete("b"), Insert("c")],
+ solution,
+ "No change case",
+ );
+ // Adjacent equalities collapse into a single one.
+ let mut solution = diff_list![Equal("a"), Equal("b"), Equal("c")];
+ cleanup_merge(&mut solution);
+ assert_diffs!([Equal("abc")], solution, "Merge equalities");
+ // Adjacent deletions collapse into a single one.
+ let mut solution = diff_list![Delete("a"), Delete("b"), Delete("c")];
+ cleanup_merge(&mut solution);
+ assert_diffs!([Delete("abc")], solution, "Merge deletions");
+ // Adjacent insertions collapse into a single one.
+ let mut solution = diff_list![Insert("a"), Insert("b"), Insert("c")];
+ cleanup_merge(&mut solution);
+ assert_diffs!([Insert("abc")], solution, "Merge insertions");
+ // Interleaved edits are grouped: the deletions first, then the insertions.
+ let mut solution = diff_list![
+ Delete("a"),
+ Insert("b"),
+ Delete("c"),
+ Insert("d"),
+ Equal("e"),
+ Equal("f"),
+ ];
+ cleanup_merge(&mut solution);
+ assert_diffs!(
+ [Delete("ac"), Insert("bd"), Equal("ef")],
+ solution,
+ "Merge interweave",
+ );
+ // The prefix "a" and suffix "c" shared by both sides become equalities.
+ let mut solution = diff_list![Delete("a"), Insert("abc"), Delete("dc")];
+ cleanup_merge(&mut solution);
+ assert_diffs!(
+ [Equal("a"), Delete("d"), Insert("b"), Equal("c")],
+ solution,
+ "Prefix and suffix detection",
+ );
+ // Same, with the shared text absorbed into the neighbouring equalities.
+ let mut solution = diff_list![
+ Equal("x"),
+ Delete("a"),
+ Insert("abc"),
+ Delete("dc"),
+ Equal("y"),
+ ];
+ cleanup_merge(&mut solution);
+ assert_diffs!(
+ [Equal("xa"), Delete("d"), Insert("b"), Equal("cy")],
+ solution,
+ "Prefix and suffix detection with equalities",
+ );
+ // An edit that ends with the previous equality's text slides left over it.
+ let mut solution = diff_list![Equal("a"), Insert("ba"), Equal("c")];
+ cleanup_merge(&mut solution);
+ assert_diffs!([Insert("ab"), Equal("ac")], solution, "Slide edit left");
+ // An edit that starts with the next equality's text slides right over it.
+ let mut solution = diff_list![Equal("c"), Insert("ab"), Equal("a")];
+ cleanup_merge(&mut solution);
+ assert_diffs!([Equal("ca"), Insert("ba")], solution, "Slide edit right");
+ // Both deletions end up merged into one at the very start.
+ let mut solution = diff_list![
+ Equal("a"),
+ Delete("b"),
+ Equal("c"),
+ Delete("ac"),
+ Equal("x"),
+ ];
+ cleanup_merge(&mut solution);
+ assert_diffs!(
+ [Delete("abc"), Equal("acx")],
+ solution,
+ "Slide edit left recursive",
+ );
+ // The mirror image: both deletions end up merged into one at the very end.
+ let mut solution = diff_list![
+ Equal("x"),
+ Delete("ca"),
+ Equal("c"),
+ Delete("b"),
+ Equal("a"),
+ ];
+ cleanup_merge(&mut solution);
+ assert_diffs!(
+ [Equal("xca"), Delete("cba")],
+ solution,
+ "Slide edit right recursive",
+ );
+ // The shared suffix "b" uses up the whole deletion, which then disappears.
+ let mut solution = diff_list![Delete("b"), Insert("ab"), Equal("c")];
+ cleanup_merge(&mut solution);
+ assert_diffs!([Insert("a"), Equal("bc")], solution, "Empty range");
+ // An empty leading equality is absent from the result.
+ let mut solution = diff_list![Equal(""), Insert("a"), Equal("b")];
+ cleanup_merge(&mut solution);
+ assert_diffs!([Insert("a"), Equal("b")], solution, "Empty equality");
+ }
+
+ #[test]
+ fn test_cleanup_semantic_lossless() {
+ let mut solution = diff_list![];
+ cleanup_semantic_lossless(&mut solution);
+ assert_diffs!([], solution, "Null case");
+ // The insertion is shifted so that it starts right after the blank line.
+ let mut solution = diff_list![
+ Equal("AAA\r\n\r\nBBB"),
+ Insert("\r\nDDD\r\n\r\nBBB"),
+ Equal("\r\nEEE"),
+ ];
+ cleanup_semantic_lossless(&mut solution);
+ assert_diffs!(
+ [
+ Equal("AAA\r\n\r\n"),
+ Insert("BBB\r\nDDD\r\n\r\n"),
+ Equal("BBB\r\nEEE"),
+ ],
+ solution,
+ "Blank lines",
+ );
+ // The insertion is shifted so that it starts at the beginning of a line.
+ let mut solution = diff_list![Equal("AAA\r\nBBB"), Insert(" DDD\r\nBBB"), Equal(" EEE")];
+ cleanup_semantic_lossless(&mut solution);
+ assert_diffs!(
+ [Equal("AAA\r\n"), Insert("BBB DDD\r\n"), Equal("BBB EEE")],
+ solution,
+ "Line boundaries",
+ );
+ // The insertion is shifted so that it starts at the beginning of a word.
+ let mut solution = diff_list![Equal("The c"), Insert("ow and the c"), Equal("at.")];
+ cleanup_semantic_lossless(&mut solution);
+ assert_diffs!(
+ [Equal("The "), Insert("cow and the "), Equal("cat.")],
+ solution,
+ "Word boundaries",
+ );
+ // A non-alphanumeric character ('-') serves as a boundary as well.
+ let mut solution = diff_list![Equal("The-c"), Insert("ow-and-the-c"), Equal("at.")];
+ cleanup_semantic_lossless(&mut solution);
+ assert_diffs!(
+ [Equal("The-"), Insert("cow-and-the-"), Equal("cat.")],
+ solution,
+ "Alphanumeric boundaries",
+ );
+ // The deletion slides all the way to the start of the text.
+ let mut solution = diff_list![Equal("a"), Delete("a"), Equal("ax")];
+ cleanup_semantic_lossless(&mut solution);
+ assert_diffs!([Delete("a"), Equal("aax")], solution, "Hitting the start");
+ // The deletion slides all the way to the end of the text.
+ let mut solution = diff_list![Equal("xa"), Delete("a"), Equal("a")];
+ cleanup_semantic_lossless(&mut solution);
+ assert_diffs!([Equal("xaa"), Delete("a")], solution, "Hitting the end");
+ // The insertion is shifted so that it starts right after the end of a sentence.
+ let mut solution = diff_list![Equal("The xxx. The "), Insert("zzz. The "), Equal("yyy.")];
+ cleanup_semantic_lossless(&mut solution);
+ assert_diffs!(
+ [Equal("The xxx."), Insert(" The zzz."), Equal(" The yyy.")],
+ solution,
+ "Sentence boundaries",
+ );
+ }
+
+ #[test]
+ fn test_cleanup_semantic() {
+ let mut solution = diff_list![];
+ cleanup_semantic(&mut solution);
+ assert_diffs!([], solution, "Null case");
+ // The equality "12" is kept; the diff comes back unchanged.
+ let mut solution = diff_list![Delete("ab"), Insert("cd"), Equal("12"), Delete("e")];
+ cleanup_semantic(&mut solution);
+ assert_diffs!(
+ [Delete("ab"), Insert("cd"), Equal("12"), Delete("e")],
+ solution,
+ "No elimination #1",
+ );
+ // The equality "1234" is kept as well.
+ let mut solution = diff_list![Delete("abc"), Insert("ABC"), Equal("1234"), Delete("wxyz")];
+ cleanup_semantic(&mut solution);
+ assert_diffs!(
+ [Delete("abc"), Insert("ABC"), Equal("1234"), Delete("wxyz")],
+ solution,
+ "No elimination #2",
+ );
+ // The equality "b" is folded into a delete/insert pair.
+ let mut solution = diff_list![Delete("a"), Equal("b"), Delete("c")];
+ cleanup_semantic(&mut solution);
+ assert_diffs!([Delete("abc"), Insert("b")], solution, "Simple elimination",);
+ // Both equalities ("cd" and "f") end up folded into one delete/insert pair.
+ let mut solution = diff_list![
+ Delete("ab"),
+ Equal("cd"),
+ Delete("e"),
+ Equal("f"),
+ Insert("g"),
+ ];
+ cleanup_semantic(&mut solution);
+ assert_diffs!(
+ [Delete("abcdef"), Insert("cdfg")],
+ solution,
+ "Backpass elimination",
+ );
+ // All three equalities end up folded into one delete/insert pair.
+ let mut solution = diff_list![
+ Insert("1"),
+ Equal("A"),
+ Delete("B"),
+ Insert("2"),
+ Equal("_"),
+ Insert("1"),
+ Equal("A"),
+ Delete("B"),
+ Insert("2"),
+ ];
+ cleanup_semantic(&mut solution);
+ assert_diffs!(
+ [Delete("AB_AB"), Insert("1A2_1A2")],
+ solution,
+ "Multiple elimination",
+ );
+ // Nothing is eliminated, but the deletion is moved to start at a word.
+ let mut solution = diff_list![Equal("The c"), Delete("ow and the c"), Equal("at.")];
+ cleanup_semantic(&mut solution);
+ assert_diffs!(
+ [Equal("The "), Delete("cow and the "), Equal("cat.")],
+ solution,
+ "Word boundaries",
+ );
+ // The two-character overlap "xx" is not pulled out.
+ let mut solution = diff_list![Delete("abcxx"), Insert("xxdef")];
+ cleanup_semantic(&mut solution);
+ assert_diffs!(
+ [Delete("abcxx"), Insert("xxdef")],
+ solution,
+ "No overlap elimination",
+ );
+ // The three-character overlap "xxx" is pulled out as an equality.
+ let mut solution = diff_list![Delete("abcxxx"), Insert("xxxdef")];
+ cleanup_semantic(&mut solution);
+ assert_diffs!(
+ [Delete("abc"), Equal("xxx"), Insert("def")],
+ solution,
+ "Overlap elimination",
+ );
+ // Same when the end of the insertion overlaps the start of the deletion.
+ let mut solution = diff_list![Delete("xxxabc"), Insert("defxxx")];
+ cleanup_semantic(&mut solution);
+ assert_diffs!(
+ [Insert("def"), Equal("xxx"), Delete("abc")],
+ solution,
+ "Reverse overlap elimination",
+ );
+ // Overlaps are pulled out on both sides of the existing equality "----".
+ let mut solution = diff_list![
+ Delete("abcd1212"),
+ Insert("1212efghi"),
+ Equal("----"),
+ Delete("A3"),
+ Insert("3BC"),
+ ];
+ cleanup_semantic(&mut solution);
+ assert_diffs!(
+ [
+ Delete("abcd"),
+ Equal("1212"),
+ Insert("efghi"),
+ Equal("----"),
+ Delete("A"),
+ Equal("3"),
+ Insert("BC"),
+ ],
+ solution,
+ "Two overlap eliminations",
+ );
+ }
+
+ #[test]
+ fn test_bisect() {
+ let text1 = Range::new("cat", ..);
+ let text2 = Range::new("map", ..);
+ let solution = Solution { // Wrap the `bisect` output so that `assert_diffs!` can read `.diffs`.
+ text1,
+ text2,
+ diffs: bisect(text1, text2),
+ utf8: false,
+ };
+ assert_diffs!(
+ [
+ Delete("c"),
+ Insert("m"),
+ Equal("a"), // The one character both texts have at the same position.
+ Delete("t"),
+ Insert("p"),
+ ],
+ solution,
+ "Normal",
+ );
+ }
+
+ #[test]
+ fn test_main() {
+ let solution = main(Range::empty(), Range::empty());
+ assert_diffs!([], solution, "Null case");
+ // Identical texts give a single equality.
+ let solution = main(Range::new("abc", ..), Range::new("abc", ..));
+ assert_diffs!([Equal("abc")], solution, "Equality");
+ // Text inserted in the middle.
+ let solution = main(Range::new("abc", ..), Range::new("ab123c", ..));
+ assert_diffs!(
+ [Equal("ab"), Insert("123"), Equal("c")],
+ solution,
+ "Simple insertion",
+ );
+ // Text deleted from the middle.
+ let solution = main(Range::new("a123bc", ..), Range::new("abc", ..));
+ assert_diffs!(
+ [Equal("a"), Delete("123"), Equal("bc")],
+ solution,
+ "Simple deletion",
+ );
+ // Two separate insertions.
+ let solution = main(Range::new("abc", ..), Range::new("a123b456c", ..));
+ assert_diffs!(
+ [
+ Equal("a"),
+ Insert("123"),
+ Equal("b"),
+ Insert("456"),
+ Equal("c"),
+ ],
+ solution,
+ "Two insertions",
+ );
+ // Two separate deletions.
+ let solution = main(Range::new("a123b456c", ..), Range::new("abc", ..));
+ assert_diffs!(
+ [
+ Equal("a"),
+ Delete("123"),
+ Equal("b"),
+ Delete("456"),
+ Equal("c"),
+ ],
+ solution,
+ "Two deletions",
+ );
+ // Texts with nothing in common.
+ let solution = main(Range::new("a", ..), Range::new("b", ..));
+ assert_diffs!([Delete("a"), Insert("b")], solution, "Simple case #1");
+ // A replacement and an insertion within one sentence.
+ let solution = main(
+ Range::new("Apples are a fruit.", ..),
+ Range::new("Bananas are also fruit.", ..),
+ );
+ assert_diffs!(
+ [
+ Delete("Apple"),
+ Insert("Banana"),
+ Equal("s are a"),
+ Insert("lso"),
+ Equal(" fruit."),
+ ],
+ solution,
+ "Simple case #2",
+ );
+ // Note: Rust has no octal escapes, so "\000" below is NUL followed by the two characters "00".
+ let solution = main(Range::new("ax\t", ..), Range::new("\u{0680}x\000", ..));
+ assert_diffs!(
+ [
+ Delete("a"),
+ Insert("\u{0680}"),
+ Equal("x"),
+ Delete("\t"),
+ Insert("\000"),
+ ],
+ solution,
+ "Simple case #3",
+ );
+ // The three "Overlap" cases use texts in which characters repeat.
+ let solution = main(Range::new("1ayb2", ..), Range::new("abxab", ..));
+ assert_diffs!(
+ [
+ Delete("1"),
+ Equal("a"),
+ Delete("y"),
+ Equal("b"),
+ Delete("2"),
+ Insert("xab"),
+ ],
+ solution,
+ "Overlap #1",
+ );
+
+ let solution = main(Range::new("abcy", ..), Range::new("xaxcxabc", ..));
+ assert_diffs!(
+ [Insert("xaxcx"), Equal("abc"), Delete("y")],
+ solution,
+ "Overlap #2",
+ );
+
+ let solution = main(
+ Range::new("ABCDa=bcd=efghijklmnopqrsEFGHIJKLMNOefg", ..),
+ Range::new("a-bcd-efghijklmnopqrs", ..),
+ );
+ assert_diffs!(
+ [
+ Delete("ABCD"),
+ Equal("a"),
+ Delete("="),
+ Insert("-"),
+ Equal("bcd"),
+ Delete("="),
+ Insert("-"),
+ Equal("efghijklmnopqrs"),
+ Delete("EFGHIJKLMNOefg"),
+ ],
+ solution,
+ "Overlap #3",
+ );
+ // The long shared run " [[Pennsylvania]]" is kept as one equality.
+ let solution = main(
+ Range::new("a [[Pennsylvania]] and [[New", ..),
+ Range::new(" and [[Pennsylvania]]", ..),
+ );
+ assert_diffs!(
+ [
+ Insert(" "),
+ Equal("a"),
+ Insert("nd"),
+ Equal(" [[Pennsylvania]]"),
+ Delete(" and [[New"),
+ ],
+ solution,
+ "Large equality",
+ );
+ }