summaryrefslogtreecommitdiffstats
path: root/vendor/similar/src/utils.rs
diff options
context:
space:
mode:
authorDaniel Baumann <daniel.baumann@progress-linux.org>2024-05-04 12:41:41 +0000
committerDaniel Baumann <daniel.baumann@progress-linux.org>2024-05-04 12:41:41 +0000
commit10ee2acdd26a7f1298c6f6d6b7af9b469fe29b87 (patch)
treebdffd5d80c26cf4a7a518281a204be1ace85b4c1 /vendor/similar/src/utils.rs
parentReleasing progress-linux version 1.70.0+dfsg1-9~progress7.99u1. (diff)
downloadrustc-10ee2acdd26a7f1298c6f6d6b7af9b469fe29b87.tar.xz
rustc-10ee2acdd26a7f1298c6f6d6b7af9b469fe29b87.zip
Merging upstream version 1.70.0+dfsg2.
Signed-off-by: Daniel Baumann <daniel.baumann@progress-linux.org>
Diffstat (limited to 'vendor/similar/src/utils.rs')
-rw-r--r--vendor/similar/src/utils.rs415
1 files changed, 415 insertions, 0 deletions
diff --git a/vendor/similar/src/utils.rs b/vendor/similar/src/utils.rs
new file mode 100644
index 000000000..1f8fdc989
--- /dev/null
+++ b/vendor/similar/src/utils.rs
@@ -0,0 +1,415 @@
+//! Utilities for common diff related operations.
+//!
+//! This module provides specialized utilities and simplified diff operations
+//! for common operations. It's useful when you want to work with text diffs
+//! and you're interested in getting vectors of these changes directly.
+//!
+//! # Slice Remapping
+//!
+//! When working with [`TextDiff`] it's common that one takes advantage of the
+//! built-in tokenization of the differ. This for instance lets you do
+//! grapheme level diffs. This is implemented by the differ generating rather
+//! small slices of strings and running a diff algorithm over them.
+//!
+//! The downside of this is that all the [`DiffOp`] objects produced by the
+//! diffing algorithm encode operations on these rather small slices. For
+//! a lot of use cases this is not what one wants which can make this very
+//! inconvenient. This module provides a [`TextDiffRemapper`] which lets you
+//! map from the ranges that the [`TextDiff`] returns to the original input
+//! strings. For more information see [`TextDiffRemapper`].
+//!
+//! # Simple Diff Functions
+//!
+//! This module provides a range of common text diff functions that will
+//! produce vectors of `(change_tag, value)` tuples. They will automatically
+//! optimize towards returning the most useful slice that one would expect for
+//! the type of diff performed.
+
+use std::hash::Hash;
+use std::ops::{Index, Range};
+
+use crate::{
+ capture_diff_slices, Algorithm, ChangeTag, DiffOp, DiffableStr, DiffableStrRef, TextDiff,
+};
+
+/// Maps ranges expressed in slice indexes back onto one contiguous source
+/// value.
+struct SliceRemapper<'x, T: ?Sized> {
+ // The original, untokenized value that remapped slices are cut from.
+ source: &'x T,
+ // One `start..end` range per input slice, built by accumulating the
+ // `len()` of the slices in order.
+ indexes: Vec<Range<usize>>,
+}
+
+impl<'x, T: DiffableStr + ?Sized> SliceRemapper<'x, T> {
+    /// Builds a remapper for `source` from the slices it was split into.
+    ///
+    /// The offset of every slice is derived purely from the accumulated
+    /// `len()` of the slices in front of it, starting at zero.  The offsets
+    /// therefore only line up with `source` if `slices` covers it from the
+    /// start, in order and without gaps.
+    fn new(source: &'x T, slices: &[&'x T]) -> SliceRemapper<'x, T> {
+        let indexes = slices
+            .iter()
+            .scan(0, |offset, item| {
+                let start = *offset;
+                let end = start + item.len();
+                *offset = end;
+                Some(start..end)
+            })
+            .collect();
+        SliceRemapper { source, indexes }
+    }
+
+    /// Returns the part of the source spanned by the slices in `range`.
+    ///
+    /// `range` is expressed in slice indexes, not in offsets into the source.
+    /// `None` is returned if `range.start` or the last slice of the range
+    /// (`range.end - 1`) does not exist, which includes every range that ends
+    /// at zero.
+    fn slice(&self, range: Range<usize>) -> Option<&'x T> {
+        let start = self.indexes.get(range.start)?.start;
+        // A plain `range.end - 1` underflows for a range ending at zero
+        // (e.g. `0..0`): that is a panic in debug builds and a wrap-around in
+        // release builds.  `checked_sub` reports `None` in both.
+        let last = range.end.checked_sub(1)?;
+        let end = self.indexes.get(last)?.end;
+        Some(self.source.slice(start..end))
+    }
+}
+
+impl<T: DiffableStr + ?Sized> Index<Range<usize>> for SliceRemapper<'_, T> {
+    type Output = T;
+
+    /// Panicking counterpart of `slice`.
+    ///
+    /// Panics with the message `out of bounds` when `slice` yields `None`
+    /// for `range`.
+    fn index(&self, range: Range<usize>) -> &Self::Output {
+        let remapped = self.slice(range);
+        remapped.expect("out of bounds")
+    }
+}
+
+/// A remapper that can remap diff ops to the original slices.
+///
+/// The idea here is that when a [`TextDiff`](crate::TextDiff) is created from
+/// two strings and the internal tokenization is used, this remapper can take
+/// a range in the tokenized sequences and remap it to the original string.
+/// This is particularly useful when you want to do things like character or
+/// grapheme level diffs but you do not want to iterate over small sequences
+/// but over large consecutive ones from the source.
+///
+/// ```rust
+/// use similar::{ChangeTag, TextDiff};
+/// use similar::utils::TextDiffRemapper;
+///
+/// let old = "yo! foo bar baz";
+/// let new = "yo! foo bor baz";
+/// let diff = TextDiff::from_words(old, new);
+/// let remapper = TextDiffRemapper::from_text_diff(&diff, old, new);
+/// let changes: Vec<_> = diff.ops()
+/// .iter()
+/// .flat_map(move |x| remapper.iter_slices(x))
+/// .collect();
+///
+/// assert_eq!(changes, vec![
+/// (ChangeTag::Equal, "yo! foo "),
+/// (ChangeTag::Delete, "bar"),
+/// (ChangeTag::Insert, "bor"),
+/// (ChangeTag::Equal, " baz")
+/// ]);
+/// ```
+pub struct TextDiffRemapper<'x, T: ?Sized> {
+ // Remapper over the old string and the slices it was split into.
+ old: SliceRemapper<'x, T>,
+ // Remapper over the new string and the slices it was split into.
+ new: SliceRemapper<'x, T>,
+}
+
+impl<'x, T: DiffableStr + ?Sized> TextDiffRemapper<'x, T> {
+    /// Creates a new remapper from strings and slices.
+    ///
+    /// `old_slices` and `new_slices` are the tokenized forms of `old` and
+    /// `new` respectively.
+    pub fn new(
+        old_slices: &[&'x T],
+        new_slices: &[&'x T],
+        old: &'x T,
+        new: &'x T,
+    ) -> TextDiffRemapper<'x, T> {
+        TextDiffRemapper {
+            old: SliceRemapper::new(old, old_slices),
+            new: SliceRemapper::new(new, new_slices),
+        }
+    }
+
+    /// Creates a new remapper from a text diff and the original strings.
+    ///
+    /// The slices are taken from `diff`; `old` and `new` should be the same
+    /// strings the diff was created from.
+    pub fn from_text_diff<'old, 'new, 'bufs>(
+        diff: &TextDiff<'old, 'new, 'bufs, T>,
+        old: &'x T,
+        new: &'x T,
+    ) -> TextDiffRemapper<'x, T>
+    where
+        'old: 'x,
+        'new: 'x,
+    {
+        TextDiffRemapper {
+            old: SliceRemapper::new(old, diff.old_slices()),
+            new: SliceRemapper::new(new, diff.new_slices()),
+        }
+    }
+
+    /// Slices into the old string.
+    ///
+    /// `range` is expressed in slice indexes of the old sequence.
+    pub fn slice_old(&self, range: Range<usize>) -> Option<&'x T> {
+        self.old.slice(range)
+    }
+
+    /// Slices into the new string.
+    ///
+    /// `range` is expressed in slice indexes of the new sequence.
+    pub fn slice_new(&self, range: Range<usize>) -> Option<&'x T> {
+        self.new.slice(range)
+    }
+
+    /// Given a diffop yields the changes it encodes against the original strings.
+    ///
+    /// This is the same as the [`DiffOp::iter_slices`] method.  Equal, insert
+    /// and delete ops yield a single change; a replace op yields the deletion
+    /// followed by the insertion.
+    ///
+    /// ## Panics
+    ///
+    /// This method can panic if the input strings passed to the constructor
+    /// are incompatible with the input strings passed to the diffing algorithm.
+    pub fn iter_slices(&self, op: &DiffOp) -> impl Iterator<Item = (ChangeTag, &'x T)> {
+        // note: this mirrors the logic of `DiffOp::iter_slices`.  It is
+        // duplicated here because the slicing currently cannot be well
+        // abstracted because of lifetime issues caused by the `Index` trait.
+        //
+        // Every op produces one change, only a replacement produces a second.
+        let (first, second) = match *op {
+            DiffOp::Equal { old_index, len, .. } => (
+                (ChangeTag::Equal, self.old.slice(old_index..old_index + len)),
+                None,
+            ),
+            DiffOp::Insert {
+                new_index, new_len, ..
+            } => (
+                (
+                    ChangeTag::Insert,
+                    self.new.slice(new_index..new_index + new_len),
+                ),
+                None,
+            ),
+            DiffOp::Delete {
+                old_index, old_len, ..
+            } => (
+                (
+                    ChangeTag::Delete,
+                    self.old.slice(old_index..old_index + old_len),
+                ),
+                None,
+            ),
+            DiffOp::Replace {
+                old_index,
+                old_len,
+                new_index,
+                new_len,
+            } => (
+                (
+                    ChangeTag::Delete,
+                    self.old.slice(old_index..old_index + old_len),
+                ),
+                Some((
+                    ChangeTag::Insert,
+                    self.new.slice(new_index..new_index + new_len),
+                )),
+            ),
+        };
+        // `chain` accepts any `IntoIterator`, so the optional second change
+        // can be passed as is.  The bounds check stays lazy: it only fires
+        // when the corresponding item is pulled from the iterator.
+        Some(first)
+            .into_iter()
+            .chain(second)
+            .map(|(tag, opt_val)| (tag, opt_val.expect("slice out of bounds")))
+    }
+}
+
+/// Diffs two slices and returns the changes as a vector.
+///
+/// Every entry pairs a change tag with the sub-slice of `old` or `new` that
+/// the change applies to.
+///
+/// ```rust
+/// use similar::{Algorithm, ChangeTag};
+/// use similar::utils::diff_slices;
+///
+/// let old = "foo\nbar\nbaz".lines().collect::<Vec<_>>();
+/// let new = "foo\nbar\nBAZ".lines().collect::<Vec<_>>();
+/// assert_eq!(diff_slices(Algorithm::Myers, &old, &new), vec![
+///     (ChangeTag::Equal, &["foo", "bar"][..]),
+///     (ChangeTag::Delete, &["baz"][..]),
+///     (ChangeTag::Insert, &["BAZ"][..]),
+/// ]);
+/// ```
+pub fn diff_slices<'x, T: PartialEq + Hash + Ord>(
+    alg: Algorithm,
+    old: &'x [T],
+    new: &'x [T],
+) -> Vec<(ChangeTag, &'x [T])> {
+    let ops = capture_diff_slices(alg, old, new);
+    let mut changes = Vec::new();
+    for op in &ops {
+        changes.extend(op.iter_slices(old, new));
+    }
+    changes
+}
+
+/// Computes a character level diff of two strings.
+///
+/// The changes are returned as a vector.  The values are connected slices
+/// into the original strings rather than individual characters.
+///
+/// ```rust
+/// use similar::{Algorithm, ChangeTag};
+/// use similar::utils::diff_chars;
+///
+/// assert_eq!(diff_chars(Algorithm::Myers, "foobarbaz", "fooBARbaz"), vec![
+///     (ChangeTag::Equal, "foo"),
+///     (ChangeTag::Delete, "bar"),
+///     (ChangeTag::Insert, "BAR"),
+///     (ChangeTag::Equal, "baz"),
+/// ]);
+/// ```
+pub fn diff_chars<'x, T: DiffableStrRef + ?Sized>(
+    alg: Algorithm,
+    old: &'x T,
+    new: &'x T,
+) -> Vec<(ChangeTag, &'x T::Output)> {
+    let old_str = old.as_diffable_str();
+    let new_str = new.as_diffable_str();
+    let diff = TextDiff::configure()
+        .algorithm(alg)
+        .diff_chars(old_str, new_str);
+    let remapper = TextDiffRemapper::from_text_diff(&diff, old_str, new_str);
+
+    // Map every op back onto the original strings, in op order.
+    let mut changes = Vec::new();
+    for op in diff.ops() {
+        changes.extend(remapper.iter_slices(op));
+    }
+    changes
+}
+
+/// Computes a word level diff of two strings.
+///
+/// The changes are returned as a vector.  The values are connected slices
+/// into the original strings rather than individual words.
+///
+/// ```rust
+/// use similar::{Algorithm, ChangeTag};
+/// use similar::utils::diff_words;
+///
+/// assert_eq!(diff_words(Algorithm::Myers, "foo bar baz", "foo bor baz"), vec![
+///     (ChangeTag::Equal, "foo "),
+///     (ChangeTag::Delete, "bar"),
+///     (ChangeTag::Insert, "bor"),
+///     (ChangeTag::Equal, " baz"),
+/// ]);
+/// ```
+pub fn diff_words<'x, T: DiffableStrRef + ?Sized>(
+    alg: Algorithm,
+    old: &'x T,
+    new: &'x T,
+) -> Vec<(ChangeTag, &'x T::Output)> {
+    let old_str = old.as_diffable_str();
+    let new_str = new.as_diffable_str();
+    let diff = TextDiff::configure()
+        .algorithm(alg)
+        .diff_words(old_str, new_str);
+    let remapper = TextDiffRemapper::from_text_diff(&diff, old_str, new_str);
+
+    // Map every op back onto the original strings, in op order.
+    let mut changes = Vec::new();
+    for op in diff.ops() {
+        changes.extend(remapper.iter_slices(op));
+    }
+    changes
+}
+
+/// Computes a unicode word level diff of two strings.
+///
+/// The changes are returned as a vector.  The values are connected slices
+/// into the original strings rather than individual words.
+///
+/// ```rust
+/// use similar::{Algorithm, ChangeTag};
+/// use similar::utils::diff_unicode_words;
+///
+/// let old = "The quick (\"brown\") fox can't jump 32.3 feet, right?";
+/// let new = "The quick (\"brown\") fox can't jump 9.84 meters, right?";
+/// assert_eq!(diff_unicode_words(Algorithm::Myers, old, new), vec![
+///     (ChangeTag::Equal, "The quick (\"brown\") fox can\'t jump "),
+///     (ChangeTag::Delete, "32.3"),
+///     (ChangeTag::Insert, "9.84"),
+///     (ChangeTag::Equal, " "),
+///     (ChangeTag::Delete, "feet"),
+///     (ChangeTag::Insert, "meters"),
+///     (ChangeTag::Equal, ", right?")
+/// ]);
+/// ```
+///
+/// This requires the `unicode` feature.
+#[cfg(feature = "unicode")]
+pub fn diff_unicode_words<'x, T: DiffableStrRef + ?Sized>(
+    alg: Algorithm,
+    old: &'x T,
+    new: &'x T,
+) -> Vec<(ChangeTag, &'x T::Output)> {
+    let old_str = old.as_diffable_str();
+    let new_str = new.as_diffable_str();
+    let diff = TextDiff::configure()
+        .algorithm(alg)
+        .diff_unicode_words(old_str, new_str);
+    let remapper = TextDiffRemapper::from_text_diff(&diff, old_str, new_str);
+
+    // Map every op back onto the original strings, in op order.
+    let mut changes = Vec::new();
+    for op in diff.ops() {
+        changes.extend(remapper.iter_slices(op));
+    }
+    changes
+}
+
+/// Computes a grapheme level diff of two strings.
+///
+/// The changes are returned as a vector.  The values are connected slices
+/// into the original strings rather than individual graphemes.
+///
+/// ```rust
+/// use similar::{Algorithm, ChangeTag};
+/// use similar::utils::diff_graphemes;
+///
+/// let old = "The flag of Austria is 🇦🇹";
+/// let new = "The flag of Albania is 🇦🇱";
+/// assert_eq!(diff_graphemes(Algorithm::Myers, old, new), vec![
+///     (ChangeTag::Equal, "The flag of A"),
+///     (ChangeTag::Delete, "ustr"),
+///     (ChangeTag::Insert, "lban"),
+///     (ChangeTag::Equal, "ia is "),
+///     (ChangeTag::Delete, "🇦🇹"),
+///     (ChangeTag::Insert, "🇦🇱"),
+/// ]);
+/// ```
+///
+/// This requires the `unicode` feature.
+#[cfg(feature = "unicode")]
+pub fn diff_graphemes<'x, T: DiffableStrRef + ?Sized>(
+    alg: Algorithm,
+    old: &'x T,
+    new: &'x T,
+) -> Vec<(ChangeTag, &'x T::Output)> {
+    let old_str = old.as_diffable_str();
+    let new_str = new.as_diffable_str();
+    let diff = TextDiff::configure()
+        .algorithm(alg)
+        .diff_graphemes(old_str, new_str);
+    let remapper = TextDiffRemapper::from_text_diff(&diff, old_str, new_str);
+
+    // Map every op back onto the original strings, in op order.
+    let mut changes = Vec::new();
+    for op in diff.ops() {
+        changes.extend(remapper.iter_slices(op));
+    }
+    changes
+}
+
+/// Computes a line level diff of two strings.
+///
+/// The changes are returned as a vector.  Unlike [`diff_chars`] or
+/// [`diff_slices`] the lines are not merged: there is one change tag for
+/// every line.
+///
+/// ```rust
+/// use similar::{Algorithm, ChangeTag};
+/// use similar::utils::diff_lines;
+///
+/// assert_eq!(diff_lines(Algorithm::Myers, "foo\nbar\nbaz\nblah", "foo\nbar\nbaz\nblurgh"), vec![
+///     (ChangeTag::Equal, "foo\n"),
+///     (ChangeTag::Equal, "bar\n"),
+///     (ChangeTag::Equal, "baz\n"),
+///     (ChangeTag::Delete, "blah"),
+///     (ChangeTag::Insert, "blurgh"),
+/// ]);
+/// ```
+pub fn diff_lines<'x, T: DiffableStrRef + ?Sized>(
+    alg: Algorithm,
+    old: &'x T,
+    new: &'x T,
+) -> Vec<(ChangeTag, &'x T::Output)> {
+    let diff = TextDiff::configure().algorithm(alg).diff_lines(old, new);
+    let mut changes = Vec::new();
+    for change in diff.iter_all_changes() {
+        changes.push((change.tag(), change.value()));
+    }
+    changes
+}
+
+#[test]
+fn test_remapper() {
+    let a = "foo bar baz";
+    // The assertions below rely on the input being split into five slices:
+    // the three words plus the two separating spaces.
+    let words = a.tokenize_words();
+    let remap = SliceRemapper::new(a, &words);
+    assert_eq!(remap.slice(0..3), Some("foo bar"));
+    assert_eq!(remap.slice(1..3), Some(" bar"));
+    assert_eq!(remap.slice(0..1), Some("foo"));
+    assert_eq!(remap.slice(0..5), Some("foo bar baz"));
+    // One past the last slice is out of bounds.
+    assert_eq!(remap.slice(0..6), None);
+}