summaryrefslogtreecommitdiffstats
path: root/vendor/imara-diff/src/lib.rs
diff options
context:
space:
mode:
authorDaniel Baumann <daniel.baumann@progress-linux.org>2024-05-30 18:31:44 +0000
committerDaniel Baumann <daniel.baumann@progress-linux.org>2024-05-30 18:31:44 +0000
commitc23a457e72abe608715ac76f076f47dc42af07a5 (patch)
tree2772049aaf84b5c9d0ed12ec8d86812f7a7904b6 /vendor/imara-diff/src/lib.rs
parentReleasing progress-linux version 1.73.0+dfsg1-1~progress7.99u1. (diff)
downloadrustc-c23a457e72abe608715ac76f076f47dc42af07a5.tar.xz
rustc-c23a457e72abe608715ac76f076f47dc42af07a5.zip
Merging upstream version 1.74.1+dfsg1.
Signed-off-by: Daniel Baumann <daniel.baumann@progress-linux.org>
Diffstat (limited to 'vendor/imara-diff/src/lib.rs')
-rw-r--r--vendor/imara-diff/src/lib.rs267
1 files changed, 0 insertions, 267 deletions
diff --git a/vendor/imara-diff/src/lib.rs b/vendor/imara-diff/src/lib.rs
deleted file mode 100644
index c20b750ca..000000000
--- a/vendor/imara-diff/src/lib.rs
+++ /dev/null
@@ -1,267 +0,0 @@
-//! Imara-diff is a solid (imara in swahili) diff library for rust.
-//! Solid refers to the fact that imara-diff provides very good runtime performance even
-//! in pathologic cases so that your application never appears to freeze while waiting on a diff.
-//! The performance improvements are achieved using battle tested heuristics used in gnu-diff and git
-//! that are known to yield fast runtime and performance.
-//!
-//! Imara-diff is also designed to be flexible so that it can be used with arbitrary collections and
-//! not just lists and strings and even allows reusing large parts of the computation when
-//! comparing the same file to multiple different files.
-//!
-//! Imara-diff provides two diff algorithms:
-//!
-//! * The linear-space variant of the well known [**myer** algorithm](http://www.xmailserver.org/diff2.pdf)
-//! * The **Histogram** algorithm which variant of the patience diff algorithm.
-//!
-//! Myers algorithm has been enhanced with preprocessing and multiple heuristics to ensure fast runtime in pathological
-//! cases to avoid quadratic time complexity and closely matches the behaviour of gnu-diff and git.
-//! The Histogram algorithm was originally ported from git but has been heavily optimized.
-//! The **Histogram algorithm outperforms Myers diff** by 10% - 100% across a **wide variety of workloads**.
-//!
-//! Imara-diffs algorithms have been benchmarked over a wide variety of real-world code.
-//! For example while comparing multiple different linux kernel it performs up to 30 times better than the `similar` crate:
-#![cfg_attr(doc, doc=concat!("<img width=\"600\" class=\"figure\" src=\"data:image/svg+xml;base64,", include_str!("../plots/linux_comparison.svg.base64"), "\"></img>"))]
-//!
-//! # Api Overview
-//!
-//! Imara-diff provides the [`UnifiedDiffBuilder`](crate::UnifiedDiffBuilder) for building
-//! a human-redable diff similar to the output of `git diff` or `diff -u`.
-//! This makes building a tool similar to gnu diff easy:
-//!
-//! ```
-//! use imara_diff::intern::InternedInput;
-//! use imara_diff::{diff, Algorithm, UnifiedDiffBuilder};
-//!
-//! let before = r#"fn foo() -> Bar {
-//! let mut foo = 2;
-//! foo *= 50;
-//! println!("hello world")
-//! }"#;
-//!
-//! let after = r#"// lorem ipsum
-//! fn foo() -> Bar {
-//! let mut foo = 2;
-//! foo *= 50;
-//! println!("hello world");
-//! println!("{foo}");
-//! }
-//! // foo
-//! "#;
-//!
-//! let input = InternedInput::new(before, after);
-//! let diff = diff(Algorithm::Histogram, &input, UnifiedDiffBuilder::new(&input));
-//! assert_eq!(
-//! diff,
-//! r#"@@ -1,5 +1,8 @@
-//! +// lorem ipsum
-//! fn foo() -> Bar {
-//! let mut foo = 2;
-//! foo *= 50;
-//! - println!("hello world")
-//! + println!("hello world");
-//! + println!("{foo}");
-//! }
-//! +// foo
-//! "#
-//! );
-//! ```
-//!
-//! If you want to process the diff in some way you can provide your own implementation of [`Sink`](crate::sink::Sink).
-//! For closures [`Sink`](crate::sink::Sink) is already implemented, so simple [`Sink`]s can be easily added:
-//!
-//! ```
-//! use std::ops::Range;
-//!
-//! use imara_diff::intern::InternedInput;
-//! use imara_diff::{diff, Algorithm, UnifiedDiffBuilder};
-//!
-//! let before = r#"fn foo() -> Bar {
-//! let mut foo = 2;
-//! foo *= 50;
-//! println!("hello world")
-//! }"#;
-//!
-//! let after = r#"// lorem ipsum
-//! fn foo() -> Bar {
-//! let mut foo = 2;
-//! foo *= 50;
-//! println!("hello world");
-//! println!("{foo}");
-//! }
-//! // foo
-//! "#;
-//!
-//! let mut insertions = Vec::new();
-//! let mut removals = Vec::new();
-//! let mut replacements = Vec::new();
-//!
-//! let input = InternedInput::new(before, after);
-//! let sink = |before: Range<u32>, after: Range<u32>| {
-//! let hunk_before: Vec<_> = input.before[before.start as usize..before.end as usize]
-//! .iter()
-//! .map(|&line| input.interner[line])
-//! .collect();
-//! let hunk_after: Vec<_> = input.after[after.start as usize..after.end as usize]
-//! .iter()
-//! .map(|&line| input.interner[line])
-//! .collect();
-//! if hunk_after.is_empty() {
-//! removals.push(hunk_before)
-//! } else if hunk_before.is_empty() {
-//! insertions.push(hunk_after)
-//! } else {
-//! replacements.push((hunk_before, hunk_after))
-//! }
-//! };
-//! let diff = diff(Algorithm::Histogram, &input, sink);
-//! assert_eq!(&insertions, &[vec!["// lorem ipsum"], vec!["// foo"]]);
-//! assert!(removals.is_empty());
-//! assert_eq!(
-//! &replacements,
-//! &[(
-//! vec![" println!(\"hello world\")"],
-//! vec![" println!(\"hello world\");", " println!(\"{foo}\");"]
-//! )]
-//! );
-//! ```
-//!
-//! For `&str` and `&[u8]` imara-diff will compute a line diff by default.
-//! To perform diffs of different tokenizations and collections you can implement the [`TokenSource`](crate::intern::TokenSource) trait.
-//! For example the imara-diff provides an alternative tokenziser for line-diffs that includes the line terminator in the line:
-//!
-//! ```
-//! use imara_diff::intern::InternedInput;
-//! use imara_diff::sink::Counter;
-//! use imara_diff::sources::lines_with_terminator;
-//! use imara_diff::{diff, Algorithm, UnifiedDiffBuilder};
-//!
-//! let before = "foo";
-//! let after = "foo\n";
-//!
-//! let input = InternedInput::new(before, after);
-//! let changes = diff(Algorithm::Histogram, &input, Counter::default());
-//! assert_eq!(changes.insertions, 0);
-//! assert_eq!(changes.removals, 0);
-//!
-//! let input = InternedInput::new(lines_with_terminator(before), lines_with_terminator(after));
-//! let changes = diff(Algorithm::Histogram, &input, Counter::default());
-//! assert_eq!(changes.insertions, 1);
-//! assert_eq!(changes.removals, 1);
-//! ```
-
-use std::hash::Hash;
-
-#[cfg(feature = "unified_diff")]
-pub use unified_diff::UnifiedDiffBuilder;
-
-use crate::intern::{InternedInput, Token, TokenSource};
-pub use crate::sink::Sink;
-mod histogram;
-pub mod intern;
-mod myers;
-pub mod sink;
-pub mod sources;
-#[cfg(feature = "unified_diff")]
-mod unified_diff;
-mod util;
-
-#[cfg(test)]
-mod tests;
-
-/// `imara-diff` supports multiple different algorithms
-/// for computing an edit sequence.
-/// These algorithms have different performance and all produce different output.
-#[derive(Debug, PartialEq, Eq, Clone, Copy)]
-pub enum Algorithm {
- /// A variation of the [`patience` diff algorithm described by Bram Cohen's blog post](https://bramcohen.livejournal.com/73318.html)
- /// that uses a histogram to find the least common LCS.
- /// Just like the `patience` diff algorithm, this algorithm usually produces
- /// more human readable output then myers algorithm.
- /// However compared to the `patience` diff algorithm (which is slower then myers algorithm),
- /// the Histogram algorithm performs much better.
- ///
- /// The implementation here was originally ported from `git` but has been significantly
- /// modified to improve performance.
- /// As a result it consistently **performs better then myers algorithm** (5%-100%) over
- /// a wide variety of test data. For example a benchmark of diffing linux kernel commits is shown below:
- #[cfg_attr(doc, doc=concat!("<img width=\"600\" class=\"figure\" src=\"data:image/svg+xml;base64,", include_str!("../plots/linux_speedup.svg.base64"), "\"></img>"))]
- ///
- /// For pathological subsequences that only contain highly repeating tokens (64+ occurrences)
- /// the algorithm falls back on Myers algorithm (with heuristics) to avoid quadratic behavior.
- ///
- /// Compared to Myers algorithm, the Histogram diff algorithm is more focused on providing
- /// human readable diffs instead of minimal diffs. In practice this means that the edit-sequences
- /// produced by the histogram diff are often longer then those produced by Myers algorithm.
- ///
- /// The heuristic used by the histogram diff does not work well for inputs with small (often repeated)
- /// tokens. For example **character diffs do not work well** as most (english) text is madeup of
- /// a fairly small set of characters. The `Histogram` algorithm will automatically these cases and
- /// fallback to Myers algorithm. However this detection has a nontrivial overhead, so
- /// if its known upfront that the sort of tokens is very small `Myers` algorithm should
- /// be used instead.
- Histogram,
- /// An implementation of the linear space variant of
- /// [Myers `O((N+M)D)` algorithm](http://www.xmailserver.org/diff2.pdf).
- /// The algorithm is enhanced with preprocessing that removes
- /// tokens that don't occur in the other file at all.
- /// Furthermore two heuristics to the middle snake search are implemented
- /// that ensure reasonable runtime (mostly linear time complexity) even for large files.
- ///
- /// Due to the divide and conquer nature of the algorithm
- /// the edit sequenced produced are still fairly small even when the middle snake
- /// search is aborted by a heuristic.
- /// However, the produced edit sequences are not guaranteed to be fully minimal.
- /// If that property is vital to you, use the `MyersMinimal` algorithm instead.
- ///
- /// The implementation (including the preprocessing) are mostly
- /// ported from `git` and `gnu-diff` where Myers algorithm is used
- /// as the default diff algorithm.
- /// Therefore the used heuristics have been heavily battle tested and
- /// are known to behave well over a large variety of inputs
- Myers,
- /// Same as `Myers` but the early abort heuristics are disabled to guarantee
- /// a minimal edit sequence.
- /// This can mean significant slowdown in pathological cases.
- MyersMinimal,
-}
-
-impl Algorithm {
- #[cfg(test)]
- const ALL: [Self; 2] = [Algorithm::Histogram, Algorithm::Myers];
-}
-
-impl Default for Algorithm {
- fn default() -> Self {
- Algorithm::Histogram
- }
-}
-
-/// Computes an edit-script that transforms `input.before` into `input.after` using
-/// the specified `algorithm`
-/// The edit-script is passed to `sink.process_change` while it is produced.
-pub fn diff<S: Sink, T: Eq + Hash>(
- algorithm: Algorithm,
- input: &InternedInput<T>,
- sink: S,
-) -> S::Out {
- diff_with_tokens(algorithm, &input.before, &input.after, input.interner.num_tokens(), sink)
-}
-
-/// Computes an edit-script that transforms `before` into `after` using
-/// the specified `algorithm`
-/// The edit-script is passed to `sink.process_change` while it is produced.
-pub fn diff_with_tokens<S: Sink>(
- algorithm: Algorithm,
- before: &[Token],
- after: &[Token],
- num_tokens: u32,
- sink: S,
-) -> S::Out {
- assert!(before.len() < i32::MAX as usize, "imara-diff only supports up to {} tokens", i32::MAX);
- assert!(after.len() < i32::MAX as usize, "imara-diff only supports up to {} tokens", i32::MAX);
- match algorithm {
- Algorithm::Histogram => histogram::diff(before, after, num_tokens, sink),
- Algorithm::Myers => myers::diff(before, after, num_tokens, sink, false),
- Algorithm::MyersMinimal => myers::diff(before, after, num_tokens, sink, true),
- }
-}