summaryrefslogtreecommitdiffstats
path: root/vendor/imara-diff/benches
diff options
context:
space:
mode:
authorDaniel Baumann <daniel.baumann@progress-linux.org>2024-05-04 12:41:41 +0000
committerDaniel Baumann <daniel.baumann@progress-linux.org>2024-05-04 12:41:41 +0000
commit10ee2acdd26a7f1298c6f6d6b7af9b469fe29b87 (patch)
treebdffd5d80c26cf4a7a518281a204be1ace85b4c1 /vendor/imara-diff/benches
parentReleasing progress-linux version 1.70.0+dfsg1-9~progress7.99u1. (diff)
downloadrustc-10ee2acdd26a7f1298c6f6d6b7af9b469fe29b87.tar.xz
rustc-10ee2acdd26a7f1298c6f6d6b7af9b469fe29b87.zip
Merging upstream version 1.70.0+dfsg2.
Signed-off-by: Daniel Baumann <daniel.baumann@progress-linux.org>
Diffstat (limited to 'vendor/imara-diff/benches')
-rw-r--r--vendor/imara-diff/benches/git_repo.rs156
1 files changed, 156 insertions, 0 deletions
diff --git a/vendor/imara-diff/benches/git_repo.rs b/vendor/imara-diff/benches/git_repo.rs
new file mode 100644
index 000000000..1c76b30f1
--- /dev/null
+++ b/vendor/imara-diff/benches/git_repo.rs
@@ -0,0 +1,156 @@
+use std::convert::Infallible;
+use std::path::PathBuf;
+
+use criterion::measurement::Measurement;
+use criterion::{
+ black_box, criterion_group, criterion_main, BenchmarkGroup, BenchmarkId, Criterion,
+};
+use git_repository::object::tree::diff::{Action, Change};
+use git_repository::Id;
+use imara_diff::intern::InternedInput;
+use imara_diff::sink::Counter;
+use imara_diff::Algorithm;
+
+fn extract_diff(change: &Change) -> Option<(Vec<u8>, Vec<u8>)> {
+ use git_repository::object::tree::diff::change::Event::Modification;
+
+ let (previous_id, id) = match change.event {
+ Modification { previous_entry_mode, previous_id, entry_mode, id }
+ if previous_entry_mode.is_blob() && entry_mode.is_blob() =>
+ {
+ (previous_id, id)
+ }
+ _ => return None,
+ };
+
+ let old = previous_id.object().ok()?.detach().data;
+ let new = id.object().ok()?.detach().data;
+
+ Some((new, old))
+}
+
+fn git_tree_diff(from: Id, to: Id, diffs: &mut Vec<(Vec<u8>, Vec<u8>, usize)>) {
+ let from_tree = from.object().unwrap().peel_to_tree().unwrap();
+ let to_tree = to.object().unwrap().peel_to_tree().unwrap();
+ from_tree
+ .changes()
+ .track_filename()
+ .for_each_to_obtain_tree(&to_tree, |change| -> Result<_, Infallible> {
+ if let Some((old, new)) = extract_diff(&change) {
+ let input = InternedInput::new(&*old, &*new);
+ let changes =
+ imara_diff::diff(Algorithm::Myers, &input, Counter::default()).total();
+ let complexity = changes * (old.len() + new.len());
+ diffs.push((old, new, complexity));
+ }
+ Ok(Action::Continue)
+ })
+ .unwrap();
+}
+
+pub fn project_root() -> PathBuf {
+ let dir = env!("CARGO_MANIFEST_DIR");
+ let mut res = PathBuf::from(dir);
+ while !res.join("README.md").exists() {
+ res = res.parent().expect("reached fs root without finding project root").to_owned()
+ }
+ res
+}
+
+fn bench_repo(c: &mut Criterion, name: &str, tag2: &str, tag1: &str, num_commits: usize) {
+ let path = project_root().join("bench_data").join("repos").join(name);
+ let repo = git_repository::open(path).unwrap();
+ let tag1 = repo.find_reference(tag1).unwrap().into_fully_peeled_id().unwrap();
+ let tag2 = repo.find_reference(tag2).unwrap().into_fully_peeled_id().unwrap();
+ let mut diffs = Vec::new();
+ git_tree_diff(tag1, tag2, &mut diffs);
+ let mut last_commit = tag2;
+ tag2.object()
+ .unwrap()
+ .into_commit()
+ .ancestors()
+ .all()
+ .unwrap()
+ .take(num_commits as usize)
+ .for_each(|parent| {
+ let parent = parent.unwrap();
+ git_tree_diff(last_commit, parent, &mut diffs);
+ last_commit = parent;
+ });
+ diffs.sort_unstable_by_key(|(_, _, complexity)| *complexity);
+
+ if std::env::var("PLOT").is_ok() {
+ let mut group = c.benchmark_group(format!("{name}_plot"));
+ group.sample_size(15);
+ bench_file_diffs(group, &diffs, 12, true);
+ } else {
+ bench_file_diffs(c.benchmark_group(name), &diffs, 2, false);
+ }
+}
+
+fn bench_file_diffs<M: Measurement>(
+ mut group: BenchmarkGroup<M>,
+ files: &[(Vec<u8>, Vec<u8>, usize)],
+ num_chunks: usize,
+ compare_to_similar: bool,
+) {
+ let mut run = |name, f: fn(&[u8], &[u8]) -> usize| {
+ let mut i = 0;
+ for chunk in files.chunks((files.len() + num_chunks - 1) / num_chunks) {
+ let mut average_complexity: usize = chunk.iter().map(|(_, _, it)| *it).sum();
+ average_complexity /= chunk.len();
+ println!("benchmarking {i}..{}/{}", i + chunk.len(), files.len());
+ i += chunk.len();
+ group.bench_function(
+ BenchmarkId::new(name, format!("{average_complexity}::{}", chunk.len())),
+ |b| {
+ b.iter(|| {
+ for (old, new, _) in chunk {
+ // myers algorithm is O(ND) where D is the length of the (minimal) edit script and N the sum of file lengths
+ // we use that as an x axis to find a meaningful way to plot a
+ black_box(f(old, new));
+ }
+ });
+ },
+ );
+ }
+ };
+
+ run("imara_diff-histogram", |file1, file2| {
+ let input = InternedInput::new(file1, file2);
+ imara_diff::diff(Algorithm::Histogram, &input, Counter::default()).total()
+ });
+
+ run("imara_diff-myers", |file1, file2| {
+ let input = InternedInput::new(file1, file2);
+ imara_diff::diff(Algorithm::Myers, &input, Counter::default()).total()
+ });
+
+ if compare_to_similar {
+ run("similar", |file1, file2| {
+ let diff = similar::utils::diff_lines(similar::Algorithm::Myers, file1, file2);
+ diff.len()
+ });
+ }
+
+ group.finish();
+}
+
+fn rust(c: &mut Criterion) {
+ bench_repo(c, "rust", "1.64.0", "1.50.0", 30);
+}
+
+fn vscode(c: &mut Criterion) {
+ bench_repo(c, "vscode", "1.72.2", "1.41.0", 30);
+}
+
+fn linux(c: &mut Criterion) {
+ bench_repo(c, "linux", "v6.0", "v5.7", 30);
+}
+
+fn helix(c: &mut Criterion) {
+ bench_repo(c, "helix", "22.08.1", "v0.5.0", 30);
+}
+
+criterion_group!(realworld_repos, helix, rust, vscode, linux);
+criterion_main!(realworld_repos);