diff options
author | Daniel Baumann <daniel.baumann@progress-linux.org> | 2024-05-04 12:41:41 +0000 |
---|---|---|
committer | Daniel Baumann <daniel.baumann@progress-linux.org> | 2024-05-04 12:41:41 +0000 |
commit | 10ee2acdd26a7f1298c6f6d6b7af9b469fe29b87 (patch) | |
tree | bdffd5d80c26cf4a7a518281a204be1ace85b4c1 /vendor/imara-diff/benches | |
parent | Releasing progress-linux version 1.70.0+dfsg1-9~progress7.99u1. (diff) | |
download | rustc-10ee2acdd26a7f1298c6f6d6b7af9b469fe29b87.tar.xz rustc-10ee2acdd26a7f1298c6f6d6b7af9b469fe29b87.zip |
Merging upstream version 1.70.0+dfsg2.
Signed-off-by: Daniel Baumann <daniel.baumann@progress-linux.org>
Diffstat (limited to 'vendor/imara-diff/benches')
-rw-r--r-- | vendor/imara-diff/benches/git_repo.rs | 156 |
1 files changed, 156 insertions, 0 deletions
diff --git a/vendor/imara-diff/benches/git_repo.rs b/vendor/imara-diff/benches/git_repo.rs new file mode 100644 index 000000000..1c76b30f1 --- /dev/null +++ b/vendor/imara-diff/benches/git_repo.rs @@ -0,0 +1,156 @@ +use std::convert::Infallible; +use std::path::PathBuf; + +use criterion::measurement::Measurement; +use criterion::{ + black_box, criterion_group, criterion_main, BenchmarkGroup, BenchmarkId, Criterion, +}; +use git_repository::object::tree::diff::{Action, Change}; +use git_repository::Id; +use imara_diff::intern::InternedInput; +use imara_diff::sink::Counter; +use imara_diff::Algorithm; + +fn extract_diff(change: &Change) -> Option<(Vec<u8>, Vec<u8>)> { + use git_repository::object::tree::diff::change::Event::Modification; + + let (previous_id, id) = match change.event { + Modification { previous_entry_mode, previous_id, entry_mode, id } + if previous_entry_mode.is_blob() && entry_mode.is_blob() => + { + (previous_id, id) + } + _ => return None, + }; + + let old = previous_id.object().ok()?.detach().data; + let new = id.object().ok()?.detach().data; + + Some((new, old)) +} + +fn git_tree_diff(from: Id, to: Id, diffs: &mut Vec<(Vec<u8>, Vec<u8>, usize)>) { + let from_tree = from.object().unwrap().peel_to_tree().unwrap(); + let to_tree = to.object().unwrap().peel_to_tree().unwrap(); + from_tree + .changes() + .track_filename() + .for_each_to_obtain_tree(&to_tree, |change| -> Result<_, Infallible> { + if let Some((old, new)) = extract_diff(&change) { + let input = InternedInput::new(&*old, &*new); + let changes = + imara_diff::diff(Algorithm::Myers, &input, Counter::default()).total(); + let complexity = changes * (old.len() + new.len()); + diffs.push((old, new, complexity)); + } + Ok(Action::Continue) + }) + .unwrap(); +} + +pub fn project_root() -> PathBuf { + let dir = env!("CARGO_MANIFEST_DIR"); + let mut res = PathBuf::from(dir); + while !res.join("README.md").exists() { + res = res.parent().expect("reached fs root without finding project root").to_owned() + } + res +} + +fn bench_repo(c: &mut Criterion, name: &str, tag2: &str, tag1: &str, num_commits: usize) { + let path = project_root().join("bench_data").join("repos").join(name); + let repo = git_repository::open(path).unwrap(); + let tag1 = repo.find_reference(tag1).unwrap().into_fully_peeled_id().unwrap(); + let tag2 = repo.find_reference(tag2).unwrap().into_fully_peeled_id().unwrap(); + let mut diffs = Vec::new(); + git_tree_diff(tag1, tag2, &mut diffs); + let mut last_commit = tag2; + tag2.object() + .unwrap() + .into_commit() + .ancestors() + .all() + .unwrap() + .take(num_commits as usize) + .for_each(|parent| { + let parent = parent.unwrap(); + git_tree_diff(last_commit, parent, &mut diffs); + last_commit = parent; + }); + diffs.sort_unstable_by_key(|(_, _, complexity)| *complexity); + + if std::env::var("PLOT").is_ok() { + let mut group = c.benchmark_group(format!("{name}_plot")); + group.sample_size(15); + bench_file_diffs(group, &diffs, 12, true); + } else { + bench_file_diffs(c.benchmark_group(name), &diffs, 2, false); + } +} + +fn bench_file_diffs<M: Measurement>( + mut group: BenchmarkGroup<M>, + files: &[(Vec<u8>, Vec<u8>, usize)], + num_chunks: usize, + compare_to_similar: bool, +) { + let mut run = |name, f: fn(&[u8], &[u8]) -> usize| { + let mut i = 0; + for chunk in files.chunks((files.len() + num_chunks - 1) / num_chunks) { + let mut average_complexity: usize = chunk.iter().map(|(_, _, it)| *it).sum(); + average_complexity /= chunk.len(); + println!("benchmarking {i}..{}/{}", i + chunk.len(), files.len()); + i += chunk.len(); + group.bench_function( + BenchmarkId::new(name, format!("{average_complexity}::{}", chunk.len())), + |b| { + b.iter(|| { + for (old, new, _) in chunk { + // myers algorithm is O(ND) where D is the length of the (minimal) edit script and N the sum of file lengths + // we use that as an x axis to find a meaningful way to plot a + black_box(f(old, new)); + } + }); + }, + ); + } + }; + + run("imara_diff-histogram", |file1, file2| { + let input = InternedInput::new(file1, file2); + imara_diff::diff(Algorithm::Histogram, &input, Counter::default()).total() + }); + + run("imara_diff-myers", |file1, file2| { + let input = InternedInput::new(file1, file2); + imara_diff::diff(Algorithm::Myers, &input, Counter::default()).total() + }); + + if compare_to_similar { + run("similar", |file1, file2| { + let diff = similar::utils::diff_lines(similar::Algorithm::Myers, file1, file2); + diff.len() + }); + } + + group.finish(); +} + +fn rust(c: &mut Criterion) { + bench_repo(c, "rust", "1.64.0", "1.50.0", 30); +} + +fn vscode(c: &mut Criterion) { + bench_repo(c, "vscode", "1.72.2", "1.41.0", 30); +} + +fn linux(c: &mut Criterion) { + bench_repo(c, "linux", "v6.0", "v5.7", 30); +} + +fn helix(c: &mut Criterion) { + bench_repo(c, "helix", "22.08.1", "v0.5.0", 30); +} + +criterion_group!(realworld_repos, helix, rust, vscode, linux); +criterion_main!(realworld_repos); |