diff options
author | Daniel Baumann <daniel.baumann@progress-linux.org> | 2024-05-04 12:41:41 +0000 |
---|---|---|
committer | Daniel Baumann <daniel.baumann@progress-linux.org> | 2024-05-04 12:41:41 +0000 |
commit | 10ee2acdd26a7f1298c6f6d6b7af9b469fe29b87 (patch) | |
tree | bdffd5d80c26cf4a7a518281a204be1ace85b4c1 /vendor/gix/src/object/blob.rs | |
parent | Releasing progress-linux version 1.70.0+dfsg1-9~progress7.99u1. (diff) | |
download | rustc-10ee2acdd26a7f1298c6f6d6b7af9b469fe29b87.tar.xz rustc-10ee2acdd26a7f1298c6f6d6b7af9b469fe29b87.zip |
Merging upstream version 1.70.0+dfsg2.
Signed-off-by: Daniel Baumann <daniel.baumann@progress-linux.org>
Diffstat (limited to 'vendor/gix/src/object/blob.rs')
-rw-r--r-- | vendor/gix/src/object/blob.rs | 148 |
1 files changed, 148 insertions, 0 deletions
diff --git a/vendor/gix/src/object/blob.rs b/vendor/gix/src/object/blob.rs new file mode 100644 index 000000000..f35605422 --- /dev/null +++ b/vendor/gix/src/object/blob.rs @@ -0,0 +1,148 @@ +/// +pub mod diff { + use std::ops::Range; + + use crate::{bstr::ByteSlice, object::blob::diff::line::Change}; + + /// A platform to keep temporary information to perform line diffs on modified blobs. + /// + pub struct Platform<'old, 'new> { + /// The previous version of the blob. + pub old: crate::Object<'old>, + /// The new version of the blob. + pub new: crate::Object<'new>, + /// The algorithm to use when calling [imara_diff::diff()][gix_diff::blob::diff()]. + /// This value is determined by the `diff.algorithm` configuration. + pub algo: gix_diff::blob::Algorithm, + } + + /// + pub mod init { + /// The error returned by [`Platform::from_ids()`][super::Platform::from_ids()]. + #[derive(Debug, thiserror::Error)] + #[allow(missing_docs)] + pub enum Error { + #[error("Could not find the previous blob or the new blob to diff against")] + FindExisting(#[from] crate::object::find::existing::Error), + #[error("Could not obtain diff algorithm from configuration")] + DiffAlgorithm(#[from] crate::config::diff::algorithm::Error), + } + } + + impl<'old, 'new> Platform<'old, 'new> { + /// Produce a platform for performing various diffs after obtaining the object data of `previous_id` and `new_id`. + /// + /// Note that these objects are treated as raw data and are assumed to be blobs. + pub fn from_ids( + previous_id: &crate::Id<'old>, + new_id: &crate::Id<'new>, + ) -> Result<Platform<'old, 'new>, init::Error> { + match previous_id + .object() + .and_then(|old| new_id.object().map(|new| (old, new))) + { + Ok((old, new)) => { + let algo = match new_id.repo.config.diff_algorithm() { + Ok(algo) => algo, + Err(err) => return Err(err.into()), + }; + Ok(Platform { old, new, algo }) + } + Err(err) => Err(err.into()), + } + } + } + + /// + pub mod line { + use crate::bstr::BStr; + + /// A change to a hunk of lines. + pub enum Change<'a, 'data> { + /// Lines were added. + Addition { + /// The lines themselves without terminator. + lines: &'a [&'data BStr], + }, + /// Lines were removed. + Deletion { + /// The lines themselves without terminator. + lines: &'a [&'data BStr], + }, + /// Lines have been replaced. + Modification { + /// The replaced lines without terminator. + lines_before: &'a [&'data BStr], + /// The new lines without terminator. + lines_after: &'a [&'data BStr], + }, + } + } + + impl<'old, 'new> Platform<'old, 'new> { + /// Perform a diff on lines between the old and the new version of a blob, passing each hunk of lines to `process_hunk`. + /// The diffing algorithm is determined by the `diff.algorithm` configuration. + /// + /// Note that you can invoke the diff more flexibly as well. + // TODO: more tests (only tested insertion right now) + pub fn lines<FnH, E>(&self, mut process_hunk: FnH) -> Result<(), E> + where + FnH: FnMut(line::Change<'_, '_>) -> Result<(), E>, + E: std::error::Error, + { + let input = self.line_tokens(); + let mut err = None; + let mut lines = Vec::new(); + gix_diff::blob::diff(self.algo, &input, |before: Range<u32>, after: Range<u32>| { + if err.is_some() { + return; + } + lines.clear(); + lines.extend( + input.before[before.start as usize..before.end as usize] + .iter() + .map(|&line| input.interner[line].as_bstr()), + ); + let end_of_before = lines.len(); + lines.extend( + input.after[after.start as usize..after.end as usize] + .iter() + .map(|&line| input.interner[line].as_bstr()), + ); + let hunk_before = &lines[..end_of_before]; + let hunk_after = &lines[end_of_before..]; + if hunk_after.is_empty() { + err = process_hunk(Change::Deletion { lines: hunk_before }).err(); + } else if hunk_before.is_empty() { + err = process_hunk(Change::Addition { lines: hunk_after }).err(); + } else { + err = process_hunk(Change::Modification { + lines_before: hunk_before, + lines_after: hunk_after, + }) + .err(); + } + }); + + match err { + Some(err) => Err(err), + None => Ok(()), + } + } + + /// Count the amount of removed and inserted lines efficiently. + pub fn line_counts(&self) -> gix_diff::blob::sink::Counter<()> { + let tokens = self.line_tokens(); + gix_diff::blob::diff(self.algo, &tokens, gix_diff::blob::sink::Counter::default()) + } + + /// Return a tokenizer which treats lines as smallest unit for use in a [diff operation][gix_diff::blob::diff()]. + /// + /// The line separator is determined according to normal git rules and filters. + pub fn line_tokens(&self) -> gix_diff::blob::intern::InternedInput<&[u8]> { + // TODO: make use of `core.eol` and/or filters to do line-counting correctly. It's probably + // OK to just know how these objects are saved to know what constitutes a line. + gix_diff::blob::intern::InternedInput::new(self.old.data.as_bytes(), self.new.data.as_bytes()) + } + } +} |