diff options
Diffstat (limited to 'vendor/gix-traverse/src')
-rw-r--r-- | vendor/gix-traverse/src/commit.rs | 323 |
1 files changed, 238 insertions, 85 deletions
diff --git a/vendor/gix-traverse/src/commit.rs b/vendor/gix-traverse/src/commit.rs index 95d48842a..29f5947b3 100644 --- a/vendor/gix-traverse/src/commit.rs +++ b/vendor/gix-traverse/src/commit.rs @@ -1,6 +1,9 @@ +use smallvec::SmallVec; + /// An iterator over the ancestors one or more starting commits pub struct Ancestors<Find, Predicate, StateMut> { find: Find, + cache: Option<gix_commitgraph::Graph>, predicate: Predicate, state: StateMut, parents: Parents, @@ -18,15 +21,36 @@ pub enum Parents { } /// Specify how to sort commits during traversal. +/// +/// ### Sample History +/// +/// The following history will be referred to for explaining how the sort order works, with the number denoting the commit timestamp +/// (*their X-alignment doesn't matter*). +/// +/// ```text +/// ---1----2----4----7 <- second parent of 8 +/// \ \ +/// 3----5----6----8--- +/// ``` + #[derive(Default, Debug, Copy, Clone)] pub enum Sorting { /// Commits are sorted as they are mentioned in the commit graph. + /// + /// In the *sample history* the order would be `8, 6, 7, 5, 4, 3, 2, 1` + /// + /// ### Note + /// + /// This is not to be confused with `git log/rev-list --topo-order`, which is notably different from + /// as it avoids overlapping branches. #[default] - Topological, + BreadthFirst, /// Commits are sorted by their commit time in descending order, that is newest first. /// /// The sorting applies to all currently queued commit ids and thus is full. /// + /// In the *sample history* the order would be `8, 7, 6, 5, 4, 3, 2, 1` + /// /// # Performance /// /// This mode benefits greatly from having an object_cache in `find()` @@ -36,25 +60,45 @@ pub enum Sorting { /// a given time, stopping the iteration once no younger commits is queued to be traversed. /// /// As the query is usually repeated with different cutoff dates, this search mode benefits greatly from an object cache. + /// + /// In the *sample history* and a cut-off date of 4, the returned list of commits would be `8, 7, 6, 4` ByCommitTimeNewestFirstCutoffOlderThan { /// The amount of seconds since unix epoch, the same value obtained by any `gix_date::Time` structure and the way git counts time. - time_in_seconds_since_epoch: u32, + seconds: gix_date::SecondsSinceUnixEpoch, }, } +/// The collection of parent ids we saw as part of the iteration. +/// +/// Note that this list is truncated if [`Parents::First`] was used. +pub type ParentIds = SmallVec<[gix_hash::ObjectId; 1]>; + +/// Information about a commit that we obtained naturally as part of the iteration. +#[derive(Debug, Clone, PartialEq, Eq, Ord, PartialOrd, Hash)] +pub struct Info { + /// The id of the commit. + pub id: gix_hash::ObjectId, + /// All parent ids we have encountered. Note that these will be at most one if [`Parents::First`] is enabled. + pub parent_ids: ParentIds, + /// The time at which the commit was created. It's only `Some(_)` if sorting is not [`Sorting::BreadthFirst`], as the walk + /// needs to require the commit-date. + pub commit_time: Option<gix_date::SecondsSinceUnixEpoch>, +} + /// pub mod ancestors { use std::{ borrow::{Borrow, BorrowMut}, collections::VecDeque, - iter::FromIterator, }; + use gix_date::SecondsSinceUnixEpoch; use gix_hash::{oid, ObjectId}; use gix_hashtable::HashSet; use gix_object::CommitRefIter; + use smallvec::SmallVec; - use crate::commit::{Ancestors, Parents, Sorting}; + use crate::commit::{collect_parents, Ancestors, Either, Info, ParentIds, Parents, Sorting}; /// The error is part of the item returned by the [Ancestors] iterator. #[derive(Debug, thiserror::Error)] @@ -69,35 +113,40 @@ pub mod ancestors { ObjectDecode(#[from] gix_object::decode::Error), } - type TimeInSeconds = u32; - /// The state used and potentially shared by multiple graph traversals. - #[derive(Default, Clone)] + #[derive(Clone)] pub struct State { - next: VecDeque<(ObjectId, TimeInSeconds)>, + next: VecDeque<ObjectId>, + queue: gix_revwalk::PriorityQueue<SecondsSinceUnixEpoch, ObjectId>, buf: Vec<u8>, seen: HashSet<ObjectId>, parents_buf: Vec<u8>, + parent_ids: SmallVec<[(ObjectId, SecondsSinceUnixEpoch); 2]>, + } + + impl Default for State { + fn default() -> Self { + State { + next: Default::default(), + queue: gix_revwalk::PriorityQueue::new(), + buf: vec![], + seen: Default::default(), + parents_buf: vec![], + parent_ids: Default::default(), + } + } } impl State { fn clear(&mut self) { self.next.clear(); + self.queue.clear(); self.buf.clear(); self.seen.clear(); } } /// Builder - impl<Find, Predicate, StateMut> Ancestors<Find, Predicate, StateMut> { - /// Change our commit parent handling mode to the given one. - pub fn parents(mut self, mode: Parents) -> Self { - self.parents = mode; - self - } - } - - /// Builder impl<Find, Predicate, StateMut, E> Ancestors<Find, Predicate, StateMut> where Find: for<'a> FnMut(&oid, &'a mut Vec<u8>) -> Result<CommitRefIter<'a>, E>, @@ -107,32 +156,61 @@ pub mod ancestors { /// Set the sorting method, either topological or by author date pub fn sorting(mut self, sorting: Sorting) -> Result<Self, Error> { self.sorting = sorting; - if !matches!(self.sorting, Sorting::Topological) { - let mut cutoff_time_storage = self.sorting.cutoff_time().map(|cot| (cot, Vec::new())); - let state = self.state.borrow_mut(); - for (commit_id, commit_time) in &mut state.next { - let commit_iter = (self.find)(commit_id, &mut state.buf).map_err(|err| Error::FindExisting { - oid: *commit_id, - source: err.into(), - })?; - let time = commit_iter.committer()?.time.seconds_since_unix_epoch; - match &mut cutoff_time_storage { - Some((cutoff_time, storage)) if time >= *cutoff_time => { - storage.push((*commit_id, time)); + match self.sorting { + Sorting::BreadthFirst => { + self.queue_to_vecdeque(); + } + Sorting::ByCommitTimeNewestFirst | Sorting::ByCommitTimeNewestFirstCutoffOlderThan { .. } => { + let cutoff_time = self.sorting.cutoff_time(); + let state = self.state.borrow_mut(); + for commit_id in state.next.drain(..) { + let commit_iter = + (self.find)(&commit_id, &mut state.buf).map_err(|err| Error::FindExisting { + oid: commit_id, + source: err.into(), + })?; + let time = commit_iter.committer()?.time.seconds; + match cutoff_time { + Some(cutoff_time) if time >= cutoff_time => { + state.queue.insert(time, commit_id); + } + Some(_) => {} + None => { + state.queue.insert(time, commit_id); + } } - Some(_) => {} - None => *commit_time = time, } } - let mut v = match cutoff_time_storage { - Some((_, storage)) => storage, - None => Vec::from_iter(std::mem::take(&mut state.next).into_iter()), - }; - v.sort_by(|a, b| a.1.cmp(&b.1).reverse()); - state.next = v.into(); } Ok(self) } + + /// Change our commit parent handling mode to the given one. + pub fn parents(mut self, mode: Parents) -> Self { + self.parents = mode; + if matches!(self.parents, Parents::First) { + self.queue_to_vecdeque(); + } + self + } + + /// Set the commitgraph as `cache` to greatly accelerate any traversal. + /// + /// The cache will be used if possible, but we will fall-back without error to using the object + /// database for commit lookup. If the cache is corrupt, we will fall back to the object database as well. + pub fn commit_graph(mut self, cache: Option<gix_commitgraph::Graph>) -> Self { + self.cache = cache; + self + } + + fn queue_to_vecdeque(&mut self) { + let state = self.state.borrow_mut(); + state.next.extend( + std::mem::replace(&mut state.queue, gix_revwalk::PriorityQueue::new()) + .into_iter_unordered() + .map(|(_time, id)| id), + ); + } } /// Initialization @@ -191,12 +269,13 @@ pub mod ancestors { for tip in tips.map(Into::into) { let was_inserted = state.seen.insert(tip); if was_inserted && predicate(&tip) { - state.next.push_back((tip, 0)); + state.next.push_back(tip); } } } Self { find, + cache: None, predicate, state, parents: Default::default(), @@ -213,6 +292,11 @@ pub mod ancestors { pub fn commit_iter(&self) -> CommitRefIter<'_> { CommitRefIter::from_bytes(&self.state.borrow().buf) } + + /// Return the current commits data. + pub fn commit_data(&self) -> &[u8] { + &self.state.borrow().buf + } } impl<Find, Predicate, StateMut, E> Iterator for Ancestors<Find, Predicate, StateMut> @@ -222,18 +306,18 @@ pub mod ancestors { StateMut: BorrowMut<State>, E: std::error::Error + Send + Sync + 'static, { - type Item = Result<ObjectId, Error>; + type Item = Result<Info, Error>; fn next(&mut self) -> Option<Self::Item> { if matches!(self.parents, Parents::First) { self.next_by_topology() } else { match self.sorting { - Sorting::Topological => self.next_by_topology(), + Sorting::BreadthFirst => self.next_by_topology(), Sorting::ByCommitTimeNewestFirst => self.next_by_commit_date(None), - Sorting::ByCommitTimeNewestFirstCutoffOlderThan { - time_in_seconds_since_epoch, - } => self.next_by_commit_date(time_in_seconds_since_epoch.into()), + Sorting::ByCommitTimeNewestFirstCutoffOlderThan { seconds } => { + self.next_by_commit_date(seconds.into()) + } } } } @@ -241,11 +325,9 @@ pub mod ancestors { impl Sorting { /// If not topo sort, provide the cutoff date if present. - fn cutoff_time(&self) -> Option<u32> { + fn cutoff_time(&self) -> Option<SecondsSinceUnixEpoch> { match self { - Sorting::ByCommitTimeNewestFirstCutoffOlderThan { - time_in_seconds_since_epoch, - } => Some(*time_in_seconds_since_epoch), + Sorting::ByCommitTimeNewestFirstCutoffOlderThan { seconds } => Some(*seconds), _ => None, } } @@ -259,53 +341,53 @@ pub mod ancestors { StateMut: BorrowMut<State>, E: std::error::Error + Send + Sync + 'static, { - fn next_by_commit_date(&mut self, cutoff_older_than: Option<TimeInSeconds>) -> Option<Result<ObjectId, Error>> { + fn next_by_commit_date( + &mut self, + cutoff_older_than: Option<SecondsSinceUnixEpoch>, + ) -> Option<Result<Info, Error>> { let state = self.state.borrow_mut(); - let (oid, _commit_time) = state.next.pop_front()?; - match (self.find)(&oid, &mut state.buf) { - Ok(commit_iter) => { - let mut count = 0; + let (commit_time, oid) = state.queue.pop()?; + let mut parents: ParentIds = Default::default(); + match super::find(self.cache.as_ref(), &mut self.find, &oid, &mut state.buf) { + Ok(Either::CachedCommit(commit)) => { + if !collect_parents(&mut state.parent_ids, self.cache.as_ref(), commit.iter_parents()) { + // drop corrupt caches and try again with ODB + self.cache = None; + return self.next_by_commit_date(cutoff_older_than); + } + for (id, parent_commit_time) in state.parent_ids.drain(..) { + parents.push(id); + let was_inserted = state.seen.insert(id); + if !(was_inserted && (self.predicate)(&id)) { + continue; + } + + match cutoff_older_than { + Some(cutoff_older_than) if parent_commit_time < cutoff_older_than => continue, + Some(_) | None => state.queue.insert(parent_commit_time, id), + } + } + } + Ok(Either::CommitRefIter(commit_iter)) => { for token in commit_iter { - count += 1; - let is_first = count == 1; match token { Ok(gix_object::commit::ref_iter::Token::Tree { .. }) => continue, Ok(gix_object::commit::ref_iter::Token::Parent { id }) => { + parents.push(id); let was_inserted = state.seen.insert(id); if !(was_inserted && (self.predicate)(&id)) { - if is_first && matches!(self.parents, Parents::First) { - break; - } else { - continue; - } + continue; } let parent = (self.find)(id.as_ref(), &mut state.parents_buf).ok(); let parent_commit_time = parent - .and_then(|parent| { - parent - .committer() - .ok() - .map(|committer| committer.time.seconds_since_unix_epoch) - }) + .and_then(|parent| parent.committer().ok().map(|committer| committer.time.seconds)) .unwrap_or_default(); - let pos = match state.next.binary_search_by(|c| c.1.cmp(&parent_commit_time).reverse()) - { - Ok(_) => None, - Err(pos) => Some(pos), - }; match cutoff_older_than { Some(cutoff_older_than) if parent_commit_time < cutoff_older_than => continue, - Some(_) | None => match pos { - Some(pos) => state.next.insert(pos, (id, parent_commit_time)), - None => state.next.push_back((id, parent_commit_time)), - }, - } - - if is_first && matches!(self.parents, Parents::First) { - break; + Some(_) | None => state.queue.insert(parent_commit_time, id), } } Ok(_unused_token) => break, @@ -320,7 +402,11 @@ pub mod ancestors { })) } } - Some(Ok(oid)) + Some(Ok(Info { + id: oid, + parent_ids: parents, + commit_time: Some(commit_time), + })) } } @@ -332,18 +418,38 @@ pub mod ancestors { StateMut: BorrowMut<State>, E: std::error::Error + Send + Sync + 'static, { - fn next_by_topology(&mut self) -> Option<Result<ObjectId, Error>> { + fn next_by_topology(&mut self) -> Option<Result<Info, Error>> { let state = self.state.borrow_mut(); - let (oid, _commit_time) = state.next.pop_front()?; - match (self.find)(&oid, &mut state.buf) { - Ok(commit_iter) => { + let oid = state.next.pop_front()?; + let mut parents: ParentIds = Default::default(); + match super::find(self.cache.as_ref(), &mut self.find, &oid, &mut state.buf) { + Ok(Either::CachedCommit(commit)) => { + if !collect_parents(&mut state.parent_ids, self.cache.as_ref(), commit.iter_parents()) { + // drop corrupt caches and try again with ODB + self.cache = None; + return self.next_by_topology(); + } + + for (id, _commit_time) in state.parent_ids.drain(..) { + parents.push(id); + let was_inserted = state.seen.insert(id); + if was_inserted && (self.predicate)(&id) { + state.next.push_back(id); + } + if matches!(self.parents, Parents::First) { + break; + } + } + } + Ok(Either::CommitRefIter(commit_iter)) => { for token in commit_iter { match token { Ok(gix_object::commit::ref_iter::Token::Tree { .. }) => continue, Ok(gix_object::commit::ref_iter::Token::Parent { id }) => { + parents.push(id); let was_inserted = state.seen.insert(id); if was_inserted && (self.predicate)(&id) { - state.next.push_back((id, 0)); + state.next.push_back(id); } if matches!(self.parents, Parents::First) { break; @@ -361,7 +467,54 @@ pub mod ancestors { })) } } - Some(Ok(oid)) + Some(Ok(Info { + id: oid, + parent_ids: parents, + commit_time: None, + })) } } } + +enum Either<'buf, 'cache> { + CommitRefIter(gix_object::CommitRefIter<'buf>), + CachedCommit(gix_commitgraph::file::Commit<'cache>), +} + +fn collect_parents( + dest: &mut SmallVec<[(gix_hash::ObjectId, gix_date::SecondsSinceUnixEpoch); 2]>, + cache: Option<&gix_commitgraph::Graph>, + parents: gix_commitgraph::file::commit::Parents<'_>, +) -> bool { + dest.clear(); + let cache = cache.as_ref().expect("parents iter is available, backed by `cache`"); + for parent_id in parents { + match parent_id { + Ok(pos) => dest.push({ + let parent = cache.commit_at(pos); + ( + parent.id().to_owned(), + parent.committer_timestamp() as gix_date::SecondsSinceUnixEpoch, // we can't handle errors here and trying seems overkill + ) + }), + Err(_err) => return false, + } + } + true +} + +fn find<'cache, 'buf, Find, E>( + cache: Option<&'cache gix_commitgraph::Graph>, + mut find: Find, + id: &gix_hash::oid, + buf: &'buf mut Vec<u8>, +) -> Result<Either<'buf, 'cache>, E> +where + Find: for<'a> FnMut(&gix_hash::oid, &'a mut Vec<u8>) -> Result<gix_object::CommitRefIter<'a>, E>, + E: std::error::Error + Send + Sync + 'static, +{ + match cache.and_then(|cache| cache.commit_by_id(id).map(Either::CachedCommit)) { + Some(c) => Ok(c), + None => find(id, buf).map(Either::CommitRefIter), + } +} |