From 10ee2acdd26a7f1298c6f6d6b7af9b469fe29b87 Mon Sep 17 00:00:00 2001 From: Daniel Baumann Date: Sat, 4 May 2024 14:41:41 +0200 Subject: Merging upstream version 1.70.0+dfsg2. Signed-off-by: Daniel Baumann --- vendor/gix-traverse/src/commit.rs | 377 ++++++++++++++++++++++++++++++++++++++ 1 file changed, 377 insertions(+) create mode 100644 vendor/gix-traverse/src/commit.rs (limited to 'vendor/gix-traverse/src/commit.rs') diff --git a/vendor/gix-traverse/src/commit.rs b/vendor/gix-traverse/src/commit.rs new file mode 100644 index 000000000..890fde2a9 --- /dev/null +++ b/vendor/gix-traverse/src/commit.rs @@ -0,0 +1,377 @@ +/// An iterator over the ancestors one or more starting commits +pub struct Ancestors { + find: Find, + predicate: Predicate, + state: StateMut, + parents: Parents, + sorting: Sorting, +} + +/// Specify how to handle commit parents during traversal. +#[derive(Copy, Clone)] +pub enum Parents { + /// Traverse all parents, useful for traversing the entire ancestry. + All, + /// Only traverse along the first parent, which commonly ignores all branches. + First, +} + +impl Default for Parents { + fn default() -> Self { + Parents::All + } +} + +/// Specify how to sort commits during traversal. +#[derive(Copy, Clone)] +pub enum Sorting { + /// Commits are sorted as they are mentioned in the commit graph. + Topological, + /// Commits are sorted by their commit time in descending order, that is newest first. + /// + /// The sorting applies to all currently queued commit ids and thus is full. + /// + /// # Performance + /// + /// This mode benefits greatly from having an object_cache in `find()` + /// to avoid having to lookup each commit twice. + ByCommitTimeNewestFirst, + /// This sorting is similar to `ByCommitTimeNewestFirst`, but adds a cutoff to not return commits older than + /// a given time, stopping the iteration once no younger commits is queued to be traversed. + /// + /// As the query is usually repeated with different cutoff dates, this search mode benefits greatly from an object cache. + ByCommitTimeNewestFirstCutoffOlderThan { + /// The amount of seconds since unix epoch, the same value obtained by any `gix_date::Time` structure and the way git counts time. + time_in_seconds_since_epoch: u32, + }, +} + +impl Default for Sorting { + fn default() -> Self { + Sorting::Topological + } +} + +/// +pub mod ancestors { + use std::{ + borrow::{Borrow, BorrowMut}, + collections::VecDeque, + iter::FromIterator, + }; + + use gix_hash::{oid, ObjectId}; + use gix_hashtable::HashSet; + use gix_object::CommitRefIter; + + use crate::commit::{Ancestors, Parents, Sorting}; + + /// The error is part of the item returned by the [Ancestors] iterator. + #[derive(Debug, thiserror::Error)] + #[allow(missing_docs)] + pub enum Error { + #[error("The commit {oid} could not be found")] + FindExisting { + oid: ObjectId, + source: Box, + }, + #[error(transparent)] + ObjectDecode(#[from] gix_object::decode::Error), + } + + type TimeInSeconds = u32; + + /// The state used and potentially shared by multiple graph traversals. + #[derive(Default, Clone)] + pub struct State { + next: VecDeque<(ObjectId, TimeInSeconds)>, + buf: Vec, + seen: HashSet, + parents_buf: Vec, + } + + impl State { + fn clear(&mut self) { + self.next.clear(); + self.buf.clear(); + self.seen.clear(); + } + } + + /// Builder + impl Ancestors { + /// Change our commit parent handling mode to the given one. + pub fn parents(mut self, mode: Parents) -> Self { + self.parents = mode; + self + } + } + + /// Builder + impl Ancestors + where + Find: for<'a> FnMut(&oid, &'a mut Vec) -> Result, E>, + StateMut: BorrowMut, + E: std::error::Error + Send + Sync + 'static, + { + /// Set the sorting method, either topological or by author date + pub fn sorting(mut self, sorting: Sorting) -> Result { + self.sorting = sorting; + if !matches!(self.sorting, Sorting::Topological) { + let mut cutoff_time_storage = self.sorting.cutoff_time().map(|cot| (cot, Vec::new())); + let state = self.state.borrow_mut(); + for (commit_id, commit_time) in state.next.iter_mut() { + let commit_iter = (self.find)(commit_id, &mut state.buf).map_err(|err| Error::FindExisting { + oid: *commit_id, + source: err.into(), + })?; + let time = commit_iter.committer()?.time.seconds_since_unix_epoch; + match &mut cutoff_time_storage { + Some((cutoff_time, storage)) if time >= *cutoff_time => { + storage.push((*commit_id, time)); + } + Some(_) => {} + None => *commit_time = time, + } + } + let mut v = match cutoff_time_storage { + Some((_, storage)) => storage, + None => Vec::from_iter(std::mem::take(&mut state.next).into_iter()), + }; + v.sort_by(|a, b| a.1.cmp(&b.1).reverse()); + state.next = v.into(); + } + Ok(self) + } + } + + /// Initialization + impl Ancestors bool, StateMut> + where + Find: for<'a> FnMut(&oid, &'a mut Vec) -> Result, E>, + StateMut: BorrowMut, + E: std::error::Error + Send + Sync + 'static, + { + /// Create a new instance. + /// + /// * `find` - a way to lookup new object data during traversal by their ObjectId, writing their data into buffer and returning + /// an iterator over commit tokens if the object is present and is a commit. Caching should be implemented within this function + /// as needed. + /// * `state` - all state used for the traversal. If multiple traversals are performed, allocations can be minimized by reusing + /// this state. + /// * `tips` + /// * the starting points of the iteration, usually commits + /// * each commit they lead to will only be returned once, including the tip that started it + pub fn new(tips: impl IntoIterator>, state: StateMut, find: Find) -> Self { + Self::filtered(tips, state, find, |_| true) + } + } + + /// Initialization + impl Ancestors + where + Find: for<'a> FnMut(&oid, &'a mut Vec) -> Result, E>, + Predicate: FnMut(&oid) -> bool, + StateMut: BorrowMut, + E: std::error::Error + Send + Sync + 'static, + { + /// Create a new instance with commit filtering enabled. + /// + /// * `find` - a way to lookup new object data during traversal by their ObjectId, writing their data into buffer and returning + /// an iterator over commit tokens if the object is present and is a commit. Caching should be implemented within this function + /// as needed. + /// * `state` - all state used for the traversal. If multiple traversals are performed, allocations can be minimized by reusing + /// this state. + /// * `tips` + /// * the starting points of the iteration, usually commits + /// * each commit they lead to will only be returned once, including the tip that started it + /// * `predicate` - indicate whether a given commit should be included in the result as well + /// as whether its parent commits should be traversed. + pub fn filtered( + tips: impl IntoIterator>, + mut state: StateMut, + find: Find, + mut predicate: Predicate, + ) -> Self { + let tips = tips.into_iter(); + { + let state = state.borrow_mut(); + state.clear(); + state.next.reserve(tips.size_hint().0); + for tip in tips.map(Into::into) { + let was_inserted = state.seen.insert(tip); + if was_inserted && predicate(&tip) { + state.next.push_back((tip, 0)); + } + } + } + Self { + find, + predicate, + state, + parents: Default::default(), + sorting: Default::default(), + } + } + } + /// Access + impl Ancestors + where + StateMut: Borrow, + { + /// Return an iterator for accessing more of the current commits data. + pub fn commit_iter(&self) -> CommitRefIter<'_> { + CommitRefIter::from_bytes(&self.state.borrow().buf) + } + } + + impl Iterator for Ancestors + where + Find: for<'a> FnMut(&oid, &'a mut Vec) -> Result, E>, + Predicate: FnMut(&oid) -> bool, + StateMut: BorrowMut, + E: std::error::Error + Send + Sync + 'static, + { + type Item = Result; + + fn next(&mut self) -> Option { + if matches!(self.parents, Parents::First) { + self.next_by_topology() + } else { + match self.sorting { + Sorting::Topological => self.next_by_topology(), + Sorting::ByCommitTimeNewestFirst => self.next_by_commit_date(None), + Sorting::ByCommitTimeNewestFirstCutoffOlderThan { + time_in_seconds_since_epoch, + } => self.next_by_commit_date(time_in_seconds_since_epoch.into()), + } + } + } + } + + impl Sorting { + /// If not topo sort, provide the cutoff date if present. + fn cutoff_time(&self) -> Option { + match self { + Sorting::ByCommitTimeNewestFirstCutoffOlderThan { + time_in_seconds_since_epoch, + } => Some(*time_in_seconds_since_epoch), + _ => None, + } + } + } + + /// Utilities + impl Ancestors + where + Find: for<'a> FnMut(&oid, &'a mut Vec) -> Result, E>, + Predicate: FnMut(&oid) -> bool, + StateMut: BorrowMut, + E: std::error::Error + Send + Sync + 'static, + { + fn next_by_commit_date(&mut self, cutoff_older_than: Option) -> Option> { + let state = self.state.borrow_mut(); + + let (oid, _commit_time) = state.next.pop_front()?; + match (self.find)(&oid, &mut state.buf) { + Ok(commit_iter) => { + let mut count = 0; + for token in commit_iter { + count += 1; + let is_first = count == 1; + match token { + Ok(gix_object::commit::ref_iter::Token::Tree { .. }) => continue, + Ok(gix_object::commit::ref_iter::Token::Parent { id }) => { + let was_inserted = state.seen.insert(id); + if !(was_inserted && (self.predicate)(&id)) { + if is_first && matches!(self.parents, Parents::First) { + break; + } else { + continue; + } + } + + let parent = (self.find)(id.as_ref(), &mut state.parents_buf).ok(); + let parent_commit_time = parent + .and_then(|parent| { + parent + .committer() + .ok() + .map(|committer| committer.time.seconds_since_unix_epoch) + }) + .unwrap_or_default(); + + let pos = match state.next.binary_search_by(|c| c.1.cmp(&parent_commit_time).reverse()) + { + Ok(_) => None, + Err(pos) => Some(pos), + }; + match cutoff_older_than { + Some(cutoff_older_than) if parent_commit_time < cutoff_older_than => continue, + Some(_) | None => match pos { + Some(pos) => state.next.insert(pos, (id, parent_commit_time)), + None => state.next.push_back((id, parent_commit_time)), + }, + } + + if is_first && matches!(self.parents, Parents::First) { + break; + } + } + Ok(_unused_token) => break, + Err(err) => return Some(Err(err.into())), + } + } + } + Err(err) => { + return Some(Err(Error::FindExisting { + oid, + source: err.into(), + })) + } + } + Some(Ok(oid)) + } + } + + /// Utilities + impl Ancestors + where + Find: for<'a> FnMut(&oid, &'a mut Vec) -> Result, E>, + Predicate: FnMut(&oid) -> bool, + StateMut: BorrowMut, + E: std::error::Error + Send + Sync + 'static, + { + fn next_by_topology(&mut self) -> Option> { + let state = self.state.borrow_mut(); + let (oid, _commit_time) = state.next.pop_front()?; + match (self.find)(&oid, &mut state.buf) { + Ok(commit_iter) => { + for token in commit_iter { + match token { + Ok(gix_object::commit::ref_iter::Token::Tree { .. }) => continue, + Ok(gix_object::commit::ref_iter::Token::Parent { id }) => { + let was_inserted = state.seen.insert(id); + if was_inserted && (self.predicate)(&id) { + state.next.push_back((id, 0)); + } + if matches!(self.parents, Parents::First) { + break; + } + } + Ok(_a_token_past_the_parents) => break, + Err(err) => return Some(Err(err.into())), + } + } + } + Err(err) => { + return Some(Err(Error::FindExisting { + oid, + source: err.into(), + })) + } + } + Some(Ok(oid)) + } + } +} -- cgit v1.2.3