/// An iterator over the ancestors one or more starting commits pub struct Ancestors { find: Find, predicate: Predicate, state: StateMut, parents: Parents, sorting: Sorting, } /// Specify how to handle commit parents during traversal. #[derive(Default, Copy, Clone)] pub enum Parents { /// Traverse all parents, useful for traversing the entire ancestry. #[default] All, /// Only traverse along the first parent, which commonly ignores all branches. First, } /// Specify how to sort commits during traversal. #[derive(Default, Debug, Copy, Clone)] pub enum Sorting { /// Commits are sorted as they are mentioned in the commit graph. #[default] Topological, /// Commits are sorted by their commit time in descending order, that is newest first. /// /// The sorting applies to all currently queued commit ids and thus is full. /// /// # Performance /// /// This mode benefits greatly from having an object_cache in `find()` /// to avoid having to lookup each commit twice. ByCommitTimeNewestFirst, /// This sorting is similar to `ByCommitTimeNewestFirst`, but adds a cutoff to not return commits older than /// a given time, stopping the iteration once no younger commits is queued to be traversed. /// /// As the query is usually repeated with different cutoff dates, this search mode benefits greatly from an object cache. ByCommitTimeNewestFirstCutoffOlderThan { /// The amount of seconds since unix epoch, the same value obtained by any `gix_date::Time` structure and the way git counts time. time_in_seconds_since_epoch: u32, }, } /// pub mod ancestors { use std::{ borrow::{Borrow, BorrowMut}, collections::VecDeque, iter::FromIterator, }; use gix_hash::{oid, ObjectId}; use gix_hashtable::HashSet; use gix_object::CommitRefIter; use crate::commit::{Ancestors, Parents, Sorting}; /// The error is part of the item returned by the [Ancestors] iterator. #[derive(Debug, thiserror::Error)] #[allow(missing_docs)] pub enum Error { #[error("The commit {oid} could not be found")] FindExisting { oid: ObjectId, source: Box, }, #[error(transparent)] ObjectDecode(#[from] gix_object::decode::Error), } type TimeInSeconds = u32; /// The state used and potentially shared by multiple graph traversals. #[derive(Default, Clone)] pub struct State { next: VecDeque<(ObjectId, TimeInSeconds)>, buf: Vec, seen: HashSet, parents_buf: Vec, } impl State { fn clear(&mut self) { self.next.clear(); self.buf.clear(); self.seen.clear(); } } /// Builder impl Ancestors { /// Change our commit parent handling mode to the given one. pub fn parents(mut self, mode: Parents) -> Self { self.parents = mode; self } } /// Builder impl Ancestors where Find: for<'a> FnMut(&oid, &'a mut Vec) -> Result, E>, StateMut: BorrowMut, E: std::error::Error + Send + Sync + 'static, { /// Set the sorting method, either topological or by author date pub fn sorting(mut self, sorting: Sorting) -> Result { self.sorting = sorting; if !matches!(self.sorting, Sorting::Topological) { let mut cutoff_time_storage = self.sorting.cutoff_time().map(|cot| (cot, Vec::new())); let state = self.state.borrow_mut(); for (commit_id, commit_time) in state.next.iter_mut() { let commit_iter = (self.find)(commit_id, &mut state.buf).map_err(|err| Error::FindExisting { oid: *commit_id, source: err.into(), })?; let time = commit_iter.committer()?.time.seconds_since_unix_epoch; match &mut cutoff_time_storage { Some((cutoff_time, storage)) if time >= *cutoff_time => { storage.push((*commit_id, time)); } Some(_) => {} None => *commit_time = time, } } let mut v = match cutoff_time_storage { Some((_, storage)) => storage, None => Vec::from_iter(std::mem::take(&mut state.next).into_iter()), }; v.sort_by(|a, b| a.1.cmp(&b.1).reverse()); state.next = v.into(); } Ok(self) } } /// Initialization impl Ancestors bool, StateMut> where Find: for<'a> FnMut(&oid, &'a mut Vec) -> Result, E>, StateMut: BorrowMut, E: std::error::Error + Send + Sync + 'static, { /// Create a new instance. /// /// * `find` - a way to lookup new object data during traversal by their ObjectId, writing their data into buffer and returning /// an iterator over commit tokens if the object is present and is a commit. Caching should be implemented within this function /// as needed. /// * `state` - all state used for the traversal. If multiple traversals are performed, allocations can be minimized by reusing /// this state. /// * `tips` /// * the starting points of the iteration, usually commits /// * each commit they lead to will only be returned once, including the tip that started it pub fn new(tips: impl IntoIterator>, state: StateMut, find: Find) -> Self { Self::filtered(tips, state, find, |_| true) } } /// Initialization impl Ancestors where Find: for<'a> FnMut(&oid, &'a mut Vec) -> Result, E>, Predicate: FnMut(&oid) -> bool, StateMut: BorrowMut, E: std::error::Error + Send + Sync + 'static, { /// Create a new instance with commit filtering enabled. /// /// * `find` - a way to lookup new object data during traversal by their ObjectId, writing their data into buffer and returning /// an iterator over commit tokens if the object is present and is a commit. Caching should be implemented within this function /// as needed. /// * `state` - all state used for the traversal. If multiple traversals are performed, allocations can be minimized by reusing /// this state. /// * `tips` /// * the starting points of the iteration, usually commits /// * each commit they lead to will only be returned once, including the tip that started it /// * `predicate` - indicate whether a given commit should be included in the result as well /// as whether its parent commits should be traversed. pub fn filtered( tips: impl IntoIterator>, mut state: StateMut, find: Find, mut predicate: Predicate, ) -> Self { let tips = tips.into_iter(); { let state = state.borrow_mut(); state.clear(); state.next.reserve(tips.size_hint().0); for tip in tips.map(Into::into) { let was_inserted = state.seen.insert(tip); if was_inserted && predicate(&tip) { state.next.push_back((tip, 0)); } } } Self { find, predicate, state, parents: Default::default(), sorting: Default::default(), } } } /// Access impl Ancestors where StateMut: Borrow, { /// Return an iterator for accessing more of the current commits data. pub fn commit_iter(&self) -> CommitRefIter<'_> { CommitRefIter::from_bytes(&self.state.borrow().buf) } } impl Iterator for Ancestors where Find: for<'a> FnMut(&oid, &'a mut Vec) -> Result, E>, Predicate: FnMut(&oid) -> bool, StateMut: BorrowMut, E: std::error::Error + Send + Sync + 'static, { type Item = Result; fn next(&mut self) -> Option { if matches!(self.parents, Parents::First) { self.next_by_topology() } else { match self.sorting { Sorting::Topological => self.next_by_topology(), Sorting::ByCommitTimeNewestFirst => self.next_by_commit_date(None), Sorting::ByCommitTimeNewestFirstCutoffOlderThan { time_in_seconds_since_epoch, } => self.next_by_commit_date(time_in_seconds_since_epoch.into()), } } } } impl Sorting { /// If not topo sort, provide the cutoff date if present. fn cutoff_time(&self) -> Option { match self { Sorting::ByCommitTimeNewestFirstCutoffOlderThan { time_in_seconds_since_epoch, } => Some(*time_in_seconds_since_epoch), _ => None, } } } /// Utilities impl Ancestors where Find: for<'a> FnMut(&oid, &'a mut Vec) -> Result, E>, Predicate: FnMut(&oid) -> bool, StateMut: BorrowMut, E: std::error::Error + Send + Sync + 'static, { fn next_by_commit_date(&mut self, cutoff_older_than: Option) -> Option> { let state = self.state.borrow_mut(); let (oid, _commit_time) = state.next.pop_front()?; match (self.find)(&oid, &mut state.buf) { Ok(commit_iter) => { let mut count = 0; for token in commit_iter { count += 1; let is_first = count == 1; match token { Ok(gix_object::commit::ref_iter::Token::Tree { .. }) => continue, Ok(gix_object::commit::ref_iter::Token::Parent { id }) => { let was_inserted = state.seen.insert(id); if !(was_inserted && (self.predicate)(&id)) { if is_first && matches!(self.parents, Parents::First) { break; } else { continue; } } let parent = (self.find)(id.as_ref(), &mut state.parents_buf).ok(); let parent_commit_time = parent .and_then(|parent| { parent .committer() .ok() .map(|committer| committer.time.seconds_since_unix_epoch) }) .unwrap_or_default(); let pos = match state.next.binary_search_by(|c| c.1.cmp(&parent_commit_time).reverse()) { Ok(_) => None, Err(pos) => Some(pos), }; match cutoff_older_than { Some(cutoff_older_than) if parent_commit_time < cutoff_older_than => continue, Some(_) | None => match pos { Some(pos) => state.next.insert(pos, (id, parent_commit_time)), None => state.next.push_back((id, parent_commit_time)), }, } if is_first && matches!(self.parents, Parents::First) { break; } } Ok(_unused_token) => break, Err(err) => return Some(Err(err.into())), } } } Err(err) => { return Some(Err(Error::FindExisting { oid, source: err.into(), })) } } Some(Ok(oid)) } } /// Utilities impl Ancestors where Find: for<'a> FnMut(&oid, &'a mut Vec) -> Result, E>, Predicate: FnMut(&oid) -> bool, StateMut: BorrowMut, E: std::error::Error + Send + Sync + 'static, { fn next_by_topology(&mut self) -> Option> { let state = self.state.borrow_mut(); let (oid, _commit_time) = state.next.pop_front()?; match (self.find)(&oid, &mut state.buf) { Ok(commit_iter) => { for token in commit_iter { match token { Ok(gix_object::commit::ref_iter::Token::Tree { .. }) => continue, Ok(gix_object::commit::ref_iter::Token::Parent { id }) => { let was_inserted = state.seen.insert(id); if was_inserted && (self.predicate)(&id) { state.next.push_back((id, 0)); } if matches!(self.parents, Parents::First) { break; } } Ok(_a_token_past_the_parents) => break, Err(err) => return Some(Err(err.into())), } } } Err(err) => { return Some(Err(Error::FindExisting { oid, source: err.into(), })) } } Some(Ok(oid)) } } }