summaryrefslogtreecommitdiffstats
path: root/vendor/gix-traverse/src/commit.rs
diff options
context:
space:
mode:
authorDaniel Baumann <daniel.baumann@progress-linux.org>2024-05-30 18:31:44 +0000
committerDaniel Baumann <daniel.baumann@progress-linux.org>2024-05-30 18:31:44 +0000
commitc23a457e72abe608715ac76f076f47dc42af07a5 (patch)
tree2772049aaf84b5c9d0ed12ec8d86812f7a7904b6 /vendor/gix-traverse/src/commit.rs
parentReleasing progress-linux version 1.73.0+dfsg1-1~progress7.99u1. (diff)
downloadrustc-c23a457e72abe608715ac76f076f47dc42af07a5.tar.xz
rustc-c23a457e72abe608715ac76f076f47dc42af07a5.zip
Merging upstream version 1.74.1+dfsg1.
Signed-off-by: Daniel Baumann <daniel.baumann@progress-linux.org>
Diffstat (limited to 'vendor/gix-traverse/src/commit.rs')
-rw-r--r--vendor/gix-traverse/src/commit.rs323
1 files changed, 238 insertions, 85 deletions
diff --git a/vendor/gix-traverse/src/commit.rs b/vendor/gix-traverse/src/commit.rs
index 95d48842a..29f5947b3 100644
--- a/vendor/gix-traverse/src/commit.rs
+++ b/vendor/gix-traverse/src/commit.rs
@@ -1,6 +1,9 @@
+use smallvec::SmallVec;
+
/// An iterator over the ancestors one or more starting commits
pub struct Ancestors<Find, Predicate, StateMut> {
find: Find,
+ cache: Option<gix_commitgraph::Graph>,
predicate: Predicate,
state: StateMut,
parents: Parents,
@@ -18,15 +21,36 @@ pub enum Parents {
}
/// Specify how to sort commits during traversal.
+///
+/// ### Sample History
+///
+/// The following history will be referred to for explaining how the sort order works, with the number denoting the commit timestamp
+/// (*their X-alignment doesn't matter*).
+///
+/// ```text
+/// ---1----2----4----7 <- second parent of 8
+/// \ \
+/// 3----5----6----8---
+/// ```
+
#[derive(Default, Debug, Copy, Clone)]
pub enum Sorting {
/// Commits are sorted as they are mentioned in the commit graph.
+ ///
+ /// In the *sample history* the order would be `8, 6, 7, 5, 4, 3, 2, 1`
+ ///
+ /// ### Note
+ ///
+ /// This is not to be confused with `git log/rev-list --topo-order`, which is notably different from
+ /// as it avoids overlapping branches.
#[default]
- Topological,
+ BreadthFirst,
/// Commits are sorted by their commit time in descending order, that is newest first.
///
/// The sorting applies to all currently queued commit ids and thus is full.
///
+ /// In the *sample history* the order would be `8, 7, 6, 5, 4, 3, 2, 1`
+ ///
/// # Performance
///
/// This mode benefits greatly from having an object_cache in `find()`
@@ -36,25 +60,45 @@ pub enum Sorting {
/// a given time, stopping the iteration once no younger commits is queued to be traversed.
///
/// As the query is usually repeated with different cutoff dates, this search mode benefits greatly from an object cache.
+ ///
+ /// In the *sample history* and a cut-off date of 4, the returned list of commits would be `8, 7, 6, 4`
ByCommitTimeNewestFirstCutoffOlderThan {
/// The amount of seconds since unix epoch, the same value obtained by any `gix_date::Time` structure and the way git counts time.
- time_in_seconds_since_epoch: u32,
+ seconds: gix_date::SecondsSinceUnixEpoch,
},
}
+/// The collection of parent ids we saw as part of the iteration.
+///
+/// Note that this list is truncated if [`Parents::First`] was used.
+pub type ParentIds = SmallVec<[gix_hash::ObjectId; 1]>;
+
+/// Information about a commit that we obtained naturally as part of the iteration.
+#[derive(Debug, Clone, PartialEq, Eq, Ord, PartialOrd, Hash)]
+pub struct Info {
+ /// The id of the commit.
+ pub id: gix_hash::ObjectId,
+ /// All parent ids we have encountered. Note that these will be at most one if [`Parents::First`] is enabled.
+ pub parent_ids: ParentIds,
+ /// The time at which the commit was created. It's only `Some(_)` if sorting is not [`Sorting::BreadthFirst`], as the walk
+ /// needs to require the commit-date.
+ pub commit_time: Option<gix_date::SecondsSinceUnixEpoch>,
+}
+
///
pub mod ancestors {
use std::{
borrow::{Borrow, BorrowMut},
collections::VecDeque,
- iter::FromIterator,
};
+ use gix_date::SecondsSinceUnixEpoch;
use gix_hash::{oid, ObjectId};
use gix_hashtable::HashSet;
use gix_object::CommitRefIter;
+ use smallvec::SmallVec;
- use crate::commit::{Ancestors, Parents, Sorting};
+ use crate::commit::{collect_parents, Ancestors, Either, Info, ParentIds, Parents, Sorting};
/// The error is part of the item returned by the [Ancestors] iterator.
#[derive(Debug, thiserror::Error)]
@@ -69,35 +113,40 @@ pub mod ancestors {
ObjectDecode(#[from] gix_object::decode::Error),
}
- type TimeInSeconds = u32;
-
/// The state used and potentially shared by multiple graph traversals.
- #[derive(Default, Clone)]
+ #[derive(Clone)]
pub struct State {
- next: VecDeque<(ObjectId, TimeInSeconds)>,
+ next: VecDeque<ObjectId>,
+ queue: gix_revwalk::PriorityQueue<SecondsSinceUnixEpoch, ObjectId>,
buf: Vec<u8>,
seen: HashSet<ObjectId>,
parents_buf: Vec<u8>,
+ parent_ids: SmallVec<[(ObjectId, SecondsSinceUnixEpoch); 2]>,
+ }
+
+ impl Default for State {
+ fn default() -> Self {
+ State {
+ next: Default::default(),
+ queue: gix_revwalk::PriorityQueue::new(),
+ buf: vec![],
+ seen: Default::default(),
+ parents_buf: vec![],
+ parent_ids: Default::default(),
+ }
+ }
}
impl State {
fn clear(&mut self) {
self.next.clear();
+ self.queue.clear();
self.buf.clear();
self.seen.clear();
}
}
/// Builder
- impl<Find, Predicate, StateMut> Ancestors<Find, Predicate, StateMut> {
- /// Change our commit parent handling mode to the given one.
- pub fn parents(mut self, mode: Parents) -> Self {
- self.parents = mode;
- self
- }
- }
-
- /// Builder
impl<Find, Predicate, StateMut, E> Ancestors<Find, Predicate, StateMut>
where
Find: for<'a> FnMut(&oid, &'a mut Vec<u8>) -> Result<CommitRefIter<'a>, E>,
@@ -107,32 +156,61 @@ pub mod ancestors {
/// Set the sorting method, either topological or by author date
pub fn sorting(mut self, sorting: Sorting) -> Result<Self, Error> {
self.sorting = sorting;
- if !matches!(self.sorting, Sorting::Topological) {
- let mut cutoff_time_storage = self.sorting.cutoff_time().map(|cot| (cot, Vec::new()));
- let state = self.state.borrow_mut();
- for (commit_id, commit_time) in &mut state.next {
- let commit_iter = (self.find)(commit_id, &mut state.buf).map_err(|err| Error::FindExisting {
- oid: *commit_id,
- source: err.into(),
- })?;
- let time = commit_iter.committer()?.time.seconds_since_unix_epoch;
- match &mut cutoff_time_storage {
- Some((cutoff_time, storage)) if time >= *cutoff_time => {
- storage.push((*commit_id, time));
+ match self.sorting {
+ Sorting::BreadthFirst => {
+ self.queue_to_vecdeque();
+ }
+ Sorting::ByCommitTimeNewestFirst | Sorting::ByCommitTimeNewestFirstCutoffOlderThan { .. } => {
+ let cutoff_time = self.sorting.cutoff_time();
+ let state = self.state.borrow_mut();
+ for commit_id in state.next.drain(..) {
+ let commit_iter =
+ (self.find)(&commit_id, &mut state.buf).map_err(|err| Error::FindExisting {
+ oid: commit_id,
+ source: err.into(),
+ })?;
+ let time = commit_iter.committer()?.time.seconds;
+ match cutoff_time {
+ Some(cutoff_time) if time >= cutoff_time => {
+ state.queue.insert(time, commit_id);
+ }
+ Some(_) => {}
+ None => {
+ state.queue.insert(time, commit_id);
+ }
}
- Some(_) => {}
- None => *commit_time = time,
}
}
- let mut v = match cutoff_time_storage {
- Some((_, storage)) => storage,
- None => Vec::from_iter(std::mem::take(&mut state.next).into_iter()),
- };
- v.sort_by(|a, b| a.1.cmp(&b.1).reverse());
- state.next = v.into();
}
Ok(self)
}
+
+ /// Change our commit parent handling mode to the given one.
+ pub fn parents(mut self, mode: Parents) -> Self {
+ self.parents = mode;
+ if matches!(self.parents, Parents::First) {
+ self.queue_to_vecdeque();
+ }
+ self
+ }
+
+ /// Set the commitgraph as `cache` to greatly accelerate any traversal.
+ ///
+ /// The cache will be used if possible, but we will fall-back without error to using the object
+ /// database for commit lookup. If the cache is corrupt, we will fall back to the object database as well.
+ pub fn commit_graph(mut self, cache: Option<gix_commitgraph::Graph>) -> Self {
+ self.cache = cache;
+ self
+ }
+
+ fn queue_to_vecdeque(&mut self) {
+ let state = self.state.borrow_mut();
+ state.next.extend(
+ std::mem::replace(&mut state.queue, gix_revwalk::PriorityQueue::new())
+ .into_iter_unordered()
+ .map(|(_time, id)| id),
+ );
+ }
}
/// Initialization
@@ -191,12 +269,13 @@ pub mod ancestors {
for tip in tips.map(Into::into) {
let was_inserted = state.seen.insert(tip);
if was_inserted && predicate(&tip) {
- state.next.push_back((tip, 0));
+ state.next.push_back(tip);
}
}
}
Self {
find,
+ cache: None,
predicate,
state,
parents: Default::default(),
@@ -213,6 +292,11 @@ pub mod ancestors {
pub fn commit_iter(&self) -> CommitRefIter<'_> {
CommitRefIter::from_bytes(&self.state.borrow().buf)
}
+
+ /// Return the current commits data.
+ pub fn commit_data(&self) -> &[u8] {
+ &self.state.borrow().buf
+ }
}
impl<Find, Predicate, StateMut, E> Iterator for Ancestors<Find, Predicate, StateMut>
@@ -222,18 +306,18 @@ pub mod ancestors {
StateMut: BorrowMut<State>,
E: std::error::Error + Send + Sync + 'static,
{
- type Item = Result<ObjectId, Error>;
+ type Item = Result<Info, Error>;
fn next(&mut self) -> Option<Self::Item> {
if matches!(self.parents, Parents::First) {
self.next_by_topology()
} else {
match self.sorting {
- Sorting::Topological => self.next_by_topology(),
+ Sorting::BreadthFirst => self.next_by_topology(),
Sorting::ByCommitTimeNewestFirst => self.next_by_commit_date(None),
- Sorting::ByCommitTimeNewestFirstCutoffOlderThan {
- time_in_seconds_since_epoch,
- } => self.next_by_commit_date(time_in_seconds_since_epoch.into()),
+ Sorting::ByCommitTimeNewestFirstCutoffOlderThan { seconds } => {
+ self.next_by_commit_date(seconds.into())
+ }
}
}
}
@@ -241,11 +325,9 @@ pub mod ancestors {
impl Sorting {
/// If not topo sort, provide the cutoff date if present.
- fn cutoff_time(&self) -> Option<u32> {
+ fn cutoff_time(&self) -> Option<SecondsSinceUnixEpoch> {
match self {
- Sorting::ByCommitTimeNewestFirstCutoffOlderThan {
- time_in_seconds_since_epoch,
- } => Some(*time_in_seconds_since_epoch),
+ Sorting::ByCommitTimeNewestFirstCutoffOlderThan { seconds } => Some(*seconds),
_ => None,
}
}
@@ -259,53 +341,53 @@ pub mod ancestors {
StateMut: BorrowMut<State>,
E: std::error::Error + Send + Sync + 'static,
{
- fn next_by_commit_date(&mut self, cutoff_older_than: Option<TimeInSeconds>) -> Option<Result<ObjectId, Error>> {
+ fn next_by_commit_date(
+ &mut self,
+ cutoff_older_than: Option<SecondsSinceUnixEpoch>,
+ ) -> Option<Result<Info, Error>> {
let state = self.state.borrow_mut();
- let (oid, _commit_time) = state.next.pop_front()?;
- match (self.find)(&oid, &mut state.buf) {
- Ok(commit_iter) => {
- let mut count = 0;
+ let (commit_time, oid) = state.queue.pop()?;
+ let mut parents: ParentIds = Default::default();
+ match super::find(self.cache.as_ref(), &mut self.find, &oid, &mut state.buf) {
+ Ok(Either::CachedCommit(commit)) => {
+ if !collect_parents(&mut state.parent_ids, self.cache.as_ref(), commit.iter_parents()) {
+ // drop corrupt caches and try again with ODB
+ self.cache = None;
+ return self.next_by_commit_date(cutoff_older_than);
+ }
+ for (id, parent_commit_time) in state.parent_ids.drain(..) {
+ parents.push(id);
+ let was_inserted = state.seen.insert(id);
+ if !(was_inserted && (self.predicate)(&id)) {
+ continue;
+ }
+
+ match cutoff_older_than {
+ Some(cutoff_older_than) if parent_commit_time < cutoff_older_than => continue,
+ Some(_) | None => state.queue.insert(parent_commit_time, id),
+ }
+ }
+ }
+ Ok(Either::CommitRefIter(commit_iter)) => {
for token in commit_iter {
- count += 1;
- let is_first = count == 1;
match token {
Ok(gix_object::commit::ref_iter::Token::Tree { .. }) => continue,
Ok(gix_object::commit::ref_iter::Token::Parent { id }) => {
+ parents.push(id);
let was_inserted = state.seen.insert(id);
if !(was_inserted && (self.predicate)(&id)) {
- if is_first && matches!(self.parents, Parents::First) {
- break;
- } else {
- continue;
- }
+ continue;
}
let parent = (self.find)(id.as_ref(), &mut state.parents_buf).ok();
let parent_commit_time = parent
- .and_then(|parent| {
- parent
- .committer()
- .ok()
- .map(|committer| committer.time.seconds_since_unix_epoch)
- })
+ .and_then(|parent| parent.committer().ok().map(|committer| committer.time.seconds))
.unwrap_or_default();
- let pos = match state.next.binary_search_by(|c| c.1.cmp(&parent_commit_time).reverse())
- {
- Ok(_) => None,
- Err(pos) => Some(pos),
- };
match cutoff_older_than {
Some(cutoff_older_than) if parent_commit_time < cutoff_older_than => continue,
- Some(_) | None => match pos {
- Some(pos) => state.next.insert(pos, (id, parent_commit_time)),
- None => state.next.push_back((id, parent_commit_time)),
- },
- }
-
- if is_first && matches!(self.parents, Parents::First) {
- break;
+ Some(_) | None => state.queue.insert(parent_commit_time, id),
}
}
Ok(_unused_token) => break,
@@ -320,7 +402,11 @@ pub mod ancestors {
}))
}
}
- Some(Ok(oid))
+ Some(Ok(Info {
+ id: oid,
+ parent_ids: parents,
+ commit_time: Some(commit_time),
+ }))
}
}
@@ -332,18 +418,38 @@ pub mod ancestors {
StateMut: BorrowMut<State>,
E: std::error::Error + Send + Sync + 'static,
{
- fn next_by_topology(&mut self) -> Option<Result<ObjectId, Error>> {
+ fn next_by_topology(&mut self) -> Option<Result<Info, Error>> {
let state = self.state.borrow_mut();
- let (oid, _commit_time) = state.next.pop_front()?;
- match (self.find)(&oid, &mut state.buf) {
- Ok(commit_iter) => {
+ let oid = state.next.pop_front()?;
+ let mut parents: ParentIds = Default::default();
+ match super::find(self.cache.as_ref(), &mut self.find, &oid, &mut state.buf) {
+ Ok(Either::CachedCommit(commit)) => {
+ if !collect_parents(&mut state.parent_ids, self.cache.as_ref(), commit.iter_parents()) {
+ // drop corrupt caches and try again with ODB
+ self.cache = None;
+ return self.next_by_topology();
+ }
+
+ for (id, _commit_time) in state.parent_ids.drain(..) {
+ parents.push(id);
+ let was_inserted = state.seen.insert(id);
+ if was_inserted && (self.predicate)(&id) {
+ state.next.push_back(id);
+ }
+ if matches!(self.parents, Parents::First) {
+ break;
+ }
+ }
+ }
+ Ok(Either::CommitRefIter(commit_iter)) => {
for token in commit_iter {
match token {
Ok(gix_object::commit::ref_iter::Token::Tree { .. }) => continue,
Ok(gix_object::commit::ref_iter::Token::Parent { id }) => {
+ parents.push(id);
let was_inserted = state.seen.insert(id);
if was_inserted && (self.predicate)(&id) {
- state.next.push_back((id, 0));
+ state.next.push_back(id);
}
if matches!(self.parents, Parents::First) {
break;
@@ -361,7 +467,54 @@ pub mod ancestors {
}))
}
}
- Some(Ok(oid))
+ Some(Ok(Info {
+ id: oid,
+ parent_ids: parents,
+ commit_time: None,
+ }))
}
}
}
+
+enum Either<'buf, 'cache> {
+ CommitRefIter(gix_object::CommitRefIter<'buf>),
+ CachedCommit(gix_commitgraph::file::Commit<'cache>),
+}
+
+fn collect_parents(
+ dest: &mut SmallVec<[(gix_hash::ObjectId, gix_date::SecondsSinceUnixEpoch); 2]>,
+ cache: Option<&gix_commitgraph::Graph>,
+ parents: gix_commitgraph::file::commit::Parents<'_>,
+) -> bool {
+ dest.clear();
+ let cache = cache.as_ref().expect("parents iter is available, backed by `cache`");
+ for parent_id in parents {
+ match parent_id {
+ Ok(pos) => dest.push({
+ let parent = cache.commit_at(pos);
+ (
+ parent.id().to_owned(),
+ parent.committer_timestamp() as gix_date::SecondsSinceUnixEpoch, // we can't handle errors here and trying seems overkill
+ )
+ }),
+ Err(_err) => return false,
+ }
+ }
+ true
+}
+
+fn find<'cache, 'buf, Find, E>(
+ cache: Option<&'cache gix_commitgraph::Graph>,
+ mut find: Find,
+ id: &gix_hash::oid,
+ buf: &'buf mut Vec<u8>,
+) -> Result<Either<'buf, 'cache>, E>
+where
+ Find: for<'a> FnMut(&gix_hash::oid, &'a mut Vec<u8>) -> Result<gix_object::CommitRefIter<'a>, E>,
+ E: std::error::Error + Send + Sync + 'static,
+{
+ match cache.and_then(|cache| cache.commit_by_id(id).map(Either::CachedCommit)) {
+ Some(c) => Ok(c),
+ None => find(id, buf).map(Either::CommitRefIter),
+ }
+}