diff options
Diffstat (limited to 'vendor/gix-revwalk/src/graph')
-rw-r--r-- | vendor/gix-revwalk/src/graph/commit.rs | 149 | ||||
-rw-r--r-- | vendor/gix-revwalk/src/graph/errors.rs | 53 | ||||
-rw-r--r-- | vendor/gix-revwalk/src/graph/mod.rs | 321 |
3 files changed, 523 insertions, 0 deletions
diff --git a/vendor/gix-revwalk/src/graph/commit.rs b/vendor/gix-revwalk/src/graph/commit.rs new file mode 100644 index 000000000..e11a28e36 --- /dev/null +++ b/vendor/gix-revwalk/src/graph/commit.rs @@ -0,0 +1,149 @@ +use smallvec::SmallVec; + +use super::LazyCommit; +use crate::graph::{Commit, CommitterTimestamp, Either, Generation}; + +impl<'graph> LazyCommit<'graph> { + /// Return an iterator over the parents of this commit. + pub fn iter_parents(&self) -> Parents<'graph> { + let backing = match &self.backing { + Either::Left(buf) => Either::Left(gix_object::CommitRefIter::from_bytes(buf)), + Either::Right((cache, pos)) => Either::Right((*cache, cache.commit_at(*pos).iter_parents())), + }; + Parents { backing } + } + + /// Returns the timestamp at which this commit was created. + /// + /// This is the single-most important date for determining recency of commits. + /// Note that this can only fail if the commit is backed by the object database *and* parsing fails. + pub fn committer_timestamp(&self) -> Result<CommitterTimestamp, gix_object::decode::Error> { + Ok(match &self.backing { + Either::Left(buf) => { + gix_object::CommitRefIter::from_bytes(buf) + .committer()? + .time + .seconds_since_unix_epoch as CommitterTimestamp + } + Either::Right((cache, pos)) => cache.commit_at(*pos).committer_timestamp(), + }) + } + + /// Returns the generation of the commit if it is backed by a commit graph. + pub fn generation(&self) -> Option<Generation> { + match &self.backing { + Either::Left(_) => None, + Either::Right((cache, pos)) => cache.commit_at(*pos).generation().into(), + } + } + + /// Convert ourselves into an owned version, which effectively detaches us from the underlying graph. + /// Use `new_data()` to provide the `data` field for the owned `Commit`. + pub fn to_owned<T>(&self, new_data: impl FnOnce() -> T) -> Result<Commit<T>, to_owned::Error> { + let data = new_data(); + Ok(match &self.backing { + Either::Left(buf) => { + use gix_object::commit::ref_iter::Token; + let iter = gix_object::CommitRefIter::from_bytes(buf); + let mut parents = SmallVec::default(); + let mut timestamp = None; + for token in iter { + match token? { + Token::Tree { .. } => {} + Token::Parent { id } => parents.push(id), + Token::Author { .. } => {} + Token::Committer { signature } => { + timestamp = Some(signature.time.seconds_since_unix_epoch as CommitterTimestamp); + break; + } + _ => { + unreachable!( + "we break naturally after seeing the committer which is always at the same spot" + ) + } + } + } + Commit { + parents, + commit_time: timestamp.unwrap_or_default(), + generation: None, + data, + } + } + Either::Right((cache, pos)) => { + let mut parents = SmallVec::default(); + let commit = cache.commit_at(*pos); + for pos in commit.iter_parents() { + let pos = pos?; + parents.push(cache.commit_at(pos).id().to_owned()); + } + Commit { + parents, + commit_time: commit.committer_timestamp(), + generation: Some(commit.generation()), + data, + } + } + }) + } +} + +/// An iterator over the parents of a commit. +pub struct Parents<'graph> { + backing: Either< + gix_object::CommitRefIter<'graph>, + ( + &'graph gix_commitgraph::Graph, + gix_commitgraph::file::commit::Parents<'graph>, + ), + >, +} + +impl<'graph> Iterator for Parents<'graph> { + type Item = Result<gix_hash::ObjectId, iter_parents::Error>; + + fn next(&mut self) -> Option<Self::Item> { + match &mut self.backing { + Either::Left(it) => { + for token in it { + match token { + Ok(gix_object::commit::ref_iter::Token::Tree { .. }) => continue, + Ok(gix_object::commit::ref_iter::Token::Parent { id }) => return Some(Ok(id)), + Ok(_unused_token) => break, + Err(err) => return Some(Err(err.into())), + } + } + None + } + Either::Right((cache, it)) => it + .next() + .map(|r| r.map(|pos| cache.id_at(pos).to_owned()).map_err(Into::into)), + } + } +} + +/// +pub mod iter_parents { + /// The error returned by the [`Parents`][super::Parents] iterator. + #[derive(Debug, thiserror::Error)] + #[allow(missing_docs)] + pub enum Error { + #[error("An error occurred when parsing commit parents")] + DecodeCommit(#[from] gix_object::decode::Error), + #[error("An error occurred when parsing parents from the commit graph")] + DecodeCommitGraph(#[from] gix_commitgraph::file::commit::Error), + } +} + +/// +pub mod to_owned { + /// The error returned by [`to_owned()`][crate::graph::LazyCommit::to_owned()]. + #[derive(Debug, thiserror::Error)] + #[allow(missing_docs)] + pub enum Error { + #[error("A commit could not be decoded during traversal")] + Decode(#[from] gix_object::decode::Error), + #[error("Could not find commit position in graph when traversing parents")] + CommitGraphParent(#[from] gix_commitgraph::file::commit::Error), + } +} diff --git a/vendor/gix-revwalk/src/graph/errors.rs b/vendor/gix-revwalk/src/graph/errors.rs new file mode 100644 index 000000000..a2d849fbf --- /dev/null +++ b/vendor/gix-revwalk/src/graph/errors.rs @@ -0,0 +1,53 @@ +/// +pub mod lookup { + /// The error returned by [`try_lookup()`][crate::Graph::try_lookup()]. + #[derive(Debug, thiserror::Error)] + #[error("There was an error looking up a commit")] + pub struct Error { + #[from] + source: Box<dyn std::error::Error + Send + Sync + 'static>, + } + + /// + pub mod commit { + /// The error returned by [`try_lookup_or_insert_commit()`][crate::Graph::try_lookup_or_insert_commit()]. + #[derive(Debug, thiserror::Error)] + #[allow(missing_docs)] + pub enum Error { + #[error(transparent)] + Find(#[from] crate::graph::lookup::Error), + #[error(transparent)] + ToOwned(#[from] crate::graph::commit::to_owned::Error), + } + } + + /// + pub mod existing { + /// The error returned by [`lookup()`][crate::Graph::lookup()]. + #[derive(Debug, thiserror::Error)] + #[allow(missing_docs)] + pub enum Error { + #[error(transparent)] + Find(#[from] super::Error), + #[error("Commit could not be found")] + Missing, + } + } +} + +/// +pub mod insert_parents { + use crate::graph::{commit::iter_parents, lookup}; + + /// The error returned by [`insert_parents()`][crate::Graph::insert_parents()]. + #[derive(Debug, thiserror::Error)] + #[allow(missing_docs)] + pub enum Error { + #[error(transparent)] + Lookup(#[from] lookup::existing::Error), + #[error("A commit could not be decoded during traversal")] + Decode(#[from] gix_object::decode::Error), + #[error(transparent)] + Parent(#[from] iter_parents::Error), + } +} diff --git a/vendor/gix-revwalk/src/graph/mod.rs b/vendor/gix-revwalk/src/graph/mod.rs new file mode 100644 index 000000000..cf7e1629e --- /dev/null +++ b/vendor/gix-revwalk/src/graph/mod.rs @@ -0,0 +1,321 @@ +use std::{fmt::Formatter, ops::Index}; + +use gix_hash::oid; +use smallvec::SmallVec; + +use crate::Graph; + +/// +pub mod commit; + +mod errors; +pub use errors::{insert_parents, lookup}; + +/// The time in seconds since unix epoch at which a commit was created. +pub type CommitterTimestamp = u64; + +/// The generation away from the HEAD of graph, useful to limit algorithms by topological depth as well. +/// +/// 0 would mean the starting point of the hierarchy, and 1 their parents. +/// This number is only available natively if there is a commit-graph. +pub type Generation = u32; + +impl<'find, T: std::fmt::Debug> std::fmt::Debug for Graph<'find, T> { + fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result { + std::fmt::Debug::fmt(&self.set, f) + } +} + +impl<'find, T: Default> Graph<'find, T> { + /// Lookup `id` without failing if the commit doesn't exist, and assure that `id` is inserted into our set. + /// If it wasn't, associate it with the default value. Assure `update_data(data)` gets run. + /// Return the commit when done. + /// Note that none of the data updates happen if there was no commit named `id`. + pub fn try_lookup_or_insert( + &mut self, + id: gix_hash::ObjectId, + update_data: impl FnOnce(&mut T), + ) -> Result<Option<LazyCommit<'_>>, lookup::Error> { + self.try_lookup_or_insert_default(id, T::default, update_data) + } +} + +/// Access and mutation +impl<'find, T> Graph<'find, T> { + /// Returns true if `id` has data associated with it, meaning that we processed it already. + pub fn contains(&self, id: &gix_hash::oid) -> bool { + self.set.contains_key(id.as_ref()) + } + + /// Returns the data associated with `id` if available. + pub fn get(&self, id: &gix_hash::oid) -> Option<&T> { + self.set.get(id) + } + + /// Returns the data associated with `id` if available as mutable reference. + pub fn get_mut(&mut self, id: &gix_hash::oid) -> Option<&mut T> { + self.set.get_mut(id) + } + + /// Insert `id` into the graph and associate it with `value`, returning the previous value associated with it if it existed. + pub fn insert(&mut self, id: gix_hash::ObjectId, value: T) -> Option<T> { + self.set.insert(id, value) + } + + /// Remove all data from the graph to start over. + pub fn clear(&mut self) { + self.set.clear(); + } + + /// Insert the parents of commit named `id` to the graph and associate new parents with data + /// by calling `new_parent_data(parent_id, committer_timestamp)`, or update existing parents + /// data with `update_existing(parent_id, &mut existing_data)`. + /// If `first_parent` is `true`, only the first parent of commits will be looked at. + pub fn insert_parents( + &mut self, + id: &gix_hash::oid, + mut new_parent_data: impl FnMut(gix_hash::ObjectId, CommitterTimestamp) -> T, + mut update_existing: impl FnMut(gix_hash::ObjectId, &mut T), + first_parent: bool, + ) -> Result<(), insert_parents::Error> { + let commit = self.lookup(id)?; + let parents: SmallVec<[_; 2]> = commit.iter_parents().collect(); + for parent_id in parents { + let parent_id = parent_id?; + match self.set.entry(parent_id) { + gix_hashtable::hash_map::Entry::Vacant(entry) => { + let parent = match try_lookup(&parent_id, &mut self.find, self.cache.as_ref(), &mut self.parent_buf) + .map_err(|err| insert_parents::Error::Lookup(lookup::existing::Error::Find(err)))? + { + Some(p) => p, + None => continue, // skip missing objects, this is due to shallow clones for instance. + }; + + let parent_commit_date = parent.committer_timestamp().unwrap_or_default(); + entry.insert(new_parent_data(parent_id, parent_commit_date)); + } + gix_hashtable::hash_map::Entry::Occupied(mut entry) => { + update_existing(parent_id, entry.get_mut()); + } + } + if first_parent { + break; + } + } + Ok(()) + } +} + +/// Initialization +impl<'find, T> Graph<'find, T> { + /// Create a new instance with `find` to retrieve commits and optionally `cache` to accelerate commit access. + /// + /// ### Performance + /// + /// `find` should be optimized to access the same object repeatedly, ideally with an object cache to keep the last couple of + /// most recently used commits. + /// Furthermore, **none-existing commits should not trigger the pack-db to be refreshed.** Otherwise, performance may be sub-optimal + /// in shallow repositories as running into non-existing commits will trigger a refresh of the `packs` directory. + pub fn new<Find, E>(mut find: Find, cache: impl Into<Option<gix_commitgraph::Graph>>) -> Self + where + Find: + for<'a> FnMut(&gix_hash::oid, &'a mut Vec<u8>) -> Result<Option<gix_object::CommitRefIter<'a>>, E> + 'find, + E: std::error::Error + Send + Sync + 'static, + { + Graph { + find: Box::new(move |id, buf| { + find(id, buf).map_err(|err| Box::new(err) as Box<dyn std::error::Error + Send + Sync + 'static>) + }), + cache: cache.into(), + set: gix_hashtable::HashMap::default(), + buf: Vec::new(), + parent_buf: Vec::new(), + } + } +} + +/// commit access +impl<'find, T> Graph<'find, Commit<T>> { + /// Lookup `id` without failing if the commit doesn't exist, and assure that `id` is inserted into our set + /// with a commit with `new_data()` assigned. + /// `update_data(data)` gets run either on existing or on new data. + /// + /// Note that none of the data updates happen if `id` didn't exist. + pub fn try_lookup_or_insert_commit_default( + &mut self, + id: gix_hash::ObjectId, + new_data: impl FnOnce() -> T, + update_data: impl FnOnce(&mut T), + ) -> Result<Option<&mut Commit<T>>, lookup::commit::Error> { + match self.set.entry(id) { + gix_hashtable::hash_map::Entry::Vacant(entry) => { + let res = try_lookup(&id, &mut self.find, self.cache.as_ref(), &mut self.buf)?; + let commit = match res { + None => return Ok(None), + Some(commit) => commit, + }; + let mut commit = commit.to_owned(new_data)?; + update_data(&mut commit.data); + entry.insert(commit); + } + gix_hashtable::hash_map::Entry::Occupied(mut entry) => { + update_data(&mut entry.get_mut().data); + } + }; + Ok(self.set.get_mut(&id)) + } +} + +/// commit access +impl<'find, T: Default> Graph<'find, Commit<T>> { + /// Lookup `id` without failing if the commit doesn't exist, and assure that `id` is inserted into our set + /// with a commit and default data assigned. + /// `update_data(data)` gets run either on existing or on new data. + /// + /// Note that none of the data updates happen if `id` didn't exist. + /// + /// If only commit data is desired without the need for attaching custom data, use + /// [`try_lookup(id).to_owned()`][Graph::try_lookup()] instead. + pub fn try_lookup_or_insert_commit( + &mut self, + id: gix_hash::ObjectId, + update_data: impl FnOnce(&mut T), + ) -> Result<Option<&mut Commit<T>>, lookup::commit::Error> { + self.try_lookup_or_insert_commit_default(id, T::default, update_data) + } +} + +/// Lazy commit access +impl<'find, T> Graph<'find, T> { + /// Lookup `id` without failing if the commit doesn't exist, and assure that `id` is inserted into our set + /// with a `default` value assigned to it. + /// `update_data(data)` gets run either on existing or no new data. + /// Return the commit when done. + /// + /// Note that none of the data updates happen if `id` didn't exist. + /// + /// If only commit data is desired without the need for attaching custom data, use + /// [`try_lookup(id)`][Graph::try_lookup()] instead. + pub fn try_lookup_or_insert_default( + &mut self, + id: gix_hash::ObjectId, + default: impl FnOnce() -> T, + update_data: impl FnOnce(&mut T), + ) -> Result<Option<LazyCommit<'_>>, lookup::Error> { + let res = try_lookup(&id, &mut self.find, self.cache.as_ref(), &mut self.buf)?; + Ok(res.map(|commit| { + match self.set.entry(id) { + gix_hashtable::hash_map::Entry::Vacant(entry) => { + let mut data = default(); + update_data(&mut data); + entry.insert(data); + } + gix_hashtable::hash_map::Entry::Occupied(mut entry) => { + update_data(entry.get_mut()); + } + }; + commit + })) + } + + /// Try to lookup `id` and return a handle to it for accessing its data, but don't fail if the commit doesn't exist. + /// + /// It's possible that commits don't exist if the repository is shallow. + pub fn try_lookup(&mut self, id: &gix_hash::oid) -> Result<Option<LazyCommit<'_>>, lookup::Error> { + try_lookup(id, &mut self.find, self.cache.as_ref(), &mut self.buf) + } + + /// Lookup `id` and return a handle to it, or fail if it doesn't exist. + pub fn lookup(&mut self, id: &gix_hash::oid) -> Result<LazyCommit<'_>, lookup::existing::Error> { + self.try_lookup(id)?.ok_or(lookup::existing::Error::Missing) + } +} + +fn try_lookup<'graph>( + id: &gix_hash::oid, + find: &mut Box<FindFn<'_>>, + cache: Option<&'graph gix_commitgraph::Graph>, + buf: &'graph mut Vec<u8>, +) -> Result<Option<LazyCommit<'graph>>, lookup::Error> { + if let Some(cache) = cache { + if let Some(pos) = cache.lookup(id) { + return Ok(Some(LazyCommit { + backing: Either::Right((cache, pos)), + })); + } + } + #[allow(clippy::manual_map)] + Ok(match find(id, buf)? { + Some(_) => Some(LazyCommit { + backing: Either::Left(buf), + }), + None => None, + }) +} + +impl<'a, 'find, T> Index<&'a gix_hash::oid> for Graph<'find, T> { + type Output = T; + + fn index(&self, index: &'a oid) -> &Self::Output { + &self.set[index] + } +} + +/// A commit that contains all information we can obtain through the commit-graph, which is typically enough to fuel any graph iteration. +pub struct Commit<T> { + /// The parents of the commit. + pub parents: SmallVec<[gix_hash::ObjectId; 1]>, + /// The time at which the commit was created. + pub commit_time: CommitterTimestamp, + /// The generation of the commit, if available. + pub generation: Option<u32>, + /// Any kind of data to associate with this commit. + pub data: T, +} + +impl<T> std::fmt::Debug for Commit<T> +where + T: std::fmt::Debug, +{ + fn fmt(&self, f: &mut Formatter<'_>) -> std::fmt::Result { + f.debug_struct("Commit") + .field("parents", &self.parents) + .field("commit_time", &self.commit_time) + .field("generation", &self.generation) + .field("data", &self.data) + .finish() + } +} + +impl<T> Clone for Commit<T> +where + T: Clone, +{ + fn clone(&self) -> Self { + Commit { + parents: self.parents.clone(), + commit_time: self.commit_time, + generation: self.generation, + data: self.data.clone(), + } + } +} + +/// A commit that provides access to graph-related information, on demand. +/// +/// The owned version of this type is called [`Commit`] and can be obtained by calling [`LazyCommit::to_owned()`]. +pub struct LazyCommit<'graph> { + backing: Either<&'graph [u8], (&'graph gix_commitgraph::Graph, gix_commitgraph::Position)>, +} + +pub(crate) type FindFn<'find> = dyn for<'a> FnMut( + &gix_hash::oid, + &'a mut Vec<u8>, + ) + -> Result<Option<gix_object::CommitRefIter<'a>>, Box<dyn std::error::Error + Send + Sync + 'static>> + + 'find; + +enum Either<T, U> { + Left(T), + Right(U), +} |