From 7e5d7eea9c580ef4b41a765bde624af431942b96 Mon Sep 17 00:00:00 2001 From: Daniel Baumann Date: Sat, 4 May 2024 14:41:35 +0200 Subject: Merging upstream version 1.70.0+dfsg2. Signed-off-by: Daniel Baumann --- vendor/gix-traverse/src/commit.rs | 377 +++++++++++++++++++++++++++ vendor/gix-traverse/src/lib.rs | 9 + vendor/gix-traverse/src/tree/breadthfirst.rs | 107 ++++++++ vendor/gix-traverse/src/tree/mod.rs | 69 +++++ vendor/gix-traverse/src/tree/recorder.rs | 139 ++++++++++ 5 files changed, 701 insertions(+) create mode 100644 vendor/gix-traverse/src/commit.rs create mode 100644 vendor/gix-traverse/src/lib.rs create mode 100644 vendor/gix-traverse/src/tree/breadthfirst.rs create mode 100644 vendor/gix-traverse/src/tree/mod.rs create mode 100644 vendor/gix-traverse/src/tree/recorder.rs (limited to 'vendor/gix-traverse/src') diff --git a/vendor/gix-traverse/src/commit.rs b/vendor/gix-traverse/src/commit.rs new file mode 100644 index 000000000..890fde2a9 --- /dev/null +++ b/vendor/gix-traverse/src/commit.rs @@ -0,0 +1,377 @@ +/// An iterator over the ancestors one or more starting commits +pub struct Ancestors { + find: Find, + predicate: Predicate, + state: StateMut, + parents: Parents, + sorting: Sorting, +} + +/// Specify how to handle commit parents during traversal. +#[derive(Copy, Clone)] +pub enum Parents { + /// Traverse all parents, useful for traversing the entire ancestry. + All, + /// Only traverse along the first parent, which commonly ignores all branches. + First, +} + +impl Default for Parents { + fn default() -> Self { + Parents::All + } +} + +/// Specify how to sort commits during traversal. +#[derive(Copy, Clone)] +pub enum Sorting { + /// Commits are sorted as they are mentioned in the commit graph. + Topological, + /// Commits are sorted by their commit time in descending order, that is newest first. + /// + /// The sorting applies to all currently queued commit ids and thus is full. + /// + /// # Performance + /// + /// This mode benefits greatly from having an object_cache in `find()` + /// to avoid having to lookup each commit twice. + ByCommitTimeNewestFirst, + /// This sorting is similar to `ByCommitTimeNewestFirst`, but adds a cutoff to not return commits older than + /// a given time, stopping the iteration once no younger commits is queued to be traversed. + /// + /// As the query is usually repeated with different cutoff dates, this search mode benefits greatly from an object cache. + ByCommitTimeNewestFirstCutoffOlderThan { + /// The amount of seconds since unix epoch, the same value obtained by any `gix_date::Time` structure and the way git counts time. + time_in_seconds_since_epoch: u32, + }, +} + +impl Default for Sorting { + fn default() -> Self { + Sorting::Topological + } +} + +/// +pub mod ancestors { + use std::{ + borrow::{Borrow, BorrowMut}, + collections::VecDeque, + iter::FromIterator, + }; + + use gix_hash::{oid, ObjectId}; + use gix_hashtable::HashSet; + use gix_object::CommitRefIter; + + use crate::commit::{Ancestors, Parents, Sorting}; + + /// The error is part of the item returned by the [Ancestors] iterator. + #[derive(Debug, thiserror::Error)] + #[allow(missing_docs)] + pub enum Error { + #[error("The commit {oid} could not be found")] + FindExisting { + oid: ObjectId, + source: Box, + }, + #[error(transparent)] + ObjectDecode(#[from] gix_object::decode::Error), + } + + type TimeInSeconds = u32; + + /// The state used and potentially shared by multiple graph traversals. + #[derive(Default, Clone)] + pub struct State { + next: VecDeque<(ObjectId, TimeInSeconds)>, + buf: Vec, + seen: HashSet, + parents_buf: Vec, + } + + impl State { + fn clear(&mut self) { + self.next.clear(); + self.buf.clear(); + self.seen.clear(); + } + } + + /// Builder + impl Ancestors { + /// Change our commit parent handling mode to the given one. + pub fn parents(mut self, mode: Parents) -> Self { + self.parents = mode; + self + } + } + + /// Builder + impl Ancestors + where + Find: for<'a> FnMut(&oid, &'a mut Vec) -> Result, E>, + StateMut: BorrowMut, + E: std::error::Error + Send + Sync + 'static, + { + /// Set the sorting method, either topological or by author date + pub fn sorting(mut self, sorting: Sorting) -> Result { + self.sorting = sorting; + if !matches!(self.sorting, Sorting::Topological) { + let mut cutoff_time_storage = self.sorting.cutoff_time().map(|cot| (cot, Vec::new())); + let state = self.state.borrow_mut(); + for (commit_id, commit_time) in state.next.iter_mut() { + let commit_iter = (self.find)(commit_id, &mut state.buf).map_err(|err| Error::FindExisting { + oid: *commit_id, + source: err.into(), + })?; + let time = commit_iter.committer()?.time.seconds_since_unix_epoch; + match &mut cutoff_time_storage { + Some((cutoff_time, storage)) if time >= *cutoff_time => { + storage.push((*commit_id, time)); + } + Some(_) => {} + None => *commit_time = time, + } + } + let mut v = match cutoff_time_storage { + Some((_, storage)) => storage, + None => Vec::from_iter(std::mem::take(&mut state.next).into_iter()), + }; + v.sort_by(|a, b| a.1.cmp(&b.1).reverse()); + state.next = v.into(); + } + Ok(self) + } + } + + /// Initialization + impl Ancestors bool, StateMut> + where + Find: for<'a> FnMut(&oid, &'a mut Vec) -> Result, E>, + StateMut: BorrowMut, + E: std::error::Error + Send + Sync + 'static, + { + /// Create a new instance. + /// + /// * `find` - a way to lookup new object data during traversal by their ObjectId, writing their data into buffer and returning + /// an iterator over commit tokens if the object is present and is a commit. Caching should be implemented within this function + /// as needed. + /// * `state` - all state used for the traversal. If multiple traversals are performed, allocations can be minimized by reusing + /// this state. + /// * `tips` + /// * the starting points of the iteration, usually commits + /// * each commit they lead to will only be returned once, including the tip that started it + pub fn new(tips: impl IntoIterator>, state: StateMut, find: Find) -> Self { + Self::filtered(tips, state, find, |_| true) + } + } + + /// Initialization + impl Ancestors + where + Find: for<'a> FnMut(&oid, &'a mut Vec) -> Result, E>, + Predicate: FnMut(&oid) -> bool, + StateMut: BorrowMut, + E: std::error::Error + Send + Sync + 'static, + { + /// Create a new instance with commit filtering enabled. + /// + /// * `find` - a way to lookup new object data during traversal by their ObjectId, writing their data into buffer and returning + /// an iterator over commit tokens if the object is present and is a commit. Caching should be implemented within this function + /// as needed. + /// * `state` - all state used for the traversal. If multiple traversals are performed, allocations can be minimized by reusing + /// this state. + /// * `tips` + /// * the starting points of the iteration, usually commits + /// * each commit they lead to will only be returned once, including the tip that started it + /// * `predicate` - indicate whether a given commit should be included in the result as well + /// as whether its parent commits should be traversed. + pub fn filtered( + tips: impl IntoIterator>, + mut state: StateMut, + find: Find, + mut predicate: Predicate, + ) -> Self { + let tips = tips.into_iter(); + { + let state = state.borrow_mut(); + state.clear(); + state.next.reserve(tips.size_hint().0); + for tip in tips.map(Into::into) { + let was_inserted = state.seen.insert(tip); + if was_inserted && predicate(&tip) { + state.next.push_back((tip, 0)); + } + } + } + Self { + find, + predicate, + state, + parents: Default::default(), + sorting: Default::default(), + } + } + } + /// Access + impl Ancestors + where + StateMut: Borrow, + { + /// Return an iterator for accessing more of the current commits data. + pub fn commit_iter(&self) -> CommitRefIter<'_> { + CommitRefIter::from_bytes(&self.state.borrow().buf) + } + } + + impl Iterator for Ancestors + where + Find: for<'a> FnMut(&oid, &'a mut Vec) -> Result, E>, + Predicate: FnMut(&oid) -> bool, + StateMut: BorrowMut, + E: std::error::Error + Send + Sync + 'static, + { + type Item = Result; + + fn next(&mut self) -> Option { + if matches!(self.parents, Parents::First) { + self.next_by_topology() + } else { + match self.sorting { + Sorting::Topological => self.next_by_topology(), + Sorting::ByCommitTimeNewestFirst => self.next_by_commit_date(None), + Sorting::ByCommitTimeNewestFirstCutoffOlderThan { + time_in_seconds_since_epoch, + } => self.next_by_commit_date(time_in_seconds_since_epoch.into()), + } + } + } + } + + impl Sorting { + /// If not topo sort, provide the cutoff date if present. + fn cutoff_time(&self) -> Option { + match self { + Sorting::ByCommitTimeNewestFirstCutoffOlderThan { + time_in_seconds_since_epoch, + } => Some(*time_in_seconds_since_epoch), + _ => None, + } + } + } + + /// Utilities + impl Ancestors + where + Find: for<'a> FnMut(&oid, &'a mut Vec) -> Result, E>, + Predicate: FnMut(&oid) -> bool, + StateMut: BorrowMut, + E: std::error::Error + Send + Sync + 'static, + { + fn next_by_commit_date(&mut self, cutoff_older_than: Option) -> Option> { + let state = self.state.borrow_mut(); + + let (oid, _commit_time) = state.next.pop_front()?; + match (self.find)(&oid, &mut state.buf) { + Ok(commit_iter) => { + let mut count = 0; + for token in commit_iter { + count += 1; + let is_first = count == 1; + match token { + Ok(gix_object::commit::ref_iter::Token::Tree { .. }) => continue, + Ok(gix_object::commit::ref_iter::Token::Parent { id }) => { + let was_inserted = state.seen.insert(id); + if !(was_inserted && (self.predicate)(&id)) { + if is_first && matches!(self.parents, Parents::First) { + break; + } else { + continue; + } + } + + let parent = (self.find)(id.as_ref(), &mut state.parents_buf).ok(); + let parent_commit_time = parent + .and_then(|parent| { + parent + .committer() + .ok() + .map(|committer| committer.time.seconds_since_unix_epoch) + }) + .unwrap_or_default(); + + let pos = match state.next.binary_search_by(|c| c.1.cmp(&parent_commit_time).reverse()) + { + Ok(_) => None, + Err(pos) => Some(pos), + }; + match cutoff_older_than { + Some(cutoff_older_than) if parent_commit_time < cutoff_older_than => continue, + Some(_) | None => match pos { + Some(pos) => state.next.insert(pos, (id, parent_commit_time)), + None => state.next.push_back((id, parent_commit_time)), + }, + } + + if is_first && matches!(self.parents, Parents::First) { + break; + } + } + Ok(_unused_token) => break, + Err(err) => return Some(Err(err.into())), + } + } + } + Err(err) => { + return Some(Err(Error::FindExisting { + oid, + source: err.into(), + })) + } + } + Some(Ok(oid)) + } + } + + /// Utilities + impl Ancestors + where + Find: for<'a> FnMut(&oid, &'a mut Vec) -> Result, E>, + Predicate: FnMut(&oid) -> bool, + StateMut: BorrowMut, + E: std::error::Error + Send + Sync + 'static, + { + fn next_by_topology(&mut self) -> Option> { + let state = self.state.borrow_mut(); + let (oid, _commit_time) = state.next.pop_front()?; + match (self.find)(&oid, &mut state.buf) { + Ok(commit_iter) => { + for token in commit_iter { + match token { + Ok(gix_object::commit::ref_iter::Token::Tree { .. }) => continue, + Ok(gix_object::commit::ref_iter::Token::Parent { id }) => { + let was_inserted = state.seen.insert(id); + if was_inserted && (self.predicate)(&id) { + state.next.push_back((id, 0)); + } + if matches!(self.parents, Parents::First) { + break; + } + } + Ok(_a_token_past_the_parents) => break, + Err(err) => return Some(Err(err.into())), + } + } + } + Err(err) => { + return Some(Err(Error::FindExisting { + oid, + source: err.into(), + })) + } + } + Some(Ok(oid)) + } + } +} diff --git a/vendor/gix-traverse/src/lib.rs b/vendor/gix-traverse/src/lib.rs new file mode 100644 index 000000000..3cf6d2b3a --- /dev/null +++ b/vendor/gix-traverse/src/lib.rs @@ -0,0 +1,9 @@ +//! Various ways to traverse commit graphs and trees with implementations as iterator +#![deny(missing_docs, rust_2018_idioms)] +#![forbid(unsafe_code)] + +/// Commit traversal +pub mod commit; + +/// Tree traversal +pub mod tree; diff --git a/vendor/gix-traverse/src/tree/breadthfirst.rs b/vendor/gix-traverse/src/tree/breadthfirst.rs new file mode 100644 index 000000000..b0c7ffdf5 --- /dev/null +++ b/vendor/gix-traverse/src/tree/breadthfirst.rs @@ -0,0 +1,107 @@ +use std::collections::VecDeque; + +use gix_hash::ObjectId; + +/// The error is part of the item returned by the [`traverse()`][impl_::traverse()] function. +#[derive(Debug, thiserror::Error)] +#[allow(missing_docs)] +pub enum Error { + #[error("The tree {oid} could not be found")] + NotFound { oid: ObjectId }, + #[error("The delegate cancelled the operation")] + Cancelled, + #[error(transparent)] + ObjectDecode(#[from] gix_object::decode::Error), +} + +/// The state used and potentially shared by multiple tree traversals. +#[derive(Default, Clone)] +pub struct State { + next: VecDeque, + buf: Vec, +} + +impl State { + fn clear(&mut self) { + self.next.clear(); + self.buf.clear(); + } +} + +pub(crate) mod impl_ { + use std::borrow::BorrowMut; + + use gix_hash::oid; + use gix_object::{tree::EntryMode, TreeRefIter}; + + use super::{Error, State}; + use crate::tree::Visit; + + /// Start a breadth-first iteration over the `root` trees entries. + /// + /// * `root` + /// * the tree to iterate in a nested fashion. + /// * `state` - all state used for the iteration. If multiple iterations are performed, allocations can be minimized by reusing + /// this state. + /// * `find` - a way to lookup new object data during traversal by their ObjectId, writing their data into buffer and returning + /// an iterator over entries if the object is present and is a tree. Caching should be implemented within this function + /// as needed. The return value is `Option` which degenerates all error information. Not finding a commit should also + /// be considered an errors as all objects in the tree DAG should be present in the database. Hence [`Error::NotFound`] should + /// be escalated into a more specific error if its encountered by the caller. + /// * `delegate` - A way to observe entries and control the iteration while allowing the optimizer to let you pay only for what you use. + pub fn traverse( + root: TreeRefIter<'_>, + mut state: StateMut, + mut find: Find, + delegate: &mut V, + ) -> Result<(), Error> + where + Find: for<'a> FnMut(&oid, &'a mut Vec) -> Option>, + StateMut: BorrowMut, + V: Visit, + { + let state = state.borrow_mut(); + state.clear(); + let mut tree = root; + loop { + for entry in tree { + let entry = entry?; + match entry.mode { + EntryMode::Tree => { + use crate::tree::visit::Action::*; + delegate.push_path_component(entry.filename); + let action = delegate.visit_tree(&entry); + match action { + Skip => {} + Continue => { + delegate.pop_path_component(); + delegate.push_back_tracked_path_component(entry.filename); + state.next.push_back(entry.oid.to_owned()) + } + Cancel => { + return Err(Error::Cancelled); + } + } + } + _non_tree => { + delegate.push_path_component(entry.filename); + if delegate.visit_nontree(&entry).cancelled() { + return Err(Error::Cancelled); + } + } + } + delegate.pop_path_component(); + } + match state.next.pop_front() { + Some(oid) => { + delegate.pop_front_tracked_path_and_set_current(); + match find(&oid, &mut state.buf) { + Some(tree_iter) => tree = tree_iter, + None => return Err(Error::NotFound { oid: oid.to_owned() }), + } + } + None => break Ok(()), + } + } + } +} diff --git a/vendor/gix-traverse/src/tree/mod.rs b/vendor/gix-traverse/src/tree/mod.rs new file mode 100644 index 000000000..5ae01736c --- /dev/null +++ b/vendor/gix-traverse/src/tree/mod.rs @@ -0,0 +1,69 @@ +use std::collections::VecDeque; + +use gix_object::bstr::{BStr, BString}; + +/// A trait to allow responding to a traversal designed to observe all entries in a tree, recursively while keeping track of +/// paths if desired. +pub trait Visit { + /// Sets the full path path in front of the queue so future calls to push and pop components affect it instead. + fn pop_front_tracked_path_and_set_current(&mut self); + /// Append a `component` to the end of a path, which may be empty. + fn push_back_tracked_path_component(&mut self, component: &BStr); + /// Append a `component` to the end of a path, which may be empty. + fn push_path_component(&mut self, component: &BStr); + /// Removes the last component from the path, which may leave it empty. + fn pop_path_component(&mut self); + + /// Observe a tree entry that is a tree and return an instruction whether to continue or not. + /// [`Action::Skip`][visit::Action::Skip] can be used to prevent traversing it, for example if it's known to the caller already. + /// + /// The implementation may use the current path to learn where in the tree the change is located. + fn visit_tree(&mut self, entry: &gix_object::tree::EntryRef<'_>) -> visit::Action; + + /// Observe a tree entry that is NO tree and return an instruction whether to continue or not. + /// [`Action::Skip`][visit::Action::Skip] has no effect here. + /// + /// The implementation may use the current path to learn where in the tree the change is located. + fn visit_nontree(&mut self, entry: &gix_object::tree::EntryRef<'_>) -> visit::Action; +} + +/// A [Visit][Visit] implementation to record every observed change and keep track of the changed paths. +/// +/// Recorders can also be instructed to track the filename only, or no location at all. +#[derive(Clone, Debug)] +pub struct Recorder { + path_deque: VecDeque, + path: BString, + /// How to track the location. + location: Option, + /// The observed entries. + pub records: Vec, +} + +/// +pub mod visit { + /// What to do after an entry was [recorded][super::Visit::visit_tree()]. + #[derive(Clone, Copy, PartialOrd, PartialEq, Ord, Eq, Hash)] + pub enum Action { + /// Continue the traversal of entries. + Continue, + /// Stop the traversal of entries, making this te last call to [`visit_(tree|nontree)(…)`][super::Visit::visit_nontree()]. + Cancel, + /// Don't dive into the entry, skipping children effectively. Only useful in [`visit_tree(…)`][super::Visit::visit_tree()]. + Skip, + } + + impl Action { + /// Returns true if this action means to stop the traversal. + pub fn cancelled(&self) -> bool { + matches!(self, Action::Cancel) + } + } +} + +/// +pub mod recorder; + +/// +pub mod breadthfirst; +pub use breadthfirst::impl_::traverse as breadthfirst; diff --git a/vendor/gix-traverse/src/tree/recorder.rs b/vendor/gix-traverse/src/tree/recorder.rs new file mode 100644 index 000000000..d6144c9b9 --- /dev/null +++ b/vendor/gix-traverse/src/tree/recorder.rs @@ -0,0 +1,139 @@ +use gix_hash::ObjectId; +use gix_object::{ + bstr::{BStr, BString, ByteSlice, ByteVec}, + tree, +}; + +use crate::tree::{visit::Action, Recorder, Visit}; + +/// Describe how to track the location of an entry. +#[derive(Debug, Clone, Copy, Eq, PartialEq)] +pub enum Location { + /// Track the entire path, relative to the repository. + Path, + /// Keep only the file-name as location, which may be enough for some calculations. + /// + /// This is less expensive than tracking the entire `Path`. + FileName, +} + +/// An owned entry as observed by a call to [`visit_(tree|nontree)(…)`][Visit::visit_tree()], enhanced with the full path to it. +/// Otherwise similar to [`gix_object::tree::EntryRef`]. +#[derive(Clone, Debug, PartialEq, Eq)] +pub struct Entry { + /// The kind of entry, similar to entries in a unix directory tree. + pub mode: tree::EntryMode, + /// The full path to the entry. A root entry would be `d`, and a file `a` within the directory would be `d/a`. + /// + /// This is independent of the platform and the path separators actually used there. + pub filepath: BString, + /// The id of the entry which can be used to locate it in an object database. + pub oid: ObjectId, +} + +impl Entry { + fn new(entry: &tree::EntryRef<'_>, filepath: BString) -> Self { + Entry { + filepath, + oid: entry.oid.to_owned(), + mode: entry.mode, + } + } +} + +impl Default for Recorder { + fn default() -> Self { + Recorder { + path_deque: Default::default(), + path: Default::default(), + location: Location::Path.into(), + records: vec![], + } + } +} + +impl Recorder { + fn pop_element(&mut self) { + if let Some(pos) = self.path.rfind_byte(b'/') { + self.path.resize(pos, 0); + } else { + self.path.clear(); + } + } + + fn push_element(&mut self, name: &BStr) { + if !self.path.is_empty() { + self.path.push(b'/'); + } + self.path.push_str(name); + } +} + +/// Builder +impl Recorder { + /// Obtain a copy of the currently tracked, full path of the entry. + pub fn track_location(mut self, location: Option) -> Self { + self.location = location; + self + } +} + +/// Access +impl Recorder { + /// Obtain a copy of the currently tracked, full path of the entry. + pub fn path_clone(&self) -> BString { + self.path.clone() + } + + /// Return the currently set path. + pub fn path(&self) -> &BStr { + self.path.as_ref() + } +} + +impl Visit for Recorder { + fn pop_front_tracked_path_and_set_current(&mut self) { + if let Some(Location::Path) = self.location { + self.path = self + .path_deque + .pop_front() + .expect("every call is matched with push_tracked_path_component"); + } + } + + fn push_back_tracked_path_component(&mut self, component: &BStr) { + if let Some(Location::Path) = self.location { + self.push_element(component); + self.path_deque.push_back(self.path.clone()); + } + } + + fn push_path_component(&mut self, component: &BStr) { + match self.location { + None => {} + Some(Location::Path) => { + self.push_element(component); + } + Some(Location::FileName) => { + self.path.clear(); + self.path.extend_from_slice(component); + } + } + } + + fn pop_path_component(&mut self) { + if let Some(Location::Path) = self.location { + self.pop_element() + } + } + + fn visit_tree(&mut self, entry: &tree::EntryRef<'_>) -> Action { + self.records.push(Entry::new(entry, self.path_clone())); + Action::Continue + } + + fn visit_nontree(&mut self, entry: &tree::EntryRef<'_>) -> Action { + self.records.push(Entry::new(entry, self.path_clone())); + Action::Continue + } +} -- cgit v1.2.3