Diffstat (limited to 'vendor/gix-odb/src/store_impls/dynamic/iter.rs')
-rw-r--r--  vendor/gix-odb/src/store_impls/dynamic/iter.rs  236
1 file changed, 236 insertions(+), 0 deletions(-)
diff --git a/vendor/gix-odb/src/store_impls/dynamic/iter.rs b/vendor/gix-odb/src/store_impls/dynamic/iter.rs
new file mode 100644
index 000000000..bbe859e7c
--- /dev/null
+++ b/vendor/gix-odb/src/store_impls/dynamic/iter.rs
@@ -0,0 +1,236 @@
+use std::{ops::Deref, option::Option::None, sync::Arc, vec::IntoIter};
+
+use gix_hash::ObjectId;
+
+use crate::{
+    loose,
+    store::{handle, handle::SingleOrMultiIndex, types::PackId},
+    store_impls::dynamic,
+};
+
+struct EntryForOrdering {
+    pack_offset: u64, // the offset within the pack, used as the secondary sort key
+    entry_index: u32, // the index of the entry within its index file, used to look up its object id
+    pack_index: u16,  // identifies the pack, used as the primary sort key
+}
+
+enum State {
+    Pack {
+        index_iter: IntoIter<handle::IndexLookup>,
+        index: handle::IndexLookup,
+        ordered_entries: Option<Vec<EntryForOrdering>>,
+        entry_index: u32,
+        num_objects: u32,
+    },
+    Loose {
+        iter: loose::Iter,
+        index: usize,
+    },
+    Depleted,
+}
+
+/// Define the order in which objects are returned.
+#[derive(Debug, Copy, Clone)]
+pub enum Ordering {
+    /// Traverse packs first, sorted by their index files in lexicographical order (i.e. sorted by object id), then traverse
+    /// loose objects, also sorted by their names.
+    ///
+    /// This mode uses no extra memory as it relies on the natural ordering of objects, and is best for obtaining all object ids
+    /// as quickly as possible, while noting that these may contain duplicates. However, obtaining object information or decoding
+    /// objects in this order is very costly, as cache hits are unlikely and memory maps are less efficient when accessed randomly.
+    PackLexicographicalThenLooseLexicographical,
+    /// Traverse packs first, yielding object ids sorted by their position in the pack, with those at the beginning of the
+    /// pack file coming first. Then follow loose objects, sorted by their names.
+    ///
+    /// This mode allocates memory and has to pre-sort objects by their offsets, delaying the start of the iteration once per
+    /// pack while keeping memory allocated once per pack. This price is usually worth paying once querying object information
+    /// is planned, as pack caches are used more efficiently that way.
+    PackAscendingOffsetThenLooseLexicographical,
+}
+
+impl Default for Ordering {
+    fn default() -> Self {
+        Ordering::PackLexicographicalThenLooseLexicographical
+    }
+}
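
// A quick check of the default choice described above: it favors zero allocation over
// a lookup-friendly order. A minimal sketch using only the declarations in this file:
fn _default_ordering_demo() {
    assert!(matches!(
        Ordering::default(),
        Ordering::PackLexicographicalThenLooseLexicographical
    ));
}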
+
+/// An iterator over all, _possibly duplicate_, objects of an object store, which by default uses no extra memory but yields
+/// objects in an order that is costly to traverse when querying object information or decoding them.
+///
+/// Use [`with_ordering()`][AllObjects::with_ordering()] to choose a performance trade-off.
+pub struct AllObjects {
+    state: State,
+    num_objects: usize,
+    loose_dbs: Arc<Vec<loose::Store>>,
+    order: Ordering,
+}
+
+/// Builder
+impl AllObjects {
+    /// Set the ordering of the objects returned, trading off memory and latency for object query performance.
+    pub fn with_ordering(mut self, order: Ordering) -> Self {
+        self.order = order;
+        self
+    }
+}
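
// Usage sketch for the builder, with names as declared in this module and a `store`
// binding of type `&dynamic::Store` assumed to be in scope; the offset-sorted order
// pays off when object queries will follow the traversal:
fn _offset_ordered(store: &dynamic::Store) -> Result<AllObjects, crate::store::load_index::Error> {
    Ok(AllObjects::new(store)?.with_ordering(Ordering::PackAscendingOffsetThenLooseLexicographical))
}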
+
+impl AllObjects {
+    /// Create a new iterator from a dynamic store, which will eagerly load all indices on the current thread.
+    pub fn new(db: &dynamic::Store) -> Result<Self, crate::store::load_index::Error> {
+        let snapshot = db.load_all_indices()?;
+
+        let packed_objects = snapshot
+            .indices
+            .iter()
+            .fold(0usize, |dbc, index| dbc.saturating_add(index.num_objects() as usize));
+        let mut index_iter = snapshot.indices.into_iter();
+        let loose_dbs = snapshot.loose_dbs;
+        let order = Default::default();
+        let state = match index_iter.next() {
+            Some(index) => {
+                let num_objects = index.num_objects();
+                State::Pack {
+                    index_iter,
+                    ordered_entries: maybe_sort_entries(&index, order),
+                    index,
+                    entry_index: 0,
+                    num_objects,
+                }
+            }
+            None => {
+                let index = 0;
+                State::Loose {
+                    iter: loose_dbs.get(index).expect("at least one loose db").iter(),
+                    index,
+                }
+            }
+        };
+        Ok(AllObjects {
+            state,
+            loose_dbs,
+            num_objects: packed_objects,
+            order,
+        })
+    }
+}
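
// Note that construction is eager: `load_all_indices()` blocks the current thread until
// every index is mapped, and the packed-object total is accumulated with `saturating_add`,
// so an absurdly large store clamps at `usize::MAX` rather than overflowing:
fn _saturation_demo() {
    assert_eq!(usize::MAX.saturating_add(1), usize::MAX);
}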
+
+fn maybe_sort_entries(index: &handle::IndexLookup, order: Ordering) -> Option<Vec<EntryForOrdering>> {
+    let mut order: Vec<_> = match order {
+        Ordering::PackLexicographicalThenLooseLexicographical => return None,
+        Ordering::PackAscendingOffsetThenLooseLexicographical => match &index.file {
+            // We know that we cannot have more than u32 entry indices per pack.
+            SingleOrMultiIndex::Single { index, .. } => index
+                .iter()
+                .enumerate()
+                .map(|(idx, e)| EntryForOrdering {
+                    pack_offset: e.pack_offset,
+                    entry_index: idx as u32,
+                    pack_index: 0,
+                })
+                .collect(),
+            SingleOrMultiIndex::Multi { index, .. } => index
+                .iter()
+                .enumerate()
+                .map(|(idx, e)| EntryForOrdering {
+                    pack_offset: e.pack_offset,
+                    entry_index: idx as u32,
+                    pack_index: {
+                        debug_assert!(
+                            e.pack_index < PackId::max_packs_in_multi_index(),
+                            "the pack index fits into u16 as a multi-index cannot reference more packs than that"
+                        );
+                        e.pack_index as u16
+                    },
+                })
+                .collect(),
+        },
+    };
+    order.sort_by(|a, b| {
+        a.pack_index
+            .cmp(&b.pack_index)
+            .then_with(|| a.pack_offset.cmp(&b.pack_offset))
+    });
+    Some(order)
+}
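
// The comparator above orders entries by pack first, then by ascending offset within
// each pack. A self-contained illustration of the same two-key sort over hypothetical
// (pack_index, pack_offset) pairs:
fn _two_key_sort_demo() {
    let mut entries = vec![(1u16, 20u64), (0, 50), (0, 10)];
    entries.sort_by(|a, b| a.0.cmp(&b.0).then_with(|| a.1.cmp(&b.1)));
    assert_eq!(entries, vec![(0, 10), (0, 50), (1, 20)]);
}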
+
+impl Iterator for AllObjects {
+    type Item = Result<ObjectId, loose::iter::Error>;
+
+    fn next(&mut self) -> Option<Self::Item> {
+        match &mut self.state {
+            State::Depleted => None,
+            State::Pack {
+                index_iter,
+                ordered_entries,
+                index,
+                entry_index,
+                num_objects,
+            } => {
+                if *entry_index < *num_objects {
+                    let oid = match ordered_entries {
+                        Some(entries) => index.oid_at_index(entries[*entry_index as usize].entry_index),
+                        None => index.oid_at_index(*entry_index),
+                    }
+                    .to_owned();
+                    *entry_index += 1;
+                    Some(Ok(oid))
+                } else {
+                    match index_iter.next() {
+                        Some(new_index) => {
+                            *ordered_entries = maybe_sort_entries(&new_index, self.order);
+                            *index = new_index;
+                            *entry_index = 0;
+                            *num_objects = index.num_objects();
+                        }
+                        None => {
+                            let index = 0;
+                            self.state = State::Loose {
+                                iter: self.loose_dbs.get(index).expect("at least one loose odb").iter(),
+                                index,
+                            }
+                        }
+                    }
+                    self.next()
+                }
+            }
+            State::Loose { iter, index } => match iter.next() {
+                Some(id) => Some(id),
+                None => {
+                    *index += 1;
+                    match self.loose_dbs.get(*index).map(|ldb| ldb.iter()) {
+                        Some(new_iter) => {
+                            *iter = new_iter;
+                            self.next()
+                        }
+                        None => {
+                            self.state = State::Depleted;
+                            None
+                        }
+                    }
+                }
+            },
+        }
+    }
+
+    fn size_hint(&self) -> (usize, Option<usize>) {
+        (self.num_objects, None)
+    }
+}
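
// `size_hint()` reports only the pre-computed packed-object count as the lower bound;
// loose objects and cross-pack duplicates make a precise upper bound unknowable, hence
// `None`. A sketch of a caller using the lower bound to pre-allocate, assuming both
// error types convert into `Box<dyn std::error::Error>`:
fn _collect_ids(store: &dynamic::Store) -> Result<Vec<ObjectId>, Box<dyn std::error::Error>> {
    let iter = store.iter()?;
    let (at_least, _) = iter.size_hint();
    let mut ids = Vec::with_capacity(at_least); // covers packed objects; loose ones grow it
    for id in iter {
        ids.push(id?);
    }
    Ok(ids)
}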
+
+impl<S> super::Handle<S>
+where
+    S: Deref<Target = super::Store> + Clone,
+{
+    /// Return an iterator over all, _possibly duplicate_, objects, starting with those in all packs of all linked databases
+    /// (via alternates), followed by all loose objects.
+    pub fn iter(&self) -> Result<AllObjects, dynamic::load_index::Error> {
+        AllObjects::new(self.store_ref())
+    }
+}
+
+impl dynamic::Store {
+    /// Like [`Handle::iter()`][super::Handle::iter()], but accessible directly on the store.
+    pub fn iter(&self) -> Result<AllObjects, dynamic::load_index::Error> {
+        AllObjects::new(self)
+    }
+}
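
// End-to-end sketch from a consumer's point of view, assuming `gix_odb::at()` opens a
// handle whose `Deref` chain reaches the `iter()` defined above, and that this module
// is re-exported under `gix_odb::store::iter` (both assumptions, not verified here):
fn _walk_all_objects() -> Result<(), Box<dyn std::error::Error>> {
    use gix_odb::store::iter::Ordering;
    let handle = gix_odb::at("repo/.git/objects")?;
    for oid in handle.iter()?.with_ordering(Ordering::PackAscendingOffsetThenLooseLexicographical) {
        let _oid = oid?; // duplicates across packs and loose stores are possible
    }
    Ok(())
}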