summary refs log tree commit diff stats
path: root/vendor/gix-pack/src/cache
diff options
context:
space:
mode:
Diffstat (limited to 'vendor/gix-pack/src/cache')
-rw-r--r-- vendor/gix-pack/src/cache/delta/from_offsets.rs 161
-rw-r--r-- vendor/gix-pack/src/cache/delta/mod.rs 216
-rw-r--r-- vendor/gix-pack/src/cache/delta/traverse/mod.rs 177
-rw-r--r-- vendor/gix-pack/src/cache/delta/traverse/resolve.rs 154
-rw-r--r-- vendor/gix-pack/src/cache/delta/traverse/util.rs 63
-rw-r--r-- vendor/gix-pack/src/cache/lru.rs 165
-rw-r--r-- vendor/gix-pack/src/cache/mod.rs 55
-rw-r--r-- vendor/gix-pack/src/cache/object.rs 123
8 files changed, 1114 insertions, 0 deletions
diff --git a/vendor/gix-pack/src/cache/delta/from_offsets.rs b/vendor/gix-pack/src/cache/delta/from_offsets.rs
new file mode 100644
index 000000000..8acb4a802
--- /dev/null
+++ b/vendor/gix-pack/src/cache/delta/from_offsets.rs
@@ -0,0 +1,161 @@
+use std::{
+ convert::TryFrom,
+ fs, io,
+ io::{BufRead, Read, Seek, SeekFrom},
+ sync::atomic::{AtomicBool, Ordering},
+ time::Instant,
+};
+
+use gix_features::progress::{self, Progress};
+
+use crate::{cache::delta::Tree, data};
+
+/// Returned by [`Tree::from_offsets_in_pack()`]
+#[derive(thiserror::Error, Debug)]
+#[allow(missing_docs)]
+pub enum Error {
+    /// An I/O operation on the pack file failed, with `message` naming the step that was attempted.
+    #[error("{message}")]
+    Io { source: io::Error, message: &'static str },
+    /// The header at the start of the pack file could not be decoded.
+    #[error(transparent)]
+    Header(#[from] crate::data::header::decode::Error),
+    /// A ref-delta entry names a base object which `resolve_in_pack_id` could not find in this pack.
+    #[error("Could not find object with id {id} in this pack. Thin packs are not supported")]
+    UnresolvedRefDelta { id: gix_hash::ObjectId },
+    /// Adding an entry to the tree failed.
+    #[error(transparent)]
+    Tree(#[from] crate::cache::delta::Error),
+    /// The `should_interrupt` flag was found to be set while reading entries.
+    #[error("Interrupted")]
+    Interrupted,
+}
+
+/// The amount of bytes read from the start of the pack file in order to decode and validate its header.
+const PACK_HEADER_LEN: usize = 12;
+
+/// Generate tree from certain input
+impl<T> Tree<T> {
+ /// Create a new `Tree` from any data sorted by offset, ascending as returned by the `data_sorted_by_offsets` iterator.
+ /// * `get_pack_offset(item: &T) -> data::Offset` is a function returning the pack offset of the given item, which can be used
+ /// for obtaining the object's entry within the pack.
+ /// * `pack_path` is the path to the pack file itself and from which to read the entry data, which is a pack file matching the offsets
+ /// returned by `get_pack_offset(…)`.
+ /// * `progress` is used to track progress when creating the tree.
+ /// * `resolve_in_pack_id(gix_hash::oid) -> Option<data::Offset>` takes an object ID and tries to resolve it to an object within this pack if
+ /// possible. Failing to do so aborts the operation, and this function is not expected to be called in usual packs. It's a theoretical
+ /// possibility though as old packs might have referred to their objects using the 20 bytes hash, instead of their encoded offset from the base.
+ /// * `should_interrupt` is checked once every 10,000 entries, and if it is set the operation fails with [`Error::Interrupted`].
+ /// * `object_hash` determines the length in bytes of the object ids to expect when reading entry headers.
+ ///
+ /// Note that the sort order is ascending. The given pack file path must match the provided offsets.
+ ///
+ /// # Panics
+ ///
+ /// If the base distance of an offset-delta is larger than the pack offset of the delta itself, or if the offset of an
+ /// entry lies before the end of the previous entry's header.
+ pub fn from_offsets_in_pack(
+ pack_path: impl AsRef<std::path::Path>,
+ data_sorted_by_offsets: impl Iterator<Item = T>,
+ get_pack_offset: impl Fn(&T) -> data::Offset,
+ resolve_in_pack_id: impl Fn(&gix_hash::oid) -> Option<data::Offset>,
+ mut progress: impl Progress,
+ should_interrupt: &AtomicBool,
+ object_hash: gix_hash::Kind,
+ ) -> Result<Self, Error> {
+ let mut r = io::BufReader::with_capacity(
+ 8192 * 8, // this value directly corresponds to performance, 8k (default) is about 4x slower than 64k
+ fs::File::open(pack_path).map_err(|err| Error::Io {
+ source: err,
+ message: "open pack path",
+ })?,
+ );
+
+ // Only the upper bound of the size hint is used as object count; without it, the tree is created without preallocation.
+ let anticipated_num_objects = if let Some(num_objects) = data_sorted_by_offsets.size_hint().1 {
+ progress.init(Some(num_objects), progress::count("objects"));
+ num_objects
+ } else {
+ 0
+ };
+ let mut tree = Tree::with_capacity(anticipated_num_objects)?;
+
+ {
+ // safety check - assure ourselves it's a pack we can handle
+ let mut buf = [0u8; PACK_HEADER_LEN];
+ r.read_exact(&mut buf).map_err(|err| Error::Io {
+ source: err,
+ message: "reading header buffer with at least 12 bytes failed - pack file truncated?",
+ })?;
+ crate::data::header::decode(&buf)?;
+ }
+
+ let then = Instant::now();
+
+ // The position of the reader after the previous entry's header was read, or `None` before the first entry.
+ let mut previous_cursor_position = None::<u64>;
+
+ let hash_len = object_hash.len_in_bytes();
+ for (idx, data) in data_sorted_by_offsets.enumerate() {
+ let pack_offset = get_pack_offset(&data);
+ // Only entry headers are read, so the data of the previous entry has to be skipped to get to the next header.
+ if let Some(previous_offset) = previous_cursor_position {
+ Self::advance_cursor_to_pack_offset(&mut r, pack_offset, previous_offset)?;
+ };
+ let entry = crate::data::Entry::from_read(&mut r, pack_offset, hash_len).map_err(|err| Error::Io {
+ source: err,
+ message: "EOF while parsing header",
+ })?;
+ previous_cursor_position = Some(pack_offset + entry.header_size() as u64);
+
+ use crate::data::entry::Header::*;
+ match entry.header {
+ Tree | Blob | Commit | Tag => {
+ tree.add_root(pack_offset, data)?;
+ }
+ RefDelta { base_id } => {
+ resolve_in_pack_id(base_id.as_ref())
+ .ok_or(Error::UnresolvedRefDelta { id: base_id })
+ .and_then(|base_pack_offset| {
+ tree.add_child(base_pack_offset, pack_offset, data).map_err(Into::into)
+ })?;
+ }
+ OfsDelta { base_distance } => {
+ let base_pack_offset = pack_offset
+ .checked_sub(base_distance)
+ .expect("in bound distance for deltas");
+ tree.add_child(base_pack_offset, pack_offset, data)?;
+ }
+ };
+ progress.inc();
+ if idx % 10_000 == 0 && should_interrupt.load(Ordering::SeqCst) {
+ return Err(Error::Interrupted);
+ }
+ }
+
+ progress.show_throughput(then);
+ Ok(tree)
+ }
+
+ /// Move the read position of `r` forward from `previous_offset` to `pack_offset`.
+ ///
+ /// If the distance is covered by the data currently buffered in `r`, it is skipped by consuming from
+ /// the buffer. Otherwise `r` seeks to the absolute position `pack_offset`.
+ ///
+ /// # Panics
+ ///
+ /// If `pack_offset` is smaller than `previous_offset`.
+ fn advance_cursor_to_pack_offset(
+     r: &mut io::BufReader<fs::File>,
+     pack_offset: u64,
+     previous_offset: u64,
+ ) -> Result<(), Error> {
+     let distance: u64 = pack_offset
+         .checked_sub(previous_offset)
+         .expect("continuously ascending pack offsets");
+     if distance == 0 {
+         return Ok(());
+     }
+
+     let buffered_len = r
+         .fill_buf()
+         .map_err(|err| Error::Io {
+             source: err,
+             message: "skip bytes",
+         })?
+         .len();
+     if buffered_len == 0 {
+         // An empty buffer after filling means the end of the file was reached even though there should be more entries.
+         let eof = io::Error::new(
+             io::ErrorKind::UnexpectedEof,
+             "ran out of bytes before reading desired amount of bytes",
+         );
+         return Err(Error::Io {
+             source: eof,
+             message: "index file is damaged or corrupt",
+         });
+     }
+
+     match usize::try_from(distance) {
+         // The target is within the buffered data, so there is no need to touch the file.
+         Ok(in_buffer) if in_buffer <= buffered_len => r.consume(in_buffer),
+         // Everything else, including distances that don't fit into `usize`, is beyond the buffer.
+         _ => {
+             r.seek(SeekFrom::Start(pack_offset)).map_err(|err| Error::Io {
+                 source: err,
+                 message: "seek to next entry",
+             })?;
+         }
+     }
+     Ok(())
+ }
+}
diff --git a/vendor/gix-pack/src/cache/delta/mod.rs b/vendor/gix-pack/src/cache/delta/mod.rs
new file mode 100644
index 000000000..f4c1b6fc6
--- /dev/null
+++ b/vendor/gix-pack/src/cache/delta/mod.rs
@@ -0,0 +1,216 @@
+/// Returned when using various methods on a [`Tree`]
+#[derive(thiserror::Error, Debug)]
+#[allow(missing_docs)]
+pub enum Error {
+ /// An item was added whose pack offset is not greater than the offset of the item added before it.
+ #[error("Pack offsets must only increment. The previous pack offset was {last_pack_offset}, the current one is {pack_offset}")]
+ InvariantIncreasingPackOffset {
+ /// The last seen pack offset
+ last_pack_offset: crate::data::Offset,
+ /// The invariant violating offset
+ pack_offset: crate::data::Offset,
+ },
+}
+
+///
+pub mod traverse;
+
+///
+pub mod from_offsets;
+
+/// An item stored within the [`Tree`]
+pub struct Item<T> {
+ /// The offset into the pack file at which the pack entry's data is located.
+ pub offset: crate::data::Offset,
+ /// The offset of the next item in the pack file.
+ ///
+ /// It is `0` when the item is added, and is set once the following item is added or the end of all pack entries is known.
+ pub next_offset: crate::data::Offset,
+ /// Data to store with each Item, effectively data associated with each entry in a pack.
+ pub data: T,
+ /// Indices into our Tree's `child_items`, one for each pack entry that depends on us.
+ ///
+ /// Limited to u32 as that's the maximum amount of objects in a pack.
+ children: Vec<u32>,
+}
+
+/// Identify what kind of node we have last seen, which tells in which of the two item lists of the [`Tree`] it was stored.
+enum NodeKind {
+ /// The node was added to the root items.
+ Root,
+ /// The node was added to the child items.
+ Child,
+}
+
+/// A tree that allows one-time iteration over all nodes and their children, consuming it in the process,
+/// while being shareable among threads without a lock.
+/// It does this by making the guarantee that iteration only happens once.
+pub struct Tree<T> {
+ /// The root nodes, i.e. base objects
+ root_items: Vec<Item<T>>,
+ /// The child nodes, i.e. those that rely on a base object, like ref and ofs delta objects
+ child_items: Vec<Item<T>>,
+ /// The last encountered node was either a root or a child, or `None` if no node was added yet.
+ last_seen: Option<NodeKind>,
+ /// Future child offsets, associating their offset into the pack with their index in the items array.
+ /// These are children whose parent wasn't present yet when they were added.
+ /// (parent_offset, child_index)
+ future_child_offsets: Vec<(crate::data::Offset, usize)>,
+}
+
+impl<T> Tree<T> {
+ /// Instantiate an empty tree capable of storing `num_objects` amounts of items.
+ ///
+ /// Half of that capacity is reserved for root items, the other half for child items.
+ pub fn with_capacity(num_objects: usize) -> Result<Self, Error> {
+     let capacity_per_kind = num_objects / 2;
+     let tree = Tree {
+         root_items: Vec::with_capacity(capacity_per_kind),
+         child_items: Vec::with_capacity(capacity_per_kind),
+         last_seen: None,
+         future_child_offsets: Vec::new(),
+     };
+     Ok(tree)
+ }
+
+ /// Return the total amount of items in the tree, roots and children combined.
+ fn num_items(&self) -> usize {
+ self.root_items.len() + self.child_items.len()
+ }
+
+ /// Verify that `offset` is greater than the offset of the most recently added item, and if so,
+ /// record `offset` as the `next_offset` of that item.
+ ///
+ /// Nothing happens if no item was added yet.
+ fn assert_is_incrementing_and_update_next_offset(&mut self, offset: crate::data::Offset) -> Result<(), Error> {
+     let previous = match self.last_seen {
+         None => return Ok(()),
+         Some(NodeKind::Root) => self.root_items.last_mut(),
+         Some(NodeKind::Child) => self.child_items.last_mut(),
+     }
+     .expect("last seen won't lie");
+
+     if previous.offset >= offset {
+         return Err(Error::InvariantIncreasingPackOffset {
+             last_pack_offset: previous.offset,
+             pack_offset: offset,
+         });
+     }
+     previous.next_offset = offset;
+     Ok(())
+ }
+
+ /// Attach all children whose parent wasn't known when they were added to their parent, and
+ /// set `pack_entries_end` as the `next_offset` of the item that was added last.
+ ///
+ /// Fails if a parent offset doesn't match any item in the tree.
+ fn set_pack_entries_end_and_resolve_ref_offsets(
+     &mut self,
+     pack_entries_end: crate::data::Offset,
+ ) -> Result<(), traverse::Error> {
+     // Draining an empty list is a no-op, and an early return drops the remaining pending entries.
+     for (base_pack_offset, child_index) in self.future_child_offsets.drain(..) {
+         let parent = if let Ok(idx) = self.child_items.binary_search_by_key(&base_pack_offset, |i| i.offset) {
+             &mut self.child_items[idx]
+         } else if let Ok(idx) = self.root_items.binary_search_by_key(&base_pack_offset, |i| i.offset) {
+             &mut self.root_items[idx]
+         } else {
+             return Err(traverse::Error::OutOfPackRefDelta { base_pack_offset });
+         };
+         parent.children.push(child_index as u32);
+     }
+
+     self.assert_is_incrementing_and_update_next_offset(pack_entries_end)
+         .expect("BUG: pack now is smaller than all previously seen entries");
+     Ok(())
+ }
+
+ /// Add a new root node, one that may have children but is not a child itself, at the given pack `offset` and associate
+ /// custom `data` with it.
+ ///
+ /// Fails if `offset` is not greater than the offset of the previously added node.
+ pub fn add_root(&mut self, offset: crate::data::Offset, data: T) -> Result<(), Error> {
+     self.assert_is_incrementing_and_update_next_offset(offset)?;
+
+     let root = Item {
+         offset,
+         // Unknown until the next node is added or the end of all entries is set.
+         next_offset: 0,
+         data,
+         children: Vec::new(),
+     };
+     self.root_items.push(root);
+     self.last_seen = Some(NodeKind::Root);
+     Ok(())
+ }
+
+ /// Add a child of the item at `base_offset` which itself resides at pack `offset` and associate custom `data` with it.
+ ///
+ /// If no item is known at `base_offset` yet, the relationship is remembered and resolved later.
+ /// Fails if `offset` is not greater than the offset of the previously added node.
+ pub fn add_child(
+     &mut self,
+     base_offset: crate::data::Offset,
+     offset: crate::data::Offset,
+     data: T,
+ ) -> Result<(), Error> {
+     self.assert_is_incrementing_and_update_next_offset(offset)?;
+
+     // The index the new child is going to have once it is pushed below.
+     let child_index = self.child_items.len();
+     let base = if let Ok(idx) = self.child_items.binary_search_by_key(&base_offset, |i| i.offset) {
+         Some(&mut self.child_items[idx])
+     } else if let Ok(idx) = self.root_items.binary_search_by_key(&base_offset, |i| i.offset) {
+         Some(&mut self.root_items[idx])
+     } else {
+         None
+     };
+     match base {
+         Some(base) => base.children.push(child_index as u32),
+         None => self.future_child_offsets.push((base_offset, child_index)),
+     }
+
+     let child = Item {
+         offset,
+         // Unknown until the next node is added or the end of all entries is set.
+         next_offset: 0,
+         data,
+         children: Vec::new(),
+     };
+     self.child_items.push(child);
+     self.last_seen = Some(NodeKind::Child);
+     Ok(())
+ }
+}
+
+#[cfg(test)]
+mod tests {
+ mod tree {
+ mod from_offsets_in_pack {
+ use std::sync::atomic::AtomicBool;
+
+ use crate as pack;
+
+ const SMALL_PACK_INDEX: &str = "objects/pack/pack-a2bf8e71d8c18879e499335762dd95119d93d9f1.idx";
+ const SMALL_PACK: &str = "objects/pack/pack-a2bf8e71d8c18879e499335762dd95119d93d9f1.pack";
+
+ const INDEX_V1: &str = "objects/pack/pack-c0438c19fb16422b6bbcce24387b3264416d485b.idx";
+ const PACK_FOR_INDEX_V1: &str = "objects/pack/pack-c0438c19fb16422b6bbcce24387b3264416d485b.pack";
+
+ use gix_testtools::fixture_path;
+
+ #[test]
+ fn v1() -> Result<(), Box<dyn std::error::Error>> {
+ tree(INDEX_V1, PACK_FOR_INDEX_V1)
+ }
+
+ #[test]
+ fn v2() -> Result<(), Box<dyn std::error::Error>> {
+ tree(SMALL_PACK_INDEX, SMALL_PACK)
+ }
+
+ // Build a tree from the offsets stored in the index at `index_path` and the pack at `pack_path`,
+ // which succeeds if building the tree doesn't fail.
+ fn tree(index_path: &str, pack_path: &str) -> Result<(), Box<dyn std::error::Error>> {
+ let idx = pack::index::File::at(fixture_path(index_path), gix_hash::Kind::Sha1)?;
+ crate::cache::delta::Tree::from_offsets_in_pack(
+ fixture_path(pack_path),
+ idx.sorted_offsets().into_iter(),
+ |ofs| *ofs,
+ |id| idx.lookup(id).map(|index| idx.pack_offset_at_index(index)),
+ gix_features::progress::Discard,
+ &AtomicBool::new(false),
+ gix_hash::Kind::Sha1,
+ )?;
+ Ok(())
+ }
+ }
+ }
+
+ // Pin the size of an `Item` without data, as it determines the memory needed for trees with millions of items.
+ #[test]
+ fn size_of_pack_tree_item() {
+ use super::Item;
+ assert_eq!(std::mem::size_of::<[Item<()>; 7_500_000]>(), 300_000_000);
+ }
+
+ // Pin the size of an `Item` carrying the data declared in `EntryWithDefault` below.
+ #[test]
+ fn size_of_pack_verify_data_structure() {
+ use super::Item;
+ pub struct EntryWithDefault {
+ _index_entry: crate::index::Entry,
+ _kind: gix_object::Kind,
+ _object_size: u64,
+ _decompressed_size: u64,
+ _compressed_size: u64,
+ _header_size: u16,
+ _level: u16,
+ }
+
+ assert_eq!(std::mem::size_of::<[Item<EntryWithDefault>; 7_500_000]>(), 840_000_000);
+ }
+}
diff --git a/vendor/gix-pack/src/cache/delta/traverse/mod.rs b/vendor/gix-pack/src/cache/delta/traverse/mod.rs
new file mode 100644
index 000000000..bfe2ec687
--- /dev/null
+++ b/vendor/gix-pack/src/cache/delta/traverse/mod.rs
@@ -0,0 +1,177 @@
+use std::sync::atomic::{AtomicBool, Ordering};
+
+use gix_features::{
+ parallel::in_parallel_with_slice,
+ progress::{self, Progress},
+ threading::{lock, Mutable, OwnShared},
+};
+
+use crate::{
+ cache::delta::{traverse::util::ItemSliceSend, Item, Tree},
+ data::EntryRange,
+};
+
+mod resolve;
+pub(crate) mod util;
+
+/// Returned by [`Tree::traverse()`]
+#[derive(thiserror::Error, Debug)]
+#[allow(missing_docs)]
+pub enum Error {
+ /// Decompressing the data of a pack entry failed.
+ #[error("{message}")]
+ ZlibInflate {
+ source: gix_features::zlib::inflate::Error,
+ message: &'static str,
+ },
+ /// The `resolve` function returned `None` for the entry at `pack_offset`.
+ #[error("The resolver failed to obtain the pack entry bytes for the entry at {pack_offset}")]
+ ResolveFailed { pack_offset: u64 },
+ /// The `inspect_object` function returned an error, which is boxed here.
+ #[error("One of the object inspectors failed")]
+ Inspect(#[from] Box<dyn std::error::Error + Send + Sync>),
+ #[error("Interrupted")]
+ Interrupted,
+ #[error(
+ "The base at {base_pack_offset} was referred to by a ref-delta, but it was never added to the tree as if the pack was still thin."
+ )]
+ OutOfPackRefDelta {
+ /// The base's offset which was from a resolved ref-delta that didn't actually get added to the tree
+ base_pack_offset: crate::data::Offset,
+ },
+}
+
+/// Additional context passed to the `inspect_object(…)` function of the [`Tree::traverse()`] method.
+pub struct Context<'a, S> {
+ /// The pack entry describing the object
+ pub entry: &'a crate::data::Entry,
+ /// The offset at which `entry` ends in the pack, useful to learn about the exact range of `entry` within the pack.
+ pub entry_end: u64,
+ /// The decompressed object itself, ready to be decoded.
+ pub decompressed: &'a [u8],
+ /// Custom state known to the function, as created by the `new_thread_state()` function passed to [`Tree::traverse()`].
+ pub state: &'a mut S,
+ /// The depth at which this object resides in the delta-tree. It represents the amount of base objects, with 0 indicating
+ /// an 'undeltified' object, and higher values indicating delta objects with the given amount of bases.
+ pub level: u16,
+}
+
+/// Options for [`Tree::traverse()`].
+pub struct Options<'a, P1, P2> {
+ /// A progress instance to track progress for each object in the traversal.
+ pub object_progress: P1,
+ /// A progress instance to track the overall progress in bytes.
+ pub size_progress: P2,
+ /// If `Some`, only use the given amount of threads. Otherwise, the amount of threads to use will be selected based on
+ /// the amount of available logical cores.
+ pub thread_limit: Option<usize>,
+ /// Abort the operation if the value is `true`.
+ pub should_interrupt: &'a AtomicBool,
+ /// Specifies what kind of hashes we expect to be stored in oid-delta entries, which is vital to decoding them
+ /// with the correct size.
+ pub object_hash: gix_hash::Kind,
+}
+
+/// The outcome of [`Tree::traverse()`]
+pub struct Outcome<T> {
+ /// The items that are not a child of any other item in the pack, i.e. base objects.
+ pub roots: Vec<Item<T>>,
+ /// The items that are children of another item, i.e. delta objects.
+ pub children: Vec<Item<T>>,
+}
+
+impl<T> Tree<T>
+where
+ T: Send,
+{
+ /// Traverse this tree of delta objects with a function `inspect_object` to process each object at will.
+ ///
+ /// * `resolve(EntryRange, &mut Vec<u8>) -> Option<()>` resolves the bytes in the pack for the given `EntryRange` and stores them in the
+ /// output vector. It returns `Some(())` if the object existed in the pack, or `None` to indicate a resolution error, which would abort the
+ /// operation as well.
+ /// * `pack_entries_end` marks one-past-the-last byte of the last entry in the pack, as the last entry's size would otherwise
+ /// be unknown as it's not part of the index file.
+ /// * `new_thread_state() -> State` is a function to create state to be used in each thread, invoked once per thread.
+ /// * `inspect_object(node_data: &mut T, progress: &mut Progress, context: Context<ThreadLocal State>) -> Result<(), CustomError>` is a function
+ /// running for each thread receiving fully decoded objects along with contextual information, which either succeeds with `Ok(())`
+ /// or returns a `CustomError`.
+ /// Note that `node_data` can be modified to allow storing computation results on a per-object basis.
+ /// * the last argument are the [`Options`] to configure progress reporting, threading, interruption and the hash kind.
+ ///
+ /// This method returns an [`Outcome`] containing all root and child items of the tree, along with their potentially
+ /// modified custom node data.
+ ///
+ /// _Note_ that this method consumes the Tree to assure safe parallel traversal with mutation support.
+ pub fn traverse<F, P1, P2, MBFN, S, E>(
+ mut self,
+ resolve: F,
+ pack_entries_end: u64,
+ new_thread_state: impl Fn() -> S + Send + Clone,
+ inspect_object: MBFN,
+ Options {
+ thread_limit,
+ object_progress,
+ mut size_progress,
+ should_interrupt,
+ object_hash,
+ }: Options<'_, P1, P2>,
+ ) -> Result<Outcome<T>, Error>
+ where
+ F: for<'r> Fn(EntryRange, &'r mut Vec<u8>) -> Option<()> + Send + Clone,
+ P1: Progress,
+ P2: Progress,
+ MBFN: Fn(&mut T, &mut <P1 as Progress>::SubProgress, Context<'_, S>) -> Result<(), E> + Send + Clone,
+ E: std::error::Error + Send + Sync + 'static,
+ {
+ // All children must be attached to their parents, and the last entry needs its end offset, before the traversal can start.
+ self.set_pack_entries_end_and_resolve_ref_offsets(pack_entries_end)?;
+ let object_progress = OwnShared::new(Mutable::new(object_progress));
+
+ let num_objects = self.num_items();
+ let object_counter = {
+ let mut progress = lock(&object_progress);
+ progress.init(Some(num_objects), progress::count("objects"));
+ progress.counter()
+ };
+ size_progress.init(None, progress::bytes());
+ let size_counter = size_progress.counter();
+ let child_items = self.child_items.as_mut_slice();
+
+ let start = std::time::Instant::now();
+ in_parallel_with_slice(
+ &mut self.root_items,
+ thread_limit,
+ {
+ let object_progress = object_progress.clone();
+ // The child items are handed to each thread as raw pointer, as the root items are the ones that are processed in parallel.
+ let child_items = ItemSliceSend(child_items as *mut [Item<T>]);
+ move |thread_index| {
+ (
+ Vec::<u8>::with_capacity(4096),
+ lock(&object_progress)
+ .add_child_with_id(format!("thread {thread_index}"), gix_features::progress::UNKNOWN),
+ new_thread_state(),
+ resolve.clone(),
+ inspect_object.clone(),
+ ItemSliceSend(child_items.0),
+ )
+ }
+ },
+ {
+ move |node, state| {
+ resolve::deltas(
+ object_counter.clone(),
+ size_counter.clone(),
+ node,
+ state,
+ object_hash.len_in_bytes(),
+ )
+ }
+ },
+ // Yields `None` once an interrupt was requested, and a duration of 50ms otherwise.
+ // NOTE(review): presumably the duration is the interval at which this check is repeated - confirm in `in_parallel_with_slice`.
+ || (!should_interrupt.load(Ordering::Relaxed)).then(|| std::time::Duration::from_millis(50)),
+ |_| (),
+ )?;
+
+ lock(&object_progress).show_throughput(start);
+ size_progress.show_throughput(start);
+
+ Ok(Outcome {
+ roots: self.root_items,
+ children: self.child_items,
+ })
+ }
+}
diff --git a/vendor/gix-pack/src/cache/delta/traverse/resolve.rs b/vendor/gix-pack/src/cache/delta/traverse/resolve.rs
new file mode 100644
index 000000000..fc94d87ef
--- /dev/null
+++ b/vendor/gix-pack/src/cache/delta/traverse/resolve.rs
@@ -0,0 +1,154 @@
+use std::{cell::RefCell, collections::BTreeMap, sync::atomic::Ordering};
+
+use gix_features::{progress::Progress, zlib};
+
+use crate::{
+ cache::delta::{
+ traverse::{
+ util::{ItemSliceSend, Node},
+ Context, Error,
+ },
+ Item,
+ },
+ data::EntryRange,
+};
+
+/// Decode the base object stored in `node` and all deltas that directly or indirectly build upon it,
+/// calling `modify_base` exactly once for each fully decoded object.
+///
+/// * `object_counter` and `size_counter`, if set, are incremented by one and by the decompressed size
+///   of each processed object respectively.
+/// * the tuple holds the state of the current thread: a reusable byte buffer, a progress instance, custom
+///   state, the `resolve` function to obtain the bytes of pack entries, the `modify_base` function to call
+///   for each object, and a pointer to all child items of the tree.
+/// * `hash_len` is the length of object ids in bytes, as needed to decode entry headers.
+///
+/// # Panics
+///
+/// If the base size recorded in a delta doesn't match the size of its decoded base object.
+pub(crate) fn deltas<T, F, P, MBFN, S, E>(
+    object_counter: Option<gix_features::progress::StepShared>,
+    size_counter: Option<gix_features::progress::StepShared>,
+    node: &mut crate::cache::delta::Item<T>,
+    (bytes_buf, ref mut progress, state, resolve, modify_base, child_items): &mut (
+        Vec<u8>,
+        P,
+        S,
+        F,
+        MBFN,
+        ItemSliceSend<Item<T>>,
+    ),
+    hash_len: usize,
+) -> Result<(), Error>
+where
+    T: Send,
+    F: for<'r> Fn(EntryRange, &'r mut Vec<u8>) -> Option<()>,
+    P: Progress,
+    MBFN: Fn(&mut T, &mut P, Context<'_, S>) -> Result<(), E>,
+    E: std::error::Error + Send + Sync + 'static,
+{
+    // Holds the decoded objects of children which have children themselves, until they are processed as base.
+    let mut decompressed_bytes_by_pack_offset = BTreeMap::new();
+    // The buffer is used both by the closure below and for applying deltas, hence the `RefCell`.
+    let bytes_buf = RefCell::new(bytes_buf);
+    let decompress_from_resolver = |slice: EntryRange| -> Result<(crate::data::Entry, u64, Vec<u8>), Error> {
+        let mut bytes_buf = bytes_buf.borrow_mut();
+        bytes_buf.resize((slice.end - slice.start) as usize, 0);
+        resolve(slice.clone(), &mut bytes_buf).ok_or(Error::ResolveFailed {
+            pack_offset: slice.start,
+        })?;
+        let entry = crate::data::Entry::from_bytes(&bytes_buf, slice.start, hash_len);
+        let compressed = &bytes_buf[entry.header_size()..];
+        let decompressed_len = entry.decompressed_size as usize;
+        Ok((entry, slice.end, decompress_all_at_once(compressed, decompressed_len)?))
+    };
+
+    // Traverse the tree depth first and lose the data produced for the base as it won't be needed anymore.
+    progress.init(None, gix_features::progress::count_with_decimals("objects", 2));
+
+    // each node is a base, and its children always start out as deltas which become a base after applying them.
+    // These will be pushed onto our stack until all are processed
+    let root_level = 0;
+    let mut nodes: Vec<_> = vec![(
+        root_level,
+        Node {
+            item: node,
+            child_items: child_items.0,
+        },
+    )];
+    while let Some((level, mut base)) = nodes.pop() {
+        let (base_entry, entry_end, base_bytes) = if level == root_level {
+            decompress_from_resolver(base.entry_slice())?
+        } else {
+            decompressed_bytes_by_pack_offset
+                .remove(&base.offset())
+                .expect("we store the resolved delta buffer when done")
+        };
+
+        // anything done here must be repeated further down for leaf-nodes.
+        // This way we avoid retaining their decompressed memory longer than needed (they have no children,
+        // thus their memory can be released right away, using 18% less peak memory on the linux kernel).
+        {
+            modify_base(
+                base.data(),
+                progress,
+                Context {
+                    entry: &base_entry,
+                    entry_end,
+                    decompressed: &base_bytes,
+                    state,
+                    level,
+                },
+            )
+            .map_err(|err| Box::new(err) as Box<dyn std::error::Error + Send + Sync>)?;
+            if let Some(counter) = object_counter.as_ref() {
+                counter.fetch_add(1, Ordering::SeqCst);
+            }
+            if let Some(counter) = size_counter.as_ref() {
+                counter.fetch_add(base_bytes.len(), Ordering::SeqCst);
+            }
+        }
+
+        for mut child in base.into_child_iter() {
+            let (mut child_entry, entry_end, delta_bytes) = decompress_from_resolver(child.entry_slice())?;
+            // A delta starts with the size of its base, followed by the size of the result of applying it.
+            let (base_size, consumed) = crate::data::delta::decode_header_size(&delta_bytes);
+            let mut header_ofs = consumed;
+            assert_eq!(
+                base_bytes.len(),
+                base_size as usize,
+                "recorded base size in delta does not match"
+            );
+            let (result_size, consumed) = crate::data::delta::decode_header_size(&delta_bytes[consumed..]);
+            header_ofs += consumed;
+
+            let mut fully_resolved_delta_bytes = bytes_buf.borrow_mut();
+            fully_resolved_delta_bytes.resize(result_size as usize, 0);
+            crate::data::delta::apply(&base_bytes, &mut fully_resolved_delta_bytes, &delta_bytes[header_ofs..]);
+
+            // FIXME: this actually invalidates the "pack_offset()" computation, which is not obvious to consumers
+            //        at all
+            child_entry.header = base_entry.header; // assign the actual object type, instead of 'delta'
+            if child.has_children() {
+                // The child is inspected and counted once it is taken off the stack as base.
+                decompressed_bytes_by_pack_offset.insert(
+                    child.offset(),
+                    (child_entry, entry_end, fully_resolved_delta_bytes.to_owned()),
+                );
+                nodes.push((level + 1, child));
+            } else {
+                modify_base(
+                    child.data(),
+                    progress,
+                    Context {
+                        entry: &child_entry,
+                        entry_end,
+                        decompressed: &fully_resolved_delta_bytes,
+                        state,
+                        level: level + 1,
+                    },
+                )
+                .map_err(|err| Box::new(err) as Box<dyn std::error::Error + Send + Sync>)?;
+                if let Some(counter) = object_counter.as_ref() {
+                    counter.fetch_add(1, Ordering::SeqCst);
+                }
+                // Count the size of the leaf object itself, not the size of its base once more.
+                if let Some(counter) = size_counter.as_ref() {
+                    counter.fetch_add(fully_resolved_delta_bytes.len(), Ordering::SeqCst);
+                }
+            }
+        }
+    }
+
+    Ok(())
+}
+
+/// Inflate the zlib-compressed bytes `b` in a single step into a new buffer of `decompressed_len` bytes.
+fn decompress_all_at_once(b: &[u8], decompressed_len: usize) -> Result<Vec<u8>, Error> {
+    let mut decompressed = vec![0; decompressed_len];
+    match zlib::Inflate::default().once(b, &mut decompressed) {
+        Ok(_) => Ok(decompressed),
+        Err(err) => Err(Error::ZlibInflate {
+            source: err,
+            message: "Failed to decompress entry",
+        }),
+    }
+}
diff --git a/vendor/gix-pack/src/cache/delta/traverse/util.rs b/vendor/gix-pack/src/cache/delta/traverse/util.rs
new file mode 100644
index 000000000..e7caf2ff5
--- /dev/null
+++ b/vendor/gix-pack/src/cache/delta/traverse/util.rs
@@ -0,0 +1,63 @@
+use crate::cache::delta::Item;
+
+/// A raw pointer to a slice of `T` which is marked as `Send` if `T` is `Send`, to allow passing it to other threads.
+pub struct ItemSliceSend<T>(pub *mut [T])
+where
+ T: Send;
+
+/// Cloning copies the pointer only, hence all clones refer to the same slice.
+impl<T> Clone for ItemSliceSend<T>
+where
+ T: Send,
+{
+ fn clone(&self) -> Self {
+ ItemSliceSend(self.0)
+ }
+}
+
+// SAFETY: T is `Send`, and we only ever access one T at a time. And, ptrs need that assurance, I wonder if it's always right.
+// NOTE(review): this relies on no two threads accessing the same element of the slice at the same time - confirm for each user.
+#[allow(unsafe_code)]
+unsafe impl<T> Send for ItemSliceSend<T> where T: Send {}
+
+/// An item returned by `iter_root_chunks`, allowing access to the `data` stored alongside nodes in a [`Tree`].
+pub struct Node<'a, T> {
+ /// The tree item this node provides access to.
+ pub item: &'a mut Item<T>,
+ /// A pointer to the items which the child indices of `item` refer to.
+ pub child_items: *mut [Item<T>],
+}
+
+impl<'a, T> Node<'a, T> {
+ /// Returns the offset into the pack at which the `Node`s data is located.
+ pub fn offset(&self) -> u64 {
+ self.item.offset
+ }
+
+ /// Returns the slice into the data pack at which the pack entry is located, which ranges from the offset of this node
+ /// to the offset of the next one.
+ pub fn entry_slice(&self) -> crate::data::EntryRange {
+ self.item.offset..self.item.next_offset
+ }
+
+ /// Returns the node data associated with this node.
+ pub fn data(&mut self) -> &mut T {
+ &mut self.item.data
+ }
+
+ /// Returns true if this node has children, e.g. is not a leaf in the tree.
+ pub fn has_children(&self) -> bool {
+ !self.item.children.is_empty()
+ }
+
+ /// Transform this `Node` into an iterator over its children.
+ ///
+ /// Children are `Node`s referring to pack entries whose base object is this pack entry.
+ pub fn into_child_iter(self) -> impl Iterator<Item = Node<'a, T>> + 'a {
+ let children = self.child_items;
+ self.item.children.iter().map(move |&index| {
+ // SAFETY: The children array is alive by the 'a lifetime.
+ // SAFETY: The index is a valid index into the children array.
+ // SAFETY: The resulting mutable pointer cannot be yielded by any other node.
+ // NOTE(review): the last point assumes that each child index is stored in one parent only - confirm where children are added.
+ #[allow(unsafe_code)]
+ Node {
+ item: unsafe { &mut *(children as *mut Item<T>).add(index as usize) },
+ child_items: children,
+ }
+ })
+ }
+}
diff --git a/vendor/gix-pack/src/cache/lru.rs b/vendor/gix-pack/src/cache/lru.rs
new file mode 100644
index 000000000..bba4f5d33
--- /dev/null
+++ b/vendor/gix-pack/src/cache/lru.rs
@@ -0,0 +1,165 @@
+use super::DecodeEntry;
+
+#[cfg(feature = "pack-cache-lru-dynamic")]
+mod memory {
+    use std::num::NonZeroUsize;
+
+    use clru::WeightScale;
+
+    use super::DecodeEntry;
+
+    /// A cached object with everything needed to answer [`DecodeEntry::get()`].
+    struct Entry {
+        data: Vec<u8>,
+        kind: gix_object::Kind,
+        compressed_size: usize,
+    }
+
+    /// The pack id and the offset of the entry within that pack.
+    type Key = (u32, u64);
+    /// Weighs cache entries by the amount of bytes of their object data.
+    struct CustomScale;
+
+    impl WeightScale<Key, Entry> for CustomScale {
+        fn weight(&self, _key: &Key, value: &Entry) -> usize {
+            value.data.len()
+        }
+    }
+
+    /// An LRU cache with hash map backing and an eviction rule based on the memory usage for object data in bytes.
+    pub struct MemoryCappedHashmap {
+        inner: clru::CLruCache<Key, Entry, std::collections::hash_map::RandomState, CustomScale>,
+        /// Buffers of replaced entries, kept to reuse their allocations for new entries.
+        free_list: Vec<Vec<u8>>,
+        debug: gix_features::cache::Debug,
+    }
+
+    impl MemoryCappedHashmap {
+        /// Return a new instance which evicts least recently used items if it uses more than `memory_cap_in_bytes`
+        /// object data.
+        ///
+        /// # Panics
+        ///
+        /// If `memory_cap_in_bytes` is zero.
+        pub fn new(memory_cap_in_bytes: usize) -> MemoryCappedHashmap {
+            MemoryCappedHashmap {
+                inner: clru::CLruCache::with_config(
+                    clru::CLruCacheConfig::new(NonZeroUsize::new(memory_cap_in_bytes).expect("non zero"))
+                        .with_scale(CustomScale),
+                ),
+                free_list: Vec::new(),
+                debug: gix_features::cache::Debug::new(format!("MemoryCappedHashmap({memory_cap_in_bytes}B)")),
+            }
+        }
+    }
+
+    impl DecodeEntry for MemoryCappedHashmap {
+        fn put(&mut self, pack_id: u32, offset: u64, data: &[u8], kind: gix_object::Kind, compressed_size: usize) {
+            self.debug.put();
+            // Reuse a previously allocated buffer if there is one, copying `data` without zero-filling it first.
+            let data = match self.free_list.pop() {
+                Some(mut buf) => {
+                    buf.clear();
+                    buf.extend_from_slice(data);
+                    buf
+                }
+                None => Vec::from(data),
+            };
+            if let Ok(Some(previous_entry)) = self.inner.put_with_weight(
+                (pack_id, offset),
+                Entry {
+                    data,
+                    kind,
+                    compressed_size,
+                },
+            ) {
+                self.free_list.push(previous_entry.data)
+            }
+        }
+
+        fn get(&mut self, pack_id: u32, offset: u64, out: &mut Vec<u8>) -> Option<(gix_object::Kind, usize)> {
+            let res = self.inner.get(&(pack_id, offset)).map(|e| {
+                out.clear();
+                out.extend_from_slice(&e.data);
+                (e.kind, e.compressed_size)
+            });
+            if res.is_some() {
+                self.debug.hit()
+            } else {
+                self.debug.miss()
+            }
+            res
+        }
+    }
+}
+
+#[cfg(feature = "pack-cache-lru-dynamic")]
+pub use memory::MemoryCappedHashmap;
+
+#[cfg(feature = "pack-cache-lru-static")]
+mod _static {
+    use super::DecodeEntry;
+    /// A cached object along with the pack id and offset it is looked up by.
+    struct Entry {
+        pack_id: u32,
+        offset: u64,
+        data: Vec<u8>,
+        kind: gix_object::Kind,
+        compressed_size: usize,
+    }
+
+    /// A cache using a least-recently-used implementation capable of storing the `SIZE` most recent objects.
+    /// The cache must be small as the search is 'naive' and the underlying data structure is a linked list.
+    /// Values of 64 seem to improve performance.
+    pub struct StaticLinkedList<const SIZE: usize> {
+        inner: uluru::LRUCache<Entry, SIZE>,
+        /// Buffers of evicted entries, kept to reuse their allocations for new entries.
+        free_list: Vec<Vec<u8>>,
+        debug: gix_features::cache::Debug,
+    }
+
+    impl<const SIZE: usize> Default for StaticLinkedList<SIZE> {
+        fn default() -> Self {
+            StaticLinkedList {
+                inner: Default::default(),
+                free_list: Vec::new(),
+                debug: gix_features::cache::Debug::new(format!("StaticLinkedList<{SIZE}>")),
+            }
+        }
+    }
+
+    impl<const SIZE: usize> DecodeEntry for StaticLinkedList<SIZE> {
+        fn put(&mut self, pack_id: u32, offset: u64, data: &[u8], kind: gix_object::Kind, compressed_size: usize) {
+            self.debug.put();
+            // Reuse a previously allocated buffer if there is one, copying `data` without zero-filling it first.
+            let data = match self.free_list.pop() {
+                Some(mut buf) => {
+                    buf.clear();
+                    buf.extend_from_slice(data);
+                    buf
+                }
+                None => Vec::from(data),
+            };
+            if let Some(previous) = self.inner.insert(Entry {
+                offset,
+                pack_id,
+                data,
+                kind,
+                compressed_size,
+            }) {
+                self.free_list.push(previous.data)
+            }
+        }
+
+        fn get(&mut self, pack_id: u32, offset: u64, out: &mut Vec<u8>) -> Option<(gix_object::Kind, usize)> {
+            let res = self.inner.lookup(|e: &mut Entry| {
+                if e.pack_id == pack_id && e.offset == offset {
+                    out.clear();
+                    out.extend_from_slice(&e.data);
+                    Some((e.kind, e.compressed_size))
+                } else {
+                    None
+                }
+            });
+            if res.is_some() {
+                self.debug.hit()
+            } else {
+                self.debug.miss()
+            }
+            res
+        }
+    }
+}
+
+#[cfg(feature = "pack-cache-lru-static")]
+pub use _static::StaticLinkedList;
diff --git a/vendor/gix-pack/src/cache/mod.rs b/vendor/gix-pack/src/cache/mod.rs
new file mode 100644
index 000000000..cf4b94df8
--- /dev/null
+++ b/vendor/gix-pack/src/cache/mod.rs
@@ -0,0 +1,55 @@
+use std::ops::DerefMut;
+
+use gix_object::Kind;
+
+/// A trait to model putting objects at a given pack `offset` into a cache, and fetching them.
+///
+/// It is used to speed up [pack traversals][crate::index::File::traverse()].
+pub trait DecodeEntry {
+ /// Store a fully decoded object at `offset` in the pack `pack_id`, of `kind` with `compressed_size` and `data`, in the cache.
+ ///
+ /// It is up to the cache implementation whether that actually happens or not.
+ fn put(&mut self, pack_id: u32, offset: u64, data: &[u8], kind: gix_object::Kind, compressed_size: usize);
+ /// Attempt to fetch the object at `offset` of pack `pack_id` and store its decoded bytes in `out`, as previously stored with
+ /// [`DecodeEntry::put()`], and return its (object `kind`, `compressed_size`), or `None` if it is not cached.
+ fn get(&mut self, pack_id: u32, offset: u64, out: &mut Vec<u8>) -> Option<(gix_object::Kind, usize)>;
+}
+
+/// A [`DecodeEntry`] cache that stores nothing and retrieves nothing, thus it _never_ caches.
+#[derive(Default)]
+pub struct Never;
+
+impl DecodeEntry for Never { // both methods ignore all of their arguments
+ fn put(&mut self, _pack_id: u32, _offset: u64, _data: &[u8], _kind: gix_object::Kind, _compressed_size: usize) {} // no-op: nothing is stored
+ fn get(&mut self, _pack_id: u32, _offset: u64, _out: &mut Vec<u8>) -> Option<(gix_object::Kind, usize)> { // always a miss
+ None // `_out` is not touched
+ }
+}
+
+impl<T: DecodeEntry + ?Sized> DecodeEntry for Box<T> { // `?Sized` also admits boxed trait objects
+ fn put(&mut self, pack_id: u32, offset: u64, data: &[u8], kind: Kind, compressed_size: usize) { // forwards to the boxed cache
+ self.deref_mut().put(pack_id, offset, data, kind, compressed_size)
+ }
+
+ fn get(&mut self, pack_id: u32, offset: u64, out: &mut Vec<u8>) -> Option<(Kind, usize)> { // forwards to the boxed cache
+ self.deref_mut().get(pack_id, offset, out)
+ }
+}
+
+/// A way of storing and retrieving entire objects, keyed by their object id, to and from a cache.
+pub trait Object {
+ /// Put the object identified by `id`, of the given `kind` and with `data`, into the cache.
+ fn put(&mut self, id: gix_hash::ObjectId, kind: gix_object::Kind, data: &[u8]);
+
+ /// Try to retrieve the object named `id`: if available, place its data into `out` and return `Some(kind)`, otherwise return `None`.
+ fn get(&mut self, id: &gix_hash::ObjectId, out: &mut Vec<u8>) -> Option<gix_object::Kind>;
+}
+
+/// Various implementations of [`DecodeEntry`] using least-recently-used algorithms.
+#[cfg(any(feature = "pack-cache-lru-dynamic", feature = "pack-cache-lru-static"))] // only built when one of the LRU features is enabled
+pub mod lru;
+
+pub mod object; // implementations of the [`Object`] trait
+
+/// Crate-internal delta [`Tree`][delta::Tree] with its construction from pack offsets and its traversal.
+pub(crate) mod delta;
diff --git a/vendor/gix-pack/src/cache/object.rs b/vendor/gix-pack/src/cache/object.rs
new file mode 100644
index 000000000..e64f47a8c
--- /dev/null
+++ b/vendor/gix-pack/src/cache/object.rs
@@ -0,0 +1,123 @@
+//! # Note
+//!
+//! This module is a bit 'misplaced' if spelled out like 'gix_pack::cache::object::*' but is best placed here for code re-use and
+//! general usefulness.
+use crate::cache;
+
+#[cfg(feature = "object-cache-dynamic")] // the whole module is compiled only with this feature
+mod memory {
+ use std::num::NonZeroUsize;
+
+ use clru::WeightScale;
+
+ use crate::cache;
+
+ struct Entry { // the value stored per object id
+ data: Vec<u8>, // private copy of the bytes handed to `put()`
+ kind: gix_object::Kind, // handed back unchanged by `get()`
+ }
+
+ type Key = gix_hash::ObjectId; // entries are looked up by object id
+
+ struct CustomScale; // stateless weight function plugged into the LRU in `new()`
+
+ impl WeightScale<Key, Entry> for CustomScale {
+ fn weight(&self, key: &Key, value: &Entry) -> usize { // the weight counted against the cap for one entry
+ value.data.len() + std::mem::size_of::<Entry>() + key.as_bytes().len() // data bytes + size of the entry struct + key bytes
+ }
+ }
+
+ /// An LRU cache with hash map backing whose eviction rule is based on per-entry weight in bytes: data length plus entry struct and key size.
+ pub struct MemoryCappedHashmap {
+ inner: clru::CLruCache<Key, Entry, gix_hashtable::hash::Builder, CustomScale>, // weighted LRU doing the actual storage
+ free_list: Vec<Vec<u8>>, // buffers of replaced entries, kept so `put()` can reuse their allocation
+ debug: gix_features::cache::Debug, // notified on every `put()` and on each `get()` hit or miss
+ }
+
+ impl MemoryCappedHashmap {
+ /// The amount of bytes we can hold in total, or the value we saw in `new(…)`, as reported by the underlying LRU.
+ pub fn capacity(&self) -> usize {
+ self.inner.capacity()
+ }
+ /// Return a new instance which evicts least recently used items if it uses more than `memory_cap_in_bytes`
+ /// of weight, i.e. object data plus per-entry overhead. Panics if `memory_cap_in_bytes` is zero.
+ pub fn new(memory_cap_in_bytes: usize) -> MemoryCappedHashmap {
+ MemoryCappedHashmap {
+ inner: clru::CLruCache::with_config(
+ clru::CLruCacheConfig::new(NonZeroUsize::new(memory_cap_in_bytes).expect("non zero")) // a zero cap panics here
+ .with_hasher(gix_hashtable::hash::Builder::default())
+ .with_scale(CustomScale), // weigh entries with `CustomScale::weight()`
+ ),
+ free_list: Vec::new(), // only filled by `put()` once an entry is replaced
+ debug: gix_features::cache::Debug::new(format!("MemoryCappedObjectHashmap({memory_cap_in_bytes}B)")), // the label spells out the cap
+ }
+ }
+ }
+
+ impl cache::Object for MemoryCappedHashmap {
+ /// Put a private copy of the object going by `id` of `kind` with `data` into the cache, reusing a spare buffer if one is available.
+ fn put(&mut self, id: gix_hash::ObjectId, kind: gix_object::Kind, data: &[u8]) {
+ self.debug.put(); // notify the debug helper about the store
+ if let Ok(Some(previous_entry)) = self.inner.put_with_weight( // any other outcome, including `Err`, is ignored
+ id,
+ Entry {
+ data: self
+ .free_list
+ .pop() // prefer a recycled buffer over a fresh allocation
+ .map(|mut v| {
+ v.clear();
+ v.resize(data.len(), 0); // lengths must match or `copy_from_slice` panics
+ v.copy_from_slice(data);
+ v
+ })
+ .unwrap_or_else(|| Vec::from(data)), // nothing to recycle: allocate a new copy
+ kind,
+ },
+ ) {
+ self.free_list.push(previous_entry.data) // keep the returned entry's buffer for a later `put()`
+ }
+ }
+
+ /// Try to retrieve the object named `id`: if available, overwrite `out` with its data and return `Some(kind)`, otherwise return `None`.
+ fn get(&mut self, id: &gix_hash::ObjectId, out: &mut Vec<u8>) -> Option<gix_object::Kind> {
+ let res = self.inner.get(id).map(|e| { // the closure only runs when `id` is cached
+ out.resize(e.data.len(), 0); // size `out` exactly; its previous content is overwritten below
+ out.copy_from_slice(&e.data);
+ e.kind
+ });
+ if res.is_some() {
+ self.debug.hit() // report the lookup as a hit
+ } else {
+ self.debug.miss() // report the lookup as a miss; `out` was not touched
+ }
+ res
+ }
+ }
+}
+#[cfg(feature = "object-cache-dynamic")]
+pub use memory::MemoryCappedHashmap;
+
+/// A [`cache::Object`] implementation that doesn't do any caching.
+pub struct Never;
+
+impl cache::Object for Never {
+ /// Noop: all arguments are ignored and nothing is stored.
+ fn put(&mut self, _id: gix_hash::ObjectId, _kind: gix_object::Kind, _data: &[u8]) {}
+
+ /// Noop: always returns `None` and leaves `_out` untouched.
+ fn get(&mut self, _id: &gix_hash::ObjectId, _out: &mut Vec<u8>) -> Option<gix_object::Kind> {
+ None
+ }
+}
+
+impl<T: cache::Object + ?Sized> cache::Object for Box<T> { // `?Sized` also admits boxed trait objects
+ fn put(&mut self, id: gix_hash::ObjectId, kind: gix_object::Kind, data: &[u8]) { // forwards to the boxed cache
+ use std::ops::DerefMut;
+ self.deref_mut().put(id, kind, data)
+ }
+
+ fn get(&mut self, id: &gix_hash::ObjectId, out: &mut Vec<u8>) -> Option<gix_object::Kind> { // forwards to the boxed cache
+ use std::ops::DerefMut;
+ self.deref_mut().get(id, out)
+ }
+}