author     Daniel Baumann <daniel.baumann@progress-linux.org>  2024-05-04 12:41:41 +0000
committer  Daniel Baumann <daniel.baumann@progress-linux.org>  2024-05-04 12:41:41 +0000
commit     10ee2acdd26a7f1298c6f6d6b7af9b469fe29b87 (patch)
tree       bdffd5d80c26cf4a7a518281a204be1ace85b4c1 /vendor/gix-pack/src/cache
parent     Releasing progress-linux version 1.70.0+dfsg1-9~progress7.99u1. (diff)
download   rustc-10ee2acdd26a7f1298c6f6d6b7af9b469fe29b87.tar.xz
           rustc-10ee2acdd26a7f1298c6f6d6b7af9b469fe29b87.zip

Merging upstream version 1.70.0+dfsg2.

Signed-off-by: Daniel Baumann <daniel.baumann@progress-linux.org>
Diffstat (limited to 'vendor/gix-pack/src/cache')
-rw-r--r--  vendor/gix-pack/src/cache/delta/from_offsets.rs       161
-rw-r--r--  vendor/gix-pack/src/cache/delta/mod.rs                216
-rw-r--r--  vendor/gix-pack/src/cache/delta/traverse/mod.rs       177
-rw-r--r--  vendor/gix-pack/src/cache/delta/traverse/resolve.rs   154
-rw-r--r--  vendor/gix-pack/src/cache/delta/traverse/util.rs       63
-rw-r--r--  vendor/gix-pack/src/cache/lru.rs                      165
-rw-r--r--  vendor/gix-pack/src/cache/mod.rs                       55
-rw-r--r--  vendor/gix-pack/src/cache/object.rs                   123
8 files changed, 1114 insertions(+), 0 deletions(-)
diff --git a/vendor/gix-pack/src/cache/delta/from_offsets.rs b/vendor/gix-pack/src/cache/delta/from_offsets.rs
new file mode 100644
index 000000000..8acb4a802
--- /dev/null
+++ b/vendor/gix-pack/src/cache/delta/from_offsets.rs
@@ -0,0 +1,161 @@
+use std::{
+ convert::TryFrom,
+ fs, io,
+ io::{BufRead, Read, Seek, SeekFrom},
+ sync::atomic::{AtomicBool, Ordering},
+ time::Instant,
+};
+
+use gix_features::progress::{self, Progress};
+
+use crate::{cache::delta::Tree, data};
+
+/// Returned by [`Tree::from_offsets_in_pack()`]
+#[derive(thiserror::Error, Debug)]
+#[allow(missing_docs)]
+pub enum Error {
+ #[error("{message}")]
+ Io { source: io::Error, message: &'static str },
+ #[error(transparent)]
+ Header(#[from] crate::data::header::decode::Error),
+ #[error("Could find object with id {id} in this pack. Thin packs are not supported")]
+ UnresolvedRefDelta { id: gix_hash::ObjectId },
+ #[error(transparent)]
+ Tree(#[from] crate::cache::delta::Error),
+ #[error("Interrupted")]
+ Interrupted,
+}
+
+const PACK_HEADER_LEN: usize = 12;
+
+/// Generate a tree from data sorted by pack offset
+impl<T> Tree<T> {
+ /// Create a new `Tree` from any data sorted by offset, ascending as returned by the `data_sorted_by_offsets` iterator.
+ /// * `get_pack_offset(item: &T) -> data::Offset` is a function returning the pack offset of the given item, which can be used
+ /// for obtaining the object's entry within the pack.
+ /// * `pack_path` is the path to the pack file itself and from which to read the entry data, which is a pack file matching the offsets
+ /// returned by `get_pack_offset(…)`.
+ /// * `progress` is used to track progress when creating the tree.
+ /// * `resolve_in_pack_id(gix_hash::oid) -> Option<data::Offset>` takes an object ID and tries to resolve it to an object within this pack if
+ /// possible. Failing to do so aborts the operation, and this function is not expected to be called for typical packs. It's a theoretical
+ /// possibility though, as old packs might have referred to their objects using the 20-byte hash instead of an encoded offset from the base.
+ ///
+ /// Note that the sort order is ascending. The given pack file path must match the provided offsets.
+ pub fn from_offsets_in_pack(
+ pack_path: impl AsRef<std::path::Path>,
+ data_sorted_by_offsets: impl Iterator<Item = T>,
+ get_pack_offset: impl Fn(&T) -> data::Offset,
+ resolve_in_pack_id: impl Fn(&gix_hash::oid) -> Option<data::Offset>,
+ mut progress: impl Progress,
+ should_interrupt: &AtomicBool,
+ object_hash: gix_hash::Kind,
+ ) -> Result<Self, Error> {
+ let mut r = io::BufReader::with_capacity(
+ 8192 * 8, // this value directly corresponds to performance, 8k (default) is about 4x slower than 64k
+ fs::File::open(pack_path).map_err(|err| Error::Io {
+ source: err,
+ message: "open pack path",
+ })?,
+ );
+
+ let anticipated_num_objects = if let Some(num_objects) = data_sorted_by_offsets.size_hint().1 {
+ progress.init(Some(num_objects), progress::count("objects"));
+ num_objects
+ } else {
+ 0
+ };
+ let mut tree = Tree::with_capacity(anticipated_num_objects)?;
+
+ {
+ // safety check - assure ourselves it's a pack we can handle
+ let mut buf = [0u8; PACK_HEADER_LEN];
+ r.read_exact(&mut buf).map_err(|err| Error::Io {
+ source: err,
+ message: "reading header buffer with at least 12 bytes failed - pack file truncated?",
+ })?;
+ crate::data::header::decode(&buf)?;
+ }
+
+ let then = Instant::now();
+
+ let mut previous_cursor_position = None::<u64>;
+
+ let hash_len = object_hash.len_in_bytes();
+ for (idx, data) in data_sorted_by_offsets.enumerate() {
+ let pack_offset = get_pack_offset(&data);
+ if let Some(previous_offset) = previous_cursor_position {
+ Self::advance_cursor_to_pack_offset(&mut r, pack_offset, previous_offset)?;
+ };
+ let entry = crate::data::Entry::from_read(&mut r, pack_offset, hash_len).map_err(|err| Error::Io {
+ source: err,
+ message: "EOF while parsing header",
+ })?;
+ previous_cursor_position = Some(pack_offset + entry.header_size() as u64);
+
+ use crate::data::entry::Header::*;
+ match entry.header {
+ Tree | Blob | Commit | Tag => {
+ tree.add_root(pack_offset, data)?;
+ }
+ RefDelta { base_id } => {
+ resolve_in_pack_id(base_id.as_ref())
+ .ok_or(Error::UnresolvedRefDelta { id: base_id })
+ .and_then(|base_pack_offset| {
+ tree.add_child(base_pack_offset, pack_offset, data).map_err(Into::into)
+ })?;
+ }
+ OfsDelta { base_distance } => {
+ let base_pack_offset = pack_offset
+ .checked_sub(base_distance)
+ .expect("in bound distance for deltas");
+ tree.add_child(base_pack_offset, pack_offset, data)?;
+ }
+ };
+ progress.inc();
+ if idx % 10_000 == 0 && should_interrupt.load(Ordering::SeqCst) {
+ return Err(Error::Interrupted);
+ }
+ }
+
+ progress.show_throughput(then);
+ Ok(tree)
+ }
+
+ fn advance_cursor_to_pack_offset(
+ r: &mut io::BufReader<fs::File>,
+ pack_offset: u64,
+ previous_offset: u64,
+ ) -> Result<(), Error> {
+ let bytes_to_skip: u64 = pack_offset
+ .checked_sub(previous_offset)
+ .expect("continuously ascending pack offsets");
+ if bytes_to_skip == 0 {
+ return Ok(());
+ }
+ let buf = r.fill_buf().map_err(|err| Error::Io {
+ source: err,
+ message: "skip bytes",
+ })?;
+ if buf.is_empty() {
+ // This means we have reached the end of the file before reading all the bytes we need,
+ // so we can't make progress anymore.
+ return Err(Error::Io {
+ source: io::Error::new(
+ io::ErrorKind::UnexpectedEof,
+ "ran out of bytes before reading desired amount of bytes",
+ ),
+ message: "index file is damaged or corrupt",
+ });
+ }
+ if bytes_to_skip <= u64::try_from(buf.len()).expect("sensible buffer size") {
+ // SAFETY: bytes_to_skip <= buf.len() <= usize::MAX
+ r.consume(bytes_to_skip as usize);
+ } else {
+ r.seek(SeekFrom::Start(pack_offset)).map_err(|err| Error::Io {
+ source: err,
+ message: "seek to next entry",
+ })?;
+ }
+ Ok(())
+ }
+}
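
For orientation, here is a minimal sketch of how `Tree::from_offsets_in_pack()` is typically driven from an index file; it mirrors the test in `delta/mod.rs` further down and, since `cache::delta` is crate-private, uses `crate::`-relative paths. The `index_path`/`pack_path` pair and the `build_tree` helper are placeholders, not part of the crate.

```rust
use std::sync::atomic::AtomicBool;

// Hypothetical internal driver; paths are placeholders.
fn build_tree(
    index_path: &std::path::Path,
    pack_path: &std::path::Path,
) -> Result<crate::cache::delta::Tree<crate::data::Offset>, Box<dyn std::error::Error>> {
    let idx = crate::index::File::at(index_path, gix_hash::Kind::Sha1)?;
    let tree = crate::cache::delta::Tree::from_offsets_in_pack(
        pack_path,
        idx.sorted_offsets().into_iter(),
        |ofs| *ofs,                                                // each item is its own pack offset
        |id| idx.lookup(id).map(|i| idx.pack_offset_at_index(i)),  // resolve ref-deltas within this pack
        gix_features::progress::Discard,
        &AtomicBool::new(false),
        gix_hash::Kind::Sha1,
    )?;
    Ok(tree)
}
```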
diff --git a/vendor/gix-pack/src/cache/delta/mod.rs b/vendor/gix-pack/src/cache/delta/mod.rs
new file mode 100644
index 000000000..f4c1b6fc6
--- /dev/null
+++ b/vendor/gix-pack/src/cache/delta/mod.rs
@@ -0,0 +1,216 @@
+/// Returned when using various methods on a [`Tree`]
+#[derive(thiserror::Error, Debug)]
+#[allow(missing_docs)]
+pub enum Error {
+ #[error("Pack offsets must only increment. The previous pack offset was {last_pack_offset}, the current one is {pack_offset}")]
+ InvariantIncreasingPackOffset {
+ /// The last seen pack offset
+ last_pack_offset: crate::data::Offset,
+ /// The invariant violating offset
+ pack_offset: crate::data::Offset,
+ },
+}
+
+///
+pub mod traverse;
+
+///
+pub mod from_offsets;
+
+/// An item stored within the [`Tree`]
+pub struct Item<T> {
+ /// The offset into the pack file at which the pack entry's data is located.
+ pub offset: crate::data::Offset,
+ /// The offset of the next item in the pack file.
+ pub next_offset: crate::data::Offset,
+ /// Data to store with each Item, effectively data associated with each entry in a pack.
+ pub data: T,
+ /// Indices into our Tree's `items`, one for each pack entry that depends on us.
+ ///
+ /// Limited to u32 as that's the maximum number of objects in a pack.
+ children: Vec<u32>,
+}
+
+/// Identify what kind of node we have last seen
+enum NodeKind {
+ Root,
+ Child,
+}
+
+/// A tree that allows one-time iteration over all nodes and their children, consuming it in the process,
+/// while being shareable among threads without a lock.
+/// It does this by making the guarantee that iteration only happens once.
+pub struct Tree<T> {
+ /// The root nodes, i.e. base objects
+ root_items: Vec<Item<T>>,
+ /// The child nodes, i.e. those that rely on a base object, like ref and ofs delta objects
+ child_items: Vec<Item<T>>,
+ /// The last encountered node was either a root or a child.
+ last_seen: Option<NodeKind>,
+ /// Future child offsets, associating their offset into the pack with their index in the items array.
+ /// (parent_offset, child_index)
+ future_child_offsets: Vec<(crate::data::Offset, usize)>,
+}
+
+impl<T> Tree<T> {
+ /// Instantiate an empty tree capable of storing `num_objects` items.
+ pub fn with_capacity(num_objects: usize) -> Result<Self, Error> {
+ Ok(Tree {
+ root_items: Vec::with_capacity(num_objects / 2),
+ child_items: Vec::with_capacity(num_objects / 2),
+ last_seen: None,
+ future_child_offsets: Vec::new(),
+ })
+ }
+
+ fn num_items(&self) -> usize {
+ self.root_items.len() + self.child_items.len()
+ }
+
+ fn assert_is_incrementing_and_update_next_offset(&mut self, offset: crate::data::Offset) -> Result<(), Error> {
+ let items = match &self.last_seen {
+ Some(NodeKind::Root) => &mut self.root_items,
+ Some(NodeKind::Child) => &mut self.child_items,
+ None => return Ok(()),
+ };
+ let item = &mut items.last_mut().expect("last seen won't lie");
+ if offset <= item.offset {
+ return Err(Error::InvariantIncreasingPackOffset {
+ last_pack_offset: item.offset,
+ pack_offset: offset,
+ });
+ }
+ item.next_offset = offset;
+ Ok(())
+ }
+
+ fn set_pack_entries_end_and_resolve_ref_offsets(
+ &mut self,
+ pack_entries_end: crate::data::Offset,
+ ) -> Result<(), traverse::Error> {
+ if !self.future_child_offsets.is_empty() {
+ for (parent_offset, child_index) in self.future_child_offsets.drain(..) {
+ if let Ok(i) = self.child_items.binary_search_by_key(&parent_offset, |i| i.offset) {
+ self.child_items[i].children.push(child_index as u32);
+ } else if let Ok(i) = self.root_items.binary_search_by_key(&parent_offset, |i| i.offset) {
+ self.root_items[i].children.push(child_index as u32);
+ } else {
+ return Err(traverse::Error::OutOfPackRefDelta {
+ base_pack_offset: parent_offset,
+ });
+ }
+ }
+ }
+
+ self.assert_is_incrementing_and_update_next_offset(pack_entries_end)
+ .expect("BUG: pack now is smaller than all previously seen entries");
+ Ok(())
+ }
+
+ /// Add a new root node, one that only has children but is not a child itself, at the given pack `offset` and associate
+ /// custom `data` with it.
+ pub fn add_root(&mut self, offset: crate::data::Offset, data: T) -> Result<(), Error> {
+ self.assert_is_incrementing_and_update_next_offset(offset)?;
+ self.last_seen = NodeKind::Root.into();
+ self.root_items.push(Item {
+ offset,
+ next_offset: 0,
+ data,
+ children: Default::default(),
+ });
+ Ok(())
+ }
+
+ /// Add a child of the item at `base_offset` which itself resides at pack `offset` and associate custom `data` with it.
+ pub fn add_child(
+ &mut self,
+ base_offset: crate::data::Offset,
+ offset: crate::data::Offset,
+ data: T,
+ ) -> Result<(), Error> {
+ self.assert_is_incrementing_and_update_next_offset(offset)?;
+
+ let next_child_index = self.child_items.len();
+ if let Ok(i) = self.child_items.binary_search_by_key(&base_offset, |i| i.offset) {
+ self.child_items[i].children.push(next_child_index as u32);
+ } else if let Ok(i) = self.root_items.binary_search_by_key(&base_offset, |i| i.offset) {
+ self.root_items[i].children.push(next_child_index as u32);
+ } else {
+ self.future_child_offsets.push((base_offset, next_child_index));
+ }
+
+ self.last_seen = NodeKind::Child.into();
+ self.child_items.push(Item {
+ offset,
+ next_offset: 0,
+ data,
+ children: Default::default(),
+ });
+ Ok(())
+ }
+}
+
+#[cfg(test)]
+mod tests {
+ mod tree {
+ mod from_offsets_in_pack {
+ use std::sync::atomic::AtomicBool;
+
+ use crate as pack;
+
+ const SMALL_PACK_INDEX: &str = "objects/pack/pack-a2bf8e71d8c18879e499335762dd95119d93d9f1.idx";
+ const SMALL_PACK: &str = "objects/pack/pack-a2bf8e71d8c18879e499335762dd95119d93d9f1.pack";
+
+ const INDEX_V1: &str = "objects/pack/pack-c0438c19fb16422b6bbcce24387b3264416d485b.idx";
+ const PACK_FOR_INDEX_V1: &str = "objects/pack/pack-c0438c19fb16422b6bbcce24387b3264416d485b.pack";
+
+ use gix_testtools::fixture_path;
+
+ #[test]
+ fn v1() -> Result<(), Box<dyn std::error::Error>> {
+ tree(INDEX_V1, PACK_FOR_INDEX_V1)
+ }
+
+ #[test]
+ fn v2() -> Result<(), Box<dyn std::error::Error>> {
+ tree(SMALL_PACK_INDEX, SMALL_PACK)
+ }
+
+ fn tree(index_path: &str, pack_path: &str) -> Result<(), Box<dyn std::error::Error>> {
+ let idx = pack::index::File::at(fixture_path(index_path), gix_hash::Kind::Sha1)?;
+ crate::cache::delta::Tree::from_offsets_in_pack(
+ fixture_path(pack_path),
+ idx.sorted_offsets().into_iter(),
+ |ofs| *ofs,
+ |id| idx.lookup(id).map(|index| idx.pack_offset_at_index(index)),
+ gix_features::progress::Discard,
+ &AtomicBool::new(false),
+ gix_hash::Kind::Sha1,
+ )?;
+ Ok(())
+ }
+ }
+ }
+
+ #[test]
+ fn size_of_pack_tree_item() {
+ use super::Item;
+ assert_eq!(std::mem::size_of::<[Item<()>; 7_500_000]>(), 300_000_000);
+ }
+
+ #[test]
+ fn size_of_pack_verify_data_structure() {
+ use super::Item;
+ pub struct EntryWithDefault {
+ _index_entry: crate::index::Entry,
+ _kind: gix_object::Kind,
+ _object_size: u64,
+ _decompressed_size: u64,
+ _compressed_size: u64,
+ _header_size: u16,
+ _level: u16,
+ }
+
+ assert_eq!(std::mem::size_of::<[Item<EntryWithDefault>; 7_500_000]>(), 840_000_000);
+ }
+}
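
The `add_root()`/`add_child()` pair above enforces that pack offsets only ever increase, because the `next_offset` of the previously added item is derived from the current offset. A small internal sketch of that invariant follows; the concrete offsets and the `offsets_must_ascend` helper are made up for illustration.

```rust
// Hypothetical internal usage; offsets are made up but must be strictly ascending.
fn offsets_must_ascend() -> Result<(), crate::cache::delta::Error> {
    let mut tree = crate::cache::delta::Tree::with_capacity(10)?;
    tree.add_root(12, ())?;       // first entry right after the 12-byte pack header
    tree.add_child(12, 100, ())?; // a delta whose base is the root at offset 12
    // Offsets must only increase: 50 is smaller than the last seen offset (100) and is rejected.
    assert!(tree.add_root(50, ()).is_err());
    Ok(())
}
```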
diff --git a/vendor/gix-pack/src/cache/delta/traverse/mod.rs b/vendor/gix-pack/src/cache/delta/traverse/mod.rs
new file mode 100644
index 000000000..bfe2ec687
--- /dev/null
+++ b/vendor/gix-pack/src/cache/delta/traverse/mod.rs
@@ -0,0 +1,177 @@
+use std::sync::atomic::{AtomicBool, Ordering};
+
+use gix_features::{
+ parallel::in_parallel_with_slice,
+ progress::{self, Progress},
+ threading::{lock, Mutable, OwnShared},
+};
+
+use crate::{
+ cache::delta::{traverse::util::ItemSliceSend, Item, Tree},
+ data::EntryRange,
+};
+
+mod resolve;
+pub(crate) mod util;
+
+/// Returned by [`Tree::traverse()`]
+#[derive(thiserror::Error, Debug)]
+#[allow(missing_docs)]
+pub enum Error {
+ #[error("{message}")]
+ ZlibInflate {
+ source: gix_features::zlib::inflate::Error,
+ message: &'static str,
+ },
+ #[error("The resolver failed to obtain the pack entry bytes for the entry at {pack_offset}")]
+ ResolveFailed { pack_offset: u64 },
+ #[error("One of the object inspectors failed")]
+ Inspect(#[from] Box<dyn std::error::Error + Send + Sync>),
+ #[error("Interrupted")]
+ Interrupted,
+ #[error(
+ "The base at {base_pack_offset} was referred to by a ref-delta, but it was never added to the tree as if the pack was still thin."
+ )]
+ OutOfPackRefDelta {
+ /// The base's offset which was from a resolved ref-delta that didn't actually get added to the tree
+ base_pack_offset: crate::data::Offset,
+ },
+}
+
+/// Additional context passed to the `inspect_object(…)` function of the [`Tree::traverse()`] method.
+pub struct Context<'a, S> {
+ /// The pack entry describing the object
+ pub entry: &'a crate::data::Entry,
+ /// The offset at which `entry` ends in the pack, useful to learn about the exact range of `entry` within the pack.
+ pub entry_end: u64,
+ /// The decompressed object itself, ready to be decoded.
+ pub decompressed: &'a [u8],
+ /// Custom state known to the function
+ pub state: &'a mut S,
+ /// The depth at which this object resides in the delta-tree. It represents the number of base objects, with 0 indicating
+ /// an 'undeltified' object, and higher values indicating delta objects with the given number of bases.
+ pub level: u16,
+}
+
+/// Options for [`Tree::traverse()`].
+pub struct Options<'a, P1, P2> {
+ /// A progress instance to track progress for each object in the traversal.
+ pub object_progress: P1,
+ /// A progress instance to track the overall progress.
+ pub size_progress: P2,
+ /// If `Some`, only use the given number of threads. Otherwise, the number of threads to use will be selected based on
+ /// the number of available logical cores.
+ pub thread_limit: Option<usize>,
+ /// Abort the operation if the value is `true`.
+ pub should_interrupt: &'a AtomicBool,
+ /// Specifies what kind of hash we expect to be stored in ref-delta entries, which is needed to decode them
+ /// with the correct size.
+ pub object_hash: gix_hash::Kind,
+}
+
+/// The outcome of [`Tree::traverse()`]
+pub struct Outcome<T> {
+ /// The items that have no children in the pack, i.e. base objects.
+ pub roots: Vec<Item<T>>,
+ /// The items that are children of a root object, i.e. delta objects.
+ pub children: Vec<Item<T>>,
+}
+
+impl<T> Tree<T>
+where
+ T: Send,
+{
+ /// Traverse this tree of delta objects with a function `inspect_object` to process each object at will.
+ ///
+ /// * `should_run_in_parallel() -> bool` returns true if the underlying pack is big enough to warrant parallel traversal at all.
+ /// * `resolve(EntrySlice, &mut Vec<u8>) -> Option<()>` resolves the bytes in the pack for the given `EntrySlice` and stores them in the
+ /// output vector. It returns `Some(())` if the object existed in the pack, or `None` to indicate a resolution error, which would abort the
+ /// operation as well.
+ /// * `pack_entries_end` marks one-past-the-last byte of the last entry in the pack, as the last entry's size would otherwise
+ /// be unknown since it's not part of the index file.
+ /// * `new_thread_state() -> State` is a function to create state to be used in each thread, invoked once per thread.
+ /// * `inspect_object(node_data: &mut T, progress: Progress, context: Context<ThreadLocal State>) -> Result<(), CustomError>` is a function
+ /// running for each thread receiving fully decoded objects along with contextual information, which either succeeds with `Ok(())`
+ /// or returns a `CustomError`.
+ /// Note that `node_data` can be modified to allow maintaining computation results on a per-object basis.
+ ///
+ /// This method returns a vector of all tree items, along with their potentially modified custom node data.
+ ///
+ /// _Note_ that this method consumes the `Tree` to assure safe parallel traversal with mutation support.
+ pub fn traverse<F, P1, P2, MBFN, S, E>(
+ mut self,
+ resolve: F,
+ pack_entries_end: u64,
+ new_thread_state: impl Fn() -> S + Send + Clone,
+ inspect_object: MBFN,
+ Options {
+ thread_limit,
+ object_progress,
+ mut size_progress,
+ should_interrupt,
+ object_hash,
+ }: Options<'_, P1, P2>,
+ ) -> Result<Outcome<T>, Error>
+ where
+ F: for<'r> Fn(EntryRange, &'r mut Vec<u8>) -> Option<()> + Send + Clone,
+ P1: Progress,
+ P2: Progress,
+ MBFN: Fn(&mut T, &mut <P1 as Progress>::SubProgress, Context<'_, S>) -> Result<(), E> + Send + Clone,
+ E: std::error::Error + Send + Sync + 'static,
+ {
+ self.set_pack_entries_end_and_resolve_ref_offsets(pack_entries_end)?;
+ let object_progress = OwnShared::new(Mutable::new(object_progress));
+
+ let num_objects = self.num_items();
+ let object_counter = {
+ let mut progress = lock(&object_progress);
+ progress.init(Some(num_objects), progress::count("objects"));
+ progress.counter()
+ };
+ size_progress.init(None, progress::bytes());
+ let size_counter = size_progress.counter();
+ let child_items = self.child_items.as_mut_slice();
+
+ let start = std::time::Instant::now();
+ in_parallel_with_slice(
+ &mut self.root_items,
+ thread_limit,
+ {
+ let object_progress = object_progress.clone();
+ let child_items = ItemSliceSend(child_items as *mut [Item<T>]);
+ move |thread_index| {
+ (
+ Vec::<u8>::with_capacity(4096),
+ lock(&object_progress)
+ .add_child_with_id(format!("thread {thread_index}"), gix_features::progress::UNKNOWN),
+ new_thread_state(),
+ resolve.clone(),
+ inspect_object.clone(),
+ ItemSliceSend(child_items.0),
+ )
+ }
+ },
+ {
+ move |node, state| {
+ resolve::deltas(
+ object_counter.clone(),
+ size_counter.clone(),
+ node,
+ state,
+ object_hash.len_in_bytes(),
+ )
+ }
+ },
+ || (!should_interrupt.load(Ordering::Relaxed)).then(|| std::time::Duration::from_millis(50)),
+ |_| (),
+ )?;
+
+ lock(&object_progress).show_throughput(start);
+ size_progress.show_throughput(start);
+
+ Ok(Outcome {
+ roots: self.root_items,
+ children: self.child_items,
+ })
+ }
+}
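
To illustrate how the pieces above fit together, here is a hedged internal sketch of driving `Tree::traverse()` with a slice-backed resolver and no per-thread state. `pack_data` standing in for the memory-mapped pack, the no-op `inspect_object`, and the `count_decoded_objects` helper are assumptions for illustration, not how the crate itself calls this.

```rust
// Hypothetical internal driver for `Tree::traverse()`.
fn count_decoded_objects<T: Send>(
    tree: crate::cache::delta::Tree<T>,
    pack_data: &[u8],
    pack_entries_end: u64,
    should_interrupt: &std::sync::atomic::AtomicBool,
) -> Result<usize, crate::cache::delta::traverse::Error> {
    let outcome = tree.traverse(
        // The caller resizes `out` to the entry range before invoking us, so a plain copy suffices.
        |range, out: &mut Vec<u8>| {
            pack_data
                .get(range.start as usize..range.end as usize)
                .map(|slice| out.copy_from_slice(slice))
        },
        pack_entries_end,
        || (), // no per-thread state needed here
        |_data, _progress, _ctx: crate::cache::delta::traverse::Context<'_, ()>| {
            Ok::<_, std::convert::Infallible>(()) // inspect each decoded object and do nothing
        },
        crate::cache::delta::traverse::Options {
            object_progress: gix_features::progress::Discard,
            size_progress: gix_features::progress::Discard,
            thread_limit: None,
            should_interrupt,
            object_hash: gix_hash::Kind::Sha1,
        },
    )?;
    Ok(outcome.roots.len() + outcome.children.len())
}
```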
diff --git a/vendor/gix-pack/src/cache/delta/traverse/resolve.rs b/vendor/gix-pack/src/cache/delta/traverse/resolve.rs
new file mode 100644
index 000000000..fc94d87ef
--- /dev/null
+++ b/vendor/gix-pack/src/cache/delta/traverse/resolve.rs
@@ -0,0 +1,154 @@
+use std::{cell::RefCell, collections::BTreeMap, sync::atomic::Ordering};
+
+use gix_features::{progress::Progress, zlib};
+
+use crate::{
+ cache::delta::{
+ traverse::{
+ util::{ItemSliceSend, Node},
+ Context, Error,
+ },
+ Item,
+ },
+ data::EntryRange,
+};
+
+pub(crate) fn deltas<T, F, P, MBFN, S, E>(
+ object_counter: Option<gix_features::progress::StepShared>,
+ size_counter: Option<gix_features::progress::StepShared>,
+ node: &mut crate::cache::delta::Item<T>,
+ (bytes_buf, ref mut progress, state, resolve, modify_base, child_items): &mut (
+ Vec<u8>,
+ P,
+ S,
+ F,
+ MBFN,
+ ItemSliceSend<Item<T>>,
+ ),
+ hash_len: usize,
+) -> Result<(), Error>
+where
+ T: Send,
+ F: for<'r> Fn(EntryRange, &'r mut Vec<u8>) -> Option<()>,
+ P: Progress,
+ MBFN: Fn(&mut T, &mut P, Context<'_, S>) -> Result<(), E>,
+ E: std::error::Error + Send + Sync + 'static,
+{
+ let mut decompressed_bytes_by_pack_offset = BTreeMap::new();
+ let bytes_buf = RefCell::new(bytes_buf);
+ let decompress_from_resolver = |slice: EntryRange| -> Result<(crate::data::Entry, u64, Vec<u8>), Error> {
+ let mut bytes_buf = bytes_buf.borrow_mut();
+ bytes_buf.resize((slice.end - slice.start) as usize, 0);
+ resolve(slice.clone(), &mut bytes_buf).ok_or(Error::ResolveFailed {
+ pack_offset: slice.start,
+ })?;
+ let entry = crate::data::Entry::from_bytes(&bytes_buf, slice.start, hash_len);
+ let compressed = &bytes_buf[entry.header_size()..];
+ let decompressed_len = entry.decompressed_size as usize;
+ Ok((entry, slice.end, decompress_all_at_once(compressed, decompressed_len)?))
+ };
+
+ // Traverse the tree breadth first and lose the data produced for the base as it won't be needed anymore.
+ progress.init(None, gix_features::progress::count_with_decimals("objects", 2));
+
+ // each node is a base, and its children always start out as deltas which become a base after applying them.
+ // These will be pushed onto our stack until all are processed
+ let root_level = 0;
+ let mut nodes: Vec<_> = vec![(
+ root_level,
+ Node {
+ item: node,
+ child_items: child_items.0,
+ },
+ )];
+ while let Some((level, mut base)) = nodes.pop() {
+ let (base_entry, entry_end, base_bytes) = if level == root_level {
+ decompress_from_resolver(base.entry_slice())?
+ } else {
+ decompressed_bytes_by_pack_offset
+ .remove(&base.offset())
+ .expect("we store the resolved delta buffer when done")
+ };
+
+ // anything done here must be repeated further down for leaf-nodes.
+ // This way we avoid retaining their decompressed memory longer than needed (they have no children,
+ // thus their memory can be released right away, using 18% less peak memory on the linux kernel).
+ {
+ modify_base(
+ base.data(),
+ progress,
+ Context {
+ entry: &base_entry,
+ entry_end,
+ decompressed: &base_bytes,
+ state,
+ level,
+ },
+ )
+ .map_err(|err| Box::new(err) as Box<dyn std::error::Error + Send + Sync>)?;
+ object_counter.as_ref().map(|c| c.fetch_add(1, Ordering::SeqCst));
+ size_counter
+ .as_ref()
+ .map(|c| c.fetch_add(base_bytes.len(), Ordering::SeqCst));
+ }
+
+ for mut child in base.into_child_iter() {
+ let (mut child_entry, entry_end, delta_bytes) = decompress_from_resolver(child.entry_slice())?;
+ let (base_size, consumed) = crate::data::delta::decode_header_size(&delta_bytes);
+ let mut header_ofs = consumed;
+ assert_eq!(
+ base_bytes.len(),
+ base_size as usize,
+ "recorded base size in delta does not match"
+ );
+ let (result_size, consumed) = crate::data::delta::decode_header_size(&delta_bytes[consumed..]);
+ header_ofs += consumed;
+
+ let mut fully_resolved_delta_bytes = bytes_buf.borrow_mut();
+ fully_resolved_delta_bytes.resize(result_size as usize, 0);
+ crate::data::delta::apply(&base_bytes, &mut fully_resolved_delta_bytes, &delta_bytes[header_ofs..]);
+
+ // FIXME: this actually invalidates the "pack_offset()" computation, which is not obvious to consumers
+ // at all
+ child_entry.header = base_entry.header; // assign the actual object type, instead of 'delta'
+ if child.has_children() {
+ decompressed_bytes_by_pack_offset.insert(
+ child.offset(),
+ (child_entry, entry_end, fully_resolved_delta_bytes.to_owned()),
+ );
+ nodes.push((level + 1, child));
+ } else {
+ modify_base(
+ child.data(),
+ progress,
+ Context {
+ entry: &child_entry,
+ entry_end,
+ decompressed: &fully_resolved_delta_bytes,
+ state,
+ level: level + 1,
+ },
+ )
+ .map_err(|err| Box::new(err) as Box<dyn std::error::Error + Send + Sync>)?;
+ object_counter.as_ref().map(|c| c.fetch_add(1, Ordering::SeqCst));
+ size_counter
+ .as_ref()
+ .map(|c| c.fetch_add(base_bytes.len(), Ordering::SeqCst));
+ }
+ }
+ }
+
+ Ok(())
+}
+
+fn decompress_all_at_once(b: &[u8], decompressed_len: usize) -> Result<Vec<u8>, Error> {
+ let mut out = Vec::new();
+ out.resize(decompressed_len, 0);
+ zlib::Inflate::default()
+ .once(b, &mut out)
+ .map_err(|err| Error::ZlibInflate {
+ source: err,
+ message: "Failed to decompress entry",
+ })?;
+ Ok(out)
+}
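
The `decode_header_size(…)` calls above read git's standard delta preamble: the base size and the result size, each encoded as a little-endian base-128 varint. Below is a standalone sketch of that decoding for reference; the real implementation lives in `crate::data::delta` and may differ in detail.

```rust
// Standalone sketch of git's delta size header: 7 bits of payload per byte, least significant
// group first, with the high bit of each byte acting as a continuation flag.
// Returns the decoded size and the number of bytes consumed.
fn decode_varint_size(d: &[u8]) -> (u64, usize) {
    let mut size = 0u64;
    let mut shift = 0;
    let mut consumed = 0;
    for &byte in d {
        consumed += 1;
        size |= u64::from(byte & 0x7f) << shift;
        shift += 7;
        if byte & 0x80 == 0 {
            break;
        }
    }
    (size, consumed)
}
```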
diff --git a/vendor/gix-pack/src/cache/delta/traverse/util.rs b/vendor/gix-pack/src/cache/delta/traverse/util.rs
new file mode 100644
index 000000000..e7caf2ff5
--- /dev/null
+++ b/vendor/gix-pack/src/cache/delta/traverse/util.rs
@@ -0,0 +1,63 @@
+use crate::cache::delta::Item;
+
+pub struct ItemSliceSend<T>(pub *mut [T])
+where
+ T: Send;
+
+impl<T> Clone for ItemSliceSend<T>
+where
+ T: Send,
+{
+ fn clone(&self) -> Self {
+ ItemSliceSend(self.0)
+ }
+}
+
+// SAFETY: `T` is `Send`, and we only ever access one `T` at a time. The raw slice pointer needs this manual
+// assurance as pointers are never `Send` by default; whether that is always sound is not entirely certain.
+#[allow(unsafe_code)]
+unsafe impl<T> Send for ItemSliceSend<T> where T: Send {}
+
+/// An item returned by `iter_root_chunks`, allowing access to the `data` stored alongside nodes in a [`Tree`].
+pub struct Node<'a, T> {
+ pub item: &'a mut Item<T>,
+ pub child_items: *mut [Item<T>],
+}
+
+impl<'a, T> Node<'a, T> {
+ /// Returns the offset into the pack at which the `Node`'s data is located.
+ pub fn offset(&self) -> u64 {
+ self.item.offset
+ }
+
+ /// Returns the slice into the data pack at which the pack entry is located.
+ pub fn entry_slice(&self) -> crate::data::EntryRange {
+ self.item.offset..self.item.next_offset
+ }
+
+ /// Returns the node data associated with this node.
+ pub fn data(&mut self) -> &mut T {
+ &mut self.item.data
+ }
+
+ /// Returns true if this node has children, e.g. is not a leaf in the tree.
+ pub fn has_children(&self) -> bool {
+ !self.item.children.is_empty()
+ }
+
+ /// Transform this `Node` into an iterator over its children.
+ ///
+ /// Children are `Node`s referring to pack entries whose base object is this pack entry.
+ pub fn into_child_iter(self) -> impl Iterator<Item = Node<'a, T>> + 'a {
+ let children = self.child_items;
+ self.item.children.iter().map(move |&index| {
+ // SAFETY: The children array is alive by the 'a lifetime.
+ // SAFETY: The index is a valid index into the children array.
+ // SAFETY: The resulting mutable pointer cannot be yielded by any other node.
+ #[allow(unsafe_code)]
+ Node {
+ item: unsafe { &mut *(children as *mut Item<T>).add(index as usize) },
+ child_items: children,
+ }
+ })
+ }
+}
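
The `ItemSliceSend`/`Node` pair above boils down to a common pattern: raw pointers are not `Send`, so a newtype with a manual `unsafe impl Send` carries the slice pointer into worker threads, and soundness rests entirely on each thread touching a disjoint set of elements. A standalone sketch of the same idea, unrelated to the crate's types:

```rust
// Standalone sketch: move a raw slice pointer across threads via a `Send` newtype.
// Soundness of the whole scheme: every thread writes to a distinct index, so no element is aliased.
struct SliceSend<T: Send>(*mut [T]);

#[allow(unsafe_code)]
unsafe impl<T: Send> Send for SliceSend<T> {}

fn scatter_write(values: &mut [u64]) {
    let len = values.len();
    let ptr = SliceSend(values as *mut [u64]);
    std::thread::scope(|s| {
        for i in 0..len {
            let ptr = SliceSend(ptr.0); // each thread gets its own copy of the pointer
            s.spawn(move || {
                // SAFETY: index `i` is unique per thread, so this write does not alias any other.
                #[allow(unsafe_code)]
                unsafe {
                    (*ptr.0)[i] = i as u64
                };
            });
        }
    });
}
```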
diff --git a/vendor/gix-pack/src/cache/lru.rs b/vendor/gix-pack/src/cache/lru.rs
new file mode 100644
index 000000000..bba4f5d33
--- /dev/null
+++ b/vendor/gix-pack/src/cache/lru.rs
@@ -0,0 +1,165 @@
+use super::DecodeEntry;
+
+#[cfg(feature = "pack-cache-lru-dynamic")]
+mod memory {
+ use std::num::NonZeroUsize;
+
+ use clru::WeightScale;
+
+ use super::DecodeEntry;
+
+ struct Entry {
+ data: Vec<u8>,
+ kind: gix_object::Kind,
+ compressed_size: usize,
+ }
+
+ type Key = (u32, u64);
+ struct CustomScale;
+
+ impl WeightScale<Key, Entry> for CustomScale {
+ fn weight(&self, _key: &Key, value: &Entry) -> usize {
+ value.data.len()
+ }
+ }
+
+ /// An LRU cache with hash map backing and an eviction rule based on the memory usage for object data in bytes.
+ pub struct MemoryCappedHashmap {
+ inner: clru::CLruCache<Key, Entry, std::collections::hash_map::RandomState, CustomScale>,
+ free_list: Vec<Vec<u8>>,
+ debug: gix_features::cache::Debug,
+ }
+
+ impl MemoryCappedHashmap {
+ /// Return a new instance which evicts least recently used items if it uses more than `memory_cap_in_bytes`
+ /// object data.
+ pub fn new(memory_cap_in_bytes: usize) -> MemoryCappedHashmap {
+ MemoryCappedHashmap {
+ inner: clru::CLruCache::with_config(
+ clru::CLruCacheConfig::new(NonZeroUsize::new(memory_cap_in_bytes).expect("non zero"))
+ .with_scale(CustomScale),
+ ),
+ free_list: Vec::new(),
+ debug: gix_features::cache::Debug::new(format!("MemoryCappedHashmap({memory_cap_in_bytes}B)")),
+ }
+ }
+ }
+
+ impl DecodeEntry for MemoryCappedHashmap {
+ fn put(&mut self, pack_id: u32, offset: u64, data: &[u8], kind: gix_object::Kind, compressed_size: usize) {
+ self.debug.put();
+ if let Ok(Some(previous_entry)) = self.inner.put_with_weight(
+ (pack_id, offset),
+ Entry {
+ data: self
+ .free_list
+ .pop()
+ .map(|mut v| {
+ v.clear();
+ v.resize(data.len(), 0);
+ v.copy_from_slice(data);
+ v
+ })
+ .unwrap_or_else(|| Vec::from(data)),
+ kind,
+ compressed_size,
+ },
+ ) {
+ self.free_list.push(previous_entry.data)
+ }
+ }
+
+ fn get(&mut self, pack_id: u32, offset: u64, out: &mut Vec<u8>) -> Option<(gix_object::Kind, usize)> {
+ let res = self.inner.get(&(pack_id, offset)).map(|e| {
+ out.resize(e.data.len(), 0);
+ out.copy_from_slice(&e.data);
+ (e.kind, e.compressed_size)
+ });
+ if res.is_some() {
+ self.debug.hit()
+ } else {
+ self.debug.miss()
+ }
+ res
+ }
+ }
+}
+
+#[cfg(feature = "pack-cache-lru-dynamic")]
+pub use memory::MemoryCappedHashmap;
+
+#[cfg(feature = "pack-cache-lru-static")]
+mod _static {
+ use super::DecodeEntry;
+ struct Entry {
+ pack_id: u32,
+ offset: u64,
+ data: Vec<u8>,
+ kind: gix_object::Kind,
+ compressed_size: usize,
+ }
+
+ /// A cache using a least-recently-used implementation capable of storing the `SIZE` most recent objects.
+ /// The cache must be small as the search is 'naive' and the underlying data structure is a linked list.
+ /// Values of 64 seem to improve performance.
+ pub struct StaticLinkedList<const SIZE: usize> {
+ inner: uluru::LRUCache<Entry, SIZE>,
+ free_list: Vec<Vec<u8>>,
+ debug: gix_features::cache::Debug,
+ }
+
+ impl<const SIZE: usize> Default for StaticLinkedList<SIZE> {
+ fn default() -> Self {
+ StaticLinkedList {
+ inner: Default::default(),
+ free_list: Vec::new(),
+ debug: gix_features::cache::Debug::new(format!("StaticLinkedList<{SIZE}>")),
+ }
+ }
+ }
+
+ impl<const SIZE: usize> DecodeEntry for StaticLinkedList<SIZE> {
+ fn put(&mut self, pack_id: u32, offset: u64, data: &[u8], kind: gix_object::Kind, compressed_size: usize) {
+ self.debug.put();
+ if let Some(previous) = self.inner.insert(Entry {
+ offset,
+ pack_id,
+ data: self
+ .free_list
+ .pop()
+ .map(|mut v| {
+ v.clear();
+ v.resize(data.len(), 0);
+ v.copy_from_slice(data);
+ v
+ })
+ .unwrap_or_else(|| Vec::from(data)),
+ kind,
+ compressed_size,
+ }) {
+ self.free_list.push(previous.data)
+ }
+ }
+
+ fn get(&mut self, pack_id: u32, offset: u64, out: &mut Vec<u8>) -> Option<(gix_object::Kind, usize)> {
+ let res = self.inner.lookup(|e: &mut Entry| {
+ if e.pack_id == pack_id && e.offset == offset {
+ out.resize(e.data.len(), 0);
+ out.copy_from_slice(&e.data);
+ Some((e.kind, e.compressed_size))
+ } else {
+ None
+ }
+ });
+ if res.is_some() {
+ self.debug.hit()
+ } else {
+ self.debug.miss()
+ }
+ res
+ }
+ }
+}
+
+#[cfg(feature = "pack-cache-lru-static")]
+pub use _static::StaticLinkedList;
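
A minimal round-trip through the public `DecodeEntry` API using the static LRU above, assuming the `pack-cache-lru-static` feature is enabled; the pack id, offset and payload are made up for illustration.

```rust
use gix_pack::cache::{lru::StaticLinkedList, DecodeEntry};

fn lru_round_trip() {
    // 64 slots is the size the docs above suggest as a reasonable default.
    let mut cache = StaticLinkedList::<64>::default();
    cache.put(1, 12, b"tree data", gix_object::Kind::Tree, 9);

    let mut out = Vec::new();
    assert_eq!(cache.get(1, 12, &mut out), Some((gix_object::Kind::Tree, 9)));
    assert_eq!(out, b"tree data".to_vec());
}
```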
diff --git a/vendor/gix-pack/src/cache/mod.rs b/vendor/gix-pack/src/cache/mod.rs
new file mode 100644
index 000000000..cf4b94df8
--- /dev/null
+++ b/vendor/gix-pack/src/cache/mod.rs
@@ -0,0 +1,55 @@
+use std::ops::DerefMut;
+
+use gix_object::Kind;
+
+/// A trait to model putting objects at a given pack `offset` into a cache, and fetching them.
+///
+/// It is used to speed up [pack traversals][crate::index::File::traverse()].
+pub trait DecodeEntry {
+ /// Store a fully decoded object at `offset` of `kind` with `compressed_size` and `data` in the cache.
+ ///
+ /// It is up to the cache implementation whether that actually happens or not.
+ fn put(&mut self, pack_id: u32, offset: u64, data: &[u8], kind: gix_object::Kind, compressed_size: usize);
+ /// Attempt to fetch the object at `offset` and store its decoded bytes in `out`, as previously stored with [`DecodeEntry::put()`], and return
+ /// its (object `kind`, `decompressed_size`)
+ fn get(&mut self, pack_id: u32, offset: u64, out: &mut Vec<u8>) -> Option<(gix_object::Kind, usize)>;
+}
+
+/// A cache that stores nothing and retrieves nothing, thus it _never_ caches.
+#[derive(Default)]
+pub struct Never;
+
+impl DecodeEntry for Never {
+ fn put(&mut self, _pack_id: u32, _offset: u64, _data: &[u8], _kind: gix_object::Kind, _compressed_size: usize) {}
+ fn get(&mut self, _pack_id: u32, _offset: u64, _out: &mut Vec<u8>) -> Option<(gix_object::Kind, usize)> {
+ None
+ }
+}
+
+impl<T: DecodeEntry + ?Sized> DecodeEntry for Box<T> {
+ fn put(&mut self, pack_id: u32, offset: u64, data: &[u8], kind: Kind, compressed_size: usize) {
+ self.deref_mut().put(pack_id, offset, data, kind, compressed_size)
+ }
+
+ fn get(&mut self, pack_id: u32, offset: u64, out: &mut Vec<u8>) -> Option<(Kind, usize)> {
+ self.deref_mut().get(pack_id, offset, out)
+ }
+}
+
+/// A way of storing and retrieving entire objects to and from a cache.
+pub trait Object {
+ /// Put the object going by `id` of `kind` with `data` into the cache.
+ fn put(&mut self, id: gix_hash::ObjectId, kind: gix_object::Kind, data: &[u8]);
+
+ /// Try to retrieve the object named `id` and place its data into `out` if available and return `Some(kind)` if found.
+ fn get(&mut self, id: &gix_hash::ObjectId, out: &mut Vec<u8>) -> Option<gix_object::Kind>;
+}
+
+/// Various implementations of [`DecodeEntry`] using least-recently-used algorithms.
+#[cfg(any(feature = "pack-cache-lru-dynamic", feature = "pack-cache-lru-static"))]
+pub mod lru;
+
+pub mod object;
+
+///
+pub(crate) mod delta;
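
The `DecodeEntry` contract is small enough that a toy implementation shows it completely: `put()` may or may not retain the entry, and `get()` must reproduce exactly what was stored for a given `(pack_id, offset)` pair. A sketch of a hypothetical single-slot cache (not part of the crate):

```rust
// Hypothetical single-slot cache implementing `gix_pack::cache::DecodeEntry`.
struct SingleSlot(Option<(u32, u64, Vec<u8>, gix_object::Kind, usize)>);

impl gix_pack::cache::DecodeEntry for SingleSlot {
    fn put(&mut self, pack_id: u32, offset: u64, data: &[u8], kind: gix_object::Kind, compressed_size: usize) {
        // Keep only the most recently stored entry.
        self.0 = Some((pack_id, offset, data.to_vec(), kind, compressed_size));
    }

    fn get(&mut self, pack_id: u32, offset: u64, out: &mut Vec<u8>) -> Option<(gix_object::Kind, usize)> {
        match &self.0 {
            Some((id, ofs, data, kind, size)) if *id == pack_id && *ofs == offset => {
                out.clear();
                out.extend_from_slice(data);
                Some((*kind, *size))
            }
            _ => None,
        }
    }
}
```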
diff --git a/vendor/gix-pack/src/cache/object.rs b/vendor/gix-pack/src/cache/object.rs
new file mode 100644
index 000000000..e64f47a8c
--- /dev/null
+++ b/vendor/gix-pack/src/cache/object.rs
@@ -0,0 +1,123 @@
+//! # Note
+//!
+//! This module is a bit 'misplaced' if spelled out like 'gix_pack::cache::object::*' but is best placed here for code re-use and
+//! general usefulness.
+use crate::cache;
+
+#[cfg(feature = "object-cache-dynamic")]
+mod memory {
+ use std::num::NonZeroUsize;
+
+ use clru::WeightScale;
+
+ use crate::cache;
+
+ struct Entry {
+ data: Vec<u8>,
+ kind: gix_object::Kind,
+ }
+
+ type Key = gix_hash::ObjectId;
+
+ struct CustomScale;
+
+ impl WeightScale<Key, Entry> for CustomScale {
+ fn weight(&self, key: &Key, value: &Entry) -> usize {
+ value.data.len() + std::mem::size_of::<Entry>() + key.as_bytes().len()
+ }
+ }
+
+ /// An LRU cache with hash map backing and an eviction rule based on the memory usage for object data in bytes.
+ pub struct MemoryCappedHashmap {
+ inner: clru::CLruCache<Key, Entry, gix_hashtable::hash::Builder, CustomScale>,
+ free_list: Vec<Vec<u8>>,
+ debug: gix_features::cache::Debug,
+ }
+
+ impl MemoryCappedHashmap {
+ /// The number of bytes we can hold in total, i.e. the value passed to `new(…)`.
+ pub fn capacity(&self) -> usize {
+ self.inner.capacity()
+ }
+ /// Return a new instance which evicts least recently used items if it uses more than `memory_cap_in_bytes`
+ /// object data.
+ pub fn new(memory_cap_in_bytes: usize) -> MemoryCappedHashmap {
+ MemoryCappedHashmap {
+ inner: clru::CLruCache::with_config(
+ clru::CLruCacheConfig::new(NonZeroUsize::new(memory_cap_in_bytes).expect("non zero"))
+ .with_hasher(gix_hashtable::hash::Builder::default())
+ .with_scale(CustomScale),
+ ),
+ free_list: Vec::new(),
+ debug: gix_features::cache::Debug::new(format!("MemoryCappedObjectHashmap({memory_cap_in_bytes}B)")),
+ }
+ }
+ }
+
+ impl cache::Object for MemoryCappedHashmap {
+ /// Put the object going by `id` of `kind` with `data` into the cache.
+ fn put(&mut self, id: gix_hash::ObjectId, kind: gix_object::Kind, data: &[u8]) {
+ self.debug.put();
+ if let Ok(Some(previous_entry)) = self.inner.put_with_weight(
+ id,
+ Entry {
+ data: self
+ .free_list
+ .pop()
+ .map(|mut v| {
+ v.clear();
+ v.resize(data.len(), 0);
+ v.copy_from_slice(data);
+ v
+ })
+ .unwrap_or_else(|| Vec::from(data)),
+ kind,
+ },
+ ) {
+ self.free_list.push(previous_entry.data)
+ }
+ }
+
+ /// Try to retrieve the object named `id` and place its data into `out` if available and return `Some(kind)` if found.
+ fn get(&mut self, id: &gix_hash::ObjectId, out: &mut Vec<u8>) -> Option<gix_object::Kind> {
+ let res = self.inner.get(id).map(|e| {
+ out.resize(e.data.len(), 0);
+ out.copy_from_slice(&e.data);
+ e.kind
+ });
+ if res.is_some() {
+ self.debug.hit()
+ } else {
+ self.debug.miss()
+ }
+ res
+ }
+ }
+}
+#[cfg(feature = "object-cache-dynamic")]
+pub use memory::MemoryCappedHashmap;
+
+/// A cache implementation that doesn't do any caching.
+pub struct Never;
+
+impl cache::Object for Never {
+ /// Noop
+ fn put(&mut self, _id: gix_hash::ObjectId, _kind: gix_object::Kind, _data: &[u8]) {}
+
+ /// Noop
+ fn get(&mut self, _id: &gix_hash::ObjectId, _out: &mut Vec<u8>) -> Option<gix_object::Kind> {
+ None
+ }
+}
+
+impl<T: cache::Object + ?Sized> cache::Object for Box<T> {
+ fn put(&mut self, id: gix_hash::ObjectId, kind: gix_object::Kind, data: &[u8]) {
+ use std::ops::DerefMut;
+ self.deref_mut().put(id, kind, data)
+ }
+
+ fn get(&mut self, id: &gix_hash::ObjectId, out: &mut Vec<u8>) -> Option<gix_object::Kind> {
+ use std::ops::DerefMut;
+ self.deref_mut().get(id, out)
+ }
+}
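
Finally, a minimal usage sketch of the `cache::Object` trait with the memory-capped map above, assuming the `object-cache-dynamic` feature is enabled; the null object id merely stands in for a real one, and `object_round_trip` is an illustrative helper, not crate code.

```rust
use gix_pack::cache::{object::MemoryCappedHashmap, Object};

fn object_round_trip() {
    let mut cache = MemoryCappedHashmap::new(64 * 1024); // cap object data at 64 KiB
    let id = gix_hash::ObjectId::null(gix_hash::Kind::Sha1); // placeholder id
    cache.put(id, gix_object::Kind::Blob, b"hello");

    let mut out = Vec::new();
    assert_eq!(cache.get(&id, &mut out), Some(gix_object::Kind::Blob));
    assert_eq!(out, b"hello".to_vec());
}
```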