author Daniel Baumann <daniel.baumann@progress-linux.org> 2024-05-04 12:41:41 +0000
committer Daniel Baumann <daniel.baumann@progress-linux.org> 2024-05-04 12:41:41 +0000
commit 10ee2acdd26a7f1298c6f6d6b7af9b469fe29b87 (patch)
tree bdffd5d80c26cf4a7a518281a204be1ace85b4c1 /vendor/gix-pack/src
parent Releasing progress-linux version 1.70.0+dfsg1-9~progress7.99u1. (diff)
Merging upstream version 1.70.0+dfsg2.
Signed-off-by: Daniel Baumann <daniel.baumann@progress-linux.org>
Diffstat (limited to 'vendor/gix-pack/src')
-rw-r--r-- vendor/gix-pack/src/bundle/find.rs | 63
-rw-r--r-- vendor/gix-pack/src/bundle/init.rs | 46
-rw-r--r-- vendor/gix-pack/src/bundle/mod.rs | 60
-rw-r--r-- vendor/gix-pack/src/bundle/write/error.rs | 17
-rw-r--r-- vendor/gix-pack/src/bundle/write/mod.rs | 378
-rw-r--r-- vendor/gix-pack/src/bundle/write/types.rs | 120
-rw-r--r-- vendor/gix-pack/src/cache/delta/from_offsets.rs | 161
-rw-r--r-- vendor/gix-pack/src/cache/delta/mod.rs | 216
-rw-r--r-- vendor/gix-pack/src/cache/delta/traverse/mod.rs | 177
-rw-r--r-- vendor/gix-pack/src/cache/delta/traverse/resolve.rs | 154
-rw-r--r-- vendor/gix-pack/src/cache/delta/traverse/util.rs | 63
-rw-r--r-- vendor/gix-pack/src/cache/lru.rs | 165
-rw-r--r-- vendor/gix-pack/src/cache/mod.rs | 55
-rw-r--r-- vendor/gix-pack/src/cache/object.rs | 123
-rw-r--r-- vendor/gix-pack/src/data/delta.rs | 70
-rw-r--r-- vendor/gix-pack/src/data/entry/decode.rs | 125
-rw-r--r-- vendor/gix-pack/src/data/entry/header.rs | 150
-rw-r--r-- vendor/gix-pack/src/data/entry/mod.rs | 53
-rw-r--r-- vendor/gix-pack/src/data/file/decode/entry.rs | 422
-rw-r--r-- vendor/gix-pack/src/data/file/decode/header.rs | 114
-rw-r--r-- vendor/gix-pack/src/data/file/decode/mod.rs | 16
-rw-r--r-- vendor/gix-pack/src/data/file/init.rs | 41
-rw-r--r-- vendor/gix-pack/src/data/file/mod.rs | 9
-rw-r--r-- vendor/gix-pack/src/data/file/verify.rs | 42
-rw-r--r-- vendor/gix-pack/src/data/header.rs | 55
-rw-r--r-- vendor/gix-pack/src/data/input/bytes_to_entries.rs | 295
-rw-r--r-- vendor/gix-pack/src/data/input/entries_to_bytes.rs | 155
-rw-r--r-- vendor/gix-pack/src/data/input/entry.rs | 65
-rw-r--r-- vendor/gix-pack/src/data/input/lookup_ref_delta_objects.rs | 211
-rw-r--r-- vendor/gix-pack/src/data/input/mod.rs | 41
-rw-r--r-- vendor/gix-pack/src/data/input/types.rs | 73
-rw-r--r-- vendor/gix-pack/src/data/mod.rs | 134
-rw-r--r-- vendor/gix-pack/src/data/output/bytes.rs | 156
-rw-r--r-- vendor/gix-pack/src/data/output/count/mod.rs | 49
-rw-r--r-- vendor/gix-pack/src/data/output/count/objects/mod.rs | 405
-rw-r--r-- vendor/gix-pack/src/data/output/count/objects/reduce.rs | 49
-rw-r--r-- vendor/gix-pack/src/data/output/count/objects/tree.rs | 124
-rw-r--r-- vendor/gix-pack/src/data/output/count/objects/types.rs | 105
-rw-r--r-- vendor/gix-pack/src/data/output/count/objects/util.rs | 24
-rw-r--r-- vendor/gix-pack/src/data/output/entry/iter_from_counts.rs | 428
-rw-r--r-- vendor/gix-pack/src/data/output/entry/mod.rs | 181
-rw-r--r-- vendor/gix-pack/src/data/output/mod.rs | 41
-rw-r--r-- vendor/gix-pack/src/find.rs | 63
-rw-r--r-- vendor/gix-pack/src/find_traits.rs | 295
-rw-r--r-- vendor/gix-pack/src/index/access.rs | 290
-rw-r--r-- vendor/gix-pack/src/index/init.rs | 91
-rw-r--r-- vendor/gix-pack/src/index/mod.rs | 155
-rw-r--r-- vendor/gix-pack/src/index/traverse/error.rs | 44
-rw-r--r-- vendor/gix-pack/src/index/traverse/mod.rs | 245
-rw-r--r-- vendor/gix-pack/src/index/traverse/reduce.rs | 129
-rw-r--r-- vendor/gix-pack/src/index/traverse/types.rs | 123
-rw-r--r-- vendor/gix-pack/src/index/traverse/with_index.rs | 230
-rw-r--r-- vendor/gix-pack/src/index/traverse/with_lookup.rs | 190
-rw-r--r-- vendor/gix-pack/src/index/util.rs | 47
-rw-r--r-- vendor/gix-pack/src/index/verify.rs | 290
-rw-r--r-- vendor/gix-pack/src/index/write/encode.rs | 127
-rw-r--r-- vendor/gix-pack/src/index/write/error.rs | 25
-rw-r--r-- vendor/gix-pack/src/index/write/mod.rs | 263
-rwxr-xr-x vendor/gix-pack/src/lib.rs | 73
-rw-r--r-- vendor/gix-pack/src/multi_index/access.rs | 143
-rw-r--r-- vendor/gix-pack/src/multi_index/chunk.rs | 276
-rw-r--r-- vendor/gix-pack/src/multi_index/init.rs | 157
-rw-r--r-- vendor/gix-pack/src/multi_index/mod.rs | 57
-rw-r--r-- vendor/gix-pack/src/multi_index/verify.rs | 337
-rw-r--r-- vendor/gix-pack/src/multi_index/write.rs | 244
-rw-r--r-- vendor/gix-pack/src/verify.rs | 64
66 files changed, 9389 insertions, 0 deletions
diff --git a/vendor/gix-pack/src/bundle/find.rs b/vendor/gix-pack/src/bundle/find.rs
new file mode 100644
index 000000000..d39ed49a9
--- /dev/null
+++ b/vendor/gix-pack/src/bundle/find.rs
@@ -0,0 +1,63 @@
+impl crate::Bundle {
+ /// Find an object with the given [`ObjectId`][gix_hash::ObjectId] and place its data into `out`.
+ ///
+ /// [`cache`][crate::cache::DecodeEntry] is used to accelerate the lookup.
+ ///
+ /// **Note** that ref deltas are automatically resolved within this pack only, which makes this implementation unusable
+ /// for thin packs, which by now are expected to be resolved already.
+ pub fn find<'a>(
+ &self,
+ id: impl AsRef<gix_hash::oid>,
+ out: &'a mut Vec<u8>,
+ cache: &mut impl crate::cache::DecodeEntry,
+ ) -> Result<Option<(gix_object::Data<'a>, crate::data::entry::Location)>, crate::data::decode::Error> {
+ let idx = match self.index.lookup(id) {
+ Some(idx) => idx,
+ None => return Ok(None),
+ };
+ self.get_object_by_index(idx, out, cache).map(Some)
+ }
+
+ /// Special-use function to get an object given an index previously returned from
+ /// internal_find_pack_index.
+ ///
+ /// # Panics
+ ///
+ /// If `index` is out of bounds.
+ pub fn get_object_by_index<'a>(
+ &self,
+ idx: u32,
+ out: &'a mut Vec<u8>,
+ cache: &mut impl crate::cache::DecodeEntry,
+ ) -> Result<(gix_object::Data<'a>, crate::data::entry::Location), crate::data::decode::Error> {
+ let ofs = self.index.pack_offset_at_index(idx);
+ let pack_entry = self.pack.entry(ofs);
+ let header_size = pack_entry.header_size();
+ self.pack
+ .decode_entry(
+ pack_entry,
+ out,
+ |id, _out| {
+ self.index.lookup(id).map(|idx| {
+ crate::data::decode::entry::ResolvedBase::InPack(
+ self.pack.entry(self.index.pack_offset_at_index(idx)),
+ )
+ })
+ },
+ cache,
+ )
+ .map(move |r| {
+ (
+ gix_object::Data {
+ kind: r.kind,
+ data: out.as_slice(),
+ },
+ crate::data::entry::Location {
+ pack_id: self.pack.id,
+ pack_offset: ofs,
+ entry_size: r.compressed_size + header_size,
+ },
+ )
+ })
+ }
+}
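A minimal sketch of how a caller outside the crate might use `find()`, assuming the external crate name `gix_pack`, the `cache::Never` no-op cache added later in this diff, and a bundle that is already open; the printed fields are purely illustrative:

use gix_pack::Bundle;

fn print_object(bundle: &Bundle, id: &gix_hash::oid) -> Result<(), gix_pack::data::decode::Error> {
    let mut buf = Vec::new();
    // `cache::Never` disables caching; an LRU cache would speed up long delta chains.
    let mut cache = gix_pack::cache::Never;
    if let Some((obj, location)) = bundle.find(id, &mut buf, &mut cache)? {
        println!(
            "{:?} object, {} bytes, at pack offset {}",
            obj.kind,
            obj.data.len(),
            location.pack_offset
        );
    }
    Ok(())
}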
diff --git a/vendor/gix-pack/src/bundle/init.rs b/vendor/gix-pack/src/bundle/init.rs
new file mode 100644
index 000000000..3ba5257ed
--- /dev/null
+++ b/vendor/gix-pack/src/bundle/init.rs
@@ -0,0 +1,46 @@
+use std::path::{Path, PathBuf};
+
+use crate::Bundle;
+
+/// Returned by [`Bundle::at()`]
+#[derive(thiserror::Error, Debug)]
+#[allow(missing_docs)]
+pub enum Error {
+ #[error("An 'idx' extension is expected of an index file: '{0}'")]
+ InvalidPath(PathBuf),
+ #[error(transparent)]
+ Pack(#[from] crate::data::header::decode::Error),
+ #[error(transparent)]
+ Index(#[from] crate::index::init::Error),
+}
+
+/// Initialization
+impl Bundle {
+ /// Create a `Bundle` from `path`, which is either a pack file _(*.pack)_ or an index file _(*.idx)_.
+ ///
+ /// The corresponding complementary file is expected to be present.
+ ///
+ /// The `object_hash` is a way to read (and write) the same file format with different hashes, as the hash kind
+ /// isn't stored within the file format itself.
+ pub fn at(path: impl AsRef<Path>, object_hash: gix_hash::Kind) -> Result<Self, Error> {
+ Self::at_inner(path.as_ref(), object_hash)
+ }
+
+ fn at_inner(path: &Path, object_hash: gix_hash::Kind) -> Result<Self, Error> {
+ let ext = path
+ .extension()
+ .and_then(|e| e.to_str())
+ .ok_or_else(|| Error::InvalidPath(path.to_owned()))?;
+ Ok(match ext {
+ "idx" => Self {
+ index: crate::index::File::at(path, object_hash)?,
+ pack: crate::data::File::at(path.with_extension("pack"), object_hash)?,
+ },
+ "pack" => Self {
+ pack: crate::data::File::at(path, object_hash)?,
+ index: crate::index::File::at(path.with_extension("idx"), object_hash)?,
+ },
+ _ => return Err(Error::InvalidPath(path.to_owned())),
+ })
+ }
+}
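As a usage note, either file of the pair can be passed to `Bundle::at()` since the complementary path is derived by swapping the extension. A hedged sketch with a hypothetical pack name:

fn open_either_way() -> Result<(), gix_pack::bundle::init::Error> {
    // Both calls open the same pack/index pair; only the given extension differs.
    let _from_index = gix_pack::Bundle::at("objects/pack/pack-abc.idx", gix_hash::Kind::Sha1)?;
    let _from_pack = gix_pack::Bundle::at("objects/pack/pack-abc.pack", gix_hash::Kind::Sha1)?;
    Ok(())
}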
diff --git a/vendor/gix-pack/src/bundle/mod.rs b/vendor/gix-pack/src/bundle/mod.rs
new file mode 100644
index 000000000..076b355d9
--- /dev/null
+++ b/vendor/gix-pack/src/bundle/mod.rs
@@ -0,0 +1,60 @@
+///
+pub mod init;
+
+mod find;
+///
+#[cfg(not(feature = "wasm"))]
+pub mod write;
+
+///
+pub mod verify {
+ use std::sync::atomic::AtomicBool;
+
+ use gix_features::progress::Progress;
+
+ ///
+ pub mod integrity {
+ /// Returned by [`Bundle::verify_integrity()`][crate::Bundle::verify_integrity()].
+ pub struct Outcome<P> {
+ /// The computed checksum of the index which matched the stored one.
+ pub actual_index_checksum: gix_hash::ObjectId,
+ /// The packs traversal outcome
+ pub pack_traverse_outcome: crate::index::traverse::Statistics,
+ /// The provided progress instance.
+ pub progress: P,
+ }
+ }
+
+ use crate::Bundle;
+
+ impl Bundle {
+ /// Similar to [`crate::index::File::verify_integrity()`] but more convenient to call as the presence of the
+ /// pack file is a given.
+ pub fn verify_integrity<C, P, F>(
+ &self,
+ progress: P,
+ should_interrupt: &AtomicBool,
+ options: crate::index::verify::integrity::Options<F>,
+ ) -> Result<integrity::Outcome<P>, crate::index::traverse::Error<crate::index::verify::integrity::Error>>
+ where
+ P: Progress,
+ C: crate::cache::DecodeEntry,
+ F: Fn() -> C + Send + Clone,
+ {
+ self.index
+ .verify_integrity(
+ Some(crate::index::verify::PackContext {
+ data: &self.pack,
+ options,
+ }),
+ progress,
+ should_interrupt,
+ )
+ .map(|o| integrity::Outcome {
+ actual_index_checksum: o.actual_index_checksum,
+ pack_traverse_outcome: o.pack_traverse_statistics.expect("pack is set"),
+ progress: o.progress,
+ })
+ }
+ }
+}
diff --git a/vendor/gix-pack/src/bundle/write/error.rs b/vendor/gix-pack/src/bundle/write/error.rs
new file mode 100644
index 000000000..883c34029
--- /dev/null
+++ b/vendor/gix-pack/src/bundle/write/error.rs
@@ -0,0 +1,17 @@
+use std::io;
+
+use gix_tempfile::handle::Writable;
+
+/// The error returned by [`Bundle::write_to_directory()`][crate::Bundle::write_to_directory()]
+#[derive(thiserror::Error, Debug)]
+#[allow(missing_docs)]
+pub enum Error {
+ #[error("An IO error occurred when reading the pack or creating a temporary file")]
+ Io(#[from] io::Error),
+ #[error(transparent)]
+ PackIter(#[from] crate::data::input::Error),
+ #[error("Could not move a temporary file into its desired place")]
+ Persist(#[from] gix_tempfile::handle::persist::Error<Writable>),
+ #[error(transparent)]
+ IndexWrite(#[from] crate::index::write::Error),
+}
diff --git a/vendor/gix-pack/src/bundle/write/mod.rs b/vendor/gix-pack/src/bundle/write/mod.rs
new file mode 100644
index 000000000..fc0284b53
--- /dev/null
+++ b/vendor/gix-pack/src/bundle/write/mod.rs
@@ -0,0 +1,378 @@
+use std::{
+ io,
+ io::Write,
+ marker::PhantomData,
+ path::{Path, PathBuf},
+ sync::{atomic::AtomicBool, Arc},
+};
+
+use gix_features::{interrupt, progress, progress::Progress};
+use gix_tempfile::{AutoRemove, ContainingDirectory};
+
+use crate::data;
+
+mod error;
+pub use error::Error;
+
+mod types;
+use types::{LockWriter, PassThrough};
+pub use types::{Options, Outcome};
+
+use crate::bundle::write::types::SharedTempFile;
+
+type ThinPackLookupFn = Box<dyn for<'a> FnMut(gix_hash::ObjectId, &'a mut Vec<u8>) -> Option<gix_object::Data<'a>>>;
+type ThinPackLookupFnSend =
+ Box<dyn for<'a> FnMut(gix_hash::ObjectId, &'a mut Vec<u8>) -> Option<gix_object::Data<'a>> + Send + 'static>;
+
+/// The progress ids used in [`write_to_directory()`][crate::Bundle::write_to_directory()].
+///
+/// Use this information to selectively extract the progress of interest in case the parent application has custom visualization.
+#[derive(Debug, Copy, Clone)]
+pub enum ProgressId {
+ /// The amount of bytes read from the input pack data file.
+ ReadPackBytes,
+ /// A root progress counting logical steps towards an index file on disk.
+ ///
+ /// Underneath will be more progress information related to actually producing the index.
+ IndexingSteps(PhantomData<crate::index::write::ProgressId>),
+}
+
+impl From<ProgressId> for gix_features::progress::Id {
+ fn from(v: ProgressId) -> Self {
+ match v {
+ ProgressId::ReadPackBytes => *b"BWRB",
+ ProgressId::IndexingSteps(_) => *b"BWCI",
+ }
+ }
+}
+
+impl crate::Bundle {
+ /// Given a `pack` data stream, write it along with a generated index into the `directory` if `Some` or discard all output if `None`.
+ ///
+ /// In the latter case, the functionality provided here is more a kind of pack data stream validation.
+ ///
+ /// * `progress` provides detailed progress information which can be discarded with [`gix_features::progress::Discard`].
+ /// * `should_interrupt` is checked regularly and when true, the whole operation will stop.
+ /// * `thin_pack_base_object_lookup_fn`: if set, we expect to see a thin pack with objects that reference their base object by object id, which is
+ /// expected to exist in the object database the bundle is contained within.
+ /// * `options` further configures how the task is performed.
+ ///
+ /// # Note
+ ///
+ /// * the resulting pack may be empty, that is, contains zero objects in some situations. This is a valid reply by a server and should
+ /// be accounted for.
+ /// - Empty packs always have the same name and not handling this case will result in at most one superfluous pack.
+ pub fn write_to_directory<P>(
+ pack: impl io::BufRead,
+ directory: Option<impl AsRef<Path>>,
+ mut progress: P,
+ should_interrupt: &AtomicBool,
+ thin_pack_base_object_lookup_fn: Option<ThinPackLookupFn>,
+ options: Options,
+ ) -> Result<Outcome, Error>
+ where
+ P: Progress,
+ {
+ let mut read_progress = progress.add_child_with_id("read pack", ProgressId::ReadPackBytes.into());
+ read_progress.init(None, progress::bytes());
+ let pack = progress::Read {
+ inner: pack,
+ progress: progress::ThroughputOnDrop::new(read_progress),
+ };
+
+ let object_hash = options.object_hash;
+ let data_file = Arc::new(parking_lot::Mutex::new(io::BufWriter::with_capacity(
+ 64 * 1024,
+ match directory.as_ref() {
+ Some(directory) => gix_tempfile::new(directory, ContainingDirectory::Exists, AutoRemove::Tempfile)?,
+ None => gix_tempfile::new(std::env::temp_dir(), ContainingDirectory::Exists, AutoRemove::Tempfile)?,
+ },
+ )));
+ let (pack_entries_iter, pack_version): (
+ Box<dyn Iterator<Item = Result<data::input::Entry, data::input::Error>>>,
+ _,
+ ) = match thin_pack_base_object_lookup_fn {
+ Some(thin_pack_lookup_fn) => {
+ let pack = interrupt::Read {
+ inner: pack,
+ should_interrupt,
+ };
+ let buffered_pack = io::BufReader::new(pack);
+ let pack_entries_iter = data::input::LookupRefDeltaObjectsIter::new(
+ data::input::BytesToEntriesIter::new_from_header(
+ buffered_pack,
+ options.iteration_mode,
+ data::input::EntryDataMode::KeepAndCrc32,
+ object_hash,
+ )?,
+ thin_pack_lookup_fn,
+ );
+ let pack_version = pack_entries_iter.inner.version();
+ let pack_entries_iter = data::input::EntriesToBytesIter::new(
+ pack_entries_iter,
+ LockWriter {
+ writer: data_file.clone(),
+ },
+ pack_version,
+ gix_hash::Kind::Sha1, // Thin packs imply a pack being transported, and there we only ever know SHA1 at the moment.
+ );
+ (Box::new(pack_entries_iter), pack_version)
+ }
+ None => {
+ let pack = PassThrough {
+ reader: interrupt::Read {
+ inner: pack,
+ should_interrupt,
+ },
+ writer: Some(data_file.clone()),
+ };
+ // This buf-reader is required to assure we call 'read()' in order to fill the (extra) buffer. Otherwise all the counting
+ // we do with the wrapped pack reader doesn't work as it does not expect anyone to call BufRead functions directly.
+ // However, this is exactly what's happening in the ZipReader implementation that is eventually used.
+ // The performance impact of this is probably negligible, compared to all the other work that is done anyway :D.
+ let buffered_pack = io::BufReader::new(pack);
+ let pack_entries_iter = data::input::BytesToEntriesIter::new_from_header(
+ buffered_pack,
+ options.iteration_mode,
+ data::input::EntryDataMode::Crc32,
+ object_hash,
+ )?;
+ let pack_version = pack_entries_iter.version();
+ (Box::new(pack_entries_iter), pack_version)
+ }
+ };
+ let WriteOutcome {
+ outcome,
+ data_path,
+ index_path,
+ keep_path,
+ } = crate::Bundle::inner_write(
+ directory,
+ progress,
+ options,
+ data_file,
+ pack_entries_iter,
+ should_interrupt,
+ pack_version,
+ )?;
+
+ Ok(Outcome {
+ index: outcome,
+ object_hash,
+ pack_version,
+ data_path,
+ index_path,
+ keep_path,
+ })
+ }
+
+ /// Equivalent to [`write_to_directory()`][crate::Bundle::write_to_directory()] but offloads reading of the pack into its own thread, hence the `Send + 'static` bounds.
+ ///
+ /// # Note
+ ///
+ /// As it sends portions of the input to a thread it requires the 'static lifetime for the interrupt flags. This can only
+ /// be satisfied by a static AtomicBool which is only suitable for programs that only run one of these operations at a time
+ /// or don't mind that all of them abort when the flag is set.
+ pub fn write_to_directory_eagerly<P>(
+ pack: impl io::Read + Send + 'static,
+ pack_size: Option<u64>,
+ directory: Option<impl AsRef<Path>>,
+ mut progress: P,
+ should_interrupt: &'static AtomicBool,
+ thin_pack_base_object_lookup_fn: Option<ThinPackLookupFnSend>,
+ options: Options,
+ ) -> Result<Outcome, Error>
+ where
+ P: Progress,
+ P::SubProgress: 'static,
+ {
+ let mut read_progress = progress.add_child_with_id("read pack", ProgressId::ReadPackBytes.into()); /* Bundle Write Read pack Bytes*/
+ read_progress.init(pack_size.map(|s| s as usize), progress::bytes());
+ let pack = progress::Read {
+ inner: pack,
+ progress: progress::ThroughputOnDrop::new(read_progress),
+ };
+
+ let data_file = Arc::new(parking_lot::Mutex::new(io::BufWriter::new(match directory.as_ref() {
+ Some(directory) => gix_tempfile::new(directory, ContainingDirectory::Exists, AutoRemove::Tempfile)?,
+ None => gix_tempfile::new(std::env::temp_dir(), ContainingDirectory::Exists, AutoRemove::Tempfile)?,
+ })));
+ let object_hash = options.object_hash;
+ let eight_pages = 4096 * 8;
+ let (pack_entries_iter, pack_version): (
+ Box<dyn Iterator<Item = Result<data::input::Entry, data::input::Error>> + Send + 'static>,
+ _,
+ ) = match thin_pack_base_object_lookup_fn {
+ Some(thin_pack_lookup_fn) => {
+ let pack = interrupt::Read {
+ inner: pack,
+ should_interrupt,
+ };
+ let buffered_pack = io::BufReader::with_capacity(eight_pages, pack);
+ let pack_entries_iter = data::input::LookupRefDeltaObjectsIter::new(
+ data::input::BytesToEntriesIter::new_from_header(
+ buffered_pack,
+ options.iteration_mode,
+ data::input::EntryDataMode::KeepAndCrc32,
+ object_hash,
+ )?,
+ thin_pack_lookup_fn,
+ );
+ let pack_kind = pack_entries_iter.inner.version();
+ (Box::new(pack_entries_iter), pack_kind)
+ }
+ None => {
+ let pack = PassThrough {
+ reader: interrupt::Read {
+ inner: pack,
+ should_interrupt,
+ },
+ writer: Some(data_file.clone()),
+ };
+ let buffered_pack = io::BufReader::with_capacity(eight_pages, pack);
+ let pack_entries_iter = data::input::BytesToEntriesIter::new_from_header(
+ buffered_pack,
+ options.iteration_mode,
+ data::input::EntryDataMode::Crc32,
+ object_hash,
+ )?;
+ let pack_kind = pack_entries_iter.version();
+ (Box::new(pack_entries_iter), pack_kind)
+ }
+ };
+ let num_objects = pack_entries_iter.size_hint().0;
+ let pack_entries_iter =
+ gix_features::parallel::EagerIterIf::new(move || num_objects > 25_000, pack_entries_iter, 5_000, 5);
+
+ let WriteOutcome {
+ outcome,
+ data_path,
+ index_path,
+ keep_path,
+ } = crate::Bundle::inner_write(
+ directory,
+ progress,
+ options,
+ data_file,
+ pack_entries_iter,
+ should_interrupt,
+ pack_version,
+ )?;
+
+ Ok(Outcome {
+ index: outcome,
+ object_hash,
+ pack_version,
+ data_path,
+ index_path,
+ keep_path,
+ })
+ }
+
+ fn inner_write(
+ directory: Option<impl AsRef<Path>>,
+ mut progress: impl Progress,
+ Options {
+ thread_limit,
+ iteration_mode: _,
+ index_version: index_kind,
+ object_hash,
+ }: Options,
+ data_file: SharedTempFile,
+ pack_entries_iter: impl Iterator<Item = Result<data::input::Entry, data::input::Error>>,
+ should_interrupt: &AtomicBool,
+ pack_version: data::Version,
+ ) -> Result<WriteOutcome, Error> {
+ let indexing_progress = progress.add_child_with_id(
+ "create index file",
+ ProgressId::IndexingSteps(Default::default()).into(),
+ );
+ Ok(match directory {
+ Some(directory) => {
+ let directory = directory.as_ref();
+ let mut index_file = gix_tempfile::new(directory, ContainingDirectory::Exists, AutoRemove::Tempfile)?;
+
+ let outcome = crate::index::File::write_data_iter_to_stream(
+ index_kind,
+ {
+ let data_file = Arc::clone(&data_file);
+ move || new_pack_file_resolver(data_file)
+ },
+ pack_entries_iter,
+ thread_limit,
+ indexing_progress,
+ &mut index_file,
+ should_interrupt,
+ object_hash,
+ pack_version,
+ )?;
+
+ let data_path = directory.join(format!("pack-{}.pack", outcome.data_hash.to_hex()));
+ let index_path = data_path.with_extension("idx");
+ let keep_path = data_path.with_extension("keep");
+
+ std::fs::write(&keep_path, b"")?;
+ Arc::try_unwrap(data_file)
+ .expect("only one handle left after pack was consumed")
+ .into_inner()
+ .into_inner()
+ .map_err(|err| Error::from(err.into_error()))?
+ .persist(&data_path)?;
+ index_file
+ .persist(&index_path)
+ .map_err(|err| {
+ progress.info(format!(
+ "pack file at {} is retained despite failing to move the index file into place. You can use plumbing to make it usable.",
+ data_path.display()
+ ));
+ err
+ })?;
+ WriteOutcome {
+ outcome,
+ data_path: Some(data_path),
+ index_path: Some(index_path),
+ keep_path: Some(keep_path),
+ }
+ }
+ None => WriteOutcome {
+ outcome: crate::index::File::write_data_iter_to_stream(
+ index_kind,
+ move || new_pack_file_resolver(data_file),
+ pack_entries_iter,
+ thread_limit,
+ indexing_progress,
+ io::sink(),
+ should_interrupt,
+ object_hash,
+ pack_version,
+ )?,
+ data_path: None,
+ index_path: None,
+ keep_path: None,
+ },
+ })
+ }
+}
+
+fn new_pack_file_resolver(
+ data_file: SharedTempFile,
+) -> io::Result<impl Fn(data::EntryRange, &mut Vec<u8>) -> Option<()> + Send + Clone> {
+ let mut guard = data_file.lock();
+ guard.flush()?;
+ let mapped_file = Arc::new(crate::mmap::read_only(
+ &guard.get_mut().with_mut(|f| f.path().to_owned())?,
+ )?);
+ let pack_data_lookup = move |range: std::ops::Range<u64>, out: &mut Vec<u8>| -> Option<()> {
+ mapped_file
+ .get(range.start as usize..range.end as usize)
+ .map(|pack_entry| out.copy_from_slice(pack_entry))
+ };
+ Ok(pack_data_lookup)
+}
+
+struct WriteOutcome {
+ outcome: crate::index::write::Outcome,
+ data_path: Option<PathBuf>,
+ index_path: Option<PathBuf>,
+ keep_path: Option<PathBuf>,
+}
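A hedged sketch of driving `write_to_directory()` to index a received pack stream, assuming the external crate name `gix_pack`, a hypothetical input file, and the `Options` defaults shown in the types module below; passing `None` as the directory would only validate the stream:

use std::{fs, io, sync::atomic::AtomicBool};
use gix_pack::{bundle::write::Options, Bundle};

fn index_received_pack() -> Result<(), Box<dyn std::error::Error>> {
    let pack = io::BufReader::new(fs::File::open("incoming.pack")?); // hypothetical input pack
    let should_interrupt = AtomicBool::new(false);
    let outcome = Bundle::write_to_directory(
        pack,
        Some("objects/pack"),                 // `None` would discard output and only validate
        gix_features::progress::Discard,      // no progress reporting in this sketch
        &should_interrupt,
        None,                                 // not a thin pack, no base-object lookup needed
        Options::default(),
    )?;
    println!("wrote {:?} and {:?}", outcome.data_path, outcome.index_path);
    Ok(())
}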
diff --git a/vendor/gix-pack/src/bundle/write/types.rs b/vendor/gix-pack/src/bundle/write/types.rs
new file mode 100644
index 000000000..56c14ac59
--- /dev/null
+++ b/vendor/gix-pack/src/bundle/write/types.rs
@@ -0,0 +1,120 @@
+use std::{hash::Hash, io, io::SeekFrom, path::PathBuf, sync::Arc};
+
+use gix_tempfile::handle::Writable;
+
+/// Configuration for [write_to_directory][crate::Bundle::write_to_directory()] or
+/// [write_to_directory_eagerly][crate::Bundle::write_to_directory_eagerly()]
+#[derive(Debug, Clone)]
+pub struct Options {
+ /// The amount of threads to use at most when resolving the pack. If `None`, all logical cores are used.
+ pub thread_limit: Option<usize>,
+ /// Determine how much processing to spend on protecting against corruption or recovering from errors.
+ pub iteration_mode: crate::data::input::Mode,
+ /// The version of pack index to write, should be [`crate::index::Version::default()`]
+ pub index_version: crate::index::Version,
+ /// The kind of hash to use when writing the bundle.
+ pub object_hash: gix_hash::Kind,
+}
+
+impl Default for Options {
+ /// Options which favor speed and correctness and write the most commonly supported index file.
+ fn default() -> Self {
+ Options {
+ thread_limit: None,
+ iteration_mode: crate::data::input::Mode::Verify,
+ index_version: Default::default(),
+ object_hash: Default::default(),
+ }
+ }
+}
+
+/// Returned by [write_to_directory][crate::Bundle::write_to_directory()] or
+/// [write_to_directory_eagerly][crate::Bundle::write_to_directory_eagerly()]
+#[derive(PartialEq, Eq, Debug, Hash, Ord, PartialOrd, Clone)]
+#[cfg_attr(feature = "serde1", derive(serde::Serialize, serde::Deserialize))]
+pub struct Outcome {
+ /// The successful result of the index write operation
+ pub index: crate::index::write::Outcome,
+ /// The version of the pack
+ pub pack_version: crate::data::Version,
+ /// The kind of hash stored within the pack and indices
+ pub object_hash: gix_hash::Kind,
+
+ /// The path to the pack index file
+ pub index_path: Option<PathBuf>,
+ /// The path to the pack data file
+ pub data_path: Option<PathBuf>,
+ /// The path to the `.keep` file to prevent collection of the newly written pack until refs are pointing to it.
+ ///
+ /// The file is created right before moving the pack data and index data into place (i.e. `data_path` and `index_path`)
+ /// and is expected to be removed by the caller when ready.
+ pub keep_path: Option<PathBuf>,
+}
+
+impl Outcome {
+ /// Instantiate a bundle from the newly written index and data file that are represented by this `Outcome`
+ pub fn to_bundle(&self) -> Option<Result<crate::Bundle, crate::bundle::init::Error>> {
+ self.index_path
+ .as_ref()
+ .map(|path| crate::Bundle::at(path, self.object_hash))
+ }
+}
+
+pub(crate) type SharedTempFile = Arc<parking_lot::Mutex<std::io::BufWriter<gix_tempfile::Handle<Writable>>>>;
+
+pub(crate) struct PassThrough<R> {
+ pub reader: R,
+ pub writer: Option<SharedTempFile>,
+}
+
+impl<R> io::Read for PassThrough<R>
+where
+ R: io::Read,
+{
+ fn read(&mut self, buf: &mut [u8]) -> io::Result<usize> {
+ let bytes_read = self.reader.read(buf)?;
+ if let Some(writer) = self.writer.as_mut() {
+ use std::io::Write;
+ writer.lock().write_all(&buf[..bytes_read])?;
+ }
+ Ok(bytes_read)
+ }
+}
+impl<R> io::BufRead for PassThrough<R>
+where
+ R: io::BufRead,
+{
+ fn fill_buf(&mut self) -> io::Result<&[u8]> {
+ self.reader.fill_buf()
+ }
+
+ fn consume(&mut self, amt: usize) {
+ self.reader.consume(amt)
+ }
+}
+
+pub(crate) struct LockWriter {
+ pub writer: SharedTempFile,
+}
+
+impl io::Write for LockWriter {
+ fn write(&mut self, buf: &[u8]) -> io::Result<usize> {
+ self.writer.lock().write(buf)
+ }
+
+ fn flush(&mut self) -> io::Result<()> {
+ self.writer.lock().flush()
+ }
+}
+
+impl io::Read for LockWriter {
+ fn read(&mut self, buf: &mut [u8]) -> io::Result<usize> {
+ self.writer.lock().get_mut().read(buf)
+ }
+}
+
+impl io::Seek for LockWriter {
+ fn seek(&mut self, pos: SeekFrom) -> io::Result<u64> {
+ self.writer.lock().seek(pos)
+ }
+}
diff --git a/vendor/gix-pack/src/cache/delta/from_offsets.rs b/vendor/gix-pack/src/cache/delta/from_offsets.rs
new file mode 100644
index 000000000..8acb4a802
--- /dev/null
+++ b/vendor/gix-pack/src/cache/delta/from_offsets.rs
@@ -0,0 +1,161 @@
+use std::{
+ convert::TryFrom,
+ fs, io,
+ io::{BufRead, Read, Seek, SeekFrom},
+ sync::atomic::{AtomicBool, Ordering},
+ time::Instant,
+};
+
+use gix_features::progress::{self, Progress};
+
+use crate::{cache::delta::Tree, data};
+
+/// Returned by [`Tree::from_offsets_in_pack()`]
+#[derive(thiserror::Error, Debug)]
+#[allow(missing_docs)]
+pub enum Error {
+ #[error("{message}")]
+ Io { source: io::Error, message: &'static str },
+ #[error(transparent)]
+ Header(#[from] crate::data::header::decode::Error),
+ #[error("Could find object with id {id} in this pack. Thin packs are not supported")]
+ UnresolvedRefDelta { id: gix_hash::ObjectId },
+ #[error(transparent)]
+ Tree(#[from] crate::cache::delta::Error),
+ #[error("Interrupted")]
+ Interrupted,
+}
+
+const PACK_HEADER_LEN: usize = 12;
+
+/// Generate tree from certain input
+impl<T> Tree<T> {
+ /// Create a new `Tree` from any data sorted by offset, ascending as returned by the `data_sorted_by_offsets` iterator.
+ /// * `get_pack_offset(item: &T) -> data::Offset` is a function returning the pack offset of the given item, which can be used
+ /// for obtaining the object's entry within the pack.
+ /// * `pack_path` is the path to the pack file itself, from which to read the entry data; it must be a pack file matching the offsets
+ /// returned by `get_pack_offset(…)`.
+ /// * `progress` is used to track progress when creating the tree.
+ /// * `resolve_in_pack_id(gix_hash::oid) -> Option<data::Offset>` takes an object ID and tries to resolve it to an object within this pack if
+ /// possible. Failing to do so aborts the operation, and this function is not expected to be called in usual packs. It's a theoretical
+ /// possibility though as old packs might have referred to their objects using the 20 bytes hash, instead of their encoded offset from the base.
+ ///
+ /// Note that the sort order is ascending. The given pack file path must match the provided offsets.
+ pub fn from_offsets_in_pack(
+ pack_path: impl AsRef<std::path::Path>,
+ data_sorted_by_offsets: impl Iterator<Item = T>,
+ get_pack_offset: impl Fn(&T) -> data::Offset,
+ resolve_in_pack_id: impl Fn(&gix_hash::oid) -> Option<data::Offset>,
+ mut progress: impl Progress,
+ should_interrupt: &AtomicBool,
+ object_hash: gix_hash::Kind,
+ ) -> Result<Self, Error> {
+ let mut r = io::BufReader::with_capacity(
+ 8192 * 8, // this value directly corresponds to performance, 8k (default) is about 4x slower than 64k
+ fs::File::open(pack_path).map_err(|err| Error::Io {
+ source: err,
+ message: "open pack path",
+ })?,
+ );
+
+ let anticipated_num_objects = if let Some(num_objects) = data_sorted_by_offsets.size_hint().1 {
+ progress.init(Some(num_objects), progress::count("objects"));
+ num_objects
+ } else {
+ 0
+ };
+ let mut tree = Tree::with_capacity(anticipated_num_objects)?;
+
+ {
+ // safety check - assure ourselves it's a pack we can handle
+ let mut buf = [0u8; PACK_HEADER_LEN];
+ r.read_exact(&mut buf).map_err(|err| Error::Io {
+ source: err,
+ message: "reading header buffer with at least 12 bytes failed - pack file truncated?",
+ })?;
+ crate::data::header::decode(&buf)?;
+ }
+
+ let then = Instant::now();
+
+ let mut previous_cursor_position = None::<u64>;
+
+ let hash_len = object_hash.len_in_bytes();
+ for (idx, data) in data_sorted_by_offsets.enumerate() {
+ let pack_offset = get_pack_offset(&data);
+ if let Some(previous_offset) = previous_cursor_position {
+ Self::advance_cursor_to_pack_offset(&mut r, pack_offset, previous_offset)?;
+ };
+ let entry = crate::data::Entry::from_read(&mut r, pack_offset, hash_len).map_err(|err| Error::Io {
+ source: err,
+ message: "EOF while parsing header",
+ })?;
+ previous_cursor_position = Some(pack_offset + entry.header_size() as u64);
+
+ use crate::data::entry::Header::*;
+ match entry.header {
+ Tree | Blob | Commit | Tag => {
+ tree.add_root(pack_offset, data)?;
+ }
+ RefDelta { base_id } => {
+ resolve_in_pack_id(base_id.as_ref())
+ .ok_or(Error::UnresolvedRefDelta { id: base_id })
+ .and_then(|base_pack_offset| {
+ tree.add_child(base_pack_offset, pack_offset, data).map_err(Into::into)
+ })?;
+ }
+ OfsDelta { base_distance } => {
+ let base_pack_offset = pack_offset
+ .checked_sub(base_distance)
+ .expect("in bound distance for deltas");
+ tree.add_child(base_pack_offset, pack_offset, data)?;
+ }
+ };
+ progress.inc();
+ if idx % 10_000 == 0 && should_interrupt.load(Ordering::SeqCst) {
+ return Err(Error::Interrupted);
+ }
+ }
+
+ progress.show_throughput(then);
+ Ok(tree)
+ }
+
+ fn advance_cursor_to_pack_offset(
+ r: &mut io::BufReader<fs::File>,
+ pack_offset: u64,
+ previous_offset: u64,
+ ) -> Result<(), Error> {
+ let bytes_to_skip: u64 = pack_offset
+ .checked_sub(previous_offset)
+ .expect("continuously ascending pack offsets");
+ if bytes_to_skip == 0 {
+ return Ok(());
+ }
+ let buf = r.fill_buf().map_err(|err| Error::Io {
+ source: err,
+ message: "skip bytes",
+ })?;
+ if buf.is_empty() {
+ // This means we have reached the end of the file and can't make progress anymore, before we have satisfied our need
+ // for more bytes.
+ return Err(Error::Io {
+ source: io::Error::new(
+ io::ErrorKind::UnexpectedEof,
+ "ran out of bytes before reading desired amount of bytes",
+ ),
+ message: "index file is damaged or corrupt",
+ });
+ }
+ if bytes_to_skip <= u64::try_from(buf.len()).expect("sensible buffer size") {
+ // SAFETY: bytes_to_skip <= buf.len() <= usize::MAX
+ r.consume(bytes_to_skip as usize);
+ } else {
+ r.seek(SeekFrom::Start(pack_offset)).map_err(|err| Error::Io {
+ source: err,
+ message: "seek to next entry",
+ })?;
+ }
+ Ok(())
+ }
+}
diff --git a/vendor/gix-pack/src/cache/delta/mod.rs b/vendor/gix-pack/src/cache/delta/mod.rs
new file mode 100644
index 000000000..f4c1b6fc6
--- /dev/null
+++ b/vendor/gix-pack/src/cache/delta/mod.rs
@@ -0,0 +1,216 @@
+/// Returned when using various methods on a [`Tree`]
+#[derive(thiserror::Error, Debug)]
+#[allow(missing_docs)]
+pub enum Error {
+ #[error("Pack offsets must only increment. The previous pack offset was {last_pack_offset}, the current one is {pack_offset}")]
+ InvariantIncreasingPackOffset {
+ /// The last seen pack offset
+ last_pack_offset: crate::data::Offset,
+ /// The invariant violating offset
+ pack_offset: crate::data::Offset,
+ },
+}
+
+///
+pub mod traverse;
+
+///
+pub mod from_offsets;
+
+/// An item stored within the [`Tree`]
+pub struct Item<T> {
+ /// The offset into the pack file at which the pack entry's data is located.
+ pub offset: crate::data::Offset,
+ /// The offset of the next item in the pack file.
+ pub next_offset: crate::data::Offset,
+ /// Data to store with each Item, effectively data associated with each entry in a pack.
+ pub data: T,
+ /// Indices into our Tree's `items`, one for each pack entry that depends on us.
+ ///
+ /// Limited to u32 as that's the maximum amount of objects in a pack.
+ children: Vec<u32>,
+}
+
+/// Identify what kind of node we have last seen
+enum NodeKind {
+ Root,
+ Child,
+}
+
+/// A tree that allows one-time iteration over all nodes and their children, consuming it in the process,
+/// while being shareable among threads without a lock.
+/// It does this by making the guarantee that iteration only happens once.
+pub struct Tree<T> {
+ /// The root nodes, i.e. base objects
+ root_items: Vec<Item<T>>,
+ /// The child nodes, i.e. those that rely on a base object, like ref and ofs delta objects
+ child_items: Vec<Item<T>>,
+ /// The last encountered node was either a root or a child.
+ last_seen: Option<NodeKind>,
+ /// Future child offsets, associating their offset into the pack with their index in the items array.
+ /// (parent_offset, child_index)
+ future_child_offsets: Vec<(crate::data::Offset, usize)>,
+}
+
+impl<T> Tree<T> {
+ /// Instantiate an empty tree capable of storing up to `num_objects` items.
+ pub fn with_capacity(num_objects: usize) -> Result<Self, Error> {
+ Ok(Tree {
+ root_items: Vec::with_capacity(num_objects / 2),
+ child_items: Vec::with_capacity(num_objects / 2),
+ last_seen: None,
+ future_child_offsets: Vec::new(),
+ })
+ }
+
+ fn num_items(&self) -> usize {
+ self.root_items.len() + self.child_items.len()
+ }
+
+ fn assert_is_incrementing_and_update_next_offset(&mut self, offset: crate::data::Offset) -> Result<(), Error> {
+ let items = match &self.last_seen {
+ Some(NodeKind::Root) => &mut self.root_items,
+ Some(NodeKind::Child) => &mut self.child_items,
+ None => return Ok(()),
+ };
+ let item = &mut items.last_mut().expect("last seen won't lie");
+ if offset <= item.offset {
+ return Err(Error::InvariantIncreasingPackOffset {
+ last_pack_offset: item.offset,
+ pack_offset: offset,
+ });
+ }
+ item.next_offset = offset;
+ Ok(())
+ }
+
+ fn set_pack_entries_end_and_resolve_ref_offsets(
+ &mut self,
+ pack_entries_end: crate::data::Offset,
+ ) -> Result<(), traverse::Error> {
+ if !self.future_child_offsets.is_empty() {
+ for (parent_offset, child_index) in self.future_child_offsets.drain(..) {
+ if let Ok(i) = self.child_items.binary_search_by_key(&parent_offset, |i| i.offset) {
+ self.child_items[i].children.push(child_index as u32);
+ } else if let Ok(i) = self.root_items.binary_search_by_key(&parent_offset, |i| i.offset) {
+ self.root_items[i].children.push(child_index as u32);
+ } else {
+ return Err(traverse::Error::OutOfPackRefDelta {
+ base_pack_offset: parent_offset,
+ });
+ }
+ }
+ }
+
+ self.assert_is_incrementing_and_update_next_offset(pack_entries_end)
+ .expect("BUG: pack now is smaller than all previously seen entries");
+ Ok(())
+ }
+
+ /// Add a new root node, one that only has children but is not a child itself, at the given pack `offset` and associate
+ /// custom `data` with it.
+ pub fn add_root(&mut self, offset: crate::data::Offset, data: T) -> Result<(), Error> {
+ self.assert_is_incrementing_and_update_next_offset(offset)?;
+ self.last_seen = NodeKind::Root.into();
+ self.root_items.push(Item {
+ offset,
+ next_offset: 0,
+ data,
+ children: Default::default(),
+ });
+ Ok(())
+ }
+
+ /// Add a child of the item at `base_offset` which itself resides at pack `offset` and associate custom `data` with it.
+ pub fn add_child(
+ &mut self,
+ base_offset: crate::data::Offset,
+ offset: crate::data::Offset,
+ data: T,
+ ) -> Result<(), Error> {
+ self.assert_is_incrementing_and_update_next_offset(offset)?;
+
+ let next_child_index = self.child_items.len();
+ if let Ok(i) = self.child_items.binary_search_by_key(&base_offset, |i| i.offset) {
+ self.child_items[i].children.push(next_child_index as u32);
+ } else if let Ok(i) = self.root_items.binary_search_by_key(&base_offset, |i| i.offset) {
+ self.root_items[i].children.push(next_child_index as u32);
+ } else {
+ self.future_child_offsets.push((base_offset, next_child_index));
+ }
+
+ self.last_seen = NodeKind::Child.into();
+ self.child_items.push(Item {
+ offset,
+ next_offset: 0,
+ data,
+ children: Default::default(),
+ });
+ Ok(())
+ }
+}
+
+#[cfg(test)]
+mod tests {
+ mod tree {
+ mod from_offsets_in_pack {
+ use std::sync::atomic::AtomicBool;
+
+ use crate as pack;
+
+ const SMALL_PACK_INDEX: &str = "objects/pack/pack-a2bf8e71d8c18879e499335762dd95119d93d9f1.idx";
+ const SMALL_PACK: &str = "objects/pack/pack-a2bf8e71d8c18879e499335762dd95119d93d9f1.pack";
+
+ const INDEX_V1: &str = "objects/pack/pack-c0438c19fb16422b6bbcce24387b3264416d485b.idx";
+ const PACK_FOR_INDEX_V1: &str = "objects/pack/pack-c0438c19fb16422b6bbcce24387b3264416d485b.pack";
+
+ use gix_testtools::fixture_path;
+
+ #[test]
+ fn v1() -> Result<(), Box<dyn std::error::Error>> {
+ tree(INDEX_V1, PACK_FOR_INDEX_V1)
+ }
+
+ #[test]
+ fn v2() -> Result<(), Box<dyn std::error::Error>> {
+ tree(SMALL_PACK_INDEX, SMALL_PACK)
+ }
+
+ fn tree(index_path: &str, pack_path: &str) -> Result<(), Box<dyn std::error::Error>> {
+ let idx = pack::index::File::at(fixture_path(index_path), gix_hash::Kind::Sha1)?;
+ crate::cache::delta::Tree::from_offsets_in_pack(
+ fixture_path(pack_path),
+ idx.sorted_offsets().into_iter(),
+ |ofs| *ofs,
+ |id| idx.lookup(id).map(|index| idx.pack_offset_at_index(index)),
+ gix_features::progress::Discard,
+ &AtomicBool::new(false),
+ gix_hash::Kind::Sha1,
+ )?;
+ Ok(())
+ }
+ }
+ }
+
+ #[test]
+ fn size_of_pack_tree_item() {
+ use super::Item;
+ assert_eq!(std::mem::size_of::<[Item<()>; 7_500_000]>(), 300_000_000);
+ }
+
+ #[test]
+ fn size_of_pack_verify_data_structure() {
+ use super::Item;
+ pub struct EntryWithDefault {
+ _index_entry: crate::index::Entry,
+ _kind: gix_object::Kind,
+ _object_size: u64,
+ _decompressed_size: u64,
+ _compressed_size: u64,
+ _header_size: u16,
+ _level: u16,
+ }
+
+ assert_eq!(std::mem::size_of::<[Item<EntryWithDefault>; 7_500_000]>(), 840_000_000);
+ }
+}
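A minimal sketch of feeding a `Tree` by hand, assuming the external crate name `gix_pack` and purely illustrative pack offsets; offsets must strictly increase, mirroring the order of entries in a pack data file:

use gix_pack::cache::delta::Tree;

fn build_tree_sketch() -> Result<(), gix_pack::cache::delta::Error> {
    let mut tree = Tree::with_capacity(3)?;
    tree.add_root(12, "an undeltified base object")?;              // base entry at offset 12
    tree.add_child(12, 80, "a delta whose base sits at offset 12")?;
    tree.add_child(80, 130, "a delta chained onto the previous delta")?;
    Ok(())
}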
diff --git a/vendor/gix-pack/src/cache/delta/traverse/mod.rs b/vendor/gix-pack/src/cache/delta/traverse/mod.rs
new file mode 100644
index 000000000..bfe2ec687
--- /dev/null
+++ b/vendor/gix-pack/src/cache/delta/traverse/mod.rs
@@ -0,0 +1,177 @@
+use std::sync::atomic::{AtomicBool, Ordering};
+
+use gix_features::{
+ parallel::in_parallel_with_slice,
+ progress::{self, Progress},
+ threading::{lock, Mutable, OwnShared},
+};
+
+use crate::{
+ cache::delta::{traverse::util::ItemSliceSend, Item, Tree},
+ data::EntryRange,
+};
+
+mod resolve;
+pub(crate) mod util;
+
+/// Returned by [`Tree::traverse()`]
+#[derive(thiserror::Error, Debug)]
+#[allow(missing_docs)]
+pub enum Error {
+ #[error("{message}")]
+ ZlibInflate {
+ source: gix_features::zlib::inflate::Error,
+ message: &'static str,
+ },
+ #[error("The resolver failed to obtain the pack entry bytes for the entry at {pack_offset}")]
+ ResolveFailed { pack_offset: u64 },
+ #[error("One of the object inspectors failed")]
+ Inspect(#[from] Box<dyn std::error::Error + Send + Sync>),
+ #[error("Interrupted")]
+ Interrupted,
+ #[error(
+ "The base at {base_pack_offset} was referred to by a ref-delta, but it was never added to the tree as if the pack was still thin."
+ )]
+ OutOfPackRefDelta {
+ /// The base's offset which was from a resolved ref-delta that didn't actually get added to the tree
+ base_pack_offset: crate::data::Offset,
+ },
+}
+
+/// Additional context passed to the `inspect_object(…)` function of the [`Tree::traverse()`] method.
+pub struct Context<'a, S> {
+ /// The pack entry describing the object
+ pub entry: &'a crate::data::Entry,
+ /// The offset at which `entry` ends in the pack, useful to learn about the exact range of `entry` within the pack.
+ pub entry_end: u64,
+ /// The decompressed object itself, ready to be decoded.
+ pub decompressed: &'a [u8],
+ /// Custom state known to the function
+ pub state: &'a mut S,
+ /// The depth at which this object resides in the delta-tree. It represents the amount of base objects, with 0 indicating
+ /// an 'undeltified' object, and higher values indicating delta objects with the given amount of bases.
+ pub level: u16,
+}
+
+/// Options for [`Tree::traverse()`].
+pub struct Options<'a, P1, P2> {
+ /// is a progress instance to track progress for each object in the traversal.
+ pub object_progress: P1,
+ /// is a progress instance to track the overall progress.
+ pub size_progress: P2,
+ /// If `Some`, only use the given amount of threads. Otherwise, the amount of threads to use will be selected based on
+ /// the amount of available logical cores.
+ pub thread_limit: Option<usize>,
+ /// Abort the operation if the value is `true`.
+ pub should_interrupt: &'a AtomicBool,
+ /// specifies what kind of hash we expect to be stored in oid-delta entries, which is required for decoding them
+ /// with the correct size.
+ pub object_hash: gix_hash::Kind,
+}
+
+/// The outcome of [`Tree::traverse()`]
+pub struct Outcome<T> {
+ /// The items that have no children in the pack, i.e. base objects.
+ pub roots: Vec<Item<T>>,
+ /// The items that are children of a root object, i.e. delta objects.
+ pub children: Vec<Item<T>>,
+}
+
+impl<T> Tree<T>
+where
+ T: Send,
+{
+ /// Traverse this tree of delta objects with a function `inspect_object` to process each object at will.
+ ///
+ /// * `should_run_in_parallel() -> bool` returns true if the underlying pack is big enough to warrant parallel traversal at all.
+ /// * `resolve(EntrySlice, &mut Vec<u8>) -> Option<()>` resolves the bytes in the pack for the given `EntrySlice` and stores them in the
+ /// output vector. It returns `Some(())` if the object existed in the pack, or `None` to indicate a resolution error, which would abort the
+ /// operation as well.
+ /// * `pack_entries_end` marks one-past-the-last byte of the last entry in the pack, as the last entry's size would otherwise
+ /// be unknown as it's not part of the index file.
+ /// * `new_thread_state() -> State` is a function to create state to be used in each thread, invoked once per thread.
+ /// * `inspect_object(node_data: &mut T, progress: Progress, context: Context<ThreadLocal State>) -> Result<(), CustomError>` is a function
+ /// running for each thread receiving fully decoded objects along with contextual information, which either succeeds with `Ok(())`
+ /// or returns a `CustomError`.
+ /// Note that `node_data` can be modified to allow maintaining computation results on a per-object basis.
+ ///
+ /// This method returns a vector of all tree items, along with their potentially modified custom node data.
+ ///
+ /// _Note_ that this method consumes the Tree to assure safe parallel traversal with mutation support.
+ pub fn traverse<F, P1, P2, MBFN, S, E>(
+ mut self,
+ resolve: F,
+ pack_entries_end: u64,
+ new_thread_state: impl Fn() -> S + Send + Clone,
+ inspect_object: MBFN,
+ Options {
+ thread_limit,
+ object_progress,
+ mut size_progress,
+ should_interrupt,
+ object_hash,
+ }: Options<'_, P1, P2>,
+ ) -> Result<Outcome<T>, Error>
+ where
+ F: for<'r> Fn(EntryRange, &'r mut Vec<u8>) -> Option<()> + Send + Clone,
+ P1: Progress,
+ P2: Progress,
+ MBFN: Fn(&mut T, &mut <P1 as Progress>::SubProgress, Context<'_, S>) -> Result<(), E> + Send + Clone,
+ E: std::error::Error + Send + Sync + 'static,
+ {
+ self.set_pack_entries_end_and_resolve_ref_offsets(pack_entries_end)?;
+ let object_progress = OwnShared::new(Mutable::new(object_progress));
+
+ let num_objects = self.num_items();
+ let object_counter = {
+ let mut progress = lock(&object_progress);
+ progress.init(Some(num_objects), progress::count("objects"));
+ progress.counter()
+ };
+ size_progress.init(None, progress::bytes());
+ let size_counter = size_progress.counter();
+ let child_items = self.child_items.as_mut_slice();
+
+ let start = std::time::Instant::now();
+ in_parallel_with_slice(
+ &mut self.root_items,
+ thread_limit,
+ {
+ let object_progress = object_progress.clone();
+ let child_items = ItemSliceSend(child_items as *mut [Item<T>]);
+ move |thread_index| {
+ (
+ Vec::<u8>::with_capacity(4096),
+ lock(&object_progress)
+ .add_child_with_id(format!("thread {thread_index}"), gix_features::progress::UNKNOWN),
+ new_thread_state(),
+ resolve.clone(),
+ inspect_object.clone(),
+ ItemSliceSend(child_items.0),
+ )
+ }
+ },
+ {
+ move |node, state| {
+ resolve::deltas(
+ object_counter.clone(),
+ size_counter.clone(),
+ node,
+ state,
+ object_hash.len_in_bytes(),
+ )
+ }
+ },
+ || (!should_interrupt.load(Ordering::Relaxed)).then(|| std::time::Duration::from_millis(50)),
+ |_| (),
+ )?;
+
+ lock(&object_progress).show_throughput(start);
+ size_progress.show_throughput(start);
+
+ Ok(Outcome {
+ roots: self.root_items,
+ children: self.child_items,
+ })
+ }
+}
diff --git a/vendor/gix-pack/src/cache/delta/traverse/resolve.rs b/vendor/gix-pack/src/cache/delta/traverse/resolve.rs
new file mode 100644
index 000000000..fc94d87ef
--- /dev/null
+++ b/vendor/gix-pack/src/cache/delta/traverse/resolve.rs
@@ -0,0 +1,154 @@
+use std::{cell::RefCell, collections::BTreeMap, sync::atomic::Ordering};
+
+use gix_features::{progress::Progress, zlib};
+
+use crate::{
+ cache::delta::{
+ traverse::{
+ util::{ItemSliceSend, Node},
+ Context, Error,
+ },
+ Item,
+ },
+ data::EntryRange,
+};
+
+pub(crate) fn deltas<T, F, P, MBFN, S, E>(
+ object_counter: Option<gix_features::progress::StepShared>,
+ size_counter: Option<gix_features::progress::StepShared>,
+ node: &mut crate::cache::delta::Item<T>,
+ (bytes_buf, ref mut progress, state, resolve, modify_base, child_items): &mut (
+ Vec<u8>,
+ P,
+ S,
+ F,
+ MBFN,
+ ItemSliceSend<Item<T>>,
+ ),
+ hash_len: usize,
+) -> Result<(), Error>
+where
+ T: Send,
+ F: for<'r> Fn(EntryRange, &'r mut Vec<u8>) -> Option<()>,
+ P: Progress,
+ MBFN: Fn(&mut T, &mut P, Context<'_, S>) -> Result<(), E>,
+ E: std::error::Error + Send + Sync + 'static,
+{
+ let mut decompressed_bytes_by_pack_offset = BTreeMap::new();
+ let bytes_buf = RefCell::new(bytes_buf);
+ let decompress_from_resolver = |slice: EntryRange| -> Result<(crate::data::Entry, u64, Vec<u8>), Error> {
+ let mut bytes_buf = bytes_buf.borrow_mut();
+ bytes_buf.resize((slice.end - slice.start) as usize, 0);
+ resolve(slice.clone(), &mut bytes_buf).ok_or(Error::ResolveFailed {
+ pack_offset: slice.start,
+ })?;
+ let entry = crate::data::Entry::from_bytes(&bytes_buf, slice.start, hash_len);
+ let compressed = &bytes_buf[entry.header_size()..];
+ let decompressed_len = entry.decompressed_size as usize;
+ Ok((entry, slice.end, decompress_all_at_once(compressed, decompressed_len)?))
+ };
+
+ // Traverse the tree breadth first and lose the data produced for the base as it won't be needed anymore.
+ progress.init(None, gix_features::progress::count_with_decimals("objects", 2));
+
+ // each node is a base, and its children always start out as deltas which become a base after applying them.
+ // These will be pushed onto our stack until all are processed
+ let root_level = 0;
+ let mut nodes: Vec<_> = vec![(
+ root_level,
+ Node {
+ item: node,
+ child_items: child_items.0,
+ },
+ )];
+ while let Some((level, mut base)) = nodes.pop() {
+ let (base_entry, entry_end, base_bytes) = if level == root_level {
+ decompress_from_resolver(base.entry_slice())?
+ } else {
+ decompressed_bytes_by_pack_offset
+ .remove(&base.offset())
+ .expect("we store the resolved delta buffer when done")
+ };
+
+ // anything done here must be repeated further down for leaf-nodes.
+ // This way we avoid retaining their decompressed memory longer than needed (they have no children,
+ // thus their memory can be released right away, using 18% less peak memory on the linux kernel).
+ {
+ modify_base(
+ base.data(),
+ progress,
+ Context {
+ entry: &base_entry,
+ entry_end,
+ decompressed: &base_bytes,
+ state,
+ level,
+ },
+ )
+ .map_err(|err| Box::new(err) as Box<dyn std::error::Error + Send + Sync>)?;
+ object_counter.as_ref().map(|c| c.fetch_add(1, Ordering::SeqCst));
+ size_counter
+ .as_ref()
+ .map(|c| c.fetch_add(base_bytes.len(), Ordering::SeqCst));
+ }
+
+ for mut child in base.into_child_iter() {
+ let (mut child_entry, entry_end, delta_bytes) = decompress_from_resolver(child.entry_slice())?;
+ let (base_size, consumed) = crate::data::delta::decode_header_size(&delta_bytes);
+ let mut header_ofs = consumed;
+ assert_eq!(
+ base_bytes.len(),
+ base_size as usize,
+ "recorded base size in delta does not match"
+ );
+ let (result_size, consumed) = crate::data::delta::decode_header_size(&delta_bytes[consumed..]);
+ header_ofs += consumed;
+
+ let mut fully_resolved_delta_bytes = bytes_buf.borrow_mut();
+ fully_resolved_delta_bytes.resize(result_size as usize, 0);
+ crate::data::delta::apply(&base_bytes, &mut fully_resolved_delta_bytes, &delta_bytes[header_ofs..]);
+
+ // FIXME: this actually invalidates the "pack_offset()" computation, which is not obvious to consumers
+ // at all
+ child_entry.header = base_entry.header; // assign the actual object type, instead of 'delta'
+ if child.has_children() {
+ decompressed_bytes_by_pack_offset.insert(
+ child.offset(),
+ (child_entry, entry_end, fully_resolved_delta_bytes.to_owned()),
+ );
+ nodes.push((level + 1, child));
+ } else {
+ modify_base(
+ child.data(),
+ progress,
+ Context {
+ entry: &child_entry,
+ entry_end,
+ decompressed: &fully_resolved_delta_bytes,
+ state,
+ level: level + 1,
+ },
+ )
+ .map_err(|err| Box::new(err) as Box<dyn std::error::Error + Send + Sync>)?;
+ object_counter.as_ref().map(|c| c.fetch_add(1, Ordering::SeqCst));
+ size_counter
+ .as_ref()
+ .map(|c| c.fetch_add(base_bytes.len(), Ordering::SeqCst));
+ }
+ }
+ }
+
+ Ok(())
+}
+
+fn decompress_all_at_once(b: &[u8], decompressed_len: usize) -> Result<Vec<u8>, Error> {
+ let mut out = Vec::new();
+ out.resize(decompressed_len, 0);
+ zlib::Inflate::default()
+ .once(b, &mut out)
+ .map_err(|err| Error::ZlibInflate {
+ source: err,
+ message: "Failed to decompress entry",
+ })?;
+ Ok(out)
+}
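The two `decode_header_size()` calls above read the delta header: a git delta payload begins with the base size and the result size, each encoded as a little-endian varint with seven data bits per byte and the high bit as a continuation flag. The following standalone sketch illustrates that decoding; it is not the crate's own implementation:

fn decode_delta_size(d: &[u8]) -> (u64, usize) {
    let mut value = 0u64;
    let mut consumed = 0;
    for &byte in d {
        // accumulate 7 bits per byte, least-significant group first
        value |= u64::from(byte & 0x7f) << (7 * consumed);
        consumed += 1;
        if byte & 0x80 == 0 {
            break; // high bit cleared: this was the last size byte
        }
    }
    (value, consumed)
}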
diff --git a/vendor/gix-pack/src/cache/delta/traverse/util.rs b/vendor/gix-pack/src/cache/delta/traverse/util.rs
new file mode 100644
index 000000000..e7caf2ff5
--- /dev/null
+++ b/vendor/gix-pack/src/cache/delta/traverse/util.rs
@@ -0,0 +1,63 @@
+use crate::cache::delta::Item;
+
+pub struct ItemSliceSend<T>(pub *mut [T])
+where
+ T: Send;
+
+impl<T> Clone for ItemSliceSend<T>
+where
+ T: Send,
+{
+ fn clone(&self) -> Self {
+ ItemSliceSend(self.0)
+ }
+}
+
+// SAFETY: `T` is `Send`, and we only ever access one `T` at a time. Raw pointers need that assurance as well, though it is not entirely certain this always holds.
+#[allow(unsafe_code)]
+unsafe impl<T> Send for ItemSliceSend<T> where T: Send {}
+
+/// An item returned by `iter_root_chunks`, allowing access to the `data` stored alongside nodes in a [`Tree`].
+pub struct Node<'a, T> {
+ pub item: &'a mut Item<T>,
+ pub child_items: *mut [Item<T>],
+}
+
+impl<'a, T> Node<'a, T> {
+ /// Returns the offset into the pack at which the `Node`s data is located.
+ pub fn offset(&self) -> u64 {
+ self.item.offset
+ }
+
+ /// Returns the slice into the data pack at which the pack entry is located.
+ pub fn entry_slice(&self) -> crate::data::EntryRange {
+ self.item.offset..self.item.next_offset
+ }
+
+ /// Returns the node data associated with this node.
+ pub fn data(&mut self) -> &mut T {
+ &mut self.item.data
+ }
+
+ /// Returns true if this node has children, e.g. is not a leaf in the tree.
+ pub fn has_children(&self) -> bool {
+ !self.item.children.is_empty()
+ }
+
+ /// Transform this `Node` into an iterator over its children.
+ ///
+ /// Children are `Node`s referring to pack entries whose base object is this pack entry.
+ pub fn into_child_iter(self) -> impl Iterator<Item = Node<'a, T>> + 'a {
+ let children = self.child_items;
+ self.item.children.iter().map(move |&index| {
+ // SAFETY: The children array is alive by the 'a lifetime.
+ // SAFETY: The index is a valid index into the children array.
+ // SAFETY: The resulting mutable pointer cannot be yielded by any other node.
+ #[allow(unsafe_code)]
+ Node {
+ item: unsafe { &mut *(children as *mut Item<T>).add(index as usize) },
+ child_items: children,
+ }
+ })
+ }
+}
diff --git a/vendor/gix-pack/src/cache/lru.rs b/vendor/gix-pack/src/cache/lru.rs
new file mode 100644
index 000000000..bba4f5d33
--- /dev/null
+++ b/vendor/gix-pack/src/cache/lru.rs
@@ -0,0 +1,165 @@
+use super::DecodeEntry;
+
+#[cfg(feature = "pack-cache-lru-dynamic")]
+mod memory {
+ use std::num::NonZeroUsize;
+
+ use clru::WeightScale;
+
+ use super::DecodeEntry;
+
+ struct Entry {
+ data: Vec<u8>,
+ kind: gix_object::Kind,
+ compressed_size: usize,
+ }
+
+ type Key = (u32, u64);
+ struct CustomScale;
+
+ impl WeightScale<Key, Entry> for CustomScale {
+ fn weight(&self, _key: &Key, value: &Entry) -> usize {
+ value.data.len()
+ }
+ }
+
+ /// An LRU cache with hash map backing and an eviction rule based on the memory usage for object data in bytes.
+ pub struct MemoryCappedHashmap {
+ inner: clru::CLruCache<Key, Entry, std::collections::hash_map::RandomState, CustomScale>,
+ free_list: Vec<Vec<u8>>,
+ debug: gix_features::cache::Debug,
+ }
+
+ impl MemoryCappedHashmap {
+ /// Return a new instance which evicts least recently used items if it uses more than `memory_cap_in_bytes`
+ /// object data.
+ pub fn new(memory_cap_in_bytes: usize) -> MemoryCappedHashmap {
+ MemoryCappedHashmap {
+ inner: clru::CLruCache::with_config(
+ clru::CLruCacheConfig::new(NonZeroUsize::new(memory_cap_in_bytes).expect("non zero"))
+ .with_scale(CustomScale),
+ ),
+ free_list: Vec::new(),
+ debug: gix_features::cache::Debug::new(format!("MemoryCappedHashmap({memory_cap_in_bytes}B)")),
+ }
+ }
+ }
+
+ impl DecodeEntry for MemoryCappedHashmap {
+ fn put(&mut self, pack_id: u32, offset: u64, data: &[u8], kind: gix_object::Kind, compressed_size: usize) {
+ self.debug.put();
+ if let Ok(Some(previous_entry)) = self.inner.put_with_weight(
+ (pack_id, offset),
+ Entry {
+ data: self
+ .free_list
+ .pop()
+ .map(|mut v| {
+ v.clear();
+ v.resize(data.len(), 0);
+ v.copy_from_slice(data);
+ v
+ })
+ .unwrap_or_else(|| Vec::from(data)),
+ kind,
+ compressed_size,
+ },
+ ) {
+ self.free_list.push(previous_entry.data)
+ }
+ }
+
+ fn get(&mut self, pack_id: u32, offset: u64, out: &mut Vec<u8>) -> Option<(gix_object::Kind, usize)> {
+ let res = self.inner.get(&(pack_id, offset)).map(|e| {
+ out.resize(e.data.len(), 0);
+ out.copy_from_slice(&e.data);
+ (e.kind, e.compressed_size)
+ });
+ if res.is_some() {
+ self.debug.hit()
+ } else {
+ self.debug.miss()
+ }
+ res
+ }
+ }
+}
+
+#[cfg(feature = "pack-cache-lru-dynamic")]
+pub use memory::MemoryCappedHashmap;
+
+#[cfg(feature = "pack-cache-lru-static")]
+mod _static {
+ use super::DecodeEntry;
+ struct Entry {
+ pack_id: u32,
+ offset: u64,
+ data: Vec<u8>,
+ kind: gix_object::Kind,
+ compressed_size: usize,
+ }
+
+ /// A cache using a least-recently-used implementation capable of storing the `SIZE` most recent objects.
+ /// The cache must be small as the search is 'naive' and the underlying data structure is a linked list.
+ /// Values of 64 seem to improve performance.
+ pub struct StaticLinkedList<const SIZE: usize> {
+ inner: uluru::LRUCache<Entry, SIZE>,
+ free_list: Vec<Vec<u8>>,
+ debug: gix_features::cache::Debug,
+ }
+
+ impl<const SIZE: usize> Default for StaticLinkedList<SIZE> {
+ fn default() -> Self {
+ StaticLinkedList {
+ inner: Default::default(),
+ free_list: Vec::new(),
+ debug: gix_features::cache::Debug::new(format!("StaticLinkedList<{SIZE}>")),
+ }
+ }
+ }
+
+ impl<const SIZE: usize> DecodeEntry for StaticLinkedList<SIZE> {
+ fn put(&mut self, pack_id: u32, offset: u64, data: &[u8], kind: gix_object::Kind, compressed_size: usize) {
+ self.debug.put();
+ if let Some(previous) = self.inner.insert(Entry {
+ offset,
+ pack_id,
+ data: self
+ .free_list
+ .pop()
+ .map(|mut v| {
+ v.clear();
+ v.resize(data.len(), 0);
+ v.copy_from_slice(data);
+ v
+ })
+ .unwrap_or_else(|| Vec::from(data)),
+ kind,
+ compressed_size,
+ }) {
+ self.free_list.push(previous.data)
+ }
+ }
+
+ fn get(&mut self, pack_id: u32, offset: u64, out: &mut Vec<u8>) -> Option<(gix_object::Kind, usize)> {
+ let res = self.inner.lookup(|e: &mut Entry| {
+ if e.pack_id == pack_id && e.offset == offset {
+ out.resize(e.data.len(), 0);
+ out.copy_from_slice(&e.data);
+ Some((e.kind, e.compressed_size))
+ } else {
+ None
+ }
+ });
+ if res.is_some() {
+ self.debug.hit()
+ } else {
+ self.debug.miss()
+ }
+ res
+ }
+ }
+}
+
+#[cfg(feature = "pack-cache-lru-static")]
+pub use _static::StaticLinkedList;
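// Illustrative sketch (not from the upstream sources): driving one of the LRU caches above
// through the `DecodeEntry` trait. Assumes the `pack-cache-lru-static` feature is enabled;
// the pack id, offset and payload are made-up values.
fn lru_cache_roundtrip() {
    use gix_pack::cache::{lru::StaticLinkedList, DecodeEntry};

    let mut cache = StaticLinkedList::<64>::default();
    // Remember a decoded object by (pack_id, offset) along with its kind and compressed size.
    cache.put(1, 1024, b"decoded object bytes", gix_object::Kind::Blob, 42);

    let mut out = Vec::new();
    if let Some((kind, compressed_size)) = cache.get(1, 1024, &mut out) {
        assert_eq!(kind, gix_object::Kind::Blob);
        assert_eq!(compressed_size, 42);
        assert_eq!(out, b"decoded object bytes");
    }
}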
diff --git a/vendor/gix-pack/src/cache/mod.rs b/vendor/gix-pack/src/cache/mod.rs
new file mode 100644
index 000000000..cf4b94df8
--- /dev/null
+++ b/vendor/gix-pack/src/cache/mod.rs
@@ -0,0 +1,55 @@
+use std::ops::DerefMut;
+
+use gix_object::Kind;
+
+/// A trait to model putting objects at a given pack `offset` into a cache, and fetching them.
+///
+/// It is used to speed up [pack traversals][crate::index::File::traverse()].
+pub trait DecodeEntry {
+ /// Store a fully decoded object at `offset` of `kind` with `compressed_size` and `data` in the cache.
+ ///
+ /// It is up to the cache implementation whether that actually happens or not.
+ fn put(&mut self, pack_id: u32, offset: u64, data: &[u8], kind: gix_object::Kind, compressed_size: usize);
+ /// Attempt to fetch the object at `offset` and store its decoded bytes in `out`, as previously stored with [`DecodeEntry::put()`], and return
+ /// its (object `kind`, `decompressed_size`)
+ fn get(&mut self, pack_id: u32, offset: u64, out: &mut Vec<u8>) -> Option<(gix_object::Kind, usize)>;
+}
+
+/// A cache that stores nothing and retrieves nothing, thus it _never_ caches.
+#[derive(Default)]
+pub struct Never;
+
+impl DecodeEntry for Never {
+ fn put(&mut self, _pack_id: u32, _offset: u64, _data: &[u8], _kind: gix_object::Kind, _compressed_size: usize) {}
+ fn get(&mut self, _pack_id: u32, _offset: u64, _out: &mut Vec<u8>) -> Option<(gix_object::Kind, usize)> {
+ None
+ }
+}
+
+impl<T: DecodeEntry + ?Sized> DecodeEntry for Box<T> {
+ fn put(&mut self, pack_id: u32, offset: u64, data: &[u8], kind: Kind, compressed_size: usize) {
+ self.deref_mut().put(pack_id, offset, data, kind, compressed_size)
+ }
+
+ fn get(&mut self, pack_id: u32, offset: u64, out: &mut Vec<u8>) -> Option<(Kind, usize)> {
+ self.deref_mut().get(pack_id, offset, out)
+ }
+}
+
+/// A way of storing and retrieving entire objects to and from a cache.
+pub trait Object {
+ /// Put the object going by `id` of `kind` with `data` into the cache.
+ fn put(&mut self, id: gix_hash::ObjectId, kind: gix_object::Kind, data: &[u8]);
+
+ /// Try to retrieve the object named `id` and place its data into `out` if available and return `Some(kind)` if found.
+ fn get(&mut self, id: &gix_hash::ObjectId, out: &mut Vec<u8>) -> Option<gix_object::Kind>;
+}
+
+/// Various implementations of [`DecodeEntry`] using least-recently-used algorithms.
+#[cfg(any(feature = "pack-cache-lru-dynamic", feature = "pack-cache-lru-static"))]
+pub mod lru;
+
+pub mod object;
+
+///
+pub(crate) mod delta;
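// Illustrative sketch (not from the upstream sources): `DecodeEntry` can be implemented by
// arbitrary wrappers. This hypothetical counter delegates to any inner cache while tracking
// how often it is consulted.
use gix_pack::cache::DecodeEntry;

struct CountingCache<C> {
    inner: C,
    puts: usize,
    gets: usize,
}

impl<C: DecodeEntry> DecodeEntry for CountingCache<C> {
    fn put(&mut self, pack_id: u32, offset: u64, data: &[u8], kind: gix_object::Kind, compressed_size: usize) {
        self.puts += 1;
        self.inner.put(pack_id, offset, data, kind, compressed_size)
    }

    fn get(&mut self, pack_id: u32, offset: u64, out: &mut Vec<u8>) -> Option<(gix_object::Kind, usize)> {
        self.gets += 1;
        self.inner.get(pack_id, offset, out)
    }
}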
diff --git a/vendor/gix-pack/src/cache/object.rs b/vendor/gix-pack/src/cache/object.rs
new file mode 100644
index 000000000..e64f47a8c
--- /dev/null
+++ b/vendor/gix-pack/src/cache/object.rs
@@ -0,0 +1,123 @@
+//! # Note
+//!
+//! This module is a bit 'misplaced' if spelled out like 'gix_pack::cache::object::*' but is best placed here for code re-use and
+//! general usefulness.
+use crate::cache;
+
+#[cfg(feature = "object-cache-dynamic")]
+mod memory {
+ use std::num::NonZeroUsize;
+
+ use clru::WeightScale;
+
+ use crate::cache;
+
+ struct Entry {
+ data: Vec<u8>,
+ kind: gix_object::Kind,
+ }
+
+ type Key = gix_hash::ObjectId;
+
+ struct CustomScale;
+
+ impl WeightScale<Key, Entry> for CustomScale {
+ fn weight(&self, key: &Key, value: &Entry) -> usize {
+ value.data.len() + std::mem::size_of::<Entry>() + key.as_bytes().len()
+ }
+ }
+
+ /// An LRU cache with hash map backing and an eviction rule based on the memory usage for object data in bytes.
+ pub struct MemoryCappedHashmap {
+ inner: clru::CLruCache<Key, Entry, gix_hashtable::hash::Builder, CustomScale>,
+ free_list: Vec<Vec<u8>>,
+ debug: gix_features::cache::Debug,
+ }
+
+ impl MemoryCappedHashmap {
+ /// The amount of bytes we can hold in total, or the value we saw in `new(…)`.
+ pub fn capacity(&self) -> usize {
+ self.inner.capacity()
+ }
+ /// Return a new instance which evicts least recently used items if it uses more than `memory_cap_in_bytes`
+ /// object data.
+ pub fn new(memory_cap_in_bytes: usize) -> MemoryCappedHashmap {
+ MemoryCappedHashmap {
+ inner: clru::CLruCache::with_config(
+ clru::CLruCacheConfig::new(NonZeroUsize::new(memory_cap_in_bytes).expect("non zero"))
+ .with_hasher(gix_hashtable::hash::Builder::default())
+ .with_scale(CustomScale),
+ ),
+ free_list: Vec::new(),
+ debug: gix_features::cache::Debug::new(format!("MemoryCappedObjectHashmap({memory_cap_in_bytes}B)")),
+ }
+ }
+ }
+
+ impl cache::Object for MemoryCappedHashmap {
+ /// Put the object going by `id` of `kind` with `data` into the cache.
+ fn put(&mut self, id: gix_hash::ObjectId, kind: gix_object::Kind, data: &[u8]) {
+ self.debug.put();
+ if let Ok(Some(previous_entry)) = self.inner.put_with_weight(
+ id,
+ Entry {
+ data: self
+ .free_list
+ .pop()
+ .map(|mut v| {
+ v.clear();
+ v.resize(data.len(), 0);
+ v.copy_from_slice(data);
+ v
+ })
+ .unwrap_or_else(|| Vec::from(data)),
+ kind,
+ },
+ ) {
+ self.free_list.push(previous_entry.data)
+ }
+ }
+
+ /// Try to retrieve the object named `id` and place its data into `out` if available and return `Some(kind)` if found.
+ fn get(&mut self, id: &gix_hash::ObjectId, out: &mut Vec<u8>) -> Option<gix_object::Kind> {
+ let res = self.inner.get(id).map(|e| {
+ out.resize(e.data.len(), 0);
+ out.copy_from_slice(&e.data);
+ e.kind
+ });
+ if res.is_some() {
+ self.debug.hit()
+ } else {
+ self.debug.miss()
+ }
+ res
+ }
+ }
+}
+#[cfg(feature = "object-cache-dynamic")]
+pub use memory::MemoryCappedHashmap;
+
+/// A cache implementation that doesn't do any caching.
+pub struct Never;
+
+impl cache::Object for Never {
+ /// Noop
+ fn put(&mut self, _id: gix_hash::ObjectId, _kind: gix_object::Kind, _data: &[u8]) {}
+
+ /// Noop
+ fn get(&mut self, _id: &gix_hash::ObjectId, _out: &mut Vec<u8>) -> Option<gix_object::Kind> {
+ None
+ }
+}
+
+impl<T: cache::Object + ?Sized> cache::Object for Box<T> {
+ fn put(&mut self, id: gix_hash::ObjectId, kind: gix_object::Kind, data: &[u8]) {
+ use std::ops::DerefMut;
+ self.deref_mut().put(id, kind, data)
+ }
+
+ fn get(&mut self, id: &gix_hash::ObjectId, out: &mut Vec<u8>) -> Option<gix_object::Kind> {
+ use std::ops::DerefMut;
+ self.deref_mut().get(id, out)
+ }
+}
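// Illustrative sketch (not from the upstream sources): storing and retrieving a whole object
// through the `cache::Object` trait. Assumes the `object-cache-dynamic` feature; the id and
// payload are placeholders.
fn object_cache_roundtrip() {
    use gix_pack::cache::{object::MemoryCappedHashmap, Object as _};

    let mut objects = MemoryCappedHashmap::new(64 * 1024);
    let id = gix_hash::ObjectId::null(gix_hash::Kind::Sha1);
    objects.put(id, gix_object::Kind::Blob, b"content");

    let mut buf = Vec::new();
    assert_eq!(objects.get(&id, &mut buf), Some(gix_object::Kind::Blob));
    assert_eq!(buf, b"content");
}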
diff --git a/vendor/gix-pack/src/data/delta.rs b/vendor/gix-pack/src/data/delta.rs
new file mode 100644
index 000000000..a898e4aaf
--- /dev/null
+++ b/vendor/gix-pack/src/data/delta.rs
@@ -0,0 +1,70 @@
+/// Given the decompressed pack delta `d`, decode a size in bytes (either the base object size or the result object size)
+/// Equivalent to [this canonical git function](https://github.com/git/git/blob/311531c9de557d25ac087c1637818bd2aad6eb3a/delta.h#L89)
+pub fn decode_header_size(d: &[u8]) -> (u64, usize) {
+ let mut i = 0;
+ let mut size = 0u64;
+ let mut consumed = 0;
+ for cmd in d.iter() {
+ consumed += 1;
+ size |= (*cmd as u64 & 0x7f) << i;
+ i += 7;
+ if *cmd & 0x80 == 0 {
+ break;
+ }
+ }
+ (size, consumed)
+}
+
+pub fn apply(base: &[u8], mut target: &mut [u8], data: &[u8]) {
+ let mut i = 0;
+ while let Some(cmd) = data.get(i) {
+ i += 1;
+ match cmd {
+ cmd if cmd & 0b1000_0000 != 0 => {
+ let (mut ofs, mut size): (u32, u32) = (0, 0);
+ if cmd & 0b0000_0001 != 0 {
+ ofs = data[i] as u32;
+ i += 1;
+ }
+ if cmd & 0b0000_0010 != 0 {
+ ofs |= (data[i] as u32) << 8;
+ i += 1;
+ }
+ if cmd & 0b0000_0100 != 0 {
+ ofs |= (data[i] as u32) << 16;
+ i += 1;
+ }
+ if cmd & 0b0000_1000 != 0 {
+ ofs |= (data[i] as u32) << 24;
+ i += 1;
+ }
+ if cmd & 0b0001_0000 != 0 {
+ size = data[i] as u32;
+ i += 1;
+ }
+ if cmd & 0b0010_0000 != 0 {
+ size |= (data[i] as u32) << 8;
+ i += 1;
+ }
+ if cmd & 0b0100_0000 != 0 {
+ size |= (data[i] as u32) << 16;
+ i += 1;
+ }
+ if size == 0 {
+ size = 0x10000; // 65536
+ }
+ let ofs = ofs as usize;
+ std::io::Write::write(&mut target, &base[ofs..ofs + size as usize])
+ .expect("delta copy from base: byte slices must match");
+ }
+ 0 => panic!("encountered unsupported command code: 0"),
+ size => {
+ std::io::Write::write(&mut target, &data[i..i + *size as usize])
+ .expect("delta copy data: slice sizes to match up");
+ i += *size as usize;
+ }
+ }
+ }
+ assert_eq!(i, data.len());
+ assert_eq!(target.len(), 0);
+}
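// Illustrative sketch (not from the upstream sources), assuming `apply` from above is in
// scope: the instruction stream mixes insert commands (low 7 bits give the literal length)
// with copy-from-base commands (MSB set, low bits flag which offset/size bytes follow).
fn apply_tiny_delta() {
    let base = b"hello world";
    let instructions = [
        0x03, b'H', b'e', b'y', // insert 3 literal bytes: "Hey"
        0x91, 0x05, 0x06,       // copy from base: offset byte (5) and size byte (6) present -> " world"
    ];
    let mut out = vec![0u8; 9]; // must be exactly the result size
    apply(base, &mut out, &instructions);
    assert_eq!(out, b"Hey world");
}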
diff --git a/vendor/gix-pack/src/data/entry/decode.rs b/vendor/gix-pack/src/data/entry/decode.rs
new file mode 100644
index 000000000..79d7aecff
--- /dev/null
+++ b/vendor/gix-pack/src/data/entry/decode.rs
@@ -0,0 +1,125 @@
+use std::io;
+
+use gix_features::decode::{leb64, leb64_from_read};
+
+use super::{BLOB, COMMIT, OFS_DELTA, REF_DELTA, TAG, TREE};
+use crate::data;
+
+/// Decoding
+impl data::Entry {
+ /// Decode an entry from the given entry data `d`, providing the `pack_offset` to allow tracking the start of the entry data section.
+ ///
+ /// # Panics
+ ///
+ /// If we cannot understand the header, garbage data is likely to trigger this.
+ pub fn from_bytes(d: &[u8], pack_offset: data::Offset, hash_len: usize) -> data::Entry {
+ let (type_id, size, mut consumed) = parse_header_info(d);
+
+ use crate::data::entry::Header::*;
+ let object = match type_id {
+ OFS_DELTA => {
+ let (distance, leb_bytes) = leb64(&d[consumed..]);
+ let delta = OfsDelta {
+ base_distance: distance,
+ };
+ consumed += leb_bytes;
+ delta
+ }
+ REF_DELTA => {
+ let delta = RefDelta {
+ base_id: gix_hash::ObjectId::from(&d[consumed..][..hash_len]),
+ };
+ consumed += hash_len;
+ delta
+ }
+ BLOB => Blob,
+ TREE => Tree,
+ COMMIT => Commit,
+ TAG => Tag,
+ _ => panic!("We currently don't support any V3 features or extensions"),
+ };
+ data::Entry {
+ header: object,
+ decompressed_size: size,
+ data_offset: pack_offset + consumed as u64,
+ }
+ }
+
+ /// Instantiate an `Entry` from the reader `r`, providing the `pack_offset` to allow tracking the start of the entry data section.
+ pub fn from_read(
+ mut r: impl io::Read,
+ pack_offset: data::Offset,
+ hash_len: usize,
+ ) -> Result<data::Entry, io::Error> {
+ let (type_id, size, mut consumed) = streaming_parse_header_info(&mut r)?;
+
+ use crate::data::entry::Header::*;
+ let object = match type_id {
+ OFS_DELTA => {
+ let (distance, leb_bytes) = leb64_from_read(&mut r)?;
+ let delta = OfsDelta {
+ base_distance: distance,
+ };
+ consumed += leb_bytes;
+ delta
+ }
+ REF_DELTA => {
+ let mut buf = gix_hash::Kind::buf();
+ let hash = &mut buf[..hash_len];
+ r.read_exact(hash)?;
+ #[allow(clippy::redundant_slicing)]
+ let delta = RefDelta {
+ base_id: gix_hash::ObjectId::from(&hash[..]),
+ };
+ consumed += hash_len;
+ delta
+ }
+ BLOB => Blob,
+ TREE => Tree,
+ COMMIT => Commit,
+ TAG => Tag,
+ _ => panic!("We currently don't support any V3 features or extensions"),
+ };
+ Ok(data::Entry {
+ header: object,
+ decompressed_size: size,
+ data_offset: pack_offset + consumed as u64,
+ })
+ }
+}
+
+#[inline]
+fn streaming_parse_header_info(mut read: impl io::Read) -> Result<(u8, u64, usize), io::Error> {
+ let mut byte = [0u8; 1];
+ read.read_exact(&mut byte)?;
+ let mut c = byte[0];
+ let mut i = 1;
+ let type_id = (c >> 4) & 0b0000_0111;
+ let mut size = c as u64 & 0b0000_1111;
+ let mut s = 4;
+ while c & 0b1000_0000 != 0 {
+ read.read_exact(&mut byte)?;
+ c = byte[0];
+ i += 1;
+ size += ((c & 0b0111_1111) as u64) << s;
+ s += 7
+ }
+ Ok((type_id, size, i))
+}
+
+/// Parses the header of a pack-entry, yielding object type id, decompressed object size, and consumed bytes
+#[inline]
+fn parse_header_info(data: &[u8]) -> (u8, u64, usize) {
+ let mut c = data[0];
+ let mut i = 1;
+ let type_id = (c >> 4) & 0b0000_0111;
+ let mut size = c as u64 & 0b0000_1111;
+ let mut s = 4;
+ while c & 0b1000_0000 != 0 {
+ c = data[i];
+ i += 1;
+ size += ((c & 0b0111_1111) as u64) << s;
+ s += 7
+ }
+ (type_id, size, i)
+}
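// Illustrative sketch (not from the upstream sources): the header layout parsed above keeps
// the object type in bits 4-6 of the first byte, the low nibble holds the least significant
// size bits, and the MSB flags a continuation byte.
fn decode_single_byte_header() {
    // 0x3a == 0b0011_1010: type 3 (blob), size 10, no continuation bit set.
    let entry = gix_pack::data::Entry::from_bytes(&[0x3a], 12, gix_hash::Kind::Sha1.len_in_bytes());
    assert_eq!(entry.header, gix_pack::data::entry::Header::Blob);
    assert_eq!(entry.decompressed_size, 10);
    assert_eq!(entry.data_offset, 13); // exactly one header byte was consumed
}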
diff --git a/vendor/gix-pack/src/data/entry/header.rs b/vendor/gix-pack/src/data/entry/header.rs
new file mode 100644
index 000000000..83983eab0
--- /dev/null
+++ b/vendor/gix-pack/src/data/entry/header.rs
@@ -0,0 +1,150 @@
+use std::io;
+
+use super::{BLOB, COMMIT, OFS_DELTA, REF_DELTA, TAG, TREE};
+use crate::data;
+
+/// The header portion of a pack data entry, identifying the kind of stored object.
+#[derive(PartialEq, Eq, Debug, Hash, Ord, PartialOrd, Clone, Copy)]
+#[cfg_attr(feature = "serde1", derive(serde::Serialize, serde::Deserialize))]
+#[allow(missing_docs)]
+pub enum Header {
+ /// The object is a commit
+ Commit,
+ /// The object is a tree
+ Tree,
+ /// The object is a blob
+ Blob,
+ /// The object is a tag
+ Tag,
+ /// Describes a delta-object which needs to be applied to a base. The base object is identified by the `base_id` field
+ /// which is found within the parent repository.
+ /// Most commonly used for **thin-packs** when receiving pack files from the server to refer to objects that are not
+ /// part of the pack but expected to be present in the receiver's repository.
+ ///
+ /// # Note
+ /// This could also be an object within this pack if the LSB encoded offset would be larger than 20 bytes, which is unlikely to
+ /// happen.
+ ///
+ /// **The naming** is exactly the same as the canonical implementation uses, namely **REF_DELTA**.
+ RefDelta { base_id: gix_hash::ObjectId },
+ /// Describes a delta-object present in this pack which acts as base for this object.
+ /// The base object is measured as a distance from this object's
+ /// pack offset, so that `base_pack_offset = this_objects_pack_offset - base_distance`
+ ///
+ /// # Note
+ ///
+ /// **The naming** is exactly the same as the canonical implementation uses, namely **OFS_DELTA**.
+ OfsDelta { base_distance: u64 },
+}
+
+impl Header {
+ /// Subtract `distance` from `pack_offset` safely without the chance for overflow or no-ops if `distance` is 0.
+ pub fn verified_base_pack_offset(pack_offset: data::Offset, distance: u64) -> Option<data::Offset> {
+ if distance == 0 {
+ return None;
+ }
+ pack_offset.checked_sub(distance)
+ }
+ /// Convert the header's object kind into [`gix_object::Kind`] if possible
+ pub fn as_kind(&self) -> Option<gix_object::Kind> {
+ use gix_object::Kind::*;
+ Some(match self {
+ Header::Tree => Tree,
+ Header::Blob => Blob,
+ Header::Commit => Commit,
+ Header::Tag => Tag,
+ Header::RefDelta { .. } | Header::OfsDelta { .. } => return None,
+ })
+ }
+ /// Convert this header's object kind into the packs internal representation
+ pub fn as_type_id(&self) -> u8 {
+ use Header::*;
+ match self {
+ Blob => BLOB,
+ Tree => TREE,
+ Commit => COMMIT,
+ Tag => TAG,
+ OfsDelta { .. } => OFS_DELTA,
+ RefDelta { .. } => REF_DELTA,
+ }
+ }
+ /// Returns true if this is a delta object, i.e. not a full object.
+ pub fn is_delta(&self) -> bool {
+ matches!(self, Header::OfsDelta { .. } | Header::RefDelta { .. })
+ }
+ /// Returns true if this is a base object, i.e. not a delta object.
+ pub fn is_base(&self) -> bool {
+ !self.is_delta()
+ }
+}
+
+impl Header {
+ /// Encode this header along with the given `decompressed_size_in_bytes` into the `out` write stream for use within a data pack.
+ ///
+ /// Returns the amount of bytes written to `out`.
+ /// `decompressed_size_in_bytes` is the full size in bytes of the object that this header represents
+ pub fn write_to(&self, decompressed_size_in_bytes: u64, mut out: impl io::Write) -> io::Result<usize> {
+ let mut size = decompressed_size_in_bytes;
+ let mut written = 1;
+ let mut c: u8 = (self.as_type_id() << 4) | (size as u8 & 0b0000_1111);
+ size >>= 4;
+ while size != 0 {
+ out.write_all(&[c | 0b1000_0000])?;
+ written += 1;
+ c = size as u8 & 0b0111_1111;
+ size >>= 7;
+ }
+ out.write_all(&[c])?;
+
+ use Header::*;
+ match self {
+ RefDelta { base_id: oid } => {
+ out.write_all(oid.as_slice())?;
+ written += oid.as_slice().len();
+ }
+ OfsDelta { base_distance } => {
+ let mut buf = [0u8; 10];
+ let buf = leb64_encode(*base_distance, &mut buf);
+ out.write_all(buf)?;
+ written += buf.len();
+ }
+ Blob | Tree | Commit | Tag => {}
+ }
+ Ok(written)
+ }
+
+ /// The size of the header in bytes when serialized
+ pub fn size(&self, decompressed_size: u64) -> usize {
+ self.write_to(decompressed_size, io::sink())
+ .expect("io::sink() to never fail")
+ }
+}
+
+#[inline]
+fn leb64_encode(mut n: u64, buf: &mut [u8; 10]) -> &[u8] {
+ let mut bytes_written = 1;
+ buf[buf.len() - 1] = n as u8 & 0b0111_1111;
+ for out in buf.iter_mut().rev().skip(1) {
+ n >>= 7;
+ if n == 0 {
+ break;
+ }
+ n -= 1;
+ *out = 0b1000_0000 | (n as u8 & 0b0111_1111);
+ bytes_written += 1;
+ }
+ debug_assert_eq!(n, 0, "BUG: buffer must be large enough to hold a 64 bit integer");
+ &buf[buf.len() - bytes_written..]
+}
+
+#[cfg(test)]
+mod tests {
+ use super::*;
+
+ #[test]
+ fn leb64_encode_max_int() {
+ let mut buf = [0u8; 10];
+ let buf = leb64_encode(u64::MAX, &mut buf);
+ assert_eq!(buf.len(), 10, "10 bytes should be used when 64bits are encoded");
+ }
+}
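// Illustrative sketch (not from the upstream sources): encoding a header with `write_to()`
// and cross-checking it against `size()`, which serializes into `io::sink()` internally.
fn encode_blob_header() {
    use gix_pack::data::entry::Header;

    let mut buf = Vec::new();
    let written = Header::Blob.write_to(300, &mut buf).expect("writing to a Vec never fails");
    assert_eq!(written, buf.len());
    assert_eq!(written, Header::Blob.size(300));
    assert_eq!(buf, [0xbc, 0x12]); // 300 needs two size groups, so the first byte carries the continuation bit
}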
diff --git a/vendor/gix-pack/src/data/entry/mod.rs b/vendor/gix-pack/src/data/entry/mod.rs
new file mode 100644
index 000000000..f11c39c5c
--- /dev/null
+++ b/vendor/gix-pack/src/data/entry/mod.rs
@@ -0,0 +1,53 @@
+use crate::data::Entry;
+
+const _TYPE_EXT1: u8 = 0;
+const COMMIT: u8 = 1;
+const TREE: u8 = 2;
+const BLOB: u8 = 3;
+const TAG: u8 = 4;
+const _TYPE_EXT2: u8 = 5;
+const OFS_DELTA: u8 = 6;
+const REF_DELTA: u8 = 7;
+
+/// A way to uniquely identify the location of an entry within a pack bundle
+#[derive(PartialEq, Eq, Debug, Hash, Ord, PartialOrd, Clone)]
+#[cfg_attr(feature = "serde1", derive(serde::Serialize, serde::Deserialize))]
+pub struct Location {
+ /// The id of the pack containing the object. It's unique within its frame of reference which is the owning object database.
+ pub pack_id: u32,
+ /// The size of the entry on disk, so that the range of bytes of the entry is `pack_offset..pack_offset + entry_size`.
+ pub entry_size: usize,
+ /// The start of the entry in the pack identified by `pack_id`.
+ pub pack_offset: data::Offset,
+}
+
+impl Location {
+ /// Compute a range suitable for lookup in pack data using the [`entry_slice()`][crate::data::File::entry_slice()] method.
+ pub fn entry_range(&self, pack_offset: data::Offset) -> crate::data::EntryRange {
+ pack_offset..pack_offset + self.entry_size as u64
+ }
+}
+
+/// Access
+impl Entry {
+ /// Compute the pack offset to the base entry of the object represented by this entry.
+ pub fn base_pack_offset(&self, distance: u64) -> data::Offset {
+ let pack_offset = self.data_offset - self.header_size() as u64;
+ pack_offset.checked_sub(distance).expect("in-bound distance of deltas")
+ }
+ /// The pack offset at which this entry starts
+ pub fn pack_offset(&self) -> data::Offset {
+ self.data_offset - self.header_size() as u64
+ }
+ /// The amount of bytes used to describe this entry in the pack. The header starts at [`Self::pack_offset()`]
+ pub fn header_size(&self) -> usize {
+ self.header.size(self.decompressed_size)
+ }
+}
+
+mod decode;
+
+mod header;
+pub use header::Header;
+
+use crate::data;
diff --git a/vendor/gix-pack/src/data/file/decode/entry.rs b/vendor/gix-pack/src/data/file/decode/entry.rs
new file mode 100644
index 000000000..60fefec0f
--- /dev/null
+++ b/vendor/gix-pack/src/data/file/decode/entry.rs
@@ -0,0 +1,422 @@
+use std::{convert::TryInto, ops::Range};
+
+use gix_features::zlib;
+use smallvec::SmallVec;
+
+use crate::{
+ cache, data,
+ data::{delta, file::decode::Error, File},
+};
+
+/// A return value of a resolve function, which given an [`ObjectId`][gix_hash::ObjectId] determines where an object can be found.
+#[derive(Debug, PartialEq, Eq, Hash, Ord, PartialOrd, Clone)]
+#[cfg_attr(feature = "serde1", derive(serde::Serialize, serde::Deserialize))]
+pub enum ResolvedBase {
+ /// Indicate an object is within this pack, at the given entry, and thus can be looked up locally.
+ InPack(data::Entry),
+ /// Indicates the object of `kind` was found outside of the pack, and its data was written into an output
+ /// vector which now has a length of `end`.
+ #[allow(missing_docs)]
+ OutOfPack { kind: gix_object::Kind, end: usize },
+}
+
+#[derive(Debug)]
+struct Delta {
+ data: Range<usize>,
+ base_size: usize,
+ result_size: usize,
+
+ decompressed_size: usize,
+ data_offset: data::Offset,
+}
+
+/// Additional information and statistics about a successfully decoded object produced by [`File::decode_entry()`].
+///
+/// Useful to understand the effectiveness of the pack compression or the cost of decompression.
+#[derive(Debug, PartialEq, Eq, Hash, Ord, PartialOrd, Clone)]
+#[cfg_attr(feature = "serde1", derive(serde::Serialize, serde::Deserialize))]
+pub struct Outcome {
+ /// The kind of resolved object.
+ pub kind: gix_object::Kind,
+ /// The amount of deltas in the chain of objects that had to be resolved beforehand.
+ ///
+ /// This number is affected by the [`Cache`][cache::DecodeEntry] implementation, with cache hits shortening the
+ /// delta chain accordingly
+ pub num_deltas: u32,
+ /// The total decompressed size of all pack entries in the delta chain
+ pub decompressed_size: u64,
+ /// The total compressed size of all pack entries in the delta chain
+ pub compressed_size: usize,
+ /// The total size of the decoded object.
+ pub object_size: u64,
+}
+
+impl Outcome {
+ pub(crate) fn default_from_kind(kind: gix_object::Kind) -> Self {
+ Self {
+ kind,
+ num_deltas: 0,
+ decompressed_size: 0,
+ compressed_size: 0,
+ object_size: 0,
+ }
+ }
+ fn from_object_entry(kind: gix_object::Kind, entry: &data::Entry, compressed_size: usize) -> Self {
+ Self {
+ kind,
+ num_deltas: 0,
+ decompressed_size: entry.decompressed_size,
+ compressed_size,
+ object_size: entry.decompressed_size,
+ }
+ }
+}
+
+/// Decompression of objects
+impl File {
+ /// Decompress the given `entry` into `out` and return the amount of bytes read from the pack data.
+ ///
+ /// _Note_ that this method does not resolve deltified objects, but merely decompresses their content.
+ /// `out` is expected to be large enough to hold `entry.size` bytes.
+ ///
+ /// # Panics
+ ///
+ /// If `out` isn't large enough to hold the decompressed `entry`
+ pub fn decompress_entry(&self, entry: &data::Entry, out: &mut [u8]) -> Result<usize, Error> {
+ assert!(
+ out.len() as u64 >= entry.decompressed_size,
+ "output buffer isn't large enough to hold decompressed result, want {}, have {}",
+ entry.decompressed_size,
+ out.len()
+ );
+
+ self.decompress_entry_from_data_offset(entry.data_offset, out)
+ .map_err(Into::into)
+ }
+
+ fn assure_v2(&self) {
+ assert!(
+ matches!(self.version, crate::data::Version::V2),
+ "Only V2 is implemented"
+ );
+ }
+
+ /// Obtain the [`Entry`][crate::data::Entry] at the given `offset` into the pack.
+ ///
+ /// The `offset` is typically obtained from the pack index file.
+ pub fn entry(&self, offset: data::Offset) -> data::Entry {
+ self.assure_v2();
+ let pack_offset: usize = offset.try_into().expect("offset representable by machine");
+ assert!(pack_offset <= self.data.len(), "offset out of bounds");
+
+ let object_data = &self.data[pack_offset..];
+ data::Entry::from_bytes(object_data, offset, self.hash_len)
+ }
+
+ /// Decompress the object expected at the given data offset, sans pack header. This information is only
+ /// known after the pack header was parsed.
+ /// Note that this method does not resolve deltified objects, but merely decompresses their content.
+ /// `out` is expected to be large enough to hold `entry.size` bytes.
+ /// Returns the amount of packed bytes read from the pack data file.
+ pub(crate) fn decompress_entry_from_data_offset(
+ &self,
+ data_offset: data::Offset,
+ out: &mut [u8],
+ ) -> Result<usize, zlib::inflate::Error> {
+ let offset: usize = data_offset.try_into().expect("offset representable by machine");
+ assert!(offset < self.data.len(), "entry offset out of bounds");
+
+ zlib::Inflate::default()
+ .once(&self.data[offset..], out)
+ .map(|(_status, consumed_in, _consumed_out)| consumed_in)
+ }
+
+ /// Like `decompress_entry_from_data_offset`, but returns consumed input and output.
+ pub(crate) fn decompress_entry_from_data_offset_2(
+ &self,
+ data_offset: data::Offset,
+ out: &mut [u8],
+ ) -> Result<(usize, usize), zlib::inflate::Error> {
+ let offset: usize = data_offset.try_into().expect("offset representable by machine");
+ assert!(offset < self.data.len(), "entry offset out of bounds");
+
+ zlib::Inflate::default()
+ .once(&self.data[offset..], out)
+ .map(|(_status, consumed_in, consumed_out)| (consumed_in, consumed_out))
+ }
+
+ /// Decode an entry, resolving deltas as needed, while growing the `out` vector if there is not enough
+ /// space to hold the result object.
+ ///
+ /// The `entry` determines which object to decode, and is commonly obtained with the help of a pack index file or through pack iteration.
+ ///
+ /// `resolve` is a function to lookup objects with the given [`ObjectId`][gix_hash::ObjectId], in case the full object id is used to refer to
+ /// a base object, instead of an in-pack offset.
+ ///
+ /// `delta_cache` is a mechanism to avoid looking up base objects multiple times when decompressing multiple objects in a row.
+ /// Use a [Noop-Cache][cache::Never] to disable caching altogether at the cost of repeating work.
+ pub fn decode_entry(
+ &self,
+ entry: data::Entry,
+ out: &mut Vec<u8>,
+ resolve: impl Fn(&gix_hash::oid, &mut Vec<u8>) -> Option<ResolvedBase>,
+ delta_cache: &mut impl cache::DecodeEntry,
+ ) -> Result<Outcome, Error> {
+ use crate::data::entry::Header::*;
+ match entry.header {
+ Tree | Blob | Commit | Tag => {
+ out.resize(
+ entry
+ .decompressed_size
+ .try_into()
+ .expect("size representable by machine"),
+ 0,
+ );
+ self.decompress_entry(&entry, out.as_mut_slice()).map(|consumed_input| {
+ Outcome::from_object_entry(
+ entry.header.as_kind().expect("a non-delta entry"),
+ &entry,
+ consumed_input,
+ )
+ })
+ }
+ OfsDelta { .. } | RefDelta { .. } => self.resolve_deltas(entry, resolve, out, delta_cache),
+ }
+ }
+
+ /// resolve: technically, this shouldn't ever be required as stored local packs don't refer to objects by id
+ /// that are outside of the pack. Unless, of course, the ref refers to an object within this pack, which means
+ /// it's very, very large, as 20 bytes are smaller than the corresponding MSB encoded number.
+ fn resolve_deltas(
+ &self,
+ last: data::Entry,
+ resolve: impl Fn(&gix_hash::oid, &mut Vec<u8>) -> Option<ResolvedBase>,
+ out: &mut Vec<u8>,
+ cache: &mut impl cache::DecodeEntry,
+ ) -> Result<Outcome, Error> {
+ // all deltas, from the one that produces the desired object (first) to the oldest at the end of the chain
+ let mut chain = SmallVec::<[Delta; 10]>::default();
+ let first_entry = last.clone();
+ let mut cursor = last;
+ let mut base_buffer_size: Option<usize> = None;
+ let mut object_kind: Option<gix_object::Kind> = None;
+ let mut consumed_input: Option<usize> = None;
+
+ // Find the first full base, either an undeltified object in the pack or a reference to another object.
+ let mut total_delta_data_size: u64 = 0;
+ while cursor.header.is_delta() {
+ if let Some((kind, packed_size)) = cache.get(self.id, cursor.data_offset, out) {
+ base_buffer_size = Some(out.len());
+ object_kind = Some(kind);
+ // If the input entry is a cache hit, keep the packed size as it must be returned.
+ // Otherwise, the packed size will be determined later when decompressing the input delta
+ if total_delta_data_size == 0 {
+ consumed_input = Some(packed_size);
+ }
+ break;
+ }
+ total_delta_data_size += cursor.decompressed_size;
+ let decompressed_size = cursor
+ .decompressed_size
+ .try_into()
+ .expect("a single delta size small enough to fit a usize");
+ chain.push(Delta {
+ data: Range {
+ start: 0,
+ end: decompressed_size,
+ },
+ base_size: 0,
+ result_size: 0,
+ decompressed_size,
+ data_offset: cursor.data_offset,
+ });
+ use crate::data::entry::Header;
+ cursor = match cursor.header {
+ Header::OfsDelta { base_distance } => self.entry(cursor.base_pack_offset(base_distance)),
+ Header::RefDelta { base_id } => match resolve(base_id.as_ref(), out) {
+ Some(ResolvedBase::InPack(entry)) => entry,
+ Some(ResolvedBase::OutOfPack { end, kind }) => {
+ base_buffer_size = Some(end);
+ object_kind = Some(kind);
+ break;
+ }
+ None => return Err(Error::DeltaBaseUnresolved(base_id)),
+ },
+ _ => unreachable!("cursor.is_delta() only allows deltas here"),
+ };
+ }
+
+ // This can happen if the cache held the first entry itself
+ // We will just treat it as an object then, even though it's technically incorrect.
+ if chain.is_empty() {
+ return Ok(Outcome::from_object_entry(
+ object_kind.expect("object kind as set by cache"),
+ &first_entry,
+ consumed_input.expect("consumed bytes as set by cache"),
+ ));
+ };
+
+ // First pass will decompress all delta data and keep it in our output buffer
+ // [<possibly resolved base object>]<delta-1..delta-n>...
+ // so that we can find the biggest result size.
+ let total_delta_data_size: usize = total_delta_data_size.try_into().expect("delta data to fit in memory");
+
+ let chain_len = chain.len();
+ let (first_buffer_end, second_buffer_end) = {
+ let delta_start = base_buffer_size.unwrap_or(0);
+ out.resize(delta_start + total_delta_data_size, 0);
+
+ let delta_range = Range {
+ start: delta_start,
+ end: delta_start + total_delta_data_size,
+ };
+ let mut instructions = &mut out[delta_range.clone()];
+ let mut relative_delta_start = 0;
+ let mut biggest_result_size = 0;
+ for (delta_idx, delta) in chain.iter_mut().rev().enumerate() {
+ let consumed_from_data_offset = self.decompress_entry_from_data_offset(
+ delta.data_offset,
+ &mut instructions[..delta.decompressed_size],
+ )?;
+ let is_last_delta_to_be_applied = delta_idx + 1 == chain_len;
+ if is_last_delta_to_be_applied {
+ consumed_input = Some(consumed_from_data_offset);
+ }
+
+ let (base_size, offset) = delta::decode_header_size(instructions);
+ let mut bytes_consumed_by_header = offset;
+ biggest_result_size = biggest_result_size.max(base_size);
+ delta.base_size = base_size.try_into().expect("base size fits into usize");
+
+ let (result_size, offset) = delta::decode_header_size(&instructions[offset..]);
+ bytes_consumed_by_header += offset;
+ biggest_result_size = biggest_result_size.max(result_size);
+ delta.result_size = result_size.try_into().expect("result size fits into usize");
+
+ // the absolute location into the instructions buffer, so we keep track of the end point of the last delta
+ delta.data.start = relative_delta_start + bytes_consumed_by_header;
+ relative_delta_start += delta.decompressed_size;
+ delta.data.end = relative_delta_start;
+
+ instructions = &mut instructions[delta.decompressed_size..];
+ }
+
+ // Now we can produce a buffer like this
+ // [<biggest-result-buffer, possibly filled with resolved base object data>]<biggest-result-buffer><delta-1..delta-n>
+ // from [<possibly resolved base object>]<delta-1..delta-n>...
+ let biggest_result_size: usize = biggest_result_size
+ .try_into()
+ .expect("biggest result size small enough to fit into usize");
+ let first_buffer_size = biggest_result_size;
+ let second_buffer_size = first_buffer_size;
+ out.resize(first_buffer_size + second_buffer_size + total_delta_data_size, 0);
+
+ // Now 'rescue' the deltas, because in the next step we possibly overwrite that portion
+ // of memory with the base object (in the majority of cases)
+ let second_buffer_end = {
+ let end = first_buffer_size + second_buffer_size;
+ if delta_range.start < end {
+ // …this means that the delta size is even larger than two uncompressed worst-case
+ // intermediate results combined. It would already be undesirable to have it bigger
+ // than the target size (as you could just store the object whole).
+ // It just means that existing deltas are reused smartly, each of which stands for an object.
+ // That can mean a lot of data is read to restore a single object sometimes.
+ // Fair enough - pack size is minimized that way.
+ out.copy_within(delta_range, end);
+ } else {
+ let (buffers, instructions) = out.split_at_mut(end);
+ instructions.copy_from_slice(&buffers[delta_range]);
+ }
+ end
+ };
+
+ // If we don't have an out-of-pack object already, fill the base-buffer by decompressing the full object
+ // at which the cursor is left after the iteration
+ if base_buffer_size.is_none() {
+ let base_entry = cursor;
+ debug_assert!(!base_entry.header.is_delta());
+ object_kind = base_entry.header.as_kind();
+ self.decompress_entry_from_data_offset(base_entry.data_offset, out)?;
+ }
+
+ (first_buffer_size, second_buffer_end)
+ };
+
+ // From oldest to most recent, apply all deltas, swapping the buffer back and forth
+ // TODO: once we have more tests, we could optimize this memory-intensive work to
+ // analyse the delta-chains to only copy data once - after all, with 'copy-from-base' deltas,
+ // all data originates from one base at some point.
+ // `out` is: [source-buffer][target-buffer][max-delta-instructions-buffer]
+ let (buffers, instructions) = out.split_at_mut(second_buffer_end);
+ let (mut source_buf, mut target_buf) = buffers.split_at_mut(first_buffer_end);
+
+ let mut last_result_size = None;
+ for (
+ delta_idx,
+ Delta {
+ data,
+ base_size,
+ result_size,
+ ..
+ },
+ ) in chain.into_iter().rev().enumerate()
+ {
+ let data = &mut instructions[data];
+ if delta_idx + 1 == chain_len {
+ last_result_size = Some(result_size);
+ }
+ delta::apply(&source_buf[..base_size], &mut target_buf[..result_size], data);
+ // use the target as source for the next delta
+ std::mem::swap(&mut source_buf, &mut target_buf);
+ }
+
+ let last_result_size = last_result_size.expect("at least one delta chain item");
+ // uneven chains leave the target buffer after the source buffer
+ // FIXME(Performance) If delta-chains are uneven, we know we will have to copy bytes over here
+ // Instead we could use a different start buffer, to naturally end up with the result in the
+ // right one.
+ // However, this is a bit more complicated than just that - you have to deal with the base
+ // object, which should also be placed in the second buffer right away. You don't have that
+ // control/knowledge for out-of-pack bases, so this is a special case to deal with, too.
+ // Maybe these invariants can be represented in the type system though.
+ if chain_len % 2 == 1 {
+ // this seems inverted, but remember: we swapped the buffers on the last iteration
+ target_buf[..last_result_size].copy_from_slice(&source_buf[..last_result_size]);
+ }
+ out.resize(last_result_size, 0);
+
+ let object_kind = object_kind.expect("a base object as root of any delta chain that we are here to resolve");
+ let consumed_input = consumed_input.expect("at least one decompressed delta object");
+ cache.put(
+ self.id,
+ first_entry.data_offset,
+ out.as_slice(),
+ object_kind,
+ consumed_input,
+ );
+ Ok(Outcome {
+ kind: object_kind,
+ // technically depending on the cache, the chain size is not correct as it might
+ // have been cut short by a cache hit. The caller must deactivate the cache to get
+ // actual results
+ num_deltas: chain_len as u32,
+ decompressed_size: first_entry.decompressed_size,
+ compressed_size: consumed_input,
+ object_size: last_result_size as u64,
+ })
+ }
+}
+
+#[cfg(test)]
+mod tests {
+ use super::*;
+
+ #[test]
+ fn size_of_decode_entry_outcome() {
+ assert_eq!(
+ std::mem::size_of::<Outcome>(),
+ 32,
+ "this shouldn't change without use noticing as it's returned a lot"
+ );
+ }
+}
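// Illustrative sketch (not from the upstream sources): driving `File::decode_entry()` for an
// offset obtained from a pack index. Local packs normally need no out-of-pack resolution, so
// the resolve closure may simply return `None`.
fn decode_at(pack: &gix_pack::data::File, offset: u64) -> Result<Vec<u8>, gix_pack::data::file::decode::Error> {
    let mut out = Vec::new();
    let entry = pack.entry(offset);
    let outcome = pack.decode_entry(entry, &mut out, |_id, _buf| None, &mut gix_pack::cache::Never)?;
    debug_assert_eq!(out.len() as u64, outcome.object_size);
    Ok(out)
}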
diff --git a/vendor/gix-pack/src/data/file/decode/header.rs b/vendor/gix-pack/src/data/file/decode/header.rs
new file mode 100644
index 000000000..1f4b1de0a
--- /dev/null
+++ b/vendor/gix-pack/src/data/file/decode/header.rs
@@ -0,0 +1,114 @@
+use crate::{
+ data,
+ data::{delta, file::decode::Error, File},
+};
+
+/// A return value of a resolve function, which given an [`ObjectId`][gix_hash::ObjectId] determines where an object can be found.
+#[derive(Debug, PartialEq, Eq, Hash, Ord, PartialOrd, Clone)]
+#[cfg_attr(feature = "serde1", derive(serde::Serialize, serde::Deserialize))]
+pub enum ResolvedBase {
+ /// Indicate an object is within this pack, at the given entry, and thus can be looked up locally.
+ InPack(data::Entry),
+ /// Indicates the object of `kind` was found outside of the pack.
+ OutOfPack {
+ /// The kind of object we found when reading the header of the out-of-pack base.
+ kind: gix_object::Kind,
+ /// The amount of deltas encountered if the object was packed as well.
+ num_deltas: Option<u32>,
+ },
+}
+
+/// Additional information and statistics about a successfully decoded object produced by [`File::decode_header()`].
+///
+/// Useful to understand the effectiveness of the pack compression or the cost of decompression.
+#[derive(Debug, PartialEq, Eq, Hash, Ord, PartialOrd, Clone, Copy)]
+#[cfg_attr(feature = "serde1", derive(serde::Serialize, serde::Deserialize))]
+pub struct Outcome {
+ /// The kind of resolved object.
+ pub kind: gix_object::Kind,
+ /// The decompressed size of the object.
+ pub object_size: u64,
+ /// The amount of deltas in the chain of objects that had to be resolved beforehand.
+ pub num_deltas: u32,
+}
+
+/// Obtain object information quickly.
+impl File {
+ /// Resolve the object header information starting at `entry`, following the chain of entries as needed.
+ ///
+ /// The `entry` determines which object to decode, and is commonly obtained with the help of a pack index file or through pack iteration.
+ ///
+ /// `resolve` is a function to lookup objects with the given [`ObjectId`][gix_hash::ObjectId], in case the full object id
+ /// is used to refer to a base object, instead of an in-pack offset.
+ pub fn decode_header(
+ &self,
+ mut entry: data::Entry,
+ resolve: impl Fn(&gix_hash::oid) -> Option<ResolvedBase>,
+ ) -> Result<Outcome, Error> {
+ use crate::data::entry::Header::*;
+ let mut num_deltas = 0;
+ let mut first_delta_decompressed_size = None::<u64>;
+ loop {
+ match entry.header {
+ Tree | Blob | Commit | Tag => {
+ return Ok(Outcome {
+ kind: entry.header.as_kind().expect("always valid for non-refs"),
+ object_size: first_delta_decompressed_size.unwrap_or(entry.decompressed_size),
+ num_deltas,
+ });
+ }
+ OfsDelta { base_distance } => {
+ num_deltas += 1;
+ if first_delta_decompressed_size.is_none() {
+ first_delta_decompressed_size = Some(self.decode_delta_object_size(&entry)?);
+ }
+ entry = self.entry(entry.base_pack_offset(base_distance))
+ }
+ RefDelta { base_id } => {
+ num_deltas += 1;
+ if first_delta_decompressed_size.is_none() {
+ first_delta_decompressed_size = Some(self.decode_delta_object_size(&entry)?);
+ }
+ match resolve(base_id.as_ref()) {
+ Some(ResolvedBase::InPack(base_entry)) => entry = base_entry,
+ Some(ResolvedBase::OutOfPack {
+ kind,
+ num_deltas: origin_num_deltas,
+ }) => {
+ return Ok(Outcome {
+ kind,
+ object_size: first_delta_decompressed_size.unwrap_or(entry.decompressed_size),
+ num_deltas: origin_num_deltas.unwrap_or_default() + num_deltas,
+ })
+ }
+ None => return Err(Error::DeltaBaseUnresolved(base_id)),
+ }
+ }
+ };
+ }
+ }
+
+ #[inline]
+ fn decode_delta_object_size(&self, entry: &data::Entry) -> Result<u64, Error> {
+ let mut buf = [0_u8; 32];
+ let used = self.decompress_entry_from_data_offset_2(entry.data_offset, &mut buf)?.1;
+ let buf = &buf[..used];
+ let (_base_size, offset) = delta::decode_header_size(buf);
+ let (result_size, _offset) = delta::decode_header_size(&buf[offset..]);
+ Ok(result_size)
+ }
+}
+
+#[cfg(test)]
+mod tests {
+ use super::*;
+
+ #[test]
+ fn size_of_decode_entry_outcome() {
+ assert_eq!(
+ std::mem::size_of::<Outcome>(),
+ 16,
+ "this shouldn't change without use noticing as it's returned a lot"
+ );
+ }
+}
diff --git a/vendor/gix-pack/src/data/file/decode/mod.rs b/vendor/gix-pack/src/data/file/decode/mod.rs
new file mode 100644
index 000000000..10bb7f19b
--- /dev/null
+++ b/vendor/gix-pack/src/data/file/decode/mod.rs
@@ -0,0 +1,16 @@
+///
+pub mod entry;
+///
+pub mod header;
+
+/// Returned by [`File::decode_header()`][crate::data::File::decode_header()],
+/// [`File::decode_entry()`][crate::data::File::decode_entry()] and
+/// [`File::decompress_entry()`][crate::data::File::decompress_entry()].
+#[derive(thiserror::Error, Debug)]
+#[allow(missing_docs)]
+pub enum Error {
+ #[error("Failed to decompress pack entry")]
+ ZlibInflate(#[from] gix_features::zlib::inflate::Error),
+ #[error("A delta chain could not be followed as the ref base with id {0} could not be found")]
+ DeltaBaseUnresolved(gix_hash::ObjectId),
+}
diff --git a/vendor/gix-pack/src/data/file/init.rs b/vendor/gix-pack/src/data/file/init.rs
new file mode 100644
index 000000000..b16072417
--- /dev/null
+++ b/vendor/gix-pack/src/data/file/init.rs
@@ -0,0 +1,41 @@
+use std::{convert::TryInto, path::Path};
+
+use crate::data;
+
+/// Instantiation
+impl data::File {
+ /// Try opening a data file at the given `path`.
+ ///
+ /// The `object_hash` is a way to read (and write) the same file format with different hashes, as the hash kind
+ /// isn't stored within the file format itself.
+ pub fn at(path: impl AsRef<Path>, object_hash: gix_hash::Kind) -> Result<data::File, data::header::decode::Error> {
+ Self::at_inner(path.as_ref(), object_hash)
+ }
+
+ fn at_inner(path: &Path, object_hash: gix_hash::Kind) -> Result<data::File, data::header::decode::Error> {
+ use crate::data::header::N32_SIZE;
+ let hash_len = object_hash.len_in_bytes();
+
+ let data = crate::mmap::read_only(path).map_err(|e| data::header::decode::Error::Io {
+ source: e,
+ path: path.to_owned(),
+ })?;
+ let pack_len = data.len();
+ if pack_len < N32_SIZE * 3 + hash_len {
+ return Err(data::header::decode::Error::Corrupt(format!(
+ "Pack data of size {pack_len} is too small for even an empty pack with shortest hash"
+ )));
+ }
+ let (kind, num_objects) =
+ data::header::decode(&data[..12].try_into().expect("enough data after previous check"))?;
+ Ok(data::File {
+ data,
+ path: path.to_owned(),
+ id: gix_features::hash::crc32(path.as_os_str().to_string_lossy().as_bytes()),
+ version: kind,
+ num_objects,
+ hash_len,
+ object_hash,
+ })
+ }
+}
diff --git a/vendor/gix-pack/src/data/file/mod.rs b/vendor/gix-pack/src/data/file/mod.rs
new file mode 100644
index 000000000..6bfe0e272
--- /dev/null
+++ b/vendor/gix-pack/src/data/file/mod.rs
@@ -0,0 +1,9 @@
+mod init;
+///
+pub mod verify;
+
+///
+pub mod decode;
+
+/// The bytes used as header in a pack data file.
+pub type Header = [u8; 12];
diff --git a/vendor/gix-pack/src/data/file/verify.rs b/vendor/gix-pack/src/data/file/verify.rs
new file mode 100644
index 000000000..afec20826
--- /dev/null
+++ b/vendor/gix-pack/src/data/file/verify.rs
@@ -0,0 +1,42 @@
+use std::sync::atomic::AtomicBool;
+
+use gix_features::progress::Progress;
+
+use crate::data::File;
+
+///
+pub mod checksum {
+ /// Returned by [`data::File::verify_checksum()`][crate::data::File::verify_checksum()].
+ pub type Error = crate::verify::checksum::Error;
+}
+
+/// Checksums and verify checksums
+impl File {
+ /// The checksum in the trailer of this pack data file
+ pub fn checksum(&self) -> gix_hash::ObjectId {
+ gix_hash::ObjectId::from(&self.data[self.data.len() - self.hash_len..])
+ }
+
+ /// Verifies that the checksum of the packfile over all bytes preceding it indeed matches the actual checksum,
+ /// returning the actual checksum equivalent to the return value of [`checksum()`][File::checksum()] if there
+ /// is no mismatch.
+ ///
+ /// Note that if no `progress` is desired, one can pass [`gix_features::progress::Discard`].
+ ///
+ /// Have a look at [`index::File::verify_integrity(…)`][crate::index::File::verify_integrity()] for an
+ /// even more thorough integrity check.
+ pub fn verify_checksum(
+ &self,
+ progress: impl Progress,
+ should_interrupt: &AtomicBool,
+ ) -> Result<gix_hash::ObjectId, checksum::Error> {
+ crate::verify::checksum_on_disk_or_mmap(
+ self.path(),
+ &self.data,
+ self.checksum(),
+ self.object_hash,
+ progress,
+ should_interrupt,
+ )
+ }
+}
diff --git a/vendor/gix-pack/src/data/header.rs b/vendor/gix-pack/src/data/header.rs
new file mode 100644
index 000000000..348a4ca24
--- /dev/null
+++ b/vendor/gix-pack/src/data/header.rs
@@ -0,0 +1,55 @@
+use crate::data;
+
+pub(crate) const N32_SIZE: usize = std::mem::size_of::<u32>();
+
+/// Parses the first 12 bytes of a pack file, returning the pack version as well as the number of objects contained in the pack.
+pub fn decode(data: &[u8; 12]) -> Result<(data::Version, u32), decode::Error> {
+ let mut ofs = 0;
+ if &data[ofs..ofs + b"PACK".len()] != b"PACK" {
+ return Err(decode::Error::Corrupt("Pack data type not recognized".into()));
+ }
+ ofs += N32_SIZE;
+ let kind = match crate::read_u32(&data[ofs..ofs + N32_SIZE]) {
+ 2 => data::Version::V2,
+ 3 => data::Version::V3,
+ v => return Err(decode::Error::UnsupportedVersion(v)),
+ };
+ ofs += N32_SIZE;
+ let num_objects = crate::read_u32(&data[ofs..ofs + N32_SIZE]);
+
+ Ok((kind, num_objects))
+}
+
+/// Write a pack data header of `version` with `num_objects` and return it as a 12-byte buffer.
+pub fn encode(version: data::Version, num_objects: u32) -> [u8; 12] {
+ use crate::data::Version::*;
+ let mut buf = [0u8; 12];
+ buf[..4].copy_from_slice(b"PACK");
+ buf[4..8].copy_from_slice(
+ &match version {
+ V2 => 2u32,
+ V3 => 3,
+ }
+ .to_be_bytes()[..],
+ );
+ buf[8..].copy_from_slice(&num_objects.to_be_bytes()[..]);
+ buf
+}
+
+///
+pub mod decode {
+ /// Returned by [`decode()`][super::decode()].
+ #[derive(thiserror::Error, Debug)]
+ #[allow(missing_docs)]
+ pub enum Error {
+ #[error("Could not open pack file at '{path}'")]
+ Io {
+ source: std::io::Error,
+ path: std::path::PathBuf,
+ },
+ #[error("{0}")]
+ Corrupt(String),
+ #[error("Unsupported pack version: {0}")]
+ UnsupportedVersion(u32),
+ }
+}
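// Illustrative sketch (not from the upstream sources): the two helpers above round-trip the
// 12-byte pack data header.
fn header_roundtrip() {
    let buf = gix_pack::data::header::encode(gix_pack::data::Version::V2, 42);
    let (version, num_objects) = gix_pack::data::header::decode(&buf).expect("we just wrote a valid header");
    assert_eq!(version, gix_pack::data::Version::V2);
    assert_eq!(num_objects, 42);
}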
diff --git a/vendor/gix-pack/src/data/input/bytes_to_entries.rs b/vendor/gix-pack/src/data/input/bytes_to_entries.rs
new file mode 100644
index 000000000..cf20d5fbf
--- /dev/null
+++ b/vendor/gix-pack/src/data/input/bytes_to_entries.rs
@@ -0,0 +1,295 @@
+use std::{fs, io};
+
+use gix_features::{
+ hash,
+ hash::Sha1,
+ zlib::{stream::inflate::ReadBoxed, Decompress},
+};
+use gix_hash::ObjectId;
+
+use crate::data::input;
+
+/// An iterator over [`Entries`][input::Entry] in a byte stream.
+///
+/// The iterator is used as part of [Bundle::write_to_directory(…)][crate::Bundle::write_to_directory()].
+pub struct BytesToEntriesIter<BR> {
+ read: BR,
+ decompressor: Option<Box<Decompress>>,
+ offset: u64,
+ had_error: bool,
+ version: crate::data::Version,
+ objects_left: u32,
+ hash: Option<Sha1>,
+ mode: input::Mode,
+ compressed: input::EntryDataMode,
+ compressed_buf: Option<Vec<u8>>,
+ hash_len: usize,
+ object_hash: gix_hash::Kind,
+}
+
+/// Access
+impl<BR> BytesToEntriesIter<BR> {
+ /// The pack version currently being iterated
+ pub fn version(&self) -> crate::data::Version {
+ self.version
+ }
+
+ /// The kind of iteration
+ pub fn mode(&self) -> input::Mode {
+ self.mode
+ }
+}
+
+/// Initialization
+impl<BR> BytesToEntriesIter<BR>
+where
+ BR: io::BufRead,
+{
+ /// Obtain an iterator from a `read` stream to a pack data file and configure it using `mode` and `compressed`.
+ /// `object_hash` specifies which hash is used for objects in ref-delta entries.
+ ///
+ /// Note that `read` is expected to be positioned at the beginning of a valid pack data file with a header, entries and a trailer.
+ pub fn new_from_header(
+ mut read: BR,
+ mode: input::Mode,
+ compressed: input::EntryDataMode,
+ object_hash: gix_hash::Kind,
+ ) -> Result<BytesToEntriesIter<BR>, input::Error> {
+ let mut header_data = [0u8; 12];
+ read.read_exact(&mut header_data)?;
+
+ let (version, num_objects) = crate::data::header::decode(&header_data)?;
+ assert_eq!(
+ version,
+ crate::data::Version::V2,
+ "let's stop here if we see undocumented pack formats"
+ );
+ Ok(BytesToEntriesIter {
+ read,
+ decompressor: None,
+ compressed,
+ offset: 12,
+ had_error: false,
+ version,
+ objects_left: num_objects,
+ hash: (mode != input::Mode::AsIs).then(|| {
+ let mut hash = gix_features::hash::hasher(object_hash);
+ hash.update(&header_data);
+ hash
+ }),
+ mode,
+ compressed_buf: None,
+ hash_len: object_hash.len_in_bytes(),
+ object_hash,
+ })
+ }
+
+ fn next_inner(&mut self) -> Result<input::Entry, input::Error> {
+ self.objects_left -= 1; // even an error counts as an object
+
+ // Read header
+ let entry = match self.hash.take() {
+ Some(hash) => {
+ let mut read = read_and_pass_to(
+ &mut self.read,
+ hash::Write {
+ inner: io::sink(),
+ hash,
+ },
+ );
+ let res = crate::data::Entry::from_read(&mut read, self.offset, self.hash_len);
+ self.hash = Some(read.write.hash);
+ res
+ }
+ None => crate::data::Entry::from_read(&mut self.read, self.offset, self.hash_len),
+ }
+ .map_err(input::Error::from)?;
+
+ // Decompress object to learn its compressed bytes
+ let mut decompressor = self
+ .decompressor
+ .take()
+ .unwrap_or_else(|| Box::new(Decompress::new(true)));
+ let compressed_buf = self.compressed_buf.take().unwrap_or_else(|| Vec::with_capacity(4096));
+ decompressor.reset(true);
+ let mut decompressed_reader = ReadBoxed {
+ inner: read_and_pass_to(
+ &mut self.read,
+ if self.compressed.keep() {
+ Vec::with_capacity(entry.decompressed_size as usize)
+ } else {
+ compressed_buf
+ },
+ ),
+ decompressor,
+ };
+
+ let bytes_copied = io::copy(&mut decompressed_reader, &mut io::sink())?;
+ if bytes_copied != entry.decompressed_size {
+ return Err(input::Error::IncompletePack {
+ actual: bytes_copied,
+ expected: entry.decompressed_size,
+ });
+ }
+
+ let pack_offset = self.offset;
+ let compressed_size = decompressed_reader.decompressor.total_in();
+ self.offset += entry.header_size() as u64 + compressed_size;
+ self.decompressor = Some(decompressed_reader.decompressor);
+
+ let mut compressed = decompressed_reader.inner.write;
+ debug_assert_eq!(
+ compressed_size,
+ compressed.len() as u64,
+ "we must track exactly the same amount of bytes as read by the decompressor"
+ );
+ if let Some(hash) = self.hash.as_mut() {
+ hash.update(&compressed);
+ }
+
+ let crc32 = if self.compressed.crc32() {
+ let mut header_buf = [0u8; 12 + gix_hash::Kind::longest().len_in_bytes()];
+ let header_len = entry.header.write_to(bytes_copied, header_buf.as_mut())?;
+ let state = gix_features::hash::crc32_update(0, &header_buf[..header_len]);
+ Some(gix_features::hash::crc32_update(state, &compressed))
+ } else {
+ None
+ };
+
+ let compressed = if self.compressed.keep() {
+ Some(compressed)
+ } else {
+ compressed.clear();
+ self.compressed_buf = Some(compressed);
+ None
+ };
+
+ // The last object gets the trailer (which is potentially verified)
+ let trailer = self.try_read_trailer()?;
+ Ok(input::Entry {
+ header: entry.header,
+ header_size: entry.header_size() as u16,
+ compressed,
+ compressed_size,
+ crc32,
+ pack_offset,
+ decompressed_size: bytes_copied,
+ trailer,
+ })
+ }
+
+ fn try_read_trailer(&mut self) -> Result<Option<ObjectId>, input::Error> {
+ Ok(if self.objects_left == 0 {
+ let mut id = gix_hash::ObjectId::null(self.object_hash);
+ if let Err(err) = self.read.read_exact(id.as_mut_slice()) {
+ if self.mode != input::Mode::Restore {
+ return Err(err.into());
+ }
+ }
+
+ if let Some(hash) = self.hash.take() {
+ let actual_id = gix_hash::ObjectId::from(hash.digest());
+ if self.mode == input::Mode::Restore {
+ id = actual_id;
+ }
+ if id != actual_id {
+ return Err(input::Error::ChecksumMismatch {
+ actual: actual_id,
+ expected: id,
+ });
+ }
+ }
+ Some(id)
+ } else if self.mode == input::Mode::Restore {
+ let hash = self.hash.clone().expect("in restore mode a hash is set");
+ Some(gix_hash::ObjectId::from(hash.digest()))
+ } else {
+ None
+ })
+ }
+}
+
+fn read_and_pass_to<R: io::Read, W: io::Write>(read: &mut R, to: W) -> PassThrough<&mut R, W> {
+ PassThrough { read, write: to }
+}
+
+impl<R> Iterator for BytesToEntriesIter<R>
+where
+ R: io::BufRead,
+{
+ type Item = Result<input::Entry, input::Error>;
+
+ fn next(&mut self) -> Option<Self::Item> {
+ if self.had_error || self.objects_left == 0 {
+ return None;
+ }
+ let result = self.next_inner();
+ self.had_error = result.is_err();
+ if self.had_error {
+ self.objects_left = 0;
+ }
+ if self.mode == input::Mode::Restore && self.had_error {
+ None
+ } else {
+ Some(result)
+ }
+ }
+
+ fn size_hint(&self) -> (usize, Option<usize>) {
+ (self.objects_left as usize, Some(self.objects_left as usize))
+ }
+}
+
+impl<R> std::iter::ExactSizeIterator for BytesToEntriesIter<R> where R: io::BufRead {}
+
+struct PassThrough<R, W> {
+ read: R,
+ write: W,
+}
+
+impl<R, W> io::BufRead for PassThrough<R, W>
+where
+ Self: io::Read,
+ R: io::BufRead,
+ W: io::Write,
+{
+ fn fill_buf(&mut self) -> io::Result<&[u8]> {
+ self.read.fill_buf()
+ }
+
+ fn consume(&mut self, amt: usize) {
+ let buf = self
+ .read
+ .fill_buf()
+ .expect("never fail as we called fill-buf before and this does nothing");
+ self.write
+ .write_all(&buf[..amt])
+ .expect("a write to never fail - should be a memory buffer");
+ self.read.consume(amt)
+ }
+}
+
+impl<R, W> io::Read for PassThrough<R, W>
+where
+ W: io::Write,
+ R: io::Read,
+{
+ fn read(&mut self, buf: &mut [u8]) -> io::Result<usize> {
+ let bytes_read = self.read.read(buf)?;
+ self.write.write_all(&buf[..bytes_read])?;
+ Ok(bytes_read)
+ }
+}
+
+impl crate::data::File {
+ /// Returns an iterator over [`Entries`][crate::data::input::Entry], without making use of the memory mapping.
+ pub fn streaming_iter(&self) -> Result<BytesToEntriesIter<impl io::BufRead>, input::Error> {
+ let reader = io::BufReader::with_capacity(4096 * 8, fs::File::open(&self.path)?);
+ BytesToEntriesIter::new_from_header(
+ reader,
+ input::Mode::Verify,
+ input::EntryDataMode::KeepAndCrc32,
+ self.object_hash,
+ )
+ }
+}
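// Illustrative sketch (not from the upstream sources): streaming a pack's entries without the
// memory map; `pack` is assumed to be a `gix_pack::data::File` opened beforehand.
fn list_entries(pack: &gix_pack::data::File) -> Result<(), gix_pack::data::input::Error> {
    for entry in pack.streaming_iter()? {
        let entry = entry?;
        println!("offset {}: {} decompressed bytes", entry.pack_offset, entry.decompressed_size);
    }
    Ok(())
}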
diff --git a/vendor/gix-pack/src/data/input/entries_to_bytes.rs b/vendor/gix-pack/src/data/input/entries_to_bytes.rs
new file mode 100644
index 000000000..a8c21e653
--- /dev/null
+++ b/vendor/gix-pack/src/data/input/entries_to_bytes.rs
@@ -0,0 +1,155 @@
+use std::iter::Peekable;
+
+use gix_features::hash;
+
+use crate::data::input;
+
+/// An implementation of [`Iterator`] to write [encoded entries][input::Entry] to an inner implementation each time
+/// `next()` is called.
+///
+/// It is able to deal with an unknown amount of objects as it will rewrite the pack header once the entries iterator
+/// is depleted and compute the hash in one go by re-reading the whole file.
+pub struct EntriesToBytesIter<I: Iterator, W> {
+ /// An iterator for input [`input::Entry`] instances
+ pub input: Peekable<I>,
+ /// A way of writing encoded bytes.
+ output: W,
+ /// Our trailing hash when done writing all input entries
+ trailer: Option<gix_hash::ObjectId>,
+ /// The version of the packfile to be written.
+ data_version: crate::data::Version,
+ /// The amount of entries seen so far
+ num_entries: u32,
+ /// If we are done, no additional writes will occur
+ is_done: bool,
+ /// The kind of hash to use for the digest
+ object_hash: gix_hash::Kind,
+}
+
+impl<I, W> EntriesToBytesIter<I, W>
+where
+ I: Iterator<Item = Result<input::Entry, input::Error>>,
+ W: std::io::Read + std::io::Write + std::io::Seek,
+{
+ /// Create a new instance reading [entries][input::Entry] from an `input` iterator and write pack data bytes to
+ /// `output` writer, resembling a pack of `version`. The amount of entries will be dynamically determined and
+ /// the pack is completed once the last entry was written.
+ /// `object_hash` is the kind of hash to use for the pack checksum and maybe other places, depending on the version.
+ ///
+ /// # Panics
+ ///
+    /// Currently, not all combinations of `object_hash` and `version` are supported; unsupported combinations trigger assertion failures.
+ pub fn new(input: I, output: W, version: crate::data::Version, object_hash: gix_hash::Kind) -> Self {
+ assert!(
+ matches!(version, crate::data::Version::V2),
+ "currently only pack version 2 can be written",
+ );
+ assert!(
+ matches!(object_hash, gix_hash::Kind::Sha1),
+ "currently only Sha1 is supported, right now we don't know how other hashes are encoded",
+ );
+ EntriesToBytesIter {
+ input: input.peekable(),
+ output,
+ object_hash,
+ num_entries: 0,
+ trailer: None,
+ data_version: version,
+ is_done: false,
+ }
+ }
+
+    /// Returns the trailing hash over all entries once done.
+ /// It's `None` if we are not yet done writing.
+ pub fn digest(&self) -> Option<gix_hash::ObjectId> {
+ self.trailer
+ }
+
+ fn next_inner(&mut self, entry: input::Entry) -> Result<input::Entry, input::Error> {
+ if self.num_entries == 0 {
+ let header_bytes = crate::data::header::encode(self.data_version, 0);
+ self.output.write_all(&header_bytes[..])?;
+ }
+ self.num_entries += 1;
+ entry.header.write_to(entry.decompressed_size, &mut self.output)?;
+ std::io::copy(
+ &mut entry
+ .compressed
+ .as_deref()
+ .expect("caller must configure generator to keep compressed bytes"),
+ &mut self.output,
+ )?;
+ Ok(entry)
+ }
+
+ fn write_header_and_digest(&mut self, last_entry: Option<&mut input::Entry>) -> Result<(), input::Error> {
+ let header_bytes = crate::data::header::encode(self.data_version, self.num_entries);
+ let num_bytes_written = if last_entry.is_some() {
+ self.output.stream_position()?
+ } else {
+ header_bytes.len() as u64
+ };
+ self.output.rewind()?;
+ self.output.write_all(&header_bytes[..])?;
+ self.output.flush()?;
+
+ self.output.rewind()?;
+ let interrupt_never = std::sync::atomic::AtomicBool::new(false);
+ let digest = hash::bytes(
+ &mut self.output,
+ num_bytes_written as usize,
+ self.object_hash,
+ &mut gix_features::progress::Discard,
+ &interrupt_never,
+ )?;
+ self.output.write_all(digest.as_slice())?;
+ self.output.flush()?;
+
+ self.is_done = true;
+ if let Some(last_entry) = last_entry {
+ last_entry.trailer = Some(digest);
+ }
+ self.trailer = Some(digest);
+ Ok(())
+ }
+}
+
+impl<I, W> Iterator for EntriesToBytesIter<I, W>
+where
+ I: Iterator<Item = Result<input::Entry, input::Error>>,
+ W: std::io::Read + std::io::Write + std::io::Seek,
+{
+    /// The entry that was just written to the output if `Ok`, or the error received from the input or while writing.
+ type Item = Result<input::Entry, input::Error>;
+
+ fn next(&mut self) -> Option<Self::Item> {
+ if self.is_done {
+ return None;
+ }
+
+ match self.input.next() {
+ Some(res) => Some(match res {
+ Ok(entry) => self.next_inner(entry).and_then(|mut entry| {
+ if self.input.peek().is_none() {
+ self.write_header_and_digest(Some(&mut entry)).map(|_| entry)
+ } else {
+ Ok(entry)
+ }
+ }),
+ Err(err) => {
+ self.is_done = true;
+ Err(err)
+ }
+ }),
+ None => match self.write_header_and_digest(None) {
+ Ok(_) => None,
+ Err(err) => Some(Err(err)),
+ },
+ }
+ }
+
+ fn size_hint(&self) -> (usize, Option<usize>) {
+ self.input.size_hint()
+ }
+}
diff --git a/vendor/gix-pack/src/data/input/entry.rs b/vendor/gix-pack/src/data/input/entry.rs
new file mode 100644
index 000000000..74d4800a0
--- /dev/null
+++ b/vendor/gix-pack/src/data/input/entry.rs
@@ -0,0 +1,65 @@
+use std::io::Write;
+
+use crate::data::{entry::Header, input};
+
+impl input::Entry {
+ /// Create a new input entry from a given data `obj` set to be placed at the given `pack_offset`.
+ ///
+ /// This method is useful when arbitrary base entries are created
+ pub fn from_data_obj(obj: &gix_object::Data<'_>, pack_offset: u64) -> Result<Self, input::Error> {
+ let header = to_header(obj.kind);
+ let compressed = compress_data(obj)?;
+ let compressed_size = compressed.len() as u64;
+ let mut entry = input::Entry {
+ header,
+ header_size: header.size(obj.data.len() as u64) as u16,
+ pack_offset,
+ compressed: Some(compressed),
+ compressed_size,
+ crc32: None,
+ decompressed_size: obj.data.len() as u64,
+ trailer: None,
+ };
+ entry.crc32 = Some(entry.compute_crc32());
+ Ok(entry)
+ }
+ /// The amount of bytes this entry may consume in a pack data file
+ pub fn bytes_in_pack(&self) -> u64 {
+ self.header_size as u64 + self.compressed_size
+ }
+
+ /// Update our CRC value by recalculating it from our header and compressed data.
+ pub fn compute_crc32(&self) -> u32 {
+ let mut header_buf = [0u8; 12 + gix_hash::Kind::longest().len_in_bytes()];
+ let header_len = self
+ .header
+ .write_to(self.decompressed_size, header_buf.as_mut())
+ .expect("write to memory will not fail");
+ let state = gix_features::hash::crc32_update(0, &header_buf[..header_len]);
+ gix_features::hash::crc32_update(state, self.compressed.as_ref().expect("we always set it"))
+ }
+}
+
+fn to_header(kind: gix_object::Kind) -> Header {
+ use gix_object::Kind::*;
+ match kind {
+ Tree => Header::Tree,
+ Blob => Header::Blob,
+ Commit => Header::Commit,
+ Tag => Header::Tag,
+ }
+}
+
+fn compress_data(obj: &gix_object::Data<'_>) -> Result<Vec<u8>, input::Error> {
+ let mut out = gix_features::zlib::stream::deflate::Write::new(Vec::new());
+ if let Err(err) = std::io::copy(&mut &*obj.data, &mut out) {
+ match err.kind() {
+ std::io::ErrorKind::Other => return Err(input::Error::Io(err)),
+ err => {
+ unreachable!("Should never see other errors than zlib, but got {:?}", err,)
+ }
+ }
+ };
+ out.flush().expect("zlib flush should never fail");
+ Ok(out.into_inner())
+}
diff --git a/vendor/gix-pack/src/data/input/lookup_ref_delta_objects.rs b/vendor/gix-pack/src/data/input/lookup_ref_delta_objects.rs
new file mode 100644
index 000000000..f52c645f8
--- /dev/null
+++ b/vendor/gix-pack/src/data/input/lookup_ref_delta_objects.rs
@@ -0,0 +1,211 @@
+use std::convert::TryInto;
+
+use gix_hash::ObjectId;
+
+use crate::data::{entry::Header, input};
+
+/// An iterator to resolve thin packs on the fly.
+pub struct LookupRefDeltaObjectsIter<I, LFn> {
+ /// The inner iterator whose entries we will resolve.
+ pub inner: I,
+ lookup: LFn,
+    /// The cached delta to provide next time we are called; it's the delta to go with the base we just resolved in its place.
+ next_delta: Option<input::Entry>,
+ /// Fuse to stop iteration after first missing object.
+ error: bool,
+ /// The overall pack-offset we accumulated thus far. Each inserted entry offsets all following
+    /// objects by its length. We need to determine exactly where the object was inserted to see if it's affected at all.
+ inserted_entry_length_at_offset: Vec<Change>,
+ /// The sum of all entries added so far, as a cache to avoid recomputation
+ inserted_entries_length_in_bytes: i64,
+ buf: Vec<u8>,
+}
+
+impl<I, LFn> LookupRefDeltaObjectsIter<I, LFn>
+where
+ I: Iterator<Item = Result<input::Entry, input::Error>>,
+ LFn: for<'a> FnMut(ObjectId, &'a mut Vec<u8>) -> Option<gix_object::Data<'a>>,
+{
+ /// Create a new instance wrapping `iter` and using `lookup` as function to retrieve objects that will serve as bases
+ /// for ref deltas seen while traversing `iter`.
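+    ///
+    /// A sketch of wrapping a thin-pack entry stream, where `entries` yields
+    /// `Result<input::Entry, input::Error>` and `odb` stands for any object database
+    /// implementing this crate's `Find` trait (both names are placeholders):
+    ///
+    /// ```ignore
+    /// let resolved = LookupRefDeltaObjectsIter::new(entries, |oid, buf| {
+    ///     // Returning `None` makes the iterator emit `input::Error::NotFound` and stop.
+    ///     odb.try_find(oid, buf).ok().flatten().map(|(obj, _location)| obj)
+    /// });
+    /// ```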
+ pub fn new(iter: I, lookup: LFn) -> Self {
+ LookupRefDeltaObjectsIter {
+ inner: iter,
+ lookup,
+ error: false,
+ inserted_entry_length_at_offset: Vec::new(),
+ inserted_entries_length_in_bytes: 0,
+ next_delta: None,
+ buf: Vec::new(),
+ }
+ }
+
+ fn shifted_pack_offset(&self, pack_offset: u64) -> u64 {
+ let new_ofs = pack_offset as i64 + self.inserted_entries_length_in_bytes;
+        new_ofs.try_into().expect("offset value never becomes negative")
+ }
+
+    /// Positive `size_change` values mean an object grew or, more commonly, was inserted. Negative values
+    /// mean the object shrank, usually because its header changed from a ref-delta to an ofs-delta.
+ fn track_change(
+ &mut self,
+ shifted_pack_offset: u64,
+ pack_offset: u64,
+ size_change: i64,
+ oid: impl Into<Option<ObjectId>>,
+ ) {
+ if size_change == 0 {
+ return;
+ }
+ self.inserted_entry_length_at_offset.push(Change {
+ shifted_pack_offset,
+ pack_offset,
+ size_change_in_bytes: size_change,
+ oid: oid.into().unwrap_or_else(||
+ // NOTE: this value acts as sentinel and the actual hash kind doesn't matter.
+ gix_hash::Kind::Sha1.null()),
+ });
+ self.inserted_entries_length_in_bytes += size_change;
+ }
+
+ fn shift_entry_and_point_to_base_by_offset(&mut self, entry: &mut input::Entry, base_distance: u64) {
+ let pack_offset = entry.pack_offset;
+ entry.pack_offset = self.shifted_pack_offset(pack_offset);
+ entry.header = Header::OfsDelta { base_distance };
+ let previous_header_size = entry.header_size;
+ entry.header_size = entry.header.size(entry.decompressed_size) as u16;
+
+ let change = entry.header_size as i64 - previous_header_size as i64;
+ entry.crc32 = Some(entry.compute_crc32());
+ self.track_change(entry.pack_offset, pack_offset, change, None);
+ }
+}
+
+impl<I, LFn> Iterator for LookupRefDeltaObjectsIter<I, LFn>
+where
+ I: Iterator<Item = Result<input::Entry, input::Error>>,
+ LFn: for<'a> FnMut(ObjectId, &'a mut Vec<u8>) -> Option<gix_object::Data<'a>>,
+{
+ type Item = Result<input::Entry, input::Error>;
+
+ fn next(&mut self) -> Option<Self::Item> {
+ if self.error {
+ return None;
+ }
+ if let Some(delta) = self.next_delta.take() {
+ return Some(Ok(delta));
+ }
+ match self.inner.next() {
+ Some(Ok(mut entry)) => match entry.header {
+ Header::RefDelta { base_id } => {
+ match self.inserted_entry_length_at_offset.iter().rfind(|e| e.oid == base_id) {
+ None => {
+ let base_entry = match (self.lookup)(base_id, &mut self.buf) {
+ Some(obj) => {
+ let current_pack_offset = entry.pack_offset;
+ let mut entry = match input::Entry::from_data_obj(&obj, 0) {
+ Ok(e) => e,
+ Err(err) => return Some(Err(err)),
+ };
+ entry.pack_offset = self.shifted_pack_offset(current_pack_offset);
+ self.track_change(
+ entry.pack_offset,
+ current_pack_offset,
+ entry.bytes_in_pack() as i64,
+ base_id,
+ );
+ entry
+ }
+ None => {
+ self.error = true;
+ return Some(Err(input::Error::NotFound { object_id: base_id }));
+ }
+ };
+
+ {
+ self.shift_entry_and_point_to_base_by_offset(&mut entry, base_entry.bytes_in_pack());
+ self.next_delta = Some(entry);
+ }
+ Some(Ok(base_entry))
+ }
+ Some(base_entry) => {
+ let base_distance =
+ self.shifted_pack_offset(entry.pack_offset) - base_entry.shifted_pack_offset;
+ self.shift_entry_and_point_to_base_by_offset(&mut entry, base_distance);
+ Some(Ok(entry))
+ }
+ }
+ }
+ _ => {
+ if self.inserted_entries_length_in_bytes != 0 {
+ if let Header::OfsDelta { base_distance } = entry.header {
+ // We have to find the new distance based on the previous distance to the base, using the absolute
+ // pack offset computed from it as stored in `base_pack_offset`.
+ let base_pack_offset = entry
+ .pack_offset
+ .checked_sub(base_distance)
+ .expect("distance to be in range of pack");
+ match self
+ .inserted_entry_length_at_offset
+ .binary_search_by_key(&base_pack_offset, |c| c.pack_offset)
+ {
+ Ok(index) => {
+ let index = {
+ let maybe_index_of_actual_entry = index + 1;
+ self.inserted_entry_length_at_offset
+ .get(maybe_index_of_actual_entry)
+ .and_then(|c| {
+ (c.pack_offset == base_pack_offset)
+ .then_some(maybe_index_of_actual_entry)
+ })
+ .unwrap_or(index)
+ };
+ let new_distance = self
+ .shifted_pack_offset(entry.pack_offset)
+ .checked_sub(self.inserted_entry_length_at_offset[index].shifted_pack_offset)
+ .expect("a base that is behind us in the pack");
+ self.shift_entry_and_point_to_base_by_offset(&mut entry, new_distance);
+ }
+ Err(index) => {
+ let change_since_offset = self.inserted_entry_length_at_offset[index..]
+ .iter()
+ .map(|c| c.size_change_in_bytes)
+ .sum::<i64>();
+ let new_distance: u64 = {
+ (base_distance as i64 + change_since_offset)
+ .try_into()
+ .expect("it still points behind us")
+ };
+ self.shift_entry_and_point_to_base_by_offset(&mut entry, new_distance);
+ }
+ }
+ } else {
+ // Offset this entry by all changes (positive or negative) that we saw thus far.
+ entry.pack_offset = self.shifted_pack_offset(entry.pack_offset);
+ }
+ }
+ Some(Ok(entry))
+ }
+ },
+ other => other,
+ }
+ }
+
+ fn size_hint(&self) -> (usize, Option<usize>) {
+ let (min, max) = self.inner.size_hint();
+ max.map(|max| (min, Some(max * 2))).unwrap_or_else(|| (min * 2, None))
+ }
+}
+
+#[derive(Debug)]
+struct Change {
+    /// The original pack offset as mentioned in the entry we saw. This is used to find it as a base object if deltas refer to it by its
+    /// old offset.
+ pack_offset: u64,
+ /// The new pack offset that is the shifted location of the pack entry in the pack.
+ shifted_pack_offset: u64,
+ /// The size change of the entry header, negative values denote shrinking, positive denote growing.
+ size_change_in_bytes: i64,
+ /// The object id of the entry responsible for the change, or null if it's an entry just for tracking an insertion.
+ oid: ObjectId,
+}
diff --git a/vendor/gix-pack/src/data/input/mod.rs b/vendor/gix-pack/src/data/input/mod.rs
new file mode 100644
index 000000000..df191de67
--- /dev/null
+++ b/vendor/gix-pack/src/data/input/mod.rs
@@ -0,0 +1,41 @@
+/// An item of the iteration produced by [`BytesToEntriesIter`]
+#[derive(PartialEq, Eq, Debug, Hash, Ord, PartialOrd, Clone)]
+#[cfg_attr(feature = "serde1", derive(serde::Serialize, serde::Deserialize))]
+pub struct Entry {
+ /// The header of a pack entry
+ pub header: crate::data::entry::Header,
+ /// The amount of bytes used to encode the `header`. `pack_offset + header_size` is the beginning of
+ /// the compressed data in the pack.
+ pub header_size: u16,
+ /// The first byte of the entry at which the `header` can be read.
+ pub pack_offset: u64,
+    /// The bytes consumed while producing `decompressed`.
+    /// These do not contain the header, which makes it possible to easily replace a RefDelta with offset deltas
+    /// when resolving thin packs.
+    /// Depends on the `EntryDataMode` the iterator was initialized with.
+ pub compressed: Option<Vec<u8>>,
+    /// The amount of bytes the compressed portion of the entry takes, i.e. the portion behind the header.
+ pub compressed_size: u64,
+    /// The CRC32 over the complete entry, that is, the encoded header and the compressed object data.
+    /// Depends on the `EntryDataMode` the iterator was initialized with.
+ pub crc32: Option<u32>,
+ /// The amount of decompressed bytes of the entry.
+ pub decompressed_size: u64,
+ /// Set for the last object in the iteration, providing the hash over all bytes of the iteration
+ /// for use as trailer in a pack or to verify it matches the trailer.
+ pub trailer: Option<gix_hash::ObjectId>,
+}
+
+mod entry;
+
+mod types;
+pub use types::{EntryDataMode, Error, Mode};
+
+mod bytes_to_entries;
+pub use bytes_to_entries::BytesToEntriesIter;
+
+mod lookup_ref_delta_objects;
+pub use lookup_ref_delta_objects::LookupRefDeltaObjectsIter;
+
+mod entries_to_bytes;
+pub use entries_to_bytes::EntriesToBytesIter;
diff --git a/vendor/gix-pack/src/data/input/types.rs b/vendor/gix-pack/src/data/input/types.rs
new file mode 100644
index 000000000..6fcd459e2
--- /dev/null
+++ b/vendor/gix-pack/src/data/input/types.rs
@@ -0,0 +1,73 @@
+use std::io;
+
+/// Returned by [`BytesToEntriesIter::new_from_header()`][crate::data::input::BytesToEntriesIter::new_from_header()] and as part
+/// of `Item` of [`BytesToEntriesIter`][crate::data::input::BytesToEntriesIter].
+#[derive(thiserror::Error, Debug)]
+#[allow(missing_docs)]
+pub enum Error {
+ #[error("An IO operation failed while streaming an entry")]
+ Io(#[from] io::Error),
+ #[error(transparent)]
+ PackParse(#[from] crate::data::header::decode::Error),
+ #[error("pack checksum in trailer was {expected}, but actual checksum was {actual}")]
+ ChecksumMismatch {
+ expected: gix_hash::ObjectId,
+ actual: gix_hash::ObjectId,
+ },
+    #[error("pack is incomplete: it was decompressed into {actual} bytes but {expected} bytes were expected.")]
+ IncompletePack { actual: u64, expected: u64 },
+ #[error("The object {object_id} could not be decoded or wasn't found")]
+ NotFound { object_id: gix_hash::ObjectId },
+}
+
+/// Iteration Mode
+#[derive(PartialEq, Eq, Debug, Hash, Ord, PartialOrd, Clone, Copy)]
+#[cfg_attr(feature = "serde1", derive(serde::Serialize, serde::Deserialize))]
+pub enum Mode {
+ /// Provide the trailer as read from the pack
+ AsIs,
+    /// Generate our own hash and trigger an error on the last iterated object
+ /// if it does not match the hash provided with the pack.
+ ///
+ /// This way the one iterating the data cannot miss corruption as long as
+ /// the iteration is continued through to the end.
+ Verify,
+    /// Generate our own hash and, if there was an error or the objects are depleted early
+    /// due to a partial pack, return the last valid entry along with our own hash thus far.
+    /// Note that the existing pack hash, if present, will be ignored.
+    /// As we won't know which object fails, every object will carry the hash obtained thus far.
+    /// This also means that algorithms must know about this possibility, or else they might wrongfully
+    /// assume the pack is finished.
+ Restore,
+}
+
+/// Define what to do with the compressed bytes portion of a pack [`Entry`][super::Entry]
+#[derive(PartialEq, Eq, Debug, Hash, Ord, PartialOrd, Clone, Copy)]
+#[cfg_attr(feature = "serde1", derive(serde::Serialize, serde::Deserialize))]
+pub enum EntryDataMode {
+ /// Do nothing with the compressed bytes we read
+ Ignore,
+ /// Only create a CRC32 of the entry, otherwise similar to `Ignore`
+ Crc32,
+ /// Keep them and pass them along in a newly allocated buffer
+ Keep,
+ /// As above, but also compute a CRC32
+ KeepAndCrc32,
+}
+
+impl EntryDataMode {
+ /// Returns true if a crc32 should be computed
+ pub fn crc32(&self) -> bool {
+ match self {
+ EntryDataMode::KeepAndCrc32 | EntryDataMode::Crc32 => true,
+ EntryDataMode::Keep | EntryDataMode::Ignore => false,
+ }
+ }
+ /// Returns true if compressed bytes should be kept
+ pub fn keep(&self) -> bool {
+ match self {
+ EntryDataMode::Keep | EntryDataMode::KeepAndCrc32 => true,
+ EntryDataMode::Ignore | EntryDataMode::Crc32 => false,
+ }
+ }
+}
diff --git a/vendor/gix-pack/src/data/mod.rs b/vendor/gix-pack/src/data/mod.rs
new file mode 100644
index 000000000..da717fc1a
--- /dev/null
+++ b/vendor/gix-pack/src/data/mod.rs
@@ -0,0 +1,134 @@
+//! a pack data file
+use std::{convert::TryInto, path::Path};
+
+/// The offset to an entry into the pack data file, relative to its beginning.
+pub type Offset = u64;
+
+/// An identifier to uniquely identify all packs loaded within a known context or namespace.
+pub type Id = u32;
+
+use memmap2::Mmap;
+
+/// An entry representing a full- or delta-object within a pack
+#[derive(PartialEq, Eq, Debug, Hash, Ord, PartialOrd, Clone)]
+#[cfg_attr(feature = "serde1", derive(serde::Serialize, serde::Deserialize))]
+pub struct Entry {
+ /// The entry's header
+ pub header: entry::Header,
+ /// The decompressed size of the entry in bytes.
+ ///
+ /// Note that for non-delta entries this will be the size of the object itself.
+ pub decompressed_size: u64,
+ /// absolute offset to compressed object data in the pack, just behind the entry's header
+ pub data_offset: Offset,
+}
+
+mod file;
+pub use file::{decode, verify, Header};
+///
+pub mod header;
+
+///
+pub mod init {
+ pub use super::header::decode::Error;
+}
+
+///
+pub mod entry;
+
+///
+pub mod input;
+
+/// Utilities to encode pack data entries and write them to a `Write` implementation to resemble a pack data file.
+pub mod output;
+
+/// A slice into a pack file denoting a pack entry.
+///
+/// An entry can be decoded into an object.
+pub type EntryRange = std::ops::Range<Offset>;
+
+/// Supported versions of a pack data file
+#[derive(PartialEq, Eq, Debug, Hash, Ord, PartialOrd, Clone, Copy)]
+#[cfg_attr(feature = "serde1", derive(serde::Serialize, serde::Deserialize))]
+#[allow(missing_docs)]
+pub enum Version {
+ V2,
+ V3,
+}
+
+impl Default for Version {
+ fn default() -> Self {
+ Version::V2
+ }
+}
+
+/// A pack data file
+pub struct File {
+ data: Mmap,
+ path: std::path::PathBuf,
+ /// A value to represent this pack uniquely when used with cache lookup, or a way to identify this pack by its location on disk.
+ /// The same location on disk should yield the same id.
+ ///
+ /// These must be unique per pack and must be stable, that is they don't change if the pack doesn't change.
+ /// If the same id is assigned (or reassigned) to different packs, pack creation or cache access will fail in hard-to-debug ways.
+ ///
+ /// This value is controlled by the owning object store, which can use it in whichever way it wants as long as the above constraints are met.
+ pub id: Id,
+ version: Version,
+ num_objects: u32,
+ /// The size of the hash contained within. This is entirely determined by the caller, and repositories have to know which hash to use
+ /// based on their configuration.
+ hash_len: usize,
+ object_hash: gix_hash::Kind,
+}
+
+/// Information about the pack data file itself
+impl File {
+ /// The pack data version of this file
+ pub fn version(&self) -> Version {
+ self.version
+ }
+ /// The number of objects stored in this pack data file
+ pub fn num_objects(&self) -> u32 {
+ self.num_objects
+ }
+ /// The length of all mapped data, including the pack header and the pack trailer
+ pub fn data_len(&self) -> usize {
+ self.data.len()
+ }
+ /// The kind of hash we use internally.
+ pub fn object_hash(&self) -> gix_hash::Kind {
+ self.object_hash
+ }
+ /// The position of the byte one past the last pack entry, or in other terms, the first byte of the trailing hash.
+ pub fn pack_end(&self) -> usize {
+ self.data.len() - self.hash_len
+ }
+
+ /// The path to the pack data file on disk
+ pub fn path(&self) -> &Path {
+ &self.path
+ }
+
+ /// Returns the pack data at the given slice if its range is contained in the mapped pack data
+ pub fn entry_slice(&self, slice: EntryRange) -> Option<&[u8]> {
+ let entry_end: usize = slice.end.try_into().expect("end of pack fits into usize");
+ let entry_start = slice.start as usize;
+ self.data.get(entry_start..entry_end)
+ }
+
+ /// Returns the CRC32 of the pack data indicated by `pack_offset` and the `size` of the mapped data.
+ ///
+ /// _Note:_ finding the right size is only possible by decompressing
+ /// the pack entry beforehand, or by using the (to be sorted) offsets stored in an index file.
+ ///
+ /// # Panics
+ ///
+ /// If `pack_offset` or `size` are pointing to a range outside of the mapped pack data.
+ pub fn entry_crc32(&self, pack_offset: Offset, size: usize) -> u32 {
+        let pack_offset: usize = pack_offset.try_into().expect("pack_offset fits into usize");
+ gix_features::hash::crc32(&self.data[pack_offset..pack_offset + size])
+ }
+}
+
+pub(crate) mod delta;
diff --git a/vendor/gix-pack/src/data/output/bytes.rs b/vendor/gix-pack/src/data/output/bytes.rs
new file mode 100644
index 000000000..ec219db9d
--- /dev/null
+++ b/vendor/gix-pack/src/data/output/bytes.rs
@@ -0,0 +1,156 @@
+use std::io::Write;
+
+use gix_features::hash;
+
+use crate::data::output;
+
+/// The error returned by `next()` in the [`FromEntriesIter`] iterator.
+#[allow(missing_docs)]
+#[derive(Debug, thiserror::Error)]
+pub enum Error<E>
+where
+ E: std::error::Error + 'static,
+{
+ #[error(transparent)]
+ Io(#[from] std::io::Error),
+ #[error(transparent)]
+ Input(E),
+}
+
+/// An implementation of [`Iterator`] to write [encoded entries][output::Entry] to an inner implementation each time
+/// `next()` is called.
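+///
+/// A sketch of writing a pack to any `std::io::Write`, assuming `chunks` yields
+/// `Result<Vec<output::Entry>, E>` in pack order (for instance the output of
+/// `output::entry::iter_from_counts()` put back into sequence) and that `out_file` and
+/// `num_entries` are provided by the caller (all placeholder names):
+///
+/// ```ignore
+/// let mut pack_writer = FromEntriesIter::new(
+///     chunks,
+///     std::io::BufWriter::new(out_file),
+///     num_entries,
+///     gix_pack::data::Version::V2,
+///     gix_hash::Kind::Sha1,
+/// );
+/// for bytes_written in &mut pack_writer {
+///     let _ = bytes_written?;
+/// }
+/// let pack_checksum = pack_writer.digest().expect("writing is complete");
+/// ```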
+pub struct FromEntriesIter<I, W> {
+ /// An iterator for input [`output::Entry`] instances
+ pub input: I,
+ /// A way of writing encoded bytes.
+ output: hash::Write<W>,
+ /// Our trailing hash when done writing all input entries
+ trailer: Option<gix_hash::ObjectId>,
+ /// The amount of objects in the iteration and the version of the packfile to be written.
+ /// Will be `None` to signal the header was written already.
+ header_info: Option<(crate::data::Version, u32)>,
+ /// The pack data version with which pack entries should be written.
+ entry_version: crate::data::Version,
+ /// The amount of written bytes thus far
+ written: u64,
+ /// Required to quickly find offsets by object IDs, as future objects may refer to those in the past to become a delta offset base.
+ /// It stores the pack offsets at which objects begin.
+    /// Additionally we store if an object was invalid, and if so we will not write it nor will we allow delta objects to refer to it.
+ pack_offsets_and_validity: Vec<(u64, bool)>,
+ /// If we are done, no additional writes will occur
+ is_done: bool,
+}
+
+impl<I, W, E> FromEntriesIter<I, W>
+where
+ I: Iterator<Item = Result<Vec<output::Entry>, E>>,
+ W: std::io::Write,
+ E: std::error::Error + 'static,
+{
+ /// Create a new instance reading [entries][output::Entry] from an `input` iterator and write pack data bytes to
+ /// `output` writer, resembling a pack of `version` with exactly `num_entries` amount of objects contained in it.
+ /// `object_hash` is the kind of hash to use for the pack checksum and maybe other places, depending on the version.
+ ///
+ /// The input chunks are expected to be sorted already. You can use the [InOrderIter][gix_features::parallel::InOrderIter] to assure
+ /// this happens on the fly holding entire chunks in memory as long as needed for them to be dispensed in order.
+ ///
+ /// # Panics
+ ///
+    /// Currently, not all combinations of `object_hash` and `version` are supported; unsupported combinations trigger assertion failures.
+ pub fn new(
+ input: I,
+ output: W,
+ num_entries: u32,
+ version: crate::data::Version,
+ object_hash: gix_hash::Kind,
+ ) -> Self {
+ assert!(
+ matches!(version, crate::data::Version::V2),
+ "currently only pack version 2 can be written",
+ );
+ FromEntriesIter {
+ input,
+ output: hash::Write::new(output, object_hash),
+ trailer: None,
+ entry_version: version,
+ pack_offsets_and_validity: Vec::with_capacity(num_entries as usize),
+ written: 0,
+ header_info: Some((version, num_entries)),
+ is_done: false,
+ }
+ }
+
+ /// Consume this instance and return the `output` implementation.
+ ///
+ /// _Note_ that the `input` iterator can be moved out of this instance beforehand.
+ pub fn into_write(self) -> W {
+ self.output.inner
+ }
+
+ /// Returns the trailing hash over all written entries once done.
+ /// It's `None` if we are not yet done writing.
+ pub fn digest(&self) -> Option<gix_hash::ObjectId> {
+ self.trailer
+ }
+
+ fn next_inner(&mut self) -> Result<u64, Error<E>> {
+ let previous_written = self.written;
+ if let Some((version, num_entries)) = self.header_info.take() {
+ let header_bytes = crate::data::header::encode(version, num_entries);
+ self.output.write_all(&header_bytes[..])?;
+ self.written += header_bytes.len() as u64;
+ }
+ match self.input.next() {
+ Some(entries) => {
+ for entry in entries.map_err(Error::Input)? {
+ if entry.is_invalid() {
+ self.pack_offsets_and_validity.push((0, false));
+ continue;
+ };
+ self.pack_offsets_and_validity.push((self.written, true));
+ let header = entry.to_entry_header(self.entry_version, |index| {
+ let (base_offset, is_valid_object) = self.pack_offsets_and_validity[index];
+ if !is_valid_object {
+                            unreachable!("if you see this the object database is inconsistent, as a delta refers to a non-existing object")
+ }
+ self.written - base_offset
+ });
+ self.written += header.write_to(entry.decompressed_size as u64, &mut self.output)? as u64;
+ self.written += std::io::copy(&mut &*entry.compressed_data, &mut self.output)?;
+ }
+ }
+ None => {
+ let digest = self.output.hash.clone().digest();
+ self.output.inner.write_all(&digest[..])?;
+ self.written += digest.len() as u64;
+ self.output.inner.flush()?;
+ self.is_done = true;
+ self.trailer = Some(gix_hash::ObjectId::from(digest));
+ }
+ };
+ Ok(self.written - previous_written)
+ }
+}
+
+impl<I, W, E> Iterator for FromEntriesIter<I, W>
+where
+ I: Iterator<Item = Result<Vec<output::Entry>, E>>,
+ W: std::io::Write,
+ E: std::error::Error + 'static,
+{
+ /// The amount of bytes written to `out` if `Ok` or the error `E` received from the input.
+ type Item = Result<u64, Error<E>>;
+
+ fn next(&mut self) -> Option<Self::Item> {
+ if self.is_done {
+ return None;
+ }
+ Some(match self.next_inner() {
+ Err(err) => {
+ self.is_done = true;
+ Err(err)
+ }
+ Ok(written) => Ok(written),
+ })
+ }
+}
diff --git a/vendor/gix-pack/src/data/output/count/mod.rs b/vendor/gix-pack/src/data/output/count/mod.rs
new file mode 100644
index 000000000..e7ee767de
--- /dev/null
+++ b/vendor/gix-pack/src/data/output/count/mod.rs
@@ -0,0 +1,49 @@
+use gix_hash::ObjectId;
+
+use crate::data::output::Count;
+
+/// Specifies how the pack location was handled during counting
+#[derive(PartialEq, Eq, Debug, Hash, Ord, PartialOrd, Clone)]
+#[cfg_attr(feature = "serde1", derive(serde::Serialize, serde::Deserialize))]
+pub enum PackLocation {
+ /// We did not lookup this object
+ NotLookedUp,
+ /// The object was looked up and there may be a location in a pack, along with entry information
+ LookedUp(Option<crate::data::entry::Location>),
+}
+
+impl PackLocation {
+ /// Directly go through to LookedUp variant, panic otherwise
+ pub fn is_none(&self) -> bool {
+ match self {
+ PackLocation::LookedUp(opt) => opt.is_none(),
+ PackLocation::NotLookedUp => unreachable!("must have been resolved"),
+ }
+ }
+ /// Directly go through to LookedUp variant, panic otherwise
+ pub fn as_ref(&self) -> Option<&crate::data::entry::Location> {
+ match self {
+ PackLocation::LookedUp(opt) => opt.as_ref(),
+ PackLocation::NotLookedUp => unreachable!("must have been resolved"),
+ }
+ }
+}
+
+impl Count {
+ /// Create a new instance from the given `oid` and its corresponding git `obj`ect data.
+ pub fn from_data(oid: impl Into<ObjectId>, location: Option<crate::data::entry::Location>) -> Self {
+ Count {
+ id: oid.into(),
+ entry_pack_location: PackLocation::LookedUp(location),
+ }
+ }
+}
+
+#[path = "objects/mod.rs"]
+mod objects_impl;
+pub use objects_impl::{objects, objects_unthreaded};
+
+///
+pub mod objects {
+ pub use super::objects_impl::{Error, ObjectExpansion, Options, Outcome, Result};
+}
diff --git a/vendor/gix-pack/src/data/output/count/objects/mod.rs b/vendor/gix-pack/src/data/output/count/objects/mod.rs
new file mode 100644
index 000000000..d56bc9a5f
--- /dev/null
+++ b/vendor/gix-pack/src/data/output/count/objects/mod.rs
@@ -0,0 +1,405 @@
+use std::{
+ cell::RefCell,
+ sync::{atomic::AtomicBool, Arc},
+};
+
+use gix_features::{parallel, progress::Progress};
+use gix_hash::ObjectId;
+
+use crate::{data::output, find};
+
+pub(in crate::data::output::count::objects_impl) mod reduce;
+mod util;
+
+mod types;
+pub use types::{Error, ObjectExpansion, Options, Outcome};
+
+mod tree;
+
+/// The return type used by [`objects()`].
+pub type Result<E1, E2> = std::result::Result<(Vec<output::Count>, Outcome), Error<E1, E2>>;
+
+/// Generate [`Count`][output::Count]s from input `objects` with object expansion based on [`options`][Options]
+/// to learn which objects would constitute a pack. This step is required to know exactly how many objects would
+/// be in a pack while keeping data around to minimize later object database access. A usage sketch follows the
+/// parameter list below.
+///
+/// A [`Count`][output::Count] object maintains enough state to greatly accelerate future access of packed objects.
+///
+/// * `db` - the object store to use for accessing objects.
+/// * `objects_ids`
+///    * A list of object ids to add to the pack. Duplication checks are performed so no object is ever added to a pack twice.
+/// * Objects may be expanded based on the provided [`options`][Options]
+/// * `progress`
+/// * a way to obtain progress information
+/// * `should_interrupt`
+/// * A flag that is set to true if the operation should stop
+/// * `options`
+/// * more configuration
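+///
+/// A sketch of counting a single commit and everything reachable from its tree, where `db` is any
+/// implementation of this crate's `Find` trait and `commit_id` an existing commit (both placeholders):
+///
+/// ```ignore
+/// use gix_pack::data::output::count::objects;
+///
+/// let should_interrupt = std::sync::atomic::AtomicBool::new(false);
+/// let (counts, outcome) = objects(
+///     db,
+///     std::iter::once(Ok::<_, std::convert::Infallible>(commit_id)),
+///     gix_features::progress::Discard,
+///     &should_interrupt,
+///     objects::Options {
+///         input_object_expansion: objects::ObjectExpansion::TreeContents,
+///         ..Default::default()
+///     },
+/// )?;
+/// assert_eq!(outcome.total_objects, counts.len());
+/// ```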
+pub fn objects<Find, Iter, IterErr, Oid>(
+ db: Find,
+ objects_ids: Iter,
+ progress: impl Progress,
+ should_interrupt: &AtomicBool,
+ Options {
+ thread_limit,
+ input_object_expansion,
+ chunk_size,
+ }: Options,
+) -> Result<find::existing::Error<Find::Error>, IterErr>
+where
+ Find: crate::Find + Send + Clone,
+ <Find as crate::Find>::Error: Send,
+ Iter: Iterator<Item = std::result::Result<Oid, IterErr>> + Send,
+ Oid: Into<ObjectId> + Send,
+ IterErr: std::error::Error + Send,
+{
+ let lower_bound = objects_ids.size_hint().0;
+ let (chunk_size, thread_limit, _) = parallel::optimize_chunk_size_and_thread_limit(
+ chunk_size,
+ if lower_bound == 0 { None } else { Some(lower_bound) },
+ thread_limit,
+ None,
+ );
+ let chunks = gix_features::iter::Chunks {
+ inner: objects_ids,
+ size: chunk_size,
+ };
+ let seen_objs = gix_hashtable::sync::ObjectIdMap::default();
+ let progress = Arc::new(parking_lot::Mutex::new(progress));
+
+ parallel::in_parallel(
+ chunks,
+ thread_limit,
+ {
+ let progress = Arc::clone(&progress);
+ move |n| {
+ (
+ Vec::new(), // object data buffer
+ Vec::new(), // object data buffer 2 to hold two objects at a time
+ {
+ let mut p = progress
+ .lock()
+ .add_child_with_id(format!("thread {n}"), gix_features::progress::UNKNOWN);
+ p.init(None, gix_features::progress::count("objects"));
+ p
+ },
+ )
+ }
+ },
+ {
+ let seen_objs = &seen_objs;
+ move |oids: Vec<std::result::Result<Oid, IterErr>>, (buf1, buf2, progress)| {
+ expand::this(
+ &db,
+ input_object_expansion,
+ seen_objs,
+ oids,
+ buf1,
+ buf2,
+ progress,
+ should_interrupt,
+ true, /*allow pack lookups*/
+ )
+ }
+ },
+ reduce::Statistics::new(progress),
+ )
+}
+
+/// Like [`objects()`] but using a single thread only to mostly save on the otherwise required overhead.
+pub fn objects_unthreaded<Find, IterErr, Oid>(
+ db: Find,
+ object_ids: impl Iterator<Item = std::result::Result<Oid, IterErr>>,
+ mut progress: impl Progress,
+ should_interrupt: &AtomicBool,
+ input_object_expansion: ObjectExpansion,
+) -> Result<find::existing::Error<Find::Error>, IterErr>
+where
+ Find: crate::Find,
+ Oid: Into<ObjectId>,
+ IterErr: std::error::Error,
+{
+ let seen_objs = RefCell::new(gix_hashtable::HashSet::default());
+
+ let (mut buf1, mut buf2) = (Vec::new(), Vec::new());
+ expand::this(
+ &db,
+ input_object_expansion,
+ &seen_objs,
+ object_ids,
+ &mut buf1,
+ &mut buf2,
+ &mut progress,
+ should_interrupt,
+ false, /*allow pack lookups*/
+ )
+}
+
+mod expand {
+ use std::sync::atomic::{AtomicBool, Ordering};
+
+ use gix_features::progress::Progress;
+ use gix_hash::{oid, ObjectId};
+ use gix_object::{CommitRefIter, TagRefIter};
+
+ use super::{
+ tree,
+ types::{Error, ObjectExpansion, Outcome},
+ util,
+ };
+ use crate::{
+ data::{output, output::count::PackLocation},
+ find, FindExt,
+ };
+
+ #[allow(clippy::too_many_arguments)]
+ pub fn this<Find, IterErr, Oid>(
+ db: &Find,
+ input_object_expansion: ObjectExpansion,
+ seen_objs: &impl util::InsertImmutable,
+ oids: impl IntoIterator<Item = std::result::Result<Oid, IterErr>>,
+ buf1: &mut Vec<u8>,
+ #[allow(clippy::ptr_arg)] buf2: &mut Vec<u8>,
+ progress: &mut impl Progress,
+ should_interrupt: &AtomicBool,
+ allow_pack_lookups: bool,
+ ) -> super::Result<find::existing::Error<Find::Error>, IterErr>
+ where
+ Find: crate::Find,
+ Oid: Into<ObjectId>,
+ IterErr: std::error::Error,
+ {
+ use ObjectExpansion::*;
+
+ let mut out = Vec::new();
+ let mut tree_traversal_state = gix_traverse::tree::breadthfirst::State::default();
+ let mut tree_diff_state = gix_diff::tree::State::default();
+ let mut parent_commit_ids = Vec::new();
+ let mut traverse_delegate = tree::traverse::AllUnseen::new(seen_objs);
+ let mut changes_delegate = tree::changes::AllNew::new(seen_objs);
+ let mut outcome = Outcome::default();
+
+ let stats = &mut outcome;
+ for id in oids.into_iter() {
+ if should_interrupt.load(Ordering::Relaxed) {
+ return Err(Error::Interrupted);
+ }
+
+ let id = id.map(|oid| oid.into()).map_err(Error::InputIteration)?;
+ let (obj, location) = db.find(id, buf1)?;
+ stats.input_objects += 1;
+ match input_object_expansion {
+ TreeAdditionsComparedToAncestor => {
+ use gix_object::Kind::*;
+ let mut obj = obj;
+ let mut location = location;
+ let mut id = id.to_owned();
+
+ loop {
+ push_obj_count_unique(&mut out, seen_objs, &id, location, progress, stats, false);
+ match obj.kind {
+ Tree | Blob => break,
+ Tag => {
+ id = TagRefIter::from_bytes(obj.data)
+ .target_id()
+ .expect("every tag has a target");
+ let tmp = db.find(id, buf1)?;
+
+ obj = tmp.0;
+ location = tmp.1;
+
+ stats.expanded_objects += 1;
+ continue;
+ }
+ Commit => {
+ let current_tree_iter = {
+ let mut commit_iter = CommitRefIter::from_bytes(obj.data);
+ let tree_id = commit_iter.tree_id().expect("every commit has a tree");
+ parent_commit_ids.clear();
+ for token in commit_iter {
+ match token {
+ Ok(gix_object::commit::ref_iter::Token::Parent { id }) => {
+ parent_commit_ids.push(id)
+ }
+ Ok(_) => break,
+ Err(err) => return Err(Error::CommitDecode(err)),
+ }
+ }
+ let (obj, location) = db.find(tree_id, buf1)?;
+ push_obj_count_unique(
+ &mut out, seen_objs, &tree_id, location, progress, stats, true,
+ );
+ gix_object::TreeRefIter::from_bytes(obj.data)
+ };
+
+ let objects = if parent_commit_ids.is_empty() {
+ traverse_delegate.clear();
+ gix_traverse::tree::breadthfirst(
+ current_tree_iter,
+ &mut tree_traversal_state,
+ |oid, buf| {
+ stats.decoded_objects += 1;
+ match db.find(oid, buf).ok() {
+ Some((obj, location)) => {
+ progress.inc();
+ stats.expanded_objects += 1;
+ out.push(output::Count::from_data(oid, location));
+ obj.try_into_tree_iter()
+ }
+ None => None,
+ }
+ },
+ &mut traverse_delegate,
+ )
+ .map_err(Error::TreeTraverse)?;
+ &traverse_delegate.non_trees
+ } else {
+ for commit_id in &parent_commit_ids {
+ let parent_tree_id = {
+ let (parent_commit_obj, location) = db.find(commit_id, buf2)?;
+
+ push_obj_count_unique(
+ &mut out, seen_objs, commit_id, location, progress, stats, true,
+ );
+ CommitRefIter::from_bytes(parent_commit_obj.data)
+ .tree_id()
+ .expect("every commit has a tree")
+ };
+ let parent_tree = {
+ let (parent_tree_obj, location) = db.find(parent_tree_id, buf2)?;
+ push_obj_count_unique(
+ &mut out,
+ seen_objs,
+ &parent_tree_id,
+ location,
+ progress,
+ stats,
+ true,
+ );
+ gix_object::TreeRefIter::from_bytes(parent_tree_obj.data)
+ };
+
+ changes_delegate.clear();
+ gix_diff::tree::Changes::from(Some(parent_tree))
+ .needed_to_obtain(
+ current_tree_iter.clone(),
+ &mut tree_diff_state,
+ |oid, buf| {
+ stats.decoded_objects += 1;
+ db.find_tree_iter(oid, buf).map(|t| t.0)
+ },
+ &mut changes_delegate,
+ )
+ .map_err(Error::TreeChanges)?;
+ }
+ &changes_delegate.objects
+ };
+ for id in objects.iter() {
+ out.push(id_to_count(db, buf2, id, progress, stats, allow_pack_lookups));
+ }
+ break;
+ }
+ }
+ }
+ }
+ TreeContents => {
+ use gix_object::Kind::*;
+ let mut id = id;
+ let mut obj = (obj, location);
+ loop {
+ push_obj_count_unique(&mut out, seen_objs, &id, obj.1.clone(), progress, stats, false);
+ match obj.0.kind {
+ Tree => {
+ traverse_delegate.clear();
+ gix_traverse::tree::breadthfirst(
+ gix_object::TreeRefIter::from_bytes(obj.0.data),
+ &mut tree_traversal_state,
+ |oid, buf| {
+ stats.decoded_objects += 1;
+ match db.find(oid, buf).ok() {
+ Some((obj, location)) => {
+ progress.inc();
+ stats.expanded_objects += 1;
+ out.push(output::Count::from_data(oid, location));
+ obj.try_into_tree_iter()
+ }
+ None => None,
+ }
+ },
+ &mut traverse_delegate,
+ )
+ .map_err(Error::TreeTraverse)?;
+ for id in traverse_delegate.non_trees.iter() {
+ out.push(id_to_count(db, buf1, id, progress, stats, allow_pack_lookups));
+ }
+ break;
+ }
+ Commit => {
+ id = CommitRefIter::from_bytes(obj.0.data)
+ .tree_id()
+ .expect("every commit has a tree");
+ stats.expanded_objects += 1;
+ obj = db.find(id, buf1)?;
+ continue;
+ }
+ Blob => break,
+ Tag => {
+ id = TagRefIter::from_bytes(obj.0.data)
+ .target_id()
+ .expect("every tag has a target");
+ stats.expanded_objects += 1;
+ obj = db.find(id, buf1)?;
+ continue;
+ }
+ }
+ }
+ }
+ AsIs => push_obj_count_unique(&mut out, seen_objs, &id, location, progress, stats, false),
+ }
+ }
+ outcome.total_objects = out.len();
+ Ok((out, outcome))
+ }
+
+ #[inline]
+ fn push_obj_count_unique(
+ out: &mut Vec<output::Count>,
+ all_seen: &impl util::InsertImmutable,
+ id: &oid,
+ location: Option<crate::data::entry::Location>,
+ progress: &mut impl Progress,
+ statistics: &mut Outcome,
+ count_expanded: bool,
+ ) {
+ let inserted = all_seen.insert(id.to_owned());
+ if inserted {
+ progress.inc();
+ statistics.decoded_objects += 1;
+ if count_expanded {
+ statistics.expanded_objects += 1;
+ }
+ out.push(output::Count::from_data(id, location));
+ }
+ }
+
+ #[inline]
+ fn id_to_count<Find: crate::Find>(
+ db: &Find,
+ buf: &mut Vec<u8>,
+ id: &oid,
+ progress: &mut impl Progress,
+ statistics: &mut Outcome,
+ allow_pack_lookups: bool,
+ ) -> output::Count {
+ progress.inc();
+ statistics.expanded_objects += 1;
+ output::Count {
+ id: id.to_owned(),
+ entry_pack_location: if allow_pack_lookups {
+ PackLocation::LookedUp(db.location_by_oid(id, buf))
+ } else {
+ PackLocation::NotLookedUp
+ },
+ }
+ }
+}
diff --git a/vendor/gix-pack/src/data/output/count/objects/reduce.rs b/vendor/gix-pack/src/data/output/count/objects/reduce.rs
new file mode 100644
index 000000000..c6a61d467
--- /dev/null
+++ b/vendor/gix-pack/src/data/output/count/objects/reduce.rs
@@ -0,0 +1,49 @@
+use std::{marker::PhantomData, sync::Arc};
+
+use gix_features::{parallel, progress::Progress};
+
+use super::Outcome;
+use crate::data::output;
+
+pub struct Statistics<E, P> {
+ total: Outcome,
+ counts: Vec<output::Count>,
+ progress: Arc<parking_lot::Mutex<P>>,
+ _err: PhantomData<E>,
+}
+
+impl<E, P> Statistics<E, P>
+where
+ P: Progress,
+{
+ pub fn new(progress: Arc<parking_lot::Mutex<P>>) -> Self {
+ Statistics {
+ total: Default::default(),
+ counts: Default::default(),
+ progress,
+ _err: PhantomData::default(),
+ }
+ }
+}
+
+impl<E, P> parallel::Reduce for Statistics<E, P>
+where
+ P: Progress,
+{
+ type Input = Result<(Vec<output::Count>, Outcome), E>;
+ type FeedProduce = ();
+ type Output = (Vec<output::Count>, Outcome);
+ type Error = E;
+
+ fn feed(&mut self, item: Self::Input) -> Result<Self::FeedProduce, Self::Error> {
+ let (counts, stats) = item?;
+ self.total.aggregate(stats);
+ self.progress.lock().inc_by(counts.len());
+ self.counts.extend(counts);
+ Ok(())
+ }
+
+ fn finalize(self) -> Result<Self::Output, Self::Error> {
+ Ok((self.counts, self.total))
+ }
+}
diff --git a/vendor/gix-pack/src/data/output/count/objects/tree.rs b/vendor/gix-pack/src/data/output/count/objects/tree.rs
new file mode 100644
index 000000000..d3f4f6b9a
--- /dev/null
+++ b/vendor/gix-pack/src/data/output/count/objects/tree.rs
@@ -0,0 +1,124 @@
+pub mod changes {
+ use gix_diff::tree::{
+ visit::{Action, Change},
+ Visit,
+ };
+ use gix_hash::ObjectId;
+ use gix_object::{bstr::BStr, tree::EntryMode};
+
+ use crate::data::output::count::objects_impl::util::InsertImmutable;
+
+ pub struct AllNew<'a, H> {
+ pub objects: Vec<ObjectId>,
+ all_seen: &'a H,
+ }
+
+ impl<'a, H> AllNew<'a, H>
+ where
+ H: InsertImmutable,
+ {
+ pub fn new(all_seen: &'a H) -> Self {
+ AllNew {
+ objects: Default::default(),
+ all_seen,
+ }
+ }
+ pub fn clear(&mut self) {
+ self.objects.clear();
+ }
+ }
+
+ impl<'a, H> Visit for AllNew<'a, H>
+ where
+ H: InsertImmutable,
+ {
+ fn pop_front_tracked_path_and_set_current(&mut self) {}
+
+ fn push_back_tracked_path_component(&mut self, _component: &BStr) {}
+
+ fn push_path_component(&mut self, _component: &BStr) {}
+
+ fn pop_path_component(&mut self) {}
+
+ fn visit(&mut self, change: Change) -> Action {
+ match change {
+ Change::Addition { oid, entry_mode } | Change::Modification { oid, entry_mode, .. } => {
+ if entry_mode == EntryMode::Commit {
+ return Action::Continue;
+ }
+ let inserted = self.all_seen.insert(oid);
+ if inserted {
+ self.objects.push(oid);
+ }
+ }
+ Change::Deletion { .. } => {}
+ };
+ Action::Continue
+ }
+ }
+}
+
+pub mod traverse {
+ use gix_hash::ObjectId;
+ use gix_object::{
+ bstr::BStr,
+ tree::{EntryMode, EntryRef},
+ };
+ use gix_traverse::tree::{visit::Action, Visit};
+
+ use crate::data::output::count::objects_impl::util::InsertImmutable;
+
+ pub struct AllUnseen<'a, H> {
+ pub non_trees: Vec<ObjectId>,
+ all_seen: &'a H,
+ }
+
+ impl<'a, H> AllUnseen<'a, H>
+ where
+ H: InsertImmutable,
+ {
+ pub fn new(all_seen: &'a H) -> Self {
+ AllUnseen {
+ non_trees: Default::default(),
+ all_seen,
+ }
+ }
+ pub fn clear(&mut self) {
+ self.non_trees.clear();
+ }
+ }
+
+ impl<'a, H> Visit for AllUnseen<'a, H>
+ where
+ H: InsertImmutable,
+ {
+ fn pop_front_tracked_path_and_set_current(&mut self) {}
+
+ fn push_back_tracked_path_component(&mut self, _component: &BStr) {}
+
+ fn push_path_component(&mut self, _component: &BStr) {}
+
+ fn pop_path_component(&mut self) {}
+
+ fn visit_tree(&mut self, entry: &EntryRef<'_>) -> Action {
+ let inserted = self.all_seen.insert(entry.oid.to_owned());
+ if inserted {
+ Action::Continue
+ } else {
+ Action::Skip
+ }
+ }
+
+ fn visit_nontree(&mut self, entry: &EntryRef<'_>) -> Action {
+ if entry.mode == EntryMode::Commit {
+ // links don't have a representation
+ return Action::Continue;
+ }
+ let inserted = self.all_seen.insert(entry.oid.to_owned());
+ if inserted {
+ self.non_trees.push(entry.oid.to_owned());
+ }
+ Action::Continue
+ }
+ }
+}
diff --git a/vendor/gix-pack/src/data/output/count/objects/types.rs b/vendor/gix-pack/src/data/output/count/objects/types.rs
new file mode 100644
index 000000000..8c8c939df
--- /dev/null
+++ b/vendor/gix-pack/src/data/output/count/objects/types.rs
@@ -0,0 +1,105 @@
+/// Information gathered during the run of [`objects()`][super::objects()].
+#[derive(Default, PartialEq, Eq, Debug, Hash, Ord, PartialOrd, Clone, Copy)]
+#[cfg_attr(feature = "serde1", derive(serde::Serialize, serde::Deserialize))]
+pub struct Outcome {
+ /// The amount of objects provided to start the iteration.
+ pub input_objects: usize,
+ /// The amount of objects that have been expanded from the input source.
+ /// It's desirable to do that as expansion happens on multiple threads, allowing the amount of input objects to be small.
+    /// `expanded_objects - decoded_objects` is the number of 'cheap' objects we found without decoding the object itself.
+ pub expanded_objects: usize,
+ /// The amount of fully decoded objects. These are the most expensive as they are fully decoded
+ pub decoded_objects: usize,
+ /// The total amount of encountered objects. Should be `expanded_objects + input_objects`.
+ pub total_objects: usize,
+}
+
+impl Outcome {
+ pub(in crate::data::output::count) fn aggregate(
+ &mut self,
+ Outcome {
+ input_objects,
+ decoded_objects,
+ expanded_objects,
+ total_objects,
+ }: Self,
+ ) {
+ self.input_objects += input_objects;
+ self.decoded_objects += decoded_objects;
+ self.expanded_objects += expanded_objects;
+ self.total_objects += total_objects;
+ }
+}
+
+/// The way input objects are handled
+#[derive(PartialEq, Eq, Debug, Hash, Ord, PartialOrd, Clone, Copy)]
+#[cfg_attr(feature = "serde1", derive(serde::Serialize, serde::Deserialize))]
+pub enum ObjectExpansion {
+ /// Don't do anything with the input objects except for transforming them into pack entries
+ AsIs,
+ /// If the input object is a Commit then turn it into a pack entry. Additionally obtain its tree, turn it into a pack entry
+ /// along with all of its contents, that is nested trees, and any other objects reachable from it.
+ /// Otherwise, the same as [`AsIs`][ObjectExpansion::AsIs].
+ ///
+ /// This mode is useful if all reachable objects should be added, as in cloning a repository.
+ TreeContents,
+ /// If the input is a commit, obtain its ancestors and turn them into pack entries. Obtain the ancestor trees along with the commits
+ /// tree and turn them into pack entries. Finally obtain the added/changed objects when comparing the ancestor trees with the
+ /// current tree and turn them into entries as well.
+ /// Otherwise, the same as [`AsIs`][ObjectExpansion::AsIs].
+ ///
+ /// This mode is useful to build a pack containing only new objects compared to a previous state.
+ TreeAdditionsComparedToAncestor,
+}
+
+impl Default for ObjectExpansion {
+ fn default() -> Self {
+ ObjectExpansion::AsIs
+ }
+}
+
+/// Configuration options for the pack generation functions provided in [this module][crate::data::output].
+#[derive(PartialEq, Eq, Debug, Hash, Ord, PartialOrd, Clone, Copy)]
+#[cfg_attr(feature = "serde1", derive(serde::Serialize, serde::Deserialize))]
+pub struct Options {
+ /// The amount of threads to use at most when resolving the pack. If `None`, all logical cores are used.
+ /// If more than one thread is used, the order of returned [counts][crate::data::output::Count] is not deterministic anymore
+ /// especially when tree traversal is involved. Thus deterministic ordering requires `Some(1)` to be set.
+ pub thread_limit: Option<usize>,
+ /// The amount of objects per chunk or unit of work to be sent to threads for processing
+ pub chunk_size: usize,
+ /// The way input objects are handled
+ pub input_object_expansion: ObjectExpansion,
+}
+
+impl Default for Options {
+ fn default() -> Self {
+ Options {
+ thread_limit: None,
+ chunk_size: 10,
+ input_object_expansion: Default::default(),
+ }
+ }
+}
+
+/// The error returned by the counting functions [`objects()`][super::objects()] and [`objects_unthreaded()`][super::objects_unthreaded()].
+#[derive(Debug, thiserror::Error)]
+#[allow(missing_docs)]
+pub enum Error<FindErr, IterErr>
+where
+ FindErr: std::error::Error + 'static,
+ IterErr: std::error::Error + 'static,
+{
+ #[error(transparent)]
+ CommitDecode(gix_object::decode::Error),
+ #[error(transparent)]
+ FindExisting(#[from] FindErr),
+ #[error(transparent)]
+ InputIteration(IterErr),
+ #[error(transparent)]
+ TreeTraverse(gix_traverse::tree::breadthfirst::Error),
+ #[error(transparent)]
+ TreeChanges(gix_diff::tree::changes::Error),
+ #[error("Operation interrupted")]
+ Interrupted,
+}
diff --git a/vendor/gix-pack/src/data/output/count/objects/util.rs b/vendor/gix-pack/src/data/output/count/objects/util.rs
new file mode 100644
index 000000000..a80841313
--- /dev/null
+++ b/vendor/gix-pack/src/data/output/count/objects/util.rs
@@ -0,0 +1,24 @@
+pub trait InsertImmutable {
+ fn insert(&self, id: gix_hash::ObjectId) -> bool;
+}
+
+mod trait_impls {
+ use gix_hash::ObjectId;
+ use std::cell::RefCell;
+
+ use gix_hashtable::HashSet;
+
+ use super::InsertImmutable;
+
+ impl InsertImmutable for gix_hashtable::sync::ObjectIdMap<()> {
+ fn insert(&self, id: ObjectId) -> bool {
+ self.insert(id, ()).is_none()
+ }
+ }
+
+ impl InsertImmutable for RefCell<HashSet<ObjectId>> {
+ fn insert(&self, item: ObjectId) -> bool {
+ self.borrow_mut().insert(item)
+ }
+ }
+}
diff --git a/vendor/gix-pack/src/data/output/entry/iter_from_counts.rs b/vendor/gix-pack/src/data/output/entry/iter_from_counts.rs
new file mode 100644
index 000000000..25e256d5c
--- /dev/null
+++ b/vendor/gix-pack/src/data/output/entry/iter_from_counts.rs
@@ -0,0 +1,428 @@
+pub(crate) mod function {
+ use std::{cmp::Ordering, sync::Arc};
+
+ use gix_features::{parallel, parallel::SequenceId, progress::Progress};
+
+ use super::{reduce, util, Error, Mode, Options, Outcome, ProgressId};
+ use crate::data::output;
+
+ /// Given a known list of object `counts`, calculate entries ready to be put into a data pack.
+ ///
+ /// This allows objects to be written quite soon without having to wait for the entire pack to be built in memory.
+    /// A chunk of objects is held in memory and compressed using DEFLATE, and serves as the output of this iterator.
+ /// That way slow writers will naturally apply back pressure, and communicate to the implementation that more time can be
+ /// spent compressing objects.
+ ///
+ /// * `counts`
+ /// * A list of previously counted objects to add to the pack. Duplication checks are not performed, no object is expected to be duplicated.
+ /// * `progress`
+ /// * a way to obtain progress information
+ /// * `options`
+ /// * more configuration
+ ///
+    /// _Returns_ an iterator over chunks of pack entries, which can be finalized into statistics once depleted; a usage sketch follows below.
+ ///
+ /// ## Discussion
+ ///
+ /// ### Advantages
+ ///
+ /// * Begins writing immediately and supports back-pressure.
+    /// * Abstracts over object databases and how input is provided.
+ ///
+ /// ### Disadvantages
+ ///
+ /// * ~~currently there is no way to easily write the pack index, even though the state here is uniquely positioned to do
+ /// so with minimal overhead (especially compared to `gix index-from-pack`)~~ Probably works now by chaining Iterators
+ /// or keeping enough state to write a pack and then generate an index with recorded data.
+ ///
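+    /// A sketch with all option fields spelled out, assuming `counts` comes from the counting
+    /// stage and `db` is the same object store that produced them (both placeholders):
+    ///
+    /// ```ignore
+    /// use gix_pack::data::output::entry::iter_from_counts;
+    ///
+    /// let entry_chunks = iter_from_counts(
+    ///     counts,
+    ///     db,
+    ///     gix_features::progress::Discard,
+    ///     iter_from_counts::Options {
+    ///         version: Default::default(),
+    ///         mode: iter_from_counts::Mode::PackCopyAndBaseObjects,
+    ///         allow_thin_pack: false,
+    ///         thread_limit: None,
+    ///         chunk_size: 10,
+    ///     },
+    /// );
+    /// for chunk in entry_chunks {
+    ///     let (_sequence_id, entries) = chunk?;
+    ///     // hand `entries` (in input order) to `output::bytes::FromEntriesIter` to write the pack bytes
+    /// }
+    /// ```
+    ///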
+ pub fn iter_from_counts<Find>(
+ mut counts: Vec<output::Count>,
+ db: Find,
+ mut progress: impl Progress + 'static,
+ Options {
+ version,
+ mode,
+ allow_thin_pack,
+ thread_limit,
+ chunk_size,
+ }: Options,
+ ) -> impl Iterator<Item = Result<(SequenceId, Vec<output::Entry>), Error<Find::Error>>>
+ + parallel::reduce::Finalize<Reduce = reduce::Statistics<Error<Find::Error>>>
+ where
+ Find: crate::Find + Send + Clone + 'static,
+ <Find as crate::Find>::Error: Send,
+ {
+ assert!(
+ matches!(version, crate::data::Version::V2),
+ "currently we can only write version 2"
+ );
+ let (chunk_size, thread_limit, _) =
+ parallel::optimize_chunk_size_and_thread_limit(chunk_size, Some(counts.len()), thread_limit, None);
+ {
+ let progress = Arc::new(parking_lot::Mutex::new(
+ progress.add_child_with_id("resolving", ProgressId::ResolveCounts.into()),
+ ));
+ progress.lock().init(None, gix_features::progress::count("counts"));
+ let enough_counts_present = counts.len() > 4_000;
+ let start = std::time::Instant::now();
+ parallel::in_parallel_if(
+ || enough_counts_present,
+ counts.chunks_mut(chunk_size),
+ thread_limit,
+ |_n| Vec::<u8>::new(),
+ {
+ let progress = Arc::clone(&progress);
+ let db = db.clone();
+ move |chunk, buf| {
+ let chunk_size = chunk.len();
+ for count in chunk {
+ use crate::data::output::count::PackLocation::*;
+ match count.entry_pack_location {
+ LookedUp(_) => continue,
+ NotLookedUp => count.entry_pack_location = LookedUp(db.location_by_oid(count.id, buf)),
+ }
+ }
+ progress.lock().inc_by(chunk_size);
+ Ok::<_, ()>(())
+ }
+ },
+ parallel::reduce::IdentityWithResult::<(), ()>::default(),
+ )
+            .expect("infallible - we ignore non-existing objects");
+ progress.lock().show_throughput(start);
+ }
+ let counts_range_by_pack_id = match mode {
+ Mode::PackCopyAndBaseObjects => {
+ let mut progress = progress.add_child_with_id("sorting", ProgressId::SortEntries.into());
+ progress.init(Some(counts.len()), gix_features::progress::count("counts"));
+ let start = std::time::Instant::now();
+
+ use crate::data::output::count::PackLocation::*;
+ counts.sort_by(|lhs, rhs| match (&lhs.entry_pack_location, &rhs.entry_pack_location) {
+ (LookedUp(None), LookedUp(None)) => Ordering::Equal,
+ (LookedUp(Some(_)), LookedUp(None)) => Ordering::Greater,
+ (LookedUp(None), LookedUp(Some(_))) => Ordering::Less,
+ (LookedUp(Some(lhs)), LookedUp(Some(rhs))) => lhs
+ .pack_id
+ .cmp(&rhs.pack_id)
+ .then(lhs.pack_offset.cmp(&rhs.pack_offset)),
+ (_, _) => unreachable!("counts were resolved beforehand"),
+ });
+
+ let mut index: Vec<(u32, std::ops::Range<usize>)> = Vec::new();
+ let mut chunks_pack_start = counts.partition_point(|e| e.entry_pack_location.is_none());
+ let mut slice = &counts[chunks_pack_start..];
+ while !slice.is_empty() {
+ let current_pack_id = slice[0].entry_pack_location.as_ref().expect("packed object").pack_id;
+ let pack_end = slice.partition_point(|e| {
+ e.entry_pack_location.as_ref().expect("packed object").pack_id == current_pack_id
+ });
+ index.push((current_pack_id, chunks_pack_start..chunks_pack_start + pack_end));
+ slice = &slice[pack_end..];
+ chunks_pack_start += pack_end;
+ }
+
+ progress.set(counts.len());
+ progress.show_throughput(start);
+
+ index
+ }
+ };
+
+ let counts = Arc::new(counts);
+ let progress = Arc::new(parking_lot::Mutex::new(progress));
+ let chunks = util::ChunkRanges::new(chunk_size, counts.len());
+
+ parallel::reduce::Stepwise::new(
+ chunks.enumerate(),
+ thread_limit,
+ {
+ let progress = Arc::clone(&progress);
+ move |n| {
+ (
+ Vec::new(), // object data buffer
+ progress
+ .lock()
+ .add_child_with_id(format!("thread {n}"), gix_features::progress::UNKNOWN),
+ )
+ }
+ },
+ {
+ let counts = Arc::clone(&counts);
+ move |(chunk_id, chunk_range): (SequenceId, std::ops::Range<usize>), (buf, progress)| {
+ let mut out = Vec::new();
+ let chunk = &counts[chunk_range];
+ let mut stats = Outcome::default();
+ let mut pack_offsets_to_id = None;
+ progress.init(Some(chunk.len()), gix_features::progress::count("objects"));
+
+ for count in chunk.iter() {
+ out.push(match count
+ .entry_pack_location
+ .as_ref()
+ .and_then(|l| db.entry_by_location(l).map(|pe| (l, pe)))
+ {
+ Some((location, pack_entry)) => {
+ if let Some((cached_pack_id, _)) = &pack_offsets_to_id {
+ if *cached_pack_id != location.pack_id {
+ pack_offsets_to_id = None;
+ }
+ }
+ let pack_range = counts_range_by_pack_id[counts_range_by_pack_id
+ .binary_search_by_key(&location.pack_id, |e| e.0)
+ .expect("pack-id always present")]
+ .1
+ .clone();
+ let base_index_offset = pack_range.start;
+ let counts_in_pack = &counts[pack_range];
+ match output::Entry::from_pack_entry(
+ pack_entry,
+ count,
+ counts_in_pack,
+ base_index_offset,
+ allow_thin_pack.then_some({
+ |pack_id, base_offset| {
+ let (cached_pack_id, cache) = pack_offsets_to_id.get_or_insert_with(|| {
+ db.pack_offsets_and_oid(pack_id)
+ .map(|mut v| {
+ v.sort_by_key(|e| e.0);
+ (pack_id, v)
+ })
+ .expect("pack used for counts is still available")
+ });
+ debug_assert_eq!(*cached_pack_id, pack_id);
+ stats.ref_delta_objects += 1;
+ cache
+ .binary_search_by_key(&base_offset, |e| e.0)
+ .ok()
+ .map(|idx| cache[idx].1)
+ }
+ }),
+ version,
+ ) {
+ Some(entry) => {
+ stats.objects_copied_from_pack += 1;
+ entry
+ }
+ None => match db.try_find(count.id, buf).map_err(Error::FindExisting)? {
+ Some((obj, _location)) => {
+ stats.decoded_and_recompressed_objects += 1;
+ output::Entry::from_data(count, &obj)
+ }
+ None => {
+ stats.missing_objects += 1;
+ Ok(output::Entry::invalid())
+ }
+ },
+ }
+ }
+ None => match db.try_find(count.id, buf).map_err(Error::FindExisting)? {
+ Some((obj, _location)) => {
+ stats.decoded_and_recompressed_objects += 1;
+ output::Entry::from_data(count, &obj)
+ }
+ None => {
+ stats.missing_objects += 1;
+ Ok(output::Entry::invalid())
+ }
+ },
+ }?);
+ progress.inc();
+ }
+ Ok((chunk_id, out, stats))
+ }
+ },
+ reduce::Statistics::default(),
+ )
+ }
+}
+
+mod util {
+ #[derive(Clone)]
+ pub struct ChunkRanges {
+ cursor: usize,
+ size: usize,
+ len: usize,
+ }
+
+ impl ChunkRanges {
+ pub fn new(size: usize, total: usize) -> Self {
+ ChunkRanges {
+ cursor: 0,
+ size,
+ len: total,
+ }
+ }
+ }
+
+ impl Iterator for ChunkRanges {
+ type Item = std::ops::Range<usize>;
+
+ fn next(&mut self) -> Option<Self::Item> {
+ if self.cursor >= self.len {
+ None
+ } else {
+ let upper = (self.cursor + self.size).min(self.len);
+ let range = self.cursor..upper;
+ self.cursor = upper;
+ Some(range)
+ }
+ }
+ }
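+
+ // A minimal sketch (not part of the original sources) showing how `ChunkRanges` tiles `0..total`
+ // into consecutive, non-overlapping ranges of at most `size` items, mirroring how work is split
+ // into chunks for the worker threads above.
+ #[cfg(test)]
+ mod tests {
+ use super::ChunkRanges;
+
+ #[test]
+ fn ranges_cover_the_input_without_overlap() {
+ let chunks: Vec<_> = ChunkRanges::new(4, 10).collect();
+ assert_eq!(chunks, vec![0..4, 4..8, 8..10]);
+ }
+ }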
+}
+
+mod reduce {
+ use std::marker::PhantomData;
+
+ use gix_features::{parallel, parallel::SequenceId};
+
+ use super::Outcome;
+ use crate::data::output;
+
+ pub struct Statistics<E> {
+ total: Outcome,
+ _err: PhantomData<E>,
+ }
+
+ impl<E> Default for Statistics<E> {
+ fn default() -> Self {
+ Statistics {
+ total: Default::default(),
+ _err: PhantomData::default(),
+ }
+ }
+ }
+
+ impl<Error> parallel::Reduce for Statistics<Error> {
+ type Input = Result<(SequenceId, Vec<output::Entry>, Outcome), Error>;
+ type FeedProduce = (SequenceId, Vec<output::Entry>);
+ type Output = Outcome;
+ type Error = Error;
+
+ fn feed(&mut self, item: Self::Input) -> Result<Self::FeedProduce, Self::Error> {
+ item.map(|(cid, entries, stats)| {
+ self.total.aggregate(stats);
+ (cid, entries)
+ })
+ }
+
+ fn finalize(self) -> Result<Self::Output, Self::Error> {
+ Ok(self.total)
+ }
+ }
+}
+
+mod types {
+ use crate::data::output::entry;
+
+ /// Information gathered during the run of [`iter_from_counts()`][crate::data::output::entry::iter_from_counts()].
+ #[derive(Default, PartialEq, Eq, Debug, Hash, Ord, PartialOrd, Clone, Copy)]
+ #[cfg_attr(feature = "serde1", derive(serde::Serialize, serde::Deserialize))]
+ pub struct Outcome {
+ /// The amount of fully decoded objects. These are the most expensive as they have to be decoded and recompressed.
+ pub decoded_and_recompressed_objects: usize,
+ /// The amount of objects that could not be located despite being mentioned during iteration.
+ pub missing_objects: usize,
+ /// The amount of base or delta objects that could be copied directly from the pack. These are cheapest as they
+ /// only cost a memory copy for the most part.
+ pub objects_copied_from_pack: usize,
+ /// The amount of objects that refer to their base as a ref-delta, an indication that a thin pack is being created.
+ pub ref_delta_objects: usize,
+ }
+
+ impl Outcome {
+ pub(in crate::data::output::entry) fn aggregate(
+ &mut self,
+ Outcome {
+ decoded_and_recompressed_objects: decoded_objects,
+ missing_objects,
+ objects_copied_from_pack,
+ ref_delta_objects,
+ }: Self,
+ ) {
+ self.decoded_and_recompressed_objects += decoded_objects;
+ self.missing_objects += missing_objects;
+ self.objects_copied_from_pack += objects_copied_from_pack;
+ self.ref_delta_objects += ref_delta_objects;
+ }
+ }
+
+ /// The way the iterator operates.
+ #[derive(PartialEq, Eq, Debug, Hash, Ord, PartialOrd, Clone, Copy)]
+ #[cfg_attr(feature = "serde1", derive(serde::Serialize, serde::Deserialize))]
+ pub enum Mode {
+ /// Copy base objects and deltas from packs, while non-packed objects will be treated as base objects
+ /// (i.e. without trying to delta compress them). This is a fast way of obtaining a pack while benefiting
+ /// from existing pack compression and spending the smallest possible time on compressing unpacked objects at
+ /// the cost of bandwidth.
+ PackCopyAndBaseObjects,
+ }
+
+ /// Configuration options for the pack generation functions provided in [`iter_from_counts()`][crate::data::output::entry::iter_from_counts()].
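+ ///
+ /// A sketch of adjusting a single field while keeping the remaining defaults (the path below is the
+ /// re-export from this module):
+ ///
+ /// ```ignore
+ /// use gix_pack::data::output::entry::iter_from_counts::Options;
+ ///
+ /// let opts = Options {
+ /// thread_limit: Some(4), // cap parallelism instead of using all logical cores
+ /// ..Default::default()
+ /// };
+ /// assert!(!opts.allow_thin_pack);
+ /// ```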
+ #[derive(PartialEq, Eq, Debug, Hash, Ord, PartialOrd, Clone, Copy)]
+ #[cfg_attr(feature = "serde1", derive(serde::Serialize, serde::Deserialize))]
+ pub struct Options {
+ /// The amount of threads to use at most when resolving the pack. If `None`, all logical cores are used.
+ pub thread_limit: Option<usize>,
+ /// The algorithm to produce a pack
+ pub mode: Mode,
+ /// If set, the resulting pack can have deltas that refer to an object which is not in the pack. This can happen
+ /// if the initially counted objects do not contain an object that an existing packed delta refers to, for example because
+ /// it wasn't part of the iteration, e.g. when the iteration was performed only on tree deltas or a part of the
+ /// commit graph. Please note that thin packs are not valid packs at rest, thus they are only valid for packs in transit.
+ ///
+ /// If set to false, delta objects will be decompressed and recompressed as base objects.
+ pub allow_thin_pack: bool,
+ /// The amount of objects per chunk or unit of work to be sent to threads for processing
+ /// TODO: could this become the window size?
+ pub chunk_size: usize,
+ /// The pack data version to produce for each entry
+ pub version: crate::data::Version,
+ }
+
+ impl Default for Options {
+ fn default() -> Self {
+ Options {
+ thread_limit: None,
+ mode: Mode::PackCopyAndBaseObjects,
+ allow_thin_pack: false,
+ chunk_size: 10,
+ version: Default::default(),
+ }
+ }
+ }
+
+ /// The error returned by the pack generation function [`iter_from_counts()`][crate::data::output::entry::iter_from_counts()].
+ #[derive(Debug, thiserror::Error)]
+ #[allow(missing_docs)]
+ pub enum Error<FindErr>
+ where
+ FindErr: std::error::Error + 'static,
+ {
+ #[error(transparent)]
+ FindExisting(FindErr),
+ #[error(transparent)]
+ NewEntry(#[from] entry::Error),
+ }
+
+ /// The progress ids used in [`iter_from_counts()`][crate::data::output::entry::iter_from_counts()].
+ ///
+ /// Use this information to selectively extract the progress of interest in case the parent application has custom visualization.
+ #[derive(Debug, Copy, Clone)]
+ pub enum ProgressId {
+ /// The amount of [`Count`][crate::data::output::Count] objects which are resolved to their pack location.
+ ResolveCounts,
+ /// Layout pack entries for placement into a pack (by pack-id and by offset).
+ SortEntries,
+ }
+
+ impl From<ProgressId> for gix_features::progress::Id {
+ fn from(v: ProgressId) -> Self {
+ match v {
+ ProgressId::ResolveCounts => *b"ECRC",
+ ProgressId::SortEntries => *b"ECSE",
+ }
+ }
+ }
+}
+pub use types::{Error, Mode, Options, Outcome, ProgressId};
diff --git a/vendor/gix-pack/src/data/output/entry/mod.rs b/vendor/gix-pack/src/data/output/entry/mod.rs
new file mode 100644
index 000000000..401d2f24c
--- /dev/null
+++ b/vendor/gix-pack/src/data/output/entry/mod.rs
@@ -0,0 +1,181 @@
+use std::{convert::TryFrom, io::Write};
+
+use gix_hash::ObjectId;
+
+use crate::{data, data::output, find};
+
+///
+pub mod iter_from_counts;
+pub use iter_from_counts::function::iter_from_counts;
+
+/// The kind of pack entry to be written
+#[derive(PartialEq, Eq, Debug, Hash, Ord, PartialOrd, Clone, Copy)]
+#[cfg_attr(feature = "serde1", derive(serde::Serialize, serde::Deserialize))]
+pub enum Kind {
+ /// A complete base object, including its kind
+ Base(gix_object::Kind),
+ /// A delta against the object with the given index. It's always an index that was already encountered, and thus
+ /// refers only to objects we have written already.
+ DeltaRef {
+ /// The absolute index to the object to serve as base. It's up to the writer to maintain enough state to allow producing
+ /// a packed delta object from it.
+ object_index: usize,
+ },
+ /// A delta against the given object as identified by its `ObjectId`.
+ /// This is the case for thin packs only, i.e. those that are sent over the wire.
+ /// Note that there is the option of the `ObjectId` being used to refer to an object within
+ /// the same pack, but it's a discontinued practice which won't be encountered here.
+ DeltaOid {
+ /// The object serving as base for this delta
+ id: ObjectId,
+ },
+}
+
+/// The error returned by [`output::Entry::from_data()`].
+#[allow(missing_docs)]
+#[derive(Debug, thiserror::Error)]
+pub enum Error {
+ #[error("{0}")]
+ ZlibDeflate(#[from] std::io::Error),
+}
+
+impl output::Entry {
+ /// An object which can easily be identified as invalid, which happens if a referenced object didn't exist.
+ pub fn invalid() -> output::Entry {
+ output::Entry {
+ id: gix_hash::Kind::Sha1.null(), // NOTE: the actual object hash used in the repo doesn't matter here, this is a sentinel value.
+ kind: Kind::Base(gix_object::Kind::Blob),
+ decompressed_size: 0,
+ compressed_data: vec![],
+ }
+ }
+
+ /// Returns true if this object doesn't really exist but still has to be handled responsibly
+ ///
+ /// Note that this is true for tree entries that are commits/git submodules, or for objects which aren't present in our local clone
+ /// due to shallow clones.
+ pub fn is_invalid(&self) -> bool {
+ self.id.is_null()
+ }
+
+ /// Create an Entry from a previously counted object which is located in a pack. Its `entry` is provided here.
+ /// The `version` specifies what kind of target `Entry` version the caller desires.
+ pub fn from_pack_entry(
+ mut entry: find::Entry,
+ count: &output::Count,
+ potential_bases: &[output::Count],
+ bases_index_offset: usize,
+ pack_offset_to_oid: Option<impl FnMut(u32, u64) -> Option<ObjectId>>,
+ target_version: crate::data::Version,
+ ) -> Option<Result<Self, Error>> {
+ if entry.version != target_version {
+ return None;
+ };
+
+ let pack_offset_must_be_zero = 0;
+ let pack_entry =
+ crate::data::Entry::from_bytes(&entry.data, pack_offset_must_be_zero, count.id.as_slice().len());
+
+ use crate::data::entry::Header::*;
+ match pack_entry.header {
+ Commit => Some(output::entry::Kind::Base(gix_object::Kind::Commit)),
+ Tree => Some(output::entry::Kind::Base(gix_object::Kind::Tree)),
+ Blob => Some(output::entry::Kind::Base(gix_object::Kind::Blob)),
+ Tag => Some(output::entry::Kind::Base(gix_object::Kind::Tag)),
+ OfsDelta { base_distance } => {
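+ // The delta's base is addressed by pack offset; translate that into an index into the objects being written.
+ // If the base isn't among them (thin pack), fall back to addressing it by object id, if allowed.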
+ let pack_location = count.entry_pack_location.as_ref().expect("packed");
+ let base_offset = pack_location
+ .pack_offset
+ .checked_sub(base_distance)
+ .expect("pack-offset - distance is firmly within the pack");
+ potential_bases
+ .binary_search_by(|e| {
+ e.entry_pack_location
+ .as_ref()
+ .expect("packed")
+ .pack_offset
+ .cmp(&base_offset)
+ })
+ .ok()
+ .map(|idx| output::entry::Kind::DeltaRef {
+ object_index: idx + bases_index_offset,
+ })
+ .or_else(|| {
+ pack_offset_to_oid
+ .and_then(|mut f| f(pack_location.pack_id, base_offset))
+ .map(|id| output::entry::Kind::DeltaOid { id })
+ })
+ }
+ RefDelta { base_id: _ } => None, // ref deltas are for thin packs or legacy, repack them as base objects
+ }
+ .map(|kind| {
+ Ok(output::Entry {
+ id: count.id.to_owned(),
+ kind,
+ decompressed_size: pack_entry.decompressed_size as usize,
+ compressed_data: {
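+ // Strip the pack entry header, keeping only the compressed payload.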
+ entry.data.copy_within(pack_entry.data_offset as usize.., 0);
+ entry.data.resize(
+ entry.data.len()
+ - usize::try_from(pack_entry.data_offset).expect("offset representable as usize"),
+ 0,
+ );
+ entry.data
+ },
+ })
+ })
+ }
+
+ /// Create a new instance from the previously counted object in `count` and its corresponding git `obj`ect data.
+ pub fn from_data(count: &output::Count, obj: &gix_object::Data<'_>) -> Result<Self, Error> {
+ Ok(output::Entry {
+ id: count.id.to_owned(),
+ kind: Kind::Base(obj.kind),
+ decompressed_size: obj.data.len(),
+ compressed_data: {
+ let mut out = gix_features::zlib::stream::deflate::Write::new(Vec::new());
+ if let Err(err) = std::io::copy(&mut &*obj.data, &mut out) {
+ match err.kind() {
+ std::io::ErrorKind::Other => return Err(Error::ZlibDeflate(err)),
+ err => unreachable!("Should never see other errors than zlib, but got {:?}", err,),
+ }
+ };
+ out.flush()?;
+ out.into_inner()
+ },
+ })
+ }
+
+ /// Transform ourselves into pack entry header of `version` which can be written into a pack.
+ ///
+ /// `index_to_base_distance(object_index) -> base_distance` is a function to convert the base object's index into
+ /// the input object array (if each object is numbered) into the distance from this entry's pack offset back to the base's offset.
+ /// This information is known to the one calling the method.
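+ ///
+ /// A sketch of a suitable closure, assuming the caller tracks the pack offset of every entry written
+ /// so far in a hypothetical `entry_offsets` slice and knows `current_offset`, the offset at which this
+ /// entry will be written:
+ ///
+ /// ```ignore
+ /// let header = entry.to_entry_header(gix_pack::data::Version::V2, |object_index| {
+ /// current_offset - entry_offsets[object_index]
+ /// });
+ /// ```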
+ pub fn to_entry_header(
+ &self,
+ version: crate::data::Version,
+ index_to_base_distance: impl FnOnce(usize) -> u64,
+ ) -> crate::data::entry::Header {
+ assert!(
+ matches!(version, data::Version::V2),
+ "we can only write V2 pack entries for now"
+ );
+
+ use Kind::*;
+ match self.kind {
+ Base(kind) => {
+ use gix_object::Kind::*;
+ match kind {
+ Tree => data::entry::Header::Tree,
+ Blob => data::entry::Header::Blob,
+ Commit => data::entry::Header::Commit,
+ Tag => data::entry::Header::Tag,
+ }
+ }
+ DeltaOid { id } => data::entry::Header::RefDelta { base_id: id.to_owned() },
+ DeltaRef { object_index } => data::entry::Header::OfsDelta {
+ base_distance: index_to_base_distance(object_index),
+ },
+ }
+ }
+}
diff --git a/vendor/gix-pack/src/data/output/mod.rs b/vendor/gix-pack/src/data/output/mod.rs
new file mode 100644
index 000000000..f94d32e8e
--- /dev/null
+++ b/vendor/gix-pack/src/data/output/mod.rs
@@ -0,0 +1,41 @@
+use gix_hash::ObjectId;
+
+///
+pub mod count;
+
+/// An item representing a future Entry in the leanest way possible.
+///
+/// One can expect to have one of these in memory for each object when building big packs, so smaller is better here.
+/// They should contain everything of importance to generate a pack as fast as possible.
+#[derive(PartialEq, Eq, Debug, Hash, Ord, PartialOrd, Clone)]
+#[cfg_attr(feature = "serde1", derive(serde::Serialize, serde::Deserialize))]
+pub struct Count {
+ /// The hash of the object to write
+ pub id: ObjectId,
+ /// A way to locate a pack entry in the object database, only available if the object is in a pack.
+ pub entry_pack_location: count::PackLocation,
+}
+
+/// An entry to be written to a file.
+///
+/// Some of these will be in-flight and in memory while waiting to be written. Memory requirements depend on the amount of compressed
+/// data they hold.
+#[derive(PartialEq, Eq, Debug, Hash, Ord, PartialOrd, Clone)]
+#[cfg_attr(feature = "serde1", derive(serde::Serialize, serde::Deserialize))]
+pub struct Entry {
+ /// The hash of the object to write
+ pub id: ObjectId,
+ /// The kind of entry represented by `compressed_data`. It's used alongside it to complete the pack entry
+ /// at rest or in transit.
+ pub kind: entry::Kind,
+ /// The size in bytes needed once `data` gets decompressed
+ pub decompressed_size: usize,
+ /// The compressed data right behind the header
+ pub compressed_data: Vec<u8>,
+}
+
+///
+pub mod entry;
+
+///
+pub mod bytes;
diff --git a/vendor/gix-pack/src/find.rs b/vendor/gix-pack/src/find.rs
new file mode 100644
index 000000000..8143692e7
--- /dev/null
+++ b/vendor/gix-pack/src/find.rs
@@ -0,0 +1,63 @@
+///
+pub mod existing {
+ use gix_hash::ObjectId;
+
+ /// The error returned by the [`find(…)`][crate::FindExt::find()] trait methods.
+ #[derive(Debug, thiserror::Error)]
+ #[allow(missing_docs)]
+ pub enum Error<T: std::error::Error + 'static> {
+ #[error(transparent)]
+ Find(T),
+ #[error("An object with id {} could not be found", .oid)]
+ NotFound { oid: ObjectId },
+ }
+}
+
+///
+pub mod existing_object {
+ use gix_hash::ObjectId;
+
+ /// The error returned by the various [`find_*`][crate::FindExt::find_commit()] trait methods.
+ #[derive(Debug, thiserror::Error)]
+ #[allow(missing_docs)]
+ pub enum Error<T: std::error::Error + 'static> {
+ #[error(transparent)]
+ Find(T),
+ #[error(transparent)]
+ Decode(gix_object::decode::Error),
+ #[error("An object with id {} could not be found", .oid)]
+ NotFound { oid: ObjectId },
+ #[error("Expected object of kind {} but got something else", .expected)]
+ ObjectKind { expected: gix_object::Kind },
+ }
+}
+
+///
+pub mod existing_iter {
+ use gix_hash::ObjectId;
+
+ /// The error returned by the various [`find_*`][crate::FindExt::find_commit()] trait methods.
+ #[derive(Debug, thiserror::Error)]
+ #[allow(missing_docs)]
+ pub enum Error<T: std::error::Error + 'static> {
+ #[error(transparent)]
+ Find(T),
+ #[error("An object with id {} could not be found", .oid)]
+ NotFound { oid: ObjectId },
+ #[error("Expected object of kind {} but got something else", .expected)]
+ ObjectKind { expected: gix_object::Kind },
+ }
+}
+
+/// An Entry in a pack providing access to its data.
+///
+/// It's commonly retrieved by reading from a pack index file followed by a read from a pack data file.
+#[derive(PartialEq, Eq, Debug, Hash, Ord, PartialOrd, Clone)]
+#[cfg_attr(feature = "serde1", derive(serde::Serialize, serde::Deserialize))]
+#[allow(missing_docs)]
+pub struct Entry {
+ /// The pack-data encoded bytes of the pack data entry as present in the pack file, including the header followed by compressed data.
+ pub data: Vec<u8>,
+ /// The version of the pack file containing `data`
+ pub version: crate::data::Version,
+}
diff --git a/vendor/gix-pack/src/find_traits.rs b/vendor/gix-pack/src/find_traits.rs
new file mode 100644
index 000000000..6f828afbf
--- /dev/null
+++ b/vendor/gix-pack/src/find_traits.rs
@@ -0,0 +1,295 @@
+use crate::{data, find};
+
+/// Describes how objects can be located in an object store, with built-in facilities to support packs specifically.
+///
+/// ## Notes
+///
+/// Find effectively needs [generic associated types][issue] to allow a trait for the returned object type.
+/// Until then, we will have to make do with explicit types and give them the potentially added features we want.
+///
+/// Furthermore, despite this trait being in `gix-pack`, it leaks knowledge about objects potentially not being packed.
+/// This is a necessary trade-off to allow this trait to live in `gix-pack` where it is used in functions to create a pack.
+///
+/// [issue]: https://github.com/rust-lang/rust/issues/44265
+pub trait Find {
+ /// The error returned by [`try_find()`][Find::try_find()]
+ type Error: std::error::Error + Send + Sync + 'static;
+
+ /// Returns true if the object exists in the database.
+ fn contains(&self, id: impl AsRef<gix_hash::oid>) -> bool;
+
+ /// Find an object matching `id` in the database while placing its raw, decoded data into `buffer`.
+ /// A `pack_cache` can be used to speed up subsequent lookups, set it to [`crate::cache::Never`] if the
+ /// workload isn't suitable for caching.
+ ///
+ /// Returns `Some((<object data>, <pack location if packed>))` if it was present in the database,
+ /// or the error that occurred during lookup or object retrieval.
+ fn try_find<'a>(
+ &self,
+ id: impl AsRef<gix_hash::oid>,
+ buffer: &'a mut Vec<u8>,
+ ) -> Result<Option<(gix_object::Data<'a>, Option<data::entry::Location>)>, Self::Error> {
+ self.try_find_cached(id, buffer, &mut crate::cache::Never)
+ }
+
+ /// Like [`Find::try_find()`], but with support for controlling the pack cache.
+ /// A `pack_cache` can be used to speed up subsequent lookups, set it to [`crate::cache::Never`] if the
+ /// workload isn't suitable for caching.
+ ///
+ /// Returns `Some((<object data>, <pack location if packed>))` if it was present in the database,
+ /// or the error that occurred during lookup or object retrieval.
+ fn try_find_cached<'a>(
+ &self,
+ id: impl AsRef<gix_hash::oid>,
+ buffer: &'a mut Vec<u8>,
+ pack_cache: &mut impl crate::cache::DecodeEntry,
+ ) -> Result<Option<(gix_object::Data<'a>, Option<data::entry::Location>)>, Self::Error>;
+
+ /// Find the pack location at which the object with `id` can be found in the database, or `None` if there is no pack
+ /// holding the object.
+ ///
+ /// _Note_ that this is always `None` if the object isn't packed even though it exists as a loose object.
+ fn location_by_oid(&self, id: impl AsRef<gix_hash::oid>, buf: &mut Vec<u8>) -> Option<data::entry::Location>;
+
+ /// Obtain a vector of all offsets, in index order, along with their object id.
+ fn pack_offsets_and_oid(&self, pack_id: u32) -> Option<Vec<(data::Offset, gix_hash::ObjectId)>>;
+
+ /// Return the [`find::Entry`] for `location` if it is backed by a pack.
+ ///
+ /// Note that this is only in the interest of avoiding duplicate work during pack generation.
+ /// Pack locations can be obtained from [`Find::try_find()`].
+ ///
+ /// # Notes
+ ///
+ /// Custom implementations might be interested in providing their own meta-data with `object`,
+ /// which currently isn't possible as the `Locate` trait requires GATs to work like that.
+ fn entry_by_location(&self, location: &data::entry::Location) -> Option<find::Entry>;
+}
+
+mod ext {
+ use gix_object::{BlobRef, CommitRef, CommitRefIter, Kind, ObjectRef, TagRef, TagRefIter, TreeRef, TreeRefIter};
+
+ use crate::find;
+
+ macro_rules! make_obj_lookup {
+ ($method:ident, $object_variant:path, $object_kind:path, $object_type:ty) => {
+ /// Like [`find(…)`][Self::find()], but flattens the `Result<Option<_>>` into a single `Result` making a non-existing object an error
+ /// while returning the desired object type.
+ fn $method<'a>(
+ &self,
+ id: impl AsRef<gix_hash::oid>,
+ buffer: &'a mut Vec<u8>,
+ ) -> Result<($object_type, Option<crate::data::entry::Location>), find::existing_object::Error<Self::Error>>
+ {
+ let id = id.as_ref();
+ self.try_find(id, buffer)
+ .map_err(find::existing_object::Error::Find)?
+ .ok_or_else(|| find::existing_object::Error::NotFound {
+ oid: id.as_ref().to_owned(),
+ })
+ .and_then(|(o, l)| {
+ o.decode()
+ .map_err(find::existing_object::Error::Decode)
+ .map(|o| (o, l))
+ })
+ .and_then(|(o, l)| match o {
+ $object_variant(o) => return Ok((o, l)),
+ _other => Err(find::existing_object::Error::ObjectKind {
+ expected: $object_kind,
+ }),
+ })
+ }
+ };
+ }
+
+ macro_rules! make_iter_lookup {
+ ($method:ident, $object_kind:path, $object_type:ty, $into_iter:tt) => {
+ /// Like [`find(…)`][Self::find()], but flattens the `Result<Option<_>>` into a single `Result` making a non-existing object an error
+ /// while returning the desired iterator type.
+ fn $method<'a>(
+ &self,
+ id: impl AsRef<gix_hash::oid>,
+ buffer: &'a mut Vec<u8>,
+ ) -> Result<($object_type, Option<crate::data::entry::Location>), find::existing_iter::Error<Self::Error>> {
+ let id = id.as_ref();
+ self.try_find(id, buffer)
+ .map_err(find::existing_iter::Error::Find)?
+ .ok_or_else(|| find::existing_iter::Error::NotFound {
+ oid: id.as_ref().to_owned(),
+ })
+ .and_then(|(o, l)| {
+ o.$into_iter()
+ .ok_or_else(|| find::existing_iter::Error::ObjectKind {
+ expected: $object_kind,
+ })
+ .map(|i| (i, l))
+ })
+ }
+ };
+ }
+
+ /// An extension trait with convenience functions.
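+ ///
+ /// A sketch of a generic consumer (the `commit_exists` helper is illustrative only):
+ ///
+ /// ```ignore
+ /// use gix_pack::{Find, FindExt};
+ ///
+ /// fn commit_exists(db: &impl Find, id: &gix_hash::oid) -> bool {
+ /// let mut buf = Vec::new();
+ /// db.find_commit(id, &mut buf).is_ok()
+ /// }
+ /// ```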
+ pub trait FindExt: super::Find {
+ /// Like [`try_find(…)`][super::Find::try_find()], but flattens the `Result<Option<_>>` into a single `Result` making a non-existing object an error.
+ fn find<'a>(
+ &self,
+ id: impl AsRef<gix_hash::oid>,
+ buffer: &'a mut Vec<u8>,
+ ) -> Result<(gix_object::Data<'a>, Option<crate::data::entry::Location>), find::existing::Error<Self::Error>>
+ {
+ let id = id.as_ref();
+ self.try_find(id, buffer)
+ .map_err(find::existing::Error::Find)?
+ .ok_or_else(|| find::existing::Error::NotFound {
+ oid: id.as_ref().to_owned(),
+ })
+ }
+
+ make_obj_lookup!(find_commit, ObjectRef::Commit, Kind::Commit, CommitRef<'a>);
+ make_obj_lookup!(find_tree, ObjectRef::Tree, Kind::Tree, TreeRef<'a>);
+ make_obj_lookup!(find_tag, ObjectRef::Tag, Kind::Tag, TagRef<'a>);
+ make_obj_lookup!(find_blob, ObjectRef::Blob, Kind::Blob, BlobRef<'a>);
+ make_iter_lookup!(find_commit_iter, Kind::Commit, CommitRefIter<'a>, try_into_commit_iter);
+ make_iter_lookup!(find_tree_iter, Kind::Tree, TreeRefIter<'a>, try_into_tree_iter);
+ make_iter_lookup!(find_tag_iter, Kind::Tag, TagRefIter<'a>, try_into_tag_iter);
+ }
+
+ impl<T: super::Find> FindExt for T {}
+}
+pub use ext::FindExt;
+
+mod find_impls {
+ use std::{ops::Deref, rc::Rc};
+
+ use gix_hash::oid;
+
+ use crate::{data, find};
+
+ impl<T> crate::Find for &T
+ where
+ T: crate::Find,
+ {
+ type Error = T::Error;
+
+ fn contains(&self, id: impl AsRef<oid>) -> bool {
+ (*self).contains(id)
+ }
+
+ fn try_find_cached<'a>(
+ &self,
+ id: impl AsRef<oid>,
+ buffer: &'a mut Vec<u8>,
+ pack_cache: &mut impl crate::cache::DecodeEntry,
+ ) -> Result<Option<(gix_object::Data<'a>, Option<data::entry::Location>)>, Self::Error> {
+ (*self).try_find_cached(id, buffer, pack_cache)
+ }
+
+ fn location_by_oid(&self, id: impl AsRef<oid>, buf: &mut Vec<u8>) -> Option<data::entry::Location> {
+ (*self).location_by_oid(id, buf)
+ }
+
+ fn pack_offsets_and_oid(&self, pack_id: u32) -> Option<Vec<(data::Offset, gix_hash::ObjectId)>> {
+ (*self).pack_offsets_and_oid(pack_id)
+ }
+
+ fn entry_by_location(&self, location: &data::entry::Location) -> Option<find::Entry> {
+ (*self).entry_by_location(location)
+ }
+ }
+
+ impl<T> super::Find for std::sync::Arc<T>
+ where
+ T: super::Find,
+ {
+ type Error = T::Error;
+
+ fn contains(&self, id: impl AsRef<oid>) -> bool {
+ self.deref().contains(id)
+ }
+
+ fn try_find_cached<'a>(
+ &self,
+ id: impl AsRef<oid>,
+ buffer: &'a mut Vec<u8>,
+ pack_cache: &mut impl crate::cache::DecodeEntry,
+ ) -> Result<Option<(gix_object::Data<'a>, Option<data::entry::Location>)>, Self::Error> {
+ self.deref().try_find_cached(id, buffer, pack_cache)
+ }
+
+ fn location_by_oid(&self, id: impl AsRef<oid>, buf: &mut Vec<u8>) -> Option<data::entry::Location> {
+ self.deref().location_by_oid(id, buf)
+ }
+
+ fn pack_offsets_and_oid(&self, pack_id: u32) -> Option<Vec<(data::Offset, gix_hash::ObjectId)>> {
+ self.deref().pack_offsets_and_oid(pack_id)
+ }
+
+ fn entry_by_location(&self, object: &data::entry::Location) -> Option<find::Entry> {
+ self.deref().entry_by_location(object)
+ }
+ }
+
+ impl<T> super::Find for Rc<T>
+ where
+ T: super::Find,
+ {
+ type Error = T::Error;
+
+ fn contains(&self, id: impl AsRef<oid>) -> bool {
+ self.deref().contains(id)
+ }
+
+ fn try_find_cached<'a>(
+ &self,
+ id: impl AsRef<oid>,
+ buffer: &'a mut Vec<u8>,
+ pack_cache: &mut impl crate::cache::DecodeEntry,
+ ) -> Result<Option<(gix_object::Data<'a>, Option<data::entry::Location>)>, Self::Error> {
+ self.deref().try_find_cached(id, buffer, pack_cache)
+ }
+
+ fn location_by_oid(&self, id: impl AsRef<oid>, buf: &mut Vec<u8>) -> Option<data::entry::Location> {
+ self.deref().location_by_oid(id, buf)
+ }
+
+ fn pack_offsets_and_oid(&self, pack_id: u32) -> Option<Vec<(data::Offset, gix_hash::ObjectId)>> {
+ self.deref().pack_offsets_and_oid(pack_id)
+ }
+
+ fn entry_by_location(&self, location: &data::entry::Location) -> Option<find::Entry> {
+ self.deref().entry_by_location(location)
+ }
+ }
+
+ impl<T> super::Find for Box<T>
+ where
+ T: super::Find,
+ {
+ type Error = T::Error;
+
+ fn contains(&self, id: impl AsRef<oid>) -> bool {
+ self.deref().contains(id)
+ }
+
+ fn try_find_cached<'a>(
+ &self,
+ id: impl AsRef<oid>,
+ buffer: &'a mut Vec<u8>,
+ pack_cache: &mut impl crate::cache::DecodeEntry,
+ ) -> Result<Option<(gix_object::Data<'a>, Option<data::entry::Location>)>, Self::Error> {
+ self.deref().try_find_cached(id, buffer, pack_cache)
+ }
+
+ fn location_by_oid(&self, id: impl AsRef<oid>, buf: &mut Vec<u8>) -> Option<data::entry::Location> {
+ self.deref().location_by_oid(id, buf)
+ }
+
+ fn pack_offsets_and_oid(&self, pack_id: u32) -> Option<Vec<(data::Offset, gix_hash::ObjectId)>> {
+ self.deref().pack_offsets_and_oid(pack_id)
+ }
+
+ fn entry_by_location(&self, location: &data::entry::Location) -> Option<find::Entry> {
+ self.deref().entry_by_location(location)
+ }
+ }
+}
diff --git a/vendor/gix-pack/src/index/access.rs b/vendor/gix-pack/src/index/access.rs
new file mode 100644
index 000000000..0ac85dff7
--- /dev/null
+++ b/vendor/gix-pack/src/index/access.rs
@@ -0,0 +1,290 @@
+use std::{mem::size_of, ops::Range};
+
+use crate::{
+ data,
+ index::{self, EntryIndex, PrefixLookupResult, FAN_LEN},
+};
+
+const N32_SIZE: usize = size_of::<u32>();
+const N64_SIZE: usize = size_of::<u64>();
+const V1_HEADER_SIZE: usize = FAN_LEN * N32_SIZE;
+const V2_HEADER_SIZE: usize = N32_SIZE * 2 + FAN_LEN * N32_SIZE;
+const N32_HIGH_BIT: u32 = 1 << 31;
+
+/// Represents an entry within a pack index file, effectively mapping object [`IDs`][gix_hash::ObjectId] to pack data file locations.
+#[derive(PartialEq, Eq, Debug, Hash, Ord, PartialOrd, Clone)]
+#[cfg_attr(feature = "serde1", derive(serde::Serialize, serde::Deserialize))]
+pub struct Entry {
+ /// The ID of the object
+ pub oid: gix_hash::ObjectId,
+ /// The offset to the object's header in the pack data file
+ pub pack_offset: data::Offset,
+ /// The CRC32 hash over all bytes of the pack data entry.
+ ///
+ /// This can be useful for direct copies of pack data entries from one pack to another, with assurance that there was no bit rot.
+ /// _Note_: Only available in index version 2 or newer
+ pub crc32: Option<u32>,
+}
+
+/// Iteration and access
+impl index::File {
+ fn iter_v1(&self) -> impl Iterator<Item = Entry> + '_ {
+ match self.version {
+ index::Version::V1 => self.data[V1_HEADER_SIZE..]
+ .chunks(N32_SIZE + self.hash_len)
+ .take(self.num_objects as usize)
+ .map(|c| {
+ let (ofs, oid) = c.split_at(N32_SIZE);
+ Entry {
+ oid: gix_hash::ObjectId::from(oid),
+ pack_offset: crate::read_u32(ofs) as u64,
+ crc32: None,
+ }
+ }),
+ _ => panic!("Cannot use iter_v1() on index of type {:?}", self.version),
+ }
+ }
+
+ fn iter_v2(&self) -> impl Iterator<Item = Entry> + '_ {
+ let pack64_offset = self.offset_pack_offset64_v2();
+ match self.version {
+ index::Version::V2 => izip!(
+ self.data[V2_HEADER_SIZE..].chunks(self.hash_len),
+ self.data[self.offset_crc32_v2()..].chunks(N32_SIZE),
+ self.data[self.offset_pack_offset_v2()..].chunks(N32_SIZE)
+ )
+ .take(self.num_objects as usize)
+ .map(move |(oid, crc32, ofs32)| Entry {
+ oid: gix_hash::ObjectId::from(oid),
+ pack_offset: self.pack_offset_from_offset_v2(ofs32, pack64_offset),
+ crc32: Some(crate::read_u32(crc32)),
+ }),
+ _ => panic!("Cannot use iter_v2() on index of type {:?}", self.version),
+ }
+ }
+
+ /// Returns the object hash at the given index in our list of (sorted) sha1 hashes.
+ /// The index ranges from 0 to `self.num_objects()` (exclusive).
+ ///
+ /// # Panics
+ ///
+ /// If `index` is out of bounds.
+ pub fn oid_at_index(&self, index: EntryIndex) -> &gix_hash::oid {
+ let index = index as usize;
+ let start = match self.version {
+ index::Version::V2 => V2_HEADER_SIZE + index * self.hash_len,
+ index::Version::V1 => V1_HEADER_SIZE + index * (N32_SIZE + self.hash_len) + N32_SIZE,
+ };
+ gix_hash::oid::from_bytes_unchecked(&self.data[start..][..self.hash_len])
+ }
+
+ /// Returns the offset into our pack data file at which to start reading the object at `index`.
+ ///
+ /// # Panics
+ ///
+ /// If `index` is out of bounds.
+ pub fn pack_offset_at_index(&self, index: EntryIndex) -> data::Offset {
+ let index = index as usize;
+ match self.version {
+ index::Version::V2 => {
+ let start = self.offset_pack_offset_v2() + index * N32_SIZE;
+ self.pack_offset_from_offset_v2(&self.data[start..][..N32_SIZE], self.offset_pack_offset64_v2())
+ }
+ index::Version::V1 => {
+ let start = V1_HEADER_SIZE + index * (N32_SIZE + self.hash_len);
+ crate::read_u32(&self.data[start..][..N32_SIZE]) as u64
+ }
+ }
+ }
+
+ /// Returns the CRC32 of the object at the given `index`.
+ ///
+ /// _Note_: These are always present for index version 2 or higher.
+ ///
+ /// # Panics
+ ///
+ /// If `index` is out of bounds.
+ pub fn crc32_at_index(&self, index: EntryIndex) -> Option<u32> {
+ let index = index as usize;
+ match self.version {
+ index::Version::V2 => {
+ let start = self.offset_crc32_v2() + index * N32_SIZE;
+ Some(crate::read_u32(&self.data[start..start + N32_SIZE]))
+ }
+ index::Version::V1 => None,
+ }
+ }
+
+ /// Returns the `index` of the given hash for use with the [`oid_at_index()`][index::File::oid_at_index()],
+ /// [`pack_offset_at_index()`][index::File::pack_offset_at_index()] or [`crc32_at_index()`][index::File::crc32_at_index()].
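+ ///
+ /// A sketch of the typical round-trip, where `idx` stands for an opened index file and `id` for an
+ /// object id to look up (both hypothetical):
+ ///
+ /// ```ignore
+ /// if let Some(entry_index) = idx.lookup(&id) {
+ /// let pack_offset = idx.pack_offset_at_index(entry_index);
+ /// // …read the entry at `pack_offset` from the corresponding pack data file…
+ /// }
+ /// ```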
+ // NOTE: pretty much the same things as in `multi_index::File::lookup`, change things there
+ // as well.
+ pub fn lookup(&self, id: impl AsRef<gix_hash::oid>) -> Option<EntryIndex> {
+ lookup(id, &self.fan, |idx| self.oid_at_index(idx))
+ }
+
+ /// Given a `prefix`, find an object that matches it uniquely within this index and return `Some(Ok(entry_index))`.
+ /// If there is more than one object matching the `prefix`, `Some(Err(()))` is returned.
+ ///
+ /// Finally, if no object matches the `prefix`, the return value is `None`.
+ ///
+ /// Pass `candidates` to obtain the set of entry-indices matching `prefix`, with the same return value as
+ /// one would have received if it remained `None`. It will be empty if no object matched the `prefix`.
+ ///
+ // NOTE: pretty much the same things as in `index::File::lookup`, change things there
+ // as well.
+ pub fn lookup_prefix(
+ &self,
+ prefix: gix_hash::Prefix,
+ candidates: Option<&mut Range<EntryIndex>>,
+ ) -> Option<PrefixLookupResult> {
+ lookup_prefix(
+ prefix,
+ candidates,
+ &self.fan,
+ |idx| self.oid_at_index(idx),
+ self.num_objects,
+ )
+ }
+
+ /// An iterator over all [`Entries`][Entry] of this index file.
+ pub fn iter<'a>(&'a self) -> Box<dyn Iterator<Item = Entry> + 'a> {
+ match self.version {
+ index::Version::V2 => Box::new(self.iter_v2()),
+ index::Version::V1 => Box::new(self.iter_v1()),
+ }
+ }
+
+ /// Return a vector of ascending offsets into our respective pack data file.
+ ///
+ /// Useful to control an iteration over all pack entries in a cache-friendly way.
+ pub fn sorted_offsets(&self) -> Vec<data::Offset> {
+ let mut ofs: Vec<_> = match self.version {
+ index::Version::V1 => self.iter().map(|e| e.pack_offset).collect(),
+ index::Version::V2 => {
+ let offset32_start = &self.data[self.offset_pack_offset_v2()..];
+ let pack_offset_64_start = self.offset_pack_offset64_v2();
+ offset32_start
+ .chunks(N32_SIZE)
+ .take(self.num_objects as usize)
+ .map(|offset| self.pack_offset_from_offset_v2(offset, pack_offset_64_start))
+ .collect()
+ }
+ };
+ ofs.sort_unstable();
+ ofs
+ }
+
+ #[inline]
+ fn offset_crc32_v2(&self) -> usize {
+ V2_HEADER_SIZE + self.num_objects as usize * self.hash_len
+ }
+
+ #[inline]
+ fn offset_pack_offset_v2(&self) -> usize {
+ self.offset_crc32_v2() + self.num_objects as usize * N32_SIZE
+ }
+
+ #[inline]
+ fn offset_pack_offset64_v2(&self) -> usize {
+ self.offset_pack_offset_v2() + self.num_objects as usize * N32_SIZE
+ }
+
+ #[inline]
+ fn pack_offset_from_offset_v2(&self, offset: &[u8], pack64_offset: usize) -> data::Offset {
+ debug_assert_eq!(self.version, index::Version::V2);
+ let ofs32 = crate::read_u32(offset);
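+ // Index v2 stores offsets that fit into 31 bits directly; if the high bit is set, the remaining bits
+ // index into a trailing table of 64-bit offsets.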
+ if (ofs32 & N32_HIGH_BIT) == N32_HIGH_BIT {
+ let from = pack64_offset + (ofs32 ^ N32_HIGH_BIT) as usize * N64_SIZE;
+ crate::read_u64(&self.data[from..][..N64_SIZE])
+ } else {
+ ofs32 as u64
+ }
+ }
+}
+
+pub(crate) fn lookup_prefix<'a>(
+ prefix: gix_hash::Prefix,
+ candidates: Option<&mut Range<EntryIndex>>,
+ fan: &[u32; FAN_LEN],
+ oid_at_index: impl Fn(EntryIndex) -> &'a gix_hash::oid,
+ num_objects: u32,
+) -> Option<PrefixLookupResult> {
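+ // The fan-out table stores, for each possible first byte, the cumulative number of objects whose first
+ // byte is less than or equal to it; two adjacent entries bound the binary search below.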
+ let first_byte = prefix.as_oid().first_byte() as usize;
+ let mut upper_bound = fan[first_byte];
+ let mut lower_bound = if first_byte != 0 { fan[first_byte - 1] } else { 0 };
+
+ // Bisect using indices
+ while lower_bound < upper_bound {
+ let mid = (lower_bound + upper_bound) / 2;
+ let mid_sha = oid_at_index(mid);
+
+ use std::cmp::Ordering::*;
+ match prefix.cmp_oid(mid_sha) {
+ Less => upper_bound = mid,
+ Equal => match candidates {
+ Some(candidates) => {
+ let first_past_entry = ((0..mid).rev())
+ .take_while(|prev| prefix.cmp_oid(oid_at_index(*prev)) == Equal)
+ .last();
+
+ let last_future_entry = ((mid + 1)..num_objects)
+ .take_while(|next| prefix.cmp_oid(oid_at_index(*next)) == Equal)
+ .last();
+
+ *candidates = match (first_past_entry, last_future_entry) {
+ (Some(first), Some(last)) => first..last + 1,
+ (Some(first), None) => first..mid + 1,
+ (None, Some(last)) => mid..last + 1,
+ (None, None) => mid..mid + 1,
+ };
+
+ return if candidates.len() > 1 {
+ Some(Err(()))
+ } else {
+ Some(Ok(mid))
+ };
+ }
+ None => {
+ let next = mid + 1;
+ if next < num_objects && prefix.cmp_oid(oid_at_index(next)) == Equal {
+ return Some(Err(()));
+ }
+ if mid != 0 && prefix.cmp_oid(oid_at_index(mid - 1)) == Equal {
+ return Some(Err(()));
+ }
+ return Some(Ok(mid));
+ }
+ },
+ Greater => lower_bound = mid + 1,
+ }
+ }
+
+ if let Some(candidates) = candidates {
+ *candidates = 0..0;
+ }
+ None
+}
+
+pub(crate) fn lookup<'a>(
+ id: impl AsRef<gix_hash::oid>,
+ fan: &[u32; FAN_LEN],
+ oid_at_index: impl Fn(EntryIndex) -> &'a gix_hash::oid,
+) -> Option<EntryIndex> {
+ let id = id.as_ref();
+ let first_byte = id.first_byte() as usize;
+ let mut upper_bound = fan[first_byte];
+ let mut lower_bound = if first_byte != 0 { fan[first_byte - 1] } else { 0 };
+
+ while lower_bound < upper_bound {
+ let mid = (lower_bound + upper_bound) / 2;
+ let mid_sha = oid_at_index(mid);
+
+ use std::cmp::Ordering::*;
+ match id.cmp(mid_sha) {
+ Less => upper_bound = mid,
+ Equal => return Some(mid),
+ Greater => lower_bound = mid + 1,
+ }
+ }
+ None
+}
diff --git a/vendor/gix-pack/src/index/init.rs b/vendor/gix-pack/src/index/init.rs
new file mode 100644
index 000000000..13eecdbda
--- /dev/null
+++ b/vendor/gix-pack/src/index/init.rs
@@ -0,0 +1,91 @@
+use std::{mem::size_of, path::Path};
+
+use crate::index::{self, Version, FAN_LEN, V2_SIGNATURE};
+
+/// Returned by [`index::File::at()`].
+#[derive(thiserror::Error, Debug)]
+#[allow(missing_docs)]
+pub enum Error {
+ #[error("Could not open pack index file at '{path}'")]
+ Io {
+ source: std::io::Error,
+ path: std::path::PathBuf,
+ },
+ #[error("{message}")]
+ Corrupt { message: String },
+ #[error("Unsupported index version: {version}")]
+ UnsupportedVersion { version: u32 },
+}
+
+const N32_SIZE: usize = size_of::<u32>();
+
+/// Instantiation
+impl index::File {
+ /// Open the pack index file at the given `path`.
+ ///
+ /// The `object_hash` is a way to read (and write) the same file format with different hashes, as the hash kind
+ /// isn't stored within the file format itself.
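+ ///
+ /// A sketch with a hypothetical path, assuming a SHA-1 repository:
+ ///
+ /// ```ignore
+ /// let index = gix_pack::index::File::at("objects/pack/pack-1234.idx", gix_hash::Kind::Sha1)?;
+ /// assert_ne!(index.num_objects(), 0);
+ /// ```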
+ pub fn at(path: impl AsRef<Path>, object_hash: gix_hash::Kind) -> Result<index::File, Error> {
+ Self::at_inner(path.as_ref(), object_hash)
+ }
+
+ fn at_inner(path: &Path, object_hash: gix_hash::Kind) -> Result<index::File, Error> {
+ let data = crate::mmap::read_only(path).map_err(|source| Error::Io {
+ source,
+ path: path.to_owned(),
+ })?;
+ let idx_len = data.len();
+ let hash_len = object_hash.len_in_bytes();
+
+ let footer_size = hash_len * 2;
+ if idx_len < FAN_LEN * N32_SIZE + footer_size {
+ return Err(Error::Corrupt {
+ message: format!("Pack index of size {idx_len} is too small for even an empty index"),
+ });
+ }
+ let (kind, fan, num_objects) = {
+ let (kind, d) = {
+ let (sig, d) = data.split_at(V2_SIGNATURE.len());
+ if sig == V2_SIGNATURE {
+ (Version::V2, d)
+ } else {
+ (Version::V1, &data[..])
+ }
+ };
+ let d = {
+ if let Version::V2 = kind {
+ let (vd, dr) = d.split_at(N32_SIZE);
+ let version = crate::read_u32(vd);
+ if version != Version::V2 as u32 {
+ return Err(Error::UnsupportedVersion { version });
+ }
+ dr
+ } else {
+ d
+ }
+ };
+ let (fan, bytes_read) = read_fan(d);
+ let (_, _d) = d.split_at(bytes_read);
+ let num_objects = fan[FAN_LEN - 1];
+
+ (kind, fan, num_objects)
+ };
+ Ok(index::File {
+ data,
+ path: path.to_owned(),
+ version: kind,
+ num_objects,
+ fan,
+ hash_len,
+ object_hash,
+ })
+ }
+}
+
+fn read_fan(d: &[u8]) -> ([u32; FAN_LEN], usize) {
+ let mut fan = [0; FAN_LEN];
+ for (c, f) in d.chunks(N32_SIZE).zip(fan.iter_mut()) {
+ *f = crate::read_u32(c);
+ }
+ (fan, FAN_LEN * N32_SIZE)
+}
diff --git a/vendor/gix-pack/src/index/mod.rs b/vendor/gix-pack/src/index/mod.rs
new file mode 100644
index 000000000..341322f7d
--- /dev/null
+++ b/vendor/gix-pack/src/index/mod.rs
@@ -0,0 +1,155 @@
+//! an index into the pack file
+//!
+/// From itertools
+/// Create an iterator running multiple iterators in lockstep.
+///
+/// The `izip!` iterator yields elements until any subiterator
+/// returns `None`.
+///
+/// This is a version of the standard ``.zip()`` that's supporting more than
+/// two iterators. The iterator element type is a tuple with one element
+/// from each of the input iterators. Just like ``.zip()``, the iteration stops
+/// when the shortest of the inputs reaches its end.
+///
+/// **Note:** The result of this macro is in the general case an iterator
+/// composed of repeated `.zip()` and a `.map()`; it has an anonymous type.
+/// The special cases of one and two arguments produce the equivalent of
+/// `$a.into_iter()` and `$a.into_iter().zip($b)` respectively.
+///
+/// Prefer this macro `izip!()` over [`multizip`] for the performance benefits
+/// of using the standard library `.zip()`.
+///
+/// [`multizip`]: fn.multizip.html
+///
+/// ```
+/// # use itertools::izip;
+/// #
+/// # fn main() {
+///
+/// // iterate over three sequences side-by-side
+/// let mut results = [0, 0, 0, 0];
+/// let inputs = [3, 7, 9, 6];
+///
+/// for (r, index, input) in izip!(&mut results, 0..10, &inputs) {
+/// *r = index * 10 + input;
+/// }
+///
+/// assert_eq!(results, [0 + 3, 10 + 7, 29, 36]);
+/// # }
+/// ```
+macro_rules! izip {
+ // @closure creates a tuple-flattening closure for .map() call. usage:
+ // @closure partial_pattern => partial_tuple , rest , of , iterators
+ // eg. izip!( @closure ((a, b), c) => (a, b, c) , dd , ee )
+ ( @closure $p:pat => $tup:expr ) => {
+ |$p| $tup
+ };
+
+ // The "b" identifier is a different identifier on each recursion level thanks to hygiene.
+ ( @closure $p:pat => ( $($tup:tt)* ) , $_iter:expr $( , $tail:expr )* ) => {
+ izip!(@closure ($p, b) => ( $($tup)*, b ) $( , $tail )*)
+ };
+
+ // unary
+ ($first:expr $(,)*) => {
+ std::iter::IntoIterator::into_iter($first)
+ };
+
+ // binary
+ ($first:expr, $second:expr $(,)*) => {
+ izip!($first)
+ .zip($second)
+ };
+
+ // n-ary where n > 2
+ ( $first:expr $( , $rest:expr )* $(,)* ) => {
+ izip!($first)
+ $(
+ .zip($rest)
+ )*
+ .map(
+ izip!(@closure a => (a) $( , $rest )*)
+ )
+ };
+}
+
+use memmap2::Mmap;
+
+/// The version of an index file
+#[derive(PartialEq, Eq, Ord, PartialOrd, Debug, Hash, Clone, Copy)]
+#[cfg_attr(feature = "serde1", derive(serde::Serialize, serde::Deserialize))]
+#[allow(missing_docs)]
+pub enum Version {
+ V1 = 1,
+ V2 = 2,
+}
+
+impl Default for Version {
+ fn default() -> Self {
+ Version::V2
+ }
+}
+
+impl Version {
+ /// The kind of hash to produce to be compatible to this kind of index
+ pub fn hash(&self) -> gix_hash::Kind {
+ match self {
+ Version::V1 | Version::V2 => gix_hash::Kind::Sha1,
+ }
+ }
+}
+
+/// A way to indicate if a lookup, despite being successful, was ambiguous or yielded exactly
+/// one result in the particular index.
+pub type PrefixLookupResult = Result<EntryIndex, ()>;
+
+/// The type for referring to indices of an entry within the index file.
+pub type EntryIndex = u32;
+
+const FAN_LEN: usize = 256;
+
+/// A representation of a pack index file
+pub struct File {
+ data: Mmap,
+ path: std::path::PathBuf,
+ version: Version,
+ num_objects: u32,
+ fan: [u32; FAN_LEN],
+ hash_len: usize,
+ object_hash: gix_hash::Kind,
+}
+
+/// Basic file information
+impl File {
+ /// The version of the pack index
+ pub fn version(&self) -> Version {
+ self.version
+ }
+ /// The path of the opened index file
+ pub fn path(&self) -> &std::path::Path {
+ &self.path
+ }
+ /// The amount of objects stored in the pack and index, as one past the highest entry index.
+ pub fn num_objects(&self) -> EntryIndex {
+ self.num_objects
+ }
+ /// The kind of hash we assume
+ pub fn object_hash(&self) -> gix_hash::Kind {
+ self.object_hash
+ }
+}
+
+const V2_SIGNATURE: &[u8] = b"\xfftOc";
+///
+pub mod init;
+
+pub(crate) mod access;
+pub use access::Entry;
+
+///
+pub mod traverse;
+mod util;
+///
+pub mod verify;
+///
+pub mod write;
diff --git a/vendor/gix-pack/src/index/traverse/error.rs b/vendor/gix-pack/src/index/traverse/error.rs
new file mode 100644
index 000000000..2310c3bab
--- /dev/null
+++ b/vendor/gix-pack/src/index/traverse/error.rs
@@ -0,0 +1,44 @@
+use crate::index;
+
+/// Returned by [`index::File::traverse_with_index()`] and [`index::File::traverse_with_lookup()`].
+#[derive(thiserror::Error, Debug)]
+#[allow(missing_docs)]
+pub enum Error<E: std::error::Error + Send + Sync + 'static> {
+ #[error("One of the traversal processors failed")]
+ Processor(#[source] E),
+ #[error("Index file, pack file or object verification failed")]
+ VerifyChecksum(#[from] index::verify::checksum::Error),
+ #[error("The pack delta tree index could not be built")]
+ Tree(#[from] crate::cache::delta::from_offsets::Error),
+ #[error("The tree traversal failed")]
+ TreeTraversal(#[from] crate::cache::delta::traverse::Error),
+ #[error("Object {id} at offset {offset} could not be decoded")]
+ PackDecode {
+ id: gix_hash::ObjectId,
+ offset: u64,
+ source: crate::data::decode::Error,
+ },
+ #[error("The packfile's checksum didn't match the index file checksum: expected {expected}, got {actual}")]
+ PackMismatch {
+ expected: gix_hash::ObjectId,
+ actual: gix_hash::ObjectId,
+ },
+ #[error("The hash of {kind} object at offset {offset} didn't match the checksum in the index file: expected {expected}, got {actual}")]
+ PackObjectMismatch {
+ expected: gix_hash::ObjectId,
+ actual: gix_hash::ObjectId,
+ offset: u64,
+ kind: gix_object::Kind,
+ },
+ #[error(
+ "The CRC32 of {kind} object at offset {offset} didn't match the checksum in the index file: expected {expected}, got {actual}"
+ )]
+ Crc32Mismatch {
+ expected: u32,
+ actual: u32,
+ offset: u64,
+ kind: gix_object::Kind,
+ },
+ #[error("Interrupted")]
+ Interrupted,
+}
diff --git a/vendor/gix-pack/src/index/traverse/mod.rs b/vendor/gix-pack/src/index/traverse/mod.rs
new file mode 100644
index 000000000..42c820b0e
--- /dev/null
+++ b/vendor/gix-pack/src/index/traverse/mod.rs
@@ -0,0 +1,245 @@
+use std::sync::atomic::AtomicBool;
+
+use gix_features::{parallel, progress::Progress};
+
+use crate::index;
+
+mod reduce;
+///
+pub mod with_index;
+///
+pub mod with_lookup;
+use reduce::Reducer;
+
+mod error;
+pub use error::Error;
+
+mod types;
+pub use types::{Algorithm, ProgressId, SafetyCheck, Statistics};
+
+/// Traversal options for [`index::File::traverse()`].
+#[derive(Debug, Clone)]
+pub struct Options<F> {
+ /// The algorithm to employ.
+ pub traversal: Algorithm,
+ /// If `Some`, only use the given amount of threads. Otherwise, the amount of threads to use will be selected based on
+ /// the amount of available logical cores.
+ pub thread_limit: Option<usize>,
+ /// The kinds of safety checks to perform.
+ pub check: SafetyCheck,
+ /// A function to create a pack cache
+ pub make_pack_lookup_cache: F,
+}
+
+impl Default for Options<fn() -> crate::cache::Never> {
+ fn default() -> Self {
+ Options {
+ check: Default::default(),
+ traversal: Default::default(),
+ thread_limit: None,
+ make_pack_lookup_cache: || crate::cache::Never,
+ }
+ }
+}
+
+/// The outcome of the [`traverse()`][index::File::traverse()] method.
+pub struct Outcome<P> {
+ /// The checksum obtained when hashing the file, which matched the checksum contained within the file.
+ pub actual_index_checksum: gix_hash::ObjectId,
+ /// The statistics obtained during traversal.
+ pub statistics: Statistics,
+ /// The input progress to allow reuse.
+ pub progress: P,
+}
+
+/// Traversal of pack data files using an index file
+impl index::File {
+ /// Iterate through all _decoded objects_ in the given `pack` and handle them with a `Processor`.
+ /// The returned [`Outcome`] carries the verified index checksum, statistics and the input `progress`, and the pack traversal will always verify
+ /// the whole pack's checksum to assure it was correct. In case of bit-rot, the operation will abort early, using the
+ /// [interrupt mechanism][gix_features::interrupt], without verifying all objects.
+ ///
+ /// # Algorithms
+ ///
+ /// Using the [`Options::traversal`] field one can choose between two algorithms providing different tradeoffs. Both invoke
+ /// `new_processor()` to create functions receiving decoded objects, their object kind, index entry and a progress instance to provide
+ /// progress information.
+ ///
+ /// * [`Algorithm::DeltaTreeLookup`] builds an index to avoid any unnecessary computation while resolving objects, avoiding
+ /// the need for a cache entirely, rendering `make_pack_lookup_cache` unused.
+ /// One could also call [`traverse_with_index()`][index::File::traverse_with_index()] directly.
+ /// * [`Algorithm::Lookup`] uses a cache created by `make_pack_lookup_cache` to avoid having to re-compute all bases of a delta-chain while
+ /// decoding objects.
+ /// One could also call [`traverse_with_lookup()`][index::File::traverse_with_lookup()] directly.
+ ///
+ /// Use [`thread_limit`][Options::thread_limit] to further control parallelism and [`check`][SafetyCheck] to define how thoroughly the passed
+ /// objects shall be verified beforehand.
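+ ///
+ /// # Example
+ ///
+ /// A sketch of a processor that merely counts objects; `index` and `pack` stand for an already opened
+ /// index and pack data file (both hypothetical), and the default lookup algorithm is used:
+ ///
+ /// ```ignore
+ /// use std::sync::atomic::{AtomicBool, AtomicUsize, Ordering};
+ ///
+ /// let seen = AtomicUsize::new(0);
+ /// let should_interrupt = AtomicBool::new(false);
+ /// let outcome = index.traverse(
+ /// &pack,
+ /// gix_features::progress::Discard,
+ /// &should_interrupt,
+ /// || {
+ /// |_kind, _data, _entry, _progress| {
+ /// seen.fetch_add(1, Ordering::Relaxed);
+ /// Ok::<_, std::convert::Infallible>(())
+ /// }
+ /// },
+ /// gix_pack::index::traverse::Options::default(),
+ /// )?;
+ /// assert_eq!(seen.load(Ordering::Relaxed) as u32, index.num_objects());
+ /// ```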
+ pub fn traverse<P, C, Processor, E, F>(
+ &self,
+ pack: &crate::data::File,
+ progress: P,
+ should_interrupt: &AtomicBool,
+ new_processor: impl Fn() -> Processor + Send + Clone,
+ Options {
+ traversal,
+ thread_limit,
+ check,
+ make_pack_lookup_cache,
+ }: Options<F>,
+ ) -> Result<Outcome<P>, Error<E>>
+ where
+ P: Progress,
+ C: crate::cache::DecodeEntry,
+ E: std::error::Error + Send + Sync + 'static,
+ Processor: FnMut(
+ gix_object::Kind,
+ &[u8],
+ &index::Entry,
+ &mut <P::SubProgress as Progress>::SubProgress,
+ ) -> Result<(), E>,
+ F: Fn() -> C + Send + Clone,
+ {
+ match traversal {
+ Algorithm::Lookup => self.traverse_with_lookup(
+ new_processor,
+ pack,
+ progress,
+ should_interrupt,
+ with_lookup::Options {
+ thread_limit,
+ check,
+ make_pack_lookup_cache,
+ },
+ ),
+ Algorithm::DeltaTreeLookup => self.traverse_with_index(
+ pack,
+ new_processor,
+ progress,
+ should_interrupt,
+ crate::index::traverse::with_index::Options { check, thread_limit },
+ ),
+ }
+ }
+
+ fn possibly_verify<E>(
+ &self,
+ pack: &crate::data::File,
+ check: SafetyCheck,
+ pack_progress: impl Progress,
+ index_progress: impl Progress,
+ should_interrupt: &AtomicBool,
+ ) -> Result<gix_hash::ObjectId, Error<E>>
+ where
+ E: std::error::Error + Send + Sync + 'static,
+ {
+ Ok(if check.file_checksum() {
+ if self.pack_checksum() != pack.checksum() {
+ return Err(Error::PackMismatch {
+ actual: pack.checksum(),
+ expected: self.pack_checksum(),
+ });
+ }
+ let (pack_res, id) = parallel::join(
+ move || pack.verify_checksum(pack_progress, should_interrupt),
+ move || self.verify_checksum(index_progress, should_interrupt),
+ );
+ pack_res?;
+ id?
+ } else {
+ self.index_checksum()
+ })
+ }
+
+ #[allow(clippy::too_many_arguments)]
+ fn decode_and_process_entry<C, P, E>(
+ &self,
+ check: SafetyCheck,
+ pack: &crate::data::File,
+ cache: &mut C,
+ buf: &mut Vec<u8>,
+ progress: &mut P,
+ index_entry: &crate::index::Entry,
+ processor: &mut impl FnMut(gix_object::Kind, &[u8], &index::Entry, &mut P) -> Result<(), E>,
+ ) -> Result<crate::data::decode::entry::Outcome, Error<E>>
+ where
+ C: crate::cache::DecodeEntry,
+ P: Progress,
+ E: std::error::Error + Send + Sync + 'static,
+ {
+ let pack_entry = pack.entry(index_entry.pack_offset);
+ let pack_entry_data_offset = pack_entry.data_offset;
+ let entry_stats = pack
+ .decode_entry(
+ pack_entry,
+ buf,
+ |id, _| {
+ self.lookup(id).map(|index| {
+ crate::data::decode::entry::ResolvedBase::InPack(pack.entry(self.pack_offset_at_index(index)))
+ })
+ },
+ cache,
+ )
+ .map_err(|e| Error::PackDecode {
+ source: e,
+ id: index_entry.oid,
+ offset: index_entry.pack_offset,
+ })?;
+ let object_kind = entry_stats.kind;
+ let header_size = (pack_entry_data_offset - index_entry.pack_offset) as usize;
+ let entry_len = header_size + entry_stats.compressed_size;
+
+ process_entry(
+ check,
+ object_kind,
+ buf,
+ progress,
+ index_entry,
+ || pack.entry_crc32(index_entry.pack_offset, entry_len),
+ processor,
+ )?;
+ Ok(entry_stats)
+ }
+}
+
+#[allow(clippy::too_many_arguments)]
+fn process_entry<P, E>(
+ check: SafetyCheck,
+ object_kind: gix_object::Kind,
+ decompressed: &[u8],
+ progress: &mut P,
+ index_entry: &crate::index::Entry,
+ pack_entry_crc32: impl FnOnce() -> u32,
+ processor: &mut impl FnMut(gix_object::Kind, &[u8], &index::Entry, &mut P) -> Result<(), E>,
+) -> Result<(), Error<E>>
+where
+ P: Progress,
+ E: std::error::Error + Send + Sync + 'static,
+{
+ if check.object_checksum() {
+ let mut hasher = gix_features::hash::hasher(index_entry.oid.kind());
+ hasher.update(&gix_object::encode::loose_header(object_kind, decompressed.len()));
+ hasher.update(decompressed);
+
+ let actual_oid = gix_hash::ObjectId::from(hasher.digest());
+ if actual_oid != index_entry.oid {
+ return Err(Error::PackObjectMismatch {
+ actual: actual_oid,
+ expected: index_entry.oid,
+ offset: index_entry.pack_offset,
+ kind: object_kind,
+ });
+ }
+ if let Some(desired_crc32) = index_entry.crc32 {
+ let actual_crc32 = pack_entry_crc32();
+ if actual_crc32 != desired_crc32 {
+ return Err(Error::Crc32Mismatch {
+ actual: actual_crc32,
+ expected: desired_crc32,
+ offset: index_entry.pack_offset,
+ kind: object_kind,
+ });
+ }
+ }
+ }
+ processor(object_kind, decompressed, index_entry, progress).map_err(Error::Processor)
+}
diff --git a/vendor/gix-pack/src/index/traverse/reduce.rs b/vendor/gix-pack/src/index/traverse/reduce.rs
new file mode 100644
index 000000000..e05341242
--- /dev/null
+++ b/vendor/gix-pack/src/index/traverse/reduce.rs
@@ -0,0 +1,129 @@
+use std::{
+ sync::atomic::{AtomicBool, Ordering},
+ time::Instant,
+};
+
+use gix_features::{
+ parallel,
+ progress::Progress,
+ threading::{lock, Mutable, OwnShared},
+};
+
+use crate::{data, index::traverse};
+
+fn add_decode_result(lhs: &mut data::decode::entry::Outcome, rhs: data::decode::entry::Outcome) {
+ lhs.num_deltas += rhs.num_deltas;
+ lhs.decompressed_size += rhs.decompressed_size;
+ lhs.compressed_size += rhs.compressed_size;
+ lhs.object_size += rhs.object_size;
+}
+
+fn div_decode_result(lhs: &mut data::decode::entry::Outcome, div: usize) {
+ if div != 0 {
+ lhs.num_deltas = (lhs.num_deltas as f32 / div as f32) as u32;
+ lhs.decompressed_size /= div as u64;
+ lhs.compressed_size /= div;
+ lhs.object_size /= div as u64;
+ }
+}
+
+pub struct Reducer<'a, P, E> {
+ progress: OwnShared<Mutable<P>>,
+ check: traverse::SafetyCheck,
+ then: Instant,
+ entries_seen: usize,
+ stats: traverse::Statistics,
+ should_interrupt: &'a AtomicBool,
+ _error: std::marker::PhantomData<E>,
+}
+
+impl<'a, P, E> Reducer<'a, P, E>
+where
+ P: Progress,
+{
+ pub fn from_progress(
+ progress: OwnShared<Mutable<P>>,
+ pack_data_len_in_bytes: usize,
+ check: traverse::SafetyCheck,
+ should_interrupt: &'a AtomicBool,
+ ) -> Self {
+ let stats = traverse::Statistics {
+ pack_size: pack_data_len_in_bytes as u64,
+ ..Default::default()
+ };
+ Reducer {
+ progress,
+ check,
+ then: Instant::now(),
+ entries_seen: 0,
+ should_interrupt,
+ stats,
+ _error: Default::default(),
+ }
+ }
+}
+
+impl<'a, P, E> parallel::Reduce for Reducer<'a, P, E>
+where
+ P: Progress,
+ E: std::error::Error + Send + Sync + 'static,
+{
+ type Input = Result<Vec<data::decode::entry::Outcome>, traverse::Error<E>>;
+ type FeedProduce = ();
+ type Output = traverse::Statistics;
+ type Error = traverse::Error<E>;
+
+ fn feed(&mut self, input: Self::Input) -> Result<(), Self::Error> {
+ let chunk_stats: Vec<_> = match input {
+ Err(err @ traverse::Error::PackDecode { .. }) if !self.check.fatal_decode_error() => {
+ lock(&self.progress).info(format!("Ignoring decode error: {err}"));
+ return Ok(());
+ }
+ res => res,
+ }?;
+ self.entries_seen += chunk_stats.len();
+
+ let chunk_total = chunk_stats.into_iter().fold(
+ data::decode::entry::Outcome::default_from_kind(gix_object::Kind::Tree),
+ |mut total, stats| {
+ *self.stats.objects_per_chain_length.entry(stats.num_deltas).or_insert(0) += 1;
+ self.stats.total_decompressed_entries_size += stats.decompressed_size;
+ self.stats.total_compressed_entries_size += stats.compressed_size as u64;
+ self.stats.total_object_size += stats.object_size;
+ use gix_object::Kind::*;
+ match stats.kind {
+ Commit => self.stats.num_commits += 1,
+ Tree => self.stats.num_trees += 1,
+ Blob => self.stats.num_blobs += 1,
+ Tag => self.stats.num_tags += 1,
+ }
+ add_decode_result(&mut total, stats);
+ total
+ },
+ );
+
+ add_decode_result(&mut self.stats.average, chunk_total);
+ lock(&self.progress).set(self.entries_seen);
+
+ if self.should_interrupt.load(Ordering::SeqCst) {
+ return Err(Self::Error::Interrupted);
+ }
+ Ok(())
+ }
+
+ fn finalize(mut self) -> Result<Self::Output, Self::Error> {
+ div_decode_result(&mut self.stats.average, self.entries_seen);
+
+ let elapsed_s = self.then.elapsed().as_secs_f32();
+ let objects_per_second = (self.entries_seen as f32 / elapsed_s) as u32;
+
+ lock(&self.progress).info(format!(
+ "of {} objects done in {:.2}s ({} objects/s, ~{}/s)",
+ self.entries_seen,
+ elapsed_s,
+ objects_per_second,
+ gix_features::progress::bytesize::ByteSize(self.stats.average.object_size * objects_per_second as u64)
+ ));
+ Ok(self.stats)
+ }
+}
diff --git a/vendor/gix-pack/src/index/traverse/types.rs b/vendor/gix-pack/src/index/traverse/types.rs
new file mode 100644
index 000000000..84ebc8932
--- /dev/null
+++ b/vendor/gix-pack/src/index/traverse/types.rs
@@ -0,0 +1,123 @@
+use std::{collections::BTreeMap, marker::PhantomData};
+
+/// Statistics regarding objects encountered during execution of the [`traverse()`][crate::index::File::traverse()] method.
+#[derive(Debug, PartialEq, Eq, Hash, Ord, PartialOrd, Clone)]
+#[cfg_attr(feature = "serde1", derive(serde::Serialize, serde::Deserialize))]
+pub struct Statistics {
+ /// The average over all decoded objects
+ pub average: crate::data::decode::entry::Outcome,
+ /// A mapping of the length of the chain to the amount of objects at that length.
+ ///
+ /// A length of 0 indicates full objects, and everything above that involves the given amount
+ /// of delta objects.
+ pub objects_per_chain_length: BTreeMap<u32, u32>,
+ /// The amount of bytes in all compressed streams, one per entry
+ pub total_compressed_entries_size: u64,
+ /// The amount of bytes in all decompressed streams, one per entry
+ pub total_decompressed_entries_size: u64,
+ /// The amount of bytes occupied by all undeltified, decompressed objects
+ pub total_object_size: u64,
+ /// The amount of bytes occupied by the pack itself
+ pub pack_size: u64,
+ /// The amount of objects encountered that were commits
+ pub num_commits: u32,
+ /// The amount of objects encountered that were trees
+ pub num_trees: u32,
+ /// The amount of objects encountered that were tags
+ pub num_tags: u32,
+ /// The amount of objects encountered that were blobs
+ pub num_blobs: u32,
+}
+
+impl Default for Statistics {
+ fn default() -> Self {
+ Statistics {
+ average: crate::data::decode::entry::Outcome::default_from_kind(gix_object::Kind::Tree),
+ objects_per_chain_length: Default::default(),
+ total_compressed_entries_size: 0,
+ total_decompressed_entries_size: 0,
+ total_object_size: 0,
+ pack_size: 0,
+ num_blobs: 0,
+ num_commits: 0,
+ num_trees: 0,
+ num_tags: 0,
+ }
+ }
+}
+
+/// The ways to validate decoded objects before passing them to the processor.
+#[derive(Debug, PartialEq, Eq, Hash, Ord, PartialOrd, Clone, Copy)]
+#[cfg_attr(feature = "serde1", derive(serde::Serialize, serde::Deserialize))]
+pub enum SafetyCheck {
+ /// Don't verify the validity of the checksums stored in the index and pack file
+ SkipFileChecksumVerification,
+
+ /// All of the above, and also don't perform any object checksum verification
+ SkipFileAndObjectChecksumVerification,
+
+ /// All of the above, and only log object decode errors.
+ ///
+ /// Useful if there is a damaged pack and you would like to traverse as many objects as possible.
+ SkipFileAndObjectChecksumVerificationAndNoAbortOnDecodeError,
+
+ /// Perform all available safety checks before operating on the pack and
+ /// abort if any of them fails
+ All,
+}
+
+impl SafetyCheck {
+ pub(crate) fn file_checksum(&self) -> bool {
+ matches!(self, SafetyCheck::All)
+ }
+ pub(crate) fn object_checksum(&self) -> bool {
+ matches!(self, SafetyCheck::All | SafetyCheck::SkipFileChecksumVerification)
+ }
+ pub(crate) fn fatal_decode_error(&self) -> bool {
+ match self {
+ SafetyCheck::All
+ | SafetyCheck::SkipFileChecksumVerification
+ | SafetyCheck::SkipFileAndObjectChecksumVerification => true,
+ SafetyCheck::SkipFileAndObjectChecksumVerificationAndNoAbortOnDecodeError => false,
+ }
+ }
+}
+
+impl Default for SafetyCheck {
+ fn default() -> Self {
+ SafetyCheck::All
+ }
+}
+
+/// The way we verify the pack
+#[derive(Debug, PartialEq, Eq, Hash, Ord, PartialOrd, Clone, Copy)]
+#[cfg_attr(feature = "serde1", derive(serde::Serialize, serde::Deserialize))]
+pub enum Algorithm {
+ /// Build an index to allow decoding each delta and base exactly once, saving a lot of computational
+ /// resource at the expense of resident memory, as we will use an additional `DeltaTree` to accelerate
+ /// delta chain resolution.
+ DeltaTreeLookup,
+ /// We look up each object similarly to what would happen during normal repository use.
+ /// Uses more compute resources as it will resolve delta chains from back to front, but start right away
+ /// without indexing or investing any memory in indices.
+ ///
+ /// This option may be well suited for big packs on memory-starved systems that support memory mapping.
+ Lookup,
+}
+
+impl Default for Algorithm {
+ fn default() -> Self {
+ Algorithm::DeltaTreeLookup
+ }
+}
+
+/// The progress ids used in [`traverse()`][crate::index::File::traverse()] .
+///
+/// Use this information to selectively extract the progress of interest in case the parent application has custom visualization.
+#[derive(Debug, Copy, Clone)]
+pub enum ProgressId {
+ /// A root progress which isn't actually used, but links to the `ProgressId` of the lookup version of the algorithm.
+ WithLookup(PhantomData<super::with_lookup::ProgressId>),
+ /// A root progress which isn't actually used, but links to the `ProgressId` of the indexed version of the algorithm.
+ WithIndex(PhantomData<super::with_index::ProgressId>),
+}
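// Illustrative sketch (not from the vendored sources): a crate-internal test showing how the
// `SafetyCheck` variants map onto the three predicates above, with `All` enabling every check and
// the longest variant name additionally tolerating decode errors.
#[cfg(test)]
mod safety_check_illustration {
    use super::SafetyCheck;

    #[test]
    fn progressively_fewer_checks() {
        assert!(SafetyCheck::All.file_checksum());
        assert!(!SafetyCheck::SkipFileChecksumVerification.file_checksum());
        assert!(SafetyCheck::SkipFileChecksumVerification.object_checksum());
        assert!(!SafetyCheck::SkipFileAndObjectChecksumVerification.object_checksum());
        assert!(SafetyCheck::SkipFileAndObjectChecksumVerification.fatal_decode_error());
        assert!(!SafetyCheck::SkipFileAndObjectChecksumVerificationAndNoAbortOnDecodeError.fatal_decode_error());
    }
}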
diff --git a/vendor/gix-pack/src/index/traverse/with_index.rs b/vendor/gix-pack/src/index/traverse/with_index.rs
new file mode 100644
index 000000000..769bbd07f
--- /dev/null
+++ b/vendor/gix-pack/src/index/traverse/with_index.rs
@@ -0,0 +1,230 @@
+use std::sync::atomic::{AtomicBool, Ordering};
+
+use gix_features::{parallel, progress::Progress};
+
+use super::Error;
+use crate::{
+ cache::delta::traverse,
+ index::{self, traverse::Outcome, util::index_entries_sorted_by_offset_ascending},
+};
+
+/// Traversal options for [`traverse_with_index()`][index::File::traverse_with_index()]
+#[derive(Default)]
+pub struct Options {
+ /// If `Some`, only use the given amount of threads. Otherwise, the amount of threads to use will be selected based on
+ /// the amount of available logical cores.
+ pub thread_limit: Option<usize>,
+ /// The kinds of safety checks to perform.
+ pub check: crate::index::traverse::SafetyCheck,
+}
+
+/// The progress ids used in [`index::File::traverse_with_index()`].
+///
+/// Use this information to selectively extract the progress of interest in case the parent application has custom visualization.
+#[derive(Debug, Copy, Clone)]
+pub enum ProgressId {
+ /// The amount of bytes currently processed to generate a checksum of the *pack data file*.
+ HashPackDataBytes,
+ /// The amount of bytes currently processed to generate a checksum of the *pack index file*.
+ HashPackIndexBytes,
+ /// Collect all object hashes into a vector and sort it by pack offset.
+ CollectSortedIndexEntries,
+ /// Count the objects processed when building a cache tree from all objects in a pack index.
+ TreeFromOffsetsObjects,
+ /// The amount of objects which were decoded.
+ DecodedObjects,
+ /// The amount of bytes that were decoded in total, as the sum of all bytes to represent all decoded objects.
+ DecodedBytes,
+}
+
+impl From<ProgressId> for gix_features::progress::Id {
+ fn from(v: ProgressId) -> Self {
+ match v {
+ ProgressId::HashPackDataBytes => *b"PTHP",
+ ProgressId::HashPackIndexBytes => *b"PTHI",
+ ProgressId::CollectSortedIndexEntries => *b"PTCE",
+ ProgressId::TreeFromOffsetsObjects => *b"PTDI",
+ ProgressId::DecodedObjects => *b"PTRO",
+ ProgressId::DecodedBytes => *b"PTDB",
+ }
+ }
+}
+
+/// Traversal with index
+impl index::File {
+ /// Iterate through all _decoded objects_ in the given `pack` and handle them with a `Processor`, using an index to reduce waste
+ /// at the cost of memory.
+ ///
+ /// For more details, see the documentation on the [`traverse()`][index::File::traverse()] method.
+ pub fn traverse_with_index<P, Processor, E>(
+ &self,
+ pack: &crate::data::File,
+ new_processor: impl Fn() -> Processor + Send + Clone,
+ mut progress: P,
+ should_interrupt: &AtomicBool,
+ Options { check, thread_limit }: Options,
+ ) -> Result<Outcome<P>, Error<E>>
+ where
+ P: Progress,
+ Processor: FnMut(
+ gix_object::Kind,
+ &[u8],
+ &index::Entry,
+ &mut <P::SubProgress as Progress>::SubProgress,
+ ) -> Result<(), E>,
+ E: std::error::Error + Send + Sync + 'static,
+ {
+ let (verify_result, traversal_result) = parallel::join(
+ {
+ let pack_progress = progress.add_child_with_id(
+ format!(
+ "Hash of pack '{}'",
+ pack.path().file_name().expect("pack has filename").to_string_lossy()
+ ),
+ ProgressId::HashPackDataBytes.into(),
+ );
+ let index_progress = progress.add_child_with_id(
+ format!(
+ "Hash of index '{}'",
+ self.path.file_name().expect("index has filename").to_string_lossy()
+ ),
+ ProgressId::HashPackIndexBytes.into(),
+ );
+ move || {
+ let res = self.possibly_verify(pack, check, pack_progress, index_progress, should_interrupt);
+ if res.is_err() {
+ should_interrupt.store(true, Ordering::SeqCst);
+ }
+ res
+ }
+ },
+ || -> Result<_, Error<_>> {
+ let sorted_entries = index_entries_sorted_by_offset_ascending(
+ self,
+ progress.add_child_with_id("collecting sorted index", ProgressId::CollectSortedIndexEntries.into()),
+ ); /* Pack Traverse Collect sorted Entries */
+ let tree = crate::cache::delta::Tree::from_offsets_in_pack(
+ pack.path(),
+ sorted_entries.into_iter().map(Entry::from),
+ |e| e.index_entry.pack_offset,
+ |id| self.lookup(id).map(|idx| self.pack_offset_at_index(idx)),
+ progress.add_child_with_id("indexing", ProgressId::TreeFromOffsetsObjects.into()),
+ should_interrupt,
+ self.object_hash,
+ )?;
+ let mut outcome = digest_statistics(tree.traverse(
+ |slice, out| pack.entry_slice(slice).map(|entry| out.copy_from_slice(entry)),
+ pack.pack_end() as u64,
+ new_processor,
+ |data,
+ progress,
+ traverse::Context {
+ entry: pack_entry,
+ entry_end,
+ decompressed: bytes,
+ state: ref mut processor,
+ level,
+ }| {
+ let object_kind = pack_entry.header.as_kind().expect("non-delta object");
+ data.level = level;
+ data.decompressed_size = pack_entry.decompressed_size;
+ data.object_kind = object_kind;
+ data.compressed_size = entry_end - pack_entry.data_offset;
+ data.object_size = bytes.len() as u64;
+ let result = crate::index::traverse::process_entry(
+ check,
+ object_kind,
+ bytes,
+ progress,
+ &data.index_entry,
+ || {
+ // TODO: Fix this - we overwrite the header of 'data' which also changes the computed entry size,
+ // causing index and pack to seemingly mismatch. This is surprising, and should be done differently.
+ // debug_assert_eq!(&data.index_entry.pack_offset, &pack_entry.pack_offset());
+ gix_features::hash::crc32(
+ pack.entry_slice(data.index_entry.pack_offset..entry_end)
+ .expect("slice pointing into the pack (by now data is verified)"),
+ )
+ },
+ processor,
+ );
+ match result {
+ Err(err @ Error::PackDecode { .. }) if !check.fatal_decode_error() => {
+ progress.info(format!("Ignoring decode error: {err}"));
+ Ok(())
+ }
+ res => res,
+ }
+ },
+ crate::cache::delta::traverse::Options {
+ object_progress: progress.add_child_with_id("Resolving", ProgressId::DecodedObjects.into()),
+ size_progress: progress.add_child_with_id("Decoding", ProgressId::DecodedBytes.into()),
+ thread_limit,
+ should_interrupt,
+ object_hash: self.object_hash,
+ },
+ )?);
+ outcome.pack_size = pack.data_len() as u64;
+ Ok(outcome)
+ },
+ );
+ Ok(Outcome {
+ actual_index_checksum: verify_result?,
+ statistics: traversal_result?,
+ progress,
+ })
+ }
+}
+
+struct Entry {
+ index_entry: crate::index::Entry,
+ object_kind: gix_object::Kind,
+ object_size: u64,
+ decompressed_size: u64,
+ compressed_size: u64,
+ level: u16,
+}
+
+impl From<crate::index::Entry> for Entry {
+ fn from(index_entry: crate::index::Entry) -> Self {
+ Entry {
+ index_entry,
+ level: 0,
+ object_kind: gix_object::Kind::Tree,
+ object_size: 0,
+ decompressed_size: 0,
+ compressed_size: 0,
+ }
+ }
+}
+
+fn digest_statistics(traverse::Outcome { roots, children }: traverse::Outcome<Entry>) -> index::traverse::Statistics {
+ let mut res = index::traverse::Statistics::default();
+ let average = &mut res.average;
+ for item in roots.iter().chain(children.iter()) {
+ res.total_compressed_entries_size += item.data.compressed_size;
+ res.total_decompressed_entries_size += item.data.decompressed_size;
+ res.total_object_size += item.data.object_size;
+ *res.objects_per_chain_length.entry(item.data.level as u32).or_insert(0) += 1;
+
+ average.decompressed_size += item.data.decompressed_size;
+ average.compressed_size += item.data.compressed_size as usize;
+ average.object_size += item.data.object_size;
+ average.num_deltas += item.data.level as u32;
+ use gix_object::Kind::*;
+ match item.data.object_kind {
+ Blob => res.num_blobs += 1,
+ Tree => res.num_trees += 1,
+ Tag => res.num_tags += 1,
+ Commit => res.num_commits += 1,
+ };
+ }
+
+ let num_nodes = roots.len() + children.len();
+ average.decompressed_size /= num_nodes as u64;
+ average.compressed_size /= num_nodes;
+ average.object_size /= num_nodes as u64;
+ average.num_deltas /= num_nodes as u32;
+
+ res
+}
diff --git a/vendor/gix-pack/src/index/traverse/with_lookup.rs b/vendor/gix-pack/src/index/traverse/with_lookup.rs
new file mode 100644
index 000000000..509ae4e4f
--- /dev/null
+++ b/vendor/gix-pack/src/index/traverse/with_lookup.rs
@@ -0,0 +1,190 @@
+use std::sync::atomic::{AtomicBool, Ordering};
+
+use gix_features::{
+ parallel::{self, in_parallel_if},
+ progress::{self, Progress},
+ threading::{lock, Mutable, OwnShared},
+};
+
+use super::{Error, Reducer};
+use crate::{
+ data, index,
+ index::{traverse::Outcome, util},
+};
+
+/// Traversal options for [`index::File::traverse_with_lookup()`]
+pub struct Options<F> {
+ /// If `Some`, only use the given amount of threads. Otherwise, the amount of threads to use will be selected based on
+ /// the amount of available logical cores.
+ pub thread_limit: Option<usize>,
+ /// The kinds of safety checks to perform.
+ pub check: index::traverse::SafetyCheck,
+ /// A function to create a pack cache
+ pub make_pack_lookup_cache: F,
+}
+
+impl Default for Options<fn() -> crate::cache::Never> {
+ fn default() -> Self {
+ Options {
+ check: Default::default(),
+ thread_limit: None,
+ make_pack_lookup_cache: || crate::cache::Never,
+ }
+ }
+}
+
+/// The progress ids used in [`index::File::traverse_with_lookup()`].
+///
+/// Use this information to selectively extract the progress of interest in case the parent application has custom visualization.
+#[derive(Debug, Copy, Clone)]
+pub enum ProgressId {
+ /// The amount of bytes currently processed to generate a checksum of the *pack data file*.
+ HashPackDataBytes,
+ /// The amount of bytes currently processed to generate a checksum of the *pack index file*.
+ HashPackIndexBytes,
+ /// Collect all object hashes into a vector and sort it by pack offset.
+ CollectSortedIndexEntries,
+ /// The amount of objects which were decoded by brute-force.
+ DecodedObjects,
+}
+
+impl From<ProgressId> for gix_features::progress::Id {
+ fn from(v: ProgressId) -> Self {
+ match v {
+ ProgressId::HashPackDataBytes => *b"PTHP",
+ ProgressId::HashPackIndexBytes => *b"PTHI",
+ ProgressId::CollectSortedIndexEntries => *b"PTCE",
+ ProgressId::DecodedObjects => *b"PTRO",
+ }
+ }
+}
+
+/// Verify and validate the content of the index file
+impl index::File {
+ /// Iterate through all _decoded objects_ in the given `pack` and handle them with a `Processor` using a cache to reduce the amount of
+ /// waste while decoding objects.
+ ///
+ /// For more details, see the documentation on the [`traverse()`][index::File::traverse()] method.
+ pub fn traverse_with_lookup<P, C, Processor, E, F>(
+ &self,
+ new_processor: impl Fn() -> Processor + Send + Clone,
+ pack: &crate::data::File,
+ mut progress: P,
+ should_interrupt: &AtomicBool,
+ Options {
+ thread_limit,
+ check,
+ make_pack_lookup_cache,
+ }: Options<F>,
+ ) -> Result<Outcome<P>, Error<E>>
+ where
+ P: Progress,
+ C: crate::cache::DecodeEntry,
+ E: std::error::Error + Send + Sync + 'static,
+ Processor: FnMut(
+ gix_object::Kind,
+ &[u8],
+ &index::Entry,
+ &mut <P::SubProgress as Progress>::SubProgress,
+ ) -> Result<(), E>,
+ F: Fn() -> C + Send + Clone,
+ {
+ let (verify_result, traversal_result) = parallel::join(
+ {
+ let pack_progress = progress.add_child_with_id(
+ format!(
+ "Hash of pack '{}'",
+ pack.path().file_name().expect("pack has filename").to_string_lossy()
+ ),
+ ProgressId::HashPackDataBytes.into(),
+ );
+ let index_progress = progress.add_child_with_id(
+ format!(
+ "Hash of index '{}'",
+ self.path.file_name().expect("index has filename").to_string_lossy()
+ ),
+ ProgressId::HashPackIndexBytes.into(),
+ );
+ move || {
+ let res = self.possibly_verify(pack, check, pack_progress, index_progress, should_interrupt);
+ if res.is_err() {
+ should_interrupt.store(true, Ordering::SeqCst);
+ }
+ res
+ }
+ },
+ || {
+ let index_entries = util::index_entries_sorted_by_offset_ascending(
+ self,
+ progress.add_child_with_id("collecting sorted index", ProgressId::CollectSortedIndexEntries.into()),
+ );
+
+ let (chunk_size, thread_limit, available_cores) =
+ parallel::optimize_chunk_size_and_thread_limit(1000, Some(index_entries.len()), thread_limit, None);
+ let there_are_enough_entries_to_process = || index_entries.len() > chunk_size * available_cores;
+ let input_chunks = index_entries.chunks(chunk_size.max(chunk_size));
+ let reduce_progress = OwnShared::new(Mutable::new({
+ let mut p = progress.add_child_with_id("Traversing", ProgressId::DecodedObjects.into());
+ p.init(Some(self.num_objects() as usize), progress::count("objects"));
+ p
+ }));
+ let state_per_thread = {
+ let reduce_progress = reduce_progress.clone();
+ move |index| {
+ (
+ make_pack_lookup_cache(),
+ new_processor(),
+ Vec::with_capacity(2048), // decode buffer
+ lock(&reduce_progress)
+ .add_child_with_id(format!("thread {index}"), gix_features::progress::UNKNOWN), // per thread progress
+ )
+ }
+ };
+
+ in_parallel_if(
+ there_are_enough_entries_to_process,
+ input_chunks,
+ thread_limit,
+ state_per_thread,
+ |entries: &[index::Entry],
+ (cache, ref mut processor, buf, progress)|
+ -> Result<Vec<data::decode::entry::Outcome>, Error<_>> {
+ progress.init(
+ Some(entries.len()),
+ gix_features::progress::count_with_decimals("objects", 2),
+ );
+ let mut stats = Vec::with_capacity(entries.len());
+ progress.set(0);
+ for index_entry in entries.iter() {
+ let result = self.decode_and_process_entry(
+ check,
+ pack,
+ cache,
+ buf,
+ progress,
+ index_entry,
+ processor,
+ );
+ progress.inc();
+ let stat = match result {
+ Err(err @ Error::PackDecode { .. }) if !check.fatal_decode_error() => {
+ progress.info(format!("Ignoring decode error: {err}"));
+ continue;
+ }
+ res => res,
+ }?;
+ stats.push(stat);
+ }
+ Ok(stats)
+ },
+ Reducer::from_progress(reduce_progress, pack.data_len(), check, should_interrupt),
+ )
+ },
+ );
+ Ok(Outcome {
+ actual_index_checksum: verify_result?,
+ statistics: traversal_result?,
+ progress,
+ })
+ }
+}
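// Illustrative sketch (not from the vendored sources): a rough idea of how a caller inside this
// crate might drive `traverse_with_lookup` with a no-op processor and the default options. The use
// of `gix_features::progress::Discard` and boxing the error are assumptions, not verified API facts.
#[allow(dead_code)]
fn count_typed_objects(
    index: &crate::index::File,
    pack: &crate::data::File,
    should_interrupt: &std::sync::atomic::AtomicBool,
) -> Result<u32, Box<dyn std::error::Error + Send + Sync>> {
    let outcome = index.traverse_with_lookup(
        || |_kind, _data, _entry, _progress| Ok::<_, std::io::Error>(()),
        pack,
        gix_features::progress::Discard,
        should_interrupt,
        Options::default(),
    )?;
    let stats = outcome.statistics;
    Ok(stats.num_commits + stats.num_trees + stats.num_blobs + stats.num_tags)
}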
diff --git a/vendor/gix-pack/src/index/util.rs b/vendor/gix-pack/src/index/util.rs
new file mode 100644
index 000000000..284ee6158
--- /dev/null
+++ b/vendor/gix-pack/src/index/util.rs
@@ -0,0 +1,47 @@
+use std::{io, time::Instant};
+
+use gix_features::progress::{self, Progress};
+
+pub(crate) fn index_entries_sorted_by_offset_ascending(
+ idx: &crate::index::File,
+ mut progress: impl Progress,
+) -> Vec<crate::index::Entry> {
+ progress.init(Some(idx.num_objects as usize), progress::count("entries"));
+ let start = Instant::now();
+
+ let mut v = Vec::with_capacity(idx.num_objects as usize);
+ for entry in idx.iter() {
+ v.push(entry);
+ progress.inc();
+ }
+ v.sort_by_key(|e| e.pack_offset);
+
+ progress.show_throughput(start);
+ v
+}
+
+pub(crate) struct Count<W> {
+ pub bytes: u64,
+ pub inner: W,
+}
+
+impl<W> Count<W> {
+ pub fn new(inner: W) -> Self {
+ Count { bytes: 0, inner }
+ }
+}
+
+impl<W> io::Write for Count<W>
+where
+ W: io::Write,
+{
+ fn write(&mut self, buf: &[u8]) -> io::Result<usize> {
+ let written = self.inner.write(buf)?;
+ self.bytes += written as u64;
+ Ok(written)
+ }
+
+ fn flush(&mut self) -> io::Result<()> {
+ self.inner.flush()
+ }
+}
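// Illustrative sketch (not from the vendored sources): `Count` forwards every write to the wrapped
// writer while tallying bytes, which is what lets the index encoder report how much it wrote before
// appending the trailing hash.
#[cfg(test)]
mod count_illustration {
    use std::io::Write;

    #[test]
    fn counts_forwarded_bytes() {
        let mut out = super::Count::new(Vec::<u8>::new());
        out.write_all(b"pack-index-bytes").unwrap();
        assert_eq!(out.bytes, 16);
        assert_eq!(out.inner.len(), 16);
    }
}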
diff --git a/vendor/gix-pack/src/index/verify.rs b/vendor/gix-pack/src/index/verify.rs
new file mode 100644
index 000000000..4a4852fb6
--- /dev/null
+++ b/vendor/gix-pack/src/index/verify.rs
@@ -0,0 +1,290 @@
+use std::sync::atomic::AtomicBool;
+
+use gix_features::progress::Progress;
+use gix_object::{bstr::ByteSlice, WriteTo};
+
+use crate::index;
+
+///
+pub mod integrity {
+ use std::marker::PhantomData;
+
+ use gix_object::bstr::BString;
+
+ /// Returned by [`index::File::verify_integrity()`][crate::index::File::verify_integrity()].
+ #[derive(thiserror::Error, Debug)]
+ #[allow(missing_docs)]
+ pub enum Error {
+ #[error("The fan at index {index} is out of order as it's larger then the following value.")]
+ Fan { index: usize },
+ #[error("{kind} object {id} could not be decoded")]
+ ObjectDecode {
+ source: gix_object::decode::Error,
+ kind: gix_object::Kind,
+ id: gix_hash::ObjectId,
+ },
+ #[error("{kind} object {id} wasn't re-encoded without change, wanted\n{expected}\n\nGOT\n\n{actual}")]
+ ObjectEncodeMismatch {
+ kind: gix_object::Kind,
+ id: gix_hash::ObjectId,
+ expected: BString,
+ actual: BString,
+ },
+ }
+
+ /// Returned by [`index::File::verify_integrity()`][crate::index::File::verify_integrity()].
+ pub struct Outcome<P> {
+ /// The computed checksum of the index which matched the stored one.
+ pub actual_index_checksum: gix_hash::ObjectId,
+ /// The packs traversal outcome, if one was provided
+ pub pack_traverse_statistics: Option<crate::index::traverse::Statistics>,
+ /// The provided progress instance.
+ pub progress: P,
+ }
+
+ /// Additional options to define how the integrity should be verified.
+ #[derive(Clone)]
+ pub struct Options<F> {
+ /// The thoroughness of the verification
+ pub verify_mode: crate::index::verify::Mode,
+ /// The way to traverse packs
+ pub traversal: crate::index::traverse::Algorithm,
+ /// The amount of threads to use if `Some(N)`, with `None|Some(0)` meaning that all available cores are used.
+ pub thread_limit: Option<usize>,
+ /// A function to create a pack cache
+ pub make_pack_lookup_cache: F,
+ }
+
+ impl Default for Options<fn() -> crate::cache::Never> {
+ fn default() -> Self {
+ Options {
+ verify_mode: Default::default(),
+ traversal: Default::default(),
+ thread_limit: None,
+ make_pack_lookup_cache: || crate::cache::Never,
+ }
+ }
+ }
+
+ /// The progress ids used in [`index::File::verify_integrity()`][crate::index::File::verify_integrity()].
+ ///
+ /// Use this information to selectively extract the progress of interest in case the parent application has custom visualization.
+ #[derive(Debug, Copy, Clone)]
+ pub enum ProgressId {
+ /// The amount of bytes read to verify the index checksum.
+ ChecksumBytes,
+ /// A root progress for traversal which isn't actually used directly, but here to link to the respective `ProgressId` types.
+ Traverse(PhantomData<crate::index::verify::index::traverse::ProgressId>),
+ }
+
+ impl From<ProgressId> for gix_features::progress::Id {
+ fn from(v: ProgressId) -> Self {
+ match v {
+ ProgressId::ChecksumBytes => *b"PTHI",
+ ProgressId::Traverse(_) => gix_features::progress::UNKNOWN,
+ }
+ }
+ }
+}
+
+///
+pub mod checksum {
+ /// Returned by [`index::File::verify_checksum()`][crate::index::File::verify_checksum()].
+ pub type Error = crate::verify::checksum::Error;
+}
+
+/// Various ways in which a pack and index can be verified
+#[derive(Debug, Eq, PartialEq, Hash, Clone, Copy)]
+pub enum Mode {
+ /// Validate the object hash and CRC32
+ HashCrc32,
+ /// Validate hash and CRC32, and decode each non-Blob object.
+ /// Each object should be valid, i.e. be decodable.
+ HashCrc32Decode,
+ /// Validate hash and CRC32, and decode and encode each non-Blob object.
+ /// Each object should yield exactly the same hash when re-encoded.
+ HashCrc32DecodeEncode,
+}
+
+impl Default for Mode {
+ fn default() -> Self {
+ Mode::HashCrc32DecodeEncode
+ }
+}
+
+/// Information to allow verifying the integrity of an index with the help of its corresponding pack.
+pub struct PackContext<'a, F> {
+ /// The pack data file itself.
+ pub data: &'a crate::data::File,
+ /// The options further configuring the pack traversal and verification
+ pub options: integrity::Options<F>,
+}
+
+/// Verify and validate the content of the index file
+impl index::File {
+ /// Returns the trailing hash stored at the end of this index file.
+ ///
+ /// It's a hash over all bytes of the index.
+ pub fn index_checksum(&self) -> gix_hash::ObjectId {
+ gix_hash::ObjectId::from(&self.data[self.data.len() - self.hash_len..])
+ }
+
+ /// Returns the hash of the pack data file that this index file corresponds to.
+ ///
+ /// It should match the [`crate::data::File::checksum()`] of the corresponding pack data file.
+ pub fn pack_checksum(&self) -> gix_hash::ObjectId {
+ let from = self.data.len() - self.hash_len * 2;
+ gix_hash::ObjectId::from(&self.data[from..][..self.hash_len])
+ }
+
+ /// Validate that our [`index_checksum()`][index::File::index_checksum()] matches the actual contents
+ /// of this index file, and return it if it does.
+ pub fn verify_checksum(
+ &self,
+ progress: impl Progress,
+ should_interrupt: &AtomicBool,
+ ) -> Result<gix_hash::ObjectId, checksum::Error> {
+ crate::verify::checksum_on_disk_or_mmap(
+ self.path(),
+ &self.data,
+ self.index_checksum(),
+ self.object_hash,
+ progress,
+ should_interrupt,
+ )
+ }
+
+ /// The most thorough validation of integrity of both index file and the corresponding pack data file, if provided.
+ /// Returns the checksum of the index file, the traversal outcome and the given progress if the integrity check is successful.
+ ///
+ /// If `pack` is provided, it is expected (and validated to be) the pack belonging to this index.
+ /// It will be used to validate internal integrity of the pack before checking that each object's integrity
+ /// is indeed as advertised via its SHA1 as stored in this index, as well as the CRC32 hash.
+ /// The last member of the Option is a function returning an implementation of [`crate::cache::DecodeEntry`] to be used if
+ /// the [`index::traverse::Algorithm`] is `Lookup`.
+ /// To set this to `None`, use `None::<(_, _, _, fn() -> crate::cache::Never)>`.
+ ///
+ /// The `thread_limit` optionally specifies the amount of threads to be used for the [pack traversal][index::File::traverse()].
+ /// `make_cache` is only used in case a `pack` is specified, use existing implementations in the [`crate::cache`] module.
+ ///
+ /// # Tradeoffs
+ ///
+ /// The given `progress` is inevitably consumed if there is an error, which is a tradeoff chosen to easily allow using `?` in the
+ /// error case.
+ pub fn verify_integrity<P, C, F>(
+ &self,
+ pack: Option<PackContext<'_, F>>,
+ mut progress: P,
+ should_interrupt: &AtomicBool,
+ ) -> Result<integrity::Outcome<P>, index::traverse::Error<index::verify::integrity::Error>>
+ where
+ P: Progress,
+ C: crate::cache::DecodeEntry,
+ F: Fn() -> C + Send + Clone,
+ {
+ if let Some(first_invalid) = crate::verify::fan(&self.fan) {
+ return Err(index::traverse::Error::Processor(integrity::Error::Fan {
+ index: first_invalid,
+ }));
+ }
+
+ match pack {
+ Some(PackContext {
+ data: pack,
+ options:
+ integrity::Options {
+ verify_mode,
+ traversal,
+ thread_limit,
+ make_pack_lookup_cache,
+ },
+ }) => self
+ .traverse(
+ pack,
+ progress,
+ should_interrupt,
+ || {
+ let mut encode_buf = Vec::with_capacity(2048);
+ move |kind, data, index_entry, progress| {
+ Self::verify_entry(verify_mode, &mut encode_buf, kind, data, index_entry, progress)
+ }
+ },
+ index::traverse::Options {
+ traversal,
+ thread_limit,
+ check: index::traverse::SafetyCheck::All,
+ make_pack_lookup_cache,
+ },
+ )
+ .map(|o| integrity::Outcome {
+ actual_index_checksum: o.actual_index_checksum,
+ pack_traverse_statistics: Some(o.statistics),
+ progress: o.progress,
+ }),
+ None => self
+ .verify_checksum(
+ progress.add_child_with_id("Sha1 of index", integrity::ProgressId::ChecksumBytes.into()),
+ should_interrupt,
+ )
+ .map_err(Into::into)
+ .map(|id| integrity::Outcome {
+ actual_index_checksum: id,
+ pack_traverse_statistics: None,
+ progress,
+ }),
+ }
+ }
+
+ #[allow(clippy::too_many_arguments)]
+ fn verify_entry<P>(
+ verify_mode: Mode,
+ encode_buf: &mut Vec<u8>,
+ object_kind: gix_object::Kind,
+ buf: &[u8],
+ index_entry: &index::Entry,
+ progress: &mut P,
+ ) -> Result<(), integrity::Error>
+ where
+ P: Progress,
+ {
+ if let Mode::HashCrc32Decode | Mode::HashCrc32DecodeEncode = verify_mode {
+ use gix_object::Kind::*;
+ match object_kind {
+ Tree | Commit | Tag => {
+ let object = gix_object::ObjectRef::from_bytes(object_kind, buf).map_err(|err| {
+ integrity::Error::ObjectDecode {
+ source: err,
+ kind: object_kind,
+ id: index_entry.oid,
+ }
+ })?;
+ if let Mode::HashCrc32DecodeEncode = verify_mode {
+ encode_buf.clear();
+ object
+ .write_to(&mut *encode_buf)
+ .expect("writing to a memory buffer never fails");
+ if encode_buf.as_slice() != buf {
+ let mut should_return_error = true;
+ if let gix_object::Kind::Tree = object_kind {
+ if buf.as_bstr().find(b"100664").is_some() || buf.as_bstr().find(b"100640").is_some() {
+ progress.info(format!("Tree object {} would be cleaned up during re-serialization, replacing mode '100664|100640' with '100644'", index_entry.oid));
+ should_return_error = false
+ }
+ }
+ if should_return_error {
+ return Err(integrity::Error::ObjectEncodeMismatch {
+ kind: object_kind,
+ id: index_entry.oid,
+ expected: buf.into(),
+ actual: encode_buf.clone().into(),
+ });
+ }
+ }
+ }
+ }
+ Blob => {}
+ };
+ }
+ Ok(())
+ }
+}
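// Illustrative sketch (not from the vendored sources): the cheapest of the checks above validates
// only the trailing index hash. `gix_features::progress::Discard` as a no-op progress sink is an
// assumption about the companion crate rather than something this diff establishes.
#[allow(dead_code)]
fn index_trailer_is_intact(index: &crate::index::File) -> bool {
    let should_interrupt = std::sync::atomic::AtomicBool::new(false);
    index
        .verify_checksum(gix_features::progress::Discard, &should_interrupt)
        .is_ok()
}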
diff --git a/vendor/gix-pack/src/index/write/encode.rs b/vendor/gix-pack/src/index/write/encode.rs
new file mode 100644
index 000000000..80f0cac61
--- /dev/null
+++ b/vendor/gix-pack/src/index/write/encode.rs
@@ -0,0 +1,127 @@
+use std::{cmp::Ordering, io};
+
+pub(crate) const LARGE_OFFSET_THRESHOLD: u64 = 0x7fff_ffff;
+pub(crate) const HIGH_BIT: u32 = 0x8000_0000;
+
+use gix_features::{
+ hash,
+ progress::{self, Progress},
+};
+
+use crate::index::{util::Count, V2_SIGNATURE};
+
+pub(crate) fn write_to(
+ out: impl io::Write,
+ entries_sorted_by_oid: Vec<crate::cache::delta::Item<crate::index::write::TreeEntry>>,
+ pack_hash: &gix_hash::ObjectId,
+ kind: crate::index::Version,
+ mut progress: impl Progress,
+) -> io::Result<gix_hash::ObjectId> {
+ use io::Write;
+ assert_eq!(kind, crate::index::Version::V2, "Can only write V2 packs right now");
+ assert!(
+ entries_sorted_by_oid.len() <= u32::MAX as usize,
+ "a pack cannot have more than u32::MAX objects"
+ );
+
+ // Write header
+ let mut out = Count::new(std::io::BufWriter::with_capacity(
+ 8 * 4096,
+ hash::Write::new(out, kind.hash()),
+ ));
+ out.write_all(V2_SIGNATURE)?;
+ out.write_all(&(kind as u32).to_be_bytes())?;
+
+ progress.init(Some(4), progress::steps());
+ let start = std::time::Instant::now();
+ let _info = progress.add_child_with_id("writing fan-out table", gix_features::progress::UNKNOWN);
+ let fan_out = fanout(entries_sorted_by_oid.iter().map(|e| e.data.id.first_byte()));
+
+ for value in fan_out.iter() {
+ out.write_all(&value.to_be_bytes())?;
+ }
+
+ progress.inc();
+ let _info = progress.add_child_with_id("writing ids", gix_features::progress::UNKNOWN);
+ for entry in &entries_sorted_by_oid {
+ out.write_all(entry.data.id.as_slice())?;
+ }
+
+ progress.inc();
+ let _info = progress.add_child_with_id("writing crc32", gix_features::progress::UNKNOWN);
+ for entry in &entries_sorted_by_oid {
+ out.write_all(&entry.data.crc32.to_be_bytes())?;
+ }
+
+ progress.inc();
+ let _info = progress.add_child_with_id("writing offsets", gix_features::progress::UNKNOWN);
+ {
+ let mut offsets64 = Vec::<u64>::new();
+ for entry in &entries_sorted_by_oid {
+ let offset: u32 = if entry.offset > LARGE_OFFSET_THRESHOLD {
+ assert!(
+ offsets64.len() < LARGE_OFFSET_THRESHOLD as usize,
+ "Encoding breakdown - way too many 64bit offsets"
+ );
+ offsets64.push(entry.offset);
+ ((offsets64.len() - 1) as u32) | HIGH_BIT
+ } else {
+ entry.offset as u32
+ };
+ out.write_all(&offset.to_be_bytes())?;
+ }
+ for value in offsets64 {
+ out.write_all(&value.to_be_bytes())?;
+ }
+ }
+
+ out.write_all(pack_hash.as_slice())?;
+
+ let bytes_written_without_trailer = out.bytes;
+ let mut out = out.inner.into_inner()?;
+ let index_hash: gix_hash::ObjectId = out.hash.digest().into();
+ out.inner.write_all(index_hash.as_slice())?;
+ out.inner.flush()?;
+
+ progress.inc();
+ progress.show_throughput_with(
+ start,
+ (bytes_written_without_trailer + 20) as usize,
+ progress::bytes().expect("unit always set"),
+ progress::MessageLevel::Success,
+ );
+
+ Ok(index_hash)
+}
+
+pub(crate) fn fanout(iter: impl ExactSizeIterator<Item = u8>) -> [u32; 256] {
+ let mut fan_out = [0u32; 256];
+ let entries_len = iter.len() as u32;
+ let mut iter = iter.enumerate();
+ let mut idx_and_entry = iter.next();
+ let mut upper_bound = 0;
+
+ for (offset_be, byte) in fan_out.iter_mut().zip(0u8..=255) {
+ *offset_be = match idx_and_entry.as_ref() {
+ Some((_idx, first_byte)) => match first_byte.cmp(&byte) {
+ Ordering::Less => unreachable!("ids should be ordered, and we make sure to keep ahead with them"),
+ Ordering::Greater => upper_bound,
+ Ordering::Equal => {
+ if byte == 255 {
+ entries_len
+ } else {
+ idx_and_entry = iter.find(|(_, first_byte)| *first_byte != byte);
+ upper_bound = idx_and_entry
+ .as_ref()
+ .map(|(idx, _)| *idx as u32)
+ .unwrap_or(entries_len);
+ upper_bound
+ }
+ }
+ },
+ None => entries_len,
+ };
+ }
+
+ fan_out
+}
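// Illustrative sketch (not from the vendored sources): `fanout()` builds the classic cumulative
// table, where slot N counts all ids whose first byte is <= N and slot 255 holds the total.
#[cfg(test)]
mod fanout_illustration {
    #[test]
    fn table_is_cumulative() {
        let table = super::fanout([0x00u8, 0x00, 0x01, 0xff].into_iter());
        assert_eq!(table[0], 2); // two ids start with 0x00
        assert_eq!(table[1], 3); // one more id starts with 0x01
        assert_eq!(table[254], 3); // nothing in between changes the count
        assert_eq!(table[255], 4); // always the total amount of entries
    }
}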
diff --git a/vendor/gix-pack/src/index/write/error.rs b/vendor/gix-pack/src/index/write/error.rs
new file mode 100644
index 000000000..a5ef6ad67
--- /dev/null
+++ b/vendor/gix-pack/src/index/write/error.rs
@@ -0,0 +1,25 @@
+use std::io;
+
+/// Returned by [`crate::index::File::write_data_iter_to_stream()`]
+#[derive(thiserror::Error, Debug)]
+#[allow(missing_docs)]
+pub enum Error {
+ #[error("An IO error occurred when reading the pack or creating a temporary file")]
+ Io(#[from] io::Error),
+ #[error("A pack entry could not be extracted")]
+ PackEntryDecode(#[from] crate::data::input::Error),
+ #[error("Indices of type {} cannot be written, only {} are supported", *.0 as usize, crate::index::Version::default() as usize)]
+ Unsupported(crate::index::Version),
+ #[error("Ref delta objects are not supported as there is no way to look them up. Resolve them beforehand.")]
+ IteratorInvariantNoRefDelta,
+ #[error("The iterator failed to set a trailing hash over all prior pack entries in the last provided entry")]
+ IteratorInvariantTrailer,
+ #[error("Only u32::MAX objects can be stored in a pack, found {0}")]
+ IteratorInvariantTooManyObjects(usize),
+ #[error("{pack_offset} is not a valid offset for pack offset {distance}")]
+ IteratorInvariantBaseOffset { pack_offset: u64, distance: u64 },
+ #[error(transparent)]
+ Tree(#[from] crate::cache::delta::Error),
+ #[error(transparent)]
+ TreeTraversal(#[from] crate::cache::delta::traverse::Error),
+}
diff --git a/vendor/gix-pack/src/index/write/mod.rs b/vendor/gix-pack/src/index/write/mod.rs
new file mode 100644
index 000000000..c8fdaa271
--- /dev/null
+++ b/vendor/gix-pack/src/index/write/mod.rs
@@ -0,0 +1,263 @@
+use std::{convert::TryInto, io, sync::atomic::AtomicBool};
+
+pub use error::Error;
+use gix_features::progress::{self, Progress};
+
+use crate::cache::delta::{traverse, Tree};
+
+pub(crate) mod encode;
+mod error;
+
+pub(crate) struct TreeEntry {
+ pub id: gix_hash::ObjectId,
+ pub crc32: u32,
+}
+
+/// Information gathered while executing [`write_data_iter_to_stream()`][crate::index::File::write_data_iter_to_stream]
+#[derive(PartialEq, Eq, Debug, Hash, Ord, PartialOrd, Clone)]
+#[cfg_attr(feature = "serde1", derive(serde::Serialize, serde::Deserialize))]
+pub struct Outcome {
+ /// The version of the verified index
+ pub index_version: crate::index::Version,
+ /// The verified checksum of the verified index
+ pub index_hash: gix_hash::ObjectId,
+
+ /// The hash of the '.pack' file, also found in its trailing bytes
+ pub data_hash: gix_hash::ObjectId,
+ /// The amount of objects that were verified, always the amount of objects in the pack.
+ pub num_objects: u32,
+}
+
+/// The progress ids used in [`write_data_iter_to_stream()`][crate::index::File::write_data_iter_to_stream()].
+///
+/// Use this information to selectively extract the progress of interest in case the parent application has custom visualization.
+#[derive(Debug, Copy, Clone)]
+pub enum ProgressId {
+ /// Counts the amount of objects that were indexed thus far.
+ IndexObjects,
+ /// The amount of bytes that were decompressed while decoding pack entries.
+ ///
+ /// This is done to determine entry boundaries.
+ DecompressedBytes,
+ /// The amount of objects whose hashes were computed.
+ ///
+ /// This is done by decoding them, which typically involves decoding delta objects.
+ ResolveObjects,
+ /// The amount of bytes that were decoded in total, as the sum of all bytes to represent all resolved objects.
+ DecodedBytes,
+ /// The amount of bytes written to the index file.
+ IndexBytesWritten,
+}
+
+impl From<ProgressId> for gix_features::progress::Id {
+ fn from(v: ProgressId) -> Self {
+ match v {
+ ProgressId::IndexObjects => *b"IWIO",
+ ProgressId::DecompressedBytes => *b"IWDB",
+ ProgressId::ResolveObjects => *b"IWRO",
+ ProgressId::DecodedBytes => *b"IWDB",
+ ProgressId::IndexBytesWritten => *b"IWBW",
+ }
+ }
+}
+
+/// Various ways of writing an index file from pack entries
+impl crate::index::File {
+ /// Write information about `entries` as obtained from a pack data file into a pack index file via the `out` stream.
+ /// The resolver produced by `make_resolver` must resolve pack entries from the same pack data file that produced the
+ /// `entries` iterator.
+ ///
+ /// * `kind` is the version of pack index to produce, use [`crate::index::Version::default()`] if in doubt.
+ /// * `thread_limit` is used for a parallel tree traversal for obtaining object hashes with optimal performance.
+ /// * `root_progress` is the top-level progress to stay informed about the progress of this potentially long-running
+ /// computation.
+ /// * `object_hash` defines what kind of object hash we write into the index file.
+ /// * `pack_version` is the version of the underlying pack for which `entries` are read. It's used to compute a pack hash
+ /// in case the `entries` iterator yields no objects at all.
+ ///
+ /// # Remarks
+ ///
+ /// * neither in-pack nor out-of-pack Ref Deltas are supported here, these must have been resolved beforehand.
+ /// * `make_resolver()` will only be called after the iterator stopped returning elements and produces a function that
+ /// provides all bytes belonging to a pack entry, writing them to the given mutable output `Vec`.
+ /// It should return `None` if the entry cannot be resolved from the pack that produced the `entries` iterator, causing
+ /// the write operation to fail.
+ #[allow(clippy::too_many_arguments)]
+ pub fn write_data_iter_to_stream<F, F2>(
+ version: crate::index::Version,
+ make_resolver: F,
+ entries: impl Iterator<Item = Result<crate::data::input::Entry, crate::data::input::Error>>,
+ thread_limit: Option<usize>,
+ mut root_progress: impl Progress,
+ out: impl io::Write,
+ should_interrupt: &AtomicBool,
+ object_hash: gix_hash::Kind,
+ pack_version: crate::data::Version,
+ ) -> Result<Outcome, Error>
+ where
+ F: FnOnce() -> io::Result<F2>,
+ F2: for<'r> Fn(crate::data::EntryRange, &'r mut Vec<u8>) -> Option<()> + Send + Clone,
+ {
+ if version != crate::index::Version::default() {
+ return Err(Error::Unsupported(version));
+ }
+ let mut num_objects: usize = 0;
+ let mut last_seen_trailer = None;
+ let (anticipated_num_objects, upper_bound) = entries.size_hint();
+ let worst_case_num_objects_after_thin_pack_resolution = upper_bound.unwrap_or(anticipated_num_objects);
+ let mut tree = Tree::with_capacity(worst_case_num_objects_after_thin_pack_resolution)?;
+ let indexing_start = std::time::Instant::now();
+
+ root_progress.init(Some(4), progress::steps());
+ let mut objects_progress = root_progress.add_child_with_id("indexing", ProgressId::IndexObjects.into());
+ objects_progress.init(Some(anticipated_num_objects), progress::count("objects"));
+ let mut decompressed_progress =
+ root_progress.add_child_with_id("decompressing", ProgressId::DecompressedBytes.into());
+ decompressed_progress.init(None, progress::bytes());
+ let mut pack_entries_end: u64 = 0;
+
+ for entry in entries {
+ let crate::data::input::Entry {
+ header,
+ pack_offset,
+ crc32,
+ header_size,
+ compressed: _,
+ compressed_size,
+ decompressed_size,
+ trailer,
+ } = entry?;
+
+ decompressed_progress.inc_by(decompressed_size as usize);
+
+ let entry_len = header_size as u64 + compressed_size;
+ pack_entries_end = pack_offset + entry_len;
+
+ let crc32 = crc32.expect("crc32 to be computed by the iterator. Caller assures correct configuration.");
+
+ use crate::data::entry::Header::*;
+ match header {
+ Tree | Blob | Commit | Tag => {
+ tree.add_root(
+ pack_offset,
+ TreeEntry {
+ id: object_hash.null(),
+ crc32,
+ },
+ )?;
+ }
+ RefDelta { .. } => return Err(Error::IteratorInvariantNoRefDelta),
+ OfsDelta { base_distance } => {
+ let base_pack_offset =
+ crate::data::entry::Header::verified_base_pack_offset(pack_offset, base_distance).ok_or(
+ Error::IteratorInvariantBaseOffset {
+ pack_offset,
+ distance: base_distance,
+ },
+ )?;
+ tree.add_child(
+ base_pack_offset,
+ pack_offset,
+ TreeEntry {
+ id: object_hash.null(),
+ crc32,
+ },
+ )?;
+ }
+ };
+ last_seen_trailer = trailer;
+ num_objects += 1;
+ objects_progress.inc();
+ }
+ let num_objects: u32 = num_objects
+ .try_into()
+ .map_err(|_| Error::IteratorInvariantTooManyObjects(num_objects))?;
+
+ objects_progress.show_throughput(indexing_start);
+ decompressed_progress.show_throughput(indexing_start);
+ drop(objects_progress);
+ drop(decompressed_progress);
+
+ root_progress.inc();
+
+ let resolver = make_resolver()?;
+ let sorted_pack_offsets_by_oid = {
+ let traverse::Outcome { roots, children } = tree.traverse(
+ resolver,
+ pack_entries_end,
+ || (),
+ |data,
+ _progress,
+ traverse::Context {
+ entry,
+ decompressed: bytes,
+ ..
+ }| {
+ modify_base(data, entry, bytes, version.hash());
+ Ok::<_, Error>(())
+ },
+ traverse::Options {
+ object_progress: root_progress.add_child_with_id("Resolving", ProgressId::ResolveObjects.into()),
+ size_progress: root_progress.add_child_with_id("Decoding", ProgressId::DecodedBytes.into()),
+ thread_limit,
+ should_interrupt,
+ object_hash,
+ },
+ )?;
+ root_progress.inc();
+
+ let mut items = roots;
+ items.extend(children);
+ {
+ let _progress = root_progress.add_child_with_id("sorting by id", gix_features::progress::UNKNOWN);
+ items.sort_by_key(|e| e.data.id);
+ }
+
+ root_progress.inc();
+ items
+ };
+
+ let pack_hash = match last_seen_trailer {
+ Some(ph) => ph,
+ None if num_objects == 0 => {
+ let header = crate::data::header::encode(pack_version, 0);
+ let mut hasher = gix_features::hash::hasher(object_hash);
+ hasher.update(&header);
+ gix_hash::ObjectId::from(hasher.digest())
+ }
+ None => return Err(Error::IteratorInvariantTrailer),
+ };
+ let index_hash = encode::write_to(
+ out,
+ sorted_pack_offsets_by_oid,
+ &pack_hash,
+ version,
+ root_progress.add_child_with_id("writing index file", ProgressId::IndexBytesWritten.into()),
+ )?;
+ root_progress.show_throughput_with(
+ indexing_start,
+ num_objects as usize,
+ progress::count("objects").expect("unit always set"),
+ progress::MessageLevel::Success,
+ );
+ Ok(Outcome {
+ index_version: version,
+ index_hash,
+ data_hash: pack_hash,
+ num_objects,
+ })
+ }
+}
+
+fn modify_base(entry: &mut TreeEntry, pack_entry: &crate::data::Entry, decompressed: &[u8], hash: gix_hash::Kind) {
+ fn compute_hash(kind: gix_object::Kind, bytes: &[u8], object_hash: gix_hash::Kind) -> gix_hash::ObjectId {
+ let mut hasher = gix_features::hash::hasher(object_hash);
+ hasher.update(&gix_object::encode::loose_header(kind, bytes.len()));
+ hasher.update(bytes);
+ gix_hash::ObjectId::from(hasher.digest())
+ }
+
+ let object_kind = pack_entry.header.as_kind().expect("base object as source of iteration");
+ let id = compute_hash(object_kind, decompressed, hash);
+ entry.id = id;
+}
diff --git a/vendor/gix-pack/src/lib.rs b/vendor/gix-pack/src/lib.rs
new file mode 100755
index 000000000..200b87019
--- /dev/null
+++ b/vendor/gix-pack/src/lib.rs
@@ -0,0 +1,73 @@
+//! Git stores all of its data as _Objects_, which are data along with a hash over all data. Storing objects efficiently
+//! is what git packs are concerned about.
+//!
+//! Packs consist of [data files][data::File] and [index files][index::File]. The latter can be generated from a data file
+//! and make accessing objects within a pack feasible.
+//!
+//! A [Bundle] conveniently combines a data pack alongside its index to allow [finding][Find] objects or verifying the pack.
+//! Objects returned by `.find(…)` are [objects][gix_object::Data] which know their pack location in order to speed up
+//! various common operations like creating new packs from existing ones.
+//!
+//! When traversing all objects in a pack, a _delta tree acceleration structure_ can be built from pack data or an index
+//! in order to decompress packs in parallel and without any waste.
+//! ## Feature Flags
+#![cfg_attr(
+ feature = "document-features",
+ cfg_attr(doc, doc = ::document_features::document_features!())
+)]
+#![cfg_attr(docsrs, feature(doc_cfg, doc_auto_cfg))]
+#![deny(missing_docs, rust_2018_idioms, unsafe_code)]
+
+///
+pub mod bundle;
+/// A bundle of pack data and the corresponding pack index
+pub struct Bundle {
+ /// The pack file corresponding to `index`
+ pub pack: data::File,
+ /// The index file corresponding to `pack`
+ pub index: index::File,
+}
+
+///
+pub mod find;
+
+///
+pub mod cache;
+///
+pub mod data;
+
+mod find_traits;
+pub use find_traits::{Find, FindExt};
+
+///
+pub mod index;
+///
+pub mod multi_index;
+
+///
+pub mod verify;
+
+mod mmap {
+ use std::path::Path;
+
+ pub fn read_only(path: &Path) -> std::io::Result<memmap2::Mmap> {
+ let file = std::fs::File::open(path)?;
+ // SAFETY: we have to take the risk of somebody changing the file underneath. Git never writes into the same file.
+ #[allow(unsafe_code)]
+ unsafe {
+ memmap2::Mmap::map(&file)
+ }
+ }
+}
+
+use std::convert::TryInto;
+
+#[inline]
+fn read_u32(b: &[u8]) -> u32 {
+ u32::from_be_bytes(b.try_into().unwrap())
+}
+
+#[inline]
+fn read_u64(b: &[u8]) -> u64 {
+ u64::from_be_bytes(b.try_into().unwrap())
+}
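// Illustrative sketch (not from the vendored sources): all integers in pack, index and multi-index
// files are stored big-endian, and the two helpers above are the only raw readers the crate needs.
#[cfg(test)]
mod read_helpers_illustration {
    #[test]
    fn big_endian() {
        assert_eq!(super::read_u32(&[0, 0, 1, 2]), 258);
        assert_eq!(super::read_u64(&[0, 0, 0, 0, 0, 0, 1, 2]), 258);
    }
}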
diff --git a/vendor/gix-pack/src/multi_index/access.rs b/vendor/gix-pack/src/multi_index/access.rs
new file mode 100644
index 000000000..d209cd0b9
--- /dev/null
+++ b/vendor/gix-pack/src/multi_index/access.rs
@@ -0,0 +1,143 @@
+use std::{
+ ops::Range,
+ path::{Path, PathBuf},
+};
+
+use crate::{
+ data,
+ index::PrefixLookupResult,
+ multi_index::{EntryIndex, File, PackIndex, Version},
+};
+
+/// Represents an entry within a multi index file, effectively mapping object [`IDs`][gix_hash::ObjectId] to pack data
+/// files and the offset within.
+#[derive(PartialEq, Eq, Debug, Hash, Ord, PartialOrd, Clone)]
+#[cfg_attr(feature = "serde1", derive(serde::Serialize, serde::Deserialize))]
+pub struct Entry {
+ /// The ID of the object.
+ pub oid: gix_hash::ObjectId,
+ /// The offset to the object's header in the pack data file.
+ pub pack_offset: data::Offset,
+ /// The index of the pack matching our [`File::index_names()`] slice.
+ pub pack_index: PackIndex,
+}
+
+/// Access methods
+impl File {
+ /// Returns the version of the multi-index file.
+ pub fn version(&self) -> Version {
+ self.version
+ }
+ /// Returns the path from which the multi-index file was loaded.
+ ///
+ /// Note that it might have changed in the meantime, or might have been removed as well.
+ pub fn path(&self) -> &Path {
+ &self.path
+ }
+ /// Returns the amount of indices stored in this multi-index file. It's the same as [File::index_names().len()][File::index_names()],
+ /// and is returned as one past the highest known index.
+ pub fn num_indices(&self) -> PackIndex {
+ self.num_indices
+ }
+ /// Returns the total amount of objects available for lookup, returned as one past the highest known entry index.
+ pub fn num_objects(&self) -> EntryIndex {
+ self.num_objects
+ }
+ /// Returns the kind of hash function used for object ids available in this index.
+ pub fn object_hash(&self) -> gix_hash::Kind {
+ self.object_hash
+ }
+ /// Returns the checksum over the entire content of the file (excluding the checksum itself).
+ ///
+ /// It can be used to validate it didn't change after creation.
+ pub fn checksum(&self) -> gix_hash::ObjectId {
+ gix_hash::ObjectId::from(&self.data[self.data.len() - self.hash_len..])
+ }
+ /// Return all names of index files (`*.idx`) whose objects we contain.
+ ///
+ /// The corresponding pack can be found by replacing the `.idx` extension with `.pack`.
+ pub fn index_names(&self) -> &[PathBuf] {
+ &self.index_names
+ }
+}
+
+impl File {
+ /// Return the object id at the given `index`, which ranges from 0 to [File::num_objects()].
+ pub fn oid_at_index(&self, index: EntryIndex) -> &gix_hash::oid {
+ debug_assert!(index < self.num_objects, "index out of bounds");
+ let index: usize = index as usize;
+ let start = self.lookup_ofs + index * self.hash_len;
+ gix_hash::oid::from_bytes_unchecked(&self.data[start..][..self.hash_len])
+ }
+
+ /// Given a `prefix`, find an object that matches it uniquely within this index and return `Some(Ok(entry_index))`.
+ /// If there is more than one object matching the prefix, `Some(Err(()))` is returned.
+ ///
+ /// Finally, if no object matches the prefix, the return value is `None`.
+ ///
+ /// Pass `candidates` to obtain the set of entry-indices matching `prefix`, with the same return value as
+ /// one would have received if it remained `None`. It will be empty if no object matched the `prefix`.
+ ///
+ // NOTE: pretty much the same things as in `index::File::lookup`, change things there
+ // as well.
+ pub fn lookup_prefix(
+ &self,
+ prefix: gix_hash::Prefix,
+ candidates: Option<&mut Range<EntryIndex>>,
+ ) -> Option<PrefixLookupResult> {
+ crate::index::access::lookup_prefix(
+ prefix,
+ candidates,
+ &self.fan,
+ |idx| self.oid_at_index(idx),
+ self.num_objects,
+ )
+ }
+
+ /// Find the index ranging from 0 to [File::num_objects()] that belongs to data associated with `id`, or `None` if it wasn't found.
+ ///
+ /// Use this index for finding additional information via [`File::pack_id_and_pack_offset_at_index()`].
+ pub fn lookup(&self, id: impl AsRef<gix_hash::oid>) -> Option<EntryIndex> {
+ crate::index::access::lookup(id, &self.fan, |idx| self.oid_at_index(idx))
+ }
+
+ /// Given the `index` ranging from 0 to [File::num_objects()], return the pack index and its absolute offset into the pack.
+ ///
+ /// The pack-index refers to an entry in the [`index_names`][File::index_names()] list, from which the pack can be derived.
+ pub fn pack_id_and_pack_offset_at_index(&self, index: EntryIndex) -> (PackIndex, data::Offset) {
+ const OFFSET_ENTRY_SIZE: usize = 4 + 4;
+ let index = index as usize;
+ let start = self.offsets_ofs + index * OFFSET_ENTRY_SIZE;
+
+ const HIGH_BIT: u32 = 1 << 31;
+
+ let pack_index = crate::read_u32(&self.data[start..][..4]);
+ let offset = &self.data[start + 4..][..4];
+ let ofs32 = crate::read_u32(offset);
+ let pack_offset = if (ofs32 & HIGH_BIT) == HIGH_BIT {
+ // We determine if large offsets are actually larger than 4GB and if not, we don't use the high-bit to signal anything
+ // but allow the presence of the large-offset chunk to signal what's happening.
+ if let Some(offsets_64) = self.large_offsets_ofs {
+ let from = offsets_64 + (ofs32 ^ HIGH_BIT) as usize * 8;
+ crate::read_u64(&self.data[from..][..8])
+ } else {
+ ofs32 as u64
+ }
+ } else {
+ ofs32 as u64
+ };
+ (pack_index, pack_offset)
+ }
+
+ /// Return an iterator over all entries within this file.
+ pub fn iter(&self) -> impl Iterator<Item = Entry> + '_ {
+ (0..self.num_objects).map(move |idx| {
+ let (pack_index, pack_offset) = self.pack_id_and_pack_offset_at_index(idx);
+ Entry {
+ oid: self.oid_at_index(idx).to_owned(),
+ pack_offset,
+ pack_index,
+ }
+ })
+ }
+}
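// Illustrative sketch (not from the vendored sources): the offset rule applied by
// `pack_id_and_pack_offset_at_index()` above, restated on its own. A 32-bit offset with the high
// bit set indexes the optional large-offset table; without that table the bit carries no meaning.
#[allow(dead_code)]
fn decode_multi_index_offset(ofs32: u32, large_offsets: Option<&[u64]>) -> u64 {
    const HIGH_BIT: u32 = 1 << 31;
    if ofs32 & HIGH_BIT == HIGH_BIT {
        match large_offsets {
            Some(table) => table[(ofs32 ^ HIGH_BIT) as usize],
            None => ofs32 as u64,
        }
    } else {
        ofs32 as u64
    }
}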
diff --git a/vendor/gix-pack/src/multi_index/chunk.rs b/vendor/gix-pack/src/multi_index/chunk.rs
new file mode 100644
index 000000000..7ed8eebcb
--- /dev/null
+++ b/vendor/gix-pack/src/multi_index/chunk.rs
@@ -0,0 +1,276 @@
+/// Information for the chunk about index names
+pub mod index_names {
+ use std::path::{Path, PathBuf};
+
+ use gix_object::bstr::{BString, ByteSlice};
+
+ /// The ID used for the index-names chunk.
+ pub const ID: gix_chunk::Id = *b"PNAM";
+
+ ///
+ pub mod decode {
+ use gix_object::bstr::BString;
+
+ /// The error returned by [from_bytes()][super::from_bytes()].
+ #[derive(Debug, thiserror::Error)]
+ #[allow(missing_docs)]
+ pub enum Error {
+ #[error("The pack names were not ordered alphabetically.")]
+ NotOrderedAlphabetically,
+ #[error("Each pack path name must be terminated with a null byte")]
+ MissingNullByte,
+ #[error("Couldn't turn path '{path}' into OS path due to encoding issues")]
+ PathEncoding { path: BString },
+ #[error("non-padding bytes found after all paths were read.")]
+ UnknownTrailerBytes,
+ }
+ }
+
+ /// Parse null-separated index names from the given `chunk` of bytes, expecting `num_packs` names.
+ /// Ignore padding bytes which are typically \0.
+ pub fn from_bytes(mut chunk: &[u8], num_packs: u32) -> Result<Vec<PathBuf>, decode::Error> {
+ let mut out = Vec::new();
+ for _ in 0..num_packs {
+ let null_byte_pos = chunk.find_byte(b'\0').ok_or(decode::Error::MissingNullByte)?;
+
+ let path = &chunk[..null_byte_pos];
+ let path = gix_path::try_from_byte_slice(path)
+ .map_err(|_| decode::Error::PathEncoding {
+ path: BString::from(path),
+ })?
+ .to_owned();
+
+ if let Some(previous) = out.last() {
+ if previous >= &path {
+ return Err(decode::Error::NotOrderedAlphabetically);
+ }
+ }
+ out.push(path);
+
+ chunk = &chunk[null_byte_pos + 1..];
+ }
+
+ if !chunk.is_empty() && !chunk.iter().all(|b| *b == 0) {
+ return Err(decode::Error::UnknownTrailerBytes);
+ }
+ // NOTE: git writes garbage into this chunk, usually extra \0 bytes, which we simply ignore. If we were strict
+ // about it we couldn't read this chunk data at all.
+ Ok(out)
+ }
+
+ /// Calculate the size on disk for our chunk with the given index paths. Note that these are expected to have been processed already
+ /// to actually be file names.
+ pub fn storage_size(paths: impl IntoIterator<Item = impl AsRef<Path>>) -> u64 {
+ let mut count = 0u64;
+ for path in paths {
+ let path = path.as_ref();
+ let ascii_path = path.to_str().expect("UTF-8 compatible paths");
+ assert!(
+ ascii_path.is_ascii(),
+ "must use ascii bytes for correct size computation"
+ );
+ count += (ascii_path.as_bytes().len() + 1/* null byte */) as u64
+ }
+
+ let needed_alignment = CHUNK_ALIGNMENT - (count % CHUNK_ALIGNMENT);
+ if needed_alignment < CHUNK_ALIGNMENT {
+ count += needed_alignment;
+ }
+ count
+ }
+
+ /// Write all `paths` in order to `out`, including padding.
+ pub fn write(
+ paths: impl IntoIterator<Item = impl AsRef<Path>>,
+ mut out: impl std::io::Write,
+ ) -> std::io::Result<()> {
+ let mut written_bytes = 0;
+ for path in paths {
+ let path = path.as_ref().to_str().expect("UTF-8 path");
+ out.write_all(path.as_bytes())?;
+ out.write_all(&[0])?;
+ written_bytes += path.as_bytes().len() as u64 + 1;
+ }
+
+ let needed_alignment = CHUNK_ALIGNMENT - (written_bytes % CHUNK_ALIGNMENT);
+ if needed_alignment < CHUNK_ALIGNMENT {
+ let padding = [0u8; CHUNK_ALIGNMENT as usize];
+ out.write_all(&padding[..needed_alignment as usize])?;
+ }
+ Ok(())
+ }
+
+ const CHUNK_ALIGNMENT: u64 = 4;
+}
+
+/// Information for the chunk with the fanout table
+pub mod fanout {
+ use std::convert::TryInto;
+
+ use crate::multi_index;
+
+ /// The size of the fanout table
+ pub const SIZE: usize = 4 * 256;
+
+ /// The id uniquely identifying the fanout table.
+ pub const ID: gix_chunk::Id = *b"OIDF";
+
+ /// Decode the fanout table contained in `chunk`, or return `None` if it didn't have the expected size.
+ pub fn from_bytes(chunk: &[u8]) -> Option<[u32; 256]> {
+ if chunk.len() != SIZE {
+ return None;
+ }
+ let mut out = [0; 256];
+ for (c, f) in chunk.chunks(4).zip(out.iter_mut()) {
+ *f = u32::from_be_bytes(c.try_into().unwrap());
+ }
+ out.into()
+ }
+
+ /// Write the fanout for the given entries, which must be sorted by oid
+ pub(crate) fn write(
+ sorted_entries: &[multi_index::write::Entry],
+ mut out: impl std::io::Write,
+ ) -> std::io::Result<()> {
+ let fanout = crate::index::write::encode::fanout(sorted_entries.iter().map(|e| e.id.first_byte()));
+
+ for value in fanout.iter() {
+ out.write_all(&value.to_be_bytes())?;
+ }
+ Ok(())
+ }
+}
+
+/// Information about the oid lookup table.
+pub mod lookup {
+ use std::ops::Range;
+
+ use crate::multi_index;
+
+ /// The id uniquely identifying the oid lookup table.
+ pub const ID: gix_chunk::Id = *b"OIDL";
+
+ /// Return the amount of bytes needed to store the data on disk for the given amount of `entries`
+ pub fn storage_size(entries: usize, object_hash: gix_hash::Kind) -> u64 {
+ (entries * object_hash.len_in_bytes()) as u64
+ }
+
+ pub(crate) fn write(
+ sorted_entries: &[multi_index::write::Entry],
+ mut out: impl std::io::Write,
+ ) -> std::io::Result<()> {
+ for entry in sorted_entries {
+ out.write_all(entry.id.as_slice())?;
+ }
+ Ok(())
+ }
+
+ /// Return true if the size of the `offset` range matches what is expected for a `hash` of the given kind and `num_objects` objects.
+ pub fn is_valid(offset: &Range<usize>, hash: gix_hash::Kind, num_objects: u32) -> bool {
+ (offset.end - offset.start) / hash.len_in_bytes() == num_objects as usize
+ }
+}
+
+/// Information about the offsets table.
+pub mod offsets {
+ use std::{convert::TryInto, ops::Range};
+
+ use crate::multi_index;
+
+ /// The id uniquely identifying the offsets table.
+ pub const ID: gix_chunk::Id = *b"OOFF";
+
+ /// Return the amount of bytes needed to store the offset data for `entries`.
+ pub fn storage_size(entries: usize) -> u64 {
+ (entries * (4 /*pack-id*/ + 4/* pack offset */)) as u64
+ }
+
+ pub(crate) fn write(
+ sorted_entries: &[multi_index::write::Entry],
+ large_offsets_needed: bool,
+ mut out: impl std::io::Write,
+ ) -> std::io::Result<()> {
+ use crate::index::write::encode::{HIGH_BIT, LARGE_OFFSET_THRESHOLD};
+ let mut num_large_offsets = 0u32;
+
+ for entry in sorted_entries {
+ out.write_all(&entry.pack_index.to_be_bytes())?;
+
+ let offset: u32 = if large_offsets_needed {
+ if entry.pack_offset > LARGE_OFFSET_THRESHOLD {
+ let res = num_large_offsets | HIGH_BIT;
+ num_large_offsets += 1;
+ res
+ } else {
+ entry.pack_offset as u32
+ }
+ } else {
+ entry
+ .pack_offset
+ .try_into()
+ .expect("without large offsets, pack-offset fits u32")
+ };
+ out.write_all(&offset.to_be_bytes())?;
+ }
+ Ok(())
+ }
+
+ /// Returns true if the `offset` range seems to match the size required for `num_objects`.
+ pub fn is_valid(offset: &Range<usize>, num_objects: u32) -> bool {
+ let entry_size = 4 /* pack-id */ + 4 /* pack-offset */;
+ ((offset.end - offset.start) / num_objects as usize) == entry_size
+ }
+}
+
+/// Information about the large offsets table.
+pub mod large_offsets {
+ use std::ops::Range;
+
+ use crate::{index::write::encode::LARGE_OFFSET_THRESHOLD, multi_index};
+
+ /// The id uniquely identifying the large offsets table (with 64 bit offsets)
+ pub const ID: gix_chunk::Id = *b"LOFF";
+
+ /// Returns `Some(num_large_offsets)` if there are offsets that don't fit into a `u32`.
+ pub(crate) fn num_large_offsets(entries: &[multi_index::write::Entry]) -> Option<usize> {
+ let mut num_large_offsets = 0;
+ let mut needs_large_offsets = false;
+ for entry in entries {
+ if entry.pack_offset > LARGE_OFFSET_THRESHOLD {
+ num_large_offsets += 1;
+ }
+ if entry.pack_offset > u32::MAX as crate::data::Offset {
+ needs_large_offsets = true;
+ }
+ }
+
+ needs_large_offsets.then_some(num_large_offsets)
+ }
+ /// Returns true if the `offset` range seems to be properly aligned for the data we expect.
+ pub fn is_valid(offset: &Range<usize>) -> bool {
+ (offset.end - offset.start) % 8 == 0
+ }
+
+ pub(crate) fn write(
+ sorted_entries: &[multi_index::write::Entry],
+ mut num_large_offsets: usize,
+ mut out: impl std::io::Write,
+ ) -> std::io::Result<()> {
+ for offset in sorted_entries
+ .iter()
+ .filter_map(|e| (e.pack_offset > LARGE_OFFSET_THRESHOLD).then_some(e.pack_offset))
+ {
+ out.write_all(&offset.to_be_bytes())?;
+ num_large_offsets = num_large_offsets
+ .checked_sub(1)
+ .expect("BUG: wrote more offsets the previously found");
+ }
+ assert_eq!(num_large_offsets, 0, "BUG: wrote fewer offsets than initially counted");
+ Ok(())
+ }
+
+ /// Return the amount of bytes needed to store the given amount of `large_offsets`
+ pub(crate) fn storage_size(large_offsets: usize) -> u64 {
+ 8 * large_offsets as u64
+ }
+}
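For orientation, the OOFF/LOFF split above works as follows: each entry gets a 4-byte pack id and a 4-byte offset, and once large offsets are in play, any offset above the large-offset threshold instead stores an index into the LOFF chunk with its high bit set. Below is a minimal standalone sketch of that encoding; the two constants are assumptions standing in for the crate's `index::write::encode` values, not taken from this diff.

// Sketch only: assumed stand-ins for the crate's encode constants.
const LARGE_OFFSET_THRESHOLD: u64 = 0x7fff_ffff;
const HIGH_BIT: u32 = 0x8000_0000;

/// Split pack offsets into the 4-byte values stored in the offsets chunk
/// and the 8-byte values stored in the large-offsets chunk.
fn encode_offsets(pack_offsets: &[u64]) -> (Vec<u32>, Vec<u64>) {
    let mut small = Vec::with_capacity(pack_offsets.len());
    let mut large = Vec::new();
    for &ofs in pack_offsets {
        if ofs > LARGE_OFFSET_THRESHOLD {
            // Store an index into the large-offsets table, flagged with the high bit.
            small.push(large.len() as u32 | HIGH_BIT);
            large.push(ofs);
        } else {
            small.push(ofs as u32);
        }
    }
    (small, large)
}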
diff --git a/vendor/gix-pack/src/multi_index/init.rs b/vendor/gix-pack/src/multi_index/init.rs
new file mode 100644
index 000000000..190b40a7b
--- /dev/null
+++ b/vendor/gix-pack/src/multi_index/init.rs
@@ -0,0 +1,157 @@
+use std::{convert::TryFrom, path::Path};
+
+use crate::multi_index::{chunk, File, Version};
+
+mod error {
+ use crate::multi_index::chunk;
+
+ /// The error returned by [File::at()][super::File::at()].
+ #[derive(Debug, thiserror::Error)]
+ #[allow(missing_docs)]
+ pub enum Error {
+ #[error("Could not open multi-index file at '{path}'")]
+ Io {
+ source: std::io::Error,
+ path: std::path::PathBuf,
+ },
+ #[error("{message}")]
+ Corrupt { message: &'static str },
+ #[error("Unsupported multi-index version: {version})")]
+ UnsupportedVersion { version: u8 },
+ #[error("Unsupported hash kind: {kind})")]
+ UnsupportedObjectHash { kind: u8 },
+ #[error(transparent)]
+ ChunkFileDecode(#[from] gix_chunk::file::decode::Error),
+ #[error(transparent)]
+ MissingChunk(#[from] gix_chunk::file::index::offset_by_kind::Error),
+ #[error(transparent)]
+ FileTooLarge(#[from] gix_chunk::file::index::data_by_kind::Error),
+ #[error("The multi-pack fan doesn't have the correct size of 256 * 4 bytes")]
+ MultiPackFanSize,
+ #[error(transparent)]
+ PackNames(#[from] chunk::index_names::decode::Error),
+ #[error("multi-index chunk {:?} has invalid size: {message}", String::from_utf8_lossy(.id))]
+ InvalidChunkSize { id: gix_chunk::Id, message: &'static str },
+ }
+}
+
+pub use error::Error;
+
+/// Initialization
+impl File {
+ /// Open the multi-index file at the given `path`.
+ pub fn at(path: impl AsRef<Path>) -> Result<Self, Error> {
+ Self::try_from(path.as_ref())
+ }
+}
+
+impl TryFrom<&Path> for File {
+ type Error = Error;
+
+ fn try_from(path: &Path) -> Result<Self, Self::Error> {
+ let data = crate::mmap::read_only(path).map_err(|source| Error::Io {
+ source,
+ path: path.to_owned(),
+ })?;
+
+ const TRAILER_LEN: usize = gix_hash::Kind::shortest().len_in_bytes(); /* trailing hash */
+ if data.len()
+ < Self::HEADER_LEN
+ + gix_chunk::file::Index::size_for_entries(4 /*index names, fan, offsets, oids*/)
+ + chunk::fanout::SIZE
+ + TRAILER_LEN
+ {
+ return Err(Error::Corrupt {
+ message: "multi-index file is truncated and too short",
+ });
+ }
+
+ let (version, object_hash, num_chunks, num_indices) = {
+ let (signature, data) = data.split_at(4);
+ if signature != Self::SIGNATURE {
+ return Err(Error::Corrupt {
+ message: "Invalid signature",
+ });
+ }
+ let (version, data) = data.split_at(1);
+ let version = match version[0] {
+ 1 => Version::V1,
+ version => return Err(Error::UnsupportedVersion { version }),
+ };
+
+ let (object_hash, data) = data.split_at(1);
+ let object_hash = gix_hash::Kind::try_from(object_hash[0])
+ .map_err(|unknown| Error::UnsupportedObjectHash { kind: unknown })?;
+ let (num_chunks, data) = data.split_at(1);
+ let num_chunks = num_chunks[0];
+
+ let (_num_base_files, data) = data.split_at(1); // TODO: handle base files once it's clear what this does
+
+ let (num_indices, _) = data.split_at(4);
+ let num_indices = crate::read_u32(num_indices);
+
+ (version, object_hash, num_chunks, num_indices)
+ };
+
+ let chunks = gix_chunk::file::Index::from_bytes(&data, Self::HEADER_LEN, num_chunks as u32)?;
+
+ let index_names = chunks.data_by_id(&data, chunk::index_names::ID)?;
+ let index_names = chunk::index_names::from_bytes(index_names, num_indices)?;
+
+ let fan = chunks.data_by_id(&data, chunk::fanout::ID)?;
+ let fan = chunk::fanout::from_bytes(fan).ok_or(Error::MultiPackFanSize)?;
+ let num_objects = fan[255];
+
+ let lookup = chunks.validated_usize_offset_by_id(chunk::lookup::ID, |offset| {
+ chunk::lookup::is_valid(&offset, object_hash, num_objects)
+ .then_some(offset)
+ .ok_or(Error::InvalidChunkSize {
+ id: chunk::lookup::ID,
+ message: "The chunk with alphabetically ordered object ids doesn't have the correct size",
+ })
+ })??;
+ let offsets = chunks.validated_usize_offset_by_id(chunk::offsets::ID, |offset| {
+ chunk::offsets::is_valid(&offset, num_objects)
+ .then_some(offset)
+ .ok_or(Error::InvalidChunkSize {
+ id: chunk::offsets::ID,
+ message: "The chunk with offsets into the pack doesn't have the correct size",
+ })
+ })??;
+ let large_offsets = chunks
+ .validated_usize_offset_by_id(chunk::large_offsets::ID, |offset| {
+ chunk::large_offsets::is_valid(&offset)
+ .then_some(offset)
+ .ok_or(Error::InvalidChunkSize {
+ id: chunk::large_offsets::ID,
+ message: "The chunk with large offsets into the pack doesn't have the correct size",
+ })
+ })
+ .ok()
+ .transpose()?;
+
+ let checksum_offset = chunks.highest_offset() as usize;
+ let trailer = &data[checksum_offset..];
+ if trailer.len() != object_hash.len_in_bytes() {
+ return Err(Error::Corrupt {
+ message:
+ "Trailing checksum didn't have the expected size or there were unknown bytes after the checksum.",
+ });
+ }
+
+ Ok(File {
+ data,
+ path: path.to_owned(),
+ version,
+ hash_len: object_hash.len_in_bytes(),
+ object_hash,
+ fan,
+ index_names,
+ lookup_ofs: lookup.start,
+ offsets_ofs: offsets.start,
+ large_offsets_ofs: large_offsets.map(|r| r.start),
+ num_objects,
+ num_indices,
+ })
+ }
+}
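The fixed-size header parsed by `File::at()` above (and written by `write_header()` in write.rs further below) is 12 bytes: a 4-byte `MIDX` signature, one byte each for version, object-hash kind, chunk count and base-file count, followed by the index count as a big-endian u32. A small sketch assembling such a header by hand; the hash-kind value `1` for SHA-1 is an assumption about the on-disk encoding.

/// Sketch only: assemble the 12-byte multi-index header described above.
fn example_header(num_chunks: u8, num_indices: u32) -> [u8; 12] {
    let mut header = [0u8; 12];
    header[..4].copy_from_slice(b"MIDX"); // signature
    header[4] = 1; // version 1
    header[5] = 1; // object hash kind (assumed: 1 == SHA-1)
    header[6] = num_chunks; // number of chunks that follow
    header[7] = 0; // number of base files, currently unused
    header[8..].copy_from_slice(&num_indices.to_be_bytes()); // index count, big-endian
    header
}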
diff --git a/vendor/gix-pack/src/multi_index/mod.rs b/vendor/gix-pack/src/multi_index/mod.rs
new file mode 100644
index 000000000..3f7ed0ff5
--- /dev/null
+++ b/vendor/gix-pack/src/multi_index/mod.rs
@@ -0,0 +1,57 @@
+use std::path::PathBuf;
+
+use memmap2::Mmap;
+
+/// Known multi-index file versions
+#[derive(PartialEq, Eq, Ord, PartialOrd, Debug, Hash, Clone, Copy)]
+#[cfg_attr(feature = "serde1", derive(serde::Serialize, serde::Deserialize))]
+#[allow(missing_docs)]
+pub enum Version {
+ V1 = 1,
+}
+
+impl Default for Version {
+ fn default() -> Self {
+ Version::V1
+ }
+}
+
+/// An index into our [`File::index_names()`] array yielding the name of the index and by implication, its pack file.
+pub type PackIndex = u32;
+
+/// The type for referring to indices of an entry within the index file.
+pub type EntryIndex = u32;
+
+/// A representation of an index file for multiple packs at the same time, typically stored in a file
+/// named 'multi-pack-index'.
+pub struct File {
+ data: Mmap,
+ path: std::path::PathBuf,
+ version: Version,
+ hash_len: usize,
+ object_hash: gix_hash::Kind,
+ /// The number of indices (and by implication, packs) contained within.
+ num_indices: u32,
+ num_objects: u32,
+
+ fan: [u32; 256],
+ index_names: Vec<PathBuf>,
+ lookup_ofs: usize,
+ offsets_ofs: usize,
+ large_offsets_ofs: Option<usize>,
+}
+
+///
+pub mod write;
+
+///
+mod access;
+
+///
+pub mod verify;
+
+///
+pub mod chunk;
+
+///
+pub mod init;
diff --git a/vendor/gix-pack/src/multi_index/verify.rs b/vendor/gix-pack/src/multi_index/verify.rs
new file mode 100644
index 000000000..856a48501
--- /dev/null
+++ b/vendor/gix-pack/src/multi_index/verify.rs
@@ -0,0 +1,337 @@
+use std::{cmp::Ordering, sync::atomic::AtomicBool, time::Instant};
+
+use gix_features::progress::Progress;
+
+use crate::{index, multi_index::File};
+
+///
+pub mod integrity {
+ use crate::multi_index::EntryIndex;
+
+ /// Returned by [`multi_index::File::verify_integrity()`][crate::multi_index::File::verify_integrity()].
+ #[derive(thiserror::Error, Debug)]
+ #[allow(missing_docs)]
+ pub enum Error {
+ #[error("Object {id} should be at pack-offset {expected_pack_offset} but was found at {actual_pack_offset}")]
+ PackOffsetMismatch {
+ id: gix_hash::ObjectId,
+ expected_pack_offset: u64,
+ actual_pack_offset: u64,
+ },
+ #[error(transparent)]
+ MultiIndexChecksum(#[from] crate::multi_index::verify::checksum::Error),
+ #[error(transparent)]
+ IndexIntegrity(#[from] crate::index::verify::integrity::Error),
+ #[error(transparent)]
+ BundleInit(#[from] crate::bundle::init::Error),
+ #[error("Counted {actual} objects, but expected {expected} as per multi-index")]
+ UnexpectedObjectCount { actual: usize, expected: usize },
+ #[error("{id} wasn't found in the index referenced in the multi-pack index")]
+ OidNotFound { id: gix_hash::ObjectId },
+ #[error("The object id at multi-index entry {index} wasn't in order")]
+ OutOfOrder { index: EntryIndex },
+ #[error("The fan at index {index} is out of order as it's larger then the following value.")]
+ Fan { index: usize },
+ #[error("The multi-index claims to have no objects")]
+ Empty,
+ #[error("Interrupted")]
+ Interrupted,
+ }
+
+ /// Returned by [`multi_index::File::verify_integrity()`][crate::multi_index::File::verify_integrity()].
+ pub struct Outcome<P> {
+ /// The computed checksum of the multi-index which matched the stored one.
+ pub actual_index_checksum: gix_hash::ObjectId,
+ /// For each entry in [`index_names()`][super::File::index_names()], the corresponding pack traversal outcome.
+ pub pack_traverse_statistics: Vec<crate::index::traverse::Statistics>,
+ /// The provided progress instance.
+ pub progress: P,
+ }
+
+ /// The progress ids used in [`multi_index::File::verify_integrity()`][crate::multi_index::File::verify_integrity()].
+ ///
+ /// Use this information to selectively extract the progress of interest in case the parent application has custom visualization.
+ #[derive(Debug, Copy, Clone)]
+ pub enum ProgressId {
+ /// The amount of bytes read to verify the multi-index checksum.
+ ChecksumBytes,
+ /// The amount of objects whose offset has been checked.
+ ObjectOffsets,
+ }
+
+ impl From<ProgressId> for gix_features::progress::Id {
+ fn from(v: ProgressId) -> Self {
+ match v {
+ ProgressId::ChecksumBytes => *b"MVCK",
+ ProgressId::ObjectOffsets => *b"MVOF",
+ }
+ }
+ }
+}
+
+///
+pub mod checksum {
+ /// Returned by [`multi_index::File::verify_checksum()`][crate::multi_index::File::verify_checksum()].
+ pub type Error = crate::verify::checksum::Error;
+}
+
+impl File {
+ /// Validate that our [`checksum()`][File::checksum()] matches the actual contents
+ /// of this index file, and return it if it does.
+ pub fn verify_checksum(
+ &self,
+ progress: impl Progress,
+ should_interrupt: &AtomicBool,
+ ) -> Result<gix_hash::ObjectId, checksum::Error> {
+ crate::verify::checksum_on_disk_or_mmap(
+ self.path(),
+ &self.data,
+ self.checksum(),
+ self.object_hash,
+ progress,
+ should_interrupt,
+ )
+ }
+
+ /// Similar to [`verify_integrity()`][File::verify_integrity()] but without any deep inspection of objects.
+ ///
+ /// Instead we only validate the contents of the multi-index itself.
+ pub fn verify_integrity_fast<P>(
+ &self,
+ progress: P,
+ should_interrupt: &AtomicBool,
+ ) -> Result<(gix_hash::ObjectId, P), integrity::Error>
+ where
+ P: Progress,
+ {
+ self.verify_integrity_inner(
+ progress,
+ should_interrupt,
+ false,
+ index::verify::integrity::Options::default(),
+ )
+ .map_err(|err| match err {
+ index::traverse::Error::Processor(err) => err,
+ _ => unreachable!("BUG: no other error type is possible"),
+ })
+ .map(|o| (o.actual_index_checksum, o.progress))
+ }
+
+ /// Similar to [`crate::Bundle::verify_integrity()`] but checks all contained indices and their packs.
+ ///
+ /// Note that it's considered a failure if an index doesn't have a corresponding pack.
+ pub fn verify_integrity<C, P, F>(
+ &self,
+ progress: P,
+ should_interrupt: &AtomicBool,
+ options: index::verify::integrity::Options<F>,
+ ) -> Result<integrity::Outcome<P>, index::traverse::Error<integrity::Error>>
+ where
+ P: Progress,
+ C: crate::cache::DecodeEntry,
+ F: Fn() -> C + Send + Clone,
+ {
+ self.verify_integrity_inner(progress, should_interrupt, true, options)
+ }
+
+ fn verify_integrity_inner<C, P, F>(
+ &self,
+ mut progress: P,
+ should_interrupt: &AtomicBool,
+ deep_check: bool,
+ options: index::verify::integrity::Options<F>,
+ ) -> Result<integrity::Outcome<P>, index::traverse::Error<integrity::Error>>
+ where
+ P: Progress,
+ C: crate::cache::DecodeEntry,
+ F: Fn() -> C + Send + Clone,
+ {
+ let parent = self.path.parent().expect("must be in a directory");
+
+ let actual_index_checksum = self
+ .verify_checksum(
+ progress.add_child_with_id(
+ format!("{}: checksum", self.path.display()),
+ integrity::ProgressId::ChecksumBytes.into(),
+ ),
+ should_interrupt,
+ )
+ .map_err(integrity::Error::from)
+ .map_err(index::traverse::Error::Processor)?;
+
+ if let Some(first_invalid) = crate::verify::fan(&self.fan) {
+ return Err(index::traverse::Error::Processor(integrity::Error::Fan {
+ index: first_invalid,
+ }));
+ }
+
+ if self.num_objects == 0 {
+ return Err(index::traverse::Error::Processor(integrity::Error::Empty));
+ }
+
+ let mut pack_traverse_statistics = Vec::new();
+
+ let operation_start = Instant::now();
+ let mut total_objects_checked = 0;
+ let mut pack_ids_and_offsets = Vec::with_capacity(self.num_objects as usize);
+ {
+ let order_start = Instant::now();
+ let mut progress = progress.add_child_with_id("checking oid order", gix_features::progress::UNKNOWN);
+ progress.init(
+ Some(self.num_objects as usize),
+ gix_features::progress::count("objects"),
+ );
+
+ for entry_index in 0..(self.num_objects - 1) {
+ let lhs = self.oid_at_index(entry_index);
+ let rhs = self.oid_at_index(entry_index + 1);
+
+ if rhs.cmp(lhs) != Ordering::Greater {
+ return Err(index::traverse::Error::Processor(integrity::Error::OutOfOrder {
+ index: entry_index,
+ }));
+ }
+ let (pack_id, _) = self.pack_id_and_pack_offset_at_index(entry_index);
+ pack_ids_and_offsets.push((pack_id, entry_index));
+ progress.inc();
+ }
+ {
+ let entry_index = self.num_objects - 1;
+ let (pack_id, _) = self.pack_id_and_pack_offset_at_index(entry_index);
+ pack_ids_and_offsets.push((pack_id, entry_index));
+ }
+ // sort by pack-id to allow handling all indices matching a pack while it's open.
+ pack_ids_and_offsets.sort_by(|l, r| l.0.cmp(&r.0));
+ progress.show_throughput(order_start);
+ };
+
+ progress.init(
+ Some(self.num_indices as usize),
+ gix_features::progress::count("indices"),
+ );
+
+ let mut pack_ids_slice = pack_ids_and_offsets.as_slice();
+
+ for (pack_id, index_file_name) in self.index_names.iter().enumerate() {
+ progress.set_name(index_file_name.display().to_string());
+ progress.inc();
+
+ let mut bundle = None;
+ let index;
+ let index_path = parent.join(index_file_name);
+ let index = if deep_check {
+ bundle = crate::Bundle::at(index_path, self.object_hash)
+ .map_err(integrity::Error::from)
+ .map_err(index::traverse::Error::Processor)?
+ .into();
+ bundle.as_ref().map(|b| &b.index).expect("just set")
+ } else {
+ index = Some(
+ index::File::at(index_path, self.object_hash)
+ .map_err(|err| integrity::Error::BundleInit(crate::bundle::init::Error::Index(err)))
+ .map_err(index::traverse::Error::Processor)?,
+ );
+ index.as_ref().expect("just set")
+ };
+
+ let slice_end = pack_ids_slice.partition_point(|e| e.0 == pack_id as crate::data::Id);
+ let multi_index_entries_to_check = &pack_ids_slice[..slice_end];
+ {
+ let offset_start = Instant::now();
+ let mut offsets_progress =
+ progress.add_child_with_id("verify object offsets", integrity::ProgressId::ObjectOffsets.into());
+ offsets_progress.init(
+ Some(pack_ids_and_offsets.len()),
+ gix_features::progress::count("objects"),
+ );
+ pack_ids_slice = &pack_ids_slice[slice_end..];
+
+ for entry_id in multi_index_entries_to_check.iter().map(|e| e.1) {
+ let oid = self.oid_at_index(entry_id);
+ let (_, expected_pack_offset) = self.pack_id_and_pack_offset_at_index(entry_id);
+ let entry_in_bundle_index = index.lookup(oid).ok_or_else(|| {
+ index::traverse::Error::Processor(integrity::Error::OidNotFound { id: oid.to_owned() })
+ })?;
+ let actual_pack_offset = index.pack_offset_at_index(entry_in_bundle_index);
+ if actual_pack_offset != expected_pack_offset {
+ return Err(index::traverse::Error::Processor(
+ integrity::Error::PackOffsetMismatch {
+ id: oid.to_owned(),
+ expected_pack_offset,
+ actual_pack_offset,
+ },
+ ));
+ }
+ offsets_progress.inc();
+ }
+
+ if should_interrupt.load(std::sync::atomic::Ordering::Relaxed) {
+ return Err(index::traverse::Error::Processor(integrity::Error::Interrupted));
+ }
+ offsets_progress.show_throughput(offset_start);
+ }
+
+ total_objects_checked += multi_index_entries_to_check.len();
+
+ if let Some(bundle) = bundle {
+ progress.set_name(format!("Validating {}", index_file_name.display()));
+ let crate::bundle::verify::integrity::Outcome {
+ actual_index_checksum: _,
+ pack_traverse_outcome,
+ progress: returned_progress,
+ } = bundle
+ .verify_integrity(progress, should_interrupt, options.clone())
+ .map_err(|err| {
+ use index::traverse::Error::*;
+ match err {
+ Processor(err) => Processor(integrity::Error::IndexIntegrity(err)),
+ VerifyChecksum(err) => VerifyChecksum(err),
+ Tree(err) => Tree(err),
+ TreeTraversal(err) => TreeTraversal(err),
+ PackDecode { id, offset, source } => PackDecode { id, offset, source },
+ PackMismatch { expected, actual } => PackMismatch { expected, actual },
+ PackObjectMismatch {
+ expected,
+ actual,
+ offset,
+ kind,
+ } => PackObjectMismatch {
+ expected,
+ actual,
+ offset,
+ kind,
+ },
+ Crc32Mismatch {
+ expected,
+ actual,
+ offset,
+ kind,
+ } => Crc32Mismatch {
+ expected,
+ actual,
+ offset,
+ kind,
+ },
+ Interrupted => Interrupted,
+ }
+ })?;
+ progress = returned_progress;
+ pack_traverse_statistics.push(pack_traverse_outcome);
+ }
+ }
+
+ assert_eq!(
+ self.num_objects as usize, total_objects_checked,
+ "BUG: our slicing should allow to visit all objects"
+ );
+
+ progress.set_name("Validating multi-pack");
+ progress.show_throughput(operation_start);
+
+ Ok(integrity::Outcome {
+ actual_index_checksum,
+ pack_traverse_statistics,
+ progress,
+ })
+ }
+}
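The per-index loop above depends on `pack_ids_and_offsets` being sorted by pack id, so that each pack's entries form a leading slice that `partition_point` can split off before moving on to the next pack. A small self-contained sketch of that grouping pattern, using made-up pairs rather than real multi-index data:

/// Sketch: group `(pack_id, entry_index)` pairs per pack, assuming `pairs` is sorted by pack id.
fn entries_per_pack(mut pairs: &[(u32, u32)], num_packs: u32) -> Vec<Vec<u32>> {
    let mut grouped = Vec::with_capacity(num_packs as usize);
    for pack_id in 0..num_packs {
        // All leading pairs belonging to the current pack.
        let end = pairs.partition_point(|&(id, _)| id == pack_id);
        grouped.push(pairs[..end].iter().map(|&(_, entry)| entry).collect());
        pairs = &pairs[end..];
    }
    grouped
}

// e.g. entries_per_pack(&[(0, 5), (0, 9), (2, 1)], 3) == vec![vec![5, 9], vec![], vec![1]]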
diff --git a/vendor/gix-pack/src/multi_index/write.rs b/vendor/gix-pack/src/multi_index/write.rs
new file mode 100644
index 000000000..314506401
--- /dev/null
+++ b/vendor/gix-pack/src/multi_index/write.rs
@@ -0,0 +1,244 @@
+use std::{
+ convert::TryInto,
+ path::PathBuf,
+ sync::atomic::{AtomicBool, Ordering},
+ time::{Instant, SystemTime},
+};
+
+use gix_features::progress::Progress;
+
+use crate::multi_index;
+
+mod error {
+ /// The error returned by [multi_index::File::write_from_index_paths()][super::multi_index::File::write_from_index_paths()].
+ #[derive(Debug, thiserror::Error)]
+ #[allow(missing_docs)]
+ pub enum Error {
+ #[error(transparent)]
+ Io(#[from] std::io::Error),
+ #[error("Interrupted")]
+ Interrupted,
+ #[error(transparent)]
+ OpenIndex(#[from] crate::index::init::Error),
+ }
+}
+pub use error::Error;
+
+/// An entry suitable for sorting and writing
+pub(crate) struct Entry {
+ pub(crate) id: gix_hash::ObjectId,
+ pub(crate) pack_index: u32,
+ pub(crate) pack_offset: crate::data::Offset,
+ /// Used for sorting in case of duplicates
+ index_mtime: SystemTime,
+}
+
+/// Options for use in [`multi_index::File::write_from_index_paths()`].
+pub struct Options {
+ /// The kind of hash to use for objects and to expect in the input files.
+ pub object_hash: gix_hash::Kind,
+}
+
+/// The result of [`multi_index::File::write_from_index_paths()`].
+pub struct Outcome<P> {
+ /// The calculated multi-index checksum of the file at `multi_index_path`.
+ pub multi_index_checksum: gix_hash::ObjectId,
+ /// The input progress
+ pub progress: P,
+}
+
+/// The progress ids used in [`write_from_index_paths()`][multi_index::File::write_from_index_paths()].
+///
+/// Use this information to selectively extract the progress of interest in case the parent application has custom visualization.
+#[derive(Debug, Copy, Clone)]
+pub enum ProgressId {
+ /// Counts each path in the input set whose entries we enumerate and write into the multi-index
+ FromPathsCollectingEntries,
+ /// The amount of bytes written as part of the multi-index.
+ BytesWritten,
+}
+
+impl From<ProgressId> for gix_features::progress::Id {
+ fn from(v: ProgressId) -> Self {
+ match v {
+ ProgressId::FromPathsCollectingEntries => *b"MPCE",
+ ProgressId::BytesWritten => *b"MPBW",
+ }
+ }
+}
+
+impl multi_index::File {
+ pub(crate) const SIGNATURE: &'static [u8] = b"MIDX";
+ pub(crate) const HEADER_LEN: usize = 4 /*signature*/ +
+ 1 /*version*/ +
+ 1 /*object id version*/ +
+ 1 /*num chunks */ +
+ 1 /*num base files */ +
+ 4 /*num pack files*/;
+
+ /// Create a new multi-index file for writing to `out` from the pack index files at `index_paths`.
+ ///
+ /// Progress is sent to `progress` and interruptions checked via `should_interrupt`.
+ pub fn write_from_index_paths<P>(
+ mut index_paths: Vec<PathBuf>,
+ out: impl std::io::Write,
+ mut progress: P,
+ should_interrupt: &AtomicBool,
+ Options { object_hash }: Options,
+ ) -> Result<Outcome<P>, Error>
+ where
+ P: Progress,
+ {
+ let out = gix_features::hash::Write::new(out, object_hash);
+ let (index_paths_sorted, index_filenames_sorted) = {
+ index_paths.sort();
+ let file_names = index_paths
+ .iter()
+ .map(|p| PathBuf::from(p.file_name().expect("file name present")))
+ .collect::<Vec<_>>();
+ (index_paths, file_names)
+ };
+
+ let entries = {
+ let mut entries = Vec::new();
+ let start = Instant::now();
+ let mut progress =
+ progress.add_child_with_id("Collecting entries", ProgressId::FromPathsCollectingEntries.into());
+ progress.init(Some(index_paths_sorted.len()), gix_features::progress::count("indices"));
+
+ // This could be parallelized… but it's probably not worth it unless you have 500 million objects.
+ for (index_id, index) in index_paths_sorted.iter().enumerate() {
+ let mtime = index
+ .metadata()
+ .and_then(|m| m.modified())
+ .unwrap_or(SystemTime::UNIX_EPOCH);
+ let index = crate::index::File::at(index, object_hash)?;
+
+ entries.reserve(index.num_objects() as usize);
+ entries.extend(index.iter().map(|e| Entry {
+ id: e.oid,
+ pack_index: index_id as u32,
+ pack_offset: e.pack_offset,
+ index_mtime: mtime,
+ }));
+ progress.inc();
+ if should_interrupt.load(Ordering::Relaxed) {
+ return Err(Error::Interrupted);
+ }
+ }
+ progress.show_throughput(start);
+
+ let start = Instant::now();
+ progress.set_name("Deduplicate");
+ progress.init(Some(entries.len()), gix_features::progress::count("entries"));
+ entries.sort_by(|l, r| {
+ l.id.cmp(&r.id)
+ .then_with(|| l.index_mtime.cmp(&r.index_mtime).reverse())
+ .then_with(|| l.pack_index.cmp(&r.pack_index))
+ });
+ entries.dedup_by_key(|e| e.id);
+ progress.inc_by(entries.len());
+ progress.show_throughput(start);
+ if should_interrupt.load(Ordering::Relaxed) {
+ return Err(Error::Interrupted);
+ }
+ entries
+ };
+
+ let mut cf = gix_chunk::file::Index::for_writing();
+ cf.plan_chunk(
+ multi_index::chunk::index_names::ID,
+ multi_index::chunk::index_names::storage_size(&index_filenames_sorted),
+ );
+ cf.plan_chunk(multi_index::chunk::fanout::ID, multi_index::chunk::fanout::SIZE as u64);
+ cf.plan_chunk(
+ multi_index::chunk::lookup::ID,
+ multi_index::chunk::lookup::storage_size(entries.len(), object_hash),
+ );
+ cf.plan_chunk(
+ multi_index::chunk::offsets::ID,
+ multi_index::chunk::offsets::storage_size(entries.len()),
+ );
+
+ let num_large_offsets = multi_index::chunk::large_offsets::num_large_offsets(&entries);
+ if let Some(num_large_offsets) = num_large_offsets {
+ cf.plan_chunk(
+ multi_index::chunk::large_offsets::ID,
+ multi_index::chunk::large_offsets::storage_size(num_large_offsets),
+ );
+ }
+
+ let mut write_progress = progress.add_child_with_id("Writing multi-index", ProgressId::BytesWritten.into());
+ let write_start = Instant::now();
+ write_progress.init(
+ Some(cf.planned_storage_size() as usize + Self::HEADER_LEN),
+ gix_features::progress::bytes(),
+ );
+ let mut out = gix_features::progress::Write {
+ inner: out,
+ progress: write_progress,
+ };
+
+ let bytes_written = Self::write_header(
+ &mut out,
+ cf.num_chunks().try_into().expect("BUG: wrote more than 256 chunks"),
+ index_paths_sorted.len() as u32,
+ object_hash,
+ )?;
+
+ {
+ progress.set_name("Writing chunks");
+ progress.init(Some(cf.num_chunks()), gix_features::progress::count("chunks"));
+
+ let mut chunk_write = cf.into_write(&mut out, bytes_written)?;
+ while let Some(chunk_to_write) = chunk_write.next_chunk() {
+ match chunk_to_write {
+ multi_index::chunk::index_names::ID => {
+ multi_index::chunk::index_names::write(&index_filenames_sorted, &mut chunk_write)?
+ }
+ multi_index::chunk::fanout::ID => multi_index::chunk::fanout::write(&entries, &mut chunk_write)?,
+ multi_index::chunk::lookup::ID => multi_index::chunk::lookup::write(&entries, &mut chunk_write)?,
+ multi_index::chunk::offsets::ID => {
+ multi_index::chunk::offsets::write(&entries, num_large_offsets.is_some(), &mut chunk_write)?
+ }
+ multi_index::chunk::large_offsets::ID => multi_index::chunk::large_offsets::write(
+ &entries,
+ num_large_offsets.expect("available if planned"),
+ &mut chunk_write,
+ )?,
+ unknown => unreachable!("BUG: forgot to implement chunk {:?}", std::str::from_utf8(&unknown)),
+ }
+ progress.inc();
+ if should_interrupt.load(Ordering::Relaxed) {
+ return Err(Error::Interrupted);
+ }
+ }
+ }
+
+ // write trailing checksum
+ let multi_index_checksum: gix_hash::ObjectId = out.inner.hash.digest().into();
+ out.inner.inner.write_all(multi_index_checksum.as_slice())?;
+ out.progress.show_throughput(write_start);
+
+ Ok(Outcome {
+ multi_index_checksum,
+ progress,
+ })
+ }
+
+ fn write_header(
+ mut out: impl std::io::Write,
+ num_chunks: u8,
+ num_indices: u32,
+ object_hash: gix_hash::Kind,
+ ) -> std::io::Result<usize> {
+ out.write_all(Self::SIGNATURE)?;
+ out.write_all(&[crate::multi_index::Version::V1 as u8])?;
+ out.write_all(&[object_hash as u8])?;
+ out.write_all(&[num_chunks])?;
+ out.write_all(&[0])?; /* unused number of base files */
+ out.write_all(&num_indices.to_be_bytes())?;
+
+ Ok(Self::HEADER_LEN)
+ }
+}
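The deduplication step above keeps exactly one entry per object id: entries are sorted by id, then by index mtime with the newest first, then by pack index, and `dedup_by_key` retains the first entry of each run. A minimal sketch of the same sort-then-dedup idea on plain tuples, independent of the crate's `Entry` type:

use std::time::SystemTime;

/// Sketch: keep one (id, mtime, pack_index) entry per id, preferring the
/// newest mtime and then the smallest pack index, mirroring the sort above.
fn dedup_entries(mut entries: Vec<(u32, SystemTime, u32)>) -> Vec<(u32, SystemTime, u32)> {
    entries.sort_by(|l, r| {
        l.0.cmp(&r.0)
            .then_with(|| l.1.cmp(&r.1).reverse()) // newest mtime first
            .then_with(|| l.2.cmp(&r.2))           // then lowest pack index
    });
    entries.dedup_by_key(|e| e.0); // keeps the first entry per id
    entries
}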
diff --git a/vendor/gix-pack/src/verify.rs b/vendor/gix-pack/src/verify.rs
new file mode 100644
index 000000000..f985c8657
--- /dev/null
+++ b/vendor/gix-pack/src/verify.rs
@@ -0,0 +1,64 @@
+use std::{path::Path, sync::atomic::AtomicBool};
+
+use gix_features::progress::Progress;
+
+///
+pub mod checksum {
+ /// Returned by various methods to verify the checksum of a memory mapped file that might also exist on disk.
+ #[derive(thiserror::Error, Debug)]
+ #[allow(missing_docs)]
+ pub enum Error {
+ #[error("Interrupted by user")]
+ Interrupted,
+ #[error("index checksum mismatch: expected {expected}, got {actual}")]
+ Mismatch {
+ expected: gix_hash::ObjectId,
+ actual: gix_hash::ObjectId,
+ },
+ }
+}
+
+ /// Returns the `index` at which the value at `index + 1` is smaller than the value at `index`, or `None` if the fanout table is monotonically non-decreasing.
+pub fn fan(data: &[u32]) -> Option<usize> {
+ data.windows(2)
+ .enumerate()
+ .find_map(|(win_index, v)| (v[0] > v[1]).then_some(win_index))
+}
+
+/// Calculate the hash of the given kind by trying to read the file from disk at `data_path` or falling back on the mapped content in `data`.
+ /// Returns `Ok(expected)` if the computed hash matches `expected`, `Err(checksum::Error::Mismatch {..})` if it doesn't,
+ /// and `Err(checksum::Error::Interrupted)` if the operation was interrupted.
+pub fn checksum_on_disk_or_mmap(
+ data_path: &Path,
+ data: &[u8],
+ expected: gix_hash::ObjectId,
+ object_hash: gix_hash::Kind,
+ mut progress: impl Progress,
+ should_interrupt: &AtomicBool,
+) -> Result<gix_hash::ObjectId, checksum::Error> {
+ let data_len_without_trailer = data.len() - object_hash.len_in_bytes();
+ let actual = match gix_features::hash::bytes_of_file(
+ data_path,
+ data_len_without_trailer,
+ object_hash,
+ &mut progress,
+ should_interrupt,
+ ) {
+ Ok(id) => id,
+ Err(err) if err.kind() == std::io::ErrorKind::Interrupted => return Err(checksum::Error::Interrupted),
+ Err(_io_err) => {
+ let start = std::time::Instant::now();
+ let mut hasher = gix_features::hash::hasher(object_hash);
+ hasher.update(&data[..data_len_without_trailer]);
+ progress.inc_by(data_len_without_trailer);
+ progress.show_throughput(start);
+ gix_hash::ObjectId::from(hasher.digest())
+ }
+ };
+
+ if actual == expected {
+ Ok(actual)
+ } else {
+ Err(checksum::Error::Mismatch { actual, expected })
+ }
+}
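As a concrete illustration of `fan()` above: a fanout table is only valid if it is monotonically non-decreasing, and the function reports the first window index at which a value drops. A tiny self-contained sketch; the helper reproduces the same windows-based check so the example runs on its own.

/// Same windows-based logic as `verify::fan()` above, repeated here only to keep the sketch standalone.
fn fan_check(data: &[u32]) -> Option<usize> {
    data.windows(2)
        .enumerate()
        .find_map(|(i, w)| (w[0] > w[1]).then_some(i))
}

fn fan_examples() {
    let good = [0u32, 1, 1, 4];
    let bad = [0u32, 3, 2, 4]; // drops between index 1 and 2
    assert_eq!(fan_check(&good), None);
    assert_eq!(fan_check(&bad), Some(1));
}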