Diffstat (limited to 'vendor/gix-pack/src/data')
-rw-r--r--  vendor/gix-pack/src/data/delta.rs                          |  70
-rw-r--r--  vendor/gix-pack/src/data/entry/decode.rs                   | 125
-rw-r--r--  vendor/gix-pack/src/data/entry/header.rs                   | 150
-rw-r--r--  vendor/gix-pack/src/data/entry/mod.rs                      |  53
-rw-r--r--  vendor/gix-pack/src/data/file/decode/entry.rs              | 422
-rw-r--r--  vendor/gix-pack/src/data/file/decode/header.rs             | 114
-rw-r--r--  vendor/gix-pack/src/data/file/decode/mod.rs                |  16
-rw-r--r--  vendor/gix-pack/src/data/file/init.rs                      |  41
-rw-r--r--  vendor/gix-pack/src/data/file/mod.rs                       |   9
-rw-r--r--  vendor/gix-pack/src/data/file/verify.rs                    |  42
-rw-r--r--  vendor/gix-pack/src/data/header.rs                         |  55
-rw-r--r--  vendor/gix-pack/src/data/input/bytes_to_entries.rs         | 295
-rw-r--r--  vendor/gix-pack/src/data/input/entries_to_bytes.rs         | 155
-rw-r--r--  vendor/gix-pack/src/data/input/entry.rs                    |  65
-rw-r--r--  vendor/gix-pack/src/data/input/lookup_ref_delta_objects.rs | 211
-rw-r--r--  vendor/gix-pack/src/data/input/mod.rs                      |  41
-rw-r--r--  vendor/gix-pack/src/data/input/types.rs                    |  73
-rw-r--r--  vendor/gix-pack/src/data/mod.rs                            | 134
-rw-r--r--  vendor/gix-pack/src/data/output/bytes.rs                   | 156
-rw-r--r--  vendor/gix-pack/src/data/output/count/mod.rs               |  49
-rw-r--r--  vendor/gix-pack/src/data/output/count/objects/mod.rs       | 405
-rw-r--r--  vendor/gix-pack/src/data/output/count/objects/reduce.rs    |  49
-rw-r--r--  vendor/gix-pack/src/data/output/count/objects/tree.rs      | 124
-rw-r--r--  vendor/gix-pack/src/data/output/count/objects/types.rs     | 105
-rw-r--r--  vendor/gix-pack/src/data/output/count/objects/util.rs      |  24
-rw-r--r--  vendor/gix-pack/src/data/output/entry/iter_from_counts.rs  | 428
-rw-r--r--  vendor/gix-pack/src/data/output/entry/mod.rs               | 181
-rw-r--r--  vendor/gix-pack/src/data/output/mod.rs                     |  41
28 files changed, 3633 insertions, 0 deletions
diff --git a/vendor/gix-pack/src/data/delta.rs b/vendor/gix-pack/src/data/delta.rs
new file mode 100644
index 000000000..a898e4aaf
--- /dev/null
+++ b/vendor/gix-pack/src/data/delta.rs
@@ -0,0 +1,70 @@
+/// Given the decompressed pack delta `d`, decode a size in bytes (either the base object size or the result object size)
+/// Equivalent to [this canonical git function](https://github.com/git/git/blob/311531c9de557d25ac087c1637818bd2aad6eb3a/delta.h#L89)
+pub fn decode_header_size(d: &[u8]) -> (u64, usize) {
+ let mut i = 0;
+ let mut size = 0u64;
+ let mut consumed = 0;
+ for cmd in d.iter() {
+ consumed += 1;
+ size |= (*cmd as u64 & 0x7f) << i;
+ i += 7;
+ if *cmd & 0x80 == 0 {
+ break;
+ }
+ }
+ (size, consumed)
+}
+
+pub fn apply(base: &[u8], mut target: &mut [u8], data: &[u8]) {
+ let mut i = 0;
+ while let Some(cmd) = data.get(i) {
+ i += 1;
+ match cmd {
+ cmd if cmd & 0b1000_0000 != 0 => {
+ let (mut ofs, mut size): (u32, u32) = (0, 0);
+ if cmd & 0b0000_0001 != 0 {
+ ofs = data[i] as u32;
+ i += 1;
+ }
+ if cmd & 0b0000_0010 != 0 {
+ ofs |= (data[i] as u32) << 8;
+ i += 1;
+ }
+ if cmd & 0b0000_0100 != 0 {
+ ofs |= (data[i] as u32) << 16;
+ i += 1;
+ }
+ if cmd & 0b0000_1000 != 0 {
+ ofs |= (data[i] as u32) << 24;
+ i += 1;
+ }
+ if cmd & 0b0001_0000 != 0 {
+ size = data[i] as u32;
+ i += 1;
+ }
+ if cmd & 0b0010_0000 != 0 {
+ size |= (data[i] as u32) << 8;
+ i += 1;
+ }
+ if cmd & 0b0100_0000 != 0 {
+ size |= (data[i] as u32) << 16;
+ i += 1;
+ }
+ if size == 0 {
+ size = 0x10000; // 65536
+ }
+ let ofs = ofs as usize;
+ std::io::Write::write(&mut target, &base[ofs..ofs + size as usize])
+ .expect("delta copy from base: byte slices must match");
+ }
+ 0 => panic!("encountered unsupported command code: 0"),
+ size => {
+ std::io::Write::write(&mut target, &data[i..i + *size as usize])
+ .expect("delta copy data: slice sizes to match up");
+ i += *size as usize;
+ }
+ }
+ }
+ assert_eq!(i, data.len());
+ assert_eq!(target.len(), 0);
+}
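The two size headers at the start of every decompressed delta (base size, then result size) use the little-endian 7-bit varint scheme implemented by `decode_header_size` above. The following self-contained sketch mirrors that function for illustration only (it is not the crate's API) and walks through one example:

fn decode_size(d: &[u8]) -> (u64, usize) {
    // Mirrors `decode_header_size`: 7 bits per byte, least significant group first,
    // a set MSB means another byte follows.
    let (mut shift, mut size, mut consumed) = (0u32, 0u64, 0usize);
    for byte in d {
        consumed += 1;
        size |= (*byte as u64 & 0x7f) << shift;
        shift += 7;
        if *byte & 0x80 == 0 {
            break;
        }
    }
    (size, consumed)
}

fn main() {
    // 0x95 = 0b1001_0101: the low 7 bits are 21, the continuation bit is set;
    // 0x01 then contributes 1 << 7 = 128, so the size is 149 after 2 bytes.
    assert_eq!(decode_size(&[0x95, 0x01]), (149, 2));
    // A single byte with a clear MSB encodes sizes up to 127 directly.
    assert_eq!(decode_size(&[0x7f]), (127, 1));
}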
diff --git a/vendor/gix-pack/src/data/entry/decode.rs b/vendor/gix-pack/src/data/entry/decode.rs
new file mode 100644
index 000000000..79d7aecff
--- /dev/null
+++ b/vendor/gix-pack/src/data/entry/decode.rs
@@ -0,0 +1,125 @@
+use std::io;
+
+use gix_features::decode::{leb64, leb64_from_read};
+
+use super::{BLOB, COMMIT, OFS_DELTA, REF_DELTA, TAG, TREE};
+use crate::data;
+
+/// Decoding
+impl data::Entry {
+ /// Decode an entry from the given entry data `d`, providing the `pack_offset` to allow tracking the start of the entry data section.
+ ///
+ /// # Panics
+ ///
+ /// If we cannot understand the header, garbage data is likely to trigger this.
+ pub fn from_bytes(d: &[u8], pack_offset: data::Offset, hash_len: usize) -> data::Entry {
+ let (type_id, size, mut consumed) = parse_header_info(d);
+
+ use crate::data::entry::Header::*;
+ let object = match type_id {
+ OFS_DELTA => {
+ let (distance, leb_bytes) = leb64(&d[consumed..]);
+ let delta = OfsDelta {
+ base_distance: distance,
+ };
+ consumed += leb_bytes;
+ delta
+ }
+ REF_DELTA => {
+ let delta = RefDelta {
+ base_id: gix_hash::ObjectId::from(&d[consumed..][..hash_len]),
+ };
+ consumed += hash_len;
+ delta
+ }
+ BLOB => Blob,
+ TREE => Tree,
+ COMMIT => Commit,
+ TAG => Tag,
+ _ => panic!("We currently don't support any V3 features or extensions"),
+ };
+ data::Entry {
+ header: object,
+ decompressed_size: size,
+ data_offset: pack_offset + consumed as u64,
+ }
+ }
+
+ /// Instantiate an `Entry` from the reader `r`, providing the `pack_offset` to allow tracking the start of the entry data section.
+ pub fn from_read(
+ mut r: impl io::Read,
+ pack_offset: data::Offset,
+ hash_len: usize,
+ ) -> Result<data::Entry, io::Error> {
+ let (type_id, size, mut consumed) = streaming_parse_header_info(&mut r)?;
+
+ use crate::data::entry::Header::*;
+ let object = match type_id {
+ OFS_DELTA => {
+ let (distance, leb_bytes) = leb64_from_read(&mut r)?;
+ let delta = OfsDelta {
+ base_distance: distance,
+ };
+ consumed += leb_bytes;
+ delta
+ }
+ REF_DELTA => {
+ let mut buf = gix_hash::Kind::buf();
+ let hash = &mut buf[..hash_len];
+ r.read_exact(hash)?;
+ #[allow(clippy::redundant_slicing)]
+ let delta = RefDelta {
+ base_id: gix_hash::ObjectId::from(&hash[..]),
+ };
+ consumed += hash_len;
+ delta
+ }
+ BLOB => Blob,
+ TREE => Tree,
+ COMMIT => Commit,
+ TAG => Tag,
+ _ => panic!("We currently don't support any V3 features or extensions"),
+ };
+ Ok(data::Entry {
+ header: object,
+ decompressed_size: size,
+ data_offset: pack_offset + consumed as u64,
+ })
+ }
+}
+
+#[inline]
+fn streaming_parse_header_info(mut read: impl io::Read) -> Result<(u8, u64, usize), io::Error> {
+ let mut byte = [0u8; 1];
+ read.read_exact(&mut byte)?;
+ let mut c = byte[0];
+ let mut i = 1;
+ let type_id = (c >> 4) & 0b0000_0111;
+ let mut size = c as u64 & 0b0000_1111;
+ let mut s = 4;
+ while c & 0b1000_0000 != 0 {
+ read.read_exact(&mut byte)?;
+ c = byte[0];
+ i += 1;
+ size += ((c & 0b0111_1111) as u64) << s;
+ s += 7
+ }
+ Ok((type_id, size, i))
+}
+
+/// Parses the header of a pack-entry, yielding object type id, decompressed object size, and consumed bytes
+#[inline]
+fn parse_header_info(data: &[u8]) -> (u8, u64, usize) {
+ let mut c = data[0];
+ let mut i = 1;
+ let type_id = (c >> 4) & 0b0000_0111;
+ let mut size = c as u64 & 0b0000_1111;
+ let mut s = 4;
+ while c & 0b1000_0000 != 0 {
+ c = data[i];
+ i += 1;
+ size += ((c & 0b0111_1111) as u64) << s;
+ s += 7
+ }
+ (type_id, size, i)
+}
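For reference, the entry header parsed by `parse_header_info` packs the object type into bits 4-6 of the first byte and the lowest 4 size bits into bits 0-3; while the MSB is set, each following byte contributes another 7 size bits. A standalone sketch (illustration only, not the crate's API) with a worked example:

fn parse_type_and_size(d: &[u8]) -> (u8, u64, usize) {
    // Mirrors `parse_header_info`: type id in bits 4-6 of the first byte, size in
    // little-endian 7-bit groups starting with the low 4 bits of that byte.
    let mut c = d[0];
    let mut i = 1;
    let type_id = (c >> 4) & 0b0000_0111;
    let mut size = (c & 0b0000_1111) as u64;
    let mut shift = 4;
    while c & 0b1000_0000 != 0 {
        c = d[i];
        i += 1;
        size += ((c & 0b0111_1111) as u64) << shift;
        shift += 7;
    }
    (type_id, size, i)
}

fn main() {
    // 0b1011_0101: MSB set (more size bytes follow), type 0b011 = 3 (a blob),
    // low size bits 0b0101 = 5. 0b0000_0001 then adds 1 << 4 = 16, so this is
    // a blob of decompressed size 21 with a 2-byte entry header.
    assert_eq!(parse_type_and_size(&[0b1011_0101, 0b0000_0001]), (3, 21, 2));
}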
diff --git a/vendor/gix-pack/src/data/entry/header.rs b/vendor/gix-pack/src/data/entry/header.rs
new file mode 100644
index 000000000..83983eab0
--- /dev/null
+++ b/vendor/gix-pack/src/data/entry/header.rs
@@ -0,0 +1,150 @@
+use std::io;
+
+use super::{BLOB, COMMIT, OFS_DELTA, REF_DELTA, TAG, TREE};
+use crate::data;
+
+/// The header portion of a pack data entry, identifying the kind of stored object.
+#[derive(PartialEq, Eq, Debug, Hash, Ord, PartialOrd, Clone, Copy)]
+#[cfg_attr(feature = "serde1", derive(serde::Serialize, serde::Deserialize))]
+#[allow(missing_docs)]
+pub enum Header {
+ /// The object is a commit
+ Commit,
+ /// The object is a tree
+ Tree,
+ /// The object is a blob
+ Blob,
+ /// The object is a tag
+ Tag,
+ /// Describes a delta-object which needs to be applied to a base. The base object is identified by the `base_id` field
+ /// which is found within the parent repository.
+ /// Most commonly used for **thin-packs** when receiving pack files from the server to refer to objects that are not
+ /// part of the pack but expected to be present in the receiver's repository.
+ ///
+ /// # Note
+ /// This could also be an object within this pack if the LSB encoded offset would be larger than 20 bytes, which is unlikely to
+ /// happen.
+ ///
+ /// **The naming** is exactly the same as the canonical implementation uses, namely **REF_DELTA**.
+ RefDelta { base_id: gix_hash::ObjectId },
+ /// Describes a delta-object present in this pack which acts as base for this object.
+ /// The base object is measured as a distance from this object's
+ /// pack offset, so that `base_pack_offset = this_objects_pack_offset - base_distance`
+ ///
+ /// # Note
+ ///
+ /// **The naming** is exactly the same as the canonical implementation uses, namely **OFS_DELTA**.
+ OfsDelta { base_distance: u64 },
+}
+
+impl Header {
+ /// Subtract `distance` from `pack_offset` safely, without the chance for overflow, returning `None` if `distance` is 0 or the result would underflow.
+ pub fn verified_base_pack_offset(pack_offset: data::Offset, distance: u64) -> Option<data::Offset> {
+ if distance == 0 {
+ return None;
+ }
+ pack_offset.checked_sub(distance)
+ }
+ /// Convert the header's object kind into [`gix_object::Kind`] if possible
+ pub fn as_kind(&self) -> Option<gix_object::Kind> {
+ use gix_object::Kind::*;
+ Some(match self {
+ Header::Tree => Tree,
+ Header::Blob => Blob,
+ Header::Commit => Commit,
+ Header::Tag => Tag,
+ Header::RefDelta { .. } | Header::OfsDelta { .. } => return None,
+ })
+ }
+ /// Convert this header's object kind into the pack's internal representation
+ pub fn as_type_id(&self) -> u8 {
+ use Header::*;
+ match self {
+ Blob => BLOB,
+ Tree => TREE,
+ Commit => COMMIT,
+ Tag => TAG,
+ OfsDelta { .. } => OFS_DELTA,
+ RefDelta { .. } => REF_DELTA,
+ }
+ }
+ /// Returns true if this is a delta object, i.e. not a full object.
+ pub fn is_delta(&self) -> bool {
+ matches!(self, Header::OfsDelta { .. } | Header::RefDelta { .. })
+ }
+ /// Returns true if this is a base object, i.e. not a delta object.
+ pub fn is_base(&self) -> bool {
+ !self.is_delta()
+ }
+}
+
+impl Header {
+ /// Encode this header, along with the given `decompressed_size_in_bytes`, into the `out` write stream for use within a data pack.
+ ///
+ /// Returns the amount of bytes written to `out`.
+ /// `decompressed_size_in_bytes` is the full size in bytes of the object that this header represents
+ pub fn write_to(&self, decompressed_size_in_bytes: u64, mut out: impl io::Write) -> io::Result<usize> {
+ let mut size = decompressed_size_in_bytes;
+ let mut written = 1;
+ let mut c: u8 = (self.as_type_id() << 4) | (size as u8 & 0b0000_1111);
+ size >>= 4;
+ while size != 0 {
+ out.write_all(&[c | 0b1000_0000])?;
+ written += 1;
+ c = size as u8 & 0b0111_1111;
+ size >>= 7;
+ }
+ out.write_all(&[c])?;
+
+ use Header::*;
+ match self {
+ RefDelta { base_id: oid } => {
+ out.write_all(oid.as_slice())?;
+ written += oid.as_slice().len();
+ }
+ OfsDelta { base_distance } => {
+ let mut buf = [0u8; 10];
+ let buf = leb64_encode(*base_distance, &mut buf);
+ out.write_all(buf)?;
+ written += buf.len();
+ }
+ Blob | Tree | Commit | Tag => {}
+ }
+ Ok(written)
+ }
+
+ /// The size of the header in bytes when serialized
+ pub fn size(&self, decompressed_size: u64) -> usize {
+ self.write_to(decompressed_size, io::sink())
+ .expect("io::sink() to never fail")
+ }
+}
+
+#[inline]
+fn leb64_encode(mut n: u64, buf: &mut [u8; 10]) -> &[u8] {
+ let mut bytes_written = 1;
+ buf[buf.len() - 1] = n as u8 & 0b0111_1111;
+ for out in buf.iter_mut().rev().skip(1) {
+ n >>= 7;
+ if n == 0 {
+ break;
+ }
+ n -= 1;
+ *out = 0b1000_0000 | (n as u8 & 0b0111_1111);
+ bytes_written += 1;
+ }
+ debug_assert_eq!(n, 0, "BUG: buffer must be large enough to hold a 64 bit integer");
+ &buf[buf.len() - bytes_written..]
+}
+
+#[cfg(test)]
+mod tests {
+ use super::*;
+
+ #[test]
+ fn leb64_encode_max_int() {
+ let mut buf = [0u8; 10];
+ let buf = leb64_encode(u64::MAX, &mut buf);
+ assert_eq!(buf.len(), 10, "10 bytes should be used when 64bits are encoded");
+ }
+}
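The `leb64_encode` helper above implements git's offset encoding for `OfsDelta` base distances: big-endian 7-bit groups where every byte except the last has its MSB set, and every more-significant group is stored minus one (the `n -= 1`). A matching decoder sketch, for illustration only (the crate's actual decoder lives in `gix_features::decode::leb64`):

fn decode_offset(bytes: &[u8]) -> u64 {
    // Most significant group first; every group after the first adds 1 back,
    // undoing the `n -= 1` from `leb64_encode`.
    let mut value = (bytes[0] & 0x7f) as u64;
    for &byte in &bytes[1..] {
        value = ((value + 1) << 7) | (byte & 0x7f) as u64;
    }
    value
}

fn main() {
    // Unlike plain LEB128, a distance of 128 encodes as [0x80, 0x00]: the leading
    // group (1) is stored as 0 and restored by the `+ 1` above.
    assert_eq!(decode_offset(&[0x80, 0x00]), 128);
    assert_eq!(decode_offset(&[0x7f]), 127); // a single byte with a clear MSB
}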
diff --git a/vendor/gix-pack/src/data/entry/mod.rs b/vendor/gix-pack/src/data/entry/mod.rs
new file mode 100644
index 000000000..f11c39c5c
--- /dev/null
+++ b/vendor/gix-pack/src/data/entry/mod.rs
@@ -0,0 +1,53 @@
+use crate::data::Entry;
+
+const _TYPE_EXT1: u8 = 0;
+const COMMIT: u8 = 1;
+const TREE: u8 = 2;
+const BLOB: u8 = 3;
+const TAG: u8 = 4;
+const _TYPE_EXT2: u8 = 5;
+const OFS_DELTA: u8 = 6;
+const REF_DELTA: u8 = 7;
+
+/// A way to uniquely identify the location of an entry within a pack bundle
+#[derive(PartialEq, Eq, Debug, Hash, Ord, PartialOrd, Clone)]
+#[cfg_attr(feature = "serde1", derive(serde::Serialize, serde::Deserialize))]
+pub struct Location {
+ /// The id of the pack containing the object. It's unique within its frame of reference which is the owning object database.
+ pub pack_id: u32,
+ /// The size of the entry on disk, so that the range of bytes of the entry is `pack_offset..pack_offset + entry_size`.
+ pub entry_size: usize,
+ /// The start of the entry in the pack identified by `pack_id`.
+ pub pack_offset: data::Offset,
+}
+
+impl Location {
+ /// Compute a range suitable for lookup in pack data using the [`entry_slice()`][crate::data::File::entry_slice()] method.
+ pub fn entry_range(&self, pack_offset: data::Offset) -> crate::data::EntryRange {
+ pack_offset..pack_offset + self.entry_size as u64
+ }
+}
+
+/// Access
+impl Entry {
+ /// Compute the pack offset to the base entry of the object represented by this entry.
+ pub fn base_pack_offset(&self, distance: u64) -> data::Offset {
+ let pack_offset = self.data_offset - self.header_size() as u64;
+ pack_offset.checked_sub(distance).expect("in-bound distance of deltas")
+ }
+ /// The pack offset at which this entry starts
+ pub fn pack_offset(&self) -> data::Offset {
+ self.data_offset - self.header_size() as u64
+ }
+ /// The amount of bytes used to describe this entry in the pack. The header starts at [`Self::pack_offset()`]
+ pub fn header_size(&self) -> usize {
+ self.header.size(self.decompressed_size)
+ }
+}
+
+mod decode;
+
+mod header;
+pub use header::Header;
+
+use crate::data;
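A tiny sketch with assumed numbers to make the offset arithmetic above concrete: an entry's pack offset is its data offset minus the header size, and `entry_range()` spans `pack_offset..pack_offset + entry_size`.

fn main() {
    let data_offset: u64 = 1024; // assumed: where the compressed payload begins
    let header_size: u64 = 3; // assumed: a small entry header
    let entry_size: u64 = 210; // assumed: header plus compressed payload

    let pack_offset = data_offset - header_size; // as in `Entry::pack_offset()`
    let entry_range = pack_offset..pack_offset + entry_size;
    assert_eq!(entry_range.start, 1021);
    assert_eq!(entry_range.end, 1231);
}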
diff --git a/vendor/gix-pack/src/data/file/decode/entry.rs b/vendor/gix-pack/src/data/file/decode/entry.rs
new file mode 100644
index 000000000..60fefec0f
--- /dev/null
+++ b/vendor/gix-pack/src/data/file/decode/entry.rs
@@ -0,0 +1,422 @@
+use std::{convert::TryInto, ops::Range};
+
+use gix_features::zlib;
+use smallvec::SmallVec;
+
+use crate::{
+ cache, data,
+ data::{delta, file::decode::Error, File},
+};
+
+/// A return value of a resolve function, which given an [`ObjectId`][gix_hash::ObjectId] determines where an object can be found.
+#[derive(Debug, PartialEq, Eq, Hash, Ord, PartialOrd, Clone)]
+#[cfg_attr(feature = "serde1", derive(serde::Serialize, serde::Deserialize))]
+pub enum ResolvedBase {
+ /// Indicate an object is within this pack, at the given entry, and thus can be looked up locally.
+ InPack(data::Entry),
+ /// Indicates the object of `kind` was found outside of the pack, and its data was written into an output
+ /// vector which now has a length of `end`.
+ #[allow(missing_docs)]
+ OutOfPack { kind: gix_object::Kind, end: usize },
+}
+
+#[derive(Debug)]
+struct Delta {
+ data: Range<usize>,
+ base_size: usize,
+ result_size: usize,
+
+ decompressed_size: usize,
+ data_offset: data::Offset,
+}
+
+/// Additional information and statistics about a successfully decoded object produced by [`File::decode_entry()`].
+///
+/// Useful to understand the effectiveness of the pack compression or the cost of decompression.
+#[derive(Debug, PartialEq, Eq, Hash, Ord, PartialOrd, Clone)]
+#[cfg_attr(feature = "serde1", derive(serde::Serialize, serde::Deserialize))]
+pub struct Outcome {
+ /// The kind of resolved object.
+ pub kind: gix_object::Kind,
+ /// The amount of deltas in the chain of objects that had to be resolved beforehand.
+ ///
+ /// This number is affected by the [`Cache`][cache::DecodeEntry] implementation, with cache hits shortening the
+ /// delta chain accordingly
+ pub num_deltas: u32,
+ /// The total decompressed size of all pack entries in the delta chain
+ pub decompressed_size: u64,
+ /// The total compressed size of all pack entries in the delta chain
+ pub compressed_size: usize,
+ /// The total size of the decoded object.
+ pub object_size: u64,
+}
+
+impl Outcome {
+ pub(crate) fn default_from_kind(kind: gix_object::Kind) -> Self {
+ Self {
+ kind,
+ num_deltas: 0,
+ decompressed_size: 0,
+ compressed_size: 0,
+ object_size: 0,
+ }
+ }
+ fn from_object_entry(kind: gix_object::Kind, entry: &data::Entry, compressed_size: usize) -> Self {
+ Self {
+ kind,
+ num_deltas: 0,
+ decompressed_size: entry.decompressed_size,
+ compressed_size,
+ object_size: entry.decompressed_size,
+ }
+ }
+}
+
+/// Decompression of objects
+impl File {
+ /// Decompress the given `entry` into `out` and return the amount of bytes read from the pack data.
+ ///
+ /// _Note_ that this method does not resolve deltified objects, but merely decompresses their content.
+ /// `out` is expected to be large enough to hold `entry.size` bytes.
+ ///
+ /// # Panics
+ ///
+ /// If `out` isn't large enough to hold the decompressed `entry`
+ pub fn decompress_entry(&self, entry: &data::Entry, out: &mut [u8]) -> Result<usize, Error> {
+ assert!(
+ out.len() as u64 >= entry.decompressed_size,
+ "output buffer isn't large enough to hold decompressed result, want {}, have {}",
+ entry.decompressed_size,
+ out.len()
+ );
+
+ self.decompress_entry_from_data_offset(entry.data_offset, out)
+ .map_err(Into::into)
+ }
+
+ fn assure_v2(&self) {
+ assert!(
+ matches!(self.version, crate::data::Version::V2),
+ "Only V2 is implemented"
+ );
+ }
+
+ /// Obtain the [`Entry`][crate::data::Entry] at the given `offset` into the pack.
+ ///
+ /// The `offset` is typically obtained from the pack index file.
+ pub fn entry(&self, offset: data::Offset) -> data::Entry {
+ self.assure_v2();
+ let pack_offset: usize = offset.try_into().expect("offset representable by machine");
+ assert!(pack_offset <= self.data.len(), "offset out of bounds");
+
+ let object_data = &self.data[pack_offset..];
+ data::Entry::from_bytes(object_data, offset, self.hash_len)
+ }
+
+ /// Decompress the object expected at the given data offset, sans pack header. This information is only
+ /// known after the pack header was parsed.
+ /// Note that this method does not resolve deltified objects, but merely decompresses their content.
+ /// `out` is expected to be large enough to hold `entry.size` bytes.
+ /// Returns the amount of packed bytes read from the pack data file.
+ pub(crate) fn decompress_entry_from_data_offset(
+ &self,
+ data_offset: data::Offset,
+ out: &mut [u8],
+ ) -> Result<usize, zlib::inflate::Error> {
+ let offset: usize = data_offset.try_into().expect("offset representable by machine");
+ assert!(offset < self.data.len(), "entry offset out of bounds");
+
+ zlib::Inflate::default()
+ .once(&self.data[offset..], out)
+ .map(|(_status, consumed_in, _consumed_out)| consumed_in)
+ }
+
+ /// Like `decompress_entry_from_data_offset`, but returns consumed input and output.
+ pub(crate) fn decompress_entry_from_data_offset_2(
+ &self,
+ data_offset: data::Offset,
+ out: &mut [u8],
+ ) -> Result<(usize, usize), zlib::inflate::Error> {
+ let offset: usize = data_offset.try_into().expect("offset representable by machine");
+ assert!(offset < self.data.len(), "entry offset out of bounds");
+
+ zlib::Inflate::default()
+ .once(&self.data[offset..], out)
+ .map(|(_status, consumed_in, consumed_out)| (consumed_in, consumed_out))
+ }
+
+ /// Decode an entry, resolving deltas as needed, while growing the `out` vector if there is not enough
+ /// space to hold the result object.
+ ///
+ /// The `entry` determines which object to decode, and is commonly obtained with the help of a pack index file or through pack iteration.
+ ///
+ /// `resolve` is a function to lookup objects with the given [`ObjectId`][gix_hash::ObjectId], in case the full object id is used to refer to
+ /// a base object, instead of an in-pack offset.
+ ///
+ /// `delta_cache` is a mechanism to avoid looking up base objects multiple times when decompressing multiple objects in a row.
+ /// Use a [Noop-Cache][cache::Never] to disable caching altogether at the cost of repeating work.
+ pub fn decode_entry(
+ &self,
+ entry: data::Entry,
+ out: &mut Vec<u8>,
+ resolve: impl Fn(&gix_hash::oid, &mut Vec<u8>) -> Option<ResolvedBase>,
+ delta_cache: &mut impl cache::DecodeEntry,
+ ) -> Result<Outcome, Error> {
+ use crate::data::entry::Header::*;
+ match entry.header {
+ Tree | Blob | Commit | Tag => {
+ out.resize(
+ entry
+ .decompressed_size
+ .try_into()
+ .expect("size representable by machine"),
+ 0,
+ );
+ self.decompress_entry(&entry, out.as_mut_slice()).map(|consumed_input| {
+ Outcome::from_object_entry(
+ entry.header.as_kind().expect("a non-delta entry"),
+ &entry,
+ consumed_input,
+ )
+ })
+ }
+ OfsDelta { .. } | RefDelta { .. } => self.resolve_deltas(entry, resolve, out, delta_cache),
+ }
+ }
+
+ /// resolve: technically, this shouldn't ever be required as stored local packs don't refer to objects by id
+ /// that are outside of the pack. Unless, of course, the ref refers to an object within this pack, which means
+ /// it's very, very large as 20 bytes are smaller than the corresponding MSB encoded number
+ fn resolve_deltas(
+ &self,
+ last: data::Entry,
+ resolve: impl Fn(&gix_hash::oid, &mut Vec<u8>) -> Option<ResolvedBase>,
+ out: &mut Vec<u8>,
+ cache: &mut impl cache::DecodeEntry,
+ ) -> Result<Outcome, Error> {
+ // all deltas, from the one that produces the desired object (first) to the oldest at the end of the chain
+ let mut chain = SmallVec::<[Delta; 10]>::default();
+ let first_entry = last.clone();
+ let mut cursor = last;
+ let mut base_buffer_size: Option<usize> = None;
+ let mut object_kind: Option<gix_object::Kind> = None;
+ let mut consumed_input: Option<usize> = None;
+
+ // Find the first full base, either an undeltified object in the pack or a reference to another object.
+ let mut total_delta_data_size: u64 = 0;
+ while cursor.header.is_delta() {
+ if let Some((kind, packed_size)) = cache.get(self.id, cursor.data_offset, out) {
+ base_buffer_size = Some(out.len());
+ object_kind = Some(kind);
+ // If the input entry is a cache hit, keep the packed size as it must be returned.
+ // Otherwise, the packed size will be determined later when decompressing the input delta
+ if total_delta_data_size == 0 {
+ consumed_input = Some(packed_size);
+ }
+ break;
+ }
+ total_delta_data_size += cursor.decompressed_size;
+ let decompressed_size = cursor
+ .decompressed_size
+ .try_into()
+ .expect("a single delta size small enough to fit a usize");
+ chain.push(Delta {
+ data: Range {
+ start: 0,
+ end: decompressed_size,
+ },
+ base_size: 0,
+ result_size: 0,
+ decompressed_size,
+ data_offset: cursor.data_offset,
+ });
+ use crate::data::entry::Header;
+ cursor = match cursor.header {
+ Header::OfsDelta { base_distance } => self.entry(cursor.base_pack_offset(base_distance)),
+ Header::RefDelta { base_id } => match resolve(base_id.as_ref(), out) {
+ Some(ResolvedBase::InPack(entry)) => entry,
+ Some(ResolvedBase::OutOfPack { end, kind }) => {
+ base_buffer_size = Some(end);
+ object_kind = Some(kind);
+ break;
+ }
+ None => return Err(Error::DeltaBaseUnresolved(base_id)),
+ },
+ _ => unreachable!("cursor.is_delta() only allows deltas here"),
+ };
+ }
+
+ // This can happen if the cache held the first entry itself
+ // We will just treat it as an object then, even though it's technically incorrect.
+ if chain.is_empty() {
+ return Ok(Outcome::from_object_entry(
+ object_kind.expect("object kind as set by cache"),
+ &first_entry,
+ consumed_input.expect("consumed bytes as set by cache"),
+ ));
+ };
+
+ // First pass will decompress all delta data and keep it in our output buffer
+ // [<possibly resolved base object>]<delta-1..delta-n>...
+ // so that we can find the biggest result size.
+ let total_delta_data_size: usize = total_delta_data_size.try_into().expect("delta data to fit in memory");
+
+ let chain_len = chain.len();
+ let (first_buffer_end, second_buffer_end) = {
+ let delta_start = base_buffer_size.unwrap_or(0);
+ out.resize(delta_start + total_delta_data_size, 0);
+
+ let delta_range = Range {
+ start: delta_start,
+ end: delta_start + total_delta_data_size,
+ };
+ let mut instructions = &mut out[delta_range.clone()];
+ let mut relative_delta_start = 0;
+ let mut biggest_result_size = 0;
+ for (delta_idx, delta) in chain.iter_mut().rev().enumerate() {
+ let consumed_from_data_offset = self.decompress_entry_from_data_offset(
+ delta.data_offset,
+ &mut instructions[..delta.decompressed_size],
+ )?;
+ let is_last_delta_to_be_applied = delta_idx + 1 == chain_len;
+ if is_last_delta_to_be_applied {
+ consumed_input = Some(consumed_from_data_offset);
+ }
+
+ let (base_size, offset) = delta::decode_header_size(instructions);
+ let mut bytes_consumed_by_header = offset;
+ biggest_result_size = biggest_result_size.max(base_size);
+ delta.base_size = base_size.try_into().expect("base size fits into usize");
+
+ let (result_size, offset) = delta::decode_header_size(&instructions[offset..]);
+ bytes_consumed_by_header += offset;
+ biggest_result_size = biggest_result_size.max(result_size);
+ delta.result_size = result_size.try_into().expect("result size fits into usize");
+
+ // the absolute location into the instructions buffer, so we keep track of the end point of the last delta
+ delta.data.start = relative_delta_start + bytes_consumed_by_header;
+ relative_delta_start += delta.decompressed_size;
+ delta.data.end = relative_delta_start;
+
+ instructions = &mut instructions[delta.decompressed_size..];
+ }
+
+ // Now we can produce a buffer like this
+ // [<biggest-result-buffer, possibly filled with resolved base object data>]<biggest-result-buffer><delta-1..delta-n>
+ // from [<possibly resolved base object>]<delta-1..delta-n>...
+ let biggest_result_size: usize = biggest_result_size
+ .try_into()
+ .expect("biggest result size small enough to fit into usize");
+ let first_buffer_size = biggest_result_size;
+ let second_buffer_size = first_buffer_size;
+ out.resize(first_buffer_size + second_buffer_size + total_delta_data_size, 0);
+
+ // Now 'rescue' the deltas, because in the next step we possibly overwrite that portion
+ // of memory with the base object (in the majority of cases)
+ let second_buffer_end = {
+ let end = first_buffer_size + second_buffer_size;
+ if delta_range.start < end {
+ // …this means that the delta size is even larger than two uncompressed worst-case
+ // intermediate results combined. It would already be undesirable to have it bigger
+ // than the target size (as you could just store the object whole).
+ // However, this just means that it reuses existing deltas smartly, which, as we recall,
+ // each stand for an object. It does mean a lot of data is read to restore
+ // a single object sometimes. Fair enough - pack size is minimized that way.
+ out.copy_within(delta_range, end);
+ } else {
+ let (buffers, instructions) = out.split_at_mut(end);
+ instructions.copy_from_slice(&buffers[delta_range]);
+ }
+ end
+ };
+
+ // If we don't have a out-of-pack object already, fill the base-buffer by decompressing the full object
+ // at which the cursor is left after the iteration
+ if base_buffer_size.is_none() {
+ let base_entry = cursor;
+ debug_assert!(!base_entry.header.is_delta());
+ object_kind = base_entry.header.as_kind();
+ self.decompress_entry_from_data_offset(base_entry.data_offset, out)?;
+ }
+
+ (first_buffer_size, second_buffer_end)
+ };
+
+ // From oldest to most recent, apply all deltas, swapping the buffer back and forth
+ // TODO: once we have more tests, we could optimize this memory-intensive work to
+ // analyse the delta-chains to only copy data once - after all, with 'copy-from-base' deltas,
+ // all data originates from one base at some point.
+ // `out` is: [source-buffer][target-buffer][max-delta-instructions-buffer]
+ let (buffers, instructions) = out.split_at_mut(second_buffer_end);
+ let (mut source_buf, mut target_buf) = buffers.split_at_mut(first_buffer_end);
+
+ let mut last_result_size = None;
+ for (
+ delta_idx,
+ Delta {
+ data,
+ base_size,
+ result_size,
+ ..
+ },
+ ) in chain.into_iter().rev().enumerate()
+ {
+ let data = &mut instructions[data];
+ if delta_idx + 1 == chain_len {
+ last_result_size = Some(result_size);
+ }
+ delta::apply(&source_buf[..base_size], &mut target_buf[..result_size], data);
+ // use the target as source for the next delta
+ std::mem::swap(&mut source_buf, &mut target_buf);
+ }
+
+ let last_result_size = last_result_size.expect("at least one delta chain item");
+ // uneven chains leave the target buffer after the source buffer
+ // FIXME(Performance) If delta-chains are uneven, we know we will have to copy bytes over here
+ // Instead we could use a different start buffer, to naturally end up with the result in the
+ // right one.
+ // However, this is a bit more complicated than just that - you have to deal with the base
+ // object, which should also be placed in the second buffer right away. You don't have that
+ // control/knowledge for out-of-pack bases, so this is a special case to deal with, too.
+ // Maybe these invariants can be represented in the type system though.
+ if chain_len % 2 == 1 {
+ // this seems inverted, but remember: we swapped the buffers on the last iteration
+ target_buf[..last_result_size].copy_from_slice(&source_buf[..last_result_size]);
+ }
+ out.resize(last_result_size, 0);
+
+ let object_kind = object_kind.expect("a base object as root of any delta chain that we are here to resolve");
+ let consumed_input = consumed_input.expect("at least one decompressed delta object");
+ cache.put(
+ self.id,
+ first_entry.data_offset,
+ out.as_slice(),
+ object_kind,
+ consumed_input,
+ );
+ Ok(Outcome {
+ kind: object_kind,
+ // technically depending on the cache, the chain size is not correct as it might
+ // have been cut short by a cache hit. The caller must deactivate the cache to get
+ // actual results
+ num_deltas: chain_len as u32,
+ decompressed_size: first_entry.decompressed_size,
+ compressed_size: consumed_input,
+ object_size: last_result_size as u64,
+ })
+ }
+}
+
+#[cfg(test)]
+mod tests {
+ use super::*;
+
+ #[test]
+ fn size_of_decode_entry_outcome() {
+ assert_eq!(
+ std::mem::size_of::<Outcome>(),
+ 32,
+ "this shouldn't change without use noticing as it's returned a lot"
+ );
+ }
+}
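A hedged usage sketch of `decode_entry` above. `pack` and `offset` are assumed inputs (the offset would normally come from a pack index); the closure is the ref-delta resolver described in the docs, and returning `None` from it makes unresolved thin-pack bases surface as `DeltaBaseUnresolved`. `cache::Never` is the no-op cache mentioned above.

fn decode_at(pack: &gix_pack::data::File, offset: u64) -> Result<Vec<u8>, Box<dyn std::error::Error>> {
    let entry = pack.entry(offset); // panics on out-of-bounds offsets, see `entry()` above
    let mut out = Vec::new();
    let outcome = pack.decode_entry(
        entry,
        &mut out,
        |_base_id, _buf| None, // no out-of-pack lookup in this sketch
        &mut gix_pack::cache::Never, // disable delta-base caching
    )?;
    assert_eq!(out.len() as u64, outcome.object_size);
    Ok(out)
}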
diff --git a/vendor/gix-pack/src/data/file/decode/header.rs b/vendor/gix-pack/src/data/file/decode/header.rs
new file mode 100644
index 000000000..1f4b1de0a
--- /dev/null
+++ b/vendor/gix-pack/src/data/file/decode/header.rs
@@ -0,0 +1,114 @@
+use crate::{
+ data,
+ data::{delta, file::decode::Error, File},
+};
+
+/// A return value of a resolve function, which given an [`ObjectId`][gix_hash::ObjectId] determines where an object can be found.
+#[derive(Debug, PartialEq, Eq, Hash, Ord, PartialOrd, Clone)]
+#[cfg_attr(feature = "serde1", derive(serde::Serialize, serde::Deserialize))]
+pub enum ResolvedBase {
+ /// Indicate an object is within this pack, at the given entry, and thus can be looked up locally.
+ InPack(data::Entry),
+ /// Indicates the object of `kind` was found outside of the pack.
+ OutOfPack {
+ /// The kind of object we found when reading the header of the out-of-pack base.
+ kind: gix_object::Kind,
+ /// The amount of deltas encountered if the object was packed as well.
+ num_deltas: Option<u32>,
+ },
+}
+
+/// Additional information and statistics about a successfully decoded object produced by [`File::decode_header()`].
+///
+/// Useful to understand the effectiveness of the pack compression or the cost of decompression.
+#[derive(Debug, PartialEq, Eq, Hash, Ord, PartialOrd, Clone, Copy)]
+#[cfg_attr(feature = "serde1", derive(serde::Serialize, serde::Deserialize))]
+pub struct Outcome {
+ /// The kind of resolved object.
+ pub kind: gix_object::Kind,
+ /// The decompressed size of the object.
+ pub object_size: u64,
+ /// The amount of deltas in the chain of objects that had to be resolved beforehand.
+ pub num_deltas: u32,
+}
+
+/// Obtain object information quickly.
+impl File {
+ /// Resolve the object header information starting at `entry`, following the chain of entries as needed.
+ ///
+ /// The `entry` determines which object to decode, and is commonly obtained with the help of a pack index file or through pack iteration.
+ ///
+ /// `resolve` is a function to lookup objects with the given [`ObjectId`][gix_hash::ObjectId], in case the full object id
+ /// is used to refer to a base object, instead of an in-pack offset.
+ pub fn decode_header(
+ &self,
+ mut entry: data::Entry,
+ resolve: impl Fn(&gix_hash::oid) -> Option<ResolvedBase>,
+ ) -> Result<Outcome, Error> {
+ use crate::data::entry::Header::*;
+ let mut num_deltas = 0;
+ let mut first_delta_decompressed_size = None::<u64>;
+ loop {
+ match entry.header {
+ Tree | Blob | Commit | Tag => {
+ return Ok(Outcome {
+ kind: entry.header.as_kind().expect("always valid for non-refs"),
+ object_size: first_delta_decompressed_size.unwrap_or(entry.decompressed_size),
+ num_deltas,
+ });
+ }
+ OfsDelta { base_distance } => {
+ num_deltas += 1;
+ if first_delta_decompressed_size.is_none() {
+ first_delta_decompressed_size = Some(self.decode_delta_object_size(&entry)?);
+ }
+ entry = self.entry(entry.base_pack_offset(base_distance))
+ }
+ RefDelta { base_id } => {
+ num_deltas += 1;
+ if first_delta_decompressed_size.is_none() {
+ first_delta_decompressed_size = Some(self.decode_delta_object_size(&entry)?);
+ }
+ match resolve(base_id.as_ref()) {
+ Some(ResolvedBase::InPack(base_entry)) => entry = base_entry,
+ Some(ResolvedBase::OutOfPack {
+ kind,
+ num_deltas: origin_num_deltas,
+ }) => {
+ return Ok(Outcome {
+ kind,
+ object_size: first_delta_decompressed_size.unwrap_or(entry.decompressed_size),
+ num_deltas: origin_num_deltas.unwrap_or_default() + num_deltas,
+ })
+ }
+ None => return Err(Error::DeltaBaseUnresolved(base_id)),
+ }
+ }
+ };
+ }
+ }
+
+ #[inline]
+ fn decode_delta_object_size(&self, entry: &data::Entry) -> Result<u64, Error> {
+ let mut buf = [0_u8; 32];
+ let used = self.decompress_entry_from_data_offset_2(entry.data_offset, &mut buf)?.1;
+ let buf = &buf[..used];
+ let (_base_size, offset) = delta::decode_header_size(buf);
+ let (result_size, _offset) = delta::decode_header_size(&buf[offset..]);
+ Ok(result_size)
+ }
+}
+
+#[cfg(test)]
+mod tests {
+ use super::*;
+
+ #[test]
+ fn size_of_decode_entry_outcome() {
+ assert_eq!(
+ std::mem::size_of::<Outcome>(),
+ 16,
+ "this shouldn't change without use noticing as it's returned a lot"
+ );
+ }
+}
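A hedged usage sketch of `decode_header` above, mirroring the previous example but only reading header information. Without a resolver for out-of-pack bases, ref-delta chains that leave the pack would fail with `DeltaBaseUnresolved`.

fn object_info_at(
    pack: &gix_pack::data::File,
    offset: u64,
) -> Result<(gix_object::Kind, u64), Box<dyn std::error::Error>> {
    let entry = pack.entry(offset);
    let outcome = pack.decode_header(entry, |_base_id| None)?;
    // `outcome.num_deltas` additionally tells how long the delta chain was.
    Ok((outcome.kind, outcome.object_size))
}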
diff --git a/vendor/gix-pack/src/data/file/decode/mod.rs b/vendor/gix-pack/src/data/file/decode/mod.rs
new file mode 100644
index 000000000..10bb7f19b
--- /dev/null
+++ b/vendor/gix-pack/src/data/file/decode/mod.rs
@@ -0,0 +1,16 @@
+///
+pub mod entry;
+///
+pub mod header;
+
+/// Returned by [`File::decode_header()`][crate::data::File::decode_header()],
+ /// [`File::decode_entry()`][crate::data::File::decode_entry()] and
+ /// [`File::decompress_entry()`][crate::data::File::decompress_entry()].
+#[derive(thiserror::Error, Debug)]
+#[allow(missing_docs)]
+pub enum Error {
+ #[error("Failed to decompress pack entry")]
+ ZlibInflate(#[from] gix_features::zlib::inflate::Error),
+ #[error("A delta chain could not be followed as the ref base with id {0} could not be found")]
+ DeltaBaseUnresolved(gix_hash::ObjectId),
+}
diff --git a/vendor/gix-pack/src/data/file/init.rs b/vendor/gix-pack/src/data/file/init.rs
new file mode 100644
index 000000000..b16072417
--- /dev/null
+++ b/vendor/gix-pack/src/data/file/init.rs
@@ -0,0 +1,41 @@
+use std::{convert::TryInto, path::Path};
+
+use crate::data;
+
+/// Instantiation
+impl data::File {
+ /// Try opening a data file at the given `path`.
+ ///
+ /// The `object_hash` is a way to read (and write) the same file format with different hashes, as the hash kind
+ /// isn't stored within the file format itself.
+ pub fn at(path: impl AsRef<Path>, object_hash: gix_hash::Kind) -> Result<data::File, data::header::decode::Error> {
+ Self::at_inner(path.as_ref(), object_hash)
+ }
+
+ fn at_inner(path: &Path, object_hash: gix_hash::Kind) -> Result<data::File, data::header::decode::Error> {
+ use crate::data::header::N32_SIZE;
+ let hash_len = object_hash.len_in_bytes();
+
+ let data = crate::mmap::read_only(path).map_err(|e| data::header::decode::Error::Io {
+ source: e,
+ path: path.to_owned(),
+ })?;
+ let pack_len = data.len();
+ if pack_len < N32_SIZE * 3 + hash_len {
+ return Err(data::header::decode::Error::Corrupt(format!(
+ "Pack data of size {pack_len} is too small for even an empty pack with shortest hash"
+ )));
+ }
+ let (kind, num_objects) =
+ data::header::decode(&data[..12].try_into().expect("enough data after previous check"))?;
+ Ok(data::File {
+ data,
+ path: path.to_owned(),
+ id: gix_features::hash::crc32(path.as_os_str().to_string_lossy().as_bytes()),
+ version: kind,
+ num_objects,
+ hash_len,
+ object_hash,
+ })
+ }
+}
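A hedged usage sketch of `data::File::at` above; the path argument is a placeholder and the hash kind must match the repository's object format (SHA-1 here).

fn open_pack(path: &std::path::Path) -> Result<gix_pack::data::File, Box<dyn std::error::Error>> {
    // Fails with the `decode::Error` variants listed above: I/O problems,
    // a corrupt header, or an unsupported pack version.
    let pack = gix_pack::data::File::at(path, gix_hash::Kind::Sha1)?;
    Ok(pack)
}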
diff --git a/vendor/gix-pack/src/data/file/mod.rs b/vendor/gix-pack/src/data/file/mod.rs
new file mode 100644
index 000000000..6bfe0e272
--- /dev/null
+++ b/vendor/gix-pack/src/data/file/mod.rs
@@ -0,0 +1,9 @@
+mod init;
+///
+pub mod verify;
+
+///
+pub mod decode;
+
+/// The bytes used as header in a pack data file.
+pub type Header = [u8; 12];
diff --git a/vendor/gix-pack/src/data/file/verify.rs b/vendor/gix-pack/src/data/file/verify.rs
new file mode 100644
index 000000000..afec20826
--- /dev/null
+++ b/vendor/gix-pack/src/data/file/verify.rs
@@ -0,0 +1,42 @@
+use std::sync::atomic::AtomicBool;
+
+use gix_features::progress::Progress;
+
+use crate::data::File;
+
+///
+pub mod checksum {
+ /// Returned by [`data::File::verify_checksum()`][crate::data::File::verify_checksum()].
+ pub type Error = crate::verify::checksum::Error;
+}
+
+/// Checksums and verify checksums
+impl File {
+ /// The checksum in the trailer of this pack data file
+ pub fn checksum(&self) -> gix_hash::ObjectId {
+ gix_hash::ObjectId::from(&self.data[self.data.len() - self.hash_len..])
+ }
+
+ /// Verifies that the checksum of the packfile over all bytes preceding it indeed matches the actual checksum,
+ /// returning the actual checksum equivalent to the return value of [`checksum()`][File::checksum()] if there
+ /// is no mismatch.
+ ///
+ /// Note that if no `progress` is desired, one can pass [`gix_features::progress::Discard`].
+ ///
+ /// Have a look at [`index::File::verify_integrity(…)`][crate::index::File::verify_integrity()] for an
+ /// even more thorough integrity check.
+ pub fn verify_checksum(
+ &self,
+ progress: impl Progress,
+ should_interrupt: &AtomicBool,
+ ) -> Result<gix_hash::ObjectId, checksum::Error> {
+ crate::verify::checksum_on_disk_or_mmap(
+ self.path(),
+ &self.data,
+ self.checksum(),
+ self.object_hash,
+ progress,
+ should_interrupt,
+ )
+ }
+}
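A hedged usage sketch of `verify_checksum` above, passing `gix_features::progress::Discard` as suggested in the docs when no progress reporting is needed.

fn verify_pack(pack: &gix_pack::data::File) -> Result<gix_hash::ObjectId, Box<dyn std::error::Error>> {
    let should_interrupt = std::sync::atomic::AtomicBool::new(false);
    let actual = pack.verify_checksum(gix_features::progress::Discard, &should_interrupt)?;
    assert_eq!(actual, pack.checksum()); // on success this equals the trailer checksum
    Ok(actual)
}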
diff --git a/vendor/gix-pack/src/data/header.rs b/vendor/gix-pack/src/data/header.rs
new file mode 100644
index 000000000..348a4ca24
--- /dev/null
+++ b/vendor/gix-pack/src/data/header.rs
@@ -0,0 +1,55 @@
+use crate::data;
+
+pub(crate) const N32_SIZE: usize = std::mem::size_of::<u32>();
+
+/// Parses the first 12 bytes of a pack file, returning the pack version as well as the number of objects contained in the pack.
+pub fn decode(data: &[u8; 12]) -> Result<(data::Version, u32), decode::Error> {
+ let mut ofs = 0;
+ if &data[ofs..ofs + b"PACK".len()] != b"PACK" {
+ return Err(decode::Error::Corrupt("Pack data type not recognized".into()));
+ }
+ ofs += N32_SIZE;
+ let kind = match crate::read_u32(&data[ofs..ofs + N32_SIZE]) {
+ 2 => data::Version::V2,
+ 3 => data::Version::V3,
+ v => return Err(decode::Error::UnsupportedVersion(v)),
+ };
+ ofs += N32_SIZE;
+ let num_objects = crate::read_u32(&data[ofs..ofs + N32_SIZE]);
+
+ Ok((kind, num_objects))
+}
+
+/// Write a pack data header at `version` with `num_objects` and return a buffer.
+pub fn encode(version: data::Version, num_objects: u32) -> [u8; 12] {
+ use crate::data::Version::*;
+ let mut buf = [0u8; 12];
+ buf[..4].copy_from_slice(b"PACK");
+ buf[4..8].copy_from_slice(
+ &match version {
+ V2 => 2u32,
+ V3 => 3,
+ }
+ .to_be_bytes()[..],
+ );
+ buf[8..].copy_from_slice(&num_objects.to_be_bytes()[..]);
+ buf
+}
+
+///
+pub mod decode {
+ /// Returned by [`decode()`][super::decode()].
+ #[derive(thiserror::Error, Debug)]
+ #[allow(missing_docs)]
+ pub enum Error {
+ #[error("Could not open pack file at '{path}'")]
+ Io {
+ source: std::io::Error,
+ path: std::path::PathBuf,
+ },
+ #[error("{0}")]
+ Corrupt(String),
+ #[error("Unsupported pack version: {0}")]
+ UnsupportedVersion(u32),
+ }
+}
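A round-trip sketch of the 12-byte header codec above: the `PACK` magic, then version and object count as big-endian `u32`s. It assumes the functions are reachable as `gix_pack::data::header::{encode, decode}`.

fn main() -> Result<(), Box<dyn std::error::Error>> {
    let header = gix_pack::data::header::encode(gix_pack::data::Version::V2, 42);
    assert_eq!(&header[..4], b"PACK");
    assert_eq!(u32::from_be_bytes(header[4..8].try_into()?), 2); // version
    assert_eq!(u32::from_be_bytes(header[8..12].try_into()?), 42); // number of objects

    let (version, num_objects) = gix_pack::data::header::decode(&header)?;
    assert!(matches!(version, gix_pack::data::Version::V2));
    assert_eq!(num_objects, 42);
    Ok(())
}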
diff --git a/vendor/gix-pack/src/data/input/bytes_to_entries.rs b/vendor/gix-pack/src/data/input/bytes_to_entries.rs
new file mode 100644
index 000000000..cf20d5fbf
--- /dev/null
+++ b/vendor/gix-pack/src/data/input/bytes_to_entries.rs
@@ -0,0 +1,295 @@
+use std::{fs, io};
+
+use gix_features::{
+ hash,
+ hash::Sha1,
+ zlib::{stream::inflate::ReadBoxed, Decompress},
+};
+use gix_hash::ObjectId;
+
+use crate::data::input;
+
+/// An iterator over [`Entries`][input::Entry] in a byte stream.
+///
+ /// This iterator is used as part of [Bundle::write_to_directory(…)][crate::Bundle::write_to_directory()].
+pub struct BytesToEntriesIter<BR> {
+ read: BR,
+ decompressor: Option<Box<Decompress>>,
+ offset: u64,
+ had_error: bool,
+ version: crate::data::Version,
+ objects_left: u32,
+ hash: Option<Sha1>,
+ mode: input::Mode,
+ compressed: input::EntryDataMode,
+ compressed_buf: Option<Vec<u8>>,
+ hash_len: usize,
+ object_hash: gix_hash::Kind,
+}
+
+/// Access
+impl<BR> BytesToEntriesIter<BR> {
+ /// The pack version currently being iterated
+ pub fn version(&self) -> crate::data::Version {
+ self.version
+ }
+
+ /// The kind of iteration
+ pub fn mode(&self) -> input::Mode {
+ self.mode
+ }
+}
+
+/// Initialization
+impl<BR> BytesToEntriesIter<BR>
+where
+ BR: io::BufRead,
+{
+ /// Obtain an iterator from a `read` stream to a pack data file and configure it using `mode` and `compressed`.
+ /// `object_hash` specifies which hash is used for objects in ref-delta entries.
+ ///
+ /// Note that `read` is expected at the beginning of a valid pack data file with a header, entries and a trailer.
+ pub fn new_from_header(
+ mut read: BR,
+ mode: input::Mode,
+ compressed: input::EntryDataMode,
+ object_hash: gix_hash::Kind,
+ ) -> Result<BytesToEntriesIter<BR>, input::Error> {
+ let mut header_data = [0u8; 12];
+ read.read_exact(&mut header_data)?;
+
+ let (version, num_objects) = crate::data::header::decode(&header_data)?;
+ assert_eq!(
+ version,
+ crate::data::Version::V2,
+ "let's stop here if we see undocumented pack formats"
+ );
+ Ok(BytesToEntriesIter {
+ read,
+ decompressor: None,
+ compressed,
+ offset: 12,
+ had_error: false,
+ version,
+ objects_left: num_objects,
+ hash: (mode != input::Mode::AsIs).then(|| {
+ let mut hash = gix_features::hash::hasher(object_hash);
+ hash.update(&header_data);
+ hash
+ }),
+ mode,
+ compressed_buf: None,
+ hash_len: object_hash.len_in_bytes(),
+ object_hash,
+ })
+ }
+
+ fn next_inner(&mut self) -> Result<input::Entry, input::Error> {
+ self.objects_left -= 1; // even an error counts as an object
+
+ // Read header
+ let entry = match self.hash.take() {
+ Some(hash) => {
+ let mut read = read_and_pass_to(
+ &mut self.read,
+ hash::Write {
+ inner: io::sink(),
+ hash,
+ },
+ );
+ let res = crate::data::Entry::from_read(&mut read, self.offset, self.hash_len);
+ self.hash = Some(read.write.hash);
+ res
+ }
+ None => crate::data::Entry::from_read(&mut self.read, self.offset, self.hash_len),
+ }
+ .map_err(input::Error::from)?;
+
+ // Decompress object to learn its compressed bytes
+ let mut decompressor = self
+ .decompressor
+ .take()
+ .unwrap_or_else(|| Box::new(Decompress::new(true)));
+ let compressed_buf = self.compressed_buf.take().unwrap_or_else(|| Vec::with_capacity(4096));
+ decompressor.reset(true);
+ let mut decompressed_reader = ReadBoxed {
+ inner: read_and_pass_to(
+ &mut self.read,
+ if self.compressed.keep() {
+ Vec::with_capacity(entry.decompressed_size as usize)
+ } else {
+ compressed_buf
+ },
+ ),
+ decompressor,
+ };
+
+ let bytes_copied = io::copy(&mut decompressed_reader, &mut io::sink())?;
+ if bytes_copied != entry.decompressed_size {
+ return Err(input::Error::IncompletePack {
+ actual: bytes_copied,
+ expected: entry.decompressed_size,
+ });
+ }
+
+ let pack_offset = self.offset;
+ let compressed_size = decompressed_reader.decompressor.total_in();
+ self.offset += entry.header_size() as u64 + compressed_size;
+ self.decompressor = Some(decompressed_reader.decompressor);
+
+ let mut compressed = decompressed_reader.inner.write;
+ debug_assert_eq!(
+ compressed_size,
+ compressed.len() as u64,
+ "we must track exactly the same amount of bytes as read by the decompressor"
+ );
+ if let Some(hash) = self.hash.as_mut() {
+ hash.update(&compressed);
+ }
+
+ let crc32 = if self.compressed.crc32() {
+ let mut header_buf = [0u8; 12 + gix_hash::Kind::longest().len_in_bytes()];
+ let header_len = entry.header.write_to(bytes_copied, header_buf.as_mut())?;
+ let state = gix_features::hash::crc32_update(0, &header_buf[..header_len]);
+ Some(gix_features::hash::crc32_update(state, &compressed))
+ } else {
+ None
+ };
+
+ let compressed = if self.compressed.keep() {
+ Some(compressed)
+ } else {
+ compressed.clear();
+ self.compressed_buf = Some(compressed);
+ None
+ };
+
+ // The last object gets the trailer (which is potentially verified)
+ let trailer = self.try_read_trailer()?;
+ Ok(input::Entry {
+ header: entry.header,
+ header_size: entry.header_size() as u16,
+ compressed,
+ compressed_size,
+ crc32,
+ pack_offset,
+ decompressed_size: bytes_copied,
+ trailer,
+ })
+ }
+
+ fn try_read_trailer(&mut self) -> Result<Option<ObjectId>, input::Error> {
+ Ok(if self.objects_left == 0 {
+ let mut id = gix_hash::ObjectId::null(self.object_hash);
+ if let Err(err) = self.read.read_exact(id.as_mut_slice()) {
+ if self.mode != input::Mode::Restore {
+ return Err(err.into());
+ }
+ }
+
+ if let Some(hash) = self.hash.take() {
+ let actual_id = gix_hash::ObjectId::from(hash.digest());
+ if self.mode == input::Mode::Restore {
+ id = actual_id;
+ }
+ if id != actual_id {
+ return Err(input::Error::ChecksumMismatch {
+ actual: actual_id,
+ expected: id,
+ });
+ }
+ }
+ Some(id)
+ } else if self.mode == input::Mode::Restore {
+ let hash = self.hash.clone().expect("in restore mode a hash is set");
+ Some(gix_hash::ObjectId::from(hash.digest()))
+ } else {
+ None
+ })
+ }
+}
+
+fn read_and_pass_to<R: io::Read, W: io::Write>(read: &mut R, to: W) -> PassThrough<&mut R, W> {
+ PassThrough { read, write: to }
+}
+
+impl<R> Iterator for BytesToEntriesIter<R>
+where
+ R: io::BufRead,
+{
+ type Item = Result<input::Entry, input::Error>;
+
+ fn next(&mut self) -> Option<Self::Item> {
+ if self.had_error || self.objects_left == 0 {
+ return None;
+ }
+ let result = self.next_inner();
+ self.had_error = result.is_err();
+ if self.had_error {
+ self.objects_left = 0;
+ }
+ if self.mode == input::Mode::Restore && self.had_error {
+ None
+ } else {
+ Some(result)
+ }
+ }
+
+ fn size_hint(&self) -> (usize, Option<usize>) {
+ (self.objects_left as usize, Some(self.objects_left as usize))
+ }
+}
+
+impl<R> std::iter::ExactSizeIterator for BytesToEntriesIter<R> where R: io::BufRead {}
+
+struct PassThrough<R, W> {
+ read: R,
+ write: W,
+}
+
+impl<R, W> io::BufRead for PassThrough<R, W>
+where
+ Self: io::Read,
+ R: io::BufRead,
+ W: io::Write,
+{
+ fn fill_buf(&mut self) -> io::Result<&[u8]> {
+ self.read.fill_buf()
+ }
+
+ fn consume(&mut self, amt: usize) {
+ let buf = self
+ .read
+ .fill_buf()
+ .expect("never fail as we called fill-buf before and this does nothing");
+ self.write
+ .write_all(&buf[..amt])
+ .expect("a write to never fail - should be a memory buffer");
+ self.read.consume(amt)
+ }
+}
+
+impl<R, W> io::Read for PassThrough<R, W>
+where
+ W: io::Write,
+ R: io::Read,
+{
+ fn read(&mut self, buf: &mut [u8]) -> io::Result<usize> {
+ let bytes_read = self.read.read(buf)?;
+ self.write.write_all(&buf[..bytes_read])?;
+ Ok(bytes_read)
+ }
+}
+
+impl crate::data::File {
+ /// Returns an iterator over [`Entries`][crate::data::input::Entry], without making use of the memory mapping.
+ pub fn streaming_iter(&self) -> Result<BytesToEntriesIter<impl io::BufRead>, input::Error> {
+ let reader = io::BufReader::with_capacity(4096 * 8, fs::File::open(&self.path)?);
+ BytesToEntriesIter::new_from_header(
+ reader,
+ input::Mode::Verify,
+ input::EntryDataMode::KeepAndCrc32,
+ self.object_hash,
+ )
+ }
+}
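A hedged usage sketch of `streaming_iter` above: stream over all entries of an opened pack, with the iterator verifying the trailing checksum (`Mode::Verify`) and keeping compressed bytes plus CRC32s as configured by `streaming_iter` itself.

fn total_decompressed_size(pack: &gix_pack::data::File) -> Result<u64, Box<dyn std::error::Error>> {
    let mut total = 0;
    for entry in pack.streaming_iter()? {
        let entry = entry?; // each item can fail, e.g. on checksum mismatch or truncation
        total += entry.decompressed_size;
    }
    Ok(total)
}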
diff --git a/vendor/gix-pack/src/data/input/entries_to_bytes.rs b/vendor/gix-pack/src/data/input/entries_to_bytes.rs
new file mode 100644
index 000000000..a8c21e653
--- /dev/null
+++ b/vendor/gix-pack/src/data/input/entries_to_bytes.rs
@@ -0,0 +1,155 @@
+use std::iter::Peekable;
+
+use gix_features::hash;
+
+use crate::data::input;
+
+/// An implementation of [`Iterator`] to write [encoded entries][input::Entry] to an inner implementation each time
+/// `next()` is called.
+///
+/// It is able to deal with an unknown amount of objects as it will rewrite the pack header once the entries iterator
+/// is depleted and compute the hash in one go by re-reading the whole file.
+pub struct EntriesToBytesIter<I: Iterator, W> {
+ /// An iterator for input [`input::Entry`] instances
+ pub input: Peekable<I>,
+ /// A way of writing encoded bytes.
+ output: W,
+ /// Our trailing hash when done writing all input entries
+ trailer: Option<gix_hash::ObjectId>,
+ /// The version of the packfile to be written.
+ data_version: crate::data::Version,
+ /// The amount of entries seen so far
+ num_entries: u32,
+ /// If we are done, no additional writes will occur
+ is_done: bool,
+ /// The kind of hash to use for the digest
+ object_hash: gix_hash::Kind,
+}
+
+impl<I, W> EntriesToBytesIter<I, W>
+where
+ I: Iterator<Item = Result<input::Entry, input::Error>>,
+ W: std::io::Read + std::io::Write + std::io::Seek,
+{
+ /// Create a new instance reading [entries][input::Entry] from an `input` iterator and write pack data bytes to
+ /// `output` writer, resembling a pack of `version`. The amount of entries will be dynamically determined and
+ /// the pack is completed once the last entry was written.
+ /// `object_hash` is the kind of hash to use for the pack checksum and maybe other places, depending on the version.
+ ///
+ /// # Panics
+ ///
+ /// Not all combinations of `object_hash` and `version` are currently supported; unsupported ones trigger assertion errors.
+ pub fn new(input: I, output: W, version: crate::data::Version, object_hash: gix_hash::Kind) -> Self {
+ assert!(
+ matches!(version, crate::data::Version::V2),
+ "currently only pack version 2 can be written",
+ );
+ assert!(
+ matches!(object_hash, gix_hash::Kind::Sha1),
+ "currently only Sha1 is supported, right now we don't know how other hashes are encoded",
+ );
+ EntriesToBytesIter {
+ input: input.peekable(),
+ output,
+ object_hash,
+ num_entries: 0,
+ trailer: None,
+ data_version: version,
+ is_done: false,
+ }
+ }
+
+ /// Returns the trailing hash over all written entries once done.
+ /// It's `None` if we are not yet done writing.
+ pub fn digest(&self) -> Option<gix_hash::ObjectId> {
+ self.trailer
+ }
+
+ fn next_inner(&mut self, entry: input::Entry) -> Result<input::Entry, input::Error> {
+ if self.num_entries == 0 {
+ let header_bytes = crate::data::header::encode(self.data_version, 0);
+ self.output.write_all(&header_bytes[..])?;
+ }
+ self.num_entries += 1;
+ entry.header.write_to(entry.decompressed_size, &mut self.output)?;
+ std::io::copy(
+ &mut entry
+ .compressed
+ .as_deref()
+ .expect("caller must configure generator to keep compressed bytes"),
+ &mut self.output,
+ )?;
+ Ok(entry)
+ }
+
+ fn write_header_and_digest(&mut self, last_entry: Option<&mut input::Entry>) -> Result<(), input::Error> {
+ let header_bytes = crate::data::header::encode(self.data_version, self.num_entries);
+ let num_bytes_written = if last_entry.is_some() {
+ self.output.stream_position()?
+ } else {
+ header_bytes.len() as u64
+ };
+ self.output.rewind()?;
+ self.output.write_all(&header_bytes[..])?;
+ self.output.flush()?;
+
+ self.output.rewind()?;
+ let interrupt_never = std::sync::atomic::AtomicBool::new(false);
+ let digest = hash::bytes(
+ &mut self.output,
+ num_bytes_written as usize,
+ self.object_hash,
+ &mut gix_features::progress::Discard,
+ &interrupt_never,
+ )?;
+ self.output.write_all(digest.as_slice())?;
+ self.output.flush()?;
+
+ self.is_done = true;
+ if let Some(last_entry) = last_entry {
+ last_entry.trailer = Some(digest);
+ }
+ self.trailer = Some(digest);
+ Ok(())
+ }
+}
+
+impl<I, W> Iterator for EntriesToBytesIter<I, W>
+where
+ I: Iterator<Item = Result<input::Entry, input::Error>>,
+ W: std::io::Read + std::io::Write + std::io::Seek,
+{
+ /// The entry just written to the output if `Ok`, or the error received from the input iterator.
+ type Item = Result<input::Entry, input::Error>;
+
+ fn next(&mut self) -> Option<Self::Item> {
+ if self.is_done {
+ return None;
+ }
+
+ match self.input.next() {
+ Some(res) => Some(match res {
+ Ok(entry) => self.next_inner(entry).and_then(|mut entry| {
+ if self.input.peek().is_none() {
+ self.write_header_and_digest(Some(&mut entry)).map(|_| entry)
+ } else {
+ Ok(entry)
+ }
+ }),
+ Err(err) => {
+ self.is_done = true;
+ Err(err)
+ }
+ }),
+ None => match self.write_header_and_digest(None) {
+ Ok(_) => None,
+ Err(err) => Some(Err(err)),
+ },
+ }
+ }
+
+ fn size_hint(&self) -> (usize, Option<usize>) {
+ self.input.size_hint()
+ }
+}
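A hedged usage sketch of `EntriesToBytesIter` above: re-serialize a pack into an in-memory buffer. `streaming_iter()` keeps the compressed bytes (`KeepAndCrc32`), which this writer requires, and a `Cursor<Vec<u8>>` satisfies the `Read + Write + Seek` bound; the type is assumed to be re-exported as `gix_pack::data::input::EntriesToBytesIter`.

fn rewrite_pack(pack: &gix_pack::data::File) -> Result<Vec<u8>, Box<dyn std::error::Error>> {
    let mut output = std::io::Cursor::new(Vec::new());
    {
        let mut writer = gix_pack::data::input::EntriesToBytesIter::new(
            pack.streaming_iter()?,
            &mut output,
            gix_pack::data::Version::V2,
            gix_hash::Kind::Sha1,
        );
        for entry in &mut writer {
            entry?; // each item is the entry that was just written to `output`
        }
        assert!(writer.digest().is_some(), "trailer hash is available once done");
    }
    Ok(output.into_inner())
}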
diff --git a/vendor/gix-pack/src/data/input/entry.rs b/vendor/gix-pack/src/data/input/entry.rs
new file mode 100644
index 000000000..74d4800a0
--- /dev/null
+++ b/vendor/gix-pack/src/data/input/entry.rs
@@ -0,0 +1,65 @@
+use std::io::Write;
+
+use crate::data::{entry::Header, input};
+
+impl input::Entry {
+ /// Create a new input entry from a given data `obj` set to be placed at the given `pack_offset`.
+ ///
+ /// This method is useful when arbitrary base entries are created
+ pub fn from_data_obj(obj: &gix_object::Data<'_>, pack_offset: u64) -> Result<Self, input::Error> {
+ let header = to_header(obj.kind);
+ let compressed = compress_data(obj)?;
+ let compressed_size = compressed.len() as u64;
+ let mut entry = input::Entry {
+ header,
+ header_size: header.size(obj.data.len() as u64) as u16,
+ pack_offset,
+ compressed: Some(compressed),
+ compressed_size,
+ crc32: None,
+ decompressed_size: obj.data.len() as u64,
+ trailer: None,
+ };
+ entry.crc32 = Some(entry.compute_crc32());
+ Ok(entry)
+ }
+ /// The amount of bytes this entry may consume in a pack data file
+ pub fn bytes_in_pack(&self) -> u64 {
+ self.header_size as u64 + self.compressed_size
+ }
+
+ /// Update our CRC value by recalculating it from our header and compressed data.
+ pub fn compute_crc32(&self) -> u32 {
+ let mut header_buf = [0u8; 12 + gix_hash::Kind::longest().len_in_bytes()];
+ let header_len = self
+ .header
+ .write_to(self.decompressed_size, header_buf.as_mut())
+ .expect("write to memory will not fail");
+ let state = gix_features::hash::crc32_update(0, &header_buf[..header_len]);
+ gix_features::hash::crc32_update(state, self.compressed.as_ref().expect("we always set it"))
+ }
+}
+
+fn to_header(kind: gix_object::Kind) -> Header {
+ use gix_object::Kind::*;
+ match kind {
+ Tree => Header::Tree,
+ Blob => Header::Blob,
+ Commit => Header::Commit,
+ Tag => Header::Tag,
+ }
+}
+
+fn compress_data(obj: &gix_object::Data<'_>) -> Result<Vec<u8>, input::Error> {
+ let mut out = gix_features::zlib::stream::deflate::Write::new(Vec::new());
+ if let Err(err) = std::io::copy(&mut &*obj.data, &mut out) {
+ match err.kind() {
+ std::io::ErrorKind::Other => return Err(input::Error::Io(err)),
+ err => {
+ unreachable!("Should never see other errors than zlib, but got {:?}", err,)
+ }
+ }
+ };
+ out.flush().expect("zlib flush should never fail");
+ Ok(out.into_inner())
+}
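A hedged sketch of `input::Entry::from_data_obj` above: wrap a loose object as a pack input entry, for example to append a missing base while resolving a thin pack. The `gix_object::Data { kind, data }` field layout and the public fields on `input::Entry` are assumptions here.

fn blob_entry(
    bytes: &[u8],
    pack_offset: u64,
) -> Result<gix_pack::data::input::Entry, gix_pack::data::input::Error> {
    let obj = gix_object::Data {
        kind: gix_object::Kind::Blob,
        data: bytes,
    };
    let entry = gix_pack::data::input::Entry::from_data_obj(&obj, pack_offset)?;
    // The entry carries its compressed payload and CRC32, ready to be written out.
    debug_assert_eq!(entry.decompressed_size, bytes.len() as u64);
    Ok(entry)
}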
diff --git a/vendor/gix-pack/src/data/input/lookup_ref_delta_objects.rs b/vendor/gix-pack/src/data/input/lookup_ref_delta_objects.rs
new file mode 100644
index 000000000..f52c645f8
--- /dev/null
+++ b/vendor/gix-pack/src/data/input/lookup_ref_delta_objects.rs
@@ -0,0 +1,211 @@
+use std::convert::TryInto;
+
+use gix_hash::ObjectId;
+
+use crate::data::{entry::Header, input};
+
+/// An iterator to resolve thin packs on the fly.
+pub struct LookupRefDeltaObjectsIter<I, LFn> {
+ /// The inner iterator whose entries we will resolve.
+ pub inner: I,
+ lookup: LFn,
+ /// The cached delta to provide the next time we are called; it's the delta that goes with the base we just resolved in its place.
+ next_delta: Option<input::Entry>,
+ /// Fuse to stop iteration after first missing object.
+ error: bool,
+ /// The changes to pack offsets we accumulated thus far. Each inserted entry offsets all following
+ /// objects by its length, so we need to determine exactly where an object was inserted to see if it's affected at all.
+ inserted_entry_length_at_offset: Vec<Change>,
+ /// The sum of all entries added so far, as a cache to avoid recomputation
+ inserted_entries_length_in_bytes: i64,
+ buf: Vec<u8>,
+}
+
+impl<I, LFn> LookupRefDeltaObjectsIter<I, LFn>
+where
+ I: Iterator<Item = Result<input::Entry, input::Error>>,
+ LFn: for<'a> FnMut(ObjectId, &'a mut Vec<u8>) -> Option<gix_object::Data<'a>>,
+{
+ /// Create a new instance wrapping `iter` and using `lookup` as function to retrieve objects that will serve as bases
+ /// for ref deltas seen while traversing `iter`.
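+ ///
+ /// A minimal sketch (not compiled here): `entries` stands for any iterator over `Result<input::Entry, input::Error>`,
+ /// and `odb_lookup` is a hypothetical helper that finds the base object and writes its data into `buf`.
+ ///
+ /// ```ignore
+ /// let iter = LookupRefDeltaObjectsIter::new(entries, |oid, buf| odb_lookup(oid, buf));
+ /// for entry in iter {
+ ///     // Base objects for ref-deltas are yielded right before the delta that needs them,
+ ///     // with the delta rewritten to an ofs-delta pointing at the inserted base.
+ ///     let _entry = entry?;
+ /// }
+ /// ```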
+ pub fn new(iter: I, lookup: LFn) -> Self {
+ LookupRefDeltaObjectsIter {
+ inner: iter,
+ lookup,
+ error: false,
+ inserted_entry_length_at_offset: Vec::new(),
+ inserted_entries_length_in_bytes: 0,
+ next_delta: None,
+ buf: Vec::new(),
+ }
+ }
+
+ fn shifted_pack_offset(&self, pack_offset: u64) -> u64 {
+ let new_ofs = pack_offset as i64 + self.inserted_entries_length_in_bytes;
+ new_ofs.try_into().expect("offset value never becomes negative")
+ }
+
+ /// Positive `size_change` values mean an object grew or, more commonly, was inserted. Negative values
+ /// mean the object shrank, usually because its header changed from ref-delta to ofs-delta.
+ fn track_change(
+ &mut self,
+ shifted_pack_offset: u64,
+ pack_offset: u64,
+ size_change: i64,
+ oid: impl Into<Option<ObjectId>>,
+ ) {
+ if size_change == 0 {
+ return;
+ }
+ self.inserted_entry_length_at_offset.push(Change {
+ shifted_pack_offset,
+ pack_offset,
+ size_change_in_bytes: size_change,
+ oid: oid.into().unwrap_or_else(||
+ // NOTE: this value acts as sentinel and the actual hash kind doesn't matter.
+ gix_hash::Kind::Sha1.null()),
+ });
+ self.inserted_entries_length_in_bytes += size_change;
+ }
+
+ fn shift_entry_and_point_to_base_by_offset(&mut self, entry: &mut input::Entry, base_distance: u64) {
+ let pack_offset = entry.pack_offset;
+ entry.pack_offset = self.shifted_pack_offset(pack_offset);
+ entry.header = Header::OfsDelta { base_distance };
+ let previous_header_size = entry.header_size;
+ entry.header_size = entry.header.size(entry.decompressed_size) as u16;
+
+ let change = entry.header_size as i64 - previous_header_size as i64;
+ entry.crc32 = Some(entry.compute_crc32());
+ self.track_change(entry.pack_offset, pack_offset, change, None);
+ }
+}
+
+impl<I, LFn> Iterator for LookupRefDeltaObjectsIter<I, LFn>
+where
+ I: Iterator<Item = Result<input::Entry, input::Error>>,
+ LFn: for<'a> FnMut(ObjectId, &'a mut Vec<u8>) -> Option<gix_object::Data<'a>>,
+{
+ type Item = Result<input::Entry, input::Error>;
+
+ fn next(&mut self) -> Option<Self::Item> {
+ if self.error {
+ return None;
+ }
+ if let Some(delta) = self.next_delta.take() {
+ return Some(Ok(delta));
+ }
+ match self.inner.next() {
+ Some(Ok(mut entry)) => match entry.header {
+ Header::RefDelta { base_id } => {
+ match self.inserted_entry_length_at_offset.iter().rfind(|e| e.oid == base_id) {
+ None => {
+ let base_entry = match (self.lookup)(base_id, &mut self.buf) {
+ Some(obj) => {
+ let current_pack_offset = entry.pack_offset;
+ let mut entry = match input::Entry::from_data_obj(&obj, 0) {
+ Ok(e) => e,
+ Err(err) => return Some(Err(err)),
+ };
+ entry.pack_offset = self.shifted_pack_offset(current_pack_offset);
+ self.track_change(
+ entry.pack_offset,
+ current_pack_offset,
+ entry.bytes_in_pack() as i64,
+ base_id,
+ );
+ entry
+ }
+ None => {
+ self.error = true;
+ return Some(Err(input::Error::NotFound { object_id: base_id }));
+ }
+ };
+
+ {
+ self.shift_entry_and_point_to_base_by_offset(&mut entry, base_entry.bytes_in_pack());
+ self.next_delta = Some(entry);
+ }
+ Some(Ok(base_entry))
+ }
+ Some(base_entry) => {
+ let base_distance =
+ self.shifted_pack_offset(entry.pack_offset) - base_entry.shifted_pack_offset;
+ self.shift_entry_and_point_to_base_by_offset(&mut entry, base_distance);
+ Some(Ok(entry))
+ }
+ }
+ }
+ _ => {
+ if self.inserted_entries_length_in_bytes != 0 {
+ if let Header::OfsDelta { base_distance } = entry.header {
+ // We have to find the new distance based on the previous distance to the base, using the absolute
+ // pack offset computed from it as stored in `base_pack_offset`.
+ let base_pack_offset = entry
+ .pack_offset
+ .checked_sub(base_distance)
+ .expect("distance to be in range of pack");
+ match self
+ .inserted_entry_length_at_offset
+ .binary_search_by_key(&base_pack_offset, |c| c.pack_offset)
+ {
+ Ok(index) => {
+ let index = {
+ let maybe_index_of_actual_entry = index + 1;
+ self.inserted_entry_length_at_offset
+ .get(maybe_index_of_actual_entry)
+ .and_then(|c| {
+ (c.pack_offset == base_pack_offset)
+ .then_some(maybe_index_of_actual_entry)
+ })
+ .unwrap_or(index)
+ };
+ let new_distance = self
+ .shifted_pack_offset(entry.pack_offset)
+ .checked_sub(self.inserted_entry_length_at_offset[index].shifted_pack_offset)
+ .expect("a base that is behind us in the pack");
+ self.shift_entry_and_point_to_base_by_offset(&mut entry, new_distance);
+ }
+ Err(index) => {
+ let change_since_offset = self.inserted_entry_length_at_offset[index..]
+ .iter()
+ .map(|c| c.size_change_in_bytes)
+ .sum::<i64>();
+ let new_distance: u64 = {
+ (base_distance as i64 + change_since_offset)
+ .try_into()
+ .expect("it still points behind us")
+ };
+ self.shift_entry_and_point_to_base_by_offset(&mut entry, new_distance);
+ }
+ }
+ } else {
+ // Offset this entry by all changes (positive or negative) that we saw thus far.
+ entry.pack_offset = self.shifted_pack_offset(entry.pack_offset);
+ }
+ }
+ Some(Ok(entry))
+ }
+ },
+ other => other,
+ }
+ }
+
+ fn size_hint(&self) -> (usize, Option<usize>) {
+ let (min, max) = self.inner.size_hint();
+ max.map(|max| (min, Some(max * 2))).unwrap_or_else(|| (min * 2, None))
+ }
+}
+
+#[derive(Debug)]
+struct Change {
+ /// The original pack offset as mentioned in the entry we saw. This is used to find this as base object if deltas refer to it by
+ /// old offset.
+ pack_offset: u64,
+ /// The new pack offset that is the shifted location of the pack entry in the pack.
+ shifted_pack_offset: u64,
+ /// The size change of the entry header, negative values denote shrinking, positive denote growing.
+ size_change_in_bytes: i64,
+ /// The object id of the entry responsible for the change, or null if it's an entry just for tracking an insertion.
+ oid: ObjectId,
+}
diff --git a/vendor/gix-pack/src/data/input/mod.rs b/vendor/gix-pack/src/data/input/mod.rs
new file mode 100644
index 000000000..df191de67
--- /dev/null
+++ b/vendor/gix-pack/src/data/input/mod.rs
@@ -0,0 +1,41 @@
+/// An item of the iteration produced by [`BytesToEntriesIter`]
+#[derive(PartialEq, Eq, Debug, Hash, Ord, PartialOrd, Clone)]
+#[cfg_attr(feature = "serde1", derive(serde::Serialize, serde::Deserialize))]
+pub struct Entry {
+ /// The header of a pack entry
+ pub header: crate::data::entry::Header,
+ /// The amount of bytes used to encode the `header`. `pack_offset + header_size` is the beginning of
+ /// the compressed data in the pack.
+ pub header_size: u16,
+ /// The first byte of the entry at which the `header` can be read.
+ pub pack_offset: u64,
+ /// The compressed bytes consumed to produce the decompressed object.
+ /// These do not contain the header, which makes it possible to easily replace a RefDelta with offset deltas
+ /// when resolving thin packs.
+ /// Depends on the `EntryDataMode` used when the iterator was initialized.
+ pub compressed: Option<Vec<u8>>,
+ /// The amount of bytes the compressed portion of the entry takes, i.e. the portion behind the header.
+ pub compressed_size: u64,
+ /// The CRC32 over the complete entry, that is, the encoded header and the compressed object data.
+ /// Depends on the `EntryDataMode` used when the iterator was initialized.
+ pub crc32: Option<u32>,
+ /// The amount of decompressed bytes of the entry.
+ pub decompressed_size: u64,
+ /// Set for the last object in the iteration, providing the hash over all bytes of the iteration
+ /// for use as trailer in a pack or to verify it matches the trailer.
+ pub trailer: Option<gix_hash::ObjectId>,
+}
+
+mod entry;
+
+mod types;
+pub use types::{EntryDataMode, Error, Mode};
+
+mod bytes_to_entries;
+pub use bytes_to_entries::BytesToEntriesIter;
+
+mod lookup_ref_delta_objects;
+pub use lookup_ref_delta_objects::LookupRefDeltaObjectsIter;
+
+mod entries_to_bytes;
+pub use entries_to_bytes::EntriesToBytesIter;
diff --git a/vendor/gix-pack/src/data/input/types.rs b/vendor/gix-pack/src/data/input/types.rs
new file mode 100644
index 000000000..6fcd459e2
--- /dev/null
+++ b/vendor/gix-pack/src/data/input/types.rs
@@ -0,0 +1,73 @@
+use std::io;
+
+/// Returned by [`BytesToEntriesIter::new_from_header()`][crate::data::input::BytesToEntriesIter::new_from_header()] and as part
+/// of `Item` of [`BytesToEntriesIter`][crate::data::input::BytesToEntriesIter].
+#[derive(thiserror::Error, Debug)]
+#[allow(missing_docs)]
+pub enum Error {
+ #[error("An IO operation failed while streaming an entry")]
+ Io(#[from] io::Error),
+ #[error(transparent)]
+ PackParse(#[from] crate::data::header::decode::Error),
+ #[error("pack checksum in trailer was {expected}, but actual checksum was {actual}")]
+ ChecksumMismatch {
+ expected: gix_hash::ObjectId,
+ actual: gix_hash::ObjectId,
+ },
+ #[error("pack is incomplete: it was decompressed into {actual} bytes but {expected} bytes where expected.")]
+ IncompletePack { actual: u64, expected: u64 },
+ #[error("The object {object_id} could not be decoded or wasn't found")]
+ NotFound { object_id: gix_hash::ObjectId },
+}
+
+/// Iteration Mode
+#[derive(PartialEq, Eq, Debug, Hash, Ord, PartialOrd, Clone, Copy)]
+#[cfg_attr(feature = "serde1", derive(serde::Serialize, serde::Deserialize))]
+pub enum Mode {
+ /// Provide the trailer as read from the pack
+ AsIs,
+ /// Compute our own hash and trigger an error on the last iterated object
+ /// if it does not match the hash provided with the pack.
+ ///
+ /// This way the one iterating the data cannot miss corruption as long as
+ /// the iteration is continued through to the end.
+ Verify,
+ /// Compute our own hash, and if there was an error or the objects are depleted early
+ /// due to a partial pack, return the last valid entry with the hash obtained thus far.
+ /// Note that the existing pack hash, if present, will be ignored.
+ /// As we won't know which object fails, every object will carry the hash obtained thus far.
+ /// This also means that algorithms must be aware of this possibility, or else they might wrongfully
+ /// assume the pack is finished.
+ Restore,
+}
+
+/// Define what to do with the compressed bytes portion of a pack [`Entry`][super::Entry]
+#[derive(PartialEq, Eq, Debug, Hash, Ord, PartialOrd, Clone, Copy)]
+#[cfg_attr(feature = "serde1", derive(serde::Serialize, serde::Deserialize))]
+pub enum EntryDataMode {
+ /// Do nothing with the compressed bytes we read
+ Ignore,
+ /// Only create a CRC32 of the entry, otherwise similar to `Ignore`
+ Crc32,
+ /// Keep them and pass them along in a newly allocated buffer
+ Keep,
+ /// As above, but also compute a CRC32
+ KeepAndCrc32,
+}
+
+impl EntryDataMode {
+ /// Returns true if a crc32 should be computed
+ pub fn crc32(&self) -> bool {
+ match self {
+ EntryDataMode::KeepAndCrc32 | EntryDataMode::Crc32 => true,
+ EntryDataMode::Keep | EntryDataMode::Ignore => false,
+ }
+ }
+ /// Returns true if compressed bytes should be kept
+ pub fn keep(&self) -> bool {
+ match self {
+ EntryDataMode::Keep | EntryDataMode::KeepAndCrc32 => true,
+ EntryDataMode::Ignore | EntryDataMode::Crc32 => false,
+ }
+ }
+}
diff --git a/vendor/gix-pack/src/data/mod.rs b/vendor/gix-pack/src/data/mod.rs
new file mode 100644
index 000000000..da717fc1a
--- /dev/null
+++ b/vendor/gix-pack/src/data/mod.rs
@@ -0,0 +1,134 @@
+//! a pack data file
+use std::{convert::TryInto, path::Path};
+
+/// The offset to an entry into the pack data file, relative to its beginning.
+pub type Offset = u64;
+
+/// An identifier to uniquely identify all packs loaded within a known context or namespace.
+pub type Id = u32;
+
+use memmap2::Mmap;
+
+/// An entry representing a full or delta object within a pack
+#[derive(PartialEq, Eq, Debug, Hash, Ord, PartialOrd, Clone)]
+#[cfg_attr(feature = "serde1", derive(serde::Serialize, serde::Deserialize))]
+pub struct Entry {
+ /// The entry's header
+ pub header: entry::Header,
+ /// The decompressed size of the entry in bytes.
+ ///
+ /// Note that for non-delta entries this will be the size of the object itself.
+ pub decompressed_size: u64,
+ /// The absolute offset to the compressed object data in the pack, just behind the entry's header.
+ pub data_offset: Offset,
+}
+
+mod file;
+pub use file::{decode, verify, Header};
+///
+pub mod header;
+
+///
+pub mod init {
+ pub use super::header::decode::Error;
+}
+
+///
+pub mod entry;
+
+///
+pub mod input;
+
+/// Utilities to encode pack data entries and write them to a `Write` implementation to resemble a pack data file.
+pub mod output;
+
+/// A slice into a pack file denoting a pack entry.
+///
+/// An entry can be decoded into an object.
+pub type EntryRange = std::ops::Range<Offset>;
+
+/// Supported versions of a pack data file
+#[derive(PartialEq, Eq, Debug, Hash, Ord, PartialOrd, Clone, Copy)]
+#[cfg_attr(feature = "serde1", derive(serde::Serialize, serde::Deserialize))]
+#[allow(missing_docs)]
+pub enum Version {
+ V2,
+ V3,
+}
+
+impl Default for Version {
+ fn default() -> Self {
+ Version::V2
+ }
+}
+
+/// A pack data file
+pub struct File {
+ data: Mmap,
+ path: std::path::PathBuf,
+ /// A value to represent this pack uniquely when used with cache lookup, or a way to identify this pack by its location on disk.
+ /// The same location on disk should yield the same id.
+ ///
+ /// These must be unique per pack and must be stable, that is they don't change if the pack doesn't change.
+ /// If the same id is assigned (or reassigned) to different packs, pack creation or cache access will fail in hard-to-debug ways.
+ ///
+ /// This value is controlled by the owning object store, which can use it in whichever way it wants as long as the above constraints are met.
+ pub id: Id,
+ version: Version,
+ num_objects: u32,
+ /// The size of the hash contained within. This is entirely determined by the caller, and repositories have to know which hash to use
+ /// based on their configuration.
+ hash_len: usize,
+ object_hash: gix_hash::Kind,
+}
+
+/// Information about the pack data file itself
+impl File {
+ /// The pack data version of this file
+ pub fn version(&self) -> Version {
+ self.version
+ }
+ /// The number of objects stored in this pack data file
+ pub fn num_objects(&self) -> u32 {
+ self.num_objects
+ }
+ /// The length of all mapped data, including the pack header and the pack trailer
+ pub fn data_len(&self) -> usize {
+ self.data.len()
+ }
+ /// The kind of hash we use internally.
+ pub fn object_hash(&self) -> gix_hash::Kind {
+ self.object_hash
+ }
+ /// The position of the byte one past the last pack entry, or in other terms, the first byte of the trailing hash.
+ pub fn pack_end(&self) -> usize {
+ self.data.len() - self.hash_len
+ }
+
+ /// The path to the pack data file on disk
+ pub fn path(&self) -> &Path {
+ &self.path
+ }
+
+ /// Returns the pack data at the given slice if its range is contained in the mapped pack data
+ pub fn entry_slice(&self, slice: EntryRange) -> Option<&[u8]> {
+ let entry_end: usize = slice.end.try_into().expect("end of pack fits into usize");
+ let entry_start = slice.start as usize;
+ self.data.get(entry_start..entry_end)
+ }
+
+ /// Returns the CRC32 of the pack data indicated by `pack_offset` and the `size` of the mapped data.
+ ///
+ /// _Note:_ finding the right size is only possible by decompressing
+ /// the pack entry beforehand, or by using the (to be sorted) offsets stored in an index file.
+ ///
+ /// # Panics
+ ///
+ /// If `pack_offset` or `size` are pointing to a range outside of the mapped pack data.
+ pub fn entry_crc32(&self, pack_offset: Offset, size: usize) -> u32 {
+ let pack_offset: usize = pack_offset.try_into().expect("pack_offset fits into usize");
+ gix_features::hash::crc32(&self.data[pack_offset..pack_offset + size])
+ }
+}
+
+pub(crate) mod delta;
diff --git a/vendor/gix-pack/src/data/output/bytes.rs b/vendor/gix-pack/src/data/output/bytes.rs
new file mode 100644
index 000000000..ec219db9d
--- /dev/null
+++ b/vendor/gix-pack/src/data/output/bytes.rs
@@ -0,0 +1,156 @@
+use std::io::Write;
+
+use gix_features::hash;
+
+use crate::data::output;
+
+/// The error returned by `next()` in the [`FromEntriesIter`] iterator.
+#[allow(missing_docs)]
+#[derive(Debug, thiserror::Error)]
+pub enum Error<E>
+where
+ E: std::error::Error + 'static,
+{
+ #[error(transparent)]
+ Io(#[from] std::io::Error),
+ #[error(transparent)]
+ Input(E),
+}
+
+/// An implementation of [`Iterator`] to write [encoded entries][output::Entry] to an inner implementation each time
+/// `next()` is called.
+pub struct FromEntriesIter<I, W> {
+ /// An iterator for input [`output::Entry`] instances
+ pub input: I,
+ /// A way of writing encoded bytes.
+ output: hash::Write<W>,
+ /// Our trailing hash when done writing all input entries
+ trailer: Option<gix_hash::ObjectId>,
+ /// The amount of objects in the iteration and the version of the packfile to be written.
+ /// Will be `None` to signal the header was written already.
+ header_info: Option<(crate::data::Version, u32)>,
+ /// The pack data version with which pack entries should be written.
+ entry_version: crate::data::Version,
+ /// The amount of written bytes thus far
+ written: u64,
+ /// Required to quickly find the offset of a previously written object by its index, as future objects may refer to it as a delta base.
+ /// It stores the pack offsets at which objects begin.
+ /// Additionally we store whether an object was invalid; if so we will not write it, nor will we allow deltas to refer to it.
+ pack_offsets_and_validity: Vec<(u64, bool)>,
+ /// If we are done, no additional writes will occur
+ is_done: bool,
+}
+
+impl<I, W, E> FromEntriesIter<I, W>
+where
+ I: Iterator<Item = Result<Vec<output::Entry>, E>>,
+ W: std::io::Write,
+ E: std::error::Error + 'static,
+{
+ /// Create a new instance reading [entries][output::Entry] from an `input` iterator and write pack data bytes to
+ /// `output` writer, resembling a pack of `version` with exactly `num_entries` objects contained in it.
+ /// `object_hash` is the kind of hash to use for the pack checksum and maybe other places, depending on the version.
+ ///
+ /// The input chunks are expected to be sorted already. You can use the [InOrderIter][gix_features::parallel::InOrderIter] to assure
+ /// this happens on the fly, holding entire chunks in memory only as long as needed to dispense them in order.
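+ ///
+ /// A minimal sketch (not compiled here): `entries` stands for an iterator over `Result<Vec<output::Entry>, E>` chunks
+ /// and `num_entries` for the total object count, both assumed to be provided by the caller.
+ ///
+ /// ```ignore
+ /// let mut pack = Vec::new();
+ /// let mut iter = FromEntriesIter::new(entries, &mut pack, num_entries, crate::data::Version::V2, gix_hash::Kind::Sha1);
+ /// for written in &mut iter {
+ ///     let _bytes_written_this_step = written?;
+ /// }
+ /// let _pack_checksum = iter.digest().expect("all entries were written");
+ /// ```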
+ ///
+ /// # Panics
+ ///
+ /// Not all combinations of `object_hash` and `version` are currently supported; unsupported combinations trigger assertion errors.
+ pub fn new(
+ input: I,
+ output: W,
+ num_entries: u32,
+ version: crate::data::Version,
+ object_hash: gix_hash::Kind,
+ ) -> Self {
+ assert!(
+ matches!(version, crate::data::Version::V2),
+ "currently only pack version 2 can be written",
+ );
+ FromEntriesIter {
+ input,
+ output: hash::Write::new(output, object_hash),
+ trailer: None,
+ entry_version: version,
+ pack_offsets_and_validity: Vec::with_capacity(num_entries as usize),
+ written: 0,
+ header_info: Some((version, num_entries)),
+ is_done: false,
+ }
+ }
+
+ /// Consume this instance and return the `output` implementation.
+ ///
+ /// _Note_ that the `input` iterator can be moved out of this instance beforehand.
+ pub fn into_write(self) -> W {
+ self.output.inner
+ }
+
+ /// Returns the trailing hash over all written entries once done.
+ /// It's `None` if we are not yet done writing.
+ pub fn digest(&self) -> Option<gix_hash::ObjectId> {
+ self.trailer
+ }
+
+ fn next_inner(&mut self) -> Result<u64, Error<E>> {
+ let previous_written = self.written;
+ if let Some((version, num_entries)) = self.header_info.take() {
+ let header_bytes = crate::data::header::encode(version, num_entries);
+ self.output.write_all(&header_bytes[..])?;
+ self.written += header_bytes.len() as u64;
+ }
+ match self.input.next() {
+ Some(entries) => {
+ for entry in entries.map_err(Error::Input)? {
+ if entry.is_invalid() {
+ self.pack_offsets_and_validity.push((0, false));
+ continue;
+ };
+ self.pack_offsets_and_validity.push((self.written, true));
+ let header = entry.to_entry_header(self.entry_version, |index| {
+ let (base_offset, is_valid_object) = self.pack_offsets_and_validity[index];
+ if !is_valid_object {
+ unreachable!("if you see this the object database is correct as a delta refers to a non-existing object")
+ }
+ self.written - base_offset
+ });
+ self.written += header.write_to(entry.decompressed_size as u64, &mut self.output)? as u64;
+ self.written += std::io::copy(&mut &*entry.compressed_data, &mut self.output)?;
+ }
+ }
+ None => {
+ let digest = self.output.hash.clone().digest();
+ self.output.inner.write_all(&digest[..])?;
+ self.written += digest.len() as u64;
+ self.output.inner.flush()?;
+ self.is_done = true;
+ self.trailer = Some(gix_hash::ObjectId::from(digest));
+ }
+ };
+ Ok(self.written - previous_written)
+ }
+}
+
+impl<I, W, E> Iterator for FromEntriesIter<I, W>
+where
+ I: Iterator<Item = Result<Vec<output::Entry>, E>>,
+ W: std::io::Write,
+ E: std::error::Error + 'static,
+{
+ /// The amount of bytes written to `out` if `Ok` or the error `E` received from the input.
+ type Item = Result<u64, Error<E>>;
+
+ fn next(&mut self) -> Option<Self::Item> {
+ if self.is_done {
+ return None;
+ }
+ Some(match self.next_inner() {
+ Err(err) => {
+ self.is_done = true;
+ Err(err)
+ }
+ Ok(written) => Ok(written),
+ })
+ }
+}
diff --git a/vendor/gix-pack/src/data/output/count/mod.rs b/vendor/gix-pack/src/data/output/count/mod.rs
new file mode 100644
index 000000000..e7ee767de
--- /dev/null
+++ b/vendor/gix-pack/src/data/output/count/mod.rs
@@ -0,0 +1,49 @@
+use gix_hash::ObjectId;
+
+use crate::data::output::Count;
+
+/// Specifies how the pack location was handled during counting
+#[derive(PartialEq, Eq, Debug, Hash, Ord, PartialOrd, Clone)]
+#[cfg_attr(feature = "serde1", derive(serde::Serialize, serde::Deserialize))]
+pub enum PackLocation {
+ /// We did not lookup this object
+ NotLookedUp,
+ /// The object was looked up and there may be a location in a pack, along with entry information
+ LookedUp(Option<crate::data::entry::Location>),
+}
+
+impl PackLocation {
+ /// Return `true` if the object was looked up but no pack location was found; panics if the location was not looked up yet.
+ pub fn is_none(&self) -> bool {
+ match self {
+ PackLocation::LookedUp(opt) => opt.is_none(),
+ PackLocation::NotLookedUp => unreachable!("must have been resolved"),
+ }
+ }
+ /// Return the pack entry location if one was found; panics if the location was not looked up yet.
+ pub fn as_ref(&self) -> Option<&crate::data::entry::Location> {
+ match self {
+ PackLocation::LookedUp(opt) => opt.as_ref(),
+ PackLocation::NotLookedUp => unreachable!("must have been resolved"),
+ }
+ }
+}
+
+impl Count {
+ /// Create a new instance from the given `oid` and the `location` of its entry in a pack, if known.
+ pub fn from_data(oid: impl Into<ObjectId>, location: Option<crate::data::entry::Location>) -> Self {
+ Count {
+ id: oid.into(),
+ entry_pack_location: PackLocation::LookedUp(location),
+ }
+ }
+}
+
+#[path = "objects/mod.rs"]
+mod objects_impl;
+pub use objects_impl::{objects, objects_unthreaded};
+
+///
+pub mod objects {
+ pub use super::objects_impl::{Error, ObjectExpansion, Options, Outcome, Result};
+}
diff --git a/vendor/gix-pack/src/data/output/count/objects/mod.rs b/vendor/gix-pack/src/data/output/count/objects/mod.rs
new file mode 100644
index 000000000..d56bc9a5f
--- /dev/null
+++ b/vendor/gix-pack/src/data/output/count/objects/mod.rs
@@ -0,0 +1,405 @@
+use std::{
+ cell::RefCell,
+ sync::{atomic::AtomicBool, Arc},
+};
+
+use gix_features::{parallel, progress::Progress};
+use gix_hash::ObjectId;
+
+use crate::{data::output, find};
+
+pub(in crate::data::output::count::objects_impl) mod reduce;
+mod util;
+
+mod types;
+pub use types::{Error, ObjectExpansion, Options, Outcome};
+
+mod tree;
+
+/// The return type used by [`objects()`].
+pub type Result<E1, E2> = std::result::Result<(Vec<output::Count>, Outcome), Error<E1, E2>>;
+
+/// Generate [`Count`][output::Count]s from input `objects` with object expansion based on [`options`][Options]
+/// to learn which objects would constitute a pack. This step is required to know exactly how many objects would
+/// be in a pack while keeping data around to minimize object database access.
+///
+/// A [`Count`][output::Count] object maintains enough state to greatly accelerate future access of packed objects.
+///
+/// * `db` - the object store to use for accessing objects.
+/// * `objects_ids`
+/// * A list of object ids to add to the pack. Duplication checks are performed so no object is ever added to a pack twice.
+/// * Objects may be expanded based on the provided [`options`][Options]
+/// * `progress`
+/// * a way to obtain progress information
+/// * `should_interrupt`
+/// * A flag that is set to true if the operation should stop
+/// * `options`
+/// * more configuration
+pub fn objects<Find, Iter, IterErr, Oid>(
+ db: Find,
+ objects_ids: Iter,
+ progress: impl Progress,
+ should_interrupt: &AtomicBool,
+ Options {
+ thread_limit,
+ input_object_expansion,
+ chunk_size,
+ }: Options,
+) -> Result<find::existing::Error<Find::Error>, IterErr>
+where
+ Find: crate::Find + Send + Clone,
+ <Find as crate::Find>::Error: Send,
+ Iter: Iterator<Item = std::result::Result<Oid, IterErr>> + Send,
+ Oid: Into<ObjectId> + Send,
+ IterErr: std::error::Error + Send,
+{
+ let lower_bound = objects_ids.size_hint().0;
+ let (chunk_size, thread_limit, _) = parallel::optimize_chunk_size_and_thread_limit(
+ chunk_size,
+ if lower_bound == 0 { None } else { Some(lower_bound) },
+ thread_limit,
+ None,
+ );
+ let chunks = gix_features::iter::Chunks {
+ inner: objects_ids,
+ size: chunk_size,
+ };
+ let seen_objs = gix_hashtable::sync::ObjectIdMap::default();
+ let progress = Arc::new(parking_lot::Mutex::new(progress));
+
+ parallel::in_parallel(
+ chunks,
+ thread_limit,
+ {
+ let progress = Arc::clone(&progress);
+ move |n| {
+ (
+ Vec::new(), // object data buffer
+ Vec::new(), // object data buffer 2 to hold two objects at a time
+ {
+ let mut p = progress
+ .lock()
+ .add_child_with_id(format!("thread {n}"), gix_features::progress::UNKNOWN);
+ p.init(None, gix_features::progress::count("objects"));
+ p
+ },
+ )
+ }
+ },
+ {
+ let seen_objs = &seen_objs;
+ move |oids: Vec<std::result::Result<Oid, IterErr>>, (buf1, buf2, progress)| {
+ expand::this(
+ &db,
+ input_object_expansion,
+ seen_objs,
+ oids,
+ buf1,
+ buf2,
+ progress,
+ should_interrupt,
+ true, /*allow pack lookups*/
+ )
+ }
+ },
+ reduce::Statistics::new(progress),
+ )
+}
+
+/// Like [`objects()`], but using only a single thread, mostly to save on the overhead that threading would otherwise require.
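+///
+/// A minimal sketch (not compiled here): `db` stands for any [`crate::Find`] implementation and `ids` for an
+/// iterator of `Result<ObjectId, E>`, both assumed to be provided by the caller.
+///
+/// ```ignore
+/// use std::sync::atomic::AtomicBool;
+///
+/// let (counts, outcome) = objects_unthreaded(
+///     db,
+///     ids,
+///     gix_features::progress::Discard,
+///     &AtomicBool::new(false),
+///     ObjectExpansion::TreeContents,
+/// )?;
+/// assert_eq!(counts.len(), outcome.total_objects);
+/// ```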
+pub fn objects_unthreaded<Find, IterErr, Oid>(
+ db: Find,
+ object_ids: impl Iterator<Item = std::result::Result<Oid, IterErr>>,
+ mut progress: impl Progress,
+ should_interrupt: &AtomicBool,
+ input_object_expansion: ObjectExpansion,
+) -> Result<find::existing::Error<Find::Error>, IterErr>
+where
+ Find: crate::Find,
+ Oid: Into<ObjectId>,
+ IterErr: std::error::Error,
+{
+ let seen_objs = RefCell::new(gix_hashtable::HashSet::default());
+
+ let (mut buf1, mut buf2) = (Vec::new(), Vec::new());
+ expand::this(
+ &db,
+ input_object_expansion,
+ &seen_objs,
+ object_ids,
+ &mut buf1,
+ &mut buf2,
+ &mut progress,
+ should_interrupt,
+ false, /*allow pack lookups*/
+ )
+}
+
+mod expand {
+ use std::sync::atomic::{AtomicBool, Ordering};
+
+ use gix_features::progress::Progress;
+ use gix_hash::{oid, ObjectId};
+ use gix_object::{CommitRefIter, TagRefIter};
+
+ use super::{
+ tree,
+ types::{Error, ObjectExpansion, Outcome},
+ util,
+ };
+ use crate::{
+ data::{output, output::count::PackLocation},
+ find, FindExt,
+ };
+
+ #[allow(clippy::too_many_arguments)]
+ pub fn this<Find, IterErr, Oid>(
+ db: &Find,
+ input_object_expansion: ObjectExpansion,
+ seen_objs: &impl util::InsertImmutable,
+ oids: impl IntoIterator<Item = std::result::Result<Oid, IterErr>>,
+ buf1: &mut Vec<u8>,
+ #[allow(clippy::ptr_arg)] buf2: &mut Vec<u8>,
+ progress: &mut impl Progress,
+ should_interrupt: &AtomicBool,
+ allow_pack_lookups: bool,
+ ) -> super::Result<find::existing::Error<Find::Error>, IterErr>
+ where
+ Find: crate::Find,
+ Oid: Into<ObjectId>,
+ IterErr: std::error::Error,
+ {
+ use ObjectExpansion::*;
+
+ let mut out = Vec::new();
+ let mut tree_traversal_state = gix_traverse::tree::breadthfirst::State::default();
+ let mut tree_diff_state = gix_diff::tree::State::default();
+ let mut parent_commit_ids = Vec::new();
+ let mut traverse_delegate = tree::traverse::AllUnseen::new(seen_objs);
+ let mut changes_delegate = tree::changes::AllNew::new(seen_objs);
+ let mut outcome = Outcome::default();
+
+ let stats = &mut outcome;
+ for id in oids.into_iter() {
+ if should_interrupt.load(Ordering::Relaxed) {
+ return Err(Error::Interrupted);
+ }
+
+ let id = id.map(|oid| oid.into()).map_err(Error::InputIteration)?;
+ let (obj, location) = db.find(id, buf1)?;
+ stats.input_objects += 1;
+ match input_object_expansion {
+ TreeAdditionsComparedToAncestor => {
+ use gix_object::Kind::*;
+ let mut obj = obj;
+ let mut location = location;
+ let mut id = id.to_owned();
+
+ loop {
+ push_obj_count_unique(&mut out, seen_objs, &id, location, progress, stats, false);
+ match obj.kind {
+ Tree | Blob => break,
+ Tag => {
+ id = TagRefIter::from_bytes(obj.data)
+ .target_id()
+ .expect("every tag has a target");
+ let tmp = db.find(id, buf1)?;
+
+ obj = tmp.0;
+ location = tmp.1;
+
+ stats.expanded_objects += 1;
+ continue;
+ }
+ Commit => {
+ let current_tree_iter = {
+ let mut commit_iter = CommitRefIter::from_bytes(obj.data);
+ let tree_id = commit_iter.tree_id().expect("every commit has a tree");
+ parent_commit_ids.clear();
+ for token in commit_iter {
+ match token {
+ Ok(gix_object::commit::ref_iter::Token::Parent { id }) => {
+ parent_commit_ids.push(id)
+ }
+ Ok(_) => break,
+ Err(err) => return Err(Error::CommitDecode(err)),
+ }
+ }
+ let (obj, location) = db.find(tree_id, buf1)?;
+ push_obj_count_unique(
+ &mut out, seen_objs, &tree_id, location, progress, stats, true,
+ );
+ gix_object::TreeRefIter::from_bytes(obj.data)
+ };
+
+ let objects = if parent_commit_ids.is_empty() {
+ traverse_delegate.clear();
+ gix_traverse::tree::breadthfirst(
+ current_tree_iter,
+ &mut tree_traversal_state,
+ |oid, buf| {
+ stats.decoded_objects += 1;
+ match db.find(oid, buf).ok() {
+ Some((obj, location)) => {
+ progress.inc();
+ stats.expanded_objects += 1;
+ out.push(output::Count::from_data(oid, location));
+ obj.try_into_tree_iter()
+ }
+ None => None,
+ }
+ },
+ &mut traverse_delegate,
+ )
+ .map_err(Error::TreeTraverse)?;
+ &traverse_delegate.non_trees
+ } else {
+ for commit_id in &parent_commit_ids {
+ let parent_tree_id = {
+ let (parent_commit_obj, location) = db.find(commit_id, buf2)?;
+
+ push_obj_count_unique(
+ &mut out, seen_objs, commit_id, location, progress, stats, true,
+ );
+ CommitRefIter::from_bytes(parent_commit_obj.data)
+ .tree_id()
+ .expect("every commit has a tree")
+ };
+ let parent_tree = {
+ let (parent_tree_obj, location) = db.find(parent_tree_id, buf2)?;
+ push_obj_count_unique(
+ &mut out,
+ seen_objs,
+ &parent_tree_id,
+ location,
+ progress,
+ stats,
+ true,
+ );
+ gix_object::TreeRefIter::from_bytes(parent_tree_obj.data)
+ };
+
+ changes_delegate.clear();
+ gix_diff::tree::Changes::from(Some(parent_tree))
+ .needed_to_obtain(
+ current_tree_iter.clone(),
+ &mut tree_diff_state,
+ |oid, buf| {
+ stats.decoded_objects += 1;
+ db.find_tree_iter(oid, buf).map(|t| t.0)
+ },
+ &mut changes_delegate,
+ )
+ .map_err(Error::TreeChanges)?;
+ }
+ &changes_delegate.objects
+ };
+ for id in objects.iter() {
+ out.push(id_to_count(db, buf2, id, progress, stats, allow_pack_lookups));
+ }
+ break;
+ }
+ }
+ }
+ }
+ TreeContents => {
+ use gix_object::Kind::*;
+ let mut id = id;
+ let mut obj = (obj, location);
+ loop {
+ push_obj_count_unique(&mut out, seen_objs, &id, obj.1.clone(), progress, stats, false);
+ match obj.0.kind {
+ Tree => {
+ traverse_delegate.clear();
+ gix_traverse::tree::breadthfirst(
+ gix_object::TreeRefIter::from_bytes(obj.0.data),
+ &mut tree_traversal_state,
+ |oid, buf| {
+ stats.decoded_objects += 1;
+ match db.find(oid, buf).ok() {
+ Some((obj, location)) => {
+ progress.inc();
+ stats.expanded_objects += 1;
+ out.push(output::Count::from_data(oid, location));
+ obj.try_into_tree_iter()
+ }
+ None => None,
+ }
+ },
+ &mut traverse_delegate,
+ )
+ .map_err(Error::TreeTraverse)?;
+ for id in traverse_delegate.non_trees.iter() {
+ out.push(id_to_count(db, buf1, id, progress, stats, allow_pack_lookups));
+ }
+ break;
+ }
+ Commit => {
+ id = CommitRefIter::from_bytes(obj.0.data)
+ .tree_id()
+ .expect("every commit has a tree");
+ stats.expanded_objects += 1;
+ obj = db.find(id, buf1)?;
+ continue;
+ }
+ Blob => break,
+ Tag => {
+ id = TagRefIter::from_bytes(obj.0.data)
+ .target_id()
+ .expect("every tag has a target");
+ stats.expanded_objects += 1;
+ obj = db.find(id, buf1)?;
+ continue;
+ }
+ }
+ }
+ }
+ AsIs => push_obj_count_unique(&mut out, seen_objs, &id, location, progress, stats, false),
+ }
+ }
+ outcome.total_objects = out.len();
+ Ok((out, outcome))
+ }
+
+ #[inline]
+ fn push_obj_count_unique(
+ out: &mut Vec<output::Count>,
+ all_seen: &impl util::InsertImmutable,
+ id: &oid,
+ location: Option<crate::data::entry::Location>,
+ progress: &mut impl Progress,
+ statistics: &mut Outcome,
+ count_expanded: bool,
+ ) {
+ let inserted = all_seen.insert(id.to_owned());
+ if inserted {
+ progress.inc();
+ statistics.decoded_objects += 1;
+ if count_expanded {
+ statistics.expanded_objects += 1;
+ }
+ out.push(output::Count::from_data(id, location));
+ }
+ }
+
+ #[inline]
+ fn id_to_count<Find: crate::Find>(
+ db: &Find,
+ buf: &mut Vec<u8>,
+ id: &oid,
+ progress: &mut impl Progress,
+ statistics: &mut Outcome,
+ allow_pack_lookups: bool,
+ ) -> output::Count {
+ progress.inc();
+ statistics.expanded_objects += 1;
+ output::Count {
+ id: id.to_owned(),
+ entry_pack_location: if allow_pack_lookups {
+ PackLocation::LookedUp(db.location_by_oid(id, buf))
+ } else {
+ PackLocation::NotLookedUp
+ },
+ }
+ }
+}
diff --git a/vendor/gix-pack/src/data/output/count/objects/reduce.rs b/vendor/gix-pack/src/data/output/count/objects/reduce.rs
new file mode 100644
index 000000000..c6a61d467
--- /dev/null
+++ b/vendor/gix-pack/src/data/output/count/objects/reduce.rs
@@ -0,0 +1,49 @@
+use std::{marker::PhantomData, sync::Arc};
+
+use gix_features::{parallel, progress::Progress};
+
+use super::Outcome;
+use crate::data::output;
+
+pub struct Statistics<E, P> {
+ total: Outcome,
+ counts: Vec<output::Count>,
+ progress: Arc<parking_lot::Mutex<P>>,
+ _err: PhantomData<E>,
+}
+
+impl<E, P> Statistics<E, P>
+where
+ P: Progress,
+{
+ pub fn new(progress: Arc<parking_lot::Mutex<P>>) -> Self {
+ Statistics {
+ total: Default::default(),
+ counts: Default::default(),
+ progress,
+ _err: PhantomData::default(),
+ }
+ }
+}
+
+impl<E, P> parallel::Reduce for Statistics<E, P>
+where
+ P: Progress,
+{
+ type Input = Result<(Vec<output::Count>, Outcome), E>;
+ type FeedProduce = ();
+ type Output = (Vec<output::Count>, Outcome);
+ type Error = E;
+
+ fn feed(&mut self, item: Self::Input) -> Result<Self::FeedProduce, Self::Error> {
+ let (counts, stats) = item?;
+ self.total.aggregate(stats);
+ self.progress.lock().inc_by(counts.len());
+ self.counts.extend(counts);
+ Ok(())
+ }
+
+ fn finalize(self) -> Result<Self::Output, Self::Error> {
+ Ok((self.counts, self.total))
+ }
+}
diff --git a/vendor/gix-pack/src/data/output/count/objects/tree.rs b/vendor/gix-pack/src/data/output/count/objects/tree.rs
new file mode 100644
index 000000000..d3f4f6b9a
--- /dev/null
+++ b/vendor/gix-pack/src/data/output/count/objects/tree.rs
@@ -0,0 +1,124 @@
+pub mod changes {
+ use gix_diff::tree::{
+ visit::{Action, Change},
+ Visit,
+ };
+ use gix_hash::ObjectId;
+ use gix_object::{bstr::BStr, tree::EntryMode};
+
+ use crate::data::output::count::objects_impl::util::InsertImmutable;
+
+ pub struct AllNew<'a, H> {
+ pub objects: Vec<ObjectId>,
+ all_seen: &'a H,
+ }
+
+ impl<'a, H> AllNew<'a, H>
+ where
+ H: InsertImmutable,
+ {
+ pub fn new(all_seen: &'a H) -> Self {
+ AllNew {
+ objects: Default::default(),
+ all_seen,
+ }
+ }
+ pub fn clear(&mut self) {
+ self.objects.clear();
+ }
+ }
+
+ impl<'a, H> Visit for AllNew<'a, H>
+ where
+ H: InsertImmutable,
+ {
+ fn pop_front_tracked_path_and_set_current(&mut self) {}
+
+ fn push_back_tracked_path_component(&mut self, _component: &BStr) {}
+
+ fn push_path_component(&mut self, _component: &BStr) {}
+
+ fn pop_path_component(&mut self) {}
+
+ fn visit(&mut self, change: Change) -> Action {
+ match change {
+ Change::Addition { oid, entry_mode } | Change::Modification { oid, entry_mode, .. } => {
+ if entry_mode == EntryMode::Commit {
+ return Action::Continue;
+ }
+ let inserted = self.all_seen.insert(oid);
+ if inserted {
+ self.objects.push(oid);
+ }
+ }
+ Change::Deletion { .. } => {}
+ };
+ Action::Continue
+ }
+ }
+}
+
+pub mod traverse {
+ use gix_hash::ObjectId;
+ use gix_object::{
+ bstr::BStr,
+ tree::{EntryMode, EntryRef},
+ };
+ use gix_traverse::tree::{visit::Action, Visit};
+
+ use crate::data::output::count::objects_impl::util::InsertImmutable;
+
+ pub struct AllUnseen<'a, H> {
+ pub non_trees: Vec<ObjectId>,
+ all_seen: &'a H,
+ }
+
+ impl<'a, H> AllUnseen<'a, H>
+ where
+ H: InsertImmutable,
+ {
+ pub fn new(all_seen: &'a H) -> Self {
+ AllUnseen {
+ non_trees: Default::default(),
+ all_seen,
+ }
+ }
+ pub fn clear(&mut self) {
+ self.non_trees.clear();
+ }
+ }
+
+ impl<'a, H> Visit for AllUnseen<'a, H>
+ where
+ H: InsertImmutable,
+ {
+ fn pop_front_tracked_path_and_set_current(&mut self) {}
+
+ fn push_back_tracked_path_component(&mut self, _component: &BStr) {}
+
+ fn push_path_component(&mut self, _component: &BStr) {}
+
+ fn pop_path_component(&mut self) {}
+
+ fn visit_tree(&mut self, entry: &EntryRef<'_>) -> Action {
+ let inserted = self.all_seen.insert(entry.oid.to_owned());
+ if inserted {
+ Action::Continue
+ } else {
+ Action::Skip
+ }
+ }
+
+ fn visit_nontree(&mut self, entry: &EntryRef<'_>) -> Action {
+ if entry.mode == EntryMode::Commit {
+ // links don't have a representation
+ return Action::Continue;
+ }
+ let inserted = self.all_seen.insert(entry.oid.to_owned());
+ if inserted {
+ self.non_trees.push(entry.oid.to_owned());
+ }
+ Action::Continue
+ }
+ }
+}
diff --git a/vendor/gix-pack/src/data/output/count/objects/types.rs b/vendor/gix-pack/src/data/output/count/objects/types.rs
new file mode 100644
index 000000000..8c8c939df
--- /dev/null
+++ b/vendor/gix-pack/src/data/output/count/objects/types.rs
@@ -0,0 +1,105 @@
+/// Information gathered during the run of [`objects()`][super::objects()].
+#[derive(Default, PartialEq, Eq, Debug, Hash, Ord, PartialOrd, Clone, Copy)]
+#[cfg_attr(feature = "serde1", derive(serde::Serialize, serde::Deserialize))]
+pub struct Outcome {
+ /// The amount of objects provided to start the iteration.
+ pub input_objects: usize,
+ /// The amount of objects that have been expanded from the input source.
+ /// It's desirable to do that as expansion happens on multiple threads, allowing the amount of input objects to be small.
+ /// `expanded_objects - decoded_objects` is the amount of 'cheap' objects we found without decoding them.
+ pub expanded_objects: usize,
+ /// The amount of fully decoded objects. These are the most expensive as they are fully decoded
+ pub decoded_objects: usize,
+ /// The total amount of encountered objects. Should be `expanded_objects + input_objects`.
+ pub total_objects: usize,
+}
+
+impl Outcome {
+ pub(in crate::data::output::count) fn aggregate(
+ &mut self,
+ Outcome {
+ input_objects,
+ decoded_objects,
+ expanded_objects,
+ total_objects,
+ }: Self,
+ ) {
+ self.input_objects += input_objects;
+ self.decoded_objects += decoded_objects;
+ self.expanded_objects += expanded_objects;
+ self.total_objects += total_objects;
+ }
+}
+
+/// The way input objects are handled
+#[derive(PartialEq, Eq, Debug, Hash, Ord, PartialOrd, Clone, Copy)]
+#[cfg_attr(feature = "serde1", derive(serde::Serialize, serde::Deserialize))]
+pub enum ObjectExpansion {
+ /// Don't do anything with the input objects except for transforming them into pack entries
+ AsIs,
+ /// If the input object is a Commit then turn it into a pack entry. Additionally obtain its tree, turn it into a pack entry
+ /// along with all of its contents, that is nested trees, and any other objects reachable from it.
+ /// Otherwise, the same as [`AsIs`][ObjectExpansion::AsIs].
+ ///
+ /// This mode is useful if all reachable objects should be added, as in cloning a repository.
+ TreeContents,
+ /// If the input is a commit, obtain its ancestors and turn them into pack entries. Obtain the ancestor trees along with the commits
+ /// tree and turn them into pack entries. Finally obtain the added/changed objects when comparing the ancestor trees with the
+ /// current tree and turn them into entries as well.
+ /// Otherwise, the same as [`AsIs`][ObjectExpansion::AsIs].
+ ///
+ /// This mode is useful to build a pack containing only new objects compared to a previous state.
+ TreeAdditionsComparedToAncestor,
+}
+
+impl Default for ObjectExpansion {
+ fn default() -> Self {
+ ObjectExpansion::AsIs
+ }
+}
+
+/// Configuration options for the pack generation functions provided in [this module][crate::data::output].
+#[derive(PartialEq, Eq, Debug, Hash, Ord, PartialOrd, Clone, Copy)]
+#[cfg_attr(feature = "serde1", derive(serde::Serialize, serde::Deserialize))]
+pub struct Options {
+ /// The amount of threads to use at most when resolving the pack. If `None`, all logical cores are used.
+ /// If more than one thread is used, the order of returned [counts][crate::data::output::Count] is not deterministic anymore,
+ /// especially when tree traversal is involved. Thus deterministic ordering requires `Some(1)` to be set.
+ pub thread_limit: Option<usize>,
+ /// The amount of objects per chunk or unit of work to be sent to threads for processing
+ pub chunk_size: usize,
+ /// The way input objects are handled
+ pub input_object_expansion: ObjectExpansion,
+}
+
+impl Default for Options {
+ fn default() -> Self {
+ Options {
+ thread_limit: None,
+ chunk_size: 10,
+ input_object_expansion: Default::default(),
+ }
+ }
+}
+
+/// The error returned by the counting functions [`objects()`][super::objects()] and [`objects_unthreaded()`][super::objects_unthreaded()].
+#[derive(Debug, thiserror::Error)]
+#[allow(missing_docs)]
+pub enum Error<FindErr, IterErr>
+where
+ FindErr: std::error::Error + 'static,
+ IterErr: std::error::Error + 'static,
+{
+ #[error(transparent)]
+ CommitDecode(gix_object::decode::Error),
+ #[error(transparent)]
+ FindExisting(#[from] FindErr),
+ #[error(transparent)]
+ InputIteration(IterErr),
+ #[error(transparent)]
+ TreeTraverse(gix_traverse::tree::breadthfirst::Error),
+ #[error(transparent)]
+ TreeChanges(gix_diff::tree::changes::Error),
+ #[error("Operation interrupted")]
+ Interrupted,
+}
diff --git a/vendor/gix-pack/src/data/output/count/objects/util.rs b/vendor/gix-pack/src/data/output/count/objects/util.rs
new file mode 100644
index 000000000..a80841313
--- /dev/null
+++ b/vendor/gix-pack/src/data/output/count/objects/util.rs
@@ -0,0 +1,24 @@
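+/// A set that allows insertion through a shared reference, returning `true` if `id` was newly inserted
+/// and `false` if it was already present. This lets counting code de-duplicate object ids without `&mut` access,
+/// whether shared across threads or wrapped in a `RefCell` on a single thread.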
+pub trait InsertImmutable {
+ fn insert(&self, id: gix_hash::ObjectId) -> bool;
+}
+
+mod trait_impls {
+ use gix_hash::ObjectId;
+ use std::cell::RefCell;
+
+ use gix_hashtable::HashSet;
+
+ use super::InsertImmutable;
+
+ impl InsertImmutable for gix_hashtable::sync::ObjectIdMap<()> {
+ fn insert(&self, id: ObjectId) -> bool {
+ self.insert(id, ()).is_none()
+ }
+ }
+
+ impl InsertImmutable for RefCell<HashSet<ObjectId>> {
+ fn insert(&self, item: ObjectId) -> bool {
+ self.borrow_mut().insert(item)
+ }
+ }
+}
diff --git a/vendor/gix-pack/src/data/output/entry/iter_from_counts.rs b/vendor/gix-pack/src/data/output/entry/iter_from_counts.rs
new file mode 100644
index 000000000..25e256d5c
--- /dev/null
+++ b/vendor/gix-pack/src/data/output/entry/iter_from_counts.rs
@@ -0,0 +1,428 @@
+pub(crate) mod function {
+ use std::{cmp::Ordering, sync::Arc};
+
+ use gix_features::{parallel, parallel::SequenceId, progress::Progress};
+
+ use super::{reduce, util, Error, Mode, Options, Outcome, ProgressId};
+ use crate::data::output;
+
+ /// Given a known list of object `counts`, calculate entries ready to be put into a data pack.
+ ///
+ /// This allows objects to be written quite soon without having to wait for the entire pack to be built in memory.
+ /// A chunk of objects is held in memory, compressed using DEFLATE, and serves as the output of this iterator.
+ /// That way slow writers will naturally apply back pressure, and communicate to the implementation that more time can be
+ /// spent compressing objects.
+ ///
+ /// * `counts`
+ /// * A list of previously counted objects to add to the pack. Duplication checks are not performed, no object is expected to be duplicated.
+ /// * `progress`
+ /// * a way to obtain progress information
+ /// * `options`
+ /// * more configuration
+ ///
+ /// _Returns_ the checksum of the pack
+ ///
+ /// ## Discussion
+ ///
+ /// ### Advantages
+ ///
+ /// * Begins writing immediately and supports back-pressure.
+ /// * Abstracts over object databases and how input is provided.
+ ///
+ /// ### Disadvantages
+ ///
+ /// * ~~currently there is no way to easily write the pack index, even though the state here is uniquely positioned to do
+ /// so with minimal overhead (especially compared to `gix index-from-pack`)~~ Probably works now by chaining Iterators
+ /// or keeping enough state to write a pack and then generate an index with recorded data.
+ ///
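+ /// A minimal sketch of the intended pipeline (not compiled here): `db` stands for a `crate::Find + Send + Clone + 'static`
+ /// implementation and `counts` for the output of the counting stage, both assumed to be provided by the caller.
+ ///
+ /// ```ignore
+ /// let entries = iter_from_counts(counts, db, gix_features::progress::Discard, Options::default());
+ /// // Each item is a `Result` over a `(SequenceId, Vec<output::Entry>)` chunk, ready to be fed
+ /// // to `output::bytes::FromEntriesIter` for serialization into a pack data file.
+ /// ```
+ ///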
+ pub fn iter_from_counts<Find>(
+ mut counts: Vec<output::Count>,
+ db: Find,
+ mut progress: impl Progress + 'static,
+ Options {
+ version,
+ mode,
+ allow_thin_pack,
+ thread_limit,
+ chunk_size,
+ }: Options,
+ ) -> impl Iterator<Item = Result<(SequenceId, Vec<output::Entry>), Error<Find::Error>>>
+ + parallel::reduce::Finalize<Reduce = reduce::Statistics<Error<Find::Error>>>
+ where
+ Find: crate::Find + Send + Clone + 'static,
+ <Find as crate::Find>::Error: Send,
+ {
+ assert!(
+ matches!(version, crate::data::Version::V2),
+ "currently we can only write version 2"
+ );
+ let (chunk_size, thread_limit, _) =
+ parallel::optimize_chunk_size_and_thread_limit(chunk_size, Some(counts.len()), thread_limit, None);
+ {
+ let progress = Arc::new(parking_lot::Mutex::new(
+ progress.add_child_with_id("resolving", ProgressId::ResolveCounts.into()),
+ ));
+ progress.lock().init(None, gix_features::progress::count("counts"));
+ let enough_counts_present = counts.len() > 4_000;
+ let start = std::time::Instant::now();
+ parallel::in_parallel_if(
+ || enough_counts_present,
+ counts.chunks_mut(chunk_size),
+ thread_limit,
+ |_n| Vec::<u8>::new(),
+ {
+ let progress = Arc::clone(&progress);
+ let db = db.clone();
+ move |chunk, buf| {
+ let chunk_size = chunk.len();
+ for count in chunk {
+ use crate::data::output::count::PackLocation::*;
+ match count.entry_pack_location {
+ LookedUp(_) => continue,
+ NotLookedUp => count.entry_pack_location = LookedUp(db.location_by_oid(count.id, buf)),
+ }
+ }
+ progress.lock().inc_by(chunk_size);
+ Ok::<_, ()>(())
+ }
+ },
+ parallel::reduce::IdentityWithResult::<(), ()>::default(),
+ )
+ .expect("infallible - we ignore none-existing objects");
+ progress.lock().show_throughput(start);
+ }
+ let counts_range_by_pack_id = match mode {
+ Mode::PackCopyAndBaseObjects => {
+ let mut progress = progress.add_child_with_id("sorting", ProgressId::SortEntries.into());
+ progress.init(Some(counts.len()), gix_features::progress::count("counts"));
+ let start = std::time::Instant::now();
+
+ use crate::data::output::count::PackLocation::*;
+ counts.sort_by(|lhs, rhs| match (&lhs.entry_pack_location, &rhs.entry_pack_location) {
+ (LookedUp(None), LookedUp(None)) => Ordering::Equal,
+ (LookedUp(Some(_)), LookedUp(None)) => Ordering::Greater,
+ (LookedUp(None), LookedUp(Some(_))) => Ordering::Less,
+ (LookedUp(Some(lhs)), LookedUp(Some(rhs))) => lhs
+ .pack_id
+ .cmp(&rhs.pack_id)
+ .then(lhs.pack_offset.cmp(&rhs.pack_offset)),
+ (_, _) => unreachable!("counts were resolved beforehand"),
+ });
+
+ let mut index: Vec<(u32, std::ops::Range<usize>)> = Vec::new();
+ let mut chunks_pack_start = counts.partition_point(|e| e.entry_pack_location.is_none());
+ let mut slice = &counts[chunks_pack_start..];
+ while !slice.is_empty() {
+ let current_pack_id = slice[0].entry_pack_location.as_ref().expect("packed object").pack_id;
+ let pack_end = slice.partition_point(|e| {
+ e.entry_pack_location.as_ref().expect("packed object").pack_id == current_pack_id
+ });
+ index.push((current_pack_id, chunks_pack_start..chunks_pack_start + pack_end));
+ slice = &slice[pack_end..];
+ chunks_pack_start += pack_end;
+ }
+
+ progress.set(counts.len());
+ progress.show_throughput(start);
+
+ index
+ }
+ };
+
+ let counts = Arc::new(counts);
+ let progress = Arc::new(parking_lot::Mutex::new(progress));
+ let chunks = util::ChunkRanges::new(chunk_size, counts.len());
+
+ parallel::reduce::Stepwise::new(
+ chunks.enumerate(),
+ thread_limit,
+ {
+ let progress = Arc::clone(&progress);
+ move |n| {
+ (
+ Vec::new(), // object data buffer
+ progress
+ .lock()
+ .add_child_with_id(format!("thread {n}"), gix_features::progress::UNKNOWN),
+ )
+ }
+ },
+ {
+ let counts = Arc::clone(&counts);
+ move |(chunk_id, chunk_range): (SequenceId, std::ops::Range<usize>), (buf, progress)| {
+ let mut out = Vec::new();
+ let chunk = &counts[chunk_range];
+ let mut stats = Outcome::default();
+ let mut pack_offsets_to_id = None;
+ progress.init(Some(chunk.len()), gix_features::progress::count("objects"));
+
+ for count in chunk.iter() {
+ out.push(match count
+ .entry_pack_location
+ .as_ref()
+ .and_then(|l| db.entry_by_location(l).map(|pe| (l, pe)))
+ {
+ Some((location, pack_entry)) => {
+ if let Some((cached_pack_id, _)) = &pack_offsets_to_id {
+ if *cached_pack_id != location.pack_id {
+ pack_offsets_to_id = None;
+ }
+ }
+ let pack_range = counts_range_by_pack_id[counts_range_by_pack_id
+ .binary_search_by_key(&location.pack_id, |e| e.0)
+ .expect("pack-id always present")]
+ .1
+ .clone();
+ let base_index_offset = pack_range.start;
+ let counts_in_pack = &counts[pack_range];
+ match output::Entry::from_pack_entry(
+ pack_entry,
+ count,
+ counts_in_pack,
+ base_index_offset,
+ allow_thin_pack.then_some({
+ |pack_id, base_offset| {
+ let (cached_pack_id, cache) = pack_offsets_to_id.get_or_insert_with(|| {
+ db.pack_offsets_and_oid(pack_id)
+ .map(|mut v| {
+ v.sort_by_key(|e| e.0);
+ (pack_id, v)
+ })
+ .expect("pack used for counts is still available")
+ });
+ debug_assert_eq!(*cached_pack_id, pack_id);
+ stats.ref_delta_objects += 1;
+ cache
+ .binary_search_by_key(&base_offset, |e| e.0)
+ .ok()
+ .map(|idx| cache[idx].1)
+ }
+ }),
+ version,
+ ) {
+ Some(entry) => {
+ stats.objects_copied_from_pack += 1;
+ entry
+ }
+ None => match db.try_find(count.id, buf).map_err(Error::FindExisting)? {
+ Some((obj, _location)) => {
+ stats.decoded_and_recompressed_objects += 1;
+ output::Entry::from_data(count, &obj)
+ }
+ None => {
+ stats.missing_objects += 1;
+ Ok(output::Entry::invalid())
+ }
+ },
+ }
+ }
+ None => match db.try_find(count.id, buf).map_err(Error::FindExisting)? {
+ Some((obj, _location)) => {
+ stats.decoded_and_recompressed_objects += 1;
+ output::Entry::from_data(count, &obj)
+ }
+ None => {
+ stats.missing_objects += 1;
+ Ok(output::Entry::invalid())
+ }
+ },
+ }?);
+ progress.inc();
+ }
+ Ok((chunk_id, out, stats))
+ }
+ },
+ reduce::Statistics::default(),
+ )
+ }
+}
+
+mod util {
+ #[derive(Clone)]
+ pub struct ChunkRanges {
+ cursor: usize,
+ size: usize,
+ len: usize,
+ }
+
+ impl ChunkRanges {
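+ /// Create an iterator yielding consecutive index ranges of at most `size` items each, covering `0..total`.
+ /// For example, `ChunkRanges::new(4, 10)` yields `0..4`, `4..8` and finally `8..10`.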
+ pub fn new(size: usize, total: usize) -> Self {
+ ChunkRanges {
+ cursor: 0,
+ size,
+ len: total,
+ }
+ }
+ }
+
+ impl Iterator for ChunkRanges {
+ type Item = std::ops::Range<usize>;
+
+ fn next(&mut self) -> Option<Self::Item> {
+ if self.cursor >= self.len {
+ None
+ } else {
+ let upper = (self.cursor + self.size).min(self.len);
+ let range = self.cursor..upper;
+ self.cursor = upper;
+ Some(range)
+ }
+ }
+ }
+}
+
+mod reduce {
+ use std::marker::PhantomData;
+
+ use gix_features::{parallel, parallel::SequenceId};
+
+ use super::Outcome;
+ use crate::data::output;
+
+ pub struct Statistics<E> {
+ total: Outcome,
+ _err: PhantomData<E>,
+ }
+
+ impl<E> Default for Statistics<E> {
+ fn default() -> Self {
+ Statistics {
+ total: Default::default(),
+ _err: PhantomData::default(),
+ }
+ }
+ }
+
+ impl<Error> parallel::Reduce for Statistics<Error> {
+ type Input = Result<(SequenceId, Vec<output::Entry>, Outcome), Error>;
+ type FeedProduce = (SequenceId, Vec<output::Entry>);
+ type Output = Outcome;
+ type Error = Error;
+
+ fn feed(&mut self, item: Self::Input) -> Result<Self::FeedProduce, Self::Error> {
+ item.map(|(cid, entries, stats)| {
+ self.total.aggregate(stats);
+ (cid, entries)
+ })
+ }
+
+ fn finalize(self) -> Result<Self::Output, Self::Error> {
+ Ok(self.total)
+ }
+ }
+}
+
+mod types {
+ use crate::data::output::entry;
+
+ /// Information gathered during the run of [`iter_from_counts()`][crate::data::output::entry::iter_from_counts()].
+ #[derive(Default, PartialEq, Eq, Debug, Hash, Ord, PartialOrd, Clone, Copy)]
+ #[cfg_attr(feature = "serde1", derive(serde::Serialize, serde::Deserialize))]
+ pub struct Outcome {
+ /// The amount of fully decoded objects. These are the most expensive as they are fully decoded.
+ pub decoded_and_recompressed_objects: usize,
+ /// The amount of objects that could not be located despite being mentioned during the iteration
+ pub missing_objects: usize,
+ /// The amount of base or delta objects that could be copied directly from the pack. These are cheapest as they
+ /// only cost a memory copy for the most part.
+ pub objects_copied_from_pack: usize,
+ /// The amount of objects that refer to their base as a ref-delta, an indication that a thin pack is being created.
+ pub ref_delta_objects: usize,
+ }
+
+ impl Outcome {
+ pub(in crate::data::output::entry) fn aggregate(
+ &mut self,
+ Outcome {
+ decoded_and_recompressed_objects: decoded_objects,
+ missing_objects,
+ objects_copied_from_pack,
+ ref_delta_objects,
+ }: Self,
+ ) {
+ self.decoded_and_recompressed_objects += decoded_objects;
+ self.missing_objects += missing_objects;
+ self.objects_copied_from_pack += objects_copied_from_pack;
+ self.ref_delta_objects += ref_delta_objects;
+ }
+ }
+
+ /// The way the iterator operates.
+ #[derive(PartialEq, Eq, Debug, Hash, Ord, PartialOrd, Clone, Copy)]
+ #[cfg_attr(feature = "serde1", derive(serde::Serialize, serde::Deserialize))]
+ pub enum Mode {
+ /// Copy base objects and deltas from packs, while non-packed objects will be treated as base objects
+ /// (i.e. without trying to delta compress them). This is a fast way of obtaining a pack while benefiting
+ /// from existing pack compression and spending the smallest possible time on compressing unpacked objects at
+ /// the cost of bandwidth.
+ PackCopyAndBaseObjects,
+ }
+
+ /// Configuration options for the pack generation functions provided in [`iter_from_counts()`][crate::data::output::entry::iter_from_counts()].
+ #[derive(PartialEq, Eq, Debug, Hash, Ord, PartialOrd, Clone, Copy)]
+ #[cfg_attr(feature = "serde1", derive(serde::Serialize, serde::Deserialize))]
+ pub struct Options {
+ /// The amount of threads to use at most when resolving the pack. If `None`, all logical cores are used.
+ pub thread_limit: Option<usize>,
+ /// The algorithm to produce a pack
+ pub mode: Mode,
+ /// If set, the resulting pack can have deltas that refer to an object which is not in the pack. This can happen
+ /// if the initial counted objects do not contain an object that an existing packed delta refers to, for example because
+ /// it wasn't part of the iteration, e.g. when the iteration was performed on tree deltas or only a part of the
+ /// commit graph. Please note that thin packs are not valid packs at rest, thus they are only valid for packs in transit.
+ ///
+ /// If set to false, delta objects will be decompressed and recompressed as base objects.
+ pub allow_thin_pack: bool,
+ /// The amount of objects per chunk or unit of work to be sent to threads for processing
+ /// TODO: could this become the window size?
+ pub chunk_size: usize,
+ /// The pack data version to produce for each entry
+ pub version: crate::data::Version,
+ }
+
+ impl Default for Options {
+ fn default() -> Self {
+ Options {
+ thread_limit: None,
+ mode: Mode::PackCopyAndBaseObjects,
+ allow_thin_pack: false,
+ chunk_size: 10,
+ version: Default::default(),
+ }
+ }
+ }
+
+ /// The error returned by the pack generation function [`iter_from_counts()`][crate::data::output::entry::iter_from_counts()].
+ #[derive(Debug, thiserror::Error)]
+ #[allow(missing_docs)]
+ pub enum Error<FindErr>
+ where
+ FindErr: std::error::Error + 'static,
+ {
+ #[error(transparent)]
+ FindExisting(FindErr),
+ #[error(transparent)]
+ NewEntry(#[from] entry::Error),
+ }
+
+ /// The progress ids used in [`write_to_directory()`][crate::Bundle::write_to_directory()].
+ ///
+ /// Use this information to selectively extract the progress of interest in case the parent application has custom visualization.
+ #[derive(Debug, Copy, Clone)]
+ pub enum ProgressId {
+ /// The amount of [`Count`][crate::data::output::Count] objects which are resolved to their pack location.
+ ResolveCounts,
+ /// Layout pack entries for placement into a pack (by pack-id and by offset).
+ SortEntries,
+ }
+
+ impl From<ProgressId> for gix_features::progress::Id {
+ fn from(v: ProgressId) -> Self {
+ match v {
+ ProgressId::ResolveCounts => *b"ECRC",
+ ProgressId::SortEntries => *b"ECSE",
+ }
+ }
+ }
+}
+pub use types::{Error, Mode, Options, Outcome, ProgressId};
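As a rough usage sketch, assuming the vendored crate is consumed under its public name `gix_pack` and that the field values below are purely illustrative, the `Options` and `Outcome` types from the `types` module could be exercised like this:

    // Sketch only: construct non-default options for pack generation and a
    // zeroed statistics value; the chosen values are arbitrary examples.
    use gix_pack::data::output::entry::iter_from_counts::{Mode, Options, Outcome};

    fn main() {
        let opts = Options {
            thread_limit: Some(4),              // cap pack resolution at four threads
            mode: Mode::PackCopyAndBaseObjects, // the only mode currently provided
            allow_thin_pack: true,              // keep ref-deltas to bases outside the pack (transit only)
            chunk_size: 1000,                   // counts handed to each worker per unit of work
            version: Default::default(),        // pack data version of the produced entries
        };
        assert!(opts.allow_thin_pack);

        // Per-chunk `Outcome` values are merged internally via the crate-private
        // `aggregate()`; a default instance starts with all counters at zero.
        let stats = Outcome::default();
        assert_eq!(stats.missing_objects, 0);
    }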
diff --git a/vendor/gix-pack/src/data/output/entry/mod.rs b/vendor/gix-pack/src/data/output/entry/mod.rs
new file mode 100644
index 000000000..401d2f24c
--- /dev/null
+++ b/vendor/gix-pack/src/data/output/entry/mod.rs
@@ -0,0 +1,181 @@
+use std::{convert::TryFrom, io::Write};
+
+use gix_hash::ObjectId;
+
+use crate::{data, data::output, find};
+
+///
+pub mod iter_from_counts;
+pub use iter_from_counts::function::iter_from_counts;
+
+/// The kind of pack entry to be written
+#[derive(PartialEq, Eq, Debug, Hash, Ord, PartialOrd, Clone, Copy)]
+#[cfg_attr(feature = "serde1", derive(serde::Serialize, serde::Deserialize))]
+pub enum Kind {
+ /// A complete base object, including its kind
+ Base(gix_object::Kind),
+    /// A delta against the object with the given index. It's always an index that was already encountered, so it refers
+    /// only to objects we have written already.
+ DeltaRef {
+ /// The absolute index to the object to serve as base. It's up to the writer to maintain enough state to allow producing
+ /// a packed delta object from it.
+ object_index: usize,
+ },
+ /// A delta against the given object as identified by its `ObjectId`.
+ /// This is the case for thin packs only, i.e. those that are sent over the wire.
+ /// Note that there is the option of the `ObjectId` being used to refer to an object within
+ /// the same pack, but it's a discontinued practice which won't be encountered here.
+ DeltaOid {
+ /// The object serving as base for this delta
+ id: ObjectId,
+ },
+}
+
+/// The error returned by [`output::Entry::from_data()`].
+#[allow(missing_docs)]
+#[derive(Debug, thiserror::Error)]
+pub enum Error {
+ #[error("{0}")]
+ ZlibDeflate(#[from] std::io::Error),
+}
+
+impl output::Entry {
+    /// An object which can easily be identified as invalid, which happens if an object didn't exist even though it was referred to.
+ pub fn invalid() -> output::Entry {
+ output::Entry {
+ id: gix_hash::Kind::Sha1.null(), // NOTE: the actual object hash used in the repo doesn't matter here, this is a sentinel value.
+ kind: Kind::Base(gix_object::Kind::Blob),
+ decompressed_size: 0,
+ compressed_data: vec![],
+ }
+ }
+
+ /// Returns true if this object doesn't really exist but still has to be handled responsibly
+ ///
+ /// Note that this is true for tree entries that are commits/git submodules, or for objects which aren't present in our local clone
+ /// due to shallow clones.
+ pub fn is_invalid(&self) -> bool {
+ self.id.is_null()
+ }
+
+    /// Create an Entry from a previously counted object which is located in a pack. Its `entry` is provided here.
+    /// The `target_version` specifies which pack data `Version` the caller desires for the produced `Entry`.
+ pub fn from_pack_entry(
+ mut entry: find::Entry,
+ count: &output::Count,
+ potential_bases: &[output::Count],
+ bases_index_offset: usize,
+ pack_offset_to_oid: Option<impl FnMut(u32, u64) -> Option<ObjectId>>,
+ target_version: crate::data::Version,
+ ) -> Option<Result<Self, Error>> {
+ if entry.version != target_version {
+ return None;
+ };
+
+ let pack_offset_must_be_zero = 0;
+ let pack_entry =
+ crate::data::Entry::from_bytes(&entry.data, pack_offset_must_be_zero, count.id.as_slice().len());
+
+ use crate::data::entry::Header::*;
+ match pack_entry.header {
+ Commit => Some(output::entry::Kind::Base(gix_object::Kind::Commit)),
+ Tree => Some(output::entry::Kind::Base(gix_object::Kind::Tree)),
+ Blob => Some(output::entry::Kind::Base(gix_object::Kind::Blob)),
+ Tag => Some(output::entry::Kind::Base(gix_object::Kind::Tag)),
+ OfsDelta { base_distance } => {
+ let pack_location = count.entry_pack_location.as_ref().expect("packed");
+ let base_offset = pack_location
+ .pack_offset
+ .checked_sub(base_distance)
+ .expect("pack-offset - distance is firmly within the pack");
+ potential_bases
+ .binary_search_by(|e| {
+ e.entry_pack_location
+ .as_ref()
+ .expect("packed")
+ .pack_offset
+ .cmp(&base_offset)
+ })
+ .ok()
+ .map(|idx| output::entry::Kind::DeltaRef {
+ object_index: idx + bases_index_offset,
+ })
+ .or_else(|| {
+ pack_offset_to_oid
+ .and_then(|mut f| f(pack_location.pack_id, base_offset))
+ .map(|id| output::entry::Kind::DeltaOid { id })
+ })
+ }
+ RefDelta { base_id: _ } => None, // ref deltas are for thin packs or legacy, repack them as base objects
+ }
+ .map(|kind| {
+ Ok(output::Entry {
+ id: count.id.to_owned(),
+ kind,
+ decompressed_size: pack_entry.decompressed_size as usize,
+ compressed_data: {
+ entry.data.copy_within(pack_entry.data_offset as usize.., 0);
+ entry.data.resize(
+ entry.data.len()
+ - usize::try_from(pack_entry.data_offset).expect("offset representable as usize"),
+ 0,
+ );
+ entry.data
+ },
+ })
+ })
+ }
+
+    /// Create a new instance from the given `count` and its corresponding git object data, `obj`.
+ pub fn from_data(count: &output::Count, obj: &gix_object::Data<'_>) -> Result<Self, Error> {
+ Ok(output::Entry {
+ id: count.id.to_owned(),
+ kind: Kind::Base(obj.kind),
+ decompressed_size: obj.data.len(),
+ compressed_data: {
+ let mut out = gix_features::zlib::stream::deflate::Write::new(Vec::new());
+ if let Err(err) = std::io::copy(&mut &*obj.data, &mut out) {
+ match err.kind() {
+ std::io::ErrorKind::Other => return Err(Error::ZlibDeflate(err)),
+ err => unreachable!("Should never see other errors than zlib, but got {:?}", err,),
+ }
+ };
+ out.flush()?;
+ out.into_inner()
+ },
+ })
+ }
+
+    /// Transform ourselves into a pack entry header of `version` which can be written into a pack.
+ ///
+ /// `index_to_pack(object_index) -> pack_offset` is a function to convert the base object's index into
+ /// the input object array (if each object is numbered) to an offset into the pack.
+ /// This information is known to the one calling the method.
+ pub fn to_entry_header(
+ &self,
+ version: crate::data::Version,
+ index_to_base_distance: impl FnOnce(usize) -> u64,
+ ) -> crate::data::entry::Header {
+ assert!(
+ matches!(version, data::Version::V2),
+ "we can only write V2 pack entries for now"
+ );
+
+ use Kind::*;
+ match self.kind {
+ Base(kind) => {
+ use gix_object::Kind::*;
+ match kind {
+ Tree => data::entry::Header::Tree,
+ Blob => data::entry::Header::Blob,
+ Commit => data::entry::Header::Commit,
+ Tag => data::entry::Header::Tag,
+ }
+ }
+ DeltaOid { id } => data::entry::Header::RefDelta { base_id: id.to_owned() },
+ DeltaRef { object_index } => data::entry::Header::OfsDelta {
+ base_distance: index_to_base_distance(object_index),
+ },
+ }
+ }
+}
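To illustrate the `Entry` helpers above, here is a minimal sketch, again assuming the public crate name `gix_pack`: the sentinel produced by `invalid()` is a base object carrying the null hash, so converting it to an on-disk header never needs the base-distance closure.

    // Sketch only: the closure passed to `to_entry_header()` maps a base object's
    // index in the output set to its pack offset distance, and is only invoked
    // for `Kind::DeltaRef` entries.
    use gix_pack::data::{self, output};

    fn main() {
        let entry = output::Entry::invalid();
        assert!(entry.is_invalid()); // the null object id marks the sentinel

        let header = entry.to_entry_header(data::Version::V2, |_object_index| {
            unreachable!("only invoked for DeltaRef entries")
        });
        assert!(matches!(header, data::entry::Header::Blob));
    }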
diff --git a/vendor/gix-pack/src/data/output/mod.rs b/vendor/gix-pack/src/data/output/mod.rs
new file mode 100644
index 000000000..f94d32e8e
--- /dev/null
+++ b/vendor/gix-pack/src/data/output/mod.rs
@@ -0,0 +1,41 @@
+use gix_hash::ObjectId;
+
+///
+pub mod count;
+
+/// An item representing a future Entry in the leanest way possible.
+///
+/// One can expect to have many of these in memory when building big packs, so smaller is better here.
+/// They should contain everything of importance to generate a pack as fast as possible.
+#[derive(PartialEq, Eq, Debug, Hash, Ord, PartialOrd, Clone)]
+#[cfg_attr(feature = "serde1", derive(serde::Serialize, serde::Deserialize))]
+pub struct Count {
+ /// The hash of the object to write
+ pub id: ObjectId,
+ /// A way to locate a pack entry in the object database, only available if the object is in a pack.
+ pub entry_pack_location: count::PackLocation,
+}
+
+/// An entry to be written to a file.
+///
+/// Some of these will be in-flight and in memory while waiting to be written. Memory requirements depend on the amount of compressed
+/// data they hold.
+#[derive(PartialEq, Eq, Debug, Hash, Ord, PartialOrd, Clone)]
+#[cfg_attr(feature = "serde1", derive(serde::Serialize, serde::Deserialize))]
+pub struct Entry {
+ /// The hash of the object to write
+ pub id: ObjectId,
+    /// The kind of entry represented by `compressed_data`. It's used together with that data to complete the pack entry
+    /// at rest or in transit.
+ pub kind: entry::Kind,
+ /// The size in bytes needed once `data` gets decompressed
+ pub decompressed_size: usize,
+ /// The compressed data right behind the header
+ pub compressed_data: Vec<u8>,
+}
+
+///
+pub mod entry;
+
+///
+pub mod bytes;
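The split between `Count` and `Entry` above is mostly about memory: one `Count` exists per object to be packed, while an `Entry` additionally owns the compressed payload and is only kept in flight until written. A small sketch (crate path assumed to be `gix_pack`) makes the difference visible:

    // Sketch only: compares the stack footprint of the two types; exact numbers
    // depend on platform and crate version, and `Entry` additionally owns
    // heap-allocated compressed bytes once populated.
    use gix_pack::data::output::{Count, Entry};

    fn main() {
        println!("Count: {} bytes", std::mem::size_of::<Count>());
        println!("Entry: {} bytes (+ heap for `compressed_data`)", std::mem::size_of::<Entry>());
    }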