author    Daniel Baumann <daniel.baumann@progress-linux.org>  2024-05-04 12:41:41 +0000
committer Daniel Baumann <daniel.baumann@progress-linux.org>  2024-05-04 12:41:41 +0000
commit    10ee2acdd26a7f1298c6f6d6b7af9b469fe29b87 (patch)
tree      bdffd5d80c26cf4a7a518281a204be1ace85b4c1 /vendor/gix-pack/src/data/input
parent    Releasing progress-linux version 1.70.0+dfsg1-9~progress7.99u1. (diff)
Merging upstream version 1.70.0+dfsg2.
Signed-off-by: Daniel Baumann <daniel.baumann@progress-linux.org>
Diffstat (limited to 'vendor/gix-pack/src/data/input')
-rw-r--r--  vendor/gix-pack/src/data/input/bytes_to_entries.rs          295
-rw-r--r--  vendor/gix-pack/src/data/input/entries_to_bytes.rs          155
-rw-r--r--  vendor/gix-pack/src/data/input/entry.rs                      65
-rw-r--r--  vendor/gix-pack/src/data/input/lookup_ref_delta_objects.rs  211
-rw-r--r--  vendor/gix-pack/src/data/input/mod.rs                        41
-rw-r--r--  vendor/gix-pack/src/data/input/types.rs                      73
6 files changed, 840 insertions, 0 deletions
diff --git a/vendor/gix-pack/src/data/input/bytes_to_entries.rs b/vendor/gix-pack/src/data/input/bytes_to_entries.rs
new file mode 100644
index 000000000..cf20d5fbf
--- /dev/null
+++ b/vendor/gix-pack/src/data/input/bytes_to_entries.rs
@@ -0,0 +1,295 @@
+use std::{fs, io};
+
+use gix_features::{
+ hash,
+ hash::Sha1,
+ zlib::{stream::inflate::ReadBoxed, Decompress},
+};
+use gix_hash::ObjectId;
+
+use crate::data::input;
+
+/// An iterator over [`Entries`][input::Entry] in a byte stream.
+///
+/// The iterator is used as part of [`Bundle::write_to_directory(…)`][crate::Bundle::write_to_directory()].
+pub struct BytesToEntriesIter<BR> {
+ read: BR,
+ decompressor: Option<Box<Decompress>>,
+ offset: u64,
+ had_error: bool,
+ version: crate::data::Version,
+ objects_left: u32,
+ hash: Option<Sha1>,
+ mode: input::Mode,
+ compressed: input::EntryDataMode,
+ compressed_buf: Option<Vec<u8>>,
+ hash_len: usize,
+ object_hash: gix_hash::Kind,
+}
+
+/// Access
+impl<BR> BytesToEntriesIter<BR> {
+ /// The pack version currently being iterated
+ pub fn version(&self) -> crate::data::Version {
+ self.version
+ }
+
+ /// The kind of iteration
+ pub fn mode(&self) -> input::Mode {
+ self.mode
+ }
+}
+
+/// Initialization
+impl<BR> BytesToEntriesIter<BR>
+where
+ BR: io::BufRead,
+{
+ /// Obtain an iterator from a `read` stream to a pack data file and configure it using `mode` and `compressed`.
+ /// `object_hash` specifies which hash is used for objects in ref-delta entries.
+ ///
+    /// Note that `read` is expected to be at the beginning of a valid pack data file with a header, entries and a trailer.
+ pub fn new_from_header(
+ mut read: BR,
+ mode: input::Mode,
+ compressed: input::EntryDataMode,
+ object_hash: gix_hash::Kind,
+ ) -> Result<BytesToEntriesIter<BR>, input::Error> {
+ let mut header_data = [0u8; 12];
+ read.read_exact(&mut header_data)?;
+
+ let (version, num_objects) = crate::data::header::decode(&header_data)?;
+ assert_eq!(
+ version,
+ crate::data::Version::V2,
+ "let's stop here if we see undocumented pack formats"
+ );
+ Ok(BytesToEntriesIter {
+ read,
+ decompressor: None,
+ compressed,
+ offset: 12,
+ had_error: false,
+ version,
+ objects_left: num_objects,
+ hash: (mode != input::Mode::AsIs).then(|| {
+ let mut hash = gix_features::hash::hasher(object_hash);
+ hash.update(&header_data);
+ hash
+ }),
+ mode,
+ compressed_buf: None,
+ hash_len: object_hash.len_in_bytes(),
+ object_hash,
+ })
+ }
+
+ fn next_inner(&mut self) -> Result<input::Entry, input::Error> {
+        self.objects_left -= 1; // even an error counts as an object
+
+ // Read header
+ let entry = match self.hash.take() {
+ Some(hash) => {
+ let mut read = read_and_pass_to(
+ &mut self.read,
+ hash::Write {
+ inner: io::sink(),
+ hash,
+ },
+ );
+ let res = crate::data::Entry::from_read(&mut read, self.offset, self.hash_len);
+ self.hash = Some(read.write.hash);
+ res
+ }
+ None => crate::data::Entry::from_read(&mut self.read, self.offset, self.hash_len),
+ }
+ .map_err(input::Error::from)?;
+
+ // Decompress object to learn its compressed bytes
+ let mut decompressor = self
+ .decompressor
+ .take()
+ .unwrap_or_else(|| Box::new(Decompress::new(true)));
+ let compressed_buf = self.compressed_buf.take().unwrap_or_else(|| Vec::with_capacity(4096));
+ decompressor.reset(true);
+ let mut decompressed_reader = ReadBoxed {
+ inner: read_and_pass_to(
+ &mut self.read,
+ if self.compressed.keep() {
+ Vec::with_capacity(entry.decompressed_size as usize)
+ } else {
+ compressed_buf
+ },
+ ),
+ decompressor,
+ };
+
+ let bytes_copied = io::copy(&mut decompressed_reader, &mut io::sink())?;
+ if bytes_copied != entry.decompressed_size {
+ return Err(input::Error::IncompletePack {
+ actual: bytes_copied,
+ expected: entry.decompressed_size,
+ });
+ }
+
+ let pack_offset = self.offset;
+ let compressed_size = decompressed_reader.decompressor.total_in();
+ self.offset += entry.header_size() as u64 + compressed_size;
+ self.decompressor = Some(decompressed_reader.decompressor);
+
+ let mut compressed = decompressed_reader.inner.write;
+ debug_assert_eq!(
+ compressed_size,
+ compressed.len() as u64,
+ "we must track exactly the same amount of bytes as read by the decompressor"
+ );
+ if let Some(hash) = self.hash.as_mut() {
+ hash.update(&compressed);
+ }
+
+ let crc32 = if self.compressed.crc32() {
+ let mut header_buf = [0u8; 12 + gix_hash::Kind::longest().len_in_bytes()];
+ let header_len = entry.header.write_to(bytes_copied, header_buf.as_mut())?;
+ let state = gix_features::hash::crc32_update(0, &header_buf[..header_len]);
+ Some(gix_features::hash::crc32_update(state, &compressed))
+ } else {
+ None
+ };
+
+ let compressed = if self.compressed.keep() {
+ Some(compressed)
+ } else {
+ compressed.clear();
+ self.compressed_buf = Some(compressed);
+ None
+ };
+
+        // The last object gets the trailer (which is potentially verified)
+ let trailer = self.try_read_trailer()?;
+ Ok(input::Entry {
+ header: entry.header,
+ header_size: entry.header_size() as u16,
+ compressed,
+ compressed_size,
+ crc32,
+ pack_offset,
+ decompressed_size: bytes_copied,
+ trailer,
+ })
+ }
+
+ fn try_read_trailer(&mut self) -> Result<Option<ObjectId>, input::Error> {
+ Ok(if self.objects_left == 0 {
+ let mut id = gix_hash::ObjectId::null(self.object_hash);
+ if let Err(err) = self.read.read_exact(id.as_mut_slice()) {
+ if self.mode != input::Mode::Restore {
+ return Err(err.into());
+ }
+ }
+
+ if let Some(hash) = self.hash.take() {
+ let actual_id = gix_hash::ObjectId::from(hash.digest());
+ if self.mode == input::Mode::Restore {
+ id = actual_id;
+ }
+ if id != actual_id {
+ return Err(input::Error::ChecksumMismatch {
+ actual: actual_id,
+ expected: id,
+ });
+ }
+ }
+ Some(id)
+ } else if self.mode == input::Mode::Restore {
+ let hash = self.hash.clone().expect("in restore mode a hash is set");
+ Some(gix_hash::ObjectId::from(hash.digest()))
+ } else {
+ None
+ })
+ }
+}
+
+fn read_and_pass_to<R: io::Read, W: io::Write>(read: &mut R, to: W) -> PassThrough<&mut R, W> {
+ PassThrough { read, write: to }
+}
+
+impl<R> Iterator for BytesToEntriesIter<R>
+where
+ R: io::BufRead,
+{
+ type Item = Result<input::Entry, input::Error>;
+
+ fn next(&mut self) -> Option<Self::Item> {
+ if self.had_error || self.objects_left == 0 {
+ return None;
+ }
+ let result = self.next_inner();
+ self.had_error = result.is_err();
+ if self.had_error {
+ self.objects_left = 0;
+ }
+ if self.mode == input::Mode::Restore && self.had_error {
+ None
+ } else {
+ Some(result)
+ }
+ }
+
+ fn size_hint(&self) -> (usize, Option<usize>) {
+ (self.objects_left as usize, Some(self.objects_left as usize))
+ }
+}
+
+impl<R> std::iter::ExactSizeIterator for BytesToEntriesIter<R> where R: io::BufRead {}
+
+struct PassThrough<R, W> {
+ read: R,
+ write: W,
+}
+
+impl<R, W> io::BufRead for PassThrough<R, W>
+where
+ Self: io::Read,
+ R: io::BufRead,
+ W: io::Write,
+{
+ fn fill_buf(&mut self) -> io::Result<&[u8]> {
+ self.read.fill_buf()
+ }
+
+ fn consume(&mut self, amt: usize) {
+ let buf = self
+ .read
+ .fill_buf()
+ .expect("never fail as we called fill-buf before and this does nothing");
+ self.write
+ .write_all(&buf[..amt])
+ .expect("a write to never fail - should be a memory buffer");
+ self.read.consume(amt)
+ }
+}
+
+impl<R, W> io::Read for PassThrough<R, W>
+where
+ W: io::Write,
+ R: io::Read,
+{
+ fn read(&mut self, buf: &mut [u8]) -> io::Result<usize> {
+ let bytes_read = self.read.read(buf)?;
+ self.write.write_all(&buf[..bytes_read])?;
+ Ok(bytes_read)
+ }
+}
+
+impl crate::data::File {
+ /// Returns an iterator over [`Entries`][crate::data::input::Entry], without making use of the memory mapping.
+ pub fn streaming_iter(&self) -> Result<BytesToEntriesIter<impl io::BufRead>, input::Error> {
+ let reader = io::BufReader::with_capacity(4096 * 8, fs::File::open(&self.path)?);
+ BytesToEntriesIter::new_from_header(
+ reader,
+ input::Mode::Verify,
+ input::EntryDataMode::KeepAndCrc32,
+ self.object_hash,
+ )
+ }
+}
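
A usage sketch (not part of the diff; it assumes the `gix_pack::data::input` API exactly as vendored above): open a pack file from disk, stream its entries, and verify the trailing checksum as a side effect of iterating through to the end.

    use std::{fs, io, path::Path};

    use gix_pack::data::input;

    /// Count the entries of a pack file while verifying its trailing checksum.
    fn count_entries(pack: &Path) -> Result<usize, input::Error> {
        let read = io::BufReader::new(fs::File::open(pack)?);
        let entries = input::BytesToEntriesIter::new_from_header(
            read,
            input::Mode::Verify,          // fail on the last object if the trailer hash mismatches
            input::EntryDataMode::Ignore, // the compressed bytes are not needed here
            gix_hash::Kind::Sha1,
        )?;
        let mut count = 0;
        for entry in entries {
            entry?; // each item is a Result<input::Entry, input::Error>
            count += 1;
        }
        Ok(count)
    }
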
diff --git a/vendor/gix-pack/src/data/input/entries_to_bytes.rs b/vendor/gix-pack/src/data/input/entries_to_bytes.rs
new file mode 100644
index 000000000..a8c21e653
--- /dev/null
+++ b/vendor/gix-pack/src/data/input/entries_to_bytes.rs
@@ -0,0 +1,155 @@
+use std::iter::Peekable;
+
+use gix_features::hash;
+
+use crate::data::input;
+
+/// An implementation of [`Iterator`] to write [encoded entries][input::Entry] to an inner writer each time
+/// `next()` is called.
+///
+/// It is able to deal with an unknown number of objects, as it will rewrite the pack header once the entries iterator
+/// is depleted, and it computes the hash in one go by re-reading the whole file at the end.
+pub struct EntriesToBytesIter<I: Iterator, W> {
+ /// An iterator for input [`input::Entry`] instances
+ pub input: Peekable<I>,
+ /// A way of writing encoded bytes.
+ output: W,
+ /// Our trailing hash when done writing all input entries
+ trailer: Option<gix_hash::ObjectId>,
+    /// The version of the pack file to be written.
+ data_version: crate::data::Version,
+ /// The amount of entries seen so far
+ num_entries: u32,
+ /// If we are done, no additional writes will occur
+ is_done: bool,
+ /// The kind of hash to use for the digest
+ object_hash: gix_hash::Kind,
+}
+
+impl<I, W> EntriesToBytesIter<I, W>
+where
+ I: Iterator<Item = Result<input::Entry, input::Error>>,
+ W: std::io::Read + std::io::Write + std::io::Seek,
+{
+    /// Create a new instance that reads [entries][input::Entry] from the `input` iterator and writes pack data bytes
+    /// to the `output` writer, producing a pack of `version`. The number of entries is determined dynamically and
+    /// the pack is completed once the last entry has been written.
+    /// `object_hash` is the kind of hash to use for the pack checksum, and possibly in other places depending on the version.
+ ///
+ /// # Panics
+ ///
+    /// Not all combinations of `object_hash` and `version` are currently supported; unsupported combinations trigger assertion errors.
+ pub fn new(input: I, output: W, version: crate::data::Version, object_hash: gix_hash::Kind) -> Self {
+ assert!(
+ matches!(version, crate::data::Version::V2),
+ "currently only pack version 2 can be written",
+ );
+ assert!(
+ matches!(object_hash, gix_hash::Kind::Sha1),
+ "currently only Sha1 is supported, right now we don't know how other hashes are encoded",
+ );
+ EntriesToBytesIter {
+ input: input.peekable(),
+ output,
+ object_hash,
+ num_entries: 0,
+ trailer: None,
+ data_version: version,
+ is_done: false,
+ }
+ }
+
+    /// Returns the trailing hash over all entries, once done.
+ /// It's `None` if we are not yet done writing.
+ pub fn digest(&self) -> Option<gix_hash::ObjectId> {
+ self.trailer
+ }
+
+ fn next_inner(&mut self, entry: input::Entry) -> Result<input::Entry, input::Error> {
+ if self.num_entries == 0 {
+ let header_bytes = crate::data::header::encode(self.data_version, 0);
+ self.output.write_all(&header_bytes[..])?;
+ }
+ self.num_entries += 1;
+ entry.header.write_to(entry.decompressed_size, &mut self.output)?;
+ std::io::copy(
+ &mut entry
+ .compressed
+ .as_deref()
+ .expect("caller must configure generator to keep compressed bytes"),
+ &mut self.output,
+ )?;
+ Ok(entry)
+ }
+
+ fn write_header_and_digest(&mut self, last_entry: Option<&mut input::Entry>) -> Result<(), input::Error> {
+ let header_bytes = crate::data::header::encode(self.data_version, self.num_entries);
+ let num_bytes_written = if last_entry.is_some() {
+ self.output.stream_position()?
+ } else {
+ header_bytes.len() as u64
+ };
+ self.output.rewind()?;
+ self.output.write_all(&header_bytes[..])?;
+ self.output.flush()?;
+
+ self.output.rewind()?;
+ let interrupt_never = std::sync::atomic::AtomicBool::new(false);
+ let digest = hash::bytes(
+ &mut self.output,
+ num_bytes_written as usize,
+ self.object_hash,
+ &mut gix_features::progress::Discard,
+ &interrupt_never,
+ )?;
+ self.output.write_all(digest.as_slice())?;
+ self.output.flush()?;
+
+ self.is_done = true;
+ if let Some(last_entry) = last_entry {
+ last_entry.trailer = Some(digest);
+ }
+ self.trailer = Some(digest);
+ Ok(())
+ }
+}
+
+impl<I, W> Iterator for EntriesToBytesIter<I, W>
+where
+ I: Iterator<Item = Result<input::Entry, input::Error>>,
+ W: std::io::Read + std::io::Write + std::io::Seek,
+{
+    /// The entry as written to `output` if `Ok`, or the error received from the input iterator.
+ type Item = Result<input::Entry, input::Error>;
+
+ fn next(&mut self) -> Option<Self::Item> {
+ if self.is_done {
+ return None;
+ }
+
+ match self.input.next() {
+ Some(res) => Some(match res {
+ Ok(entry) => self.next_inner(entry).and_then(|mut entry| {
+ if self.input.peek().is_none() {
+ self.write_header_and_digest(Some(&mut entry)).map(|_| entry)
+ } else {
+ Ok(entry)
+ }
+ }),
+ Err(err) => {
+ self.is_done = true;
+ Err(err)
+ }
+ }),
+ None => match self.write_header_and_digest(None) {
+ Ok(_) => None,
+ Err(err) => Some(Err(err)),
+ },
+ }
+ }
+
+ fn size_hint(&self) -> (usize, Option<usize>) {
+ self.input.size_hint()
+ }
+}
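
A hedged round-trip sketch: the entries produced by `BytesToEntriesIter` (with compressed bytes kept, as `next_inner()` above requires) can be fed straight into `EntriesToBytesIter`. A `std::io::Cursor` satisfies the `Read + Write + Seek` bound used to rewrite the header and re-hash the stream.

    use std::io::{self, Cursor};

    use gix_pack::data::{input, Version};

    /// Decode a pack and re-serialize its entries into an in-memory buffer,
    /// returning the newly computed trailer hash.
    fn rewrite_pack(pack: &std::path::Path) -> Result<gix_hash::ObjectId, input::Error> {
        let read = io::BufReader::new(std::fs::File::open(pack)?);
        let entries = input::BytesToEntriesIter::new_from_header(
            read,
            input::Mode::Verify,
            input::EntryDataMode::KeepAndCrc32, // `next_inner()` expects `compressed` to be `Some`
            gix_hash::Kind::Sha1,
        )?;
        let mut writer = input::EntriesToBytesIter::new(
            entries,
            Cursor::new(Vec::new()),
            Version::V2,
            gix_hash::Kind::Sha1,
        );
        for entry in &mut writer {
            entry?;
        }
        Ok(writer.digest().expect("iteration is complete, so the trailer is set"))
    }
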
diff --git a/vendor/gix-pack/src/data/input/entry.rs b/vendor/gix-pack/src/data/input/entry.rs
new file mode 100644
index 000000000..74d4800a0
--- /dev/null
+++ b/vendor/gix-pack/src/data/input/entry.rs
@@ -0,0 +1,65 @@
+use std::io::Write;
+
+use crate::data::{entry::Header, input};
+
+impl input::Entry {
+ /// Create a new input entry from a given data `obj` set to be placed at the given `pack_offset`.
+ ///
+ /// This method is useful when arbitrary base entries are created
+ pub fn from_data_obj(obj: &gix_object::Data<'_>, pack_offset: u64) -> Result<Self, input::Error> {
+ let header = to_header(obj.kind);
+ let compressed = compress_data(obj)?;
+ let compressed_size = compressed.len() as u64;
+ let mut entry = input::Entry {
+ header,
+ header_size: header.size(obj.data.len() as u64) as u16,
+ pack_offset,
+ compressed: Some(compressed),
+ compressed_size,
+ crc32: None,
+ decompressed_size: obj.data.len() as u64,
+ trailer: None,
+ };
+ entry.crc32 = Some(entry.compute_crc32());
+ Ok(entry)
+ }
+ /// The amount of bytes this entry may consume in a pack data file
+ pub fn bytes_in_pack(&self) -> u64 {
+ self.header_size as u64 + self.compressed_size
+ }
+
+ /// Update our CRC value by recalculating it from our header and compressed data.
+ pub fn compute_crc32(&self) -> u32 {
+ let mut header_buf = [0u8; 12 + gix_hash::Kind::longest().len_in_bytes()];
+ let header_len = self
+ .header
+ .write_to(self.decompressed_size, header_buf.as_mut())
+ .expect("write to memory will not fail");
+ let state = gix_features::hash::crc32_update(0, &header_buf[..header_len]);
+ gix_features::hash::crc32_update(state, self.compressed.as_ref().expect("we always set it"))
+ }
+}
+
+fn to_header(kind: gix_object::Kind) -> Header {
+ use gix_object::Kind::*;
+ match kind {
+ Tree => Header::Tree,
+ Blob => Header::Blob,
+ Commit => Header::Commit,
+ Tag => Header::Tag,
+ }
+}
+
+fn compress_data(obj: &gix_object::Data<'_>) -> Result<Vec<u8>, input::Error> {
+ let mut out = gix_features::zlib::stream::deflate::Write::new(Vec::new());
+ if let Err(err) = std::io::copy(&mut &*obj.data, &mut out) {
+ match err.kind() {
+ std::io::ErrorKind::Other => return Err(input::Error::Io(err)),
+ err => {
+ unreachable!("Should never see other errors than zlib, but got {:?}", err,)
+ }
+ }
+ };
+ out.flush().expect("zlib flush should never fail");
+ Ok(out.into_inner())
+}
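
A sketch of `from_data_obj()` with a hypothetical helper (it assumes `gix_object::Data` is constructed from its public `kind` and `data` fields): build a standalone base entry from an in-memory blob, as done when materializing thin-pack bases. The pack offset of 12 stands for an entry placed directly after the 12-byte pack header.

    /// Turn raw blob bytes into a pack input entry placed right after the header.
    fn blob_entry(data: &[u8]) -> Result<gix_pack::data::input::Entry, gix_pack::data::input::Error> {
        let obj = gix_object::Data {
            kind: gix_object::Kind::Blob,
            data,
        };
        let entry = gix_pack::data::input::Entry::from_data_obj(&obj, 12)?;
        // This constructor always sets the CRC32 and keeps the compressed bytes.
        assert!(entry.crc32.is_some() && entry.compressed.is_some());
        assert_eq!(entry.bytes_in_pack(), entry.header_size as u64 + entry.compressed_size);
        Ok(entry)
    }
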
diff --git a/vendor/gix-pack/src/data/input/lookup_ref_delta_objects.rs b/vendor/gix-pack/src/data/input/lookup_ref_delta_objects.rs
new file mode 100644
index 000000000..f52c645f8
--- /dev/null
+++ b/vendor/gix-pack/src/data/input/lookup_ref_delta_objects.rs
@@ -0,0 +1,211 @@
+use std::convert::TryInto;
+
+use gix_hash::ObjectId;
+
+use crate::data::{entry::Header, input};
+
+/// An iterator to resolve thin packs on the fly.
+pub struct LookupRefDeltaObjectsIter<I, LFn> {
+ /// The inner iterator whose entries we will resolve.
+ pub inner: I,
+ lookup: LFn,
+    /// The cached delta to provide the next time we are called; it's the delta that goes with the base we just resolved in its place.
+ next_delta: Option<input::Entry>,
+ /// Fuse to stop iteration after first missing object.
+ error: bool,
+    /// The insertions and size changes we accumulated thus far. Each inserted entry offsets all following
+    /// objects by its length. We need to determine exactly where an object was inserted to see if it's affected at all.
+ inserted_entry_length_at_offset: Vec<Change>,
+ /// The sum of all entries added so far, as a cache to avoid recomputation
+ inserted_entries_length_in_bytes: i64,
+ buf: Vec<u8>,
+}
+
+impl<I, LFn> LookupRefDeltaObjectsIter<I, LFn>
+where
+ I: Iterator<Item = Result<input::Entry, input::Error>>,
+ LFn: for<'a> FnMut(ObjectId, &'a mut Vec<u8>) -> Option<gix_object::Data<'a>>,
+{
+ /// Create a new instance wrapping `iter` and using `lookup` as function to retrieve objects that will serve as bases
+ /// for ref deltas seen while traversing `iter`.
+ pub fn new(iter: I, lookup: LFn) -> Self {
+ LookupRefDeltaObjectsIter {
+ inner: iter,
+ lookup,
+ error: false,
+ inserted_entry_length_at_offset: Vec::new(),
+ inserted_entries_length_in_bytes: 0,
+ next_delta: None,
+ buf: Vec::new(),
+ }
+ }
+
+ fn shifted_pack_offset(&self, pack_offset: u64) -> u64 {
+ let new_ofs = pack_offset as i64 + self.inserted_entries_length_in_bytes;
+        new_ofs.try_into().expect("offset value never becomes negative")
+ }
+
+    /// Positive `size_change` values mean an object grew or, more commonly, was inserted. Negative values
+    /// mean the object shrank, usually because its header changed from a ref-delta to an ofs-delta.
+ fn track_change(
+ &mut self,
+ shifted_pack_offset: u64,
+ pack_offset: u64,
+ size_change: i64,
+ oid: impl Into<Option<ObjectId>>,
+ ) {
+ if size_change == 0 {
+ return;
+ }
+ self.inserted_entry_length_at_offset.push(Change {
+ shifted_pack_offset,
+ pack_offset,
+ size_change_in_bytes: size_change,
+ oid: oid.into().unwrap_or_else(||
+ // NOTE: this value acts as sentinel and the actual hash kind doesn't matter.
+ gix_hash::Kind::Sha1.null()),
+ });
+ self.inserted_entries_length_in_bytes += size_change;
+ }
+
+ fn shift_entry_and_point_to_base_by_offset(&mut self, entry: &mut input::Entry, base_distance: u64) {
+ let pack_offset = entry.pack_offset;
+ entry.pack_offset = self.shifted_pack_offset(pack_offset);
+ entry.header = Header::OfsDelta { base_distance };
+ let previous_header_size = entry.header_size;
+ entry.header_size = entry.header.size(entry.decompressed_size) as u16;
+
+ let change = entry.header_size as i64 - previous_header_size as i64;
+ entry.crc32 = Some(entry.compute_crc32());
+ self.track_change(entry.pack_offset, pack_offset, change, None);
+ }
+}
+
+impl<I, LFn> Iterator for LookupRefDeltaObjectsIter<I, LFn>
+where
+ I: Iterator<Item = Result<input::Entry, input::Error>>,
+ LFn: for<'a> FnMut(ObjectId, &'a mut Vec<u8>) -> Option<gix_object::Data<'a>>,
+{
+ type Item = Result<input::Entry, input::Error>;
+
+ fn next(&mut self) -> Option<Self::Item> {
+ if self.error {
+ return None;
+ }
+ if let Some(delta) = self.next_delta.take() {
+ return Some(Ok(delta));
+ }
+ match self.inner.next() {
+ Some(Ok(mut entry)) => match entry.header {
+ Header::RefDelta { base_id } => {
+ match self.inserted_entry_length_at_offset.iter().rfind(|e| e.oid == base_id) {
+ None => {
+ let base_entry = match (self.lookup)(base_id, &mut self.buf) {
+ Some(obj) => {
+ let current_pack_offset = entry.pack_offset;
+ let mut entry = match input::Entry::from_data_obj(&obj, 0) {
+ Ok(e) => e,
+ Err(err) => return Some(Err(err)),
+ };
+ entry.pack_offset = self.shifted_pack_offset(current_pack_offset);
+ self.track_change(
+ entry.pack_offset,
+ current_pack_offset,
+ entry.bytes_in_pack() as i64,
+ base_id,
+ );
+ entry
+ }
+ None => {
+ self.error = true;
+ return Some(Err(input::Error::NotFound { object_id: base_id }));
+ }
+ };
+
+ {
+ self.shift_entry_and_point_to_base_by_offset(&mut entry, base_entry.bytes_in_pack());
+ self.next_delta = Some(entry);
+ }
+ Some(Ok(base_entry))
+ }
+ Some(base_entry) => {
+ let base_distance =
+ self.shifted_pack_offset(entry.pack_offset) - base_entry.shifted_pack_offset;
+ self.shift_entry_and_point_to_base_by_offset(&mut entry, base_distance);
+ Some(Ok(entry))
+ }
+ }
+ }
+ _ => {
+ if self.inserted_entries_length_in_bytes != 0 {
+ if let Header::OfsDelta { base_distance } = entry.header {
+ // We have to find the new distance based on the previous distance to the base, using the absolute
+ // pack offset computed from it as stored in `base_pack_offset`.
+ let base_pack_offset = entry
+ .pack_offset
+ .checked_sub(base_distance)
+ .expect("distance to be in range of pack");
+ match self
+ .inserted_entry_length_at_offset
+ .binary_search_by_key(&base_pack_offset, |c| c.pack_offset)
+ {
+ Ok(index) => {
+ let index = {
+ let maybe_index_of_actual_entry = index + 1;
+ self.inserted_entry_length_at_offset
+ .get(maybe_index_of_actual_entry)
+ .and_then(|c| {
+ (c.pack_offset == base_pack_offset)
+ .then_some(maybe_index_of_actual_entry)
+ })
+ .unwrap_or(index)
+ };
+ let new_distance = self
+ .shifted_pack_offset(entry.pack_offset)
+ .checked_sub(self.inserted_entry_length_at_offset[index].shifted_pack_offset)
+ .expect("a base that is behind us in the pack");
+ self.shift_entry_and_point_to_base_by_offset(&mut entry, new_distance);
+ }
+ Err(index) => {
+ let change_since_offset = self.inserted_entry_length_at_offset[index..]
+ .iter()
+ .map(|c| c.size_change_in_bytes)
+ .sum::<i64>();
+ let new_distance: u64 = {
+ (base_distance as i64 + change_since_offset)
+ .try_into()
+ .expect("it still points behind us")
+ };
+ self.shift_entry_and_point_to_base_by_offset(&mut entry, new_distance);
+ }
+ }
+ } else {
+ // Offset this entry by all changes (positive or negative) that we saw thus far.
+ entry.pack_offset = self.shifted_pack_offset(entry.pack_offset);
+ }
+ }
+ Some(Ok(entry))
+ }
+ },
+ other => other,
+ }
+ }
+
+ fn size_hint(&self) -> (usize, Option<usize>) {
+ let (min, max) = self.inner.size_hint();
+ max.map(|max| (min, Some(max * 2))).unwrap_or_else(|| (min * 2, None))
+ }
+}
+
+#[derive(Debug)]
+struct Change {
+    /// The original pack offset as mentioned in the entry we saw. This is used to find this entry as a base object
+    /// if deltas refer to it by its old offset.
+ pack_offset: u64,
+ /// The new pack offset that is the shifted location of the pack entry in the pack.
+ shifted_pack_offset: u64,
+ /// The size change of the entry header, negative values denote shrinking, positive denote growing.
+ size_change_in_bytes: i64,
+    /// The object id of the inserted base entry responsible for the change, or null if the change merely tracks a header resize.
+ oid: ObjectId,
+}
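
A sketch of plugging in a `lookup` function (the `HashMap` merely stands in for an object database; the wrapper and closure signature come from the code above): missing ref-delta bases are fetched into the reusable buffer and spliced into the stream as full entries.

    use std::collections::HashMap;

    use gix_hash::ObjectId;
    use gix_pack::data::input;

    /// Resolve a thin pack by looking up missing ref-delta bases in `odb`.
    fn resolve_thin<I>(
        entries: I,
        odb: HashMap<ObjectId, (gix_object::Kind, Vec<u8>)>,
    ) -> impl Iterator<Item = Result<input::Entry, input::Error>>
    where
        I: Iterator<Item = Result<input::Entry, input::Error>>,
    {
        input::LookupRefDeltaObjectsIter::new(entries, move |id, buf| {
            let (kind, data) = odb.get(&id)?; // `None` surfaces as input::Error::NotFound
            buf.clear();
            buf.extend_from_slice(data);
            Some(gix_object::Data { kind: *kind, data: buf.as_slice() })
        })
    }
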
diff --git a/vendor/gix-pack/src/data/input/mod.rs b/vendor/gix-pack/src/data/input/mod.rs
new file mode 100644
index 000000000..df191de67
--- /dev/null
+++ b/vendor/gix-pack/src/data/input/mod.rs
@@ -0,0 +1,41 @@
+/// An item of the iteration produced by [`BytesToEntriesIter`]
+#[derive(PartialEq, Eq, Debug, Hash, Ord, PartialOrd, Clone)]
+#[cfg_attr(feature = "serde1", derive(serde::Serialize, serde::Deserialize))]
+pub struct Entry {
+ /// The header of a pack entry
+ pub header: crate::data::entry::Header,
+ /// The amount of bytes used to encode the `header`. `pack_offset + header_size` is the beginning of
+ /// the compressed data in the pack.
+ pub header_size: u16,
+ /// The first byte of the entry at which the `header` can be read.
+ pub pack_offset: u64,
+    /// The compressed bytes consumed while producing the decompressed data.
+    /// These do not contain the header, which makes it possible to easily replace a ref-delta with an offset delta
+    /// when resolving thin packs.
+    /// Whether they are present depends on the `EntryDataMode` the iterator was initialized with.
+ pub compressed: Option<Vec<u8>>,
+    /// The amount of bytes the compressed portion of the entry takes, i.e. the portion behind the header.
+ pub compressed_size: u64,
+    /// The CRC32 over the complete entry, that is, the encoded header and the compressed object data.
+    /// Whether it is present depends on the `EntryDataMode` the iterator was initialized with.
+ pub crc32: Option<u32>,
+ /// The amount of decompressed bytes of the entry.
+ pub decompressed_size: u64,
+ /// Set for the last object in the iteration, providing the hash over all bytes of the iteration
+ /// for use as trailer in a pack or to verify it matches the trailer.
+ pub trailer: Option<gix_hash::ObjectId>,
+}
+
+mod entry;
+
+mod types;
+pub use types::{EntryDataMode, Error, Mode};
+
+mod bytes_to_entries;
+pub use bytes_to_entries::BytesToEntriesIter;
+
+mod lookup_ref_delta_objects;
+pub use lookup_ref_delta_objects::LookupRefDeltaObjectsIter;
+
+mod entries_to_bytes;
+pub use entries_to_bytes::EntriesToBytesIter;
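
A small sketch based on the invariant documented on `header_size` above (`pack_offset + header_size` is where the compressed data begins):

    /// The byte range of an entry's compressed stream within the pack.
    fn compressed_range(entry: &gix_pack::data::input::Entry) -> std::ops::Range<u64> {
        let start = entry.pack_offset + u64::from(entry.header_size);
        start..start + entry.compressed_size
    }
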
diff --git a/vendor/gix-pack/src/data/input/types.rs b/vendor/gix-pack/src/data/input/types.rs
new file mode 100644
index 000000000..6fcd459e2
--- /dev/null
+++ b/vendor/gix-pack/src/data/input/types.rs
@@ -0,0 +1,73 @@
+use std::io;
+
+/// Returned by [`BytesToEntriesIter::new_from_header()`][crate::data::input::BytesToEntriesIter::new_from_header()] and as part
+/// of `Item` of [`BytesToEntriesIter`][crate::data::input::BytesToEntriesIter].
+#[derive(thiserror::Error, Debug)]
+#[allow(missing_docs)]
+pub enum Error {
+ #[error("An IO operation failed while streaming an entry")]
+ Io(#[from] io::Error),
+ #[error(transparent)]
+ PackParse(#[from] crate::data::header::decode::Error),
+ #[error("pack checksum in trailer was {expected}, but actual checksum was {actual}")]
+ ChecksumMismatch {
+ expected: gix_hash::ObjectId,
+ actual: gix_hash::ObjectId,
+ },
+ #[error("pack is incomplete: it was decompressed into {actual} bytes but {expected} bytes where expected.")]
+ IncompletePack { actual: u64, expected: u64 },
+ #[error("The object {object_id} could not be decoded or wasn't found")]
+ NotFound { object_id: gix_hash::ObjectId },
+}
+
+/// Iteration Mode
+#[derive(PartialEq, Eq, Debug, Hash, Ord, PartialOrd, Clone, Copy)]
+#[cfg_attr(feature = "serde1", derive(serde::Serialize, serde::Deserialize))]
+pub enum Mode {
+ /// Provide the trailer as read from the pack
+ AsIs,
+    /// Generate our own hash and trigger an error on the last iterated object
+    /// if it does not match the hash provided with the pack.
+    ///
+    /// This way, whoever iterates the data cannot miss corruption as long as
+    /// the iteration continues through to the end.
+ Verify,
+    /// Generate our own hash, and if there was an error or the objects were depleted early
+    /// due to a partial pack, return the last valid entry along with our own hash thus far.
+    /// Note that the existing pack hash, if present, will be ignored.
+    /// As we won't know which object fails, every object will carry the hash obtained thus far.
+    /// This also means that algorithms must know about this possibility, or else they might wrongly
+    /// assume the pack is finished.
+ Restore,
+}
+
+/// Define what to do with the compressed bytes portion of a pack [`Entry`][super::Entry]
+#[derive(PartialEq, Eq, Debug, Hash, Ord, PartialOrd, Clone, Copy)]
+#[cfg_attr(feature = "serde1", derive(serde::Serialize, serde::Deserialize))]
+pub enum EntryDataMode {
+ /// Do nothing with the compressed bytes we read
+ Ignore,
+ /// Only create a CRC32 of the entry, otherwise similar to `Ignore`
+ Crc32,
+ /// Keep them and pass them along in a newly allocated buffer
+ Keep,
+ /// As above, but also compute a CRC32
+ KeepAndCrc32,
+}
+
+impl EntryDataMode {
+ /// Returns true if a crc32 should be computed
+ pub fn crc32(&self) -> bool {
+ match self {
+ EntryDataMode::KeepAndCrc32 | EntryDataMode::Crc32 => true,
+ EntryDataMode::Keep | EntryDataMode::Ignore => false,
+ }
+ }
+ /// Returns true if compressed bytes should be kept
+ pub fn keep(&self) -> bool {
+ match self {
+ EntryDataMode::Keep | EntryDataMode::KeepAndCrc32 => true,
+ EntryDataMode::Ignore | EntryDataMode::Crc32 => false,
+ }
+ }
+}
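
A quick sketch of how the two predicates above partition the four modes:

    fn main() {
        use gix_pack::data::input::EntryDataMode;
        assert!(EntryDataMode::KeepAndCrc32.keep() && EntryDataMode::KeepAndCrc32.crc32());
        assert!(EntryDataMode::Keep.keep() && !EntryDataMode::Keep.crc32());
        assert!(!EntryDataMode::Crc32.keep() && EntryDataMode::Crc32.crc32());
        assert!(!EntryDataMode::Ignore.keep() && !EntryDataMode::Ignore.crc32());
    }
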