author    | Daniel Baumann <daniel.baumann@progress-linux.org> | 2024-05-04 12:41:41 +0000
committer | Daniel Baumann <daniel.baumann@progress-linux.org> | 2024-05-04 12:41:41 +0000
commit    | 10ee2acdd26a7f1298c6f6d6b7af9b469fe29b87 (patch)
tree      | bdffd5d80c26cf4a7a518281a204be1ace85b4c1 /vendor/gix-pack/src/data/input
parent    | Releasing progress-linux version 1.70.0+dfsg1-9~progress7.99u1. (diff)
Merging upstream version 1.70.0+dfsg2.
Signed-off-by: Daniel Baumann <daniel.baumann@progress-linux.org>
Diffstat (limited to 'vendor/gix-pack/src/data/input')
-rw-r--r-- | vendor/gix-pack/src/data/input/bytes_to_entries.rs         | 295
-rw-r--r-- | vendor/gix-pack/src/data/input/entries_to_bytes.rs         | 155
-rw-r--r-- | vendor/gix-pack/src/data/input/entry.rs                    |  65
-rw-r--r-- | vendor/gix-pack/src/data/input/lookup_ref_delta_objects.rs | 211
-rw-r--r-- | vendor/gix-pack/src/data/input/mod.rs                      |  41
-rw-r--r-- | vendor/gix-pack/src/data/input/types.rs                    |  73
6 files changed, 840 insertions, 0 deletions
diff --git a/vendor/gix-pack/src/data/input/bytes_to_entries.rs b/vendor/gix-pack/src/data/input/bytes_to_entries.rs
new file mode 100644
index 000000000..cf20d5fbf
--- /dev/null
+++ b/vendor/gix-pack/src/data/input/bytes_to_entries.rs
@@ -0,0 +1,295 @@
+use std::{fs, io};
+
+use gix_features::{
+    hash,
+    hash::Sha1,
+    zlib::{stream::inflate::ReadBoxed, Decompress},
+};
+use gix_hash::ObjectId;
+
+use crate::data::input;
+
+/// An iterator over [`Entries`][input::Entry] in a byte stream.
+///
+/// The iterator is used as part of [Bundle::write_to_directory(…)][crate::Bundle::write_to_directory()].
+pub struct BytesToEntriesIter<BR> {
+    read: BR,
+    decompressor: Option<Box<Decompress>>,
+    offset: u64,
+    had_error: bool,
+    version: crate::data::Version,
+    objects_left: u32,
+    hash: Option<Sha1>,
+    mode: input::Mode,
+    compressed: input::EntryDataMode,
+    compressed_buf: Option<Vec<u8>>,
+    hash_len: usize,
+    object_hash: gix_hash::Kind,
+}
+
+/// Access
+impl<BR> BytesToEntriesIter<BR> {
+    /// The pack version currently being iterated
+    pub fn version(&self) -> crate::data::Version {
+        self.version
+    }
+
+    /// The kind of iteration
+    pub fn mode(&self) -> input::Mode {
+        self.mode
+    }
+}
+
+/// Initialization
+impl<BR> BytesToEntriesIter<BR>
+where
+    BR: io::BufRead,
+{
+    /// Obtain an iterator from a `read` stream to a pack data file and configure it using `mode` and `compressed`.
+    /// `object_hash` specifies which hash is used for objects in ref-delta entries.
+    ///
+    /// Note that `read` is expected to be positioned at the beginning of a valid pack data file with a header, entries and a trailer.
+    pub fn new_from_header(
+        mut read: BR,
+        mode: input::Mode,
+        compressed: input::EntryDataMode,
+        object_hash: gix_hash::Kind,
+    ) -> Result<BytesToEntriesIter<BR>, input::Error> {
+        let mut header_data = [0u8; 12];
+        read.read_exact(&mut header_data)?;
+
+        let (version, num_objects) = crate::data::header::decode(&header_data)?;
+        assert_eq!(
+            version,
+            crate::data::Version::V2,
+            "let's stop here if we see undocumented pack formats"
+        );
+        Ok(BytesToEntriesIter {
+            read,
+            decompressor: None,
+            compressed,
+            offset: 12,
+            had_error: false,
+            version,
+            objects_left: num_objects,
+            hash: (mode != input::Mode::AsIs).then(|| {
+                let mut hash = gix_features::hash::hasher(object_hash);
+                hash.update(&header_data);
+                hash
+            }),
+            mode,
+            compressed_buf: None,
+            hash_len: object_hash.len_in_bytes(),
+            object_hash,
+        })
+    }
+
+    fn next_inner(&mut self) -> Result<input::Entry, input::Error> {
+        self.objects_left -= 1; // even an error counts as an object
+
+        // Read the header
+        let entry = match self.hash.take() {
+            Some(hash) => {
+                let mut read = read_and_pass_to(
+                    &mut self.read,
+                    hash::Write {
+                        inner: io::sink(),
+                        hash,
+                    },
+                );
+                let res = crate::data::Entry::from_read(&mut read, self.offset, self.hash_len);
+                self.hash = Some(read.write.hash);
+                res
+            }
+            None => crate::data::Entry::from_read(&mut self.read, self.offset, self.hash_len),
+        }
+        .map_err(input::Error::from)?;
+
+        // Decompress the object to learn its compressed bytes
+        let mut decompressor = self
+            .decompressor
+            .take()
+            .unwrap_or_else(|| Box::new(Decompress::new(true)));
+        let compressed_buf = self.compressed_buf.take().unwrap_or_else(|| Vec::with_capacity(4096));
+        decompressor.reset(true);
+        let mut decompressed_reader = ReadBoxed {
+            inner: read_and_pass_to(
+                &mut self.read,
+                if self.compressed.keep() {
+                    Vec::with_capacity(entry.decompressed_size as usize)
+                } else {
+                    compressed_buf
+                },
+            ),
+            decompressor,
+        };
+
+        let bytes_copied = io::copy(&mut decompressed_reader, &mut io::sink())?;
+        if bytes_copied != entry.decompressed_size {
+            return Err(input::Error::IncompletePack {
+                actual: bytes_copied,
+                expected: entry.decompressed_size,
+            });
+        }
+
+        let pack_offset = self.offset;
+        let compressed_size = decompressed_reader.decompressor.total_in();
+        self.offset += entry.header_size() as u64 + compressed_size;
+        self.decompressor = Some(decompressed_reader.decompressor);
+
+        let mut compressed = decompressed_reader.inner.write;
+        debug_assert_eq!(
+            compressed_size,
+            compressed.len() as u64,
+            "we must track exactly the same amount of bytes as read by the decompressor"
+        );
+        if let Some(hash) = self.hash.as_mut() {
+            hash.update(&compressed);
+        }
+
+        let crc32 = if self.compressed.crc32() {
+            let mut header_buf = [0u8; 12 + gix_hash::Kind::longest().len_in_bytes()];
+            let header_len = entry.header.write_to(bytes_copied, header_buf.as_mut())?;
+            let state = gix_features::hash::crc32_update(0, &header_buf[..header_len]);
+            Some(gix_features::hash::crc32_update(state, &compressed))
+        } else {
+            None
+        };
+
+        let compressed = if self.compressed.keep() {
+            Some(compressed)
+        } else {
+            compressed.clear();
+            self.compressed_buf = Some(compressed);
+            None
+        };
+
+        // The last object gets the trailer (which is potentially verified)
+        let trailer = self.try_read_trailer()?;
+        Ok(input::Entry {
+            header: entry.header,
+            header_size: entry.header_size() as u16,
+            compressed,
+            compressed_size,
+            crc32,
+            pack_offset,
+            decompressed_size: bytes_copied,
+            trailer,
+        })
+    }
+
+    fn try_read_trailer(&mut self) -> Result<Option<ObjectId>, input::Error> {
+        Ok(if self.objects_left == 0 {
+            let mut id = gix_hash::ObjectId::null(self.object_hash);
+            if let Err(err) = self.read.read_exact(id.as_mut_slice()) {
+                if self.mode != input::Mode::Restore {
+                    return Err(err.into());
+                }
+            }
+
+            if let Some(hash) = self.hash.take() {
+                let actual_id = gix_hash::ObjectId::from(hash.digest());
+                if self.mode == input::Mode::Restore {
+                    id = actual_id;
+                }
+                if id != actual_id {
+                    return Err(input::Error::ChecksumMismatch {
+                        actual: actual_id,
+                        expected: id,
+                    });
+                }
+            }
+            Some(id)
+        } else if self.mode == input::Mode::Restore {
+            let hash = self.hash.clone().expect("in restore mode a hash is set");
+            Some(gix_hash::ObjectId::from(hash.digest()))
+        } else {
+            None
+        })
+    }
+}
+
+fn read_and_pass_to<R: io::Read, W: io::Write>(read: &mut R, to: W) -> PassThrough<&mut R, W> {
+    PassThrough { read, write: to }
+}
+
+impl<R> Iterator for BytesToEntriesIter<R>
+where
+    R: io::BufRead,
+{
+    type Item = Result<input::Entry, input::Error>;
+
+    fn next(&mut self) -> Option<Self::Item> {
+        if self.had_error || self.objects_left == 0 {
+            return None;
+        }
+        let result = self.next_inner();
+        self.had_error = result.is_err();
+        if self.had_error {
+            self.objects_left = 0;
+        }
+        if self.mode == input::Mode::Restore && self.had_error {
+            None
+        } else {
+            Some(result)
+        }
+    }
+
+    fn size_hint(&self) -> (usize, Option<usize>) {
+        (self.objects_left as usize, Some(self.objects_left as usize))
+    }
+}
+
+impl<R> std::iter::ExactSizeIterator for BytesToEntriesIter<R> where R: io::BufRead {}
+
+struct PassThrough<R, W> {
+    read: R,
+    write: W,
+}
+
+impl<R, W> io::BufRead for PassThrough<R, W>
+where
+    Self: io::Read,
+    R: io::BufRead,
+    W: io::Write,
+{
+    fn fill_buf(&mut self) -> io::Result<&[u8]> {
+        self.read.fill_buf()
+    }
+
+    fn consume(&mut self, amt: usize) {
+        let buf = self
+            .read
+            .fill_buf()
+            .expect("never fail as we called fill-buf before and this does nothing");
+        self.write
+            .write_all(&buf[..amt])
+            .expect("a write to never fail - should be a memory buffer");
+        self.read.consume(amt)
+    }
+}
+
+impl<R, W> io::Read for PassThrough<R, W>
+where
+    W: io::Write,
+    R: io::Read,
+{
+    fn read(&mut self, buf: &mut [u8]) -> io::Result<usize> {
+        let bytes_read = self.read.read(buf)?;
+        self.write.write_all(&buf[..bytes_read])?;
+        Ok(bytes_read)
+    }
+}
+
+impl crate::data::File {
+    /// Returns an iterator over [`Entries`][crate::data::input::Entry], without making use of the memory mapping.
+    pub fn streaming_iter(&self) -> Result<BytesToEntriesIter<impl io::BufRead>, input::Error> {
+        let reader = io::BufReader::with_capacity(4096 * 8, fs::File::open(&self.path)?);
+        BytesToEntriesIter::new_from_header(
+            reader,
+            input::Mode::Verify,
+            input::EntryDataMode::KeepAndCrc32,
+            self.object_hash,
+        )
+    }
+}
diff --git a/vendor/gix-pack/src/data/input/entries_to_bytes.rs b/vendor/gix-pack/src/data/input/entries_to_bytes.rs
new file mode 100644
index 000000000..a8c21e653
--- /dev/null
+++ b/vendor/gix-pack/src/data/input/entries_to_bytes.rs
@@ -0,0 +1,155 @@
+use std::iter::Peekable;
+
+use gix_features::hash;
+
+use crate::data::input;
+
+/// An implementation of [`Iterator`] to write [encoded entries][input::Entry] to an inner implementation each time
+/// `next()` is called.
+///
+/// It is able to deal with an unknown amount of objects as it will rewrite the pack header once the entries iterator
+/// is depleted, and computes the hash in one go by re-reading the whole file.
+pub struct EntriesToBytesIter<I: Iterator, W> {
+    /// An iterator for input [`input::Entry`] instances
+    pub input: Peekable<I>,
+    /// A way of writing encoded bytes.
+    output: W,
+    /// Our trailing hash when done writing all input entries
+    trailer: Option<gix_hash::ObjectId>,
+    /// The version of the pack to be written.
+    data_version: crate::data::Version,
+    /// The amount of entries seen so far
+    num_entries: u32,
+    /// If we are done, no additional writes will occur
+    is_done: bool,
+    /// The kind of hash to use for the digest
+    object_hash: gix_hash::Kind,
+}
+
+impl<I, W> EntriesToBytesIter<I, W>
+where
+    I: Iterator<Item = Result<input::Entry, input::Error>>,
+    W: std::io::Read + std::io::Write + std::io::Seek,
+{
+    /// Create a new instance reading [entries][input::Entry] from an `input` iterator and writing pack data bytes to
+    /// the `output` writer, resembling a pack of `version`. The amount of entries will be determined dynamically and
+    /// the pack is completed once the last entry was written.
+    /// `object_hash` is the kind of hash to use for the pack checksum and maybe other places, depending on the version.
+    ///
+    /// # Panics
+    ///
+    /// Not all combinations of `object_hash` and `version` are currently supported; unsupported ones trigger assertion errors.
+    pub fn new(input: I, output: W, version: crate::data::Version, object_hash: gix_hash::Kind) -> Self {
+        assert!(
+            matches!(version, crate::data::Version::V2),
+            "currently only pack version 2 can be written",
+        );
+        assert!(
+            matches!(object_hash, gix_hash::Kind::Sha1),
+            "currently only Sha1 is supported, right now we don't know how other hashes are encoded",
+        );
+        EntriesToBytesIter {
+            input: input.peekable(),
+            output,
+            object_hash,
+            num_entries: 0,
+            trailer: None,
+            data_version: version,
+            is_done: false,
+        }
+    }
+
+    /// Returns the trailing hash over all entries once done.
+    /// It's `None` if we are not yet done writing.
+    pub fn digest(&self) -> Option<gix_hash::ObjectId> {
+        self.trailer
+    }
+
+    fn next_inner(&mut self, entry: input::Entry) -> Result<input::Entry, input::Error> {
+        if self.num_entries == 0 {
+            let header_bytes = crate::data::header::encode(self.data_version, 0);
+            self.output.write_all(&header_bytes[..])?;
+        }
+        self.num_entries += 1;
+        entry.header.write_to(entry.decompressed_size, &mut self.output)?;
+        std::io::copy(
+            &mut entry
+                .compressed
+                .as_deref()
+                .expect("caller must configure generator to keep compressed bytes"),
+            &mut self.output,
+        )?;
+        Ok(entry)
+    }
+
+    fn write_header_and_digest(&mut self, last_entry: Option<&mut input::Entry>) -> Result<(), input::Error> {
+        let header_bytes = crate::data::header::encode(self.data_version, self.num_entries);
+        let num_bytes_written = if last_entry.is_some() {
+            self.output.stream_position()?
+        } else {
+            header_bytes.len() as u64
+        };
+        self.output.rewind()?;
+        self.output.write_all(&header_bytes[..])?;
+        self.output.flush()?;
+
+        self.output.rewind()?;
+        let interrupt_never = std::sync::atomic::AtomicBool::new(false);
+        let digest = hash::bytes(
+            &mut self.output,
+            num_bytes_written as usize,
+            self.object_hash,
+            &mut gix_features::progress::Discard,
+            &interrupt_never,
+        )?;
+        self.output.write_all(digest.as_slice())?;
+        self.output.flush()?;
+
+        self.is_done = true;
+        if let Some(last_entry) = last_entry {
+            last_entry.trailer = Some(digest);
+        }
+        self.trailer = Some(digest);
+        Ok(())
+    }
+}
+
+impl<I, W> Iterator for EntriesToBytesIter<I, W>
+where
+    I: Iterator<Item = Result<input::Entry, input::Error>>,
+    W: std::io::Read + std::io::Write + std::io::Seek,
+{
+    /// The entry just written to `output` if `Ok`, or the error received from the input iterator.
+    type Item = Result<input::Entry, input::Error>;
+
+    fn next(&mut self) -> Option<Self::Item> {
+        if self.is_done {
+            return None;
+        }
+
+        match self.input.next() {
+            Some(res) => Some(match res {
+                Ok(entry) => self.next_inner(entry).and_then(|mut entry| {
+                    if self.input.peek().is_none() {
+                        self.write_header_and_digest(Some(&mut entry)).map(|_| entry)
+                    } else {
+                        Ok(entry)
+                    }
+                }),
+                Err(err) => {
+                    self.is_done = true;
+                    Err(err)
+                }
+            }),
+            None => match self.write_header_and_digest(None) {
+                Ok(_) => None,
+                Err(err) => Some(Err(err)),
+            },
+        }
+    }
+
+    fn size_hint(&self) -> (usize, Option<usize>) {
+        self.input.size_hint()
+    }
+}
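
The writing side pairs naturally with the reader above. A sketch that re-encodes a stream of entries into an in-memory pack; it assumes the entries were produced with `EntryDataMode::Keep` or `KeepAndCrc32` so their `compressed` field is populated:

```rust
use std::io;

use gix_pack::data::{input, Version};

fn rewrite_pack<I>(entries: I) -> Result<Vec<u8>, input::Error>
where
    I: Iterator<Item = Result<input::Entry, input::Error>>,
{
    // The output must be Read + Write + Seek: the header is rewritten with the
    // final object count, and the trailing hash is computed by re-reading it all.
    let mut out = io::Cursor::new(Vec::new());
    let mut iter = input::EntriesToBytesIter::new(entries, &mut out, Version::V2, gix_hash::Kind::Sha1);
    for entry in iter.by_ref() {
        entry?; // drive the iterator; each item is the entry that was just written
    }
    debug_assert!(iter.digest().is_some(), "the trailer is known once input is depleted");
    drop(iter);
    Ok(out.into_inner())
}
```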
diff --git a/vendor/gix-pack/src/data/input/entry.rs b/vendor/gix-pack/src/data/input/entry.rs
new file mode 100644
index 000000000..74d4800a0
--- /dev/null
+++ b/vendor/gix-pack/src/data/input/entry.rs
@@ -0,0 +1,65 @@
+use std::io::Write;
+
+use crate::data::{entry::Header, input};
+
+impl input::Entry {
+    /// Create a new input entry from a given data `obj` set to be placed at the given `pack_offset`.
+    ///
+    /// This method is useful when arbitrary base entries are created.
+    pub fn from_data_obj(obj: &gix_object::Data<'_>, pack_offset: u64) -> Result<Self, input::Error> {
+        let header = to_header(obj.kind);
+        let compressed = compress_data(obj)?;
+        let compressed_size = compressed.len() as u64;
+        let mut entry = input::Entry {
+            header,
+            header_size: header.size(obj.data.len() as u64) as u16,
+            pack_offset,
+            compressed: Some(compressed),
+            compressed_size,
+            crc32: None,
+            decompressed_size: obj.data.len() as u64,
+            trailer: None,
+        };
+        entry.crc32 = Some(entry.compute_crc32());
+        Ok(entry)
+    }
+
+    /// The amount of bytes this entry may consume in a pack data file
+    pub fn bytes_in_pack(&self) -> u64 {
+        self.header_size as u64 + self.compressed_size
+    }
+
+    /// Recalculate the CRC32 value from our header and compressed data.
+    pub fn compute_crc32(&self) -> u32 {
+        let mut header_buf = [0u8; 12 + gix_hash::Kind::longest().len_in_bytes()];
+        let header_len = self
+            .header
+            .write_to(self.decompressed_size, header_buf.as_mut())
+            .expect("write to memory will not fail");
+        let state = gix_features::hash::crc32_update(0, &header_buf[..header_len]);
+        gix_features::hash::crc32_update(state, self.compressed.as_ref().expect("we always set it"))
+    }
+}
+
+fn to_header(kind: gix_object::Kind) -> Header {
+    use gix_object::Kind::*;
+    match kind {
+        Tree => Header::Tree,
+        Blob => Header::Blob,
+        Commit => Header::Commit,
+        Tag => Header::Tag,
+    }
+}
+
+fn compress_data(obj: &gix_object::Data<'_>) -> Result<Vec<u8>, input::Error> {
+    let mut out = gix_features::zlib::stream::deflate::Write::new(Vec::new());
+    if let Err(err) = std::io::copy(&mut &*obj.data, &mut out) {
+        match err.kind() {
+            std::io::ErrorKind::Other => return Err(input::Error::Io(err)),
+            err => {
+                unreachable!("Should never see other errors than zlib, but got {:?}", err)
+            }
+        }
+    };
+    out.flush().expect("zlib flush should never fail");
+    Ok(out.into_inner())
+}
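
A small sketch of fabricating a base entry with `from_data_obj`, which is exactly what the thin-pack resolution in the next file does; the blob contents are made up:

```rust
use gix_pack::data::input;

fn make_base_entry() -> Result<input::Entry, input::Error> {
    let data = b"hello, pack";
    let obj = gix_object::Data {
        kind: gix_object::Kind::Blob,
        data: &data[..],
    };
    // Offset 12 would place the entry right behind a pack header.
    let entry = input::Entry::from_data_obj(&obj, 12)?;
    assert_eq!(entry.decompressed_size, data.len() as u64);
    assert_eq!(entry.bytes_in_pack(), entry.header_size as u64 + entry.compressed_size);
    assert_eq!(entry.crc32, Some(entry.compute_crc32())); // the CRC32 comes precomputed
    Ok(entry)
}
```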
diff --git a/vendor/gix-pack/src/data/input/lookup_ref_delta_objects.rs b/vendor/gix-pack/src/data/input/lookup_ref_delta_objects.rs
new file mode 100644
index 000000000..f52c645f8
--- /dev/null
+++ b/vendor/gix-pack/src/data/input/lookup_ref_delta_objects.rs
@@ -0,0 +1,211 @@
+use std::convert::TryInto;
+
+use gix_hash::ObjectId;
+
+use crate::data::{entry::Header, input};
+
+/// An iterator to resolve thin packs on the fly.
+pub struct LookupRefDeltaObjectsIter<I, LFn> {
+    /// The inner iterator whose entries we will resolve.
+    pub inner: I,
+    lookup: LFn,
+    /// The cached delta to provide next time we are called, it's the delta to go with the base we just resolved in its place.
+    next_delta: Option<input::Entry>,
+    /// Fuse to stop iteration after the first missing object.
+    error: bool,
+    /// The insertions we accumulated thus far. Each inserted entry offsets all following
+    /// objects by its length. We need to determine exactly where the object was inserted to see if it's affected at all.
+    inserted_entry_length_at_offset: Vec<Change>,
+    /// The sum of all entries added so far, as a cache to avoid recomputation
+    inserted_entries_length_in_bytes: i64,
+    buf: Vec<u8>,
+}
+
+impl<I, LFn> LookupRefDeltaObjectsIter<I, LFn>
+where
+    I: Iterator<Item = Result<input::Entry, input::Error>>,
+    LFn: for<'a> FnMut(ObjectId, &'a mut Vec<u8>) -> Option<gix_object::Data<'a>>,
+{
+    /// Create a new instance wrapping `iter` and using `lookup` as function to retrieve objects that will serve as bases
+    /// for ref deltas seen while traversing `iter`.
+    pub fn new(iter: I, lookup: LFn) -> Self {
+        LookupRefDeltaObjectsIter {
+            inner: iter,
+            lookup,
+            error: false,
+            inserted_entry_length_at_offset: Vec::new(),
+            inserted_entries_length_in_bytes: 0,
+            next_delta: None,
+            buf: Vec::new(),
+        }
+    }
+
+    fn shifted_pack_offset(&self, pack_offset: u64) -> u64 {
+        let new_ofs = pack_offset as i64 + self.inserted_entries_length_in_bytes;
+        new_ofs.try_into().expect("offset value never becomes negative")
+    }
+
+    /// Positive `size_change` values mean an object grew or, more commonly, was inserted. Negative values
+    /// mean the object shrank, usually because its header changed from ref-deltas to ofs-deltas.
+    fn track_change(
+        &mut self,
+        shifted_pack_offset: u64,
+        pack_offset: u64,
+        size_change: i64,
+        oid: impl Into<Option<ObjectId>>,
+    ) {
+        if size_change == 0 {
+            return;
+        }
+        self.inserted_entry_length_at_offset.push(Change {
+            shifted_pack_offset,
+            pack_offset,
+            size_change_in_bytes: size_change,
+            oid: oid.into().unwrap_or_else(||
+                // NOTE: this value acts as sentinel and the actual hash kind doesn't matter.
+                gix_hash::Kind::Sha1.null()),
+        });
+        self.inserted_entries_length_in_bytes += size_change;
+    }
+
+    fn shift_entry_and_point_to_base_by_offset(&mut self, entry: &mut input::Entry, base_distance: u64) {
+        let pack_offset = entry.pack_offset;
+        entry.pack_offset = self.shifted_pack_offset(pack_offset);
+        entry.header = Header::OfsDelta { base_distance };
+        let previous_header_size = entry.header_size;
+        entry.header_size = entry.header.size(entry.decompressed_size) as u16;
+
+        let change = entry.header_size as i64 - previous_header_size as i64;
+        entry.crc32 = Some(entry.compute_crc32());
+        self.track_change(entry.pack_offset, pack_offset, change, None);
+    }
+}
+
+impl<I, LFn> Iterator for LookupRefDeltaObjectsIter<I, LFn>
+where
+    I: Iterator<Item = Result<input::Entry, input::Error>>,
+    LFn: for<'a> FnMut(ObjectId, &'a mut Vec<u8>) -> Option<gix_object::Data<'a>>,
+{
+    type Item = Result<input::Entry, input::Error>;
+
+    fn next(&mut self) -> Option<Self::Item> {
+        if self.error {
+            return None;
+        }
+        if let Some(delta) = self.next_delta.take() {
+            return Some(Ok(delta));
+        }
+        match self.inner.next() {
+            Some(Ok(mut entry)) => match entry.header {
+                Header::RefDelta { base_id } => {
+                    match self.inserted_entry_length_at_offset.iter().rfind(|e| e.oid == base_id) {
+                        None => {
+                            let base_entry = match (self.lookup)(base_id, &mut self.buf) {
+                                Some(obj) => {
+                                    let current_pack_offset = entry.pack_offset;
+                                    let mut entry = match input::Entry::from_data_obj(&obj, 0) {
+                                        Ok(e) => e,
+                                        Err(err) => return Some(Err(err)),
+                                    };
+                                    entry.pack_offset = self.shifted_pack_offset(current_pack_offset);
+                                    self.track_change(
+                                        entry.pack_offset,
+                                        current_pack_offset,
+                                        entry.bytes_in_pack() as i64,
+                                        base_id,
+                                    );
+                                    entry
+                                }
+                                None => {
+                                    self.error = true;
+                                    return Some(Err(input::Error::NotFound { object_id: base_id }));
+                                }
+                            };
+
+                            {
+                                self.shift_entry_and_point_to_base_by_offset(&mut entry, base_entry.bytes_in_pack());
+                                self.next_delta = Some(entry);
+                            }
+                            Some(Ok(base_entry))
+                        }
+                        Some(base_entry) => {
+                            let base_distance =
+                                self.shifted_pack_offset(entry.pack_offset) - base_entry.shifted_pack_offset;
+                            self.shift_entry_and_point_to_base_by_offset(&mut entry, base_distance);
+                            Some(Ok(entry))
+                        }
+                    }
+                }
+                _ => {
+                    if self.inserted_entries_length_in_bytes != 0 {
+                        if let Header::OfsDelta { base_distance } = entry.header {
+                            // We have to find the new distance based on the previous distance to the base, using the absolute
+                            // pack offset computed from it as stored in `base_pack_offset`.
+                            let base_pack_offset = entry
+                                .pack_offset
+                                .checked_sub(base_distance)
+                                .expect("distance to be in range of pack");
+                            match self
+                                .inserted_entry_length_at_offset
+                                .binary_search_by_key(&base_pack_offset, |c| c.pack_offset)
+                            {
+                                Ok(index) => {
+                                    let index = {
+                                        let maybe_index_of_actual_entry = index + 1;
+                                        self.inserted_entry_length_at_offset
+                                            .get(maybe_index_of_actual_entry)
+                                            .and_then(|c| {
+                                                (c.pack_offset == base_pack_offset)
+                                                    .then_some(maybe_index_of_actual_entry)
+                                            })
+                                            .unwrap_or(index)
+                                    };
+                                    let new_distance = self
+                                        .shifted_pack_offset(entry.pack_offset)
+                                        .checked_sub(self.inserted_entry_length_at_offset[index].shifted_pack_offset)
+                                        .expect("a base that is behind us in the pack");
+                                    self.shift_entry_and_point_to_base_by_offset(&mut entry, new_distance);
+                                }
+                                Err(index) => {
+                                    let change_since_offset = self.inserted_entry_length_at_offset[index..]
+                                        .iter()
+                                        .map(|c| c.size_change_in_bytes)
+                                        .sum::<i64>();
+                                    let new_distance: u64 = {
+                                        (base_distance as i64 + change_since_offset)
+                                            .try_into()
+                                            .expect("it still points behind us")
+                                    };
+                                    self.shift_entry_and_point_to_base_by_offset(&mut entry, new_distance);
+                                }
+                            }
+                        } else {
+                            // Offset this entry by all changes (positive or negative) that we saw thus far.
+                            entry.pack_offset = self.shifted_pack_offset(entry.pack_offset);
+                        }
+                    }
+                    Some(Ok(entry))
+                }
+            },
+            other => other,
+        }
+    }
+
+    fn size_hint(&self) -> (usize, Option<usize>) {
+        let (min, max) = self.inner.size_hint();
+        max.map(|max| (min, Some(max * 2))).unwrap_or_else(|| (min * 2, None))
+    }
+}
+
+#[derive(Debug)]
+struct Change {
+    /// The original pack offset as mentioned in the entry we saw. This is used to find this as base object if deltas refer to it by
+    /// old offset.
+    pack_offset: u64,
+    /// The new pack offset that is the shifted location of the pack entry in the pack.
+    shifted_pack_offset: u64,
+    /// The size change of the entry header, negative values denote shrinking, positive denote growing.
+    size_change_in_bytes: i64,
+    /// The object id of the entry responsible for the change, or null if it's an entry just for tracking an insertion.
+    oid: ObjectId,
+}
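
To show how the `lookup` callback is meant to be shaped, a sketch that resolves bases from a hypothetical in-memory map; a real caller would consult an object database instead:

```rust
use std::collections::HashMap;

use gix_hash::ObjectId;
use gix_pack::data::input;

fn resolve_thin_pack<I>(
    entries: I,
    objects: HashMap<ObjectId, (gix_object::Kind, Vec<u8>)>, // stand-in for an odb
) -> impl Iterator<Item = Result<input::Entry, input::Error>>
where
    I: Iterator<Item = Result<input::Entry, input::Error>>,
{
    input::LookupRefDeltaObjectsIter::new(entries, move |id, buf| {
        // Returning `None` makes the iterator yield `Error::NotFound` and fuse.
        let (kind, data) = objects.get(&id)?;
        buf.clear();
        buf.extend_from_slice(data);
        Some(gix_object::Data { kind: *kind, data: buf })
    })
}
```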
diff --git a/vendor/gix-pack/src/data/input/mod.rs b/vendor/gix-pack/src/data/input/mod.rs
new file mode 100644
index 000000000..df191de67
--- /dev/null
+++ b/vendor/gix-pack/src/data/input/mod.rs
@@ -0,0 +1,41 @@
+/// An item of the iteration produced by [`BytesToEntriesIter`]
+#[derive(PartialEq, Eq, Debug, Hash, Ord, PartialOrd, Clone)]
+#[cfg_attr(feature = "serde1", derive(serde::Serialize, serde::Deserialize))]
+pub struct Entry {
+    /// The header of a pack entry
+    pub header: crate::data::entry::Header,
+    /// The amount of bytes used to encode the `header`. `pack_offset + header_size` is the beginning of
+    /// the compressed data in the pack.
+    pub header_size: u16,
+    /// The first byte of the entry at which the `header` can be read.
+    pub pack_offset: u64,
+    /// The bytes consumed while producing `decompressed`.
+    /// These do not contain the header, which makes it possible to easily replace a RefDelta with offset deltas
+    /// when resolving thin packs.
+    /// Depends on `EntryDataMode` when the iterator is initialized.
+    pub compressed: Option<Vec<u8>>,
+    /// The amount of bytes the compressed portion of the entry takes, i.e. the portion behind the header.
+    pub compressed_size: u64,
+    /// The CRC32 over the complete entry, that is the encoded header and compressed object data.
+    /// Depends on `EntryDataMode` when the iterator is initialized.
+    pub crc32: Option<u32>,
+    /// The amount of decompressed bytes of the entry.
+    pub decompressed_size: u64,
+    /// Set for the last object in the iteration, providing the hash over all bytes of the iteration
+    /// for use as trailer in a pack or to verify it matches the trailer.
+    pub trailer: Option<gix_hash::ObjectId>,
+}
+
+mod entry;
+
+mod types;
+pub use types::{EntryDataMode, Error, Mode};
+
+mod bytes_to_entries;
+pub use bytes_to_entries::BytesToEntriesIter;
+
+mod lookup_ref_delta_objects;
+pub use lookup_ref_delta_objects::LookupRefDeltaObjectsIter;
+
+mod entries_to_bytes;
+pub use entries_to_bytes::EntriesToBytesIter;
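
Two helpers over the `Entry` invariants documented above, mainly to spell out the layout: the compressed payload starts at `pack_offset + header_size`, and entries are contiguous in the pack:

```rust
use gix_pack::data::input;

/// Where the compressed payload of `entry` begins in the pack.
fn compressed_data_offset(entry: &input::Entry) -> u64 {
    entry.pack_offset + entry.header_size as u64
}

/// One past the last byte of `entry`, i.e. the following entry's `pack_offset`.
fn entry_end(entry: &input::Entry) -> u64 {
    compressed_data_offset(entry) + entry.compressed_size
}
```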
diff --git a/vendor/gix-pack/src/data/input/types.rs b/vendor/gix-pack/src/data/input/types.rs
new file mode 100644
index 000000000..6fcd459e2
--- /dev/null
+++ b/vendor/gix-pack/src/data/input/types.rs
@@ -0,0 +1,73 @@
+use std::io;
+
+/// Returned by [`BytesToEntriesIter::new_from_header()`][crate::data::input::BytesToEntriesIter::new_from_header()] and as part
+/// of `Item` of [`BytesToEntriesIter`][crate::data::input::BytesToEntriesIter].
+#[derive(thiserror::Error, Debug)]
+#[allow(missing_docs)]
+pub enum Error {
+    #[error("An IO operation failed while streaming an entry")]
+    Io(#[from] io::Error),
+    #[error(transparent)]
+    PackParse(#[from] crate::data::header::decode::Error),
+    #[error("pack checksum in trailer was {expected}, but actual checksum was {actual}")]
+    ChecksumMismatch {
+        expected: gix_hash::ObjectId,
+        actual: gix_hash::ObjectId,
+    },
+    #[error("pack is incomplete: it was decompressed into {actual} bytes but {expected} bytes were expected")]
+    IncompletePack { actual: u64, expected: u64 },
+    #[error("The object {object_id} could not be decoded or wasn't found")]
+    NotFound { object_id: gix_hash::ObjectId },
+}
+
+/// Iteration Mode
+#[derive(PartialEq, Eq, Debug, Hash, Ord, PartialOrd, Clone, Copy)]
+#[cfg_attr(feature = "serde1", derive(serde::Serialize, serde::Deserialize))]
+pub enum Mode {
+    /// Provide the trailer as read from the pack
+    AsIs,
+    /// Generate our own hash and trigger an error on the last iterated object
+    /// if it does not match the hash provided with the pack.
+    ///
+    /// This way the one iterating the data cannot miss corruption as long as
+    /// the iteration is continued through to the end.
+    Verify,
+    /// Generate our own hash and, if there was an error or the objects are depleted early
+    /// due to partial packs, return the last valid entry with our own hash thus far.
+    /// Note that the existing pack hash, if present, will be ignored.
+    /// As we won't know which object fails, every object will have the hash obtained thus far.
+    /// This also means that algorithms must know about this possibility, or else they might wrongfully
+    /// assume the pack is finished.
+    Restore,
+}
+
+/// Define what to do with the compressed bytes portion of a pack [`Entry`][super::Entry]
+#[derive(PartialEq, Eq, Debug, Hash, Ord, PartialOrd, Clone, Copy)]
+#[cfg_attr(feature = "serde1", derive(serde::Serialize, serde::Deserialize))]
+pub enum EntryDataMode {
+    /// Do nothing with the compressed bytes we read
+    Ignore,
+    /// Only create a CRC32 of the entry, otherwise similar to `Ignore`
+    Crc32,
+    /// Keep them and pass them along in a newly allocated buffer
+    Keep,
+    /// As above, but also compute a CRC32
+    KeepAndCrc32,
+}
+
+impl EntryDataMode {
+    /// Returns true if a crc32 should be computed
+    pub fn crc32(&self) -> bool {
+        match self {
+            EntryDataMode::KeepAndCrc32 | EntryDataMode::Crc32 => true,
+            EntryDataMode::Keep | EntryDataMode::Ignore => false,
+        }
+    }
+
+    /// Returns true if compressed bytes should be kept
+    pub fn keep(&self) -> bool {
+        match self {
+            EntryDataMode::Keep | EntryDataMode::KeepAndCrc32 => true,
+            EntryDataMode::Ignore | EntryDataMode::Crc32 => false,
+        }
+    }
+}
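
The two predicates split the four variants cleanly; a quick sanity check mirroring the match arms above:

```rust
use gix_pack::data::input::EntryDataMode;

fn main() {
    assert!(EntryDataMode::KeepAndCrc32.keep() && EntryDataMode::KeepAndCrc32.crc32());
    assert!(EntryDataMode::Keep.keep() && !EntryDataMode::Keep.crc32());
    assert!(!EntryDataMode::Crc32.keep() && EntryDataMode::Crc32.crc32());
    assert!(!EntryDataMode::Ignore.keep() && !EntryDataMode::Ignore.crc32());
}
```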