diff options
author | Daniel Baumann <daniel.baumann@progress-linux.org> | 2024-05-04 12:41:41 +0000 |
---|---|---|
committer | Daniel Baumann <daniel.baumann@progress-linux.org> | 2024-05-04 12:41:41 +0000 |
commit | 10ee2acdd26a7f1298c6f6d6b7af9b469fe29b87 (patch) | |
tree | bdffd5d80c26cf4a7a518281a204be1ace85b4c1 /vendor/gix-pack/src/data/entry | |
parent | Releasing progress-linux version 1.70.0+dfsg1-9~progress7.99u1. (diff) | |
download | rustc-10ee2acdd26a7f1298c6f6d6b7af9b469fe29b87.tar.xz rustc-10ee2acdd26a7f1298c6f6d6b7af9b469fe29b87.zip |
Merging upstream version 1.70.0+dfsg2.
Signed-off-by: Daniel Baumann <daniel.baumann@progress-linux.org>
Diffstat (limited to 'vendor/gix-pack/src/data/entry')
-rw-r--r-- | vendor/gix-pack/src/data/entry/decode.rs | 125 | ||||
-rw-r--r-- | vendor/gix-pack/src/data/entry/header.rs | 150 | ||||
-rw-r--r-- | vendor/gix-pack/src/data/entry/mod.rs | 53 |
3 files changed, 328 insertions, 0 deletions
diff --git a/vendor/gix-pack/src/data/entry/decode.rs b/vendor/gix-pack/src/data/entry/decode.rs new file mode 100644 index 000000000..79d7aecff --- /dev/null +++ b/vendor/gix-pack/src/data/entry/decode.rs @@ -0,0 +1,125 @@ +use std::io; + +use gix_features::decode::{leb64, leb64_from_read}; + +use super::{BLOB, COMMIT, OFS_DELTA, REF_DELTA, TAG, TREE}; +use crate::data; + +/// Decoding +impl data::Entry { + /// Decode an entry from the given entry data `d`, providing the `pack_offset` to allow tracking the start of the entry data section. + /// + /// # Panics + /// + /// If we cannot understand the header, garbage data is likely to trigger this. + pub fn from_bytes(d: &[u8], pack_offset: data::Offset, hash_len: usize) -> data::Entry { + let (type_id, size, mut consumed) = parse_header_info(d); + + use crate::data::entry::Header::*; + let object = match type_id { + OFS_DELTA => { + let (distance, leb_bytes) = leb64(&d[consumed..]); + let delta = OfsDelta { + base_distance: distance, + }; + consumed += leb_bytes; + delta + } + REF_DELTA => { + let delta = RefDelta { + base_id: gix_hash::ObjectId::from(&d[consumed..][..hash_len]), + }; + consumed += hash_len; + delta + } + BLOB => Blob, + TREE => Tree, + COMMIT => Commit, + TAG => Tag, + _ => panic!("We currently don't support any V3 features or extensions"), + }; + data::Entry { + header: object, + decompressed_size: size, + data_offset: pack_offset + consumed as u64, + } + } + + /// Instantiate an `Entry` from the reader `r`, providing the `pack_offset` to allow tracking the start of the entry data section. + pub fn from_read( + mut r: impl io::Read, + pack_offset: data::Offset, + hash_len: usize, + ) -> Result<data::Entry, io::Error> { + let (type_id, size, mut consumed) = streaming_parse_header_info(&mut r)?; + + use crate::data::entry::Header::*; + let object = match type_id { + OFS_DELTA => { + let (distance, leb_bytes) = leb64_from_read(&mut r)?; + let delta = OfsDelta { + base_distance: distance, + }; + consumed += leb_bytes; + delta + } + REF_DELTA => { + let mut buf = gix_hash::Kind::buf(); + let hash = &mut buf[..hash_len]; + r.read_exact(hash)?; + #[allow(clippy::redundant_slicing)] + let delta = RefDelta { + base_id: gix_hash::ObjectId::from(&hash[..]), + }; + consumed += hash_len; + delta + } + BLOB => Blob, + TREE => Tree, + COMMIT => Commit, + TAG => Tag, + _ => panic!("We currently don't support any V3 features or extensions"), + }; + Ok(data::Entry { + header: object, + decompressed_size: size, + data_offset: pack_offset + consumed as u64, + }) + } +} + +#[inline] +fn streaming_parse_header_info(mut read: impl io::Read) -> Result<(u8, u64, usize), io::Error> { + let mut byte = [0u8; 1]; + read.read_exact(&mut byte)?; + let mut c = byte[0]; + let mut i = 1; + let type_id = (c >> 4) & 0b0000_0111; + let mut size = c as u64 & 0b0000_1111; + let mut s = 4; + while c & 0b1000_0000 != 0 { + read.read_exact(&mut byte)?; + c = byte[0]; + i += 1; + size += ((c & 0b0111_1111) as u64) << s; + s += 7 + } + Ok((type_id, size, i)) +} + +/// Parses the header of a pack-entry, yielding object type id, decompressed object size, and consumed bytes +#[inline] +fn parse_header_info(data: &[u8]) -> (u8, u64, usize) { + let mut c = data[0]; + let mut i = 1; + let type_id = (c >> 4) & 0b0000_0111; + let mut size = c as u64 & 0b0000_1111; + let mut s = 4; + while c & 0b1000_0000 != 0 { + c = data[i]; + i += 1; + size += ((c & 0b0111_1111) as u64) << s; + s += 7 + } + (type_id, size, i) +} diff --git a/vendor/gix-pack/src/data/entry/header.rs b/vendor/gix-pack/src/data/entry/header.rs new file mode 100644 index 000000000..83983eab0 --- /dev/null +++ b/vendor/gix-pack/src/data/entry/header.rs @@ -0,0 +1,150 @@ +use std::io; + +use super::{BLOB, COMMIT, OFS_DELTA, REF_DELTA, TAG, TREE}; +use crate::data; + +/// The header portion of a pack data entry, identifying the kind of stored object. +#[derive(PartialEq, Eq, Debug, Hash, Ord, PartialOrd, Clone, Copy)] +#[cfg_attr(feature = "serde1", derive(serde::Serialize, serde::Deserialize))] +#[allow(missing_docs)] +pub enum Header { + /// The object is a commit + Commit, + /// The object is a tree + Tree, + /// The object is a blob + Blob, + /// The object is a tag + Tag, + /// Describes a delta-object which needs to be applied to a base. The base object is identified by the `base_id` field + /// which is found within the parent repository. + /// Most commonly used for **thin-packs** when receiving pack files from the server to refer to objects that are not + /// part of the pack but expected to be present in the receivers repository. + /// + /// # Note + /// This could also be an object within this pack if the LSB encoded offset would be larger than 20 bytes, which is unlikely to + /// happen. + /// + /// **The naming** is exactly the same as the canonical implementation uses, namely **REF_DELTA**. + RefDelta { base_id: gix_hash::ObjectId }, + /// Describes a delta-object present in this pack which acts as base for this object. + /// The base object is measured as a distance from this objects + /// pack offset, so that `base_pack_offset = this_objects_pack_offset - base_distance` + /// + /// # Note + /// + /// **The naming** is exactly the same as the canonical implementation uses, namely **OFS_DELTA**. + OfsDelta { base_distance: u64 }, +} + +impl Header { + /// Subtract `distance` from `pack_offset` safely without the chance for overflow or no-ops if `distance` is 0. + pub fn verified_base_pack_offset(pack_offset: data::Offset, distance: u64) -> Option<data::Offset> { + if distance == 0 { + return None; + } + pack_offset.checked_sub(distance) + } + /// Convert the header's object kind into [`gix_object::Kind`] if possible + pub fn as_kind(&self) -> Option<gix_object::Kind> { + use gix_object::Kind::*; + Some(match self { + Header::Tree => Tree, + Header::Blob => Blob, + Header::Commit => Commit, + Header::Tag => Tag, + Header::RefDelta { .. } | Header::OfsDelta { .. } => return None, + }) + } + /// Convert this header's object kind into the packs internal representation + pub fn as_type_id(&self) -> u8 { + use Header::*; + match self { + Blob => BLOB, + Tree => TREE, + Commit => COMMIT, + Tag => TAG, + OfsDelta { .. } => OFS_DELTA, + RefDelta { .. } => REF_DELTA, + } + } + /// Return's true if this is a delta object, i.e. not a full object. + pub fn is_delta(&self) -> bool { + matches!(self, Header::OfsDelta { .. } | Header::RefDelta { .. }) + } + /// Return's true if this is a base object, i.e. not a delta object. + pub fn is_base(&self) -> bool { + !self.is_delta() + } +} + +impl Header { + /// Encode this header along the given `decompressed_size_in_bytes` into the `out` write stream for use within a data pack. + /// + /// Returns the amount of bytes written to `out`. + /// `decompressed_size_in_bytes` is the full size in bytes of the object that this header represents + pub fn write_to(&self, decompressed_size_in_bytes: u64, mut out: impl io::Write) -> io::Result<usize> { + let mut size = decompressed_size_in_bytes; + let mut written = 1; + let mut c: u8 = (self.as_type_id() << 4) | (size as u8 & 0b0000_1111); + size >>= 4; + while size != 0 { + out.write_all(&[c | 0b1000_0000])?; + written += 1; + c = size as u8 & 0b0111_1111; + size >>= 7; + } + out.write_all(&[c])?; + + use Header::*; + match self { + RefDelta { base_id: oid } => { + out.write_all(oid.as_slice())?; + written += oid.as_slice().len(); + } + OfsDelta { base_distance } => { + let mut buf = [0u8; 10]; + let buf = leb64_encode(*base_distance, &mut buf); + out.write_all(buf)?; + written += buf.len(); + } + Blob | Tree | Commit | Tag => {} + } + Ok(written) + } + + /// The size of the header in bytes when serialized + pub fn size(&self, decompressed_size: u64) -> usize { + self.write_to(decompressed_size, io::sink()) + .expect("io::sink() to never fail") + } +} + +#[inline] +fn leb64_encode(mut n: u64, buf: &mut [u8; 10]) -> &[u8] { + let mut bytes_written = 1; + buf[buf.len() - 1] = n as u8 & 0b0111_1111; + for out in buf.iter_mut().rev().skip(1) { + n >>= 7; + if n == 0 { + break; + } + n -= 1; + *out = 0b1000_0000 | (n as u8 & 0b0111_1111); + bytes_written += 1; + } + debug_assert_eq!(n, 0, "BUG: buffer must be large enough to hold a 64 bit integer"); + &buf[buf.len() - bytes_written..] +} + +#[cfg(test)] +mod tests { + use super::*; + + #[test] + fn leb64_encode_max_int() { + let mut buf = [0u8; 10]; + let buf = leb64_encode(u64::MAX, &mut buf); + assert_eq!(buf.len(), 10, "10 bytes should be used when 64bits are encoded"); + } +} diff --git a/vendor/gix-pack/src/data/entry/mod.rs b/vendor/gix-pack/src/data/entry/mod.rs new file mode 100644 index 000000000..f11c39c5c --- /dev/null +++ b/vendor/gix-pack/src/data/entry/mod.rs @@ -0,0 +1,53 @@ +use crate::data::Entry; + +const _TYPE_EXT1: u8 = 0; +const COMMIT: u8 = 1; +const TREE: u8 = 2; +const BLOB: u8 = 3; +const TAG: u8 = 4; +const _TYPE_EXT2: u8 = 5; +const OFS_DELTA: u8 = 6; +const REF_DELTA: u8 = 7; + +/// A way to uniquely identify the location of an entry within a pack bundle +#[derive(PartialEq, Eq, Debug, Hash, Ord, PartialOrd, Clone)] +#[cfg_attr(feature = "serde1", derive(serde::Serialize, serde::Deserialize))] +pub struct Location { + /// The id of the pack containing the object. It's unique within its frame of reference which is the owning object database. + pub pack_id: u32, + /// The size of the entry of disk so that the range of bytes of the entry is `pack_offset..pack_offset + entry_size`. + pub entry_size: usize, + /// The start of the entry in the pack identified by `pack_id`. + pub pack_offset: data::Offset, +} + +impl Location { + /// Compute a range suitable for lookup in pack data using the [`entry_slice()`][crate::data::File::entry_slice()] method. + pub fn entry_range(&self, pack_offset: data::Offset) -> crate::data::EntryRange { + pack_offset..pack_offset + self.entry_size as u64 + } +} + +/// Access +impl Entry { + /// Compute the pack offset to the base entry of the object represented by this entry. + pub fn base_pack_offset(&self, distance: u64) -> data::Offset { + let pack_offset = self.data_offset - self.header_size() as u64; + pack_offset.checked_sub(distance).expect("in-bound distance of deltas") + } + /// The pack offset at which this entry starts + pub fn pack_offset(&self) -> data::Offset { + self.data_offset - self.header_size() as u64 + } + /// The amount of bytes used to describe this entry in the pack. The header starts at [`Self::pack_offset()`] + pub fn header_size(&self) -> usize { + self.header.size(self.decompressed_size) + } +} + +mod decode; + +mod header; +pub use header::Header; + +use crate::data; |