author     Daniel Baumann <daniel.baumann@progress-linux.org>  2024-05-04 12:41:41 +0000
committer  Daniel Baumann <daniel.baumann@progress-linux.org>  2024-05-04 12:41:41 +0000
commit     10ee2acdd26a7f1298c6f6d6b7af9b469fe29b87 (patch)
tree       bdffd5d80c26cf4a7a518281a204be1ace85b4c1 /vendor/gix-pack/src/data/entry
parent     Releasing progress-linux version 1.70.0+dfsg1-9~progress7.99u1. (diff)
Merging upstream version 1.70.0+dfsg2.
Signed-off-by: Daniel Baumann <daniel.baumann@progress-linux.org>
Diffstat (limited to 'vendor/gix-pack/src/data/entry')
-rw-r--r--  vendor/gix-pack/src/data/entry/decode.rs  | 125
-rw-r--r--  vendor/gix-pack/src/data/entry/header.rs  | 150
-rw-r--r--  vendor/gix-pack/src/data/entry/mod.rs     |  53
3 files changed, 328 insertions(+), 0 deletions(-)
diff --git a/vendor/gix-pack/src/data/entry/decode.rs b/vendor/gix-pack/src/data/entry/decode.rs
new file mode 100644
index 000000000..79d7aecff
--- /dev/null
+++ b/vendor/gix-pack/src/data/entry/decode.rs
@@ -0,0 +1,125 @@
+use std::io;
+
+use gix_features::decode::{leb64, leb64_from_read};
+
+use super::{BLOB, COMMIT, OFS_DELTA, REF_DELTA, TAG, TREE};
+use crate::data;
+
+/// Decoding
+impl data::Entry {
+    /// Decode an entry from the given entry data `d`, providing the `pack_offset` to allow tracking the start of the entry data section.
+    ///
+    /// # Panics
+    ///
+    /// If the header cannot be understood, which is most likely caused by garbage data.
+    pub fn from_bytes(d: &[u8], pack_offset: data::Offset, hash_len: usize) -> data::Entry {
+        let (type_id, size, mut consumed) = parse_header_info(d);
+
+        use crate::data::entry::Header::*;
+        let object = match type_id {
+            OFS_DELTA => {
+                let (distance, leb_bytes) = leb64(&d[consumed..]);
+                let delta = OfsDelta {
+                    base_distance: distance,
+                };
+                consumed += leb_bytes;
+                delta
+            }
+            REF_DELTA => {
+                let delta = RefDelta {
+                    base_id: gix_hash::ObjectId::from(&d[consumed..][..hash_len]),
+                };
+                consumed += hash_len;
+                delta
+            }
+            BLOB => Blob,
+            TREE => Tree,
+            COMMIT => Commit,
+            TAG => Tag,
+            _ => panic!("We currently don't support any V3 features or extensions"),
+        };
+        data::Entry {
+            header: object,
+            decompressed_size: size,
+            data_offset: pack_offset + consumed as u64,
+        }
+    }
+
+    /// Instantiate an `Entry` from the reader `r`, providing the `pack_offset` to allow tracking the start of the entry data section.
+    pub fn from_read(
+        mut r: impl io::Read,
+        pack_offset: data::Offset,
+        hash_len: usize,
+    ) -> Result<data::Entry, io::Error> {
+        let (type_id, size, mut consumed) = streaming_parse_header_info(&mut r)?;
+
+        use crate::data::entry::Header::*;
+        let object = match type_id {
+            OFS_DELTA => {
+                let (distance, leb_bytes) = leb64_from_read(&mut r)?;
+                let delta = OfsDelta {
+                    base_distance: distance,
+                };
+                consumed += leb_bytes;
+                delta
+            }
+            REF_DELTA => {
+                let mut buf = gix_hash::Kind::buf();
+                let hash = &mut buf[..hash_len];
+                r.read_exact(hash)?;
+                #[allow(clippy::redundant_slicing)]
+                let delta = RefDelta {
+                    base_id: gix_hash::ObjectId::from(&hash[..]),
+                };
+                consumed += hash_len;
+                delta
+            }
+            BLOB => Blob,
+            TREE => Tree,
+            COMMIT => Commit,
+            TAG => Tag,
+            _ => panic!("We currently don't support any V3 features or extensions"),
+        };
+        Ok(data::Entry {
+            header: object,
+            decompressed_size: size,
+            data_offset: pack_offset + consumed as u64,
+        })
+    }
+}
+
+#[inline]
+fn streaming_parse_header_info(mut read: impl io::Read) -> Result<(u8, u64, usize), io::Error> {
+    let mut byte = [0u8; 1];
+    read.read_exact(&mut byte)?;
+    let mut c = byte[0];
+    let mut i = 1;
+    let type_id = (c >> 4) & 0b0000_0111;
+    let mut size = c as u64 & 0b0000_1111;
+    let mut s = 4;
+    while c & 0b1000_0000 != 0 {
+        read.read_exact(&mut byte)?;
+        c = byte[0];
+        i += 1;
+        size += ((c & 0b0111_1111) as u64) << s;
+        s += 7
+    }
+    Ok((type_id, size, i))
+}
+
+/// Parses the header of a pack-entry, yielding object type id, decompressed object size, and consumed bytes
+#[inline]
+fn parse_header_info(data: &[u8]) -> (u8, u64, usize) {
+    let mut c = data[0];
+    let mut i = 1;
+    let type_id = (c >> 4) & 0b0000_0111;
+    let mut size = c as u64 & 0b0000_1111;
+    let mut s = 4;
+    while c & 0b1000_0000 != 0 {
+        c = data[i];
+        i += 1;
+        size += ((c & 0b0111_1111) as u64) << s;
+        s += 7
+    }
+    (type_id, size, i)
+}
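
The header layout parsed above stores the object type in bits 6..4 of the first byte and the low four bits of the decompressed size in bits 3..0, with the most significant bit acting as a continuation flag; every continuation byte contributes seven further size bits. The following standalone sketch mirrors that scheme for a worked example; the function name and the example bytes are illustrative and not part of the patch.

// Sketch only: mirrors parse_header_info() above for a worked example.
fn parse_entry_header(data: &[u8]) -> (u8, u64, usize) {
    let mut c = data[0];
    let mut i = 1;
    let type_id = (c >> 4) & 0b0000_0111; // bits 6..4: object type
    let mut size = c as u64 & 0b0000_1111; // bits 3..0: least significant size bits
    let mut shift = 4;
    while c & 0b1000_0000 != 0 {
        // MSB set: another size byte follows, contributing 7 more bits.
        c = data[i];
        i += 1;
        size += ((c & 0b0111_1111) as u64) << shift;
        shift += 7;
    }
    (type_id, size, i)
}

fn main() {
    // 0xBC = 0b1_011_1100: continuation bit set, type 3 (blob), low size bits 12.
    // 0x12 = 18, no continuation: adds 18 << 4 = 288, for a total size of 300.
    assert_eq!(parse_entry_header(&[0xBC, 0x12]), (3, 300, 2));
}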
diff --git a/vendor/gix-pack/src/data/entry/header.rs b/vendor/gix-pack/src/data/entry/header.rs
new file mode 100644
index 000000000..83983eab0
--- /dev/null
+++ b/vendor/gix-pack/src/data/entry/header.rs
@@ -0,0 +1,150 @@
+use std::io;
+
+use super::{BLOB, COMMIT, OFS_DELTA, REF_DELTA, TAG, TREE};
+use crate::data;
+
+/// The header portion of a pack data entry, identifying the kind of stored object.
+#[derive(PartialEq, Eq, Debug, Hash, Ord, PartialOrd, Clone, Copy)]
+#[cfg_attr(feature = "serde1", derive(serde::Serialize, serde::Deserialize))]
+#[allow(missing_docs)]
+pub enum Header {
+    /// The object is a commit
+    Commit,
+    /// The object is a tree
+    Tree,
+    /// The object is a blob
+    Blob,
+    /// The object is a tag
+    Tag,
+    /// Describes a delta-object which needs to be applied to a base. The base object is identified by the `base_id` field
+    /// and is expected to be found in the parent repository.
+    /// Most commonly used for **thin-packs** when receiving pack files from the server, to refer to objects that are not
+    /// part of the pack but expected to be present in the receiver's repository.
+    ///
+    /// # Note
+    /// This could also be an object within this pack if the LSB encoded offset would be larger than 20 bytes, which is unlikely to
+    /// happen.
+    ///
+    /// **The naming** is exactly the same as the canonical implementation uses, namely **REF_DELTA**.
+    RefDelta { base_id: gix_hash::ObjectId },
+    /// Describes a delta-object whose base object is present within this pack.
+    /// The base object is located at a distance from this object's
+    /// pack offset, so that `base_pack_offset = this_objects_pack_offset - base_distance`.
+    ///
+    /// # Note
+    ///
+    /// **The naming** is exactly the same as the canonical implementation uses, namely **OFS_DELTA**.
+    OfsDelta { base_distance: u64 },
+}
+
+impl Header {
+    /// Subtract `distance` from `pack_offset` safely without the chance of overflow, returning `None` if `distance` is 0 (a no-op).
+    pub fn verified_base_pack_offset(pack_offset: data::Offset, distance: u64) -> Option<data::Offset> {
+        if distance == 0 {
+            return None;
+        }
+        pack_offset.checked_sub(distance)
+    }
+    /// Convert the header's object kind into [`gix_object::Kind`] if possible.
+    pub fn as_kind(&self) -> Option<gix_object::Kind> {
+        use gix_object::Kind::*;
+        Some(match self {
+            Header::Tree => Tree,
+            Header::Blob => Blob,
+            Header::Commit => Commit,
+            Header::Tag => Tag,
+            Header::RefDelta { .. } | Header::OfsDelta { .. } => return None,
+        })
+    }
+    /// Convert this header's object kind into the pack's internal representation.
+    pub fn as_type_id(&self) -> u8 {
+        use Header::*;
+        match self {
+            Blob => BLOB,
+            Tree => TREE,
+            Commit => COMMIT,
+            Tag => TAG,
+            OfsDelta { .. } => OFS_DELTA,
+            RefDelta { .. } => REF_DELTA,
+        }
+    }
+    /// Returns `true` if this is a delta object, i.e. not a full object.
+    pub fn is_delta(&self) -> bool {
+        matches!(self, Header::OfsDelta { .. } | Header::RefDelta { .. })
+    }
+    /// Returns `true` if this is a base object, i.e. not a delta object.
+    pub fn is_base(&self) -> bool {
+        !self.is_delta()
+    }
+}
+
+impl Header {
+    /// Encode this header, along with the given `decompressed_size_in_bytes`, into the `out` write stream for use within a data pack.
+    ///
+    /// Returns the number of bytes written to `out`.
+    /// `decompressed_size_in_bytes` is the full size in bytes of the object that this header represents.
+    pub fn write_to(&self, decompressed_size_in_bytes: u64, mut out: impl io::Write) -> io::Result<usize> {
+        let mut size = decompressed_size_in_bytes;
+        let mut written = 1;
+        let mut c: u8 = (self.as_type_id() << 4) | (size as u8 & 0b0000_1111);
+        size >>= 4;
+        while size != 0 {
+            out.write_all(&[c | 0b1000_0000])?;
+            written += 1;
+            c = size as u8 & 0b0111_1111;
+            size >>= 7;
+        }
+        out.write_all(&[c])?;
+
+        use Header::*;
+        match self {
+            RefDelta { base_id: oid } => {
+                out.write_all(oid.as_slice())?;
+                written += oid.as_slice().len();
+            }
+            OfsDelta { base_distance } => {
+                let mut buf = [0u8; 10];
+                let buf = leb64_encode(*base_distance, &mut buf);
+                out.write_all(buf)?;
+                written += buf.len();
+            }
+            Blob | Tree | Commit | Tag => {}
+        }
+        Ok(written)
+    }
+
+    /// The size of the header in bytes when serialized.
+    pub fn size(&self, decompressed_size: u64) -> usize {
+        self.write_to(decompressed_size, io::sink())
+            .expect("io::sink() to never fail")
+    }
+}
+
+#[inline]
+fn leb64_encode(mut n: u64, buf: &mut [u8; 10]) -> &[u8] {
+    let mut bytes_written = 1;
+    buf[buf.len() - 1] = n as u8 & 0b0111_1111;
+    for out in buf.iter_mut().rev().skip(1) {
+        n >>= 7;
+        if n == 0 {
+            break;
+        }
+        n -= 1;
+        *out = 0b1000_0000 | (n as u8 & 0b0111_1111);
+        bytes_written += 1;
+    }
+    debug_assert_eq!(n, 0, "BUG: buffer must be large enough to hold a 64 bit integer");
+    &buf[buf.len() - bytes_written..]
+}
+
+#[cfg(test)]
+mod tests {
+    use super::*;
+
+    #[test]
+    fn leb64_encode_max_int() {
+        let mut buf = [0u8; 10];
+        let buf = leb64_encode(u64::MAX, &mut buf);
+        assert_eq!(buf.len(), 10, "10 bytes should be used when 64 bits are encoded");
+    }
+}
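
leb64_encode above produces git's OFS_DELTA offset encoding: big-endian groups of seven bits, with the most significant bit of each byte marking continuation and a bias of one removed from every continuation group. The sketch below pairs it with a matching decoder for a round-trip check; the names are illustrative, and in gix the decoding counterpart is expected to live in gix_features::decode::leb64.

// Sketch only: a decoder matching the offset encoding produced by leb64_encode() above.
fn decode_offset(buf: &[u8]) -> (u64, usize) {
    let mut c = buf[0];
    let mut i = 1;
    let mut value = (c & 0b0111_1111) as u64;
    while c & 0b1000_0000 != 0 {
        c = buf[i];
        i += 1;
        value += 1; // re-apply the bias the encoder removed with `n -= 1`
        value = (value << 7) + (c & 0b0111_1111) as u64;
    }
    (value, i)
}

// Same logic as leb64_encode() above, repeated so this sketch is self-contained.
fn encode_offset(mut n: u64, buf: &mut [u8; 10]) -> &[u8] {
    let mut bytes_written = 1;
    buf[buf.len() - 1] = n as u8 & 0b0111_1111;
    for out in buf.iter_mut().rev().skip(1) {
        n >>= 7;
        if n == 0 {
            break;
        }
        n -= 1;
        *out = 0b1000_0000 | (n as u8 & 0b0111_1111);
        bytes_written += 1;
    }
    &buf[buf.len() - bytes_written..]
}

fn main() {
    let mut buf = [0u8; 10];
    let encoded = encode_offset(300, &mut buf);
    assert_eq!(encoded, &[0b1000_0001u8, 0b0010_1100][..]); // 300 encodes as 0x81 0x2C
    assert_eq!(decode_offset(encoded), (300, 2)); // and round-trips in two bytes
}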
diff --git a/vendor/gix-pack/src/data/entry/mod.rs b/vendor/gix-pack/src/data/entry/mod.rs
new file mode 100644
index 000000000..f11c39c5c
--- /dev/null
+++ b/vendor/gix-pack/src/data/entry/mod.rs
@@ -0,0 +1,53 @@
+use crate::data::Entry;
+
+const _TYPE_EXT1: u8 = 0;
+const COMMIT: u8 = 1;
+const TREE: u8 = 2;
+const BLOB: u8 = 3;
+const TAG: u8 = 4;
+const _TYPE_EXT2: u8 = 5;
+const OFS_DELTA: u8 = 6;
+const REF_DELTA: u8 = 7;
+
+/// A way to uniquely identify the location of an entry within a pack bundle
+#[derive(PartialEq, Eq, Debug, Hash, Ord, PartialOrd, Clone)]
+#[cfg_attr(feature = "serde1", derive(serde::Serialize, serde::Deserialize))]
+pub struct Location {
+    /// The id of the pack containing the object. It's unique within its frame of reference, which is the owning object database.
+    pub pack_id: u32,
+    /// The size of the entry on disk, so that the range of bytes of the entry is `pack_offset..pack_offset + entry_size`.
+    pub entry_size: usize,
+    /// The start of the entry in the pack identified by `pack_id`.
+    pub pack_offset: data::Offset,
+}
+
+impl Location {
+    /// Compute a range suitable for lookup in pack data using the [`entry_slice()`][crate::data::File::entry_slice()] method.
+    pub fn entry_range(&self, pack_offset: data::Offset) -> crate::data::EntryRange {
+        pack_offset..pack_offset + self.entry_size as u64
+    }
+}
+
+/// Access
+impl Entry {
+    /// Compute the pack offset to the base entry of the object represented by this entry.
+    pub fn base_pack_offset(&self, distance: u64) -> data::Offset {
+        let pack_offset = self.data_offset - self.header_size() as u64;
+        pack_offset.checked_sub(distance).expect("in-bound distance of deltas")
+    }
+    /// The pack offset at which this entry starts.
+    pub fn pack_offset(&self) -> data::Offset {
+        self.data_offset - self.header_size() as u64
+    }
+    /// The number of bytes used to describe this entry in the pack. The header starts at [`Self::pack_offset()`].
+    pub fn header_size(&self) -> usize {
+        self.header.size(self.decompressed_size)
+    }
+}
+
+mod decode;
+
+mod header;
+pub use header::Header;
+
+use crate::data;
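
Taken together, the three files cover decoding, describing, and re-encoding a single pack entry header. The following is a hedged usage sketch, assuming the crate is consumed as gix_pack and that the fields and methods shown above are publicly reachable; the helper name and the SHA-1 hash length of 20 are illustrative, and the snippet is meant as a library-style helper rather than a full program.

// Sketch only: decode one entry from a pack's byte slice and inspect it,
// assuming gix_pack::data::Entry exposes the fields and methods shown above.
use gix_pack::data;

fn describe_entry(entry_bytes: &[u8], pack_offset: data::Offset) {
    // 20 is the hash length of SHA-1 packs; SHA-256 packs would pass 32.
    let entry = data::Entry::from_bytes(entry_bytes, pack_offset, 20);

    // The compressed object data follows the header immediately; these equalities
    // hold for canonically encoded entries, which is what header_size() assumes.
    assert_eq!(entry.data_offset, pack_offset + entry.header_size() as u64);
    assert_eq!(entry.pack_offset(), pack_offset);

    println!(
        "kind: {:?}, decompressed size: {}, header size: {} bytes, delta: {}",
        entry.header,
        entry.decompressed_size,
        entry.header_size(),
        entry.header.is_delta(),
    );
}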