Merging upstream version 1.70.0+dfsg2.

Signed-off-by: Daniel Baumann <daniel.baumann@progress-linux.org>
author: Daniel Baumann <daniel.baumann@progress-linux.org> 2024-05-04 12:41:41 +0000
committer: Daniel Baumann <daniel.baumann@progress-linux.org> 2024-05-04 12:41:41 +0000
commit: 10ee2acdd26a7f1298c6f6d6b7af9b469fe29b87 (patch)
tree: bdffd5d80c26cf4a7a518281a204be1ace85b4c1 /vendor/gix-pack/src/data/entry
parent: Releasing progress-linux version 1.70.0+dfsg1-9~progress7.99u1. (diff)
download: rustc-10ee2acdd26a7f1298c6f6d6b7af9b469fe29b87.tar.xz
rustc-10ee2acdd26a7f1298c6f6d6b7af9b469fe29b87.zip
3 files changed, 328 insertions, 0 deletions
diff --git a/vendor/gix-pack/src/data/entry/decode.rs b/vendor/gix-pack/src/data/entry/decode.rs
new file mode 100644
index 000000000..79d7aecff
--- /dev/null
+++ b/vendor/gix-pack/src/data/entry/decode.rs
@@ -0,0 +1,125 @@
+use std::io;
+
+use gix_features::decode::{leb64, leb64_from_read};
+
+use super::{BLOB, COMMIT, OFS_DELTA, REF_DELTA, TAG, TREE};
+use crate::data;
+
+/// Decoding of pack entry headers, either from a byte slice or from a reader.
+impl data::Entry {
+    /// Decode an entry from the given entry data `d`, providing the `pack_offset` to allow tracking the start of the entry data section.
+    ///
+    /// # Panics
+    ///
+    /// If the header's type id is not a supported one or if `d` ends before the header does; garbage data is likely to trigger this.
+    pub fn from_bytes(d: &[u8], pack_offset: data::Offset, hash_len: usize) -> data::Entry {
+        let (type_id, size, mut consumed) = parse_header_info(d); // `consumed` counts the header bytes read so far
+
+        use crate::data::entry::Header::*;
+        let object = match type_id {
+            OFS_DELTA => {
+                let (distance, leb_bytes) = leb64(&d[consumed..]); // the base distance follows the type/size bytes
+                let delta = OfsDelta {
+                    base_distance: distance,
+                };
+                consumed += leb_bytes;
+                delta
+            }
+            REF_DELTA => {
+                let delta = RefDelta {
+                    base_id: gix_hash::ObjectId::from(&d[consumed..][..hash_len]), // the base id is the next `hash_len` bytes
+                };
+                consumed += hash_len;
+                delta
+            }
+            BLOB => Blob,
+            TREE => Tree,
+            COMMIT => Commit,
+            TAG => Tag,
+            _ => panic!("We currently don't support any V3 features or extensions"),
+        };
+        data::Entry {
+            header: object,
+            decompressed_size: size,
+            data_offset: pack_offset + consumed as u64, // the entry data starts right after the header
+        }
+    }
+
+    /// Instantiate an `Entry` from the reader `r`, providing the `pack_offset` to allow tracking the start of the entry data section. Read failures are returned as errors, but an unsupported type id panics.
+    pub fn from_read(
+        mut r: impl io::Read,
+        pack_offset: data::Offset,
+        hash_len: usize,
+    ) -> Result<data::Entry, io::Error> {
+        let (type_id, size, mut consumed) = streaming_parse_header_info(&mut r)?; // `consumed` counts the header bytes read so far
+
+        use crate::data::entry::Header::*;
+        let object = match type_id {
+            OFS_DELTA => {
+                let (distance, leb_bytes) = leb64_from_read(&mut r)?;
+                let delta = OfsDelta {
+                    base_distance: distance,
+                };
+                consumed += leb_bytes;
+                delta
+            }
+            REF_DELTA => {
+                let mut buf = gix_hash::Kind::buf(); // NOTE(review): presumably sized for the longest supported hash - confirm
+                let hash = &mut buf[..hash_len];
+                r.read_exact(hash)?;
+                #[allow(clippy::redundant_slicing)] // silences the lint for the `&hash[..]` re-slice below
+                let delta = RefDelta {
+                    base_id: gix_hash::ObjectId::from(&hash[..]),
+                };
+                consumed += hash_len;
+                delta
+            }
+            BLOB => Blob,
+            TREE => Tree,
+            COMMIT => Commit,
+            TAG => Tag,
+            _ => panic!("We currently don't support any V3 features or extensions"),
+        };
+        Ok(data::Entry {
+            header: object,
+            decompressed_size: size,
+            data_offset: pack_offset + consumed as u64, // the entry data starts right after the header
+        })
+    }
+}
+
+#[inline]
+fn streaming_parse_header_info(mut read: impl io::Read) -> Result<(u8, u64, usize), io::Error> { // yields (type id, size, bytes read)
+    let mut byte = [0u8; 1];
+    read.read_exact(&mut byte)?;
+    let mut c = byte[0];
+    let mut i = 1; // number of header bytes read so far
+    let type_id = (c >> 4) & 0b0000_0111; // bits 4..=6 of the first byte
+    let mut size = c as u64 & 0b0000_1111; // the low 4 bits are the least significant size bits
+    let mut s = 4; // shift to apply to the next 7-bit size group
+    while c & 0b1000_0000 != 0 { // a set high bit means another size byte follows
+        read.read_exact(&mut byte)?;
+        c = byte[0];
+        i += 1;
+        size += ((c & 0b0111_1111) as u64) << s;
+        s += 7
+    }
+    Ok((type_id, size, i))
+}
+
+/// Parses the header of a pack-entry in `data`, yielding object type id, decompressed object size, and consumed bytes. Panics if `data` ends before the header does.
+#[inline]
+fn parse_header_info(data: &[u8]) -> (u8, u64, usize) {
+    let mut c = data[0];
+    let mut i = 1; // index of the next byte to read, and the consumed byte count on return
+    let type_id = (c >> 4) & 0b0000_0111; // bits 4..=6 of the first byte
+    let mut size = c as u64 & 0b0000_1111; // the low 4 bits are the least significant size bits
+    let mut s = 4; // shift to apply to the next 7-bit size group
+    while c & 0b1000_0000 != 0 { // a set high bit means another size byte follows
+        c = data[i];
+        i += 1;
+        size += ((c & 0b0111_1111) as u64) << s;
+        s += 7
+    }
+    (type_id, size, i)
+}
diff --git a/vendor/gix-pack/src/data/entry/header.rs b/vendor/gix-pack/src/data/entry/header.rs
new file mode 100644
index 000000000..83983eab0
--- /dev/null
+++ b/vendor/gix-pack/src/data/entry/header.rs
@@ -0,0 +1,150 @@
+use std::io;
+
+use super::{BLOB, COMMIT, OFS_DELTA, REF_DELTA, TAG, TREE};
+use crate::data;
+
+/// The header portion of a pack data entry, identifying the kind of stored object.
+#[derive(PartialEq, Eq, Debug, Hash, Ord, PartialOrd, Clone, Copy)]
+#[cfg_attr(feature = "serde1", derive(serde::Serialize, serde::Deserialize))]
+#[allow(missing_docs)]
+pub enum Header {
+    /// The object is a commit
+    Commit,
+    /// The object is a tree
+    Tree,
+    /// The object is a blob
+    Blob,
+    /// The object is a tag
+    Tag,
+    /// Describes a delta-object which needs to be applied to a base. The base object is identified by the `base_id` field
+    /// which is found within the parent repository.
+    /// Most commonly used for **thin-packs** when receiving pack files from the server to refer to objects that are not
+    /// part of the pack but expected to be present in the receiver's repository.
+    ///
+    /// # Note
+    /// This could also be an object within this pack if the LEB64-encoded offset would be larger than 20 bytes, which is unlikely to
+    /// happen.
+    ///
+    /// **The naming** is exactly the same as the canonical implementation uses, namely **REF_DELTA**.
+    RefDelta { base_id: gix_hash::ObjectId },
+    /// Describes a delta-object present in this pack which acts as base for this object.
+    /// The base object is measured as a distance from this object's
+    /// pack offset, so that `base_pack_offset = this_objects_pack_offset - base_distance`
+    ///
+    /// # Note
+    ///
+    /// **The naming** is exactly the same as the canonical implementation uses, namely **OFS_DELTA**.
+    OfsDelta { base_distance: u64 },
+}
+
+impl Header {
+    /// Subtract `distance` from `pack_offset`, returning `None` if `distance` is 0 or if the subtraction would underflow.
+    pub fn verified_base_pack_offset(pack_offset: data::Offset, distance: u64) -> Option<data::Offset> {
+        if distance == 0 {
+            return None;
+        }
+        pack_offset.checked_sub(distance)
+    }
+    /// Convert the header's object kind into [`gix_object::Kind`] if possible, which it is not for delta headers.
+    pub fn as_kind(&self) -> Option<gix_object::Kind> {
+        use gix_object::Kind::*;
+        Some(match self {
+            Header::Tree => Tree,
+            Header::Blob => Blob,
+            Header::Commit => Commit,
+            Header::Tag => Tag,
+            Header::RefDelta { .. } | Header::OfsDelta { .. } => return None,
+        })
+    }
+    /// Convert this header's object kind into the pack's internal representation
+    pub fn as_type_id(&self) -> u8 {
+        use Header::*;
+        match self {
+            Blob => BLOB,
+            Tree => TREE,
+            Commit => COMMIT,
+            Tag => TAG,
+            OfsDelta { .. } => OFS_DELTA,
+            RefDelta { .. } => REF_DELTA,
+        }
+    }
+    /// Returns true if this is a delta object, i.e. not a full object.
+    pub fn is_delta(&self) -> bool {
+        matches!(self, Header::OfsDelta { .. } | Header::RefDelta { .. })
+    }
+    /// Returns true if this is a base object, i.e. not a delta object.
+    pub fn is_base(&self) -> bool {
+        !self.is_delta()
+    }
+}
+
+impl Header {
+    /// Encode this header along with the given `decompressed_size_in_bytes` into the `out` write stream for use within a data pack.
+    ///
+    /// Returns the amount of bytes written to `out`.
+    /// `decompressed_size_in_bytes` is the full size in bytes of the object that this header represents.
+    pub fn write_to(&self, decompressed_size_in_bytes: u64, mut out: impl io::Write) -> io::Result<usize> {
+        let mut size = decompressed_size_in_bytes;
+        let mut written = 1; // accounts for the final size byte written after the loop
+        let mut c: u8 = (self.as_type_id() << 4) | (size as u8 & 0b0000_1111); // first byte: type id above the low 4 size bits
+        size >>= 4;
+        while size != 0 {
+            out.write_all(&[c | 0b1000_0000])?; // set the high bit as more size bytes follow
+            written += 1;
+            c = size as u8 & 0b0111_1111;
+            size >>= 7;
+        }
+        out.write_all(&[c])?; // the last size byte keeps its high bit clear
+
+        use Header::*;
+        match self {
+            RefDelta { base_id: oid } => {
+                out.write_all(oid.as_slice())?;
+                written += oid.as_slice().len();
+            }
+            OfsDelta { base_distance } => {
+                let mut buf = [0u8; 10]; // the buffer size `leb64_encode` requires
+                let buf = leb64_encode(*base_distance, &mut buf);
+                out.write_all(buf)?;
+                written += buf.len();
+            }
+            Blob | Tree | Commit | Tag => {}
+        }
+        Ok(written)
+    }
+
+    /// The size of the header in bytes when serialized, determined by encoding it into a sink that discards the output.
+    pub fn size(&self, decompressed_size: u64) -> usize {
+        self.write_to(decompressed_size, io::sink())
+            .expect("io::sink() to never fail")
+    }
+}
+
+#[inline]
+fn leb64_encode(mut n: u64, buf: &mut [u8; 10]) -> &[u8] { // encodes `n` into the tail of `buf`, returning the used suffix
+    let mut bytes_written = 1;
+    buf[buf.len() - 1] = n as u8 & 0b0111_1111; // last byte: the low 7 bits with the high bit clear
+    for out in buf.iter_mut().rev().skip(1) { // fill the remaining bytes from back to front
+        n >>= 7;
+        if n == 0 {
+            break;
+        }
+        n -= 1; // NOTE(review): presumably mirrors an increment in the `leb64` decoder - confirm
+        *out = 0b1000_0000 | (n as u8 & 0b0111_1111); // every byte but the last has the high bit set
+        bytes_written += 1;
+    }
+    debug_assert_eq!(n, 0, "BUG: buffer must be large enough to hold a 64 bit integer");
+    &buf[buf.len() - bytes_written..]
+}
+
+#[cfg(test)]
+mod tests {
+    use super::*;
+
+    #[test]
+    fn leb64_encode_max_int() {
+        let mut buf = [0u8; 10];
+        let buf = leb64_encode(u64::MAX, &mut buf); // the largest input must fill the whole buffer
+        assert_eq!(buf.len(), 10, "10 bytes should be used when 64bits are encoded");
+    }
+}
diff --git a/vendor/gix-pack/src/data/entry/mod.rs b/vendor/gix-pack/src/data/entry/mod.rs
new file mode 100644
index 000000000..f11c39c5c
--- /dev/null
+++ b/vendor/gix-pack/src/data/entry/mod.rs
@@ -0,0 +1,53 @@
+use crate::data::Entry;
+
+const _TYPE_EXT1: u8 = 0; // unused: the decoder panics on this type id
+const COMMIT: u8 = 1; // type id of `Header::Commit`
+const TREE: u8 = 2; // type id of `Header::Tree`
+const BLOB: u8 = 3; // type id of `Header::Blob`
+const TAG: u8 = 4; // type id of `Header::Tag`
+const _TYPE_EXT2: u8 = 5; // unused: the decoder panics on this type id
+const OFS_DELTA: u8 = 6; // type id of `Header::OfsDelta`, whose base is given as a distance
+const REF_DELTA: u8 = 7; // type id of `Header::RefDelta`, whose base is given by object id
+
+/// A way to uniquely identify the location of an entry within a pack bundle
+#[derive(PartialEq, Eq, Debug, Hash, Ord, PartialOrd, Clone)]
+#[cfg_attr(feature = "serde1", derive(serde::Serialize, serde::Deserialize))]
+pub struct Location {
+    /// The id of the pack containing the object. It's unique within its frame of reference which is the owning object database.
+    pub pack_id: u32,
+    /// The size of the entry on disk so that the range of bytes of the entry is `pack_offset..pack_offset + entry_size`.
+    pub entry_size: usize,
+    /// The start of the entry in the pack identified by `pack_id`.
+    pub pack_offset: data::Offset,
+}
+
+impl Location {
+    /// Compute a range suitable for lookup in pack data using the [`entry_slice()`][crate::data::File::entry_slice()] method. The range starts at the given `pack_offset`, not at `self.pack_offset`.
+    pub fn entry_range(&self, pack_offset: data::Offset) -> crate::data::EntryRange {
+        pack_offset..pack_offset + self.entry_size as u64
+    }
+}
+
+/// Access to offsets and sizes derived from the entry's header
+impl Entry {
+    /// Compute the pack offset to the base entry of the object represented by this entry. Panics if `distance` is larger than this entry's pack offset.
+    pub fn base_pack_offset(&self, distance: u64) -> data::Offset {
+        let pack_offset = self.data_offset - self.header_size() as u64; // the entry starts `header_size()` bytes before its data
+        pack_offset.checked_sub(distance).expect("in-bound distance of deltas")
+    }
+    /// The pack offset at which this entry starts, i.e. `data_offset` minus the header size
+    pub fn pack_offset(&self) -> data::Offset {
+        self.data_offset - self.header_size() as u64
+    }
+    /// The amount of bytes used to describe this entry in the pack. The header starts at [`Self::pack_offset()`].
+    pub fn header_size(&self) -> usize {
+        self.header.size(self.decompressed_size)
+    }
+}
+
+mod decode;
+
+mod header;
+pub use header::Header;
+
+use crate::data;
author	Daniel Baumann <daniel.baumann@progress-linux.org>	2024-05-04 12:41:41 +0000
committer	Daniel Baumann <daniel.baumann@progress-linux.org>	2024-05-04 12:41:41 +0000
commit	10ee2acdd26a7f1298c6f6d6b7af9b469fe29b87 (patch)
tree	bdffd5d80c26cf4a7a518281a204be1ace85b4c1 /vendor/gix-pack/src/data/entry
parent	Releasing progress-linux version 1.70.0+dfsg1-9~progress7.99u1. (diff)
download	rustc-10ee2acdd26a7f1298c6f6d6b7af9b469fe29b87.tar.xz rustc-10ee2acdd26a7f1298c6f6d6b7af9b469fe29b87.zip