diff options
Diffstat (limited to 'vendor/gix-chunk/src/file')
-rw-r--r-- | vendor/gix-chunk/src/file/decode.rs | 105 | ||||
-rw-r--r-- | vendor/gix-chunk/src/file/index.rs | 107 | ||||
-rw-r--r-- | vendor/gix-chunk/src/file/mod.rs | 20 | ||||
-rw-r--r-- | vendor/gix-chunk/src/file/write.rs | 136 |
4 files changed, 368 insertions, 0 deletions
diff --git a/vendor/gix-chunk/src/file/decode.rs b/vendor/gix-chunk/src/file/decode.rs new file mode 100644 index 000000000..1543ac9be --- /dev/null +++ b/vendor/gix-chunk/src/file/decode.rs @@ -0,0 +1,105 @@
use std::{convert::TryInto, ops::Range};

mod error {
    /// The error returned by [`crate::file::Index::from_bytes()`].
    #[derive(Debug, thiserror::Error)]
    #[allow(missing_docs)]
    pub enum Error {
        #[error("Sentinel value encountered while still processing chunks.")]
        EarlySentinelValue,
        #[error("Sentinel value wasn't found, saw {:?}", std::str::from_utf8(actual.as_ref()).unwrap_or("<non-ascii>"))]
        MissingSentinelValue { actual: crate::Id },
        #[error("The chunk offset {offset} went past the file of length {file_length} - was it truncated?")]
        ChunkSizeOutOfBounds {
            offset: crate::file::Offset,
            file_length: u64,
        },
        #[error("All chunk offsets must be incrementing.")]
        NonIncrementalChunkOffsets,
        #[error("The chunk of kind {:?} was encountered more than once", std::str::from_utf8(kind.as_ref()).unwrap_or("<non-ascii>"))]
        DuplicateChunk { kind: crate::Id },
        #[error("The table of contents would be {expected} bytes, but got only {actual}")]
        TocTooSmall { actual: usize, expected: usize },
        #[error("Empty chunk indices are not allowed as the point of chunked files is to have chunks.")]
        Empty,
    }
}
pub use error::Error;

use crate::{file, file::index};

impl file::Index {
    /// Provided a mapped file at the beginning via `data`, starting at `toc_offset` decode all chunk information to return
    /// an index with `num_chunks` chunks.
    pub fn from_bytes(data: &[u8], toc_offset: usize, num_chunks: u32) -> Result<Self, Error> {
        if num_chunks == 0 {
            return Err(Error::Empty);
        }

        let data_len: u64 = data.len() as u64;
        let mut chunks = Vec::with_capacity(num_chunks as usize);
        let mut toc_entry = &data[toc_offset..];
        // The table of contents must hold `num_chunks` entries plus one trailing sentinel entry.
        let expected_min_size = (num_chunks as usize + 1) * file::Index::ENTRY_SIZE;
        if toc_entry.len() < expected_min_size {
            return Err(Error::TocTooSmall {
                expected: expected_min_size,
                actual: toc_entry.len(),
            });
        }

        for _ in 0..num_chunks {
            // Each TOC entry is a 4-byte chunk id followed by a big-endian u64 offset.
            let (kind, offset) = toc_entry.split_at(4);
            let kind = to_kind(kind);
            if kind == crate::SENTINEL {
                return Err(Error::EarlySentinelValue);
            }
            if chunks.iter().any(|c: &index::Entry| c.kind == kind) {
                return Err(Error::DuplicateChunk { kind });
            }

            let offset = be_u64(offset);
            if offset > data_len {
                return Err(Error::ChunkSizeOutOfBounds {
                    offset,
                    file_length: data_len,
                });
            }
            toc_entry = &toc_entry[file::Index::ENTRY_SIZE..];
            // A chunk ends where the next entry's chunk begins; for the last chunk
            // that is the offset stored in the sentinel entry.
            let next_offset = be_u64(&toc_entry[4..]);
            if next_offset > data_len {
                return Err(Error::ChunkSizeOutOfBounds {
                    offset: next_offset,
                    file_length: data_len,
                });
            }
            if next_offset <= offset {
                return Err(Error::NonIncrementalChunkOffsets);
            }
            chunks.push(index::Entry {
                kind,
                offset: Range {
                    start: offset,
                    end: next_offset,
                },
            })
        }

        // After consuming all chunk entries, the sentinel id must terminate the TOC.
        let sentinel = to_kind(&toc_entry[..4]);
        if sentinel != crate::SENTINEL {
            return Err(Error::MissingSentinelValue { actual: sentinel });
        }

        Ok(file::Index {
            chunks,
            will_write: false,
        })
    }
}

/// Interpret the first four bytes of `data` as a chunk identifier.
fn to_kind(data: &[u8]) -> crate::Id {
    data[..4].try_into().unwrap()
}

/// Read a big-endian `u64` from the first eight bytes of `data`.
fn be_u64(data: &[u8]) -> u64 {
    u64::from_be_bytes(data[..8].try_into().unwrap())
}
diff --git a/vendor/gix-chunk/src/file/index.rs b/vendor/gix-chunk/src/file/index.rs new file mode 100644 index 000000000..5b59f6767 --- /dev/null +++ b/vendor/gix-chunk/src/file/index.rs @@ -0,0 +1,107 @@
use std::ops::Range;

use
crate::file::Index;

///
pub mod offset_by_kind {
    use std::fmt::{Display, Formatter};

    /// The error returned by [Index::offset_by_id()][super::Index::offset_by_id()].
    #[allow(missing_docs)]
    #[derive(Debug)]
    pub struct Error {
        pub kind: crate::Id,
    }

    impl Display for Error {
        fn fmt(&self, f: &mut Formatter<'_>) -> std::fmt::Result {
            write!(
                f,
                "Chunk named {:?} was not found in chunk file index",
                std::str::from_utf8(&self.kind).unwrap_or("<non-ascii>")
            )
        }
    }

    impl std::error::Error for Error {}
}

///
pub mod data_by_kind {
    /// The error returned by [Index::data_by_id()][super::Index::data_by_id()].
    #[derive(Debug, thiserror::Error)]
    #[allow(missing_docs)]
    pub enum Error {
        #[error("The chunk wasn't found in the file index")]
        NotFound(#[from] super::offset_by_kind::Error),
        #[error("The offsets into the file couldn't be represented by usize")]
        FileTooLarge,
    }
}

/// An entry of a chunk file index
pub struct Entry {
    /// The kind of the chunk file
    pub kind: crate::Id,
    /// The offset, relative to the beginning of the file, at which to find the chunk and its end.
    pub offset: Range<crate::file::Offset>,
}

impl Index {
    /// The size of a single index entry in bytes
    pub const ENTRY_SIZE: usize = std::mem::size_of::<u32>() + std::mem::size_of::<u64>();
    /// The smallest possible size of an index, consisting only of the sentinel value pointing past itself.
    pub const EMPTY_SIZE: usize = Index::ENTRY_SIZE;

    /// Returns the size in bytes an index with `num_entries` would take.
    pub const fn size_for_entries(num_entries: usize) -> usize {
        Self::ENTRY_SIZE * (num_entries + 1/*sentinel*/)
    }

    /// Find a chunk of `kind` and return its offset into the data if found
    pub fn offset_by_id(&self, kind: crate::Id) -> Result<Range<crate::file::Offset>, offset_by_kind::Error> {
        self.chunks
            .iter()
            .find_map(|c| (c.kind == kind).then(|| c.offset.clone()))
            .ok_or(offset_by_kind::Error { kind })
    }

    /// Find a chunk of `kind` and return its offset as usize range into the data if found.
    ///
    /// # Panics
    ///
    /// - if the usize conversion fails, which isn't expected as memory maps can't be created if files are too large
    ///   to require such offsets.
    pub fn usize_offset_by_id(&self, kind: crate::Id) -> Result<Range<usize>, offset_by_kind::Error> {
        self.chunks
            .iter()
            .find_map(|c| (c.kind == kind).then(|| crate::range::into_usize_or_panic(c.offset.clone())))
            .ok_or(offset_by_kind::Error { kind })
    }

    /// Like [`Index::usize_offset_by_id()`] but with support for validation and transformation using a function.
    pub fn validated_usize_offset_by_id<T>(
        &self,
        kind: crate::Id,
        validate: impl FnOnce(Range<usize>) -> T,
    ) -> Result<T, offset_by_kind::Error> {
        self.chunks
            .iter()
            .find_map(|c| (c.kind == kind).then(|| crate::range::into_usize_or_panic(c.offset.clone())))
            .map(validate)
            .ok_or(offset_by_kind::Error { kind })
    }

    /// Find a chunk of `kind` and return its data slice based on its offset.
    pub fn data_by_id<'a>(&self, data: &'a [u8], kind: crate::Id) -> Result<&'a [u8], data_by_kind::Error> {
        let offset = self.offset_by_id(kind)?;
        Ok(&data[crate::range::into_usize(offset).ok_or(data_by_kind::Error::FileTooLarge)?])
    }

    /// Return the end offset of the last chunk, which is the highest offset as well.
    /// It's definitely available as we have one or more chunks.
    pub fn highest_offset(&self) -> crate::file::Offset {
        self.chunks.last().expect("at least one chunk").offset.end
    }
}
diff --git a/vendor/gix-chunk/src/file/mod.rs b/vendor/gix-chunk/src/file/mod.rs new file mode 100644 index 000000000..4ddd94999 --- /dev/null +++ b/vendor/gix-chunk/src/file/mod.rs @@ -0,0 +1,20 @@
///
pub mod decode;
///
pub mod index;

///
pub mod write;

/// The offset to a chunk as seen relative to the beginning of the file containing it.
pub type Offset = u64;

/// A chunk file providing a table into the parent data.
pub struct Index {
    /// If true, we use `chunks` in a way that facilitates writing them.
    will_write: bool,
    /// Validated chunks as defined by their index entries.
    ///
    /// Note that this list cannot be empty.
    chunks: Vec<index::Entry>,
}
diff --git a/vendor/gix-chunk/src/file/write.rs b/vendor/gix-chunk/src/file/write.rs new file mode 100644 index 000000000..8189140fe --- /dev/null +++ b/vendor/gix-chunk/src/file/write.rs @@ -0,0 +1,136 @@
use crate::file::{index::Entry, Index};

mod write_chunk {
    use std::collections::VecDeque;

    use crate::file::index;

    /// A [`Write`][std::io::Write] implementation that validates chunk sizes while allowing the user to know
    /// which chunk is to be written next.
    pub struct Chunk<W> {
        // Remaining chunks, in the order they must be written.
        chunks_to_write: VecDeque<index::Entry>,
        inner: W,
        // The chunk currently being written, if any.
        next_chunk: Option<index::Entry>,
        // Bytes written to `inner` since the last call to `next_chunk()`.
        written_bytes: usize,
    }

    impl<W> Chunk<W>
    where
        W: std::io::Write,
    {
        pub(crate) fn new(out: W, chunks: VecDeque<index::Entry>) -> Chunk<W>
        where
            W: std::io::Write,
        {
            Chunk {
                chunks_to_write: chunks,
                inner: out,
                next_chunk: None,
                written_bytes: 0,
            }
        }
    }

    impl<W> std::io::Write for Chunk<W>
    where
        W: std::io::Write,
    {
        fn write(&mut self, buf: &[u8]) -> std::io::Result<usize> {
            // Count written bytes so the size of each chunk can be validated later.
            let written = self.inner.write(buf)?;
            self.written_bytes += written;
            Ok(written)
        }

        fn flush(&mut self) -> std::io::Result<()> {
            self.inner.flush()
        }
    }

    impl<W> Chunk<W> {
        /// Return the inner writer - should only be called once there is no more chunk to write.
        pub fn into_inner(self) -> W {
            self.inner
        }
        /// Return the next chunk-id to write, if there is one.
        pub fn next_chunk(&mut self) -> Option<crate::Id> {
            // Validate that the previous chunk was written with exactly its planned size.
            if let Some(entry) = self.next_chunk.take() {
                assert_eq!(
                    entry.offset.end,
                    self.written_bytes as u64,
                    "BUG: expected to write {} bytes, but only wrote {} for chunk {:?}",
                    entry.offset.end,
                    self.written_bytes,
                    std::str::from_utf8(&entry.kind)
                )
            }
            self.written_bytes = 0;
            self.next_chunk = self.chunks_to_write.pop_front();
            self.next_chunk.as_ref().map(|e| e.kind)
        }
    }
}
pub use write_chunk::Chunk;

/// Writing
impl Index {
    /// Create a new index whose sole purpose is to be receiving chunks using [`plan_chunk()`][Index::plan_chunk()] and to be written to
    /// an output using [`into_write()`][Index::into_write()]
    pub fn for_writing() -> Self {
        Index {
            will_write: true,
            chunks: Vec::new(),
        }
    }
    /// Plan to write a new chunk as part of the index when [`into_write()`][Index::into_write()] is called.
+ pub fn plan_chunk(&mut self, chunk: crate::Id, exact_size_on_disk: u64) { + assert!(self.will_write, "BUG: create the index with `for_writing()`"); + assert!( + !self.chunks.iter().any(|e| e.kind == chunk), + "BUG: must not add chunk of same kind twice: {:?}", + std::str::from_utf8(&chunk) + ); + self.chunks.push(Entry { + kind: chunk, + offset: 0..exact_size_on_disk, + }) + } + + /// Return the total size of all planned chunks thus far. + pub fn planned_storage_size(&self) -> u64 { + assert!(self.will_write, "BUG: create the index with `for_writing()`"); + self.chunks.iter().map(|e| e.offset.end).sum() + } + + /// Return the amount of chunks we currently know. + pub fn num_chunks(&self) -> usize { + self.chunks.len() + } + + /// After [planning all chunks][Index::plan_chunk()] call this method with the destination to write the chunks to. + /// Use the [Chunk] writer to write each chunk in order. + /// `current_offset` is the byte position at which `out` will continue writing. + pub fn into_write<W>(self, mut out: W, current_offset: usize) -> std::io::Result<Chunk<W>> + where + W: std::io::Write, + { + assert!( + self.will_write, + "BUG: create the index with `for_writing()`, cannot write decoded indices" + ); + // First chunk starts past the table of contents + let mut current_offset = (current_offset + Self::size_for_entries(self.num_chunks())) as u64; + + for entry in &self.chunks { + out.write_all(&entry.kind)?; + out.write_all(¤t_offset.to_be_bytes())?; + + current_offset += entry.offset.end; + } + + // sentinel to mark end of chunks + out.write_all(&0u32.to_be_bytes())?; + out.write_all(¤t_offset.to_be_bytes())?; + + Ok(Chunk::new(out, self.chunks.into())) + } +} |