summaryrefslogtreecommitdiffstats
path: root/vendor/gix-chunk/src/file
diff options
context:
space:
mode:
Diffstat (limited to 'vendor/gix-chunk/src/file')
-rw-r--r--vendor/gix-chunk/src/file/decode.rs105
-rw-r--r--vendor/gix-chunk/src/file/index.rs107
-rw-r--r--vendor/gix-chunk/src/file/mod.rs20
-rw-r--r--vendor/gix-chunk/src/file/write.rs136
4 files changed, 368 insertions, 0 deletions
diff --git a/vendor/gix-chunk/src/file/decode.rs b/vendor/gix-chunk/src/file/decode.rs
new file mode 100644
index 000000000..1543ac9be
--- /dev/null
+++ b/vendor/gix-chunk/src/file/decode.rs
@@ -0,0 +1,105 @@
+use std::{convert::TryInto, ops::Range};
+
+mod error {
+ /// The error returned by [`Index::from_bytes()`][crate::file::Index::from_bytes()] when the table of contents
+ /// of a chunk file cannot be decoded.
+ #[derive(Debug, thiserror::Error)]
+ #[allow(missing_docs)]
+ pub enum Error {
+ /// One of the entries that should describe a chunk carried the sentinel id instead.
+ #[error("Sentinel value encountered while still processing chunks.")]
+ EarlySentinelValue,
+ /// The entry following the last chunk entry did not carry the sentinel id; `actual` is the id found there.
+ #[error("Sentinel value wasn't found, saw {:?}", std::str::from_utf8(actual.as_ref()).unwrap_or("<non-ascii>"))]
+ MissingSentinelValue { actual: crate::Id },
+ /// The start or end `offset` of a chunk is larger than `file_length`, the length of the decoded data.
+ #[error("The chunk offset {offset} went past the file of length {file_length} - was it truncated?")]
+ ChunkSizeOutOfBounds {
+ offset: crate::file::Offset,
+ file_length: u64,
+ },
+ /// The offset of an entry was not strictly smaller than the offset of the entry following it.
+ #[error("All chunk offsets must be incrementing.")]
+ NonIncrementalChunkOffsets,
+ /// Two entries of the table of contents carried the same chunk id `kind`.
+ #[error("The chunk of kind {:?} was encountered more than once", std::str::from_utf8(kind.as_ref()).unwrap_or("<non-ascii>"))]
+ DuplicateChunk { kind: crate::Id },
+ /// Only `actual` bytes are available for the table of contents, but the requested amount of chunks
+ /// plus the sentinel entry need `expected` bytes.
+ #[error("The table of contents would be {expected} bytes, but got only {actual}")]
+ TocTooSmall { actual: usize, expected: usize },
+ /// The caller asked to decode an index with zero chunks.
+ #[error("Empty chunk indices are not allowed as the point of chunked files is to have chunks.")]
+ Empty,
+ }
+}
+pub use error::Error;
+
+use crate::{file, file::index};
+
+impl file::Index {
+    /// Decode the table of contents (TOC) of a chunk file into an index with `num_chunks` chunks.
+    ///
+    /// `data` is the mapped file, starting at its very first byte, and `toc_offset` is the position within
+    /// `data` at which the TOC begins. The TOC must hold `num_chunks` entries followed by one sentinel entry,
+    /// each consisting of a 4 byte chunk id and an 8 byte big-endian offset relative to the start of `data`.
+    /// The end of a chunk is the offset stored in the entry following it.
+    ///
+    /// # Errors
+    ///
+    /// - [`Error::Empty`] if `num_chunks` is 0.
+    /// - [`Error::TocTooSmall`] if `data` does not hold `num_chunks + 1` entries past `toc_offset`, which
+    ///   includes a `toc_offset` that points past the end of `data`.
+    /// - [`Error::EarlySentinelValue`] if a chunk entry carries the sentinel id.
+    /// - [`Error::DuplicateChunk`] if a chunk id occurs more than once.
+    /// - [`Error::ChunkSizeOutOfBounds`] if the start or end of a chunk lies past the end of `data`.
+    /// - [`Error::NonIncrementalChunkOffsets`] if a chunk does not end after its own start.
+    /// - [`Error::MissingSentinelValue`] if the entry after the last chunk is not the sentinel.
+    pub fn from_bytes(data: &[u8], toc_offset: usize, num_chunks: u32) -> Result<Self, Error> {
+        if num_chunks == 0 {
+            return Err(Error::Empty);
+        }
+
+        let data_len: u64 = data.len() as u64;
+        // Saturate rather than overflow where `usize` is 32 bits wide; a saturated value can never be
+        // satisfied by a slice, so it is reported as a TOC that is too small.
+        let expected_min_size = (num_chunks as usize)
+            .saturating_add(1)
+            .saturating_mul(file::Index::ENTRY_SIZE);
+        // `toc_offset` comes from file contents, so treat an out-of-range value as "no TOC bytes available"
+        // instead of panicking on the slice operation.
+        let mut toc_entry = data.get(toc_offset..).unwrap_or(&[]);
+        if toc_entry.len() < expected_min_size {
+            return Err(Error::TocTooSmall {
+                expected: expected_min_size,
+                actual: toc_entry.len(),
+            });
+        }
+
+        // Allocate only now that `num_chunks` is known to be backed by actual data, so a bogus count
+        // cannot trigger an enormous allocation.
+        let mut chunks: Vec<index::Entry> = Vec::with_capacity(num_chunks as usize);
+        for _ in 0..num_chunks {
+            let (kind, offset) = toc_entry.split_at(4);
+            let kind = to_kind(kind);
+            if kind == crate::SENTINEL {
+                return Err(Error::EarlySentinelValue);
+            }
+            if chunks.iter().any(|c| c.kind == kind) {
+                return Err(Error::DuplicateChunk { kind });
+            }
+
+            let offset = be_u64(offset);
+            if offset > data_len {
+                return Err(Error::ChunkSizeOutOfBounds {
+                    offset,
+                    file_length: data_len,
+                });
+            }
+            // Advance to the following entry, whose offset doubles as the end of the current chunk.
+            // The size check above guarantees that this entry, possibly the sentinel, exists.
+            toc_entry = &toc_entry[file::Index::ENTRY_SIZE..];
+            let next_offset = be_u64(&toc_entry[4..]);
+            if next_offset > data_len {
+                return Err(Error::ChunkSizeOutOfBounds {
+                    offset: next_offset,
+                    file_length: data_len,
+                });
+            }
+            if next_offset <= offset {
+                return Err(Error::NonIncrementalChunkOffsets);
+            }
+            chunks.push(index::Entry {
+                kind,
+                offset: Range {
+                    start: offset,
+                    end: next_offset,
+                },
+            });
+        }
+
+        let sentinel = to_kind(&toc_entry[..4]);
+        if sentinel != crate::SENTINEL {
+            return Err(Error::MissingSentinelValue { actual: sentinel });
+        }
+
+        Ok(file::Index {
+            chunks,
+            will_write: false,
+        })
+    }
+}
+
+/// Turn the first four bytes of `data` into a chunk id.
+///
+/// Panics if `data` holds fewer than four bytes.
+fn to_kind(data: &[u8]) -> crate::Id {
+    let id_bytes = &data[..4];
+    id_bytes.try_into().unwrap()
+}
+
+/// Read the first eight bytes of `data` as big-endian `u64`.
+///
+/// Panics if `data` holds fewer than eight bytes.
+fn be_u64(data: &[u8]) -> u64 {
+    let mut raw = [0u8; 8];
+    raw.copy_from_slice(&data[..8]);
+    u64::from_be_bytes(raw)
+}
diff --git a/vendor/gix-chunk/src/file/index.rs b/vendor/gix-chunk/src/file/index.rs
new file mode 100644
index 000000000..5b59f6767
--- /dev/null
+++ b/vendor/gix-chunk/src/file/index.rs
@@ -0,0 +1,107 @@
+use std::ops::Range;
+
+use crate::file::Index;
+
+///
+pub mod offset_by_kind {
+ use std::fmt::{Display, Formatter};
+
+ /// The error returned by [Index::offset_by_kind()][super::Index::offset_by_id()].
+ #[allow(missing_docs)]
+ #[derive(Debug)]
+ pub struct Error {
+ pub kind: crate::Id,
+ }
+
+ impl Display for Error {
+ fn fmt(&self, f: &mut Formatter<'_>) -> std::fmt::Result {
+ write!(
+ f,
+ "Chunk named {:?} was not found in chunk file index",
+ std::str::from_utf8(&self.kind).unwrap_or("<non-ascii>")
+ )
+ }
+ }
+
+ impl std::error::Error for Error {}
+}
+
+/// The error type for looking up the data of a chunk by its kind.
+pub mod data_by_kind {
+ /// The error returned by [Index::data_by_kind()][super::Index::data_by_id()].
+ #[derive(Debug, thiserror::Error)]
+ #[allow(missing_docs)]
+ pub enum Error {
+ /// No chunk of the requested kind is listed in the index; wraps the error of the offset lookup.
+ #[error("The chunk wasn't found in the file index")]
+ NotFound(#[from] super::offset_by_kind::Error),
+ /// The `u64` offsets of the chunk could not be converted into a `usize` range for slicing.
+ #[error("The offsets into the file couldn't be represented by usize")]
+ FileTooLarge,
+ }
+}
+
+/// An entry of a chunk file index, associating the kind of a chunk with the byte range it occupies.
+pub struct Entry {
+ /// The kind of the chunk, which is the id stored in the table of contents.
+ pub kind: crate::Id,
+ /// The offset, relative to the beginning of the file, at which to find the chunk and its end.
+ ///
+ /// For entries that were planned for writing with `Index::plan_chunk()`, the range is `0..size` instead,
+ /// with `size` being the amount of bytes the chunk will take on disk.
+ pub offset: Range<crate::file::Offset>,
+}
+
+impl Index {
+    /// The size of a single index entry in bytes: a 4 byte chunk id followed by an 8 byte offset.
+    pub const ENTRY_SIZE: usize = std::mem::size_of::<u32>() + std::mem::size_of::<u64>();
+    /// The smallest possible size of an index, consisting only of the sentinel value pointing past itself.
+    pub const EMPTY_SIZE: usize = Index::ENTRY_SIZE;
+
+    /// Returns the size in bytes an index with `num_entries` would take, including the trailing sentinel entry.
+    pub const fn size_for_entries(num_entries: usize) -> usize {
+        Self::ENTRY_SIZE * (num_entries + 1/*sentinel*/)
+    }
+
+    /// Find a chunk of `kind` and return its offset into the data if found.
+    ///
+    /// This is the single place performing the lookup, all other `*_by_id()` methods build on it.
+    pub fn offset_by_id(&self, kind: crate::Id) -> Result<Range<crate::file::Offset>, offset_by_kind::Error> {
+        self.chunks
+            .iter()
+            .find(|c| c.kind == kind)
+            .map(|c| c.offset.clone())
+            .ok_or(offset_by_kind::Error { kind })
+    }
+
+    /// Find a chunk of `kind` and return its offset as usize range into the data if found.
+    ///
+    /// # Panics
+    ///
+    /// - if the usize conversion fails, which isn't expected as memory maps can't be created if files are too large
+    ///   to require such offsets.
+    pub fn usize_offset_by_id(&self, kind: crate::Id) -> Result<Range<usize>, offset_by_kind::Error> {
+        self.offset_by_id(kind).map(crate::range::into_usize_or_panic)
+    }
+
+    /// Like [`Index::usize_offset_by_id()`] but with support for validation and transformation using a function.
+    ///
+    /// `validate` is only called if a chunk of `kind` was found, and its return value is passed through as is.
+    ///
+    /// # Panics
+    ///
+    /// Under the same conditions as [`Index::usize_offset_by_id()`].
+    pub fn validated_usize_offset_by_id<T>(
+        &self,
+        kind: crate::Id,
+        validate: impl FnOnce(Range<usize>) -> T,
+    ) -> Result<T, offset_by_kind::Error> {
+        self.usize_offset_by_id(kind).map(validate)
+    }
+
+    /// Find a chunk of `kind` and return its data slice based on its offset.
+    ///
+    /// `data` is expected to be the buffer the index was decoded from.
+    ///
+    /// # Panics
+    ///
+    /// If the range of the chunk does not lie within `data`.
+    pub fn data_by_id<'a>(&self, data: &'a [u8], kind: crate::Id) -> Result<&'a [u8], data_by_kind::Error> {
+        let offset = self.offset_by_id(kind)?;
+        Ok(&data[crate::range::into_usize(offset).ok_or(data_by_kind::Error::FileTooLarge)?])
+    }
+
+    /// Return the end offset of the last chunk, which is the highest offset as well.
+    ///
+    /// # Panics
+    ///
+    /// If the index holds no chunk at all. Decoded indices always have at least one chunk, but an index
+    /// created for writing is empty until its first chunk was planned.
+    pub fn highest_offset(&self) -> crate::file::Offset {
+        self.chunks.last().expect("at least one chunk").offset.end
+    }
+}
diff --git a/vendor/gix-chunk/src/file/mod.rs b/vendor/gix-chunk/src/file/mod.rs
new file mode 100644
index 000000000..4ddd94999
--- /dev/null
+++ b/vendor/gix-chunk/src/file/mod.rs
@@ -0,0 +1,20 @@
+///
+pub mod decode;
+///
+pub mod index;
+
+///
+pub mod write;
+
+/// The offset to a chunk as seen relative to the beginning of the file containing it.
+pub type Offset = u64;
+
+/// A chunk file providing a table into the parent data.
+///
+/// An instance is either decoded from existing data with `from_bytes()`, or created with `for_writing()`
+/// to plan chunks and write their table of contents.
+pub struct Index {
+ /// If true, we use `chunks` in a way that facilitates writing them: each entry then stores `0..size`
+ /// instead of the position of the chunk in the file. Writing methods assert that this flag is set.
+ will_write: bool,
+ /// Validated chunks as defined by their index entries.
+ ///
+ /// Note that this list cannot be empty for decoded indices, whereas an index created with `for_writing()`
+ /// starts out empty until chunks are planned.
+ chunks: Vec<index::Entry>,
+}
diff --git a/vendor/gix-chunk/src/file/write.rs b/vendor/gix-chunk/src/file/write.rs
new file mode 100644
index 000000000..8189140fe
--- /dev/null
+++ b/vendor/gix-chunk/src/file/write.rs
@@ -0,0 +1,136 @@
+use crate::file::{index::Entry, Index};
+
+mod write_chunk {
+    use std::collections::VecDeque;
+
+    use crate::file::index;
+
+    /// A [`Write`][std::io::Write] implementation that validates chunk sizes while allowing the user to know
+    /// which chunk is to be written next.
+    ///
+    /// All bytes are forwarded to the inner writer while being counted. The count is compared to the
+    /// planned size of the current chunk each time [`next_chunk()`][Chunk::next_chunk()] is called.
+    pub struct Chunk<W> {
+        /// The chunks that still have to be handed out, in the order they were planned.
+        chunks_to_write: VecDeque<index::Entry>,
+        /// The writer receiving all bytes.
+        inner: W,
+        /// The chunk that was handed out last and is currently being written, if any.
+        next_chunk: Option<index::Entry>,
+        /// The amount of bytes written since the current chunk was handed out.
+        ///
+        /// It is a `u64` like the planned chunk sizes, so that chunks of 4GiB and more cannot overflow
+        /// the counter on targets with a 32 bit `usize`.
+        written_bytes: u64,
+    }
+
+    impl<W> Chunk<W>
+    where
+        W: std::io::Write,
+    {
+        /// Create a writer forwarding to `out` which hands out `chunks` front to back.
+        pub(crate) fn new(out: W, chunks: VecDeque<index::Entry>) -> Chunk<W> {
+            Chunk {
+                chunks_to_write: chunks,
+                inner: out,
+                next_chunk: None,
+                written_bytes: 0,
+            }
+        }
+    }
+
+    impl<W> std::io::Write for Chunk<W>
+    where
+        W: std::io::Write,
+    {
+        /// Forward `buf` to the inner writer and count the bytes it actually accepted.
+        fn write(&mut self, buf: &[u8]) -> std::io::Result<usize> {
+            let written = self.inner.write(buf)?;
+            // Widening `usize` to `u64` never loses information.
+            self.written_bytes += written as u64;
+            Ok(written)
+        }
+
+        fn flush(&mut self) -> std::io::Result<()> {
+            self.inner.flush()
+        }
+    }
+
+    impl<W> Chunk<W> {
+        /// Return the inner writer - should only be called once there is no more chunk to write.
+        ///
+        /// Note that no size validation happens here, it is only performed by
+        /// [`next_chunk()`][Chunk::next_chunk()].
+        pub fn into_inner(self) -> W {
+            self.inner
+        }
+        /// Return the next chunk-id to write, if there is one.
+        ///
+        /// Calling this method also concludes the chunk that was returned previously, which is why it
+        /// should be called until it returns `None` to have the last chunk validated as well.
+        ///
+        /// # Panics
+        ///
+        /// If the amount of bytes written for the previously returned chunk differs from its planned size.
+        pub fn next_chunk(&mut self) -> Option<crate::Id> {
+            if let Some(entry) = self.next_chunk.take() {
+                assert_eq!(
+                    entry.offset.end,
+                    self.written_bytes,
+                    "BUG: expected to write {} bytes, but only wrote {} for chunk {:?}",
+                    entry.offset.end,
+                    self.written_bytes,
+                    std::str::from_utf8(&entry.kind)
+                );
+            }
+            // Start counting from scratch for the chunk handed out now.
+            self.written_bytes = 0;
+            self.next_chunk = self.chunks_to_write.pop_front();
+            self.next_chunk.as_ref().map(|e| e.kind)
+        }
+    }
+}
+pub use write_chunk::Chunk;
+
+/// Writing
+impl Index {
+    /// Create a new index whose sole purpose is to be receiving chunks using [`plan_chunk()`][Index::plan_chunk()] and to be written to
+    /// an output using [`into_write()`][Index::into_write()]
+    pub fn for_writing() -> Self {
+        Index {
+            will_write: true,
+            chunks: Vec::new(),
+        }
+    }
+    /// Plan to write a new chunk as part of the index when [`into_write()`][Index::into_write()] is called.
+    ///
+    /// `chunk` is the id of the chunk and `exact_size_on_disk` the amount of bytes that will be written for it.
+    /// Chunks are written in the order they were planned.
+    ///
+    /// Note that decoding with `from_bytes()` rejects chunks using the sentinel id as well as chunks that
+    /// do not end past their start, so neither should be planned if the result is meant to be read back.
+    ///
+    /// # Panics
+    ///
+    /// If the index was not created with [`for_writing()`][Index::for_writing()], or if a chunk of the same
+    /// kind was planned already.
+    pub fn plan_chunk(&mut self, chunk: crate::Id, exact_size_on_disk: u64) {
+        assert!(self.will_write, "BUG: create the index with `for_writing()`");
+        assert!(
+            !self.chunks.iter().any(|e| e.kind == chunk),
+            "BUG: must not add chunk of same kind twice: {:?}",
+            std::str::from_utf8(&chunk)
+        );
+        // While planning, only the size is known, hence the range starts at zero and ends at the size.
+        self.chunks.push(Entry {
+            kind: chunk,
+            offset: 0..exact_size_on_disk,
+        })
+    }
+
+    /// Return the total size of all planned chunks thus far, without the table of contents.
+    ///
+    /// # Panics
+    ///
+    /// If the index was not created with [`for_writing()`][Index::for_writing()].
+    pub fn planned_storage_size(&self) -> u64 {
+        assert!(self.will_write, "BUG: create the index with `for_writing()`");
+        self.chunks.iter().map(|e| e.offset.end).sum()
+    }
+
+    /// Return the amount of chunks we currently know.
+    pub fn num_chunks(&self) -> usize {
+        self.chunks.len()
+    }
+
+    /// After [planning all chunks][Index::plan_chunk()] call this method with the destination to write the chunks to.
+    /// Use the [Chunk] writer to write each chunk in order.
+    /// `current_offset` is the byte position at which `out` will continue writing.
+    ///
+    /// The table of contents is written to `out` right away: one entry per planned chunk holding its id
+    /// and the big-endian file offset at which it starts, followed by the sentinel entry whose offset
+    /// points past the last chunk.
+    ///
+    /// # Errors
+    ///
+    /// Any error of `out` while writing the table of contents.
+    ///
+    /// # Panics
+    ///
+    /// If the index was not created with [`for_writing()`][Index::for_writing()].
+    pub fn into_write<W>(self, mut out: W, current_offset: usize) -> std::io::Result<Chunk<W>>
+    where
+        W: std::io::Write,
+    {
+        assert!(
+            self.will_write,
+            "BUG: create the index with `for_writing()`, cannot write decoded indices"
+        );
+        // First chunk starts past the table of contents.
+        // Widen before adding so the sum cannot overflow a 32 bit `usize`.
+        let mut current_offset = current_offset as u64 + Self::size_for_entries(self.num_chunks()) as u64;
+
+        for entry in &self.chunks {
+            out.write_all(&entry.kind)?;
+            out.write_all(&current_offset.to_be_bytes())?;
+
+            // In writing mode `offset.end` is the planned size of the chunk.
+            current_offset += entry.offset.end;
+        }
+
+        // sentinel to mark end of chunks, using the very constant the decoder compares against
+        out.write_all(&crate::SENTINEL)?;
+        out.write_all(&current_offset.to_be_bytes())?;
+
+        Ok(Chunk::new(out, self.chunks.into()))
+    }
+}