Diffstat (limited to 'vendor/gix-index/src/decode/mod.rs')
-rw-r--r--  vendor/gix-index/src/decode/mod.rs  321
1 file changed, 321 insertions(+), 0 deletions(-)
diff --git a/vendor/gix-index/src/decode/mod.rs b/vendor/gix-index/src/decode/mod.rs
new file mode 100644
index 000000000..e84d8f717
--- /dev/null
+++ b/vendor/gix-index/src/decode/mod.rs
@@ -0,0 +1,321 @@
+use filetime::FileTime;
+
+use crate::{entry, extension, Entry, State, Version};
+
+mod entries;
+/// Decoding of the header of an index file.
+pub mod header;
+
+mod error {
+
+ use crate::{decode, extension};
+
+ /// The error returned by [State::from_bytes()][crate::State::from_bytes()].
+ #[derive(Debug, thiserror::Error)]
+ #[allow(missing_docs)]
+ pub enum Error {
+ #[error(transparent)]
+ Header(#[from] decode::header::Error),
+ #[error("Could not parse entry at index {index}")]
+ Entry { index: u32 },
+ #[error("Mandatory extension wasn't implemented or malformed.")]
+ Extension(#[from] extension::decode::Error),
+ #[error("Index trailer should have been {expected} bytes long, but was {actual}")]
+ UnexpectedTrailerLength { expected: usize, actual: usize },
+ #[error("Shared index checksum was {actual_checksum} but should have been {expected_checksum}")]
+ ChecksumMismatch {
+ actual_checksum: gix_hash::ObjectId,
+ expected_checksum: gix_hash::ObjectId,
+ },
+ }
+}
+pub use error::Error;
+use gix_features::parallel::InOrderIter;
+
+use crate::util::read_u32;
+
+/// Options to define how to decode an index state [from bytes][State::from_bytes()].
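+///
+/// # Examples
+///
+/// A minimal sketch of filling in these options; the values chosen here are illustrative only,
+/// and every field not mentioned keeps its `Default` value:
+///
+/// ```
+/// use gix_index::decode::Options;
+///
+/// let opts = Options {
+///     // allow up to 4 threads for decoding entries and extensions
+///     thread_limit: Some(4),
+///     // all remaining fields keep their defaults (`0` and `None`)
+///     ..Options::default()
+/// };
+/// # let _ = opts;
+/// ```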
+#[derive(Default, Clone, Copy)]
+pub struct Options {
+    /// If `Some(N)` with `N > 0`, use no more than `N` threads. If `Some(0)` or `None`, use as many threads
+    /// as there are logical cores.
+    ///
+    /// This applies to loading extensions in parallel to entries if the common EOIE extension is available.
+    /// It also allows using multiple threads for loading entries if the IEOT extension is present.
+ pub thread_limit: Option<usize>,
+    /// The minimum size of the extension block in bytes for it to be loaded in its own thread, assuming enough threads are available.
+    /// If set to 0, extensions will always be read in their own thread as long as enough threads are available.
+ pub min_extension_block_in_bytes_for_threading: usize,
+    /// The expected hash of this index if it is read as part of a `link` extension.
+ ///
+ /// We will abort reading this file if it doesn't match.
+ pub expected_checksum: Option<gix_hash::ObjectId>,
+}
+
+impl State {
+    /// Decode an index state from `data` and store `timestamp` in the resulting instance for pass-through, assuming `object_hash`
+    /// to be used throughout the file.
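+    ///
+    /// # Examples
+    ///
+    /// A minimal sketch of reading an index from disk; the on-disk location and the choice of
+    /// `Sha1` are assumptions of this example rather than requirements of the function:
+    ///
+    /// ```no_run
+    /// use filetime::FileTime;
+    /// use gix_index::{decode, State};
+    ///
+    /// let path = std::path::Path::new(".git/index"); // hypothetical location of an index file
+    /// let data = std::fs::read(path)?;
+    /// let timestamp = FileTime::from_last_modification_time(&std::fs::metadata(path)?);
+    /// let (_state, _checksum) =
+    ///     State::from_bytes(&data, timestamp, gix_hash::Kind::Sha1, decode::Options::default())?;
+    /// # Ok::<_, Box<dyn std::error::Error>>(())
+    /// ```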
+ pub fn from_bytes(
+ data: &[u8],
+ timestamp: FileTime,
+ object_hash: gix_hash::Kind,
+ Options {
+ thread_limit,
+ min_extension_block_in_bytes_for_threading,
+ expected_checksum,
+ }: Options,
+ ) -> Result<(Self, gix_hash::ObjectId), Error> {
+ let (version, num_entries, post_header_data) = header::decode(data, object_hash)?;
+ let start_of_extensions = extension::end_of_index_entry::decode(data, object_hash);
+
+ let mut num_threads = gix_features::parallel::num_threads(thread_limit);
+ let path_backing_buffer_size = entries::estimate_path_storage_requirements_in_bytes(
+ num_entries,
+ data.len(),
+ start_of_extensions,
+ object_hash,
+ version,
+ );
+
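+        // With the end-of-index-entry (EOIE) extension present and more than one thread available,
+        // entries and extensions can be decoded concurrently; otherwise everything is read sequentially.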
+ let (entries, ext, data) = match start_of_extensions {
+ Some(offset) if num_threads > 1 => {
+ let extensions_data = &data[offset..];
+ let index_offsets_table = extension::index_entry_offset_table::find(extensions_data, object_hash);
+ let (entries_res, ext_res) = gix_features::parallel::threads(|scope| {
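+                // Reserve one thread up-front for decoding extensions, and spawn it only if the
+                // extension block is large enough to warrant its own thread.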
+ let extension_loading =
+ (extensions_data.len() > min_extension_block_in_bytes_for_threading).then({
+ num_threads -= 1;
+ || {
+ gix_features::parallel::build_thread()
+ .name("gix-index.from_bytes.load-extensions".into())
+ .spawn_scoped(scope, || extension::decode::all(extensions_data, object_hash))
+ .expect("valid name")
+ }
+ });
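+                    // Decode entries in parallel chunks if the index-entry-offset-table (IEOT) extension
+                    // is present; otherwise decode them sequentially on this thread.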
+ let entries_res = match index_offsets_table {
+ Some(entry_offsets) => {
+ let chunk_size = (entry_offsets.len() as f32 / num_threads as f32).ceil() as usize;
+ let num_chunks = entry_offsets.chunks(chunk_size).count();
+ let mut threads = Vec::with_capacity(num_chunks);
+ for (id, chunks) in entry_offsets.chunks(chunk_size).enumerate() {
+ let chunks = chunks.to_vec();
+ threads.push(
+ gix_features::parallel::build_thread()
+ .name(format!("gix-index.from_bytes.read-entries.{id}"))
+ .spawn_scoped(scope, move || {
+ let num_entries_for_chunks =
+ chunks.iter().map(|c| c.num_entries).sum::<u32>() as usize;
+ let mut entries = Vec::with_capacity(num_entries_for_chunks);
+ let path_backing_buffer_size_for_chunks =
+ entries::estimate_path_storage_requirements_in_bytes(
+ num_entries_for_chunks as u32,
+ data.len() / num_chunks,
+ start_of_extensions.map(|ofs| ofs / num_chunks),
+ object_hash,
+ version,
+ );
+ let mut path_backing =
+ Vec::with_capacity(path_backing_buffer_size_for_chunks);
+ let mut is_sparse = false;
+ for offset in chunks {
+ let (
+ entries::Outcome {
+ is_sparse: chunk_is_sparse,
+ },
+ _data,
+ ) = entries::chunk(
+ &data[offset.from_beginning_of_file as usize..],
+ &mut entries,
+ &mut path_backing,
+ offset.num_entries,
+ object_hash,
+ version,
+ )?;
+ is_sparse |= chunk_is_sparse;
+ }
+ Ok::<_, Error>((
+ id,
+ EntriesOutcome {
+ entries,
+ path_backing,
+ is_sparse,
+ },
+ ))
+ })
+ .expect("valid name"),
+ );
+ }
+ let mut results =
+ InOrderIter::from(threads.into_iter().map(|thread| thread.join().unwrap()));
+                            let mut acc = results.next().expect("have at least one result, one per thread");
+                            // We explicitly don't adjust the reserved capacity of `acc` and rather allow more copying
+                            // to happen as the vectors grow, to keep the peak memory size low.
+                            // NOTE: one day we might use a memory pool for paths and encode the block of memory
+                            // in some bits of the path offset. That adds indirection and slower access
+                            // to the path, but it would save time here.
+                            // As it stands, `git` is definitely more efficient at this and probably uses less memory too.
+                            // Maybe benchmarks can tell whether that is noticeable at 200-400GB/s of memory bandwidth,
+                            // or maybe at just 100GB/s on a single core.
+ while let (Ok(lhs), Some(res)) = (acc.as_mut(), results.next()) {
+ match res {
+ Ok(rhs) => {
+ lhs.is_sparse |= rhs.is_sparse;
+ let ofs = lhs.path_backing.len();
+ lhs.path_backing.extend(rhs.path_backing);
+ lhs.entries.extend(rhs.entries.into_iter().map(|mut e| {
+ e.path.start += ofs;
+ e.path.end += ofs;
+ e
+ }));
+ }
+ Err(err) => {
+ acc = Err(err);
+ }
+ }
+ }
+ acc.map(|acc| (acc, &data[data.len() - object_hash.len_in_bytes()..]))
+ }
+ None => entries(
+ post_header_data,
+ path_backing_buffer_size,
+ num_entries,
+ object_hash,
+ version,
+ ),
+ };
+ let ext_res = extension_loading
+ .map(|thread| thread.join().unwrap())
+ .unwrap_or_else(|| extension::decode::all(extensions_data, object_hash));
+ (entries_res, ext_res)
+ });
+ let (ext, data) = ext_res?;
+ (entries_res?.0, ext, data)
+ }
+ None | Some(_) => {
+ let (entries, data) = entries(
+ post_header_data,
+ path_backing_buffer_size,
+ num_entries,
+ object_hash,
+ version,
+ )?;
+ let (ext, data) = extension::decode::all(data, object_hash)?;
+ (entries, ext, data)
+ }
+ };
+
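+        // Whatever remains past entries and extensions must be exactly the trailing checksum.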
+ if data.len() != object_hash.len_in_bytes() {
+ return Err(Error::UnexpectedTrailerLength {
+ expected: object_hash.len_in_bytes(),
+ actual: data.len(),
+ });
+ }
+
+ let checksum = gix_hash::ObjectId::from(data);
+ if let Some(expected_checksum) = expected_checksum {
+ if checksum != expected_checksum {
+ return Err(Error::ChecksumMismatch {
+ actual_checksum: checksum,
+ expected_checksum,
+ });
+ }
+ }
+ let EntriesOutcome {
+ entries,
+ path_backing,
+ mut is_sparse,
+ } = entries;
+ let extension::decode::Outcome {
+ tree,
+ link,
+ resolve_undo,
+ untracked,
+ fs_monitor,
+ is_sparse: is_sparse_from_ext, // a marker is needed in case there are no directories
+ } = ext;
+ is_sparse |= is_sparse_from_ext;
+
+ Ok((
+ State {
+ object_hash,
+ timestamp,
+ version,
+ entries,
+ path_backing,
+ is_sparse,
+
+ tree,
+ link,
+ resolve_undo,
+ untracked,
+ fs_monitor,
+ },
+ checksum,
+ ))
+ }
+}
+
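+/// The outcome of decoding all entries, before extension data is merged into the final [`State`].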
+struct EntriesOutcome {
+ pub entries: Vec<Entry>,
+ pub path_backing: Vec<u8>,
+ pub is_sparse: bool,
+}
+
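+/// Decode all `num_entries` entries sequentially in a single chunk, pre-allocating the path backing
+/// buffer with `path_backing_buffer_size` bytes.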
+fn entries(
+ post_header_data: &[u8],
+ path_backing_buffer_size: usize,
+ num_entries: u32,
+ object_hash: gix_hash::Kind,
+ version: Version,
+) -> Result<(EntriesOutcome, &[u8]), Error> {
+ let mut entries = Vec::with_capacity(num_entries as usize);
+ let mut path_backing = Vec::with_capacity(path_backing_buffer_size);
+ entries::chunk(
+ post_header_data,
+ &mut entries,
+ &mut path_backing,
+ num_entries,
+ object_hash,
+ version,
+ )
+ .map(|(entries::Outcome { is_sparse }, data): (entries::Outcome, &[u8])| {
+ (
+ EntriesOutcome {
+ entries,
+ path_backing,
+ is_sparse,
+ },
+ data,
+ )
+ })
+}
+
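+/// Decode stat information as stored in the index: the `ctime` and `mtime` timestamps as
+/// (seconds, nanoseconds) pairs, followed by `dev`, `ino`, `uid`, `gid` and `size`, each read with `read_u32`.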
+pub(crate) fn stat(data: &[u8]) -> Option<(entry::Stat, &[u8])> {
+ let (ctime_secs, data) = read_u32(data)?;
+ let (ctime_nsecs, data) = read_u32(data)?;
+ let (mtime_secs, data) = read_u32(data)?;
+ let (mtime_nsecs, data) = read_u32(data)?;
+ let (dev, data) = read_u32(data)?;
+ let (ino, data) = read_u32(data)?;
+ let (uid, data) = read_u32(data)?;
+ let (gid, data) = read_u32(data)?;
+ let (size, data) = read_u32(data)?;
+ Some((
+ entry::Stat {
+            mtime: entry::Time {
+                secs: mtime_secs,
+                nsecs: mtime_nsecs,
+            },
+            ctime: entry::Time {
+                secs: ctime_secs,
+                nsecs: ctime_nsecs,
+            },
+ dev,
+ ino,
+ uid,
+ gid,
+ size,
+ },
+ data,
+ ))
+}