diff options
Diffstat (limited to 'vendor/gix-index/src')
33 files changed, 3124 insertions, 0 deletions
diff --git a/vendor/gix-index/src/access/mod.rs b/vendor/gix-index/src/access/mod.rs new file mode 100644 index 000000000..e8f2dc9f8 --- /dev/null +++ b/vendor/gix-index/src/access/mod.rs @@ -0,0 +1,223 @@ +use std::cmp::Ordering; + +use bstr::{BStr, ByteSlice, ByteVec}; + +use crate::{entry, extension, Entry, PathStorage, State, Version}; + +// TODO: integrate this somehow, somewhere, depending on later usage. +#[allow(dead_code)] +mod sparse; + +/// General information and entries +impl State { + /// Return the version used to store this state's information on disk. + pub fn version(&self) -> Version { + self.version + } + + /// Return the kind of hashes used in this instance. + pub fn object_hash(&self) -> gix_hash::Kind { + self.object_hash + } + + /// Return our entries + pub fn entries(&self) -> &[Entry] { + &self.entries + } + /// Return our path backing, the place which keeps all paths one after another, with entries storing only the range to access them. + pub fn path_backing(&self) -> &PathStorage { + &self.path_backing + } + + /// Runs `filter_map` on all entries, returning an iterator over all paths along with the result of `filter_map`. + pub fn entries_with_paths_by_filter_map<'a, T>( + &'a self, + mut filter_map: impl FnMut(&'a BStr, &Entry) -> Option<T> + 'a, + ) -> impl Iterator<Item = (&'a BStr, T)> + 'a { + self.entries.iter().filter_map(move |e| { + let p = e.path(self); + filter_map(p, e).map(|t| (p, t)) + }) + } + /// Return mutable entries along with their path, as obtained from `backing`. + pub fn entries_mut_with_paths_in<'state, 'backing>( + &'state mut self, + backing: &'backing PathStorage, + ) -> impl Iterator<Item = (&'state mut Entry, &'backing BStr)> { + self.entries.iter_mut().map(move |e| { + let path = backing[e.path.clone()].as_bstr(); + (e, path) + }) + } + + /// Find the entry index in [`entries()`][State::entries()] matching the given repository-relative + /// `path` and `stage`, or `None`. + /// + /// Use the index for accessing multiple stages if they exists, but at least the single matching entry. + pub fn entry_index_by_path_and_stage(&self, path: &BStr, stage: entry::Stage) -> Option<usize> { + self.entries + .binary_search_by(|e| e.path(self).cmp(path).then_with(|| e.stage().cmp(&stage))) + .ok() + } + + /// Find the entry index in [`entries()[..upper_bound]`][State::entries()] matching the given repository-relative + /// `path` and `stage`, or `None`. + /// + /// Use the index for accessing multiple stages if they exists, but at least the single matching entry. + /// + /// # Panics + /// + /// If `upper_bound` is out of bounds of our entries array. + pub fn entry_index_by_path_and_stage_bounded( + &self, + path: &BStr, + stage: entry::Stage, + upper_bound: usize, + ) -> Option<usize> { + self.entries[..upper_bound] + .binary_search_by(|e| e.path(self).cmp(path).then_with(|| e.stage().cmp(&stage))) + .ok() + } + + /// Like [`entry_index_by_path_and_stage()`][State::entry_index_by_path_and_stage()], + /// but returns the entry instead of the index. + pub fn entry_by_path_and_stage(&self, path: &BStr, stage: entry::Stage) -> Option<&Entry> { + self.entry_index_by_path_and_stage(path, stage) + .map(|idx| &self.entries[idx]) + } + + /// Return the entry at `idx` or _panic_ if the index is out of bounds. + /// + /// The `idx` is typically returned by [entry_by_path_and_stage()][State::entry_by_path_and_stage()]. + pub fn entry(&self, idx: usize) -> &Entry { + &self.entries[idx] + } + + /// Returns a boolean value indicating whether the index is sparse or not. + /// + /// An index is sparse if it contains at least one [Mode::DIR][entry::Mode::DIR] entry. + pub fn is_sparse(&self) -> bool { + self.is_sparse + } +} + +/// Mutation +impl State { + /// After usage of the storage obtained by [`take_path_backing()`][Self::take_path_backing()], return it here. + /// Note that it must not be empty. + pub fn return_path_backing(&mut self, backing: PathStorage) { + debug_assert!( + self.path_backing.is_empty(), + "BUG: return path backing only after taking it, once" + ); + self.path_backing = backing; + } + + /// Return mutable entries in a slice. + pub fn entries_mut(&mut self) -> &mut [Entry] { + &mut self.entries + } + /// Return mutable entries along with their paths in an iterator. + pub fn entries_mut_with_paths(&mut self) -> impl Iterator<Item = (&mut Entry, &BStr)> { + let paths = &self.path_backing; + self.entries.iter_mut().map(move |e| { + let path = paths[e.path.clone()].as_bstr(); + (e, path) + }) + } + + /// Sometimes it's needed to remove the path backing to allow certain mutation to happen in the state while supporting reading the entry's + /// path. + pub fn take_path_backing(&mut self) -> PathStorage { + assert_eq!( + self.entries.is_empty(), + self.path_backing.is_empty(), + "BUG: cannot take out backing multiple times" + ); + std::mem::take(&mut self.path_backing) + } + + /// Like [`entry_index_by_path_and_stage()`][State::entry_index_by_path_and_stage()], + /// but returns the mutable entry instead of the index. + pub fn entry_mut_by_path_and_stage(&mut self, path: &BStr, stage: entry::Stage) -> Option<&mut Entry> { + self.entry_index_by_path_and_stage(path, stage) + .map(|idx| &mut self.entries[idx]) + } + + /// Push a new entry containing `stat`, `id`, `flags` and `mode` and `path` to the end of our storage, without performing + /// any sanity checks. This means it's possible to push a new entry to the same path on the same stage and even after sorting + /// the entries lookups may still return the wrong one of them unless the correct binary search criteria is chosen. + /// + /// Note that this *is likely* to break invariants that will prevent further lookups by path unless + /// [`entry_index_by_path_and_stage_bounded()`][State::entry_index_by_path_and_stage_bounded()] is used with + /// the `upper_bound` being the amount of entries before the first call to this method. + /// + /// Alternatively, make sure to call [sort_entries()][State::sort_entries()] before entry lookup by path to restore + /// the invariant. + pub fn dangerously_push_entry( + &mut self, + stat: entry::Stat, + id: gix_hash::ObjectId, + flags: entry::Flags, + mode: entry::Mode, + path: &BStr, + ) { + let path = { + let path_start = self.path_backing.len(); + self.path_backing.push_str(path); + path_start..self.path_backing.len() + }; + + self.entries.push(Entry { + stat, + id, + flags, + mode, + path, + }); + } + + /// Unconditionally sort entries as needed to perform lookups quickly. + pub fn sort_entries(&mut self) { + let path_backing = &self.path_backing; + self.entries.sort_by(|a, b| { + Entry::cmp_filepaths(a.path_in(path_backing), b.path_in(path_backing)) + .then_with(|| a.stage().cmp(&b.stage())) + }); + } + + /// Similar to [`sort_entries()`][State::sort_entries()], but applies `compare` after comparing + /// by path and stage as a third criteria. + pub fn sort_entries_by(&mut self, mut compare: impl FnMut(&Entry, &Entry) -> Ordering) { + let path_backing = &self.path_backing; + self.entries.sort_by(|a, b| { + Entry::cmp_filepaths(a.path_in(path_backing), b.path_in(path_backing)) + .then_with(|| a.stage().cmp(&b.stage())) + .then_with(|| compare(a, b)) + }); + } +} + +/// Extensions +impl State { + /// Access the `tree` extension. + pub fn tree(&self) -> Option<&extension::Tree> { + self.tree.as_ref() + } + /// Access the `link` extension. + pub fn link(&self) -> Option<&extension::Link> { + self.link.as_ref() + } + /// Obtain the resolve-undo extension. + pub fn resolve_undo(&self) -> Option<&extension::resolve_undo::Paths> { + self.resolve_undo.as_ref() + } + /// Obtain the untracked extension. + pub fn untracked(&self) -> Option<&extension::UntrackedCache> { + self.untracked.as_ref() + } + /// Obtain the fsmonitor extension. + pub fn fs_monitor(&self) -> Option<&extension::FsMonitor> { + self.fs_monitor.as_ref() + } +} diff --git a/vendor/gix-index/src/access/sparse.rs b/vendor/gix-index/src/access/sparse.rs new file mode 100644 index 000000000..27804f846 --- /dev/null +++ b/vendor/gix-index/src/access/sparse.rs @@ -0,0 +1,59 @@ +/// Configuration related to sparse indexes. +#[derive(Debug, Default, Clone, Copy)] +pub struct Options { + /// If true, certain entries in the index will be excluded / skipped for certain operations, + /// based on the ignore patterns in the `.git/info/sparse-checkout` file. These entries will + /// carry the [`SKIP_WORKTREE`][crate::entry::Flags::SKIP_WORKTREE] flag. + /// + /// This typically is the value of `core.sparseCheckout` in the git configuration. + pub sparse_checkout: bool, + + /// Interpret the `.git/info/sparse-checkout` file using _cone mode_. + /// + /// If true, _cone mode_ is active and entire directories will be included in the checkout, as well as files in the root + /// of the repository. + /// If false, non-cone mode is active and entries to _include_ will be matched with patterns like those found in `.gitignore` files. + /// + /// This typically is the value of `core.sparseCheckoutCone` in the git configuration. + pub directory_patterns_only: bool, + + /// If true, will attempt to write a sparse index file which only works in cone mode. + /// + /// A sparse index has [`DIR` entries][crate::entry::Mode::DIR] that represent entire directories to be skipped + /// during checkout and other operations due to the added presence of + /// the [`SKIP_WORKTREE`][crate::entry::Flags::SKIP_WORKTREE] flag. + /// + /// This is typically the value of `index.sparse` in the git configuration. + pub write_sparse_index: bool, +} + +impl Options { + /// Derive a valid mode from all parameters that affect the 'sparseness' of the index. + /// + /// Some combinations of them degenerate to one particular mode. + pub fn sparse_mode(&self) -> Mode { + match ( + self.sparse_checkout, + self.directory_patterns_only, + self.write_sparse_index, + ) { + (true, true, true) => Mode::IncludeDirectoriesStoreIncludedEntriesAndExcludedDirs, + (true, true, false) => Mode::IncludeDirectoriesStoreAllEntriesSkipUnmatched, + (true, false, _) => Mode::IncludeByIgnorePatternStoreAllEntriesSkipUnmatched, + (false, _, _) => Mode::Disabled, + } + } +} + +/// Describes the configuration how a sparse index should be written, or if one should be written at all. +#[derive(Debug)] +pub enum Mode { + /// index with DIR entries for exclusion and included entries, directory-only include patterns in `.git/info/sparse-checkout` file. + IncludeDirectoriesStoreIncludedEntriesAndExcludedDirs, + /// index with all file entries and skip worktree flags for exclusion, directory-only include patterns in `.git/info/sparse-checkout` file. + IncludeDirectoriesStoreAllEntriesSkipUnmatched, + /// index with all file entries and skip-worktree flags for exclusion, `ignore` patterns to include entries in `.git/info/sparse-checkout` file. + IncludeByIgnorePatternStoreAllEntriesSkipUnmatched, + /// index with all entries, non is excluded, `.git/info/sparse-checkout` file is not considered, a regular index. + Disabled, +} diff --git a/vendor/gix-index/src/decode/entries.rs b/vendor/gix-index/src/decode/entries.rs new file mode 100644 index 000000000..0a968ae0c --- /dev/null +++ b/vendor/gix-index/src/decode/entries.rs @@ -0,0 +1,181 @@ +use std::{convert::TryInto, ops::Range}; + +use crate::{ + decode::{self, header}, + entry, + util::{read_u32, split_at_byte_exclusive, split_at_pos, var_int}, + Entry, Version, +}; + +/// a guess directly from git sources +pub const AVERAGE_V4_DELTA_PATH_LEN_IN_BYTES: usize = 80; + +pub struct Outcome { + pub is_sparse: bool, +} + +pub fn estimate_path_storage_requirements_in_bytes( + num_entries: u32, + on_disk_size: usize, + offset_to_extensions: Option<usize>, + object_hash: gix_hash::Kind, + version: Version, +) -> usize { + const fn on_disk_entry_sans_path(object_hash: gix_hash::Kind) -> usize { + 8 + // ctime + 8 + // mtime + (4 * 6) + // various stat fields + 2 + // flag, ignore extended flag as we'd rather overallocate a bit + object_hash.len_in_bytes() + } + match version { + Version::V3 | Version::V2 => { + let size_of_entries_block = offset_to_extensions.unwrap_or(on_disk_size); + size_of_entries_block + .saturating_sub(num_entries as usize * on_disk_entry_sans_path(object_hash)) + .saturating_sub(header::SIZE) + } + Version::V4 => num_entries as usize * AVERAGE_V4_DELTA_PATH_LEN_IN_BYTES, + } +} + +/// Note that `data` must point to the beginning of the entries, right past the header. +pub fn chunk<'a>( + mut data: &'a [u8], + entries: &mut Vec<Entry>, + path_backing: &mut Vec<u8>, + num_entries: u32, + object_hash: gix_hash::Kind, + version: Version, +) -> Result<(Outcome, &'a [u8]), decode::Error> { + let mut is_sparse = false; + let has_delta_paths = version == Version::V4; + let mut prev_path = None; + let mut delta_buf = Vec::<u8>::with_capacity(AVERAGE_V4_DELTA_PATH_LEN_IN_BYTES); + + for idx in 0..num_entries { + let (entry, remaining) = load_one( + data, + path_backing, + object_hash.len_in_bytes(), + has_delta_paths, + prev_path, + ) + .ok_or(decode::Error::Entry { index: idx })?; + + data = remaining; + if entry.mode.is_sparse() { + is_sparse = true; + } + // TODO: entries are actually in an intrusive collection, with path as key. Could be set for us. This affects 'ignore_case' which we + // also don't yet handle but probably could, maybe even smartly with the collection. + // For now it's unclear to me how they access the index, they could iterate quickly, and have fast access by path. + entries.push(entry); + prev_path = entries.last().map(|e| (e.path.clone(), &mut delta_buf)); + } + + Ok((Outcome { is_sparse }, data)) +} + +/// Note that `prev_path` is only useful if the version is V4 +fn load_one<'a>( + data: &'a [u8], + path_backing: &mut Vec<u8>, + hash_len: usize, + has_delta_paths: bool, + prev_path_and_buf: Option<(Range<usize>, &mut Vec<u8>)>, +) -> Option<(Entry, &'a [u8])> { + let first_byte_of_entry = data.as_ptr() as usize; + let (ctime_secs, data) = read_u32(data)?; + let (ctime_nsecs, data) = read_u32(data)?; + let (mtime_secs, data) = read_u32(data)?; + let (mtime_nsecs, data) = read_u32(data)?; + let (dev, data) = read_u32(data)?; + let (ino, data) = read_u32(data)?; + let (mode, data) = read_u32(data)?; + let (uid, data) = read_u32(data)?; + let (gid, data) = read_u32(data)?; + let (size, data) = read_u32(data)?; + let (hash, data) = split_at_pos(data, hash_len)?; + let (flags, data) = read_u16(data)?; + let flags = entry::at_rest::Flags::from_bits(flags)?; + let (flags, data) = if flags.contains(entry::at_rest::Flags::EXTENDED) { + let (extended_flags, data) = read_u16(data)?; + let extended_flags = entry::at_rest::FlagsExtended::from_bits(extended_flags)?; + let extended_flags = extended_flags.to_flags()?; + (flags.to_memory() | extended_flags, data) + } else { + (flags.to_memory(), data) + }; + + let start = path_backing.len(); + let data = if has_delta_paths { + let (strip_len, data) = var_int(data)?; + if let Some((prev_path, buf)) = prev_path_and_buf { + let end = prev_path.end.checked_sub(strip_len.try_into().ok()?)?; + let copy_len = end.checked_sub(prev_path.start)?; + if copy_len > 0 { + buf.resize(copy_len, 0); + buf.copy_from_slice(&path_backing[prev_path.start..end]); + path_backing.extend_from_slice(buf); + } + } + + let (path, data) = split_at_byte_exclusive(data, 0)?; + path_backing.extend_from_slice(path); + + data + } else { + let (path, data) = if flags.contains(entry::Flags::PATH_LEN) { + split_at_byte_exclusive(data, 0)? + } else { + let path_len = (flags.bits() & entry::Flags::PATH_LEN.bits()) as usize; + let (path, data) = split_at_pos(data, path_len)?; + (path, skip_padding(data, first_byte_of_entry)) + }; + + path_backing.extend_from_slice(path); + data + }; + let path_range = start..path_backing.len(); + + Some(( + Entry { + stat: entry::Stat { + ctime: entry::Time { + secs: ctime_secs, + nsecs: ctime_nsecs, + }, + mtime: entry::Time { + secs: mtime_secs, + nsecs: mtime_nsecs, + }, + dev, + ino, + uid, + gid, + size, + }, + id: gix_hash::ObjectId::from(hash), + flags: flags & !entry::Flags::PATH_LEN, + // This forces us to add the bits we need before being able to use them. + mode: entry::Mode::from_bits_truncate(mode), + path: path_range, + }, + data, + )) +} + +#[inline] +fn skip_padding(data: &[u8], first_byte_of_entry: usize) -> &[u8] { + let current_offset = data.as_ptr() as usize; + let c_padding = (current_offset - first_byte_of_entry + 8) & !7; + let skip = (first_byte_of_entry + c_padding) - current_offset; + + &data[skip..] +} + +#[inline] +fn read_u16(data: &[u8]) -> Option<(u16, &[u8])> { + split_at_pos(data, 2).map(|(num, data)| (u16::from_be_bytes(num.try_into().unwrap()), data)) +} diff --git a/vendor/gix-index/src/decode/header.rs b/vendor/gix-index/src/decode/header.rs new file mode 100644 index 000000000..04c3bfd98 --- /dev/null +++ b/vendor/gix-index/src/decode/header.rs @@ -0,0 +1,46 @@ +pub(crate) const SIZE: usize = 4 /*signature*/ + 4 /*version*/ + 4 /* num entries */; + +use crate::{util::from_be_u32, Version}; + +pub(crate) const SIGNATURE: &[u8] = b"DIRC"; + +mod error { + + /// The error produced when failing to decode an index header. + #[derive(Debug, thiserror::Error)] + #[allow(missing_docs)] + pub enum Error { + #[error("{0}")] + Corrupt(&'static str), + #[error("Index version {0} is not supported")] + UnsupportedVersion(u32), + } +} +pub use error::Error; + +pub(crate) fn decode(data: &[u8], object_hash: gix_hash::Kind) -> Result<(Version, u32, &[u8]), Error> { + if data.len() < (3 * 4) + object_hash.len_in_bytes() { + return Err(Error::Corrupt( + "File is too small even for header with zero entries and smallest hash", + )); + } + + let (signature, data) = data.split_at(4); + if signature != SIGNATURE { + return Err(Error::Corrupt( + "Signature mismatch - this doesn't claim to be a header file", + )); + } + + let (version, data) = data.split_at(4); + let version = match from_be_u32(version) { + 2 => Version::V2, + 3 => Version::V3, + 4 => Version::V4, + unknown => return Err(Error::UnsupportedVersion(unknown)), + }; + let (entries, data) = data.split_at(4); + let entries = from_be_u32(entries); + + Ok((version, entries, data)) +} diff --git a/vendor/gix-index/src/decode/mod.rs b/vendor/gix-index/src/decode/mod.rs new file mode 100644 index 000000000..e84d8f717 --- /dev/null +++ b/vendor/gix-index/src/decode/mod.rs @@ -0,0 +1,321 @@ +use filetime::FileTime; + +use crate::{entry, extension, Entry, State, Version}; + +mod entries; +/// +pub mod header; + +mod error { + + use crate::{decode, extension}; + + /// The error returned by [State::from_bytes()][crate::State::from_bytes()]. + #[derive(Debug, thiserror::Error)] + #[allow(missing_docs)] + pub enum Error { + #[error(transparent)] + Header(#[from] decode::header::Error), + #[error("Could not parse entry at index {index}")] + Entry { index: u32 }, + #[error("Mandatory extension wasn't implemented or malformed.")] + Extension(#[from] extension::decode::Error), + #[error("Index trailer should have been {expected} bytes long, but was {actual}")] + UnexpectedTrailerLength { expected: usize, actual: usize }, + #[error("Shared index checksum was {actual_checksum} but should have been {expected_checksum}")] + ChecksumMismatch { + actual_checksum: gix_hash::ObjectId, + expected_checksum: gix_hash::ObjectId, + }, + } +} +pub use error::Error; +use gix_features::parallel::InOrderIter; + +use crate::util::read_u32; + +/// Options to define how to decode an index state [from bytes][State::from_bytes()]. +#[derive(Default, Clone, Copy)] +pub struct Options { + /// If Some(_), we are allowed to use more than one thread. If Some(N), use no more than N threads. If Some(0)|None, use as many threads + /// as there are logical cores. + /// + /// This applies to loading extensions in parallel to entries if the common EOIE extension is available. + /// It also allows to use multiple threads for loading entries if the IEOT extension is present. + pub thread_limit: Option<usize>, + /// The minimum size in bytes to load extensions in their own thread, assuming there is enough `num_threads` available. + /// If set to 0, for example, extensions will always be read in their own thread if enough threads are available. + pub min_extension_block_in_bytes_for_threading: usize, + /// Set the expected hash of this index if we are read as part of a `link` extension. + /// + /// We will abort reading this file if it doesn't match. + pub expected_checksum: Option<gix_hash::ObjectId>, +} + +impl State { + /// Decode an index state from `data` and store `timestamp` in the resulting instance for pass-through, assuming `object_hash` + /// to be used through the file. + pub fn from_bytes( + data: &[u8], + timestamp: FileTime, + object_hash: gix_hash::Kind, + Options { + thread_limit, + min_extension_block_in_bytes_for_threading, + expected_checksum, + }: Options, + ) -> Result<(Self, gix_hash::ObjectId), Error> { + let (version, num_entries, post_header_data) = header::decode(data, object_hash)?; + let start_of_extensions = extension::end_of_index_entry::decode(data, object_hash); + + let mut num_threads = gix_features::parallel::num_threads(thread_limit); + let path_backing_buffer_size = entries::estimate_path_storage_requirements_in_bytes( + num_entries, + data.len(), + start_of_extensions, + object_hash, + version, + ); + + let (entries, ext, data) = match start_of_extensions { + Some(offset) if num_threads > 1 => { + let extensions_data = &data[offset..]; + let index_offsets_table = extension::index_entry_offset_table::find(extensions_data, object_hash); + let (entries_res, ext_res) = gix_features::parallel::threads(|scope| { + let extension_loading = + (extensions_data.len() > min_extension_block_in_bytes_for_threading).then({ + num_threads -= 1; + || { + gix_features::parallel::build_thread() + .name("gix-index.from_bytes.load-extensions".into()) + .spawn_scoped(scope, || extension::decode::all(extensions_data, object_hash)) + .expect("valid name") + } + }); + let entries_res = match index_offsets_table { + Some(entry_offsets) => { + let chunk_size = (entry_offsets.len() as f32 / num_threads as f32).ceil() as usize; + let num_chunks = entry_offsets.chunks(chunk_size).count(); + let mut threads = Vec::with_capacity(num_chunks); + for (id, chunks) in entry_offsets.chunks(chunk_size).enumerate() { + let chunks = chunks.to_vec(); + threads.push( + gix_features::parallel::build_thread() + .name(format!("gix-index.from_bytes.read-entries.{id}")) + .spawn_scoped(scope, move || { + let num_entries_for_chunks = + chunks.iter().map(|c| c.num_entries).sum::<u32>() as usize; + let mut entries = Vec::with_capacity(num_entries_for_chunks); + let path_backing_buffer_size_for_chunks = + entries::estimate_path_storage_requirements_in_bytes( + num_entries_for_chunks as u32, + data.len() / num_chunks, + start_of_extensions.map(|ofs| ofs / num_chunks), + object_hash, + version, + ); + let mut path_backing = + Vec::with_capacity(path_backing_buffer_size_for_chunks); + let mut is_sparse = false; + for offset in chunks { + let ( + entries::Outcome { + is_sparse: chunk_is_sparse, + }, + _data, + ) = entries::chunk( + &data[offset.from_beginning_of_file as usize..], + &mut entries, + &mut path_backing, + offset.num_entries, + object_hash, + version, + )?; + is_sparse |= chunk_is_sparse; + } + Ok::<_, Error>(( + id, + EntriesOutcome { + entries, + path_backing, + is_sparse, + }, + )) + }) + .expect("valid name"), + ); + } + let mut results = + InOrderIter::from(threads.into_iter().map(|thread| thread.join().unwrap())); + let mut acc = results.next().expect("have at least two results, one per thread"); + // We explicitly don't adjust the reserve in acc and rather allow for more copying + // to happens as vectors grow to keep the peak memory size low. + // NOTE: one day, we might use a memory pool for paths. We could encode the block of memory + // in some bytes in the path offset. That way there is more indirection/slower access + // to the path, but it would save time here. + // As it stands, `git` is definitely more efficient at this and probably uses less memory too. + // Maybe benchmarks can tell if that is noticeable later at 200/400GB/s memory bandwidth, or maybe just + // 100GB/s on a single core. + while let (Ok(lhs), Some(res)) = (acc.as_mut(), results.next()) { + match res { + Ok(rhs) => { + lhs.is_sparse |= rhs.is_sparse; + let ofs = lhs.path_backing.len(); + lhs.path_backing.extend(rhs.path_backing); + lhs.entries.extend(rhs.entries.into_iter().map(|mut e| { + e.path.start += ofs; + e.path.end += ofs; + e + })); + } + Err(err) => { + acc = Err(err); + } + } + } + acc.map(|acc| (acc, &data[data.len() - object_hash.len_in_bytes()..])) + } + None => entries( + post_header_data, + path_backing_buffer_size, + num_entries, + object_hash, + version, + ), + }; + let ext_res = extension_loading + .map(|thread| thread.join().unwrap()) + .unwrap_or_else(|| extension::decode::all(extensions_data, object_hash)); + (entries_res, ext_res) + }); + let (ext, data) = ext_res?; + (entries_res?.0, ext, data) + } + None | Some(_) => { + let (entries, data) = entries( + post_header_data, + path_backing_buffer_size, + num_entries, + object_hash, + version, + )?; + let (ext, data) = extension::decode::all(data, object_hash)?; + (entries, ext, data) + } + }; + + if data.len() != object_hash.len_in_bytes() { + return Err(Error::UnexpectedTrailerLength { + expected: object_hash.len_in_bytes(), + actual: data.len(), + }); + } + + let checksum = gix_hash::ObjectId::from(data); + if let Some(expected_checksum) = expected_checksum { + if checksum != expected_checksum { + return Err(Error::ChecksumMismatch { + actual_checksum: checksum, + expected_checksum, + }); + } + } + let EntriesOutcome { + entries, + path_backing, + mut is_sparse, + } = entries; + let extension::decode::Outcome { + tree, + link, + resolve_undo, + untracked, + fs_monitor, + is_sparse: is_sparse_from_ext, // a marker is needed in case there are no directories + } = ext; + is_sparse |= is_sparse_from_ext; + + Ok(( + State { + object_hash, + timestamp, + version, + entries, + path_backing, + is_sparse, + + tree, + link, + resolve_undo, + untracked, + fs_monitor, + }, + checksum, + )) + } +} + +struct EntriesOutcome { + pub entries: Vec<Entry>, + pub path_backing: Vec<u8>, + pub is_sparse: bool, +} + +fn entries( + post_header_data: &[u8], + path_backing_buffer_size: usize, + num_entries: u32, + object_hash: gix_hash::Kind, + version: Version, +) -> Result<(EntriesOutcome, &[u8]), Error> { + let mut entries = Vec::with_capacity(num_entries as usize); + let mut path_backing = Vec::with_capacity(path_backing_buffer_size); + entries::chunk( + post_header_data, + &mut entries, + &mut path_backing, + num_entries, + object_hash, + version, + ) + .map(|(entries::Outcome { is_sparse }, data): (entries::Outcome, &[u8])| { + ( + EntriesOutcome { + entries, + path_backing, + is_sparse, + }, + data, + ) + }) +} + +pub(crate) fn stat(data: &[u8]) -> Option<(entry::Stat, &[u8])> { + let (ctime_secs, data) = read_u32(data)?; + let (ctime_nsecs, data) = read_u32(data)?; + let (mtime_secs, data) = read_u32(data)?; + let (mtime_nsecs, data) = read_u32(data)?; + let (dev, data) = read_u32(data)?; + let (ino, data) = read_u32(data)?; + let (uid, data) = read_u32(data)?; + let (gid, data) = read_u32(data)?; + let (size, data) = read_u32(data)?; + Some(( + entry::Stat { + mtime: entry::Time { + secs: ctime_secs, + nsecs: ctime_nsecs, + }, + ctime: entry::Time { + secs: mtime_secs, + nsecs: mtime_nsecs, + }, + dev, + ino, + uid, + gid, + size, + }, + data, + )) +} diff --git a/vendor/gix-index/src/entry/flags.rs b/vendor/gix-index/src/entry/flags.rs new file mode 100644 index 000000000..ec00a78db --- /dev/null +++ b/vendor/gix-index/src/entry/flags.rs @@ -0,0 +1,130 @@ +use bitflags::bitflags; + +use crate::entry::Stage; + +bitflags! { + /// In-memory flags + pub struct Flags: u32 { + /// The mask to apply to obtain the stage number of an entry. + const STAGE_MASK = 0x3000; + /// If set, additional bits need to be written to storage. + const EXTENDED = 0x4000; + // TODO: could we use the pathlen ourselves to save 8 bytes? And how to handle longer paths than that? 0 as sentinel maybe? + /// The mask to obtain the length of the path associated with this entry. + const PATH_LEN = 0x0fff; + /// If set, the entry be assumed to match with the version on the working tree, as a way to avoid `lstat()` checks. + const ASSUME_VALID = 1 << 15; + /// Indicates that an entry needs to be updated as it's in-memory representation doesn't match what's on disk. + const UPDATE = 1 << 16; + /// Indicates an entry should be removed - this typically happens during writing, by simply skipping over them. + const REMOVE = 1 << 17; + /// Indicates that an entry is known to be uptodate. + const UPTODATE = 1 << 18; + /// Only temporarily used by unpack_trees() (in C) + const ADDED = 1 << 19; + + /// Whether an up-to-date object hash exists for the entry. + const HASHED = 1 << 20; + /// Set if the filesystem monitor is valid. + const FSMONITOR_VALID = 1 << 21; + /// Remove in work directory + const WORKTREE_REMOVE = 1 << 22; + /// Set to indicate the entry exists in multiple stages at once due to conflicts. + const CONFLICTED = 1 << 23; + + /// Indicates that the entry was already turned into a tree. + const UNPACKED = 1 << 24; + /// Only temporarily used by unpack_trees() (in C) + const NEW_SKIP_WORKTREE = 1 << 25; + + /// temporarily mark paths matched by a path spec + const PATHSPEC_MATCHED = 1 << 26; + + /// When the index is split, this indicates the entry is up-to-date in the shared portion of the index. + const UPDATE_IN_BASE = 1 << 27; + /// Indicates the entry name is present in the base/shared index, and thus doesn't have to be stored in this one. + const STRIP_NAME = 1 << 28; + + /// + /// stored at rest, see at_rest::FlagsExtended + const INTENT_TO_ADD = 1 << 29; + /// Stored at rest + const SKIP_WORKTREE = 1 << 30; + + /// For future extension + const EXTENDED_2 = 1 << 31; + } +} + +impl Flags { + /// Return the stage as extracted from the bits of this instance. + pub fn stage(&self) -> Stage { + (*self & Flags::STAGE_MASK).bits >> 12 + } + + /// Transform ourselves to a storage representation to keep all flags which are to be persisted, + /// skipping all extended flags. Note that the caller has to check for the `EXTENDED` bit to be present + /// and write extended flags as well if so. + pub fn to_storage(mut self) -> at_rest::Flags { + at_rest::Flags::from_bits( + { + self.remove(Self::PATH_LEN); + self + } + .bits() as u16, + ) + .unwrap() + } +} + +pub(crate) mod at_rest { + use bitflags::bitflags; + + bitflags! { + /// Flags how they are serialized to a storage location + pub struct Flags: u16 { + /// A portion of a the flags that encodes the length of the path that follows. + const PATH_LEN = 0x0fff; + const STAGE_MASK = 0x3000; + /// If set, there is more extended flags past this one + const EXTENDED = 0x4000; + /// If set, the entry be assumed to match with the version on the working tree, as a way to avoid `lstat()` checks. + const ASSUME_VALID = 0x8000; + } + } + + impl Flags { + pub fn to_memory(self) -> super::Flags { + super::Flags::from_bits(self.bits as u32).expect("PATHLEN is part of memory representation") + } + } + + bitflags! { + /// Extended flags - add flags for serialization here and offset them down to u16. + pub struct FlagsExtended: u16 { + const INTENT_TO_ADD = 1 << (29 - 16); + const SKIP_WORKTREE = 1 << (30 - 16); + } + } + + impl FlagsExtended { + pub fn from_flags(flags: super::Flags) -> Self { + Self::from_bits(((flags & (super::Flags::INTENT_TO_ADD | super::Flags::SKIP_WORKTREE)).bits >> 16) as u16) + .expect("valid") + } + pub fn to_flags(self) -> Option<super::Flags> { + super::Flags::from_bits((self.bits as u32) << 16) + } + } + + #[cfg(test)] + mod tests { + use super::*; + + #[test] + fn flags_from_bits_with_conflict() { + let input = 0b1110_0010_1000_1011; + assert_eq!(Flags::from_bits(input).unwrap().bits, input); + } + } +} diff --git a/vendor/gix-index/src/entry/mod.rs b/vendor/gix-index/src/entry/mod.rs new file mode 100644 index 000000000..165df801e --- /dev/null +++ b/vendor/gix-index/src/entry/mod.rs @@ -0,0 +1,109 @@ +/// The stage of an entry, one of 0 = base, 1 = ours, 2 = theirs +pub type Stage = u32; + +mod mode; +pub use mode::Mode; + +mod flags; +pub(crate) use flags::at_rest; +pub use flags::Flags; + +mod write; + +/// The time component in a [`Stat`] struct. +#[derive(Debug, Default, PartialEq, Eq, Hash, Ord, PartialOrd, Clone, Copy)] +#[cfg_attr(feature = "serde1", derive(serde::Serialize, serde::Deserialize))] +pub struct Time { + /// The amount of seconds elapsed since EPOCH + pub secs: u32, + /// The amount of nanoseconds elapsed in the current second, ranging from 0 to 999.999.999 . + pub nsecs: u32, +} + +/// An entry's filesystem stat information. +#[derive(Debug, Default, PartialEq, Eq, Hash, Ord, PartialOrd, Clone, Copy)] +#[cfg_attr(feature = "serde1", derive(serde::Serialize, serde::Deserialize))] +pub struct Stat { + /// Modification time + pub mtime: Time, + /// Creation time + pub ctime: Time, + /// Device number + pub dev: u32, + /// Inode number + pub ino: u32, + /// User id of the owner + pub uid: u32, + /// Group id of the owning group + pub gid: u32, + /// The size of bytes on disk. Capped to u32 so files bigger than that will need thorough additional checking + pub size: u32, +} + +mod access { + use bstr::{BStr, ByteSlice}; + + use crate::{entry, Entry, State}; + + impl Entry { + /// Return an entry's path, relative to the repository, which is extracted from its owning `state`. + pub fn path<'a>(&self, state: &'a State) -> &'a BStr { + state.path_backing[self.path.clone()].as_bstr() + } + + /// Return an entry's path using the given `backing`. + pub fn path_in<'backing>(&self, backing: &'backing crate::PathStorageRef) -> &'backing BStr { + backing[self.path.clone()].as_bstr() + } + + /// Return an entry's stage. + pub fn stage(&self) -> entry::Stage { + self.flags.stage() + } + } +} + +mod _impls { + use std::{cmp::Ordering, ops::Add, time::SystemTime}; + + use bstr::BStr; + + use crate::{entry::Time, Entry, State}; + + impl From<SystemTime> for Time { + fn from(s: SystemTime) -> Self { + let d = s + .duration_since(std::time::UNIX_EPOCH) + .expect("system time is not before unix epoch!"); + Time { + secs: d.as_secs() as u32, + nsecs: d.subsec_nanos(), + } + } + } + + impl From<Time> for SystemTime { + fn from(s: Time) -> Self { + std::time::UNIX_EPOCH.add(std::time::Duration::new(s.secs.into(), s.nsecs)) + } + } + + impl Entry { + /// Compare one entry to another by their path, by comparing only their common path portion byte by byte, then resorting to + /// entry length and stage. + pub fn cmp(&self, other: &Self, state: &State) -> Ordering { + let lhs = self.path(state); + let rhs = other.path(state); + Entry::cmp_filepaths(lhs, rhs).then_with(|| self.stage().cmp(&other.stage())) + } + + /// Compare one entry to another by their path, by comparing only their common path portion byte by byte, then resorting to + /// entry length. + pub fn cmp_filepaths(a: &BStr, b: &BStr) -> Ordering { + let common_len = a.len().min(b.len()); + a[..common_len] + .cmp(&b[..common_len]) + .then_with(|| a.len().cmp(&b.len())) + } + } +} diff --git a/vendor/gix-index/src/entry/mode.rs b/vendor/gix-index/src/entry/mode.rs new file mode 100644 index 000000000..1045dfd5b --- /dev/null +++ b/vendor/gix-index/src/entry/mode.rs @@ -0,0 +1,24 @@ +use bitflags::bitflags; +bitflags! { + /// The kind of file of an entry. + pub struct Mode: u32 { + /// directory (only used for sparse checkouts), equivalent to a tree, which is _excluded_ from the index via + /// cone-mode. + const DIR = 0o040000; + /// regular file + const FILE = 0o100644; + /// regular file, executable + const FILE_EXECUTABLE = 0o100755; + /// Symbolic link + const SYMLINK = 0o120000; + /// A git commit for submodules + const COMMIT = 0o160000; + } +} + +impl Mode { + /// Return true if this is a sparse entry, as it points to a directory which usually isn't what an unsparse index tracks. + pub fn is_sparse(&self) -> bool { + *self == Self::DIR + } +} diff --git a/vendor/gix-index/src/entry/write.rs b/vendor/gix-index/src/entry/write.rs new file mode 100644 index 000000000..2a6ad5588 --- /dev/null +++ b/vendor/gix-index/src/entry/write.rs @@ -0,0 +1,39 @@ +use std::convert::TryInto; + +use crate::{entry, Entry, State}; + +impl Entry { + /// Serialize ourselves to `out` with path access via `state`, without padding. + pub fn write_to(&self, mut out: impl std::io::Write, state: &State) -> std::io::Result<()> { + let stat = self.stat; + out.write_all(&stat.ctime.secs.to_be_bytes())?; + out.write_all(&stat.ctime.nsecs.to_be_bytes())?; + out.write_all(&stat.mtime.secs.to_be_bytes())?; + out.write_all(&stat.mtime.nsecs.to_be_bytes())?; + out.write_all(&stat.dev.to_be_bytes())?; + out.write_all(&stat.ino.to_be_bytes())?; + out.write_all(&self.mode.bits().to_be_bytes())?; + out.write_all(&stat.uid.to_be_bytes())?; + out.write_all(&stat.gid.to_be_bytes())?; + out.write_all(&stat.size.to_be_bytes())?; + out.write_all(self.id.as_bytes())?; + let path = self.path(state); + let path_len: u16 = if path.len() >= entry::Flags::PATH_LEN.bits() as usize { + entry::Flags::PATH_LEN.bits() as u16 + } else { + path.len() + .try_into() + .expect("we just checked that the length is smaller than 0xfff") + }; + out.write_all(&(self.flags.to_storage().bits() | path_len).to_be_bytes())?; + if self.flags.contains(entry::Flags::EXTENDED) { + out.write_all( + &entry::at_rest::FlagsExtended::from_flags(self.flags) + .bits() + .to_be_bytes(), + )?; + } + out.write_all(path)?; + out.write_all(b"\0") + } +} diff --git a/vendor/gix-index/src/extension/decode.rs b/vendor/gix-index/src/extension/decode.rs new file mode 100644 index 000000000..af032f4e3 --- /dev/null +++ b/vendor/gix-index/src/extension/decode.rs @@ -0,0 +1,80 @@ +use std::convert::TryInto; + +use crate::{extension, extension::Signature, util::from_be_u32}; + +pub(crate) fn header(data: &[u8]) -> (Signature, u32, &[u8]) { + let (signature, data) = data.split_at(4); + let (size, data) = data.split_at(4); + (signature.try_into().unwrap(), from_be_u32(size), data) +} + +mod error { + use crate::extension; + + /// The error returned when decoding extensions. + #[derive(Debug, thiserror::Error)] + #[allow(missing_docs)] + pub enum Error { + #[error( + "Encountered mandatory extension '{}' which isn't implemented yet", + String::from_utf8_lossy(signature) + )] + MandatoryUnimplemented { signature: extension::Signature }, + #[error("Could not parse mandatory link extension")] + Link(#[from] extension::link::decode::Error), + } +} +pub use error::Error; + +pub(crate) fn all( + maybe_beginning_of_extensions: &[u8], + object_hash: gix_hash::Kind, +) -> Result<(Outcome, &[u8]), Error> { + let mut ext_iter = match extension::Iter::new_without_checksum(maybe_beginning_of_extensions, object_hash) { + Some(iter) => iter, + None => return Ok((Outcome::default(), maybe_beginning_of_extensions)), + }; + + let mut ext = Outcome::default(); + for (signature, ext_data) in ext_iter.by_ref() { + match signature { + extension::tree::SIGNATURE => { + ext.tree = extension::tree::decode(ext_data, object_hash); + } + extension::resolve_undo::SIGNATURE => { + ext.resolve_undo = extension::resolve_undo::decode(ext_data, object_hash); + } + extension::untracked_cache::SIGNATURE => { + ext.untracked = extension::untracked_cache::decode(ext_data, object_hash); + } + extension::fs_monitor::SIGNATURE => { + ext.fs_monitor = extension::fs_monitor::decode(ext_data); + } + extension::end_of_index_entry::SIGNATURE => {} // skip already done + extension::index_entry_offset_table::SIGNATURE => {} // not relevant/obtained already + mandatory if mandatory[0].is_ascii_lowercase() => match mandatory { + extension::link::SIGNATURE => ext.link = extension::link::decode(ext_data, object_hash)?.into(), + extension::sparse::SIGNATURE => { + if !ext_data.is_empty() { + // only used as a marker, if this changes we need this implementation. + return Err(Error::MandatoryUnimplemented { signature: mandatory }); + } + ext.is_sparse = true + } + unknown => return Err(Error::MandatoryUnimplemented { signature: unknown }), + }, + _unknown => {} // skip unknown extensions, too + } + } + Ok((ext, &maybe_beginning_of_extensions[ext_iter.consumed..])) +} + +#[derive(Default)] +pub(crate) struct Outcome { + pub tree: Option<extension::Tree>, + pub link: Option<extension::Link>, + pub resolve_undo: Option<extension::resolve_undo::Paths>, + pub untracked: Option<extension::UntrackedCache>, + pub fs_monitor: Option<extension::FsMonitor>, + pub is_sparse: bool, +} diff --git a/vendor/gix-index/src/extension/end_of_index_entry/decode.rs b/vendor/gix-index/src/extension/end_of_index_entry/decode.rs new file mode 100644 index 000000000..f8002ab7c --- /dev/null +++ b/vendor/gix-index/src/extension/end_of_index_entry/decode.rs @@ -0,0 +1,56 @@ +use crate::{ + decode::header, + extension, + extension::end_of_index_entry::{MIN_SIZE, MIN_SIZE_WITH_HEADER, SIGNATURE}, + util::from_be_u32, +}; + +/// Decode the end of index entry extension, which is no more than a glorified offset to the first byte of all extensions to allow +/// loading entries and extensions in parallel. +/// +/// Itself it's located at the end of the index file, which allows its location to be known and thus addressable. +/// From there it's possible to traverse the chunks of all set extensions, hash them, and compare that hash with all extensions +/// stored prior to this one to assure they are correct. +/// +/// If the checksum wasn't matched, we will ignore this extension entirely. +pub fn decode(data: &[u8], object_hash: gix_hash::Kind) -> Option<usize> { + let hash_len = object_hash.len_in_bytes(); + if data.len() < MIN_SIZE_WITH_HEADER + hash_len { + return None; + } + + let start_of_eoie = data.len() - MIN_SIZE_WITH_HEADER - hash_len; + let ext_data = &data[start_of_eoie..data.len() - hash_len]; + + let (signature, ext_size, ext_data) = extension::decode::header(ext_data); + if signature != SIGNATURE || ext_size as usize != MIN_SIZE { + return None; + } + + let (offset, checksum) = ext_data.split_at(4); + let offset = from_be_u32(offset) as usize; + if offset < header::SIZE || offset > start_of_eoie || checksum.len() != gix_hash::Kind::Sha1.len_in_bytes() { + return None; + } + + let mut hasher = gix_features::hash::hasher(gix_hash::Kind::Sha1); + let mut last_chunk = None; + for (signature, chunk) in extension::Iter::new(&data[offset..data.len() - MIN_SIZE_WITH_HEADER - hash_len]) { + hasher.update(&signature); + hasher.update(&(chunk.len() as u32).to_be_bytes()); + last_chunk = Some(chunk); + } + + if hasher.digest() != checksum { + return None; + } + // The last-to-this chunk ends where ours starts + if last_chunk + .map(|s| s.as_ptr_range().end != (&data[start_of_eoie]) as *const _) + .unwrap_or(true) + { + return None; + } + + Some(offset) +} diff --git a/vendor/gix-index/src/extension/end_of_index_entry/mod.rs b/vendor/gix-index/src/extension/end_of_index_entry/mod.rs new file mode 100644 index 000000000..d55496aee --- /dev/null +++ b/vendor/gix-index/src/extension/end_of_index_entry/mod.rs @@ -0,0 +1,14 @@ +use crate::{extension, extension::Signature}; + +/// The signature of the end-of-index-entry extension +pub const SIGNATURE: Signature = *b"EOIE"; +/// The minimal size of the extension, depending on the shortest hash. +pub const MIN_SIZE: usize = 4 /* offset to extensions */ + gix_hash::Kind::shortest().len_in_bytes(); +/// The smallest size of the extension varying by hash kind, along with the standard extension header. +pub const MIN_SIZE_WITH_HEADER: usize = extension::MIN_SIZE + MIN_SIZE; + +mod decode; +pub use decode::decode; + +mod write; +pub use write::write_to; diff --git a/vendor/gix-index/src/extension/end_of_index_entry/write.rs b/vendor/gix-index/src/extension/end_of_index_entry/write.rs new file mode 100644 index 000000000..6752bd238 --- /dev/null +++ b/vendor/gix-index/src/extension/end_of_index_entry/write.rs @@ -0,0 +1,29 @@ +use crate::extension::{end_of_index_entry::SIGNATURE, Signature}; + +/// Write this extension to out and generate a hash of `hash_kind` over all `prior_extensions` which are specified as `(signature, size)` +/// pair. `one_past_entries` is the offset to the first byte past the entries, which is also the first byte of the signature of the +/// first extension in `prior_extensions`. Note that `prior_extensions` must have been written prior to this one, as the name suggests, +/// allowing this extension to be the last one in the index file. +/// +/// Even if there are no `prior_extensions`, this extension will be written unconditionally. +pub fn write_to( + mut out: impl std::io::Write, + hash_kind: gix_hash::Kind, + offset_to_extensions: u32, + prior_extensions: impl IntoIterator<Item = (Signature, u32)>, +) -> Result<(), std::io::Error> { + out.write_all(&SIGNATURE)?; + let extension_size: u32 = 4 + hash_kind.len_in_bytes() as u32; + out.write_all(&extension_size.to_be_bytes())?; + + out.write_all(&offset_to_extensions.to_be_bytes())?; + + let mut hasher = gix_features::hash::hasher(hash_kind); + for (signature, size) in prior_extensions { + hasher.update(&signature); + hasher.update(&size.to_be_bytes()); + } + out.write_all(&hasher.digest())?; + + Ok(()) +} diff --git a/vendor/gix-index/src/extension/fs_monitor.rs b/vendor/gix-index/src/extension/fs_monitor.rs new file mode 100644 index 000000000..4bb5016ff --- /dev/null +++ b/vendor/gix-index/src/extension/fs_monitor.rs @@ -0,0 +1,38 @@ +use bstr::BString; + +use crate::{ + extension::{FsMonitor, Signature}, + util::{read_u32, read_u64, split_at_byte_exclusive}, +}; + +#[derive(Clone)] +pub enum Token { + V1 { nanos_since_1970: u64 }, + V2 { token: BString }, +} + +pub const SIGNATURE: Signature = *b"FSMN"; + +pub fn decode(data: &[u8]) -> Option<FsMonitor> { + let (version, data) = read_u32(data)?; + let (token, data) = match version { + 1 => { + let (nanos_since_1970, data) = read_u64(data)?; + (Token::V1 { nanos_since_1970 }, data) + } + 2 => { + let (token, data) = split_at_byte_exclusive(data, 0)?; + (Token::V2 { token: token.into() }, data) + } + _ => return None, + }; + + let (ewah_size, data) = read_u32(data)?; + let (entry_dirty, data) = gix_bitmap::ewah::decode(&data[..ewah_size as usize]).ok()?; + + if !data.is_empty() { + return None; + } + + FsMonitor { token, entry_dirty }.into() +} diff --git a/vendor/gix-index/src/extension/index_entry_offset_table.rs b/vendor/gix-index/src/extension/index_entry_offset_table.rs new file mode 100644 index 000000000..0a0108adf --- /dev/null +++ b/vendor/gix-index/src/extension/index_entry_offset_table.rs @@ -0,0 +1,43 @@ +use crate::{extension, extension::Signature, util::read_u32}; + +#[derive(Debug, Clone, Copy)] +pub struct Offset { + pub from_beginning_of_file: u32, + pub num_entries: u32, +} + +pub const SIGNATURE: Signature = *b"IEOT"; + +pub fn decode(data: &[u8]) -> Option<Vec<Offset>> { + let (version, mut data) = read_u32(data)?; + match version { + 1 => {} + _unknown => return None, + } + + let entry_size = 4 + 4; + let num_offsets = data.len() / entry_size; + if num_offsets == 0 || data.len() % entry_size != 0 { + return None; + } + + let mut out = Vec::with_capacity(entry_size); + for _ in 0..num_offsets { + let (offset, chunk) = read_u32(data)?; + let (num_entries, chunk) = read_u32(chunk)?; + out.push(Offset { + from_beginning_of_file: offset, + num_entries, + }); + data = chunk; + } + debug_assert!(data.is_empty()); + + out.into() +} + +pub fn find(extensions: &[u8], object_hash: gix_hash::Kind) -> Option<Vec<Offset>> { + extension::Iter::new_without_checksum(extensions, object_hash)? + .find_map(|(sig, ext_data)| (sig == SIGNATURE).then_some(ext_data)) + .and_then(decode) +} diff --git a/vendor/gix-index/src/extension/iter.rs b/vendor/gix-index/src/extension/iter.rs new file mode 100644 index 000000000..f791b064e --- /dev/null +++ b/vendor/gix-index/src/extension/iter.rs @@ -0,0 +1,58 @@ +use std::convert::TryInto; + +use crate::{extension, extension::Iter, util::from_be_u32}; + +impl<'a> Iter<'a> { + /// Create a new extension iterator at the entrypoint for extensions until the end of the extensions. + pub fn new(data_at_beginning_of_extensions_and_truncated: &'a [u8]) -> Self { + Iter { + data: data_at_beginning_of_extensions_and_truncated, + consumed: 0, + } + } + + /// Create a new iterator at with a data block to the end of the file, and we automatically remove the trailing + /// hash of type `object_hash`. + pub fn new_without_checksum( + data_at_beginning_of_extensions: &'a [u8], + object_hash: gix_hash::Kind, + ) -> Option<Self> { + let end = data_at_beginning_of_extensions + .len() + .checked_sub(object_hash.len_in_bytes())?; + Iter { + data: &data_at_beginning_of_extensions[..end], + consumed: 0, + } + .into() + } +} + +impl<'a> Iterator for Iter<'a> { + type Item = (extension::Signature, &'a [u8]); + + fn next(&mut self) -> Option<Self::Item> { + if self.data.len() < 4 + 4 { + return None; + } + + let (signature, data) = self.data.split_at(4); + let (size, data) = data.split_at(4); + self.data = data; + self.consumed += 4 + 4; + + let size = from_be_u32(size) as usize; + + match data.get(..size) { + Some(ext_data) => { + self.data = &data[size..]; + self.consumed += size; + Some((signature.try_into().unwrap(), ext_data)) + } + None => { + self.data = &[]; + None + } + } + } +} diff --git a/vendor/gix-index/src/extension/link.rs b/vendor/gix-index/src/extension/link.rs new file mode 100644 index 000000000..20ce9cb21 --- /dev/null +++ b/vendor/gix-index/src/extension/link.rs @@ -0,0 +1,177 @@ +use crate::{ + extension::{Link, Signature}, + util::split_at_pos, +}; + +/// The signature of the link extension. +pub const SIGNATURE: Signature = *b"link"; + +/// Bitmaps to know which entries to delete or replace, even though details are still unknown. +#[derive(Clone)] +pub struct Bitmaps { + /// A bitmap to signal which entries to delete, maybe. + pub delete: gix_bitmap::ewah::Vec, + /// A bitmap to signal which entries to replace, maybe. + pub replace: gix_bitmap::ewah::Vec, +} + +/// +pub mod decode { + + /// The error returned when decoding link extensions. + #[derive(Debug, thiserror::Error)] + #[allow(missing_docs)] + pub enum Error { + #[error("{0}")] + Corrupt(&'static str), + #[error("{kind} bitmap corrupt")] + BitmapDecode { + err: gix_bitmap::ewah::decode::Error, + kind: &'static str, + }, + } + + impl From<std::num::TryFromIntError> for Error { + fn from(_: std::num::TryFromIntError) -> Self { + Self::Corrupt("error in bitmap iteration trying to convert from u64 to usize") + } + } +} + +pub(crate) fn decode(data: &[u8], object_hash: gix_hash::Kind) -> Result<Link, decode::Error> { + let (id, data) = split_at_pos(data, object_hash.len_in_bytes()) + .ok_or(decode::Error::Corrupt( + "link extension too short to read share index checksum", + )) + .map(|(id, d)| (gix_hash::ObjectId::from(id), d))?; + + if data.is_empty() { + return Ok(Link { + shared_index_checksum: id, + bitmaps: None, + }); + } + + let (delete, data) = + gix_bitmap::ewah::decode(data).map_err(|err| decode::Error::BitmapDecode { kind: "delete", err })?; + let (replace, data) = + gix_bitmap::ewah::decode(data).map_err(|err| decode::Error::BitmapDecode { kind: "replace", err })?; + + if !data.is_empty() { + return Err(decode::Error::Corrupt("garbage trailing link extension")); + } + + Ok(Link { + shared_index_checksum: id, + bitmaps: Some(Bitmaps { delete, replace }), + }) +} + +impl Link { + pub(crate) fn dissolve_into( + self, + split_index: &mut crate::File, + object_hash: gix_hash::Kind, + options: crate::decode::Options, + ) -> Result<(), crate::file::init::Error> { + let shared_index_path = split_index + .path + .parent() + .expect("split index file in .git folder") + .join(format!("sharedindex.{}", self.shared_index_checksum)); + let mut shared_index = crate::File::at( + &shared_index_path, + object_hash, + crate::decode::Options { + expected_checksum: self.shared_index_checksum.into(), + ..options + }, + )?; + + if let Some(bitmaps) = self.bitmaps { + let mut split_entry_index = 0; + + let mut err = None; + bitmaps.replace.for_each_set_bit(|replace_index| { + let shared_entry = match shared_index.entries.get_mut(replace_index) { + Some(e) => e, + None => { + err = decode::Error::Corrupt("replace bitmap length exceeds shared index length - more entries in bitmap than found in shared index").into(); + return None + } + }; + + if shared_entry.flags.contains(crate::entry::Flags::REMOVE) { + err = decode::Error::Corrupt("entry is marked as both replace and delete").into(); + return None + } + + let split_entry = match split_index.entries.get(split_entry_index) { + Some(e) => e, + None => { + err = decode::Error::Corrupt("replace bitmap length exceeds split index length - more entries in bitmap than found in split index").into(); + return None + } + }; + if !split_entry.path.is_empty() { + err = decode::Error::Corrupt("paths in split index entries that are for replacement should be empty").into(); + return None + } + if shared_entry.path.is_empty() { + err = decode::Error::Corrupt("paths in shared index entries that are replaced should not be empty").into(); + return None + } + shared_entry.stat = split_entry.stat; + shared_entry.id = split_entry.id; + shared_entry.flags = split_entry.flags; + shared_entry.mode = split_entry.mode; + + split_entry_index += 1; + Some(()) + }); + if let Some(err) = err { + return Err(err.into()); + } + + let split_index_path_backing = std::mem::take(&mut split_index.path_backing); + for mut split_entry in split_index.entries.drain(split_entry_index..) { + let start = shared_index.path_backing.len(); + let split_index_path = split_entry.path.clone(); + + split_entry.path = start..start + split_entry.path.len(); + shared_index.entries.push(split_entry); + + shared_index + .path_backing + .extend_from_slice(&split_index_path_backing[split_index_path]); + } + + bitmaps.delete.for_each_set_bit(|delete_index| { + let shared_entry = match shared_index.entries.get_mut(delete_index) { + Some(e) => e, + None => { + err = decode::Error::Corrupt("delete bitmap length exceeds shared index length - more entries in bitmap than found in shared index").into(); + return None + } + }; + shared_entry.flags.insert(crate::entry::Flags::REMOVE); + Some(()) + }); + if let Some(err) = err { + return Err(err.into()); + } + + shared_index + .entries + .retain(|e| !e.flags.contains(crate::entry::Flags::REMOVE)); + + let mut shared_entries = std::mem::take(&mut shared_index.entries); + shared_entries.sort_by(|a, b| a.cmp(b, &shared_index.state)); + + split_index.entries = shared_entries; + split_index.path_backing = std::mem::take(&mut shared_index.path_backing); + } + + Ok(()) + } +} diff --git a/vendor/gix-index/src/extension/mod.rs b/vendor/gix-index/src/extension/mod.rs new file mode 100644 index 000000000..07edfdfe0 --- /dev/null +++ b/vendor/gix-index/src/extension/mod.rs @@ -0,0 +1,96 @@ +use bstr::BString; +use smallvec::SmallVec; + +/// The size of the smallest possible extension, which is no more than a signature and a 0 indicating its size. +pub const MIN_SIZE: usize = 4 /* signature */ + 4 /* size */; + +/// The kind of index extension. +pub type Signature = [u8; 4]; + +/// An iterator over the data of index extensions. +pub struct Iter<'a> { + data: &'a [u8], + /// The amount of consumed bytes as seen from our internal data pointer. Useful to continue where the iterator left off. + pub consumed: usize, +} + +/// A structure to associate object ids of a tree with sections in the index entries list. +/// +/// It allows to more quickly build trees by avoiding as it can quickly re-use portions of the index and its associated tree ids +/// if there was no change to them. Portions of this tree are invalidated as the index is changed. +#[derive(PartialEq, Eq, Clone, Debug)] +pub struct Tree { + /// The name of the tree/directory, or empty if it's the root tree. + pub name: SmallVec<[u8; 23]>, + /// The id of the directory tree of the associated tree object. + pub id: gix_hash::ObjectId, + /// The amount of non-tree items in this directory tree, including sub-trees, recursively. + /// The value of the top-level tree is thus equal to the value of the total amount of entries. + /// If `None`, the tree is considered invalid and needs to be refreshed + pub num_entries: Option<u32>, + /// The child-trees below the current tree. + pub children: Vec<Tree>, +} + +/// The link extension to track a shared index. +#[derive(Clone)] +pub struct Link { + /// The checksum of the shared index as last seen. + pub shared_index_checksum: gix_hash::ObjectId, + /// Bitmaps to tell us which entries to delete or replace. + pub bitmaps: Option<link::Bitmaps>, +} + +/// The extension for untracked files. +#[allow(dead_code)] +#[derive(Clone)] +pub struct UntrackedCache { + /// Something identifying the location and machine that this cache is for. + /// Should the repository be copied to a different machine, the entire cache can immediately be invalidated. + identifier: BString, + /// Stat for the .git/info/exclude file + info_exclude: Option<untracked_cache::OidStat>, + /// Stat for the `core.excludesfile` + excludes_file: Option<untracked_cache::OidStat>, + /// Usually `.gitignore` + exclude_filename_per_dir: BString, + dir_flags: u32, + + /// A list of directories and sub-directories, with `directories[0]` being the root. + directories: Vec<untracked_cache::Directory>, +} + +/// The extension for keeping state on recent information provided by the filesystem monitor. +#[allow(dead_code)] +#[derive(Clone)] +pub struct FsMonitor { + token: fs_monitor::Token, + /// if a bit is true, the respective entry is NOT valid as per the fs monitor. + entry_dirty: gix_bitmap::ewah::Vec, +} + +mod iter; + +pub(crate) mod fs_monitor; + +/// +pub mod decode; + +/// +pub mod tree; + +/// +pub mod end_of_index_entry; + +pub(crate) mod index_entry_offset_table; + +/// +pub mod link; + +pub(crate) mod resolve_undo; + +/// +pub mod untracked_cache; + +/// +pub mod sparse; diff --git a/vendor/gix-index/src/extension/resolve_undo.rs b/vendor/gix-index/src/extension/resolve_undo.rs new file mode 100644 index 000000000..eb6db9ad7 --- /dev/null +++ b/vendor/gix-index/src/extension/resolve_undo.rs @@ -0,0 +1,64 @@ +use bstr::BString; +use gix_hash::ObjectId; + +use crate::{ + extension::Signature, + util::{split_at_byte_exclusive, split_at_pos}, +}; + +pub type Paths = Vec<ResolvePath>; + +#[allow(dead_code)] +#[derive(Clone)] +pub struct ResolvePath { + /// relative to the root of the repository, or what would be stored in the index + name: BString, + + /// 0 = ancestor/common, 1 = ours, 2 = theirs + stages: [Option<Stage>; 3], +} + +#[allow(dead_code)] +#[derive(Clone, Copy)] +pub struct Stage { + mode: u32, + id: ObjectId, +} + +pub const SIGNATURE: Signature = *b"REUC"; + +pub fn decode(mut data: &[u8], object_hash: gix_hash::Kind) -> Option<Paths> { + let hash_len = object_hash.len_in_bytes(); + let mut out = Vec::new(); + + while !data.is_empty() { + let (path, rest) = split_at_byte_exclusive(data, 0)?; + data = rest; + + let mut modes = [0u32; 3]; + for mode in modes.iter_mut() { + let (mode_ascii, rest) = split_at_byte_exclusive(data, 0)?; + data = rest; + *mode = u32::from_str_radix(std::str::from_utf8(mode_ascii).ok()?, 8).ok()?; + } + + let mut stages = [None, None, None]; + for (mode, stage) in modes.iter().zip(stages.iter_mut()) { + if *mode == 0 { + continue; + } + let (hash, rest) = split_at_pos(data, hash_len)?; + data = rest; + *stage = Some(Stage { + mode: *mode, + id: ObjectId::from(hash), + }); + } + + out.push(ResolvePath { + name: path.into(), + stages, + }); + } + out.into() +} diff --git a/vendor/gix-index/src/extension/sparse.rs b/vendor/gix-index/src/extension/sparse.rs new file mode 100644 index 000000000..ab219c8d6 --- /dev/null +++ b/vendor/gix-index/src/extension/sparse.rs @@ -0,0 +1,11 @@ +use crate::extension::Signature; + +/// The signature of the sparse index extension, nothing more than an indicator at this time. +pub const SIGNATURE: Signature = *b"sdir"; + +/// Serialize the sparse index extension to `out` +pub fn write_to(mut out: impl std::io::Write) -> Result<(), std::io::Error> { + out.write_all(&SIGNATURE)?; + out.write_all(&0_u32.to_be_bytes())?; + Ok(()) +} diff --git a/vendor/gix-index/src/extension/tree/decode.rs b/vendor/gix-index/src/extension/tree/decode.rs new file mode 100644 index 000000000..82221b48c --- /dev/null +++ b/vendor/gix-index/src/extension/tree/decode.rs @@ -0,0 +1,63 @@ +use std::convert::TryInto; + +use gix_hash::ObjectId; + +use crate::{ + extension::Tree, + util::{split_at_byte_exclusive, split_at_pos}, +}; + +/// A recursive data structure +pub fn decode(data: &[u8], object_hash: gix_hash::Kind) -> Option<Tree> { + let (tree, data) = one_recursive(data, object_hash.len_in_bytes())?; + assert!( + data.is_empty(), + "BUG: should fully consume the entire tree extension chunk, got {} left", + data.len() + ); + Some(tree) +} + +fn one_recursive(data: &[u8], hash_len: usize) -> Option<(Tree, &[u8])> { + let (path, data) = split_at_byte_exclusive(data, 0)?; + + let (entry_count, data) = split_at_byte_exclusive(data, b' ')?; + let num_entries: i32 = btoi::btoi(entry_count).ok()?; + + let (subtree_count, data) = split_at_byte_exclusive(data, b'\n')?; + let subtree_count: usize = btoi::btou(subtree_count).ok()?; + + let (id, mut data) = if num_entries >= 0 { + let (hash, data) = split_at_pos(data, hash_len)?; + (ObjectId::from(hash), data) + } else { + ( + ObjectId::null(gix_hash::Kind::from_hex_len(hash_len * 2).expect("valid hex_len")), + data, + ) + }; + + let mut subtrees = Vec::with_capacity(subtree_count); + for _ in 0..subtree_count { + let (tree, rest) = one_recursive(data, hash_len)?; + subtrees.push(tree); + data = rest; + } + + subtrees.sort_by(|a, b| a.name.cmp(&b.name)); + let num_trees = subtrees.len(); + subtrees.dedup_by(|a, b| a.name == b.name); + if num_trees != subtrees.len() { + return None; + } + + Some(( + Tree { + id, + num_entries: num_entries.try_into().ok(), + name: path.into(), + children: subtrees, + }, + data, + )) +} diff --git a/vendor/gix-index/src/extension/tree/mod.rs b/vendor/gix-index/src/extension/tree/mod.rs new file mode 100644 index 000000000..f20759399 --- /dev/null +++ b/vendor/gix-index/src/extension/tree/mod.rs @@ -0,0 +1,21 @@ +use crate::extension::Signature; + +/// The signature for tree extensions +pub const SIGNATURE: Signature = *b"TREE"; + +/// +pub mod verify; + +mod decode; +pub use decode::decode; + +mod write; + +#[cfg(test)] +mod tests { + + #[test] + fn size_of_tree() { + assert_eq!(std::mem::size_of::<crate::extension::Tree>(), 88); + } +} diff --git a/vendor/gix-index/src/extension/tree/verify.rs b/vendor/gix-index/src/extension/tree/verify.rs new file mode 100644 index 000000000..82fd03c8c --- /dev/null +++ b/vendor/gix-index/src/extension/tree/verify.rs @@ -0,0 +1,134 @@ +use std::cmp::Ordering; + +use bstr::{BString, ByteSlice}; + +use crate::extension::Tree; + +/// The error returned by [Tree::verify()][crate::extension::Tree::verify()]. +#[derive(Debug, thiserror::Error)] +#[allow(missing_docs)] +pub enum Error { + #[error("The entry {entry_id} at path '{name}' in parent tree {parent_id} wasn't found in the nodes children, making it incomplete")] + MissingTreeDirectory { + parent_id: gix_hash::ObjectId, + entry_id: gix_hash::ObjectId, + name: BString, + }, + #[error("The tree with id {oid} wasn't found in the object database")] + TreeNodeNotFound { oid: gix_hash::ObjectId }, + #[error("The tree with id {oid} should have {expected_childcount} children, but its cached representation had {actual_childcount} of them")] + TreeNodeChildcountMismatch { + oid: gix_hash::ObjectId, + expected_childcount: usize, + actual_childcount: usize, + }, + #[error("The root tree was named '{name}', even though it should be empty")] + RootWithName { name: BString }, + #[error( + "Expected not more than {expected} entries to be reachable from the top-level, but actual count was {actual}" + )] + EntriesCount { actual: u32, expected: u32 }, + #[error( + "Parent tree '{parent_id}' contained out-of order trees prev = '{previous_path}' and next = '{current_path}'" + )] + OutOfOrder { + parent_id: gix_hash::ObjectId, + current_path: BString, + previous_path: BString, + }, +} + +impl Tree { + /// + pub fn verify<F>(&self, use_find: bool, mut find: F) -> Result<(), Error> + where + F: for<'a> FnMut(&gix_hash::oid, &'a mut Vec<u8>) -> Option<gix_object::TreeRefIter<'a>>, + { + fn verify_recursive<F>( + parent_id: gix_hash::ObjectId, + children: &[Tree], + mut find_buf: Option<&mut Vec<u8>>, + find: &mut F, + ) -> Result<Option<u32>, Error> + where + F: for<'a> FnMut(&gix_hash::oid, &'a mut Vec<u8>) -> Option<gix_object::TreeRefIter<'a>>, + { + if children.is_empty() { + return Ok(None); + } + let mut entries = 0; + let mut prev = None::<&Tree>; + for child in children { + entries += child.num_entries.unwrap_or(0); + if let Some(prev) = prev { + if prev.name.cmp(&child.name) != Ordering::Less { + return Err(Error::OutOfOrder { + parent_id, + previous_path: prev.name.as_bstr().into(), + current_path: child.name.as_bstr().into(), + }); + } + } + prev = Some(child); + } + if let Some(buf) = find_buf.as_mut() { + let tree_entries = find(&parent_id, buf).ok_or(Error::TreeNodeNotFound { oid: parent_id })?; + let mut num_entries = 0; + for entry in tree_entries + .filter_map(Result::ok) + .filter(|e| e.mode == gix_object::tree::EntryMode::Tree) + { + children + .binary_search_by(|e| e.name.as_bstr().cmp(entry.filename)) + .map_err(|_| Error::MissingTreeDirectory { + parent_id, + entry_id: entry.oid.to_owned(), + name: entry.filename.to_owned(), + })?; + num_entries += 1; + } + + if num_entries != children.len() { + return Err(Error::TreeNodeChildcountMismatch { + oid: parent_id, + expected_childcount: num_entries, + actual_childcount: children.len(), + }); + } + } + for child in children { + // This is actually needed here as it's a mut ref, which isn't copy. We do a re-borrow here. + #[allow(clippy::needless_option_as_deref)] + let actual_num_entries = verify_recursive(child.id, &child.children, find_buf.as_deref_mut(), find)?; + if let Some((actual, num_entries)) = actual_num_entries.zip(child.num_entries) { + if actual > num_entries { + return Err(Error::EntriesCount { + actual, + expected: num_entries, + }); + } + } + } + Ok(entries.into()) + } + + if !self.name.is_empty() { + return Err(Error::RootWithName { + name: self.name.as_bstr().into(), + }); + } + + let mut buf = Vec::new(); + let declared_entries = verify_recursive(self.id, &self.children, use_find.then_some(&mut buf), &mut find)?; + if let Some((actual, num_entries)) = declared_entries.zip(self.num_entries) { + if actual > num_entries { + return Err(Error::EntriesCount { + actual, + expected: num_entries, + }); + } + } + + Ok(()) + } +} diff --git a/vendor/gix-index/src/extension/tree/write.rs b/vendor/gix-index/src/extension/tree/write.rs new file mode 100644 index 000000000..2b1e3d949 --- /dev/null +++ b/vendor/gix-index/src/extension/tree/write.rs @@ -0,0 +1,45 @@ +use std::convert::TryFrom; + +use crate::extension::{tree, Tree}; + +impl Tree { + /// Serialize this instance to `out`. + pub fn write_to(&self, mut out: impl std::io::Write) -> Result<(), std::io::Error> { + fn tree_entry(out: &mut impl std::io::Write, tree: &Tree) -> Result<(), std::io::Error> { + let mut buf = itoa::Buffer::new(); + let num_entries = match tree.num_entries { + Some(num_entries) => buf.format(num_entries), + None => buf.format(-1), + }; + + out.write_all(tree.name.as_slice())?; + out.write_all(b"\0")?; + out.write_all(num_entries.as_bytes())?; + out.write_all(b" ")?; + let num_children = buf.format(tree.children.len()); + out.write_all(num_children.as_bytes())?; + out.write_all(b"\n")?; + if tree.num_entries.is_some() { + out.write_all(tree.id.as_bytes())?; + } + + for child in &tree.children { + tree_entry(out, child)?; + } + + Ok(()) + } + + let signature = tree::SIGNATURE; + + let estimated_size = self.num_entries.unwrap_or(0) * (300 + 3 + 1 + 3 + 1 + 20); + let mut entries: Vec<u8> = Vec::with_capacity(estimated_size as usize); + tree_entry(&mut entries, self)?; + + out.write_all(&signature)?; + out.write_all(&(u32::try_from(entries.len()).expect("less than 4GB tree extension")).to_be_bytes())?; + out.write_all(&entries)?; + + Ok(()) + } +} diff --git a/vendor/gix-index/src/extension/untracked_cache.rs b/vendor/gix-index/src/extension/untracked_cache.rs new file mode 100644 index 000000000..9f72e0775 --- /dev/null +++ b/vendor/gix-index/src/extension/untracked_cache.rs @@ -0,0 +1,156 @@ +use std::convert::TryInto; + +use bstr::BString; +use gix_hash::ObjectId; + +use crate::{ + entry, + extension::{Signature, UntrackedCache}, + util::{read_u32, split_at_byte_exclusive, split_at_pos, var_int}, +}; + +/// A structure to track filesystem stat information along with an object id, linking a worktree file with what's in our ODB. +#[derive(Clone)] +pub struct OidStat { + /// The file system stat information + pub stat: entry::Stat, + /// The id of the file in our ODB. + pub id: ObjectId, +} + +/// A directory with information about its untracked files, and its sub-directories +#[derive(Clone)] +pub struct Directory { + /// The directories name, or an empty string if this is the root directory. + pub name: BString, + /// Untracked files and directory names + pub untracked_entries: Vec<BString>, + /// indices for sub-directories similar to this one. + pub sub_directories: Vec<usize>, + + /// The directories stat data, if available or valid // TODO: or is it the exclude file? + pub stat: Option<entry::Stat>, + /// The oid of a .gitignore file, if it exists + pub exclude_file_oid: Option<ObjectId>, + /// TODO: figure out what this really does + pub check_only: bool, +} + +/// Only used as an indicator +pub const SIGNATURE: Signature = *b"UNTR"; + +// #[allow(unused)] +/// Decode an untracked cache extension from `data`, assuming object hashes are of type `object_hash`. +pub fn decode(data: &[u8], object_hash: gix_hash::Kind) -> Option<UntrackedCache> { + if !data.last().map(|b| *b == 0).unwrap_or(false) { + return None; + } + let (identifier_len, data) = var_int(data)?; + let (identifier, data) = split_at_pos(data, identifier_len.try_into().ok()?)?; + + let hash_len = object_hash.len_in_bytes(); + let (info_exclude, data) = decode_oid_stat(data, hash_len)?; + let (excludes_file, data) = decode_oid_stat(data, hash_len)?; + let (dir_flags, data) = read_u32(data)?; + let (exclude_filename_per_dir, data) = split_at_byte_exclusive(data, 0)?; + + let (num_directory_blocks, data) = var_int(data)?; + + let mut res = UntrackedCache { + identifier: identifier.into(), + info_exclude: (!info_exclude.id.is_null()).then_some(info_exclude), + excludes_file: (!excludes_file.id.is_null()).then_some(excludes_file), + exclude_filename_per_dir: exclude_filename_per_dir.into(), + dir_flags, + directories: Vec::new(), + }; + if num_directory_blocks == 0 { + return data.is_empty().then_some(res); + } + + let num_directory_blocks = num_directory_blocks.try_into().ok()?; + let directories = &mut res.directories; + directories.reserve(num_directory_blocks); + + let data = decode_directory_block(data, directories)?; + if directories.len() != num_directory_blocks { + return None; + } + let (valid, data) = gix_bitmap::ewah::decode(data).ok()?; + let (check_only, data) = gix_bitmap::ewah::decode(data).ok()?; + let (hash_valid, mut data) = gix_bitmap::ewah::decode(data).ok()?; + + if valid.num_bits() > num_directory_blocks + || check_only.num_bits() > num_directory_blocks + || hash_valid.num_bits() > num_directory_blocks + { + return None; + } + + check_only.for_each_set_bit(|index| { + directories[index].check_only = true; + Some(()) + })?; + valid.for_each_set_bit(|index| { + let (stat, rest) = crate::decode::stat(data)?; + directories[index].stat = stat.into(); + data = rest; + Some(()) + }); + hash_valid.for_each_set_bit(|index| { + let (hash, rest) = split_at_pos(data, hash_len)?; + data = rest; + directories[index].exclude_file_oid = ObjectId::from(hash).into(); + Some(()) + }); + + // null-byte checked in the beginning + if data.len() != 1 { + return None; + } + res.into() +} + +fn decode_directory_block<'a>(data: &'a [u8], directories: &mut Vec<Directory>) -> Option<&'a [u8]> { + let (num_untracked, data) = var_int(data)?; + let (num_dirs, data) = var_int(data)?; + let (name, mut data) = split_at_byte_exclusive(data, 0)?; + let mut untracked_entries = Vec::<BString>::with_capacity(num_untracked.try_into().ok()?); + for _ in 0..num_untracked { + let (name, rest) = split_at_byte_exclusive(data, 0)?; + data = rest; + untracked_entries.push(name.into()); + } + + let index = directories.len(); + directories.push(Directory { + name: name.into(), + untracked_entries, + sub_directories: Vec::with_capacity(num_dirs.try_into().ok()?), + // the following are set later through their bitmaps + stat: None, + exclude_file_oid: None, + check_only: false, + }); + + for _ in 0..num_dirs { + let subdir_index = directories.len(); + let rest = decode_directory_block(data, directories)?; + data = rest; + directories[index].sub_directories.push(subdir_index); + } + + data.into() +} + +fn decode_oid_stat(data: &[u8], hash_len: usize) -> Option<(OidStat, &[u8])> { + let (stat, data) = crate::decode::stat(data)?; + let (hash, data) = split_at_pos(data, hash_len)?; + Some(( + OidStat { + stat, + id: ObjectId::from(hash), + }, + data, + )) +} diff --git a/vendor/gix-index/src/file/init.rs b/vendor/gix-index/src/file/init.rs new file mode 100644 index 000000000..534f1f08b --- /dev/null +++ b/vendor/gix-index/src/file/init.rs @@ -0,0 +1,81 @@ +#![allow(unused)] + +use std::path::{Path, PathBuf}; + +use memmap2::Mmap; + +use crate::{decode, extension, File, State}; + +mod error { + + /// The error returned by [File::at()][super::File::at()]. + #[derive(Debug, thiserror::Error)] + #[allow(missing_docs)] + pub enum Error { + #[error("An IO error occurred while opening the index")] + Io(#[from] std::io::Error), + #[error(transparent)] + Decode(#[from] crate::decode::Error), + #[error(transparent)] + LinkExtension(#[from] crate::extension::link::decode::Error), + } +} + +pub use error::Error; + +/// Initialization +impl File { + /// Try to open the index file at `path` with `options`, assuming `object_hash` is used throughout the file, or create a new + /// index that merely exists in memory and is empty. + /// + /// Note that the `path` will not be written if it doesn't exist. + pub fn at_or_default( + path: impl Into<PathBuf>, + object_hash: gix_hash::Kind, + options: decode::Options, + ) -> Result<Self, Error> { + let path = path.into(); + Ok(match Self::at(&path, object_hash, options) { + Ok(f) => f, + Err(Error::Io(err)) if err.kind() == std::io::ErrorKind::NotFound => { + File::from_state(State::new(object_hash), path) + } + Err(err) => return Err(err), + }) + } + + /// Open an index file at `path` with `options`, assuming `object_hash` is used throughout the file. + pub fn at(path: impl Into<PathBuf>, object_hash: gix_hash::Kind, options: decode::Options) -> Result<Self, Error> { + let path = path.into(); + let (data, mtime) = { + // SAFETY: we have to take the risk of somebody changing the file underneath. Git never writes into the same file. + let file = std::fs::File::open(&path)?; + #[allow(unsafe_code)] + let data = unsafe { Mmap::map(&file)? }; + (data, filetime::FileTime::from_last_modification_time(&file.metadata()?)) + }; + + let (state, checksum) = State::from_bytes(&data, mtime, object_hash, options)?; + let mut file = File { + state, + path, + checksum: Some(checksum), + }; + if let Some(mut link) = file.link.take() { + link.dissolve_into(&mut file, object_hash, options)?; + } + + Ok(file) + } + + /// Consume `state` and pretend it was read from `path`, setting our checksum to `null`. + /// + /// `File` instances created like that should be written to disk to set the correct checksum via `[File::write()]`. + pub fn from_state(state: crate::State, path: impl Into<PathBuf>) -> Self { + File { + state, + path: path.into(), + checksum: None, + } + } +} diff --git a/vendor/gix-index/src/file/mod.rs b/vendor/gix-index/src/file/mod.rs new file mode 100644 index 000000000..40332abbd --- /dev/null +++ b/vendor/gix-index/src/file/mod.rs @@ -0,0 +1,90 @@ +mod impls { + use std::ops::{Deref, DerefMut}; + + use crate::{File, State}; + + impl Deref for File { + type Target = State; + + fn deref(&self) -> &Self::Target { + &self.state + } + } + + impl DerefMut for File { + fn deref_mut(&mut self) -> &mut Self::Target { + &mut self.state + } + } +} + +mod impl_ { + use std::fmt::Formatter; + + use crate::{File, State}; + + impl std::fmt::Debug for File { + fn fmt(&self, f: &mut Formatter<'_>) -> std::fmt::Result { + f.debug_struct("File") + .field("path", &self.path.display()) + .field("checksum", &self.checksum) + .finish_non_exhaustive() + } + } + + impl From<File> for State { + fn from(f: File) -> Self { + f.state + } + } +} + +mod access { + use crate::File; + + /// Consumption + impl File { + /// Take all non-copy parts of the index. + pub fn into_parts(self) -> (crate::State, std::path::PathBuf) { + (self.state, self.path) + } + } + + /// Access + impl File { + /// The path from which the index was read or to which it is supposed to be written when used with [`File::from_state()`]. + pub fn path(&self) -> &std::path::Path { + &self.path + } + + /// The checksum over the file that was read or written to disk, or `None` if the state in memory was never serialized. + /// + /// Note that even if `Some`, it will only represent the state in memory right after reading or [writing][File::write()]. + pub fn checksum(&self) -> Option<gix_hash::ObjectId> { + self.checksum + } + } +} + +mod mutation { + use std::path::PathBuf; + + use crate::File; + + /// Mutating access + impl File { + /// Set the path at which we think we are located to the given `path`. + /// + /// This is useful to change the location of the index *once* it is written via [`write()`][File::write()]. + pub fn set_path(&mut self, path: impl Into<PathBuf>) { + self.path = path.into(); + } + } +} + +/// +pub mod init; +/// +pub mod verify; +/// +pub mod write; diff --git a/vendor/gix-index/src/file/verify.rs b/vendor/gix-index/src/file/verify.rs new file mode 100644 index 000000000..6743b37a7 --- /dev/null +++ b/vendor/gix-index/src/file/verify.rs @@ -0,0 +1,41 @@ +use std::sync::atomic::AtomicBool; + +use crate::File; + +mod error { + /// The error returned by [File::verify_integrity()][super::File::verify_integrity()]. + #[derive(Debug, thiserror::Error)] + #[allow(missing_docs)] + pub enum Error { + #[error("Could not read index file to generate hash")] + Io(#[from] std::io::Error), + #[error("Index checksum should have been {expected}, but was {actual}")] + ChecksumMismatch { + actual: gix_hash::ObjectId, + expected: gix_hash::ObjectId, + }, + #[error("Checksum of in-memory index wasn't computed yet")] + NoChecksum, + } +} +pub use error::Error; + +impl File { + /// Verify the integrity of the index to assure its consistency. + pub fn verify_integrity(&self) -> Result<(), Error> { + let checksum = self.checksum.ok_or(Error::NoChecksum)?; + let num_bytes_to_hash = self.path.metadata()?.len() - checksum.as_bytes().len() as u64; + let should_interrupt = AtomicBool::new(false); + let actual = gix_features::hash::bytes_of_file( + &self.path, + num_bytes_to_hash as usize, + checksum.kind(), + &mut gix_features::progress::Discard, + &should_interrupt, + )?; + (actual == checksum).then_some(()).ok_or(Error::ChecksumMismatch { + actual, + expected: checksum, + }) + } +} diff --git a/vendor/gix-index/src/file/write.rs b/vendor/gix-index/src/file/write.rs new file mode 100644 index 000000000..1e8afc07d --- /dev/null +++ b/vendor/gix-index/src/file/write.rs @@ -0,0 +1,51 @@ +use gix_features::hash; + +use crate::{write, File, Version}; + +/// The error produced by [`File::write()`]. +#[derive(Debug, thiserror::Error)] +#[allow(missing_docs)] +pub enum Error { + #[error(transparent)] + Io(#[from] std::io::Error), + #[error("Could not acquire lock for index file")] + AcquireLock(#[from] gix_lock::acquire::Error), + #[error("Could not commit lock for index file")] + CommitLock(#[from] gix_lock::commit::Error<gix_lock::File>), +} + +impl File { + /// Write the index to `out` with `options`, to be readable by [`File::at()`], returning the version that was actually written + /// to retain all information of this index. + pub fn write_to( + &self, + mut out: impl std::io::Write, + options: write::Options, + ) -> std::io::Result<(Version, gix_hash::ObjectId)> { + let mut hasher = hash::Write::new(&mut out, self.state.object_hash); + let version = self.state.write_to(&mut hasher, options)?; + + let hash = hasher.hash.digest(); + out.write_all(&hash)?; + Ok((version, gix_hash::ObjectId::from(hash))) + } + + /// Write ourselves to the path we were read from after acquiring a lock, using `options`. + /// + /// Note that the hash produced will be stored which is why we need to be mutable. + pub fn write(&mut self, options: write::Options) -> Result<(), Error> { + let mut lock = std::io::BufWriter::new(gix_lock::File::acquire_to_update_resource( + &self.path, + gix_lock::acquire::Fail::Immediately, + None, + )?); + let (version, digest) = self.write_to(&mut lock, options)?; + match lock.into_inner() { + Ok(lock) => lock.commit()?, + Err(err) => return Err(err.into_error().into()), + }; + self.state.version = version; + self.checksum = Some(digest); + Ok(()) + } +} diff --git a/vendor/gix-index/src/init.rs b/vendor/gix-index/src/init.rs new file mode 100644 index 000000000..abd71ffdd --- /dev/null +++ b/vendor/gix-index/src/init.rs @@ -0,0 +1,155 @@ +mod from_tree { + use std::collections::VecDeque; + + use bstr::{BStr, BString, ByteSlice, ByteVec}; + use gix_object::{ + tree::{self, EntryMode}, + TreeRefIter, + }; + use gix_traverse::tree::{breadthfirst, visit::Action, Visit}; + + use crate::{ + entry::{Flags, Mode, Stat}, + Entry, PathStorage, State, Version, + }; + + /// Initialization + impl State { + /// Return a new and empty in-memory index assuming the given `object_hash`. + pub fn new(object_hash: gix_hash::Kind) -> Self { + State { + object_hash, + timestamp: filetime::FileTime::now(), + version: Version::V2, + entries: vec![], + path_backing: vec![], + is_sparse: false, + tree: None, + link: None, + resolve_undo: None, + untracked: None, + fs_monitor: None, + } + } + /// Create an index [`State`][crate::State] by traversing `tree` recursively, accessing sub-trees + /// with `find`. + /// + /// **No extension data is currently produced**. + pub fn from_tree<Find>(tree: &gix_hash::oid, mut find: Find) -> Result<Self, breadthfirst::Error> + where + Find: for<'a> FnMut(&gix_hash::oid, &'a mut Vec<u8>) -> Option<TreeRefIter<'a>>, + { + let mut buf = Vec::new(); + let root = find(tree, &mut buf).ok_or(breadthfirst::Error::NotFound { oid: tree.into() })?; + + let mut delegate = CollectEntries::new(); + breadthfirst(root, breadthfirst::State::default(), &mut find, &mut delegate)?; + + let CollectEntries { + mut entries, + path_backing, + path: _, + path_deque: _, + } = delegate; + + entries.sort_by(|a, b| Entry::cmp_filepaths(a.path_in(&path_backing), b.path_in(&path_backing))); + + Ok(State { + object_hash: tree.kind(), + timestamp: filetime::FileTime::now(), + version: Version::V2, + entries, + path_backing, + is_sparse: false, + tree: None, + link: None, + resolve_undo: None, + untracked: None, + fs_monitor: None, + }) + } + } + + struct CollectEntries { + entries: Vec<Entry>, + path_backing: PathStorage, + path: BString, + path_deque: VecDeque<BString>, + } + + impl CollectEntries { + pub fn new() -> CollectEntries { + CollectEntries { + entries: Vec::new(), + path_backing: Vec::new(), + path: BString::default(), + path_deque: VecDeque::new(), + } + } + + fn push_element(&mut self, name: &BStr) { + if !self.path.is_empty() { + self.path.push(b'/'); + } + self.path.push_str(name); + } + + pub fn add_entry(&mut self, entry: &tree::EntryRef<'_>) { + let mode = match entry.mode { + EntryMode::Tree => unreachable!("visit_non_tree() called us"), + EntryMode::Blob => Mode::FILE, + EntryMode::BlobExecutable => Mode::FILE_EXECUTABLE, + EntryMode::Link => Mode::SYMLINK, + EntryMode::Commit => Mode::COMMIT, + }; + + let path_start = self.path_backing.len(); + self.path_backing.extend_from_slice(&self.path); + + let new_entry = Entry { + stat: Stat::default(), + id: entry.oid.into(), + flags: Flags::empty(), + mode, + path: path_start..self.path_backing.len(), + }; + + self.entries.push(new_entry); + } + } + + impl Visit for CollectEntries { + fn pop_front_tracked_path_and_set_current(&mut self) { + self.path = self + .path_deque + .pop_front() + .expect("every call is matched with push_tracked_path_component"); + } + + fn push_back_tracked_path_component(&mut self, component: &bstr::BStr) { + self.push_element(component); + self.path_deque.push_back(self.path.clone()); + } + + fn push_path_component(&mut self, component: &bstr::BStr) { + self.push_element(component); + } + + fn pop_path_component(&mut self) { + if let Some(pos) = self.path.rfind_byte(b'/') { + self.path.resize(pos, 0); + } else { + self.path.clear(); + } + } + + fn visit_tree(&mut self, _entry: &gix_object::tree::EntryRef<'_>) -> gix_traverse::tree::visit::Action { + Action::Continue + } + + fn visit_nontree(&mut self, entry: &gix_object::tree::EntryRef<'_>) -> gix_traverse::tree::visit::Action { + self.add_entry(entry); + Action::Continue + } + } +} diff --git a/vendor/gix-index/src/lib.rs b/vendor/gix-index/src/lib.rs new file mode 100644 index 000000000..d8451c545 --- /dev/null +++ b/vendor/gix-index/src/lib.rs @@ -0,0 +1,201 @@ +//! ## Feature Flags +#![cfg_attr( + feature = "document-features", + cfg_attr(doc, doc = ::document_features::document_features!()) +)] +#![cfg_attr(docsrs, feature(doc_cfg, doc_auto_cfg))] +#![deny(unsafe_code, missing_docs, rust_2018_idioms)] + +use std::{ops::Range, path::PathBuf}; + +use filetime::FileTime; +pub use gix_hash as hash; + +/// +pub mod file; + +/// +pub mod extension; + +/// +pub mod entry; + +mod access; + +mod init; + +/// +pub mod decode; + +/// +pub mod verify; + +/// +pub mod write; + +/// All known versions of a git index file. +#[derive(PartialEq, Eq, Debug, Hash, Ord, PartialOrd, Clone, Copy)] +#[cfg_attr(feature = "serde1", derive(serde::Serialize, serde::Deserialize))] +pub enum Version { + /// Supports entries and various extensions. + V2 = 2, + /// Adds support for additional flags for each entry, called extended entries. + V3 = 3, + /// Supports deltified entry paths. + V4 = 4, +} + +/// An entry in the index, identifying a non-tree item on disk. +#[derive(Debug, Clone, Eq, PartialEq)] +pub struct Entry { + /// The filesystem stat information for the file on disk. + pub stat: entry::Stat, + /// The object id for this entry's ODB representation (assuming it's up-to-date with it). + pub id: gix_hash::ObjectId, + /// Additional flags for use in algorithms and for efficiently storing stage information. + pub flags: entry::Flags, + /// The kind of item this entry represents - it's not all blobs in the index anymore. + pub mode: entry::Mode, + /// The range to lookup in the path backing to obtain the entry path relative to the repository. + /// This costs additional memory but is probably worth it given that paths can stay in one big allocation. + path: Range<usize>, +} + +/// An index file whose state was read from a file on disk. +#[derive(Clone)] +pub struct File { + /// The state containing the actual index data. + pub(crate) state: State, + /// The path from which the index was read or to which it is supposed to be written. + pub(crate) path: PathBuf, + /// The checksum of all bytes prior to the checksum itself. + pub(crate) checksum: Option<gix_hash::ObjectId>, +} + +/// The type to use and store paths to all entries. +pub type PathStorage = Vec<u8>; +/// The type to use and store paths to all entries, as reference +pub type PathStorageRef = [u8]; + +/// An in-memory cache of a fully parsed git index file. +/// +/// As opposed to a snapshot, it's meant to be altered and eventually be written back to disk or converted into a tree. +/// We treat index and its state synonymous. +#[derive(Clone)] +pub struct State { + /// The kind of object hash used when storing the underlying file. + /// + /// Empty states for example won't have a single object id, so deduction of the hash used isn't always possible. + object_hash: gix_hash::Kind, + /// The time at which the state was created, indicating its freshness compared to other files on disk. + /// + /// Note that on platforms that only have a precisions of a second for this time, we will treat all entries with the + /// same timestamp as this as potentially changed, checking more thoroughly if a change actually happened. + #[allow(dead_code)] + timestamp: FileTime, + version: Version, + entries: Vec<Entry>, + /// A memory area keeping all index paths, in full length, independently of the index version. + path_backing: PathStorage, + /// True if one entry in the index has a special marker mode + is_sparse: bool, + + // Extensions + tree: Option<extension::Tree>, + link: Option<extension::Link>, + resolve_undo: Option<extension::resolve_undo::Paths>, + untracked: Option<extension::UntrackedCache>, + fs_monitor: Option<extension::FsMonitor>, +} + +mod impls { + use std::fmt::{Debug, Formatter}; + + use crate::State; + + impl Debug for State { + fn fmt(&self, f: &mut Formatter<'_>) -> std::fmt::Result { + for entry in &self.entries { + writeln!( + f, + "{} {}{:?} {} {}", + match entry.flags.stage() { + 0 => "BASE ", + 1 => "OURS ", + 2 => "THEIRS ", + _ => "UNKNOWN", + }, + if entry.flags.is_empty() { + "".to_string() + } else { + format!("{:?} ", entry.flags) + }, + entry.mode, + entry.id, + entry.path(self) + )?; + } + Ok(()) + } + } +} + +pub(crate) mod util { + use std::convert::TryInto; + + #[inline] + pub fn var_int(data: &[u8]) -> Option<(u64, &[u8])> { + let (num, consumed) = gix_features::decode::leb64_from_read(data).ok()?; + let data = &data[consumed..]; + (num, data).into() + } + + #[inline] + pub fn read_u32(data: &[u8]) -> Option<(u32, &[u8])> { + split_at_pos(data, 4).map(|(num, data)| (u32::from_be_bytes(num.try_into().unwrap()), data)) + } + + #[inline] + pub fn read_u64(data: &[u8]) -> Option<(u64, &[u8])> { + split_at_pos(data, 8).map(|(num, data)| (u64::from_be_bytes(num.try_into().unwrap()), data)) + } + + #[inline] + pub fn from_be_u32(b: &[u8]) -> u32 { + u32::from_be_bytes(b.try_into().unwrap()) + } + + #[inline] + pub fn split_at_byte_exclusive(data: &[u8], byte: u8) -> Option<(&[u8], &[u8])> { + if data.len() < 2 { + return None; + } + data.iter().enumerate().find_map(|(idx, b)| { + (*b == byte).then(|| { + if idx == 0 { + (&[] as &[u8], &data[1..]) + } else { + let (a, b) = data.split_at(idx); + (a, &b[1..]) + } + }) + }) + } + + #[inline] + pub fn split_at_pos(data: &[u8], pos: usize) -> Option<(&[u8], &[u8])> { + if data.len() < pos { + return None; + } + data.split_at(pos).into() + } +} + +#[test] +fn size_of_entry() { + assert_eq!(std::mem::size_of::<crate::Entry>(), 80); + + // the reason we have our own time is half the size. + assert_eq!(std::mem::size_of::<crate::entry::Time>(), 8); + assert_eq!(std::mem::size_of::<filetime::FileTime>(), 16); +} diff --git a/vendor/gix-index/src/verify.rs b/vendor/gix-index/src/verify.rs new file mode 100644 index 000000000..31b48d1cb --- /dev/null +++ b/vendor/gix-index/src/verify.rs @@ -0,0 +1,73 @@ +use std::cmp::Ordering; + +use crate::State; + +/// +pub mod entries { + use bstr::BString; + + /// The error returned by [State::verify_entries()][crate::State::verify_entries()]. + #[derive(Debug, thiserror::Error)] + #[allow(missing_docs)] + pub enum Error { + #[error("Entry '{current_path}' (stage = {current_stage}) at index {current_index} should order after prior entry '{previous_path}' (stage = {previous_stage})")] + OutOfOrder { + current_index: usize, + current_path: BString, + current_stage: u8, + previous_path: BString, + previous_stage: u8, + }, + } +} + +/// +pub mod extensions { + use crate::extension; + + /// An implementation of a `find` function that never finds or returns any object, a no-op. + pub fn no_find<'a>(_: &gix_hash::oid, _: &'a mut Vec<u8>) -> Option<gix_object::TreeRefIter<'a>> { + None + } + + /// The error returned by [State::verify_extensions()][crate::State::verify_extensions()]. + #[derive(Debug, thiserror::Error)] + #[allow(missing_docs)] + pub enum Error { + #[error(transparent)] + Tree(#[from] extension::tree::verify::Error), + } +} + +impl State { + /// Assure our entries are consistent. + pub fn verify_entries(&self) -> Result<(), entries::Error> { + let mut previous = None::<&crate::Entry>; + for (idx, entry) in self.entries.iter().enumerate() { + if let Some(prev) = previous { + if prev.cmp(entry, self) != Ordering::Less { + return Err(entries::Error::OutOfOrder { + current_index: idx, + current_path: entry.path(self).into(), + current_stage: entry.flags.stage() as u8, + previous_path: prev.path(self).into(), + previous_stage: prev.flags.stage() as u8, + }); + } + } + previous = Some(entry); + } + Ok(()) + } + + /// Note: `find` cannot be `Option<F>` as we can't call it with a closure then due to the indirection through `Some`. + pub fn verify_extensions<F>(&self, use_find: bool, find: F) -> Result<(), extensions::Error> + where + F: for<'a> FnMut(&gix_hash::oid, &'a mut Vec<u8>) -> Option<gix_object::TreeRefIter<'a>>, + { + self.tree().map(|t| t.verify(use_find, find)).transpose()?; + // TODO: verify links by running the whole set of tests on the index + // - do that once we load it as well, or maybe that's lazy loaded? Too many questions for now. + Ok(()) + } +} diff --git a/vendor/gix-index/src/write.rs b/vendor/gix-index/src/write.rs new file mode 100644 index 000000000..23643734c --- /dev/null +++ b/vendor/gix-index/src/write.rs @@ -0,0 +1,215 @@ +use std::{convert::TryInto, io::Write}; + +use crate::{entry, extension, write::util::CountBytes, State, Version}; + +/// A way to specify which of the optional extensions to write. +#[derive(Debug, Copy, Clone)] +pub enum Extensions { + /// Writes all available optional extensions to avoid loosing any information. + All, + /// Only write the given optional extensions, with each extension being marked by a boolean flag. + /// + /// # Note: mandatory extensions + /// + /// Mandatory extensions, like `sdir` or other lower-case ones, may not be configured here as they need to be present + /// or absent depending on the state of the index itself and for it to be valid. + Given { + /// Write the tree-cache extension, if present. + tree_cache: bool, + /// Write the end-of-index-entry extension. + end_of_index_entry: bool, + }, + /// Write no optional extension at all for what should be the smallest possible index + None, +} + +impl Default for Extensions { + fn default() -> Self { + Extensions::All + } +} + +impl Extensions { + /// Returns `Some(signature)` if it should be written out. + pub fn should_write(&self, signature: extension::Signature) -> Option<extension::Signature> { + match self { + Extensions::None => None, + Extensions::All => Some(signature), + Extensions::Given { + tree_cache, + end_of_index_entry, + } => match signature { + extension::tree::SIGNATURE => tree_cache, + extension::end_of_index_entry::SIGNATURE => end_of_index_entry, + _ => &false, + } + .then(|| signature), + } + } +} + +/// The options for use when [writing an index][State::write_to()]. +/// +/// Note that default options write either index V2 or V3 depending on the content of the entries. +#[derive(Debug, Default, Clone, Copy)] +pub struct Options { + /// Configures which extensions to write + pub extensions: Extensions, +} + +impl State { + /// Serialize this instance to `out` with [`options`][Options]. + pub fn write_to(&self, out: impl std::io::Write, Options { extensions }: Options) -> std::io::Result<Version> { + let version = self.detect_required_version(); + + let mut write = CountBytes::new(out); + let num_entries: u32 = self + .entries() + .len() + .try_into() + .expect("definitely not 4billion entries"); + let removed_entries: u32 = self + .entries() + .iter() + .filter(|e| e.flags.contains(entry::Flags::REMOVE)) + .count() + .try_into() + .expect("definitely not too many entries"); + + let offset_to_entries = header(&mut write, version, num_entries - removed_entries)?; + let offset_to_extensions = entries(&mut write, self, offset_to_entries)?; + let (extension_toc, out) = self.write_extensions(write, offset_to_extensions, extensions)?; + + if num_entries > 0 + && extensions + .should_write(extension::end_of_index_entry::SIGNATURE) + .is_some() + && !extension_toc.is_empty() + { + extension::end_of_index_entry::write_to(out, self.object_hash, offset_to_extensions, extension_toc)? + } + + Ok(version) + } + + fn write_extensions<T>( + &self, + mut write: CountBytes<T>, + offset_to_extensions: u32, + extensions: Extensions, + ) -> std::io::Result<(Vec<(extension::Signature, u32)>, T)> + where + T: std::io::Write, + { + type WriteExtFn<'a> = &'a dyn Fn(&mut dyn std::io::Write) -> Option<std::io::Result<extension::Signature>>; + let extensions: &[WriteExtFn<'_>] = &[ + &|write| { + extensions + .should_write(extension::tree::SIGNATURE) + .and_then(|signature| self.tree().map(|tree| tree.write_to(write).map(|_| signature))) + }, + &|write| { + self.is_sparse() + .then(|| extension::sparse::write_to(write).map(|_| extension::sparse::SIGNATURE)) + }, + ]; + + let mut offset_to_previous_ext = offset_to_extensions; + let mut out = Vec::with_capacity(5); + for write_ext in extensions { + if let Some(signature) = write_ext(&mut write).transpose()? { + let offset_past_ext = write.count; + let ext_size = offset_past_ext - offset_to_previous_ext - (extension::MIN_SIZE as u32); + offset_to_previous_ext = offset_past_ext; + out.push((signature, ext_size)); + } + } + Ok((out, write.inner)) + } +} + +impl State { + fn detect_required_version(&self) -> Version { + self.entries + .iter() + .find_map(|e| e.flags.contains(entry::Flags::EXTENDED).then_some(Version::V3)) + .unwrap_or(Version::V2) + } +} + +fn header<T: std::io::Write>( + out: &mut CountBytes<T>, + version: Version, + num_entries: u32, +) -> Result<u32, std::io::Error> { + let version = match version { + Version::V2 => 2_u32.to_be_bytes(), + Version::V3 => 3_u32.to_be_bytes(), + Version::V4 => 4_u32.to_be_bytes(), + }; + + out.write_all(crate::decode::header::SIGNATURE)?; + out.write_all(&version)?; + out.write_all(&num_entries.to_be_bytes())?; + + Ok(out.count) +} + +fn entries<T: std::io::Write>(out: &mut CountBytes<T>, state: &State, header_size: u32) -> Result<u32, std::io::Error> { + for entry in state.entries() { + if entry.flags.contains(entry::Flags::REMOVE) { + continue; + } + entry.write_to(&mut *out, state)?; + match (out.count - header_size) % 8 { + 0 => {} + n => { + let eight_null_bytes = [0u8; 8]; + out.write_all(&eight_null_bytes[n as usize..])?; + } + }; + } + + Ok(out.count) +} + +mod util { + use std::convert::TryFrom; + + pub struct CountBytes<T> { + pub count: u32, + pub inner: T, + } + + impl<T> CountBytes<T> + where + T: std::io::Write, + { + pub fn new(inner: T) -> Self { + CountBytes { inner, count: 0 } + } + } + + impl<T> std::io::Write for CountBytes<T> + where + T: std::io::Write, + { + fn write(&mut self, buf: &[u8]) -> std::io::Result<usize> { + let written = self.inner.write(buf)?; + self.count = self + .count + .checked_add(u32::try_from(written).expect("we don't write 4GB buffers")) + .ok_or_else(|| { + std::io::Error::new( + std::io::ErrorKind::Other, + "Cannot write indices larger than 4 gigabytes", + ) + })?; + Ok(written) + } + + fn flush(&mut self) -> std::io::Result<()> { + self.inner.flush() + } + } +} |