diff options
Diffstat (limited to 'vendor/gix-index/src/extension')
-rw-r--r-- | vendor/gix-index/src/extension/decode.rs | 80 | ||||
-rw-r--r-- | vendor/gix-index/src/extension/end_of_index_entry/decode.rs | 56 | ||||
-rw-r--r-- | vendor/gix-index/src/extension/end_of_index_entry/mod.rs | 14 | ||||
-rw-r--r-- | vendor/gix-index/src/extension/end_of_index_entry/write.rs | 29 | ||||
-rw-r--r-- | vendor/gix-index/src/extension/fs_monitor.rs | 38 | ||||
-rw-r--r-- | vendor/gix-index/src/extension/index_entry_offset_table.rs | 43 | ||||
-rw-r--r-- | vendor/gix-index/src/extension/iter.rs | 58 | ||||
-rw-r--r-- | vendor/gix-index/src/extension/link.rs | 177 | ||||
-rw-r--r-- | vendor/gix-index/src/extension/mod.rs | 96 | ||||
-rw-r--r-- | vendor/gix-index/src/extension/resolve_undo.rs | 64 | ||||
-rw-r--r-- | vendor/gix-index/src/extension/sparse.rs | 11 | ||||
-rw-r--r-- | vendor/gix-index/src/extension/tree/decode.rs | 63 | ||||
-rw-r--r-- | vendor/gix-index/src/extension/tree/mod.rs | 21 | ||||
-rw-r--r-- | vendor/gix-index/src/extension/tree/verify.rs | 134 | ||||
-rw-r--r-- | vendor/gix-index/src/extension/tree/write.rs | 45 | ||||
-rw-r--r-- | vendor/gix-index/src/extension/untracked_cache.rs | 156 |
16 files changed, 1085 insertions, 0 deletions
diff --git a/vendor/gix-index/src/extension/decode.rs b/vendor/gix-index/src/extension/decode.rs new file mode 100644 index 000000000..af032f4e3 --- /dev/null +++ b/vendor/gix-index/src/extension/decode.rs @@ -0,0 +1,80 @@ +use std::convert::TryInto; + +use crate::{extension, extension::Signature, util::from_be_u32}; + +pub(crate) fn header(data: &[u8]) -> (Signature, u32, &[u8]) { + let (signature, data) = data.split_at(4); + let (size, data) = data.split_at(4); + (signature.try_into().unwrap(), from_be_u32(size), data) +} + +mod error { + use crate::extension; + + /// The error returned when decoding extensions. + #[derive(Debug, thiserror::Error)] + #[allow(missing_docs)] + pub enum Error { + #[error( + "Encountered mandatory extension '{}' which isn't implemented yet", + String::from_utf8_lossy(signature) + )] + MandatoryUnimplemented { signature: extension::Signature }, + #[error("Could not parse mandatory link extension")] + Link(#[from] extension::link::decode::Error), + } +} +pub use error::Error; + +pub(crate) fn all( + maybe_beginning_of_extensions: &[u8], + object_hash: gix_hash::Kind, +) -> Result<(Outcome, &[u8]), Error> { + let mut ext_iter = match extension::Iter::new_without_checksum(maybe_beginning_of_extensions, object_hash) { + Some(iter) => iter, + None => return Ok((Outcome::default(), maybe_beginning_of_extensions)), + }; + + let mut ext = Outcome::default(); + for (signature, ext_data) in ext_iter.by_ref() { + match signature { + extension::tree::SIGNATURE => { + ext.tree = extension::tree::decode(ext_data, object_hash); + } + extension::resolve_undo::SIGNATURE => { + ext.resolve_undo = extension::resolve_undo::decode(ext_data, object_hash); + } + extension::untracked_cache::SIGNATURE => { + ext.untracked = extension::untracked_cache::decode(ext_data, object_hash); + } + extension::fs_monitor::SIGNATURE => { + ext.fs_monitor = extension::fs_monitor::decode(ext_data); + } + extension::end_of_index_entry::SIGNATURE => {} // skip already done + 
extension::index_entry_offset_table::SIGNATURE => {} // not relevant/obtained already + mandatory if mandatory[0].is_ascii_lowercase() => match mandatory { + extension::link::SIGNATURE => ext.link = extension::link::decode(ext_data, object_hash)?.into(), + extension::sparse::SIGNATURE => { + if !ext_data.is_empty() { + // only used as a marker, if this changes we need this implementation. + return Err(Error::MandatoryUnimplemented { signature: mandatory }); + } + ext.is_sparse = true + } + unknown => return Err(Error::MandatoryUnimplemented { signature: unknown }), + }, + _unknown => {} // skip unknown extensions, too + } + } + Ok((ext, &maybe_beginning_of_extensions[ext_iter.consumed..])) +} + +#[derive(Default)] +pub(crate) struct Outcome { + pub tree: Option<extension::Tree>, + pub link: Option<extension::Link>, + pub resolve_undo: Option<extension::resolve_undo::Paths>, + pub untracked: Option<extension::UntrackedCache>, + pub fs_monitor: Option<extension::FsMonitor>, + pub is_sparse: bool, +} diff --git a/vendor/gix-index/src/extension/end_of_index_entry/decode.rs b/vendor/gix-index/src/extension/end_of_index_entry/decode.rs new file mode 100644 index 000000000..f8002ab7c --- /dev/null +++ b/vendor/gix-index/src/extension/end_of_index_entry/decode.rs @@ -0,0 +1,56 @@ +use crate::{ + decode::header, + extension, + extension::end_of_index_entry::{MIN_SIZE, MIN_SIZE_WITH_HEADER, SIGNATURE}, + util::from_be_u32, +}; + +/// Decode the end of index entry extension, which is no more than a glorified offset to the first byte of all extensions to allow +/// loading entries and extensions in parallel. +/// +/// Itself it's located at the end of the index file, which allows its location to be known and thus addressable. +/// From there it's possible to traverse the chunks of all set extensions, hash them, and compare that hash with all extensions +/// stored prior to this one to assure they are correct. 
+/// +/// If the checksum wasn't matched, we will ignore this extension entirely. +pub fn decode(data: &[u8], object_hash: gix_hash::Kind) -> Option<usize> { + let hash_len = object_hash.len_in_bytes(); + if data.len() < MIN_SIZE_WITH_HEADER + hash_len { + return None; + } + + let start_of_eoie = data.len() - MIN_SIZE_WITH_HEADER - hash_len; + let ext_data = &data[start_of_eoie..data.len() - hash_len]; + + let (signature, ext_size, ext_data) = extension::decode::header(ext_data); + if signature != SIGNATURE || ext_size as usize != MIN_SIZE { + return None; + } + + let (offset, checksum) = ext_data.split_at(4); + let offset = from_be_u32(offset) as usize; + if offset < header::SIZE || offset > start_of_eoie || checksum.len() != gix_hash::Kind::Sha1.len_in_bytes() { + return None; + } + + let mut hasher = gix_features::hash::hasher(gix_hash::Kind::Sha1); + let mut last_chunk = None; + for (signature, chunk) in extension::Iter::new(&data[offset..data.len() - MIN_SIZE_WITH_HEADER - hash_len]) { + hasher.update(&signature); + hasher.update(&(chunk.len() as u32).to_be_bytes()); + last_chunk = Some(chunk); + } + + if hasher.digest() != checksum { + return None; + } + // The last-to-this chunk ends where ours starts + if last_chunk + .map(|s| s.as_ptr_range().end != (&data[start_of_eoie]) as *const _) + .unwrap_or(true) + { + return None; + } + + Some(offset) +} diff --git a/vendor/gix-index/src/extension/end_of_index_entry/mod.rs b/vendor/gix-index/src/extension/end_of_index_entry/mod.rs new file mode 100644 index 000000000..d55496aee --- /dev/null +++ b/vendor/gix-index/src/extension/end_of_index_entry/mod.rs @@ -0,0 +1,14 @@ +use crate::{extension, extension::Signature}; + +/// The signature of the end-of-index-entry extension +pub const SIGNATURE: Signature = *b"EOIE"; +/// The minimal size of the extension, depending on the shortest hash. 
+pub const MIN_SIZE: usize = 4 /* offset to extensions */ + gix_hash::Kind::shortest().len_in_bytes(); +/// The smallest size of the extension varying by hash kind, along with the standard extension header. +pub const MIN_SIZE_WITH_HEADER: usize = extension::MIN_SIZE + MIN_SIZE; + +mod decode; +pub use decode::decode; + +mod write; +pub use write::write_to; diff --git a/vendor/gix-index/src/extension/end_of_index_entry/write.rs b/vendor/gix-index/src/extension/end_of_index_entry/write.rs new file mode 100644 index 000000000..6752bd238 --- /dev/null +++ b/vendor/gix-index/src/extension/end_of_index_entry/write.rs @@ -0,0 +1,29 @@ +use crate::extension::{end_of_index_entry::SIGNATURE, Signature}; + +/// Write this extension to out and generate a hash of `hash_kind` over all `prior_extensions` which are specified as `(signature, size)` +/// pair. `one_past_entries` is the offset to the first byte past the entries, which is also the first byte of the signature of the +/// first extension in `prior_extensions`. Note that `prior_extensions` must have been written prior to this one, as the name suggests, +/// allowing this extension to be the last one in the index file. +/// +/// Even if there are no `prior_extensions`, this extension will be written unconditionally. 
+pub fn write_to( + mut out: impl std::io::Write, + hash_kind: gix_hash::Kind, + offset_to_extensions: u32, + prior_extensions: impl IntoIterator<Item = (Signature, u32)>, +) -> Result<(), std::io::Error> { + out.write_all(&SIGNATURE)?; + let extension_size: u32 = 4 + hash_kind.len_in_bytes() as u32; + out.write_all(&extension_size.to_be_bytes())?; + + out.write_all(&offset_to_extensions.to_be_bytes())?; + + let mut hasher = gix_features::hash::hasher(hash_kind); + for (signature, size) in prior_extensions { + hasher.update(&signature); + hasher.update(&size.to_be_bytes()); + } + out.write_all(&hasher.digest())?; + + Ok(()) +} diff --git a/vendor/gix-index/src/extension/fs_monitor.rs b/vendor/gix-index/src/extension/fs_monitor.rs new file mode 100644 index 000000000..4bb5016ff --- /dev/null +++ b/vendor/gix-index/src/extension/fs_monitor.rs @@ -0,0 +1,38 @@ +use bstr::BString; + +use crate::{ + extension::{FsMonitor, Signature}, + util::{read_u32, read_u64, split_at_byte_exclusive}, +}; + +#[derive(Clone)] +pub enum Token { + V1 { nanos_since_1970: u64 }, + V2 { token: BString }, +} + +pub const SIGNATURE: Signature = *b"FSMN"; + +pub fn decode(data: &[u8]) -> Option<FsMonitor> { + let (version, data) = read_u32(data)?; + let (token, data) = match version { + 1 => { + let (nanos_since_1970, data) = read_u64(data)?; + (Token::V1 { nanos_since_1970 }, data) + } + 2 => { + let (token, data) = split_at_byte_exclusive(data, 0)?; + (Token::V2 { token: token.into() }, data) + } + _ => return None, + }; + + let (ewah_size, data) = read_u32(data)?; + let (entry_dirty, data) = gix_bitmap::ewah::decode(&data[..ewah_size as usize]).ok()?; + + if !data.is_empty() { + return None; + } + + FsMonitor { token, entry_dirty }.into() +} diff --git a/vendor/gix-index/src/extension/index_entry_offset_table.rs b/vendor/gix-index/src/extension/index_entry_offset_table.rs new file mode 100644 index 000000000..0a0108adf --- /dev/null +++ 
b/vendor/gix-index/src/extension/index_entry_offset_table.rs @@ -0,0 +1,43 @@ +use crate::{extension, extension::Signature, util::read_u32}; + +#[derive(Debug, Clone, Copy)] +pub struct Offset { + pub from_beginning_of_file: u32, + pub num_entries: u32, +} + +pub const SIGNATURE: Signature = *b"IEOT"; + +pub fn decode(data: &[u8]) -> Option<Vec<Offset>> { + let (version, mut data) = read_u32(data)?; + match version { + 1 => {} + _unknown => return None, + } + + let entry_size = 4 + 4; + let num_offsets = data.len() / entry_size; + if num_offsets == 0 || data.len() % entry_size != 0 { + return None; + } + + let mut out = Vec::with_capacity(entry_size); + for _ in 0..num_offsets { + let (offset, chunk) = read_u32(data)?; + let (num_entries, chunk) = read_u32(chunk)?; + out.push(Offset { + from_beginning_of_file: offset, + num_entries, + }); + data = chunk; + } + debug_assert!(data.is_empty()); + + out.into() +} + +pub fn find(extensions: &[u8], object_hash: gix_hash::Kind) -> Option<Vec<Offset>> { + extension::Iter::new_without_checksum(extensions, object_hash)? + .find_map(|(sig, ext_data)| (sig == SIGNATURE).then_some(ext_data)) + .and_then(decode) +} diff --git a/vendor/gix-index/src/extension/iter.rs b/vendor/gix-index/src/extension/iter.rs new file mode 100644 index 000000000..f791b064e --- /dev/null +++ b/vendor/gix-index/src/extension/iter.rs @@ -0,0 +1,58 @@ +use std::convert::TryInto; + +use crate::{extension, extension::Iter, util::from_be_u32}; + +impl<'a> Iter<'a> { + /// Create a new extension iterator at the entrypoint for extensions until the end of the extensions. + pub fn new(data_at_beginning_of_extensions_and_truncated: &'a [u8]) -> Self { + Iter { + data: data_at_beginning_of_extensions_and_truncated, + consumed: 0, + } + } + + /// Create a new iterator at with a data block to the end of the file, and we automatically remove the trailing + /// hash of type `object_hash`. 
+ pub fn new_without_checksum( + data_at_beginning_of_extensions: &'a [u8], + object_hash: gix_hash::Kind, + ) -> Option<Self> { + let end = data_at_beginning_of_extensions + .len() + .checked_sub(object_hash.len_in_bytes())?; + Iter { + data: &data_at_beginning_of_extensions[..end], + consumed: 0, + } + .into() + } +} + +impl<'a> Iterator for Iter<'a> { + type Item = (extension::Signature, &'a [u8]); + + fn next(&mut self) -> Option<Self::Item> { + if self.data.len() < 4 + 4 { + return None; + } + + let (signature, data) = self.data.split_at(4); + let (size, data) = data.split_at(4); + self.data = data; + self.consumed += 4 + 4; + + let size = from_be_u32(size) as usize; + + match data.get(..size) { + Some(ext_data) => { + self.data = &data[size..]; + self.consumed += size; + Some((signature.try_into().unwrap(), ext_data)) + } + None => { + self.data = &[]; + None + } + } + } +} diff --git a/vendor/gix-index/src/extension/link.rs b/vendor/gix-index/src/extension/link.rs new file mode 100644 index 000000000..20ce9cb21 --- /dev/null +++ b/vendor/gix-index/src/extension/link.rs @@ -0,0 +1,177 @@ +use crate::{ + extension::{Link, Signature}, + util::split_at_pos, +}; + +/// The signature of the link extension. +pub const SIGNATURE: Signature = *b"link"; + +/// Bitmaps to know which entries to delete or replace, even though details are still unknown. +#[derive(Clone)] +pub struct Bitmaps { + /// A bitmap to signal which entries to delete, maybe. + pub delete: gix_bitmap::ewah::Vec, + /// A bitmap to signal which entries to replace, maybe. + pub replace: gix_bitmap::ewah::Vec, +} + +/// +pub mod decode { + + /// The error returned when decoding link extensions. 
+ #[derive(Debug, thiserror::Error)] + #[allow(missing_docs)] + pub enum Error { + #[error("{0}")] + Corrupt(&'static str), + #[error("{kind} bitmap corrupt")] + BitmapDecode { + err: gix_bitmap::ewah::decode::Error, + kind: &'static str, + }, + } + + impl From<std::num::TryFromIntError> for Error { + fn from(_: std::num::TryFromIntError) -> Self { + Self::Corrupt("error in bitmap iteration trying to convert from u64 to usize") + } + } +} + +pub(crate) fn decode(data: &[u8], object_hash: gix_hash::Kind) -> Result<Link, decode::Error> { + let (id, data) = split_at_pos(data, object_hash.len_in_bytes()) + .ok_or(decode::Error::Corrupt( + "link extension too short to read share index checksum", + )) + .map(|(id, d)| (gix_hash::ObjectId::from(id), d))?; + + if data.is_empty() { + return Ok(Link { + shared_index_checksum: id, + bitmaps: None, + }); + } + + let (delete, data) = + gix_bitmap::ewah::decode(data).map_err(|err| decode::Error::BitmapDecode { kind: "delete", err })?; + let (replace, data) = + gix_bitmap::ewah::decode(data).map_err(|err| decode::Error::BitmapDecode { kind: "replace", err })?; + + if !data.is_empty() { + return Err(decode::Error::Corrupt("garbage trailing link extension")); + } + + Ok(Link { + shared_index_checksum: id, + bitmaps: Some(Bitmaps { delete, replace }), + }) +} + +impl Link { + pub(crate) fn dissolve_into( + self, + split_index: &mut crate::File, + object_hash: gix_hash::Kind, + options: crate::decode::Options, + ) -> Result<(), crate::file::init::Error> { + let shared_index_path = split_index + .path + .parent() + .expect("split index file in .git folder") + .join(format!("sharedindex.{}", self.shared_index_checksum)); + let mut shared_index = crate::File::at( + &shared_index_path, + object_hash, + crate::decode::Options { + expected_checksum: self.shared_index_checksum.into(), + ..options + }, + )?; + + if let Some(bitmaps) = self.bitmaps { + let mut split_entry_index = 0; + + let mut err = None; + 
bitmaps.replace.for_each_set_bit(|replace_index| { + let shared_entry = match shared_index.entries.get_mut(replace_index) { + Some(e) => e, + None => { + err = decode::Error::Corrupt("replace bitmap length exceeds shared index length - more entries in bitmap than found in shared index").into(); + return None + } + }; + + if shared_entry.flags.contains(crate::entry::Flags::REMOVE) { + err = decode::Error::Corrupt("entry is marked as both replace and delete").into(); + return None + } + + let split_entry = match split_index.entries.get(split_entry_index) { + Some(e) => e, + None => { + err = decode::Error::Corrupt("replace bitmap length exceeds split index length - more entries in bitmap than found in split index").into(); + return None + } + }; + if !split_entry.path.is_empty() { + err = decode::Error::Corrupt("paths in split index entries that are for replacement should be empty").into(); + return None + } + if shared_entry.path.is_empty() { + err = decode::Error::Corrupt("paths in shared index entries that are replaced should not be empty").into(); + return None + } + shared_entry.stat = split_entry.stat; + shared_entry.id = split_entry.id; + shared_entry.flags = split_entry.flags; + shared_entry.mode = split_entry.mode; + + split_entry_index += 1; + Some(()) + }); + if let Some(err) = err { + return Err(err.into()); + } + + let split_index_path_backing = std::mem::take(&mut split_index.path_backing); + for mut split_entry in split_index.entries.drain(split_entry_index..) 
{ + let start = shared_index.path_backing.len(); + let split_index_path = split_entry.path.clone(); + + split_entry.path = start..start + split_entry.path.len(); + shared_index.entries.push(split_entry); + + shared_index + .path_backing + .extend_from_slice(&split_index_path_backing[split_index_path]); + } + + bitmaps.delete.for_each_set_bit(|delete_index| { + let shared_entry = match shared_index.entries.get_mut(delete_index) { + Some(e) => e, + None => { + err = decode::Error::Corrupt("delete bitmap length exceeds shared index length - more entries in bitmap than found in shared index").into(); + return None + } + }; + shared_entry.flags.insert(crate::entry::Flags::REMOVE); + Some(()) + }); + if let Some(err) = err { + return Err(err.into()); + } + + shared_index + .entries + .retain(|e| !e.flags.contains(crate::entry::Flags::REMOVE)); + + let mut shared_entries = std::mem::take(&mut shared_index.entries); + shared_entries.sort_by(|a, b| a.cmp(b, &shared_index.state)); + + split_index.entries = shared_entries; + split_index.path_backing = std::mem::take(&mut shared_index.path_backing); + } + + Ok(()) + } +} diff --git a/vendor/gix-index/src/extension/mod.rs b/vendor/gix-index/src/extension/mod.rs new file mode 100644 index 000000000..07edfdfe0 --- /dev/null +++ b/vendor/gix-index/src/extension/mod.rs @@ -0,0 +1,96 @@ +use bstr::BString; +use smallvec::SmallVec; + +/// The size of the smallest possible extension, which is no more than a signature and a 0 indicating its size. +pub const MIN_SIZE: usize = 4 /* signature */ + 4 /* size */; + +/// The kind of index extension. +pub type Signature = [u8; 4]; + +/// An iterator over the data of index extensions. +pub struct Iter<'a> { + data: &'a [u8], + /// The amount of consumed bytes as seen from our internal data pointer. Useful to continue where the iterator left off. + pub consumed: usize, +} + +/// A structure to associate object ids of a tree with sections in the index entries list. 
+/// +/// It allows to more quickly build trees by avoiding as it can quickly re-use portions of the index and its associated tree ids +/// if there was no change to them. Portions of this tree are invalidated as the index is changed. +#[derive(PartialEq, Eq, Clone, Debug)] +pub struct Tree { + /// The name of the tree/directory, or empty if it's the root tree. + pub name: SmallVec<[u8; 23]>, + /// The id of the directory tree of the associated tree object. + pub id: gix_hash::ObjectId, + /// The amount of non-tree items in this directory tree, including sub-trees, recursively. + /// The value of the top-level tree is thus equal to the value of the total amount of entries. + /// If `None`, the tree is considered invalid and needs to be refreshed + pub num_entries: Option<u32>, + /// The child-trees below the current tree. + pub children: Vec<Tree>, +} + +/// The link extension to track a shared index. +#[derive(Clone)] +pub struct Link { + /// The checksum of the shared index as last seen. + pub shared_index_checksum: gix_hash::ObjectId, + /// Bitmaps to tell us which entries to delete or replace. + pub bitmaps: Option<link::Bitmaps>, +} + +/// The extension for untracked files. +#[allow(dead_code)] +#[derive(Clone)] +pub struct UntrackedCache { + /// Something identifying the location and machine that this cache is for. + /// Should the repository be copied to a different machine, the entire cache can immediately be invalidated. + identifier: BString, + /// Stat for the .git/info/exclude file + info_exclude: Option<untracked_cache::OidStat>, + /// Stat for the `core.excludesfile` + excludes_file: Option<untracked_cache::OidStat>, + /// Usually `.gitignore` + exclude_filename_per_dir: BString, + dir_flags: u32, + + /// A list of directories and sub-directories, with `directories[0]` being the root. + directories: Vec<untracked_cache::Directory>, +} + +/// The extension for keeping state on recent information provided by the filesystem monitor. 
+#[allow(dead_code)] +#[derive(Clone)] +pub struct FsMonitor { + token: fs_monitor::Token, + /// if a bit is true, the respective entry is NOT valid as per the fs monitor. + entry_dirty: gix_bitmap::ewah::Vec, +} + +mod iter; + +pub(crate) mod fs_monitor; + +/// +pub mod decode; + +/// +pub mod tree; + +/// +pub mod end_of_index_entry; + +pub(crate) mod index_entry_offset_table; + +/// +pub mod link; + +pub(crate) mod resolve_undo; + +/// +pub mod untracked_cache; + +/// +pub mod sparse; diff --git a/vendor/gix-index/src/extension/resolve_undo.rs b/vendor/gix-index/src/extension/resolve_undo.rs new file mode 100644 index 000000000..eb6db9ad7 --- /dev/null +++ b/vendor/gix-index/src/extension/resolve_undo.rs @@ -0,0 +1,64 @@ +use bstr::BString; +use gix_hash::ObjectId; + +use crate::{ + extension::Signature, + util::{split_at_byte_exclusive, split_at_pos}, +}; + +pub type Paths = Vec<ResolvePath>; + +#[allow(dead_code)] +#[derive(Clone)] +pub struct ResolvePath { + /// relative to the root of the repository, or what would be stored in the index + name: BString, + + /// 0 = ancestor/common, 1 = ours, 2 = theirs + stages: [Option<Stage>; 3], +} + +#[allow(dead_code)] +#[derive(Clone, Copy)] +pub struct Stage { + mode: u32, + id: ObjectId, +} + +pub const SIGNATURE: Signature = *b"REUC"; + +pub fn decode(mut data: &[u8], object_hash: gix_hash::Kind) -> Option<Paths> { + let hash_len = object_hash.len_in_bytes(); + let mut out = Vec::new(); + + while !data.is_empty() { + let (path, rest) = split_at_byte_exclusive(data, 0)?; + data = rest; + + let mut modes = [0u32; 3]; + for mode in modes.iter_mut() { + let (mode_ascii, rest) = split_at_byte_exclusive(data, 0)?; + data = rest; + *mode = u32::from_str_radix(std::str::from_utf8(mode_ascii).ok()?, 8).ok()?; + } + + let mut stages = [None, None, None]; + for (mode, stage) in modes.iter().zip(stages.iter_mut()) { + if *mode == 0 { + continue; + } + let (hash, rest) = split_at_pos(data, hash_len)?; + data = rest; + *stage = 
Some(Stage { + mode: *mode, + id: ObjectId::from(hash), + }); + } + + out.push(ResolvePath { + name: path.into(), + stages, + }); + } + out.into() +} diff --git a/vendor/gix-index/src/extension/sparse.rs b/vendor/gix-index/src/extension/sparse.rs new file mode 100644 index 000000000..ab219c8d6 --- /dev/null +++ b/vendor/gix-index/src/extension/sparse.rs @@ -0,0 +1,11 @@ +use crate::extension::Signature; + +/// The signature of the sparse index extension, nothing more than an indicator at this time. +pub const SIGNATURE: Signature = *b"sdir"; + +/// Serialize the sparse index extension to `out` +pub fn write_to(mut out: impl std::io::Write) -> Result<(), std::io::Error> { + out.write_all(&SIGNATURE)?; + out.write_all(&0_u32.to_be_bytes())?; + Ok(()) +} diff --git a/vendor/gix-index/src/extension/tree/decode.rs b/vendor/gix-index/src/extension/tree/decode.rs new file mode 100644 index 000000000..82221b48c --- /dev/null +++ b/vendor/gix-index/src/extension/tree/decode.rs @@ -0,0 +1,63 @@ +use std::convert::TryInto; + +use gix_hash::ObjectId; + +use crate::{ + extension::Tree, + util::{split_at_byte_exclusive, split_at_pos}, +}; + +/// A recursive data structure +pub fn decode(data: &[u8], object_hash: gix_hash::Kind) -> Option<Tree> { + let (tree, data) = one_recursive(data, object_hash.len_in_bytes())?; + assert!( + data.is_empty(), + "BUG: should fully consume the entire tree extension chunk, got {} left", + data.len() + ); + Some(tree) +} + +fn one_recursive(data: &[u8], hash_len: usize) -> Option<(Tree, &[u8])> { + let (path, data) = split_at_byte_exclusive(data, 0)?; + + let (entry_count, data) = split_at_byte_exclusive(data, b' ')?; + let num_entries: i32 = btoi::btoi(entry_count).ok()?; + + let (subtree_count, data) = split_at_byte_exclusive(data, b'\n')?; + let subtree_count: usize = btoi::btou(subtree_count).ok()?; + + let (id, mut data) = if num_entries >= 0 { + let (hash, data) = split_at_pos(data, hash_len)?; + (ObjectId::from(hash), data) + } else { + ( + 
ObjectId::null(gix_hash::Kind::from_hex_len(hash_len * 2).expect("valid hex_len")), + data, + ) + }; + + let mut subtrees = Vec::with_capacity(subtree_count); + for _ in 0..subtree_count { + let (tree, rest) = one_recursive(data, hash_len)?; + subtrees.push(tree); + data = rest; + } + + subtrees.sort_by(|a, b| a.name.cmp(&b.name)); + let num_trees = subtrees.len(); + subtrees.dedup_by(|a, b| a.name == b.name); + if num_trees != subtrees.len() { + return None; + } + + Some(( + Tree { + id, + num_entries: num_entries.try_into().ok(), + name: path.into(), + children: subtrees, + }, + data, + )) +} diff --git a/vendor/gix-index/src/extension/tree/mod.rs b/vendor/gix-index/src/extension/tree/mod.rs new file mode 100644 index 000000000..f20759399 --- /dev/null +++ b/vendor/gix-index/src/extension/tree/mod.rs @@ -0,0 +1,21 @@ +use crate::extension::Signature; + +/// The signature for tree extensions +pub const SIGNATURE: Signature = *b"TREE"; + +/// +pub mod verify; + +mod decode; +pub use decode::decode; + +mod write; + +#[cfg(test)] +mod tests { + + #[test] + fn size_of_tree() { + assert_eq!(std::mem::size_of::<crate::extension::Tree>(), 88); + } +} diff --git a/vendor/gix-index/src/extension/tree/verify.rs b/vendor/gix-index/src/extension/tree/verify.rs new file mode 100644 index 000000000..82fd03c8c --- /dev/null +++ b/vendor/gix-index/src/extension/tree/verify.rs @@ -0,0 +1,134 @@ +use std::cmp::Ordering; + +use bstr::{BString, ByteSlice}; + +use crate::extension::Tree; + +/// The error returned by [Tree::verify()][crate::extension::Tree::verify()]. 
+#[derive(Debug, thiserror::Error)] +#[allow(missing_docs)] +pub enum Error { + #[error("The entry {entry_id} at path '{name}' in parent tree {parent_id} wasn't found in the nodes children, making it incomplete")] + MissingTreeDirectory { + parent_id: gix_hash::ObjectId, + entry_id: gix_hash::ObjectId, + name: BString, + }, + #[error("The tree with id {oid} wasn't found in the object database")] + TreeNodeNotFound { oid: gix_hash::ObjectId }, + #[error("The tree with id {oid} should have {expected_childcount} children, but its cached representation had {actual_childcount} of them")] + TreeNodeChildcountMismatch { + oid: gix_hash::ObjectId, + expected_childcount: usize, + actual_childcount: usize, + }, + #[error("The root tree was named '{name}', even though it should be empty")] + RootWithName { name: BString }, + #[error( + "Expected not more than {expected} entries to be reachable from the top-level, but actual count was {actual}" + )] + EntriesCount { actual: u32, expected: u32 }, + #[error( + "Parent tree '{parent_id}' contained out-of order trees prev = '{previous_path}' and next = '{current_path}'" + )] + OutOfOrder { + parent_id: gix_hash::ObjectId, + current_path: BString, + previous_path: BString, + }, +} + +impl Tree { + /// + pub fn verify<F>(&self, use_find: bool, mut find: F) -> Result<(), Error> + where + F: for<'a> FnMut(&gix_hash::oid, &'a mut Vec<u8>) -> Option<gix_object::TreeRefIter<'a>>, + { + fn verify_recursive<F>( + parent_id: gix_hash::ObjectId, + children: &[Tree], + mut find_buf: Option<&mut Vec<u8>>, + find: &mut F, + ) -> Result<Option<u32>, Error> + where + F: for<'a> FnMut(&gix_hash::oid, &'a mut Vec<u8>) -> Option<gix_object::TreeRefIter<'a>>, + { + if children.is_empty() { + return Ok(None); + } + let mut entries = 0; + let mut prev = None::<&Tree>; + for child in children { + entries += child.num_entries.unwrap_or(0); + if let Some(prev) = prev { + if prev.name.cmp(&child.name) != Ordering::Less { + return Err(Error::OutOfOrder { + 
parent_id, + previous_path: prev.name.as_bstr().into(), + current_path: child.name.as_bstr().into(), + }); + } + } + prev = Some(child); + } + if let Some(buf) = find_buf.as_mut() { + let tree_entries = find(&parent_id, buf).ok_or(Error::TreeNodeNotFound { oid: parent_id })?; + let mut num_entries = 0; + for entry in tree_entries + .filter_map(Result::ok) + .filter(|e| e.mode == gix_object::tree::EntryMode::Tree) + { + children + .binary_search_by(|e| e.name.as_bstr().cmp(entry.filename)) + .map_err(|_| Error::MissingTreeDirectory { + parent_id, + entry_id: entry.oid.to_owned(), + name: entry.filename.to_owned(), + })?; + num_entries += 1; + } + + if num_entries != children.len() { + return Err(Error::TreeNodeChildcountMismatch { + oid: parent_id, + expected_childcount: num_entries, + actual_childcount: children.len(), + }); + } + } + for child in children { + // This is actually needed here as it's a mut ref, which isn't copy. We do a re-borrow here. + #[allow(clippy::needless_option_as_deref)] + let actual_num_entries = verify_recursive(child.id, &child.children, find_buf.as_deref_mut(), find)?; + if let Some((actual, num_entries)) = actual_num_entries.zip(child.num_entries) { + if actual > num_entries { + return Err(Error::EntriesCount { + actual, + expected: num_entries, + }); + } + } + } + Ok(entries.into()) + } + + if !self.name.is_empty() { + return Err(Error::RootWithName { + name: self.name.as_bstr().into(), + }); + } + + let mut buf = Vec::new(); + let declared_entries = verify_recursive(self.id, &self.children, use_find.then_some(&mut buf), &mut find)?; + if let Some((actual, num_entries)) = declared_entries.zip(self.num_entries) { + if actual > num_entries { + return Err(Error::EntriesCount { + actual, + expected: num_entries, + }); + } + } + + Ok(()) + } +} diff --git a/vendor/gix-index/src/extension/tree/write.rs b/vendor/gix-index/src/extension/tree/write.rs new file mode 100644 index 000000000..2b1e3d949 --- /dev/null +++ 
b/vendor/gix-index/src/extension/tree/write.rs @@ -0,0 +1,45 @@ +use std::convert::TryFrom; + +use crate::extension::{tree, Tree}; + +impl Tree { + /// Serialize this instance to `out`. + pub fn write_to(&self, mut out: impl std::io::Write) -> Result<(), std::io::Error> { + fn tree_entry(out: &mut impl std::io::Write, tree: &Tree) -> Result<(), std::io::Error> { + let mut buf = itoa::Buffer::new(); + let num_entries = match tree.num_entries { + Some(num_entries) => buf.format(num_entries), + None => buf.format(-1), + }; + + out.write_all(tree.name.as_slice())?; + out.write_all(b"\0")?; + out.write_all(num_entries.as_bytes())?; + out.write_all(b" ")?; + let num_children = buf.format(tree.children.len()); + out.write_all(num_children.as_bytes())?; + out.write_all(b"\n")?; + if tree.num_entries.is_some() { + out.write_all(tree.id.as_bytes())?; + } + + for child in &tree.children { + tree_entry(out, child)?; + } + + Ok(()) + } + + let signature = tree::SIGNATURE; + + let estimated_size = self.num_entries.unwrap_or(0) * (300 + 3 + 1 + 3 + 1 + 20); + let mut entries: Vec<u8> = Vec::with_capacity(estimated_size as usize); + tree_entry(&mut entries, self)?; + + out.write_all(&signature)?; + out.write_all(&(u32::try_from(entries.len()).expect("less than 4GB tree extension")).to_be_bytes())?; + out.write_all(&entries)?; + + Ok(()) + } +} diff --git a/vendor/gix-index/src/extension/untracked_cache.rs b/vendor/gix-index/src/extension/untracked_cache.rs new file mode 100644 index 000000000..9f72e0775 --- /dev/null +++ b/vendor/gix-index/src/extension/untracked_cache.rs @@ -0,0 +1,156 @@ +use std::convert::TryInto; + +use bstr::BString; +use gix_hash::ObjectId; + +use crate::{ + entry, + extension::{Signature, UntrackedCache}, + util::{read_u32, split_at_byte_exclusive, split_at_pos, var_int}, +}; + +/// A structure to track filesystem stat information along with an object id, linking a worktree file with what's in our ODB. 
#[derive(Clone)]
pub struct OidStat {
    /// The file system stat information
    pub stat: entry::Stat,
    /// The id of the file in our ODB.
    pub id: ObjectId,
}

/// A directory with information about its untracked files, and its sub-directories
#[derive(Clone)]
pub struct Directory {
    /// The directory's name, or an empty string if this is the root directory.
    pub name: BString,
    /// The names of untracked files and directories contained in this directory.
    pub untracked_entries: Vec<BString>,
    /// Indices into the flat directory list of the owning `UntrackedCache`, identifying our sub-directories.
    pub sub_directories: Vec<usize>,

    /// The directory's stat data, if available or valid — set only for directories whose bit is
    /// set in the 'valid' bitmap of the encoded extension.
    // TODO: or is it the stat of the exclude file? Confirm upstream.
    pub stat: Option<entry::Stat>,
    /// The oid of a .gitignore file, if it exists
    pub exclude_file_oid: Option<ObjectId>,
    /// Set for directories whose bit is set in the 'check only' bitmap of the encoded extension.
    // TODO: figure out what this really does
    pub check_only: bool,
}

/// Only used as an indicator
pub const SIGNATURE: Signature = *b"UNTR";

/// Decode an untracked cache extension from `data`, assuming object hashes are of type `object_hash`.
+pub fn decode(data: &[u8], object_hash: gix_hash::Kind) -> Option<UntrackedCache> { + if !data.last().map(|b| *b == 0).unwrap_or(false) { + return None; + } + let (identifier_len, data) = var_int(data)?; + let (identifier, data) = split_at_pos(data, identifier_len.try_into().ok()?)?; + + let hash_len = object_hash.len_in_bytes(); + let (info_exclude, data) = decode_oid_stat(data, hash_len)?; + let (excludes_file, data) = decode_oid_stat(data, hash_len)?; + let (dir_flags, data) = read_u32(data)?; + let (exclude_filename_per_dir, data) = split_at_byte_exclusive(data, 0)?; + + let (num_directory_blocks, data) = var_int(data)?; + + let mut res = UntrackedCache { + identifier: identifier.into(), + info_exclude: (!info_exclude.id.is_null()).then_some(info_exclude), + excludes_file: (!excludes_file.id.is_null()).then_some(excludes_file), + exclude_filename_per_dir: exclude_filename_per_dir.into(), + dir_flags, + directories: Vec::new(), + }; + if num_directory_blocks == 0 { + return data.is_empty().then_some(res); + } + + let num_directory_blocks = num_directory_blocks.try_into().ok()?; + let directories = &mut res.directories; + directories.reserve(num_directory_blocks); + + let data = decode_directory_block(data, directories)?; + if directories.len() != num_directory_blocks { + return None; + } + let (valid, data) = gix_bitmap::ewah::decode(data).ok()?; + let (check_only, data) = gix_bitmap::ewah::decode(data).ok()?; + let (hash_valid, mut data) = gix_bitmap::ewah::decode(data).ok()?; + + if valid.num_bits() > num_directory_blocks + || check_only.num_bits() > num_directory_blocks + || hash_valid.num_bits() > num_directory_blocks + { + return None; + } + + check_only.for_each_set_bit(|index| { + directories[index].check_only = true; + Some(()) + })?; + valid.for_each_set_bit(|index| { + let (stat, rest) = crate::decode::stat(data)?; + directories[index].stat = stat.into(); + data = rest; + Some(()) + }); + hash_valid.for_each_set_bit(|index| { + let (hash, rest) = 
split_at_pos(data, hash_len)?; + data = rest; + directories[index].exclude_file_oid = ObjectId::from(hash).into(); + Some(()) + }); + + // null-byte checked in the beginning + if data.len() != 1 { + return None; + } + res.into() +} + +fn decode_directory_block<'a>(data: &'a [u8], directories: &mut Vec<Directory>) -> Option<&'a [u8]> { + let (num_untracked, data) = var_int(data)?; + let (num_dirs, data) = var_int(data)?; + let (name, mut data) = split_at_byte_exclusive(data, 0)?; + let mut untracked_entries = Vec::<BString>::with_capacity(num_untracked.try_into().ok()?); + for _ in 0..num_untracked { + let (name, rest) = split_at_byte_exclusive(data, 0)?; + data = rest; + untracked_entries.push(name.into()); + } + + let index = directories.len(); + directories.push(Directory { + name: name.into(), + untracked_entries, + sub_directories: Vec::with_capacity(num_dirs.try_into().ok()?), + // the following are set later through their bitmaps + stat: None, + exclude_file_oid: None, + check_only: false, + }); + + for _ in 0..num_dirs { + let subdir_index = directories.len(); + let rest = decode_directory_block(data, directories)?; + data = rest; + directories[index].sub_directories.push(subdir_index); + } + + data.into() +} + +fn decode_oid_stat(data: &[u8], hash_len: usize) -> Option<(OidStat, &[u8])> { + let (stat, data) = crate::decode::stat(data)?; + let (hash, data) = split_at_pos(data, hash_len)?; + Some(( + OidStat { + stat, + id: ObjectId::from(hash), + }, + data, + )) +} |