diff options
author | Daniel Baumann <daniel.baumann@progress-linux.org> | 2024-05-18 02:49:50 +0000 |
---|---|---|
committer | Daniel Baumann <daniel.baumann@progress-linux.org> | 2024-05-18 02:49:50 +0000 |
commit | 9835e2ae736235810b4ea1c162ca5e65c547e770 (patch) | |
tree | 3fcebf40ed70e581d776a8a4c65923e8ec20e026 /vendor/gix-worktree/src | |
parent | Releasing progress-linux version 1.70.0+dfsg2-1~progress7.99u1. (diff) | |
download | rustc-9835e2ae736235810b4ea1c162ca5e65c547e770.tar.xz rustc-9835e2ae736235810b4ea1c162ca5e65c547e770.zip |
Merging upstream version 1.71.1+dfsg1.
Signed-off-by: Daniel Baumann <daniel.baumann@progress-linux.org>
Diffstat (limited to 'vendor/gix-worktree/src')
25 files changed, 1857 insertions, 1282 deletions
diff --git a/vendor/gix-worktree/src/cache/delegate.rs b/vendor/gix-worktree/src/cache/delegate.rs new file mode 100644 index 000000000..90a141b0d --- /dev/null +++ b/vendor/gix-worktree/src/cache/delegate.rs @@ -0,0 +1,162 @@ +use crate::cache::State; +use crate::PathIdMapping; + +/// Various aggregate numbers related to the stack delegate itself. +#[derive(Default, Clone, Copy, Debug)] +#[cfg_attr(feature = "serde", derive(serde::Serialize, serde::Deserialize))] +pub struct Statistics { + /// The amount of `std::fs::create_dir` calls. + /// + /// This only happens if we are in the respective mode to create leading directories efficiently. + pub num_mkdir_calls: usize, + /// Amount of calls to push a path element. + pub push_element: usize, + /// Amount of calls to push a directory. + pub push_directory: usize, + /// Amount of calls to pop a directory. + pub pop_directory: usize, +} + +pub(crate) struct StackDelegate<'a, Find> { + pub state: &'a mut State, + pub buf: &'a mut Vec<u8>, + pub is_dir: bool, + pub id_mappings: &'a Vec<PathIdMapping>, + pub find: Find, + pub case: gix_glob::pattern::Case, + pub statistics: &'a mut super::Statistics, +} + +impl<'a, Find, E> gix_fs::stack::Delegate for StackDelegate<'a, Find> +where + Find: for<'b> FnMut(&gix_hash::oid, &'b mut Vec<u8>) -> Result<gix_object::BlobRef<'b>, E>, + E: std::error::Error + Send + Sync + 'static, +{ + fn push_directory(&mut self, stack: &gix_fs::Stack) -> std::io::Result<()> { + self.statistics.delegate.push_directory += 1; + let dir_bstr = gix_path::into_bstr(stack.current()); + let mut rela_dir = gix_glob::search::pattern::strip_base_handle_recompute_basename_pos( + gix_path::into_bstr(stack.root()).as_ref(), + dir_bstr.as_ref(), + None, + self.case, + ) + .expect("dir in root") + .0; + if rela_dir.starts_with(b"/") { + rela_dir = &rela_dir[1..]; + } + match &mut self.state { + State::CreateDirectoryAndAttributesStack { attributes, .. 
} => { + attributes.push_directory( + stack.root(), + stack.current(), + rela_dir, + self.buf, + self.id_mappings, + &mut self.find, + &mut self.statistics.attributes, + )?; + } + State::AttributesAndIgnoreStack { ignore, attributes } => { + attributes.push_directory( + stack.root(), + stack.current(), + rela_dir, + self.buf, + self.id_mappings, + &mut self.find, + &mut self.statistics.attributes, + )?; + ignore.push_directory( + stack.root(), + stack.current(), + rela_dir, + self.buf, + self.id_mappings, + &mut self.find, + self.case, + &mut self.statistics.ignore, + )? + } + State::IgnoreStack(ignore) => ignore.push_directory( + stack.root(), + stack.current(), + rela_dir, + self.buf, + self.id_mappings, + &mut self.find, + self.case, + &mut self.statistics.ignore, + )?, + } + Ok(()) + } + + fn push(&mut self, is_last_component: bool, stack: &gix_fs::Stack) -> std::io::Result<()> { + self.statistics.delegate.push_element += 1; + match &mut self.state { + State::CreateDirectoryAndAttributesStack { + unlink_on_collision, + attributes: _, + } => create_leading_directory( + is_last_component, + stack, + self.is_dir, + &mut self.statistics.delegate.num_mkdir_calls, + *unlink_on_collision, + )?, + State::AttributesAndIgnoreStack { .. } | State::IgnoreStack(_) => {} + } + Ok(()) + } + + fn pop_directory(&mut self) { + self.statistics.delegate.pop_directory += 1; + match &mut self.state { + State::CreateDirectoryAndAttributesStack { attributes, .. 
} => { + attributes.pop_directory(); + } + State::AttributesAndIgnoreStack { attributes, ignore } => { + attributes.pop_directory(); + ignore.pop_directory(); + } + State::IgnoreStack(ignore) => { + ignore.pop_directory(); + } + } + } +} + +fn create_leading_directory( + is_last_component: bool, + stack: &gix_fs::Stack, + is_dir: bool, + mkdir_calls: &mut usize, + unlink_on_collision: bool, +) -> std::io::Result<()> { + if is_last_component && !is_dir { + return Ok(()); + } + *mkdir_calls += 1; + match std::fs::create_dir(stack.current()) { + Ok(()) => Ok(()), + Err(err) if err.kind() == std::io::ErrorKind::AlreadyExists => { + let meta = stack.current().symlink_metadata()?; + if meta.is_dir() { + Ok(()) + } else if unlink_on_collision { + if meta.file_type().is_symlink() { + gix_fs::symlink::remove(stack.current())?; + } else { + std::fs::remove_file(stack.current())?; + } + *mkdir_calls += 1; + std::fs::create_dir(stack.current()) + } else { + Err(err) + } + } + Err(err) => Err(err), + } +} diff --git a/vendor/gix-worktree/src/fs/cache/mod.rs b/vendor/gix-worktree/src/cache/mod.rs index 1f7710e59..7984b2c4c 100644 --- a/vendor/gix-worktree/src/fs/cache/mod.rs +++ b/vendor/gix-worktree/src/cache/mod.rs @@ -5,7 +5,21 @@ use bstr::{BStr, ByteSlice}; use gix_hash::oid; use super::Cache; -use crate::{fs, fs::PathOidMapping}; +use crate::PathIdMapping; + +/// Various aggregate numbers collected from when the corresponding [`Cache`] was instantiated. +#[derive(Default, Clone, Copy, Debug)] +#[cfg_attr(feature = "serde", derive(serde::Serialize, serde::Deserialize))] +pub struct Statistics { + /// The amount of platforms created to do further matching. + pub platforms: usize, + /// Information about the stack delegate. 
+ pub delegate: delegate::Statistics, + /// Information about attributes + pub attributes: state::attributes::Statistics, + /// Information about the ignore stack + pub ignore: state::ignore::Statistics, +} #[derive(Clone)] pub enum State { @@ -13,10 +27,6 @@ pub enum State { CreateDirectoryAndAttributesStack { /// If there is a symlink or a file in our path, try to unlink it before creating the directory. unlink_on_collision: bool, - - /// just for testing - #[cfg(debug_assertions)] - test_mkdir_calls: usize, /// State to handle attribute information attributes: state::Attributes, }, @@ -31,66 +41,47 @@ pub enum State { IgnoreStack(state::Ignore), } -#[cfg(debug_assertions)] -impl Cache { - pub fn set_case(&mut self, case: gix_glob::pattern::Case) { - self.case = case; - } - pub fn num_mkdir_calls(&self) -> usize { - match self.state { - State::CreateDirectoryAndAttributesStack { test_mkdir_calls, .. } => test_mkdir_calls, - _ => 0, - } - } - - pub fn reset_mkdir_calls(&mut self) { - if let State::CreateDirectoryAndAttributesStack { test_mkdir_calls, .. } = &mut self.state { - *test_mkdir_calls = 0; - } - } - - pub fn unlink_on_collision(&mut self, value: bool) { - if let State::CreateDirectoryAndAttributesStack { - unlink_on_collision, .. - } = &mut self.state - { - *unlink_on_collision = value; - } - } -} - #[must_use] pub struct Platform<'a> { parent: &'a Cache, is_dir: Option<bool>, } +/// Initialization impl Cache { - /// Create a new instance with `worktree_root` being the base for all future paths we handle, assuming it to be valid which includes - /// symbolic links to be included in it as well. - /// The `case` configures attribute and exclusion query case sensitivity. + /// Create a new instance with `worktree_root` being the base for all future paths we match. + /// `state` defines the capabilities of the cache. 
+ /// The `case` configures attribute and exclusion case sensitivity at *query time*, which should match the case that + /// `state` might be configured with. + /// `buf` is used when reading files, and `id_mappings` should have been created with [State::id_mappings_from_index()]. pub fn new( worktree_root: impl Into<PathBuf>, state: State, case: gix_glob::pattern::Case, buf: Vec<u8>, - attribute_files_in_index: Vec<PathOidMapping>, + id_mappings: Vec<PathIdMapping>, ) -> Self { let root = worktree_root.into(); Cache { - stack: fs::Stack::new(root), + stack: gix_fs::Stack::new(root), state, case, buf, - attribute_files_in_index, + id_mappings, + statistics: Statistics::default(), } } +} - /// Append the `relative` path to the root directory the cache contains and efficiently create leading directories - /// unless `is_dir` is known (`Some(…)`) then `relative` points to a directory itself in which case the entire resulting +/// Entry points for attribute query +impl Cache { + /// Append the `relative` path to the root directory of the cache and efficiently create leading directories, while assuring that no + /// symlinks are in that path. + /// Unless `is_dir` is known with `Some(…)`, then `relative` points to a directory itself in which case the entire resulting /// path is created as directory. If it's not known it is assumed to be a file. + /// `find` may be used to look up objects from an [id mapping][crate::cache::State::id_mappings_from_index()], with mappings /// - /// Provide access to cached information for that `relative` entry via the platform returned. + /// Provide access to cached information for that `relative` path via the returned platform. 
pub fn at_path<Find, E>( &mut self, relative: impl AsRef<Path>, @@ -101,19 +92,30 @@ impl Cache { Find: for<'a> FnMut(&oid, &'a mut Vec<u8>) -> Result<gix_object::BlobRef<'a>, E>, E: std::error::Error + Send + Sync + 'static, { - let mut delegate = platform::StackDelegate { + self.statistics.platforms += 1; + let mut delegate = StackDelegate { state: &mut self.state, buf: &mut self.buf, is_dir: is_dir.unwrap_or(false), - attribute_files_in_index: &self.attribute_files_in_index, + id_mappings: &self.id_mappings, find, + case: self.case, + statistics: &mut self.statistics, }; self.stack.make_relative_path_current(relative, &mut delegate)?; Ok(Platform { parent: self, is_dir }) } - /// **Panics** on illformed UTF8 in `relative` - // TODO: more docs + /// Obtain a platform for lookups from a repo-`relative` path, typically obtained from an index entry. `is_dir` should reflect + /// whether it's a directory or not, or left at `None` if unknown. + /// `find` maybe used to lookup objects from an [id mapping][crate::cache::State::id_mappings_from_index()]. + /// All effects are similar to [`at_path()`][Self::at_path()]. + /// + /// If `relative` ends with `/` and `is_dir` is `None`, it is automatically assumed to be a directory. + /// + /// ### Panics + /// + /// on illformed UTF8 in `relative` pub fn at_entry<'r, Find, E>( &mut self, relative: impl Into<&'r BStr>, @@ -130,10 +132,34 @@ impl Cache { self.at_path( relative_path, is_dir.or_else(|| relative.ends_with_str("/").then_some(true)), - // is_dir, find, ) } +} + +/// Mutation +impl Cache { + /// Reset the statistics after returning them. + pub fn take_statistics(&mut self) -> Statistics { + std::mem::take(&mut self.statistics) + } + + /// Return our state for applying changes. + pub fn state_mut(&mut self) -> &mut State { + &mut self.state + } +} + +/// Access +impl Cache { + /// Return the statistics we gathered thus far. 
+ pub fn statistics(&self) -> &Statistics { + &self.statistics + } + /// Return the state for introspection. + pub fn state(&self) -> &State { + &self.state + } /// Return the base path against which all entries or paths should be relative to when querying. /// @@ -143,6 +169,10 @@ impl Cache { } } +/// +pub mod delegate; +use delegate::StackDelegate; + mod platform; /// pub mod state; diff --git a/vendor/gix-worktree/src/cache/platform.rs b/vendor/gix-worktree/src/cache/platform.rs new file mode 100644 index 000000000..f52b53205 --- /dev/null +++ b/vendor/gix-worktree/src/cache/platform.rs @@ -0,0 +1,55 @@ +use std::path::Path; + +use bstr::ByteSlice; + +use crate::cache::Platform; + +/// Access +impl<'a> Platform<'a> { + /// The full path to `relative` will be returned for use on the file system. + pub fn path(&self) -> &'a Path { + self.parent.stack.current() + } + + /// See if the currently set entry is excluded as per exclude and git-ignore files. + /// + /// # Panics + /// + /// If the cache was configured without exclude patterns. + pub fn is_excluded(&self) -> bool { + self.matching_exclude_pattern() + .map_or(false, |m| !m.pattern.is_negative()) + } + + /// Check all exclude patterns to see if the currently set path matches any of them. + /// + /// Note that this pattern might be negated, and means this path in included. + /// + /// # Panics + /// + /// If the cache was configured without exclude patterns. + pub fn matching_exclude_pattern(&self) -> Option<gix_ignore::search::Match<'_, ()>> { + let ignore = self.parent.state.ignore_or_panic(); + let relative_path = + gix_path::to_unix_separators_on_windows(gix_path::into_bstr(self.parent.stack.current_relative())); + ignore.matching_exclude_pattern(relative_path.as_bstr(), self.is_dir, self.parent.case) + } + + /// Match all attributes at the current path and store the result in `out`, returning `true` if at least one attribute was found. 
+ /// + /// # Panics + /// + /// If the cache was configured without attributes. + pub fn matching_attributes(&self, out: &mut gix_attributes::search::Outcome) -> bool { + let attrs = self.parent.state.attributes_or_panic(); + let relative_path = + gix_path::to_unix_separators_on_windows(gix_path::into_bstr(self.parent.stack.current_relative())); + attrs.matching_attributes(relative_path.as_bstr(), self.parent.case, out) + } +} + +impl<'a> std::fmt::Debug for Platform<'a> { + fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result { + std::fmt::Debug::fmt(&self.path(), f) + } +} diff --git a/vendor/gix-worktree/src/cache/state/attributes.rs b/vendor/gix-worktree/src/cache/state/attributes.rs new file mode 100644 index 000000000..64316b35b --- /dev/null +++ b/vendor/gix-worktree/src/cache/state/attributes.rs @@ -0,0 +1,201 @@ +use std::path::{Path, PathBuf}; + +use crate::{Cache, PathIdMapping}; +use gix_glob::pattern::Case; + +use crate::cache::state::{AttributeMatchGroup, Attributes}; +use bstr::{BStr, ByteSlice}; + +/// Various aggregate numbers related [`Attributes`]. +#[derive(Default, Clone, Copy, Debug)] +#[cfg_attr(feature = "serde", derive(serde::Serialize, serde::Deserialize))] +pub struct Statistics { + /// Amount of patterns buffers read from the index. + pub patterns_buffers: usize, + /// Amount of pattern files read from disk. + pub pattern_files: usize, + /// Amount of pattern files we tried to find on disk. + pub tried_pattern_files: usize, +} + +/// Decide where to read `.gitattributes` files from. +#[derive(Default, Debug, Clone, Copy)] +pub enum Source { + /// Retrieve attribute files from id mappings, see + /// [State::id_mappings_from_index()][crate::cache::State::id_mappings_from_index()]. + /// + /// These mappings are typically produced from an index. + /// If a tree should be the source, build an attribute list from a tree instead, or convert a tree to an index. 
+ /// + /// Use this when no worktree checkout is available, like in bare repositories or when accessing blobs from other parts + /// of the history which aren't checked out. + #[default] + IdMapping, + /// Read from the id mappings and if not present, read from the worktree. + /// + /// This is typically used when *checking out* files. + IdMappingThenWorktree, + /// Read from the worktree and if not present, read them from the id mappings. + /// + /// This is typically used when *checking in* files, and it's possible for sparse worktrees not to have a `.gitattributes` file + /// checked out even though it's available in the index. + WorktreeThenIdMapping, +} + +/// Initialization +impl Attributes { + /// Create a new instance from an attribute match group that represents `globals`. It can more easily be created with + /// [AttributeMatchGroup::new_globals()]. + /// + /// * `globals` contribute first and consist of all globally available, static files. + /// * `info_attributes` is a path that should refer to `.git/info/attributes`, and it's not an error if the file doesn't exist. + /// * `case` is used to control case-sensitivity during matching. + /// * `source` specifies from where the directory-based attribute files should be loaded from. 
+ pub fn new( + globals: AttributeMatchGroup, + info_attributes: Option<PathBuf>, + source: Source, + collection: gix_attributes::search::MetadataCollection, + ) -> Self { + Attributes { + globals, + stack: Default::default(), + info_attributes, + source, + collection, + } + } +} + +impl Attributes { + pub(crate) fn pop_directory(&mut self) { + self.stack.pop_pattern_list().expect("something to pop"); + } + + #[allow(clippy::too_many_arguments)] + pub(crate) fn push_directory<Find, E>( + &mut self, + root: &Path, + dir: &Path, + rela_dir: &BStr, + buf: &mut Vec<u8>, + id_mappings: &[PathIdMapping], + mut find: Find, + stats: &mut Statistics, + ) -> std::io::Result<()> + where + Find: for<'b> FnMut(&gix_hash::oid, &'b mut Vec<u8>) -> Result<gix_object::BlobRef<'b>, E>, + E: std::error::Error + Send + Sync + 'static, + { + let attr_path_relative = + gix_path::to_unix_separators_on_windows(gix_path::join_bstr_unix_pathsep(rela_dir, ".gitattributes")); + let attr_file_in_index = id_mappings.binary_search_by(|t| t.0.as_bstr().cmp(attr_path_relative.as_ref())); + // Git does not follow symbolic links as per documentation. 
+ let no_follow_symlinks = false; + + let mut added = false; + match self.source { + Source::IdMapping | Source::IdMappingThenWorktree => { + if let Ok(idx) = attr_file_in_index { + let blob = find(&id_mappings[idx].1, buf) + .map_err(|err| std::io::Error::new(std::io::ErrorKind::Other, err))?; + let attr_path = gix_path::from_bstring(attr_path_relative.into_owned()); + self.stack + .add_patterns_buffer(blob.data, attr_path, Some(Path::new("")), &mut self.collection); + added = true; + stats.patterns_buffers += 1; + } + if !added && matches!(self.source, Source::IdMappingThenWorktree) { + added = self.stack.add_patterns_file( + dir.join(".gitattributes"), + no_follow_symlinks, + Some(root), + buf, + &mut self.collection, + )?; + stats.pattern_files += usize::from(added); + stats.tried_pattern_files += 1; + } + } + Source::WorktreeThenIdMapping => { + added = self.stack.add_patterns_file( + dir.join(".gitattributes"), + no_follow_symlinks, + Some(root), + buf, + &mut self.collection, + )?; + stats.pattern_files += usize::from(added); + stats.tried_pattern_files += 1; + if let Some(idx) = attr_file_in_index.ok().filter(|_| !added) { + let blob = find(&id_mappings[idx].1, buf) + .map_err(|err| std::io::Error::new(std::io::ErrorKind::Other, err))?; + let attr_path = gix_path::from_bstring(attr_path_relative.into_owned()); + self.stack + .add_patterns_buffer(blob.data, attr_path, Some(Path::new("")), &mut self.collection); + added = true; + stats.patterns_buffers += 1; + } + } + } + + // Need one stack level per component so push and pop matches, but only if this isn't the root level which is never popped. + if !added && self.info_attributes.is_none() { + self.stack + .add_patterns_buffer(&[], Path::new("<empty dummy>"), None, &mut self.collection) + } + + // When reading the root, always the first call, we can try to also read the `.git/info/attributes` file which is + // by nature never popped, and follows the root, as global. 
+ if let Some(info_attr) = self.info_attributes.take() { + let added = self + .stack + .add_patterns_file(info_attr, true, None, buf, &mut self.collection)?; + stats.pattern_files += usize::from(added); + stats.tried_pattern_files += 1; + } + + Ok(()) + } + + pub(crate) fn matching_attributes( + &self, + relative_path: &BStr, + case: Case, + out: &mut gix_attributes::search::Outcome, + ) -> bool { + // assure `out` is ready to deal with possibly changed collections (append-only) + out.initialize(&self.collection); + + let groups = [&self.globals, &self.stack]; + let mut has_match = false; + groups.iter().rev().any(|group| { + has_match |= group.pattern_matching_relative_path(relative_path, case, out); + out.is_done() + }); + has_match + } +} + +/// Attribute matching specific methods +impl Cache { + /// Creates a new container to store match outcomes for all attribute matches. + pub fn attribute_matches(&self) -> gix_attributes::search::Outcome { + let mut out = gix_attributes::search::Outcome::default(); + out.initialize(&self.state.attributes_or_panic().collection); + out + } + + /// Creates a new container to store match outcomes for the given attributes. + pub fn selected_attribute_matches<'a>( + &self, + given: impl IntoIterator<Item = impl Into<&'a str>>, + ) -> gix_attributes::search::Outcome { + let mut out = gix_attributes::search::Outcome::default(); + out.initialize_with_selection( + &self.state.attributes_or_panic().collection, + given.into_iter().map(|n| n.into()), + ); + out + } +} diff --git a/vendor/gix-worktree/src/cache/state/ignore.rs b/vendor/gix-worktree/src/cache/state/ignore.rs new file mode 100644 index 000000000..5ff4ccd42 --- /dev/null +++ b/vendor/gix-worktree/src/cache/state/ignore.rs @@ -0,0 +1,216 @@ +use std::path::Path; + +use crate::cache::state::Ignore; +use crate::{cache::state::IgnoreMatchGroup, PathIdMapping}; +use bstr::{BStr, ByteSlice}; +use gix_glob::pattern::Case; + +/// Decide where to read `.gitignore` files from. 
+#[derive(Default, Debug, Clone, Copy)] +pub enum Source { + /// Retrieve ignore files from id mappings, see + /// [State::id_mappings_from_index()][crate::cache::State::id_mappings_from_index()]. + /// + /// These mappings are typically produced from an index. + /// If a tree should be the source, build an attribute list from a tree instead, or convert a tree to an index. + /// + /// Use this when no worktree checkout is available, like in bare repositories or when accessing blobs from other parts + /// of the history which aren't checked out. + IdMapping, + /// Read from the worktree and if not present, read them from the id mappings *if* these don't have the skip-worktree bit set. + #[default] + WorktreeThenIdMappingIfNotSkipped, +} + +/// Various aggregate numbers related to [`Ignore`]. +#[derive(Default, Clone, Copy, Debug)] +#[cfg_attr(feature = "serde", derive(serde::Serialize, serde::Deserialize))] +pub struct Statistics { + /// Amount of patterns buffers read from the index. + pub patterns_buffers: usize, + /// Amount of pattern files read from disk. + pub pattern_files: usize, + /// Amount of pattern files we tried to find on disk. + pub tried_pattern_files: usize, +} + +impl Ignore { + /// Configure gitignore file matching by providing the immutable groups being `overrides` and `globals`, while letting the directory + /// stack be dynamic. + /// + /// The `exclude_file_name_for_directories` is an optional override for the filename to use when checking per-directory + /// ignore files within the repository, defaults to `.gitignore`. 
+ pub fn new( + overrides: IgnoreMatchGroup, + globals: IgnoreMatchGroup, + exclude_file_name_for_directories: Option<&BStr>, + source: Source, + ) -> Self { + Ignore { + overrides, + globals, + stack: Default::default(), + matched_directory_patterns_stack: Vec::with_capacity(6), + exclude_file_name_for_directories: exclude_file_name_for_directories + .map(ToOwned::to_owned) + .unwrap_or_else(|| ".gitignore".into()), + source, + } + } +} + +impl Ignore { + pub(crate) fn pop_directory(&mut self) { + self.matched_directory_patterns_stack.pop().expect("something to pop"); + self.stack.patterns.pop().expect("something to pop"); + } + /// The match groups from lowest priority to highest. + pub(crate) fn match_groups(&self) -> [&IgnoreMatchGroup; 3] { + [&self.globals, &self.stack, &self.overrides] + } + + pub(crate) fn matching_exclude_pattern( + &self, + relative_path: &BStr, + is_dir: Option<bool>, + case: Case, + ) -> Option<gix_ignore::search::Match<'_, ()>> { + let groups = self.match_groups(); + let mut dir_match = None; + if let Some((source, mapping)) = self + .matched_directory_patterns_stack + .iter() + .rev() + .filter_map(|v| *v) + .map(|(gidx, plidx, pidx)| { + let list = &groups[gidx].patterns[plidx]; + (list.source.as_deref(), &list.patterns[pidx]) + }) + .next() + { + let match_ = gix_ignore::search::Match { + pattern: &mapping.pattern, + value: &mapping.value, + sequence_number: mapping.sequence_number, + source, + }; + if mapping.pattern.is_negative() { + dir_match = Some(match_); + } else { + // Note that returning here is wrong if this pattern _was_ preceded by a negative pattern that + // didn't match the directory, but would match now. + // Git does it similarly so we do too even though it's incorrect. 
+ // To fix this, one would probably keep track of whether there was a preceding negative pattern, and + // if so we check the path in full and only use the dir match if there was no match, similar to the negative + // case above whose fix fortunately won't change the overall result. + return match_.into(); + } + } + groups + .iter() + .rev() + .find_map(|group| group.pattern_matching_relative_path(relative_path, is_dir, case)) + .or(dir_match) + } + + /// Like `matching_exclude_pattern()` but without checking if the current directory is excluded. + /// It returns a triple-index into our data structure from which a match can be reconstructed. + pub(crate) fn matching_exclude_pattern_no_dir( + &self, + relative_path: &BStr, + is_dir: Option<bool>, + case: Case, + ) -> Option<(usize, usize, usize)> { + let groups = self.match_groups(); + groups.iter().enumerate().rev().find_map(|(gidx, group)| { + let basename_pos = relative_path.rfind(b"/").map(|p| p + 1); + group + .patterns + .iter() + .enumerate() + .rev() + .find_map(|(plidx, pl)| { + gix_ignore::search::pattern_idx_matching_relative_path( + pl, + relative_path, + basename_pos, + is_dir, + case, + ) + .map(|idx| (plidx, idx)) + }) + .map(|(plidx, pidx)| (gidx, plidx, pidx)) + }) + } + + #[allow(clippy::too_many_arguments)] + pub(crate) fn push_directory<Find, E>( + &mut self, + root: &Path, + dir: &Path, + rela_dir: &BStr, + buf: &mut Vec<u8>, + id_mappings: &[PathIdMapping], + mut find: Find, + case: Case, + stats: &mut Statistics, + ) -> std::io::Result<()> + where + Find: for<'b> FnMut(&gix_hash::oid, &'b mut Vec<u8>) -> Result<gix_object::BlobRef<'b>, E>, + E: std::error::Error + Send + Sync + 'static, + { + self.matched_directory_patterns_stack + .push(self.matching_exclude_pattern_no_dir(rela_dir, Some(true), case)); + + let ignore_path_relative = + gix_path::to_unix_separators_on_windows(gix_path::join_bstr_unix_pathsep(rela_dir, ".gitignore")); + let ignore_file_in_index = 
id_mappings.binary_search_by(|t| t.0.as_bstr().cmp(ignore_path_relative.as_ref())); + match self.source { + Source::IdMapping => { + match ignore_file_in_index { + Ok(idx) => { + let ignore_blob = find(&id_mappings[idx].1, buf) + .map_err(|err| std::io::Error::new(std::io::ErrorKind::Other, err))?; + let ignore_path = gix_path::from_bstring(ignore_path_relative.into_owned()); + self.stack + .add_patterns_buffer(ignore_blob.data, ignore_path, Some(Path::new(""))); + stats.patterns_buffers += 1; + } + Err(_) => { + // Need one stack level per component so push and pop matches. + self.stack.patterns.push(Default::default()) + } + } + } + Source::WorktreeThenIdMappingIfNotSkipped => { + let follow_symlinks = ignore_file_in_index.is_err(); + let added = gix_glob::search::add_patterns_file( + &mut self.stack.patterns, + dir.join(".gitignore"), + follow_symlinks, + Some(root), + buf, + )?; + stats.pattern_files += usize::from(added); + stats.tried_pattern_files += 1; + if !added { + match ignore_file_in_index { + Ok(idx) => { + let ignore_blob = find(&id_mappings[idx].1, buf) + .map_err(|err| std::io::Error::new(std::io::ErrorKind::Other, err))?; + let ignore_path = gix_path::from_bstring(ignore_path_relative.into_owned()); + self.stack + .add_patterns_buffer(ignore_blob.data, ignore_path, Some(Path::new(""))); + stats.patterns_buffers += 1; + } + Err(_) => { + // Need one stack level per component so push and pop matches. 
+ self.stack.patterns.push(Default::default()) + } + } + } + } + } + Ok(()) + } +} diff --git a/vendor/gix-worktree/src/cache/state/mod.rs b/vendor/gix-worktree/src/cache/state/mod.rs new file mode 100644 index 000000000..ae2c6bafd --- /dev/null +++ b/vendor/gix-worktree/src/cache/state/mod.rs @@ -0,0 +1,177 @@ +use std::path::PathBuf; + +use bstr::{BString, ByteSlice}; +use gix_glob::pattern::Case; + +use crate::{cache::State, PathIdMapping}; + +type AttributeMatchGroup = gix_attributes::Search; +type IgnoreMatchGroup = gix_ignore::Search; + +/// State related to attributes associated with files in the repository. +#[derive(Default, Clone)] +pub struct Attributes { + /// Attribute patterns which aren't tied to the repository root, hence are global, they contribute first. + globals: AttributeMatchGroup, + /// Attribute patterns that match the currently set directory (in the stack). + /// + /// Note that the root-level file is always loaded, if present, followed by, the `$GIT_DIR/info/attributes`, if present, based + /// on the location of the `info_attributes` file. + stack: AttributeMatchGroup, + /// The first time we push the root, we have to load additional information from this file if it exists along with the root attributes + /// file if possible, and keep them there throughout. + info_attributes: Option<PathBuf>, + /// A lookup table to accelerate searches. + collection: gix_attributes::search::MetadataCollection, + /// Where to read `.gitattributes` data from. + source: attributes::Source, +} + +/// State related to the exclusion of files, supporting static overrides and globals, along with a stack of dynamically read +/// ignore files from disk or from the index each time the directory changes. +#[derive(Default, Clone)] +#[allow(unused)] +pub struct Ignore { + /// Ignore patterns passed as overrides to everything else, typically passed on the command-line and the first patterns to + /// be consulted. 
+ overrides: IgnoreMatchGroup, + /// Ignore patterns that match the currently set directory (in the stack), which is pushed and popped as needed. + stack: IgnoreMatchGroup, + /// Ignore patterns which aren't tied to the repository root, hence are global. They are consulted last. + globals: IgnoreMatchGroup, + /// A matching stack of pattern indices which is empty if we have just been initialized to indicate that the + /// currently set directory had a pattern matched. Note that this one could be negated. + /// (index into match groups, index into list of pattern lists, index into pattern list) + matched_directory_patterns_stack: Vec<Option<(usize, usize, usize)>>, + /// The name of the file to look for in directories. + pub(crate) exclude_file_name_for_directories: BString, + /// Where to read ignore files from + source: ignore::Source, +} + +/// +pub mod attributes; +/// +pub mod ignore; + +/// Initialization +impl State { + /// Configure a state to be suitable for checking out files, which only needs access to attribute files read from the index. + pub fn for_checkout(unlink_on_collision: bool, attributes: Attributes) -> Self { + State::CreateDirectoryAndAttributesStack { + unlink_on_collision, + attributes, + } + } + + /// Configure a state for adding files, with support for ignore files and attribute files. + pub fn for_add(attributes: Attributes, ignore: Ignore) -> Self { + State::AttributesAndIgnoreStack { attributes, ignore } + } + + /// Configure a state for status retrieval, which needs access to ignore files only. + pub fn for_status(ignore: Ignore) -> Self { + State::IgnoreStack(ignore) + } +} + +/// Utilities +impl State { + /// Returns a vec of tuples of relative index paths along with the best usable blob OID for + /// either *ignore* or *attribute* files or both. This allows files to be accessed directly from + /// the object database without the need for a worktree checkout. 
+ /// + /// Note that this method… + /// - ignores entries which aren't blobs. + /// - ignores ignore entries which are not skip-worktree. + /// - within merges, picks 'our' stage both for *ignore* and *attribute* files. + /// + /// * `index` is where we look for suitable files by path in order to obtain their blob hash. + /// * `paths` is the indices storage backend for paths. + /// * `case` determines if the search for files should be case-sensitive or not. + pub fn id_mappings_from_index( + &self, + index: &gix_index::State, + paths: &gix_index::PathStorageRef, + ignore_source: ignore::Source, + case: Case, + ) -> Vec<PathIdMapping> { + let a1_backing; + let a2_backing; + let names = match self { + State::IgnoreStack(v) => { + a1_backing = [(v.exclude_file_name_for_directories.as_bytes().as_bstr(), true)]; + a1_backing.as_ref() + } + State::AttributesAndIgnoreStack { ignore, .. } => { + a2_backing = [ + (ignore.exclude_file_name_for_directories.as_bytes().as_bstr(), true), + (".gitattributes".into(), false), + ]; + a2_backing.as_ref() + } + State::CreateDirectoryAndAttributesStack { .. } => { + a1_backing = [(".gitattributes".into(), true)]; + a1_backing.as_ref() + } + }; + + index + .entries() + .iter() + .filter_map(move |entry| { + let path = entry.path_in(paths); + + // Stage 0 means there is no merge going on, stage 2 means it's 'our' side of the merge, but then + // there won't be a stage 0. 
+ if entry.mode == gix_index::entry::Mode::FILE && (entry.stage() == 0 || entry.stage() == 2) { + let basename = path + .rfind_byte(b'/') + .map(|pos| path[pos + 1..].as_bstr()) + .unwrap_or(path); + let is_ignore = names.iter().find_map(|t| { + match case { + Case::Sensitive => basename == t.0, + Case::Fold => basename.eq_ignore_ascii_case(t.0), + } + .then_some(t.1) + })?; + if is_ignore { + match ignore_source { + ignore::Source::IdMapping => {} + ignore::Source::WorktreeThenIdMappingIfNotSkipped => { + // See https://github.com/git/git/blob/master/dir.c#L912:L912 + if !entry.flags.contains(gix_index::entry::Flags::SKIP_WORKTREE) { + return None; + } + } + }; + } + Some((path.to_owned(), entry.id)) + } else { + None + } + }) + .collect() + } + + pub(crate) fn ignore_or_panic(&self) -> &Ignore { + match self { + State::IgnoreStack(v) => v, + State::AttributesAndIgnoreStack { ignore, .. } => ignore, + State::CreateDirectoryAndAttributesStack { .. } => { + unreachable!("BUG: must not try to check excludes without it being setup") + } + } + } + + pub(crate) fn attributes_or_panic(&self) -> &Attributes { + match self { + State::AttributesAndIgnoreStack { attributes, .. } + | State::CreateDirectoryAndAttributesStack { attributes, .. 
} => attributes, + State::IgnoreStack(_) => { + unreachable!("BUG: must not try to check excludes without it being setup") + } + } + } +} diff --git a/vendor/gix-worktree/src/checkout/chunk.rs b/vendor/gix-worktree/src/checkout/chunk.rs new file mode 100644 index 000000000..9de9e424e --- /dev/null +++ b/vendor/gix-worktree/src/checkout/chunk.rs @@ -0,0 +1,182 @@ +use std::sync::atomic::{AtomicUsize, Ordering}; + +use bstr::BStr; +use gix_features::progress::Progress; +use gix_hash::oid; + +use crate::{checkout, checkout::entry, Cache}; + +mod reduce { + use std::{ + marker::PhantomData, + sync::atomic::{AtomicUsize, Ordering}, + }; + + use gix_features::progress::Progress; + + use crate::checkout; + + pub struct Reduce<'a, 'entry, P1, P2, E> { + pub files: &'a mut P1, + pub bytes: &'a mut P2, + pub num_files: &'a AtomicUsize, + pub aggregate: super::Outcome<'entry>, + pub marker: PhantomData<E>, + } + + impl<'a, 'entry, P1, P2, E> gix_features::parallel::Reduce for Reduce<'a, 'entry, P1, P2, E> + where + P1: Progress, + P2: Progress, + E: std::error::Error + Send + Sync + 'static, + { + type Input = Result<super::Outcome<'entry>, checkout::Error<E>>; + type FeedProduce = (); + type Output = super::Outcome<'entry>; + type Error = checkout::Error<E>; + + fn feed(&mut self, item: Self::Input) -> Result<Self::FeedProduce, Self::Error> { + let item = item?; + let super::Outcome { + bytes_written, + delayed, + errors, + collisions, + } = item; + self.aggregate.bytes_written += bytes_written; + self.aggregate.delayed.extend(delayed); + self.aggregate.errors.extend(errors); + self.aggregate.collisions.extend(collisions); + + self.bytes.set(self.aggregate.bytes_written as usize); + self.files.set(self.num_files.load(Ordering::Relaxed)); + + Ok(()) + } + + fn finalize(self) -> Result<Self::Output, Self::Error> { + Ok(self.aggregate) + } + } +} +pub use reduce::Reduce; + +#[derive(Default)] +pub struct Outcome<'a> { + pub collisions: Vec<checkout::Collision>, + pub errors: 
Vec<checkout::ErrorRecord>, + pub delayed: Vec<(&'a mut gix_index::Entry, &'a BStr)>, + pub bytes_written: u64, +} + +#[derive(Clone)] +pub struct Context<'a, Find: Clone> { + pub find: Find, + pub path_cache: Cache, + pub buf: Vec<u8>, + pub options: checkout::Options, + /// We keep these shared so that there is the chance for printing numbers that aren't looking like + /// multiple of chunk sizes. Purely cosmetic. Otherwise it's the same as `files`. + pub num_files: &'a AtomicUsize, +} + +pub fn process<'entry, Find, E>( + entries_with_paths: impl Iterator<Item = (&'entry mut gix_index::Entry, &'entry BStr)>, + files: &mut impl Progress, + bytes: &mut impl Progress, + ctx: &mut Context<'_, Find>, +) -> Result<Outcome<'entry>, checkout::Error<E>> +where + Find: for<'a> FnMut(&oid, &'a mut Vec<u8>) -> Result<gix_object::BlobRef<'a>, E> + Clone, + E: std::error::Error + Send + Sync + 'static, +{ + let mut delayed = Vec::new(); + let mut collisions = Vec::new(); + let mut errors = Vec::new(); + let mut bytes_written = 0; + + for (entry, entry_path) in entries_with_paths { + // TODO: write test for that + if entry.flags.contains(gix_index::entry::Flags::SKIP_WORKTREE) { + files.inc(); + continue; + } + + // Symlinks always have to be delayed on windows as they have to point to something that exists on creation. + // And even if not, there is a distinction between file and directory symlinks, hence we have to check what the target is + // before creating it. + // And to keep things sane, we just do the same on non-windows as well which is similar to what git does and adds some safety + // around writing through symlinks (even though we handle this). + // This also means that we prefer content in files over symlinks in case of collisions, which probably is for the better, too. 
+ if entry.mode == gix_index::entry::Mode::SYMLINK { + delayed.push((entry, entry_path)); + continue; + } + + bytes_written += + checkout_entry_handle_result(entry, entry_path, &mut errors, &mut collisions, files, bytes, ctx)? as u64; + } + + Ok(Outcome { + bytes_written, + errors, + collisions, + delayed, + }) +} + +pub fn checkout_entry_handle_result<Find, E>( + entry: &mut gix_index::Entry, + entry_path: &BStr, + errors: &mut Vec<checkout::ErrorRecord>, + collisions: &mut Vec<checkout::Collision>, + files: &mut impl Progress, + bytes: &mut impl Progress, + Context { + find, + path_cache, + buf, + options, + num_files, + }: &mut Context<'_, Find>, +) -> Result<usize, checkout::Error<E>> +where + Find: for<'a> FnMut(&oid, &'a mut Vec<u8>) -> Result<gix_object::BlobRef<'a>, E> + Clone, + E: std::error::Error + Send + Sync + 'static, +{ + let res = entry::checkout( + entry, + entry_path, + entry::Context { find, path_cache, buf }, + options.clone(), + ); + files.inc(); + num_files.fetch_add(1, Ordering::SeqCst); + match res { + Ok(object_size) => { + bytes.inc_by(object_size); + Ok(object_size) + } + Err(checkout::Error::Io(err)) if gix_fs::symlink::is_collision_error(&err) => { + // We are here because a file existed or was blocked by a directory which shouldn't be possible unless + // we are on a file insensitive file system. 
+ files.fail(format!("{}: collided ({:?})", entry_path, err.kind())); + collisions.push(checkout::Collision { + path: entry_path.into(), + error_kind: err.kind(), + }); + Ok(0) + } + Err(err) => { + if options.keep_going { + errors.push(checkout::ErrorRecord { + path: entry_path.into(), + error: Box::new(err), + }); + Ok(0) + } else { + Err(err) + } + } + } +} diff --git a/vendor/gix-worktree/src/index/entry.rs b/vendor/gix-worktree/src/checkout/entry.rs index 32628c4e0..524cf90f2 100644 --- a/vendor/gix-worktree/src/index/entry.rs +++ b/vendor/gix-worktree/src/checkout/entry.rs @@ -1,15 +1,15 @@ -use std::{convert::TryInto, fs::OpenOptions, io::Write, path::Path, time::Duration}; +use std::{fs::OpenOptions, io::Write, path::Path}; use bstr::BStr; use gix_hash::oid; -use gix_index::Entry; +use gix_index::{entry::Stat, Entry}; use io_close::Close; -use crate::{fs, index, os}; +use crate::Cache; pub struct Context<'a, Find> { pub find: &'a mut Find, - pub path_cache: &'a mut fs::Cache, + pub path_cache: &'a mut Cache, pub buf: &'a mut Vec<u8>, } @@ -18,8 +18,8 @@ pub fn checkout<Find, E>( entry: &mut Entry, entry_path: &BStr, Context { find, path_cache, buf }: Context<'_, Find>, - index::checkout::Options { - fs: fs::Capabilities { + crate::checkout::Options { + fs: gix_fs::Capabilities { symlink, executable_bit, .. @@ -27,13 +27,13 @@ pub fn checkout<Find, E>( destination_is_initially_empty, overwrite_existing, .. 
- }: index::checkout::Options, -) -> Result<usize, index::checkout::Error<E>> + }: crate::checkout::Options, +) -> Result<usize, crate::checkout::Error<E>> where Find: for<'a> FnMut(&oid, &'a mut Vec<u8>) -> Result<gix_object::BlobRef<'a>, E>, E: std::error::Error + Send + Sync + 'static, { - let dest_relative = gix_path::try_from_bstr(entry_path).map_err(|_| index::checkout::Error::IllformedUtf8 { + let dest_relative = gix_path::try_from_bstr(entry_path).map_err(|_| crate::checkout::Error::IllformedUtf8 { path: entry_path.to_owned(), })?; let is_dir = Some(entry.mode == gix_index::entry::Mode::COMMIT || entry.mode == gix_index::entry::Mode::DIR); @@ -41,7 +41,7 @@ where let object_size = match entry.mode { gix_index::entry::Mode::FILE | gix_index::entry::Mode::FILE_EXECUTABLE => { - let obj = find(&entry.id, buf).map_err(|err| index::checkout::Error::Find { + let obj = find(&entry.id, buf).map_err(|err| crate::checkout::Error::Find { err, oid: entry.id, path: dest.to_path_buf(), @@ -71,21 +71,23 @@ where } // NOTE: we don't call `file.sync_all()` here knowing that some filesystems don't handle this well. // revisit this once there is a bug to fix. 
- update_fstat(entry, file.metadata()?)?; + entry.stat = Stat::from_fs(&file.metadata()?)?; file.close()?; obj.data.len() } gix_index::entry::Mode::SYMLINK => { - let obj = find(&entry.id, buf).map_err(|err| index::checkout::Error::Find { + let obj = find(&entry.id, buf).map_err(|err| crate::checkout::Error::Find { err, oid: entry.id, path: dest.to_path_buf(), })?; let symlink_destination = gix_path::try_from_byte_slice(obj.data) - .map_err(|_| index::checkout::Error::IllformedUtf8 { path: obj.data.into() })?; + .map_err(|_| crate::checkout::Error::IllformedUtf8 { path: obj.data.into() })?; if symlink { - try_write_or_unlink(dest, overwrite_existing, |p| os::create_symlink(symlink_destination, p))?; + try_write_or_unlink(dest, overwrite_existing, |p| { + gix_fs::symlink::create(symlink_destination, p) + })?; } else { let mut file = try_write_or_unlink(dest, overwrite_existing, |p| { open_options(p, destination_is_initially_empty, overwrite_existing).open(dest) @@ -94,7 +96,7 @@ where file.close()?; } - update_fstat(entry, std::fs::symlink_metadata(dest)?)?; + entry.stat = Stat::from_fs(&std::fs::symlink_metadata(dest)?)?; obj.data.len() } gix_index::entry::Mode::DIR => todo!(), @@ -115,7 +117,7 @@ fn try_write_or_unlink<T>( if overwrite_existing { match op(path) { Ok(res) => Ok(res), - Err(err) if os::indicates_collision(&err) => { + Err(err) if gix_fs::symlink::is_collision_error(&err) => { try_unlink_path_recursively(path, &std::fs::symlink_metadata(path)?)?; op(path) } @@ -130,7 +132,7 @@ fn try_unlink_path_recursively(path: &Path, path_meta: &std::fs::Metadata) -> st if path_meta.is_dir() { std::fs::remove_dir_all(path) } else if path_meta.file_type().is_symlink() { - os::remove_symlink(path) + gix_fs::symlink::remove(path) } else { std::fs::remove_file(path) } @@ -162,28 +164,3 @@ fn open_options(path: &Path, destination_is_initially_empty: bool, overwrite_exi .write(true); options } - -fn update_fstat<E>(entry: &mut Entry, meta: std::fs::Metadata) -> 
Result<(), index::checkout::Error<E>> -where - E: std::error::Error + Send + Sync + 'static, -{ - let ctime = meta - .created() - .map_or(Ok(Duration::default()), |x| x.duration_since(std::time::UNIX_EPOCH))?; - let mtime = meta - .modified() - .map_or(Ok(Duration::default()), |x| x.duration_since(std::time::UNIX_EPOCH))?; - - let stat = &mut entry.stat; - stat.mtime.secs = mtime - .as_secs() - .try_into() - .expect("by 2038 we found a solution for this"); - stat.mtime.nsecs = mtime.subsec_nanos(); - stat.ctime.secs = ctime - .as_secs() - .try_into() - .expect("by 2038 we found a solution for this"); - stat.ctime.nsecs = ctime.subsec_nanos(); - Ok(()) -} diff --git a/vendor/gix-worktree/src/checkout/function.rs b/vendor/gix-worktree/src/checkout/function.rs new file mode 100644 index 000000000..8e69fd4d6 --- /dev/null +++ b/vendor/gix-worktree/src/checkout/function.rs @@ -0,0 +1,119 @@ +use std::sync::atomic::{AtomicBool, AtomicUsize, Ordering}; + +use gix_features::{interrupt, parallel::in_parallel, progress, progress::Progress}; +use gix_hash::oid; + +use crate::{cache, checkout::chunk, Cache}; + +/// Note that interruption still produce an `Ok(…)` value, so the caller should look at `should_interrupt` to communicate the outcome. +/// `dir` is the directory into which to checkout the `index`. +/// `git_dir` is the `.git` directory for reading additional per-repository configuration files. 
+#[allow(clippy::too_many_arguments)] +pub fn checkout<Find, E>( + index: &mut gix_index::State, + dir: impl Into<std::path::PathBuf>, + find: Find, + files: &mut impl Progress, + bytes: &mut impl Progress, + should_interrupt: &AtomicBool, + options: crate::checkout::Options, +) -> Result<crate::checkout::Outcome, crate::checkout::Error<E>> +where + Find: for<'a> FnMut(&oid, &'a mut Vec<u8>) -> Result<gix_object::BlobRef<'a>, E> + Send + Clone, + E: std::error::Error + Send + Sync + 'static, +{ + let paths = index.take_path_backing(); + let res = checkout_inner(index, &paths, dir, find, files, bytes, should_interrupt, options); + index.return_path_backing(paths); + res +} + +#[allow(clippy::too_many_arguments)] +fn checkout_inner<Find, E>( + index: &mut gix_index::State, + paths: &gix_index::PathStorage, + dir: impl Into<std::path::PathBuf>, + find: Find, + files: &mut impl Progress, + bytes: &mut impl Progress, + should_interrupt: &AtomicBool, + options: crate::checkout::Options, +) -> Result<crate::checkout::Outcome, crate::checkout::Error<E>> +where + Find: for<'a> FnMut(&oid, &'a mut Vec<u8>) -> Result<gix_object::BlobRef<'a>, E> + Send + Clone, + E: std::error::Error + Send + Sync + 'static, +{ + let num_files = AtomicUsize::default(); + let dir = dir.into(); + let case = if options.fs.ignore_case { + gix_glob::pattern::Case::Fold + } else { + gix_glob::pattern::Case::Sensitive + }; + let (chunk_size, thread_limit, num_threads) = gix_features::parallel::optimize_chunk_size_and_thread_limit( + 100, + index.entries().len().into(), + options.thread_limit, + None, + ); + + let state = cache::State::for_checkout(options.overwrite_existing, options.attributes.clone()); + let attribute_files = state.id_mappings_from_index(index, paths, Default::default(), case); + let mut ctx = chunk::Context { + buf: Vec::new(), + path_cache: Cache::new(dir, state, case, Vec::with_capacity(512), attribute_files), + find, + options, + num_files: &num_files, + }; + + let 
chunk::Outcome { + mut collisions, + mut errors, + mut bytes_written, + delayed, + } = if num_threads == 1 { + let entries_with_paths = interrupt::Iter::new(index.entries_mut_with_paths_in(paths), should_interrupt); + chunk::process(entries_with_paths, files, bytes, &mut ctx)? + } else { + let entries_with_paths = interrupt::Iter::new(index.entries_mut_with_paths_in(paths), should_interrupt); + in_parallel( + gix_features::iter::Chunks { + inner: entries_with_paths, + size: chunk_size, + }, + thread_limit, + { + let ctx = ctx.clone(); + move |_| (progress::Discard, progress::Discard, ctx.clone()) + }, + |chunk, (files, bytes, ctx)| chunk::process(chunk.into_iter(), files, bytes, ctx), + chunk::Reduce { + files, + bytes, + num_files: &num_files, + aggregate: Default::default(), + marker: Default::default(), + }, + )? + }; + + for (entry, entry_path) in delayed { + bytes_written += chunk::checkout_entry_handle_result( + entry, + entry_path, + &mut errors, + &mut collisions, + files, + bytes, + &mut ctx, + )? as u64; + } + + Ok(crate::checkout::Outcome { + files_updated: num_files.load(Ordering::Relaxed), + collisions, + errors, + bytes_written, + }) +} diff --git a/vendor/gix-worktree/src/index/checkout.rs b/vendor/gix-worktree/src/checkout/mod.rs index 6bc465375..11f39b1b2 100644 --- a/vendor/gix-worktree/src/index/checkout.rs +++ b/vendor/gix-worktree/src/checkout/mod.rs @@ -1,6 +1,7 @@ #![allow(missing_docs)] + use bstr::BString; -use gix_attributes::Attributes; +use gix_index::entry::stat; #[derive(Clone, Debug, Eq, Hash, Ord, PartialEq, PartialOrd)] pub struct Collision { @@ -27,10 +28,10 @@ pub struct Outcome { pub errors: Vec<ErrorRecord>, } -#[derive(Clone)] +#[derive(Clone, Default)] pub struct Options { /// capabilities of the file system - pub fs: crate::fs::Capabilities, + pub fs: gix_fs::Capabilities, /// If set, don't use more than this amount of threads. /// Otherwise, usually use as many threads as there are logical cores. 
/// A value of 0 is interpreted as no-limit @@ -48,35 +49,12 @@ pub struct Options { /// due to a conflict. /// The checkout operation will never fail, but count the encountered errors instead along with their paths. pub keep_going: bool, - /// If true, a files creation time is taken into consideration when checking if a file changed. - /// Can be set to false in case other tools alter the creation time in ways that interfere with our operation. - /// - /// Default true. - pub trust_ctime: bool, - /// If true, all stat fields will be used when checking for up-to-date'ness of the entry. Otherwise - /// nano-second parts of mtime and ctime,uid, gid, inode and device number _will not_ be used, leaving only - /// the whole-second part of ctime and mtime and the file size to be checked. - /// - /// Default true. - pub check_stat: bool, - /// A group of attribute patterns that are applied globally, i.e. aren't rooted within the repository itself. - pub attribute_globals: gix_attributes::MatchGroup<Attributes>, + /// Control how stat comparisons are made when checking if a file is fresh. + pub stat_options: stat::Options, + /// A stack of attributes to use with the filesystem cache to use as driver for filters. 
+ pub attributes: crate::cache::state::Attributes, } -impl Default for Options { - fn default() -> Self { - Options { - fs: Default::default(), - thread_limit: None, - destination_is_initially_empty: false, - keep_going: false, - trust_ctime: true, - check_stat: true, - overwrite_existing: false, - attribute_globals: Default::default(), - } - } -} #[derive(Debug, thiserror::Error)] pub enum Error<E: std::error::Error + Send + Sync + 'static> { #[error("Could not convert path to UTF8: {}", .path)] @@ -93,3 +71,7 @@ pub enum Error<E: std::error::Error + Send + Sync + 'static> { path: std::path::PathBuf, }, } + +mod chunk; +mod entry; +pub(crate) mod function; diff --git a/vendor/gix-worktree/src/fs/cache/platform.rs b/vendor/gix-worktree/src/fs/cache/platform.rs deleted file mode 100644 index 90bbdbe3c..000000000 --- a/vendor/gix-worktree/src/fs/cache/platform.rs +++ /dev/null @@ -1,171 +0,0 @@ -use std::path::Path; - -use bstr::ByteSlice; -use gix_hash::oid; - -use crate::{ - fs, - fs::{ - cache::{Platform, State}, - PathOidMapping, - }, -}; - -impl<'a> Platform<'a> { - /// The full path to `relative` will be returned for use on the file system. - pub fn path(&self) -> &'a Path { - self.parent.stack.current() - } - - /// See if the currently set entry is excluded as per exclude and git-ignore files. - /// - /// # Panics - /// - /// If the cache was configured without exclude patterns. - pub fn is_excluded(&self) -> bool { - self.matching_exclude_pattern() - .map_or(false, |m| !m.pattern.is_negative()) - } - - /// Check all exclude patterns to see if the currently set path matches any of them. - /// - /// Note that this pattern might be negated, and means this path in included. - /// - /// # Panics - /// - /// If the cache was configured without exclude patterns. 
- pub fn matching_exclude_pattern(&self) -> Option<gix_attributes::Match<'_, ()>> { - let ignore = self.parent.state.ignore_or_panic(); - let relative_path = - gix_path::to_unix_separators_on_windows(gix_path::into_bstr(self.parent.stack.current_relative.as_path())); - ignore.matching_exclude_pattern(relative_path.as_bstr(), self.is_dir, self.parent.case) - } -} - -impl<'a> std::fmt::Debug for Platform<'a> { - fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result { - std::fmt::Debug::fmt(&self.path(), f) - } -} - -pub struct StackDelegate<'a, Find> { - pub state: &'a mut State, - pub buf: &'a mut Vec<u8>, - pub is_dir: bool, - pub attribute_files_in_index: &'a Vec<PathOidMapping>, - pub find: Find, -} - -impl<'a, Find, E> fs::stack::Delegate for StackDelegate<'a, Find> -where - Find: for<'b> FnMut(&oid, &'b mut Vec<u8>) -> Result<gix_object::BlobRef<'b>, E>, - E: std::error::Error + Send + Sync + 'static, -{ - fn push_directory(&mut self, stack: &fs::Stack) -> std::io::Result<()> { - match &mut self.state { - State::CreateDirectoryAndAttributesStack { attributes: _, .. } => { - // TODO: attributes - } - State::AttributesAndIgnoreStack { ignore, attributes: _ } => { - // TODO: attributes - ignore.push_directory( - &stack.root, - &stack.current, - self.buf, - self.attribute_files_in_index, - &mut self.find, - )? - } - State::IgnoreStack(ignore) => ignore.push_directory( - &stack.root, - &stack.current, - self.buf, - self.attribute_files_in_index, - &mut self.find, - )?, - } - Ok(()) - } - - fn push(&mut self, is_last_component: bool, stack: &fs::Stack) -> std::io::Result<()> { - match &mut self.state { - State::CreateDirectoryAndAttributesStack { - #[cfg(debug_assertions)] - test_mkdir_calls, - unlink_on_collision, - attributes: _, - } => { - #[cfg(debug_assertions)] - { - create_leading_directory( - is_last_component, - stack, - self.is_dir, - test_mkdir_calls, - *unlink_on_collision, - )? 
- } - #[cfg(not(debug_assertions))] - { - create_leading_directory(is_last_component, stack, self.is_dir, *unlink_on_collision)? - } - } - State::AttributesAndIgnoreStack { .. } | State::IgnoreStack(_) => {} - } - Ok(()) - } - - fn pop_directory(&mut self) { - match &mut self.state { - State::CreateDirectoryAndAttributesStack { attributes: _, .. } => { - // TODO: attributes - } - State::AttributesAndIgnoreStack { attributes: _, ignore } => { - // TODO: attributes - ignore.pop_directory(); - } - State::IgnoreStack(ignore) => { - ignore.pop_directory(); - } - } - } -} - -fn create_leading_directory( - is_last_component: bool, - stack: &fs::Stack, - is_dir: bool, - #[cfg(debug_assertions)] mkdir_calls: &mut usize, - unlink_on_collision: bool, -) -> std::io::Result<()> { - if is_last_component && !is_dir { - return Ok(()); - } - #[cfg(debug_assertions)] - { - *mkdir_calls += 1; - } - match std::fs::create_dir(stack.current()) { - Ok(()) => Ok(()), - Err(err) if err.kind() == std::io::ErrorKind::AlreadyExists => { - let meta = stack.current().symlink_metadata()?; - if meta.is_dir() { - Ok(()) - } else if unlink_on_collision { - if meta.file_type().is_symlink() { - crate::os::remove_symlink(stack.current())?; - } else { - std::fs::remove_file(stack.current())?; - } - #[cfg(debug_assertions)] - { - *mkdir_calls += 1; - } - std::fs::create_dir(stack.current()) - } else { - Err(err) - } - } - Err(err) => Err(err), - } -} diff --git a/vendor/gix-worktree/src/fs/cache/state.rs b/vendor/gix-worktree/src/fs/cache/state.rs deleted file mode 100644 index 1692bfa5e..000000000 --- a/vendor/gix-worktree/src/fs/cache/state.rs +++ /dev/null @@ -1,301 +0,0 @@ -use std::path::Path; - -use bstr::{BStr, BString, ByteSlice}; -use gix_glob::pattern::Case; -use gix_hash::oid; - -use crate::fs::{cache::State, PathOidMapping}; - -type AttributeMatchGroup = gix_attributes::MatchGroup<gix_attributes::Attributes>; -type IgnoreMatchGroup = gix_attributes::MatchGroup<gix_attributes::Ignore>; - -/// 
State related to attributes associated with files in the repository. -#[derive(Default, Clone)] -#[allow(unused)] -pub struct Attributes { - /// Attribute patterns that match the currently set directory (in the stack). - pub stack: AttributeMatchGroup, - /// Attribute patterns which aren't tied to the repository root, hence are global. They are consulted last. - pub globals: AttributeMatchGroup, -} - -/// State related to the exclusion of files. -#[derive(Default, Clone)] -#[allow(unused)] -pub struct Ignore { - /// Ignore patterns passed as overrides to everything else, typically passed on the command-line and the first patterns to - /// be consulted. - overrides: IgnoreMatchGroup, - /// Ignore patterns that match the currently set director (in the stack), which is pushed and popped as needed. - stack: IgnoreMatchGroup, - /// Ignore patterns which aren't tied to the repository root, hence are global. They are consulted last. - globals: IgnoreMatchGroup, - /// A matching stack of pattern indices which is empty if we have just been initialized to indicate that the - /// currently set directory had a pattern matched. Note that this one could be negated. - /// (index into match groups, index into list of pattern lists, index into pattern list) - matched_directory_patterns_stack: Vec<Option<(usize, usize, usize)>>, - /// The name of the file to look for in directories. - exclude_file_name_for_directories: BString, - /// The case to use when matching directories as they are pushed onto the stack. We run them against the exclude engine - /// to know if an entire path can be ignored as a parent directory is ignored. - case: Case, -} - -impl Ignore { - /// The `exclude_file_name_for_directories` is an optional override for the filename to use when checking per-directory - /// ignore files within the repository, defaults to`.gitignore`. 
- // TODO: more docs - pub fn new( - overrides: IgnoreMatchGroup, - globals: IgnoreMatchGroup, - exclude_file_name_for_directories: Option<&BStr>, - case: Case, - ) -> Self { - Ignore { - case, - overrides, - globals, - stack: Default::default(), - matched_directory_patterns_stack: Vec::with_capacity(6), - exclude_file_name_for_directories: exclude_file_name_for_directories - .map(ToOwned::to_owned) - .unwrap_or_else(|| ".gitignore".into()), - } - } -} - -impl Ignore { - pub(crate) fn pop_directory(&mut self) { - self.matched_directory_patterns_stack.pop().expect("something to pop"); - self.stack.patterns.pop().expect("something to pop"); - } - /// The match groups from lowest priority to highest. - pub(crate) fn match_groups(&self) -> [&IgnoreMatchGroup; 3] { - [&self.globals, &self.stack, &self.overrides] - } - - pub(crate) fn matching_exclude_pattern( - &self, - relative_path: &BStr, - is_dir: Option<bool>, - case: Case, - ) -> Option<gix_attributes::Match<'_, ()>> { - let groups = self.match_groups(); - let mut dir_match = None; - if let Some((source, mapping)) = self - .matched_directory_patterns_stack - .iter() - .rev() - .filter_map(|v| *v) - .map(|(gidx, plidx, pidx)| { - let list = &groups[gidx].patterns[plidx]; - (list.source.as_deref(), &list.patterns[pidx]) - }) - .next() - { - let match_ = gix_attributes::Match { - pattern: &mapping.pattern, - value: &mapping.value, - sequence_number: mapping.sequence_number, - source, - }; - if mapping.pattern.is_negative() { - dir_match = Some(match_); - } else { - // Note that returning here is wrong if this pattern _was_ preceded by a negative pattern that - // didn't match the directory, but would match now. - // Git does it similarly so we do too even though it's incorrect. 
- // To fix this, one would probably keep track of whether there was a preceding negative pattern, and - // if so we check the path in full and only use the dir match if there was no match, similar to the negative - // case above whose fix fortunately won't change the overall result. - return match_.into(); - } - } - groups - .iter() - .rev() - .find_map(|group| group.pattern_matching_relative_path(relative_path.as_bytes(), is_dir, case)) - .or(dir_match) - } - - /// Like `matching_exclude_pattern()` but without checking if the current directory is excluded. - /// It returns a triple-index into our data structure from which a match can be reconstructed. - pub(crate) fn matching_exclude_pattern_no_dir( - &self, - relative_path: &BStr, - is_dir: Option<bool>, - case: Case, - ) -> Option<(usize, usize, usize)> { - let groups = self.match_groups(); - groups.iter().enumerate().rev().find_map(|(gidx, group)| { - let basename_pos = relative_path.rfind(b"/").map(|p| p + 1); - group - .patterns - .iter() - .enumerate() - .rev() - .find_map(|(plidx, pl)| { - pl.pattern_idx_matching_relative_path(relative_path, basename_pos, is_dir, case) - .map(|idx| (plidx, idx)) - }) - .map(|(plidx, pidx)| (gidx, plidx, pidx)) - }) - } - - pub(crate) fn push_directory<Find, E>( - &mut self, - root: &Path, - dir: &Path, - buf: &mut Vec<u8>, - attribute_files_in_index: &[PathOidMapping], - mut find: Find, - ) -> std::io::Result<()> - where - Find: for<'b> FnMut(&oid, &'b mut Vec<u8>) -> Result<gix_object::BlobRef<'b>, E>, - E: std::error::Error + Send + Sync + 'static, - { - let rela_dir = dir.strip_prefix(root).expect("dir in root"); - self.matched_directory_patterns_stack - .push(self.matching_exclude_pattern_no_dir(gix_path::into_bstr(rela_dir).as_ref(), Some(true), self.case)); - - let ignore_path_relative = rela_dir.join(".gitignore"); - let ignore_path_relative = gix_path::to_unix_separators_on_windows(gix_path::into_bstr(ignore_path_relative)); - let ignore_file_in_index = - 
attribute_files_in_index.binary_search_by(|t| t.0.as_bstr().cmp(ignore_path_relative.as_ref())); - let follow_symlinks = ignore_file_in_index.is_err(); - if !self - .stack - .add_patterns_file(dir.join(".gitignore"), follow_symlinks, Some(root), buf)? - { - match ignore_file_in_index { - Ok(idx) => { - let ignore_blob = find(&attribute_files_in_index[idx].1, buf) - .map_err(|err| std::io::Error::new(std::io::ErrorKind::Other, err))?; - let ignore_path = gix_path::from_bstring(ignore_path_relative.into_owned()); - self.stack - .add_patterns_buffer(ignore_blob.data, ignore_path, Some(root)); - } - Err(_) => { - // Need one stack level per component so push and pop matches. - self.stack.patterns.push(Default::default()) - } - } - } - Ok(()) - } -} - -impl Attributes { - /// Create a new instance from an attribute match group that represents `globals`. - /// - /// A stack of attributes will be applied on top of it later. - pub fn new(globals: AttributeMatchGroup) -> Self { - Attributes { - globals, - stack: Default::default(), - } - } -} - -impl From<AttributeMatchGroup> for Attributes { - fn from(group: AttributeMatchGroup) -> Self { - Attributes::new(group) - } -} - -impl State { - /// Configure a state to be suitable for checking out files. - pub fn for_checkout(unlink_on_collision: bool, attributes: Attributes) -> Self { - State::CreateDirectoryAndAttributesStack { - unlink_on_collision, - #[cfg(debug_assertions)] - test_mkdir_calls: 0, - attributes, - } - } - - /// Configure a state for adding files. - pub fn for_add(attributes: Attributes, ignore: Ignore) -> Self { - State::AttributesAndIgnoreStack { attributes, ignore } - } - - /// Configure a state for status retrieval. - pub fn for_status(ignore: Ignore) -> Self { - State::IgnoreStack(ignore) - } -} - -impl State { - /// Returns a vec of tuples of relative index paths along with the best usable OID for either ignore, attribute files or both. 
- /// - /// - ignores entries which aren't blobs - /// - ignores ignore entries which are not skip-worktree - /// - within merges, picks 'our' stage both for ignore and attribute files. - pub fn build_attribute_list( - &self, - index: &gix_index::State, - paths: &gix_index::PathStorageRef, - case: Case, - ) -> Vec<PathOidMapping> { - let a1_backing; - let a2_backing; - let names = match self { - State::IgnoreStack(v) => { - a1_backing = [(v.exclude_file_name_for_directories.as_bytes().as_bstr(), true)]; - a1_backing.as_ref() - } - State::AttributesAndIgnoreStack { ignore, .. } => { - a2_backing = [ - (ignore.exclude_file_name_for_directories.as_bytes().as_bstr(), true), - (".gitattributes".into(), false), - ]; - a2_backing.as_ref() - } - State::CreateDirectoryAndAttributesStack { .. } => { - a1_backing = [(".gitattributes".into(), true)]; - a1_backing.as_ref() - } - }; - - index - .entries() - .iter() - .filter_map(move |entry| { - let path = entry.path_in(paths); - - // Stage 0 means there is no merge going on, stage 2 means it's 'our' side of the merge, but then - // there won't be a stage 0. - if entry.mode == gix_index::entry::Mode::FILE && (entry.stage() == 0 || entry.stage() == 2) { - let basename = path - .rfind_byte(b'/') - .map(|pos| path[pos + 1..].as_bstr()) - .unwrap_or(path); - let is_ignore = names.iter().find_map(|t| { - match case { - Case::Sensitive => basename == t.0, - Case::Fold => basename.eq_ignore_ascii_case(t.0), - } - .then_some(t.1) - })?; - // See https://github.com/git/git/blob/master/dir.c#L912:L912 - if is_ignore && !entry.flags.contains(gix_index::entry::Flags::SKIP_WORKTREE) { - return None; - } - Some((path.to_owned(), entry.id)) - } else { - None - } - }) - .collect() - } - - pub(crate) fn ignore_or_panic(&self) -> &Ignore { - match self { - State::IgnoreStack(v) => v, - State::AttributesAndIgnoreStack { ignore, .. } => ignore, - State::CreateDirectoryAndAttributesStack { .. 
} => { - unreachable!("BUG: must not try to check excludes without it being setup") - } - } - } -} diff --git a/vendor/gix-worktree/src/fs/capabilities.rs b/vendor/gix-worktree/src/fs/capabilities.rs deleted file mode 100644 index 64daab9ce..000000000 --- a/vendor/gix-worktree/src/fs/capabilities.rs +++ /dev/null @@ -1,122 +0,0 @@ -use std::path::Path; - -use crate::fs::Capabilities; - -#[cfg(windows)] -impl Default for Capabilities { - fn default() -> Self { - Capabilities { - precompose_unicode: false, - ignore_case: true, - executable_bit: false, - symlink: false, - } - } -} - -#[cfg(target_os = "macos")] -impl Default for Capabilities { - fn default() -> Self { - Capabilities { - precompose_unicode: true, - ignore_case: true, - executable_bit: true, - symlink: true, - } - } -} - -#[cfg(all(unix, not(target_os = "macos")))] -impl Default for Capabilities { - fn default() -> Self { - Capabilities { - precompose_unicode: false, - ignore_case: false, - executable_bit: true, - symlink: true, - } - } -} - -impl Capabilities { - /// try to determine all values in this context by probing them in the given `git_dir`, which - /// should be on the file system the git repository is located on. - /// `git_dir` is a typical git repository, expected to be populated with the typical files like `config`. - /// - /// All errors are ignored and interpreted on top of the default for the platform the binary is compiled for. 
- pub fn probe(git_dir: impl AsRef<Path>) -> Self { - let root = git_dir.as_ref(); - let ctx = Capabilities::default(); - Capabilities { - symlink: Self::probe_symlink(root).unwrap_or(ctx.symlink), - ignore_case: Self::probe_ignore_case(root).unwrap_or(ctx.ignore_case), - precompose_unicode: Self::probe_precompose_unicode(root).unwrap_or(ctx.precompose_unicode), - executable_bit: Self::probe_file_mode(root).unwrap_or(ctx.executable_bit), - } - } - - #[cfg(unix)] - fn probe_file_mode(root: &Path) -> std::io::Result<bool> { - use std::os::unix::fs::{MetadataExt, OpenOptionsExt}; - - // test it exactly as we typically create executable files, not using chmod. - let test_path = root.join("_test_executable_bit"); - let res = std::fs::OpenOptions::new() - .create_new(true) - .write(true) - .mode(0o777) - .open(&test_path) - .and_then(|f| f.metadata().map(|m| m.mode() & 0o100 == 0o100)); - std::fs::remove_file(test_path)?; - res - } - - #[cfg(not(unix))] - fn probe_file_mode(_root: &Path) -> std::io::Result<bool> { - Ok(false) - } - - fn probe_ignore_case(git_dir: &Path) -> std::io::Result<bool> { - std::fs::metadata(git_dir.join("cOnFiG")).map(|_| true).or_else(|err| { - if err.kind() == std::io::ErrorKind::NotFound { - Ok(false) - } else { - Err(err) - } - }) - } - - fn probe_precompose_unicode(root: &Path) -> std::io::Result<bool> { - let precomposed = "ä"; - let decomposed = "a\u{308}"; - - let precomposed = root.join(precomposed); - std::fs::OpenOptions::new() - .create_new(true) - .write(true) - .open(&precomposed)?; - let res = root.join(decomposed).symlink_metadata().map(|_| true); - std::fs::remove_file(precomposed)?; - res - } - - fn probe_symlink(root: &Path) -> std::io::Result<bool> { - let src_path = root.join("__link_src_file"); - std::fs::OpenOptions::new() - .create_new(true) - .write(true) - .open(&src_path)?; - let link_path = root.join("__file_link"); - if crate::os::create_symlink(&src_path, &link_path).is_err() { - std::fs::remove_file(&src_path)?; - 
return Ok(false); - } - - let res = std::fs::symlink_metadata(&link_path).map(|m| m.file_type().is_symlink()); - - let cleanup = crate::os::remove_symlink(&link_path).or_else(|_| std::fs::remove_file(&link_path)); - std::fs::remove_file(&src_path).and(cleanup)?; - - res - } -} diff --git a/vendor/gix-worktree/src/fs/mod.rs b/vendor/gix-worktree/src/fs/mod.rs deleted file mode 100644 index a58c461fe..000000000 --- a/vendor/gix-worktree/src/fs/mod.rs +++ /dev/null @@ -1,81 +0,0 @@ -use std::path::PathBuf; - -use bstr::BString; - -/// Common knowledge about the worktree that is needed across most interactions with the work tree -#[cfg_attr(feature = "serde1", derive(serde::Serialize, serde::Deserialize))] -#[derive(PartialEq, Eq, Debug, Hash, Ord, PartialOrd, Clone, Copy)] -pub struct Capabilities { - /// If true, the filesystem will store paths as decomposed unicode, i.e. `ä` becomes `"a\u{308}"`, which means that - /// we have to turn these forms back from decomposed to precomposed unicode before storing it in the index or generally - /// using it. This also applies to input received from the command-line, so callers may have to be aware of this and - /// perform conversions accordingly. - /// If false, no conversions will be performed. - pub precompose_unicode: bool, - /// If true, the filesystem ignores the case of input, which makes `A` the same file as `a`. - /// This is also called case-folding. - pub ignore_case: bool, - /// If true, we assume the the executable bit is honored as part of the files mode. If false, we assume the file system - /// ignores the executable bit, hence it will be reported as 'off' even though we just tried to set it to be on. - pub executable_bit: bool, - /// If true, the file system supports symbolic links and we should try to create them. Otherwise symbolic links will be checked - /// out as files which contain the link as text. 
- pub symlink: bool, -} - -/// A stack of path components with the delegation of side-effects as the currently set path changes, component by component. -#[derive(Clone)] -pub struct Stack { - /// The prefix/root for all paths we handle. - root: PathBuf, - /// the most recent known cached that we know is valid. - current: PathBuf, - /// The relative portion of `valid` that was added previously. - current_relative: PathBuf, - /// The amount of path components of 'current' beyond the roots components. - valid_components: usize, - /// If set, we assume the `current` element is a directory to affect calls to `(push|pop)_directory()`. - current_is_directory: bool, -} - -/// A cache for efficiently executing operations on directories and files which are encountered in sorted order. -/// That way, these operations can be re-used for subsequent invocations in the same directory. -/// -/// This cache can be configured to create directories efficiently, read git-ignore files and git-attribute files, -/// in any combination. -/// -/// A cache for directory creation to reduce the amount of stat calls when creating -/// directories safely, that is without following symlinks that might be on the way. -/// -/// As a special case, it offers a 'prefix' which (by itself) is assumed to exist and may contain symlinks. -/// Everything past that prefix boundary must not contain a symlink. We do this by allowing any input path. -/// -/// Another added benefit is its ability to store the path of full path of the entry to which leading directories -/// are to be created to avoid allocating memory. -/// -/// For this to work, it remembers the last 'good' path to a directory and assumes that all components of it -/// are still valid, too. -/// As directories are created, the cache will be adjusted to reflect the latest seen directory. -/// -/// The caching is only useful if consecutive calls to create a directory are using a sorted list of entries. 
-#[derive(Clone)] -pub struct Cache { - stack: Stack, - /// tells us what to do as we change paths. - state: cache::State, - /// A buffer used when reading attribute or ignore files or their respective objects from the object database. - buf: Vec<u8>, - /// If case folding should happen when looking up attributes or exclusions. - case: gix_glob::pattern::Case, - /// A lookup table for object ids to read from in some situations when looking up attributes or exclusions. - attribute_files_in_index: Vec<PathOidMapping>, -} - -pub(crate) type PathOidMapping = (BString, gix_hash::ObjectId); - -/// -pub mod cache; -/// -pub mod stack; - -mod capabilities; diff --git a/vendor/gix-worktree/src/fs/stack.rs b/vendor/gix-worktree/src/fs/stack.rs deleted file mode 100644 index 734a4988b..000000000 --- a/vendor/gix-worktree/src/fs/stack.rs +++ /dev/null @@ -1,123 +0,0 @@ -use std::path::{Path, PathBuf}; - -use crate::fs::Stack; - -impl Stack { - /// Returns the top-level path of the stack. - pub fn root(&self) -> &Path { - &self.root - } - - /// Returns the absolute path the currently set path. - pub fn current(&self) -> &Path { - &self.current - } - - /// Returns the currently set path relative to the [`root()`][Stack::root()]. - pub fn current_relative(&self) -> &Path { - &self.current_relative - } -} - -/// A delegate for use in a [`Stack`]. -pub trait Delegate { - /// Called whenever we push a directory on top of the stack, after the fact. - /// - /// It is also called if the currently acted on path is a directory in itself. - /// Use `stack.current()` to see the directory. - fn push_directory(&mut self, stack: &Stack) -> std::io::Result<()>; - - /// Called after any component was pushed, with the path available at `stack.current()`. - /// - /// `is_last_component` is true if the path is completely built. - fn push(&mut self, is_last_component: bool, stack: &Stack) -> std::io::Result<()>; - - /// Called right after a directory-component was popped off the stack. 
- /// - /// Use it to pop information off internal data structures. - fn pop_directory(&mut self); -} - -impl Stack { - /// Create a new instance with `root` being the base for all future paths we handle, assuming it to be valid which includes - /// symbolic links to be included in it as well. - pub fn new(root: impl Into<PathBuf>) -> Self { - let root = root.into(); - Stack { - current: root.clone(), - current_relative: PathBuf::with_capacity(128), - valid_components: 0, - root, - current_is_directory: true, - } - } - - /// Set the current stack to point to the `relative` path and call `push_comp()` each time a new path component is popped - /// along with the stacks state for inspection to perform an operation that produces some data. - /// - /// The full path to `relative` will be returned along with the data returned by push_comp. - /// Note that this only works correctly for the delegate's `push_directory()` and `pop_directory()` methods if - /// `relative` paths are terminal, so point to their designated file or directory. 
- pub fn make_relative_path_current( - &mut self, - relative: impl AsRef<Path>, - delegate: &mut impl Delegate, - ) -> std::io::Result<()> { - let relative = relative.as_ref(); - debug_assert!( - relative.is_relative(), - "only index paths are handled correctly here, must be relative" - ); - debug_assert!(!relative.to_string_lossy().is_empty(), "empty paths are not allowed"); - - if self.valid_components == 0 { - delegate.push_directory(self)?; - } - - let mut components = relative.components().peekable(); - let mut existing_components = self.current_relative.components(); - let mut matching_components = 0; - while let (Some(existing_comp), Some(new_comp)) = (existing_components.next(), components.peek()) { - if existing_comp == *new_comp { - components.next(); - matching_components += 1; - } else { - break; - } - } - - for _ in 0..self.valid_components - matching_components { - self.current.pop(); - self.current_relative.pop(); - if self.current_is_directory { - delegate.pop_directory(); - } - self.current_is_directory = true; - } - self.valid_components = matching_components; - - if !self.current_is_directory && components.peek().is_some() { - delegate.push_directory(self)?; - } - - while let Some(comp) = components.next() { - let is_last_component = components.peek().is_none(); - self.current_is_directory = !is_last_component; - self.current.push(comp); - self.current_relative.push(comp); - self.valid_components += 1; - let res = delegate.push(is_last_component, self); - if self.current_is_directory { - delegate.push_directory(self)?; - } - - if let Err(err) = res { - self.current.pop(); - self.current_relative.pop(); - self.valid_components -= 1; - return Err(err); - } - } - Ok(()) - } -} diff --git a/vendor/gix-worktree/src/index/mod.rs b/vendor/gix-worktree/src/index/mod.rs deleted file mode 100644 index 684d1cae9..000000000 --- a/vendor/gix-worktree/src/index/mod.rs +++ /dev/null @@ -1,311 +0,0 @@ -use std::sync::atomic::{AtomicBool, AtomicUsize, Ordering}; 
- -use gix_features::{interrupt, parallel::in_parallel, progress, progress::Progress}; -use gix_hash::oid; - -use crate::fs; - -pub mod checkout; -pub(crate) mod entry; - -/// Note that interruption still produce an `Ok(…)` value, so the caller should look at `should_interrupt` to communicate the outcome. -/// `dir` is the directory into which to checkout the `index`. -/// `git_dir` is the `.git` directory for reading additional per-repository configuration files. -#[allow(clippy::too_many_arguments)] -pub fn checkout<Find, E>( - index: &mut gix_index::State, - dir: impl Into<std::path::PathBuf>, - find: Find, - files: &mut impl Progress, - bytes: &mut impl Progress, - should_interrupt: &AtomicBool, - options: checkout::Options, -) -> Result<checkout::Outcome, checkout::Error<E>> -where - Find: for<'a> FnMut(&oid, &'a mut Vec<u8>) -> Result<gix_object::BlobRef<'a>, E> + Send + Clone, - E: std::error::Error + Send + Sync + 'static, -{ - let paths = index.take_path_backing(); - let res = checkout_inner(index, &paths, dir, find, files, bytes, should_interrupt, options); - index.return_path_backing(paths); - res -} -#[allow(clippy::too_many_arguments)] -fn checkout_inner<Find, E>( - index: &mut gix_index::State, - paths: &gix_index::PathStorage, - dir: impl Into<std::path::PathBuf>, - find: Find, - files: &mut impl Progress, - bytes: &mut impl Progress, - should_interrupt: &AtomicBool, - options: checkout::Options, -) -> Result<checkout::Outcome, checkout::Error<E>> -where - Find: for<'a> FnMut(&oid, &'a mut Vec<u8>) -> Result<gix_object::BlobRef<'a>, E> + Send + Clone, - E: std::error::Error + Send + Sync + 'static, -{ - let num_files = AtomicUsize::default(); - let dir = dir.into(); - let case = if options.fs.ignore_case { - gix_glob::pattern::Case::Fold - } else { - gix_glob::pattern::Case::Sensitive - }; - let (chunk_size, thread_limit, num_threads) = gix_features::parallel::optimize_chunk_size_and_thread_limit( - 100, - index.entries().len().into(), - 
options.thread_limit, - None, - ); - - let state = fs::cache::State::for_checkout(options.overwrite_existing, options.attribute_globals.clone().into()); - let attribute_files = state.build_attribute_list(index, paths, case); - let mut ctx = chunk::Context { - buf: Vec::new(), - path_cache: fs::Cache::new(dir, state, case, Vec::with_capacity(512), attribute_files), - find, - options, - num_files: &num_files, - }; - - let chunk::Outcome { - mut collisions, - mut errors, - mut bytes_written, - delayed, - } = if num_threads == 1 { - let entries_with_paths = interrupt::Iter::new(index.entries_mut_with_paths_in(paths), should_interrupt); - chunk::process(entries_with_paths, files, bytes, &mut ctx)? - } else { - let entries_with_paths = interrupt::Iter::new(index.entries_mut_with_paths_in(paths), should_interrupt); - in_parallel( - gix_features::iter::Chunks { - inner: entries_with_paths, - size: chunk_size, - }, - thread_limit, - { - let ctx = ctx.clone(); - move |_| (progress::Discard, progress::Discard, ctx.clone()) - }, - |chunk, (files, bytes, ctx)| chunk::process(chunk.into_iter(), files, bytes, ctx), - chunk::Reduce { - files, - bytes, - num_files: &num_files, - aggregate: Default::default(), - marker: Default::default(), - }, - )? - }; - - for (entry, entry_path) in delayed { - bytes_written += chunk::checkout_entry_handle_result( - entry, - entry_path, - &mut errors, - &mut collisions, - files, - bytes, - &mut ctx, - )? 
as u64; - } - - Ok(checkout::Outcome { - files_updated: num_files.load(Ordering::Relaxed), - collisions, - errors, - bytes_written, - }) -} - -mod chunk { - use std::sync::atomic::{AtomicUsize, Ordering}; - - use bstr::BStr; - use gix_features::progress::Progress; - use gix_hash::oid; - - use crate::{ - fs, index, - index::{checkout, entry}, - os, - }; - - mod reduce { - use std::{ - marker::PhantomData, - sync::atomic::{AtomicUsize, Ordering}, - }; - - use gix_features::progress::Progress; - - use crate::index::checkout; - - pub struct Reduce<'a, 'entry, P1, P2, E> { - pub files: &'a mut P1, - pub bytes: &'a mut P2, - pub num_files: &'a AtomicUsize, - pub aggregate: super::Outcome<'entry>, - pub marker: PhantomData<E>, - } - - impl<'a, 'entry, P1, P2, E> gix_features::parallel::Reduce for Reduce<'a, 'entry, P1, P2, E> - where - P1: Progress, - P2: Progress, - E: std::error::Error + Send + Sync + 'static, - { - type Input = Result<super::Outcome<'entry>, checkout::Error<E>>; - type FeedProduce = (); - type Output = super::Outcome<'entry>; - type Error = checkout::Error<E>; - - fn feed(&mut self, item: Self::Input) -> Result<Self::FeedProduce, Self::Error> { - let item = item?; - let super::Outcome { - bytes_written, - delayed, - errors, - collisions, - } = item; - self.aggregate.bytes_written += bytes_written; - self.aggregate.delayed.extend(delayed); - self.aggregate.errors.extend(errors); - self.aggregate.collisions.extend(collisions); - - self.bytes.set(self.aggregate.bytes_written as usize); - self.files.set(self.num_files.load(Ordering::Relaxed)); - - Ok(()) - } - - fn finalize(self) -> Result<Self::Output, Self::Error> { - Ok(self.aggregate) - } - } - } - pub use reduce::Reduce; - - #[derive(Default)] - pub struct Outcome<'a> { - pub collisions: Vec<checkout::Collision>, - pub errors: Vec<checkout::ErrorRecord>, - pub delayed: Vec<(&'a mut gix_index::Entry, &'a BStr)>, - pub bytes_written: u64, - } - - #[derive(Clone)] - pub struct Context<'a, Find: Clone> { 
- pub find: Find, - pub path_cache: fs::Cache, - pub buf: Vec<u8>, - pub options: checkout::Options, - /// We keep these shared so that there is the chance for printing numbers that aren't looking like - /// multiple of chunk sizes. Purely cosmetic. Otherwise it's the same as `files`. - pub num_files: &'a AtomicUsize, - } - - pub fn process<'entry, Find, E>( - entries_with_paths: impl Iterator<Item = (&'entry mut gix_index::Entry, &'entry BStr)>, - files: &mut impl Progress, - bytes: &mut impl Progress, - ctx: &mut Context<'_, Find>, - ) -> Result<Outcome<'entry>, checkout::Error<E>> - where - Find: for<'a> FnMut(&oid, &'a mut Vec<u8>) -> Result<gix_object::BlobRef<'a>, E> + Clone, - E: std::error::Error + Send + Sync + 'static, - { - let mut delayed = Vec::new(); - let mut collisions = Vec::new(); - let mut errors = Vec::new(); - let mut bytes_written = 0; - - for (entry, entry_path) in entries_with_paths { - // TODO: write test for that - if entry.flags.contains(gix_index::entry::Flags::SKIP_WORKTREE) { - files.inc(); - continue; - } - - // Symlinks always have to be delayed on windows as they have to point to something that exists on creation. - // And even if not, there is a distinction between file and directory symlinks, hence we have to check what the target is - // before creating it. - // And to keep things sane, we just do the same on non-windows as well which is similar to what git does and adds some safety - // around writing through symlinks (even though we handle this). - // This also means that we prefer content in files over symlinks in case of collisions, which probably is for the better, too. - if entry.mode == gix_index::entry::Mode::SYMLINK { - delayed.push((entry, entry_path)); - continue; - } - - bytes_written += - checkout_entry_handle_result(entry, entry_path, &mut errors, &mut collisions, files, bytes, ctx)? 
- as u64; - } - - Ok(Outcome { - bytes_written, - errors, - collisions, - delayed, - }) - } - - pub fn checkout_entry_handle_result<Find, E>( - entry: &mut gix_index::Entry, - entry_path: &BStr, - errors: &mut Vec<checkout::ErrorRecord>, - collisions: &mut Vec<checkout::Collision>, - files: &mut impl Progress, - bytes: &mut impl Progress, - Context { - find, - path_cache, - buf, - options, - num_files, - }: &mut Context<'_, Find>, - ) -> Result<usize, checkout::Error<E>> - where - Find: for<'a> FnMut(&oid, &'a mut Vec<u8>) -> Result<gix_object::BlobRef<'a>, E> + Clone, - E: std::error::Error + Send + Sync + 'static, - { - let res = entry::checkout( - entry, - entry_path, - entry::Context { find, path_cache, buf }, - options.clone(), - ); - files.inc(); - num_files.fetch_add(1, Ordering::SeqCst); - match res { - Ok(object_size) => { - bytes.inc_by(object_size); - Ok(object_size) - } - Err(index::checkout::Error::Io(err)) if os::indicates_collision(&err) => { - // We are here because a file existed or was blocked by a directory which shouldn't be possible unless - // we are on a file insensitive file system. - files.fail(format!("{}: collided ({:?})", entry_path, err.kind())); - collisions.push(checkout::Collision { - path: entry_path.into(), - error_kind: err.kind(), - }); - Ok(0) - } - Err(err) => { - if options.keep_going { - errors.push(checkout::ErrorRecord { - path: entry_path.into(), - error: Box::new(err), - }); - Ok(0) - } else { - Err(err) - } - } - } - } -} diff --git a/vendor/gix-worktree/src/lib.rs b/vendor/gix-worktree/src/lib.rs index 9a67e0289..2626fe508 100644 --- a/vendor/gix-worktree/src/lib.rs +++ b/vendor/gix-worktree/src/lib.rs @@ -1,3 +1,7 @@ +//! A crate with all index-centric functionality that is interacting with a worktree. +//! +//! Unless specified differently, all operations need an index file (e.g. `.git/index`) as driver. +//! //! 
## Feature Flags #![cfg_attr( feature = "document-features", @@ -5,11 +9,51 @@ )] #![cfg_attr(docsrs, feature(doc_cfg, doc_auto_cfg))] #![deny(missing_docs, rust_2018_idioms, unsafe_code)] +use bstr::BString; -/// file system related utilities -pub mod fs; +/// +pub mod read; + +/// A cache for efficiently executing operations on directories and files which are encountered in sorted order. +/// That way, these operations can be re-used for subsequent invocations in the same directory. +/// +/// This cache can be configured to create directories efficiently, read git-ignore files and git-attribute files, +/// in any combination. +/// +/// A cache for directory creation to reduce the amount of stat calls when creating +/// directories safely, that is without following symlinks that might be on the way. +/// +/// As a special case, it offers a 'prefix' which (by itself) is assumed to exist and may contain symlinks. +/// Everything past that prefix boundary must not contain a symlink. We do this by allowing any input path. +/// +/// Another added benefit is its ability to store the path of full path of the entry to which leading directories +/// are to be created to avoid allocating memory. +/// +/// For this to work, it remembers the last 'good' path to a directory and assumes that all components of it +/// are still valid, too. +/// As directories are created, the cache will be adjusted to reflect the latest seen directory. +/// +/// The caching is only useful if consecutive calls to create a directory are using a sorted list of entries. +#[derive(Clone)] +pub struct Cache { + stack: gix_fs::Stack, + /// tells us what to do as we change paths. + state: cache::State, + /// A buffer used when reading attribute or ignore files or their respective objects from the object database. + buf: Vec<u8>, + /// If case folding should happen when looking up attributes or exclusions. 
+ case: gix_glob::pattern::Case, + /// A lookup table for object ids to read from in some situations when looking up attributes or exclusions. + id_mappings: Vec<PathIdMapping>, + statistics: cache::Statistics, +} + +pub(crate) type PathIdMapping = (BString, gix_hash::ObjectId); /// -pub mod index; +pub mod cache; +pub mod checkout; +pub use checkout::function::checkout; -pub(crate) mod os; +pub mod status; +pub use status::function::status; diff --git a/vendor/gix-worktree/src/os.rs b/vendor/gix-worktree/src/os.rs deleted file mode 100644 index a297e73cd..000000000 --- a/vendor/gix-worktree/src/os.rs +++ /dev/null @@ -1,50 +0,0 @@ -use std::{io, io::ErrorKind::AlreadyExists, path::Path}; - -#[cfg(not(windows))] -pub fn create_symlink(original: &Path, link: &Path) -> io::Result<()> { - std::os::unix::fs::symlink(original, link) -} - -#[cfg(not(windows))] -pub fn remove_symlink(path: &Path) -> io::Result<()> { - std::fs::remove_file(path) -} - -// TODO: use the `symlink` crate once it can delete directory symlinks -#[cfg(windows)] -pub fn remove_symlink(path: &Path) -> io::Result<()> { - if let Ok(meta) = std::fs::metadata(path) { - if meta.is_file() { - std::fs::remove_file(path) // this removes the link itself - } else { - std::fs::remove_dir(path) // however, this sees the destination directory, which isn't the right thing actually - } - } else { - std::fs::remove_file(path).or_else(|_| std::fs::remove_dir(path)) - } -} - -#[cfg(windows)] -pub fn create_symlink(original: &Path, link: &Path) -> io::Result<()> { - use std::os::windows::fs::{symlink_dir, symlink_file}; - // TODO: figure out if links to links count as files or whatever they point at - if std::fs::metadata(link.parent().expect("dir for link").join(original))?.is_dir() { - symlink_dir(original, link) - } else { - symlink_file(original, link) - } -} - -#[cfg(not(windows))] -pub fn indicates_collision(err: &std::io::Error) -> bool { - // TODO: use ::IsDirectory as well when stabilized instead of 
raw_os_error(), and ::FileSystemLoop respectively - err.kind() == AlreadyExists - || err.raw_os_error() == Some(21) - || err.raw_os_error() == Some(62) // no-follow on symlnk on mac-os - || err.raw_os_error() == Some(40) // no-follow on symlnk on ubuntu -} - -#[cfg(windows)] -pub fn indicates_collision(err: &std::io::Error) -> bool { - err.kind() == AlreadyExists || err.kind() == std::io::ErrorKind::PermissionDenied -} diff --git a/vendor/gix-worktree/src/read.rs b/vendor/gix-worktree/src/read.rs new file mode 100644 index 000000000..a54fc2c76 --- /dev/null +++ b/vendor/gix-worktree/src/read.rs @@ -0,0 +1,64 @@ +//! This module allows creating git blobs from worktree files. +//! +//! For the most part a blob just contains the raw on-disk data. However symlinks need to be considered properly +//! and attributes/config options need to be considered. + +use std::{ + fs::{read_link, File}, + io::{self, Read}, + path::Path, +}; + +use gix_object::Blob; +use gix_path as path; + +// TODO: tests + +// TODO: what to do about precompose unicode and ignore_case for symlinks + +/// Create a blob from a file or symlink. +pub fn blob(path: &Path, capabilities: &gix_fs::Capabilities) -> io::Result<Blob> { + let mut data = Vec::new(); + data_to_buf(path, &mut data, capabilities)?; + Ok(Blob { data }) +} + +/// Create a blob from a file or symlink. +pub fn blob_with_meta(path: &Path, is_symlink: bool, capabilities: &gix_fs::Capabilities) -> io::Result<Blob> { + let mut data = Vec::new(); + data_to_buf_with_meta(path, &mut data, is_symlink, capabilities)?; + Ok(Blob { data }) +} + +/// Create blob data from a file or symlink. +pub fn data_to_buf<'a>(path: &Path, buf: &'a mut Vec<u8>, capabilities: &gix_fs::Capabilities) -> io::Result<&'a [u8]> { + data_to_buf_with_meta(path, buf, path.symlink_metadata()?.is_symlink(), capabilities) +} + +/// Create a blob from a file or symlink. 
+pub fn data_to_buf_with_meta<'a>( + path: &Path, + buf: &'a mut Vec<u8>, + is_symlink: bool, + capabilities: &gix_fs::Capabilities, +) -> io::Result<&'a [u8]> { + buf.clear(); + // symlinks are only stored as actual symlinks if the FS supports it otherwise they are just + // normal files with their content equal to the linked path (so can be read normally) + // + if is_symlink && capabilities.symlink { + // conversion to bstr can never fail because symlinks are only used + // on unix (by git) so no reason to use the try version here + let symlink_path = path::into_bstr(read_link(path)?); + buf.extend_from_slice(&symlink_path); + // TODO: there is no reason this should be a clone + // std isn't great about allowing users to avoid allocations but we could + // simply write our own wrapper around libc::readlink which reuses the + // buffer. This would require unsafe code tough (obviously) + } else { + buf.clear(); + File::open(path)?.read_to_end(buf)?; + // TODO apply filters + } + Ok(buf.as_slice()) +} diff --git a/vendor/gix-worktree/src/status/content.rs b/vendor/gix-worktree/src/status/content.rs new file mode 100644 index 000000000..d47749ef8 --- /dev/null +++ b/vendor/gix-worktree/src/status/content.rs @@ -0,0 +1,86 @@ +use gix_features::hash; +use gix_hash::ObjectId; +use gix_index as index; +use gix_object::encode::loose_header; +use index::Entry; + +/// Compares the content of two blobs in some way. +pub trait CompareBlobs { + /// Output data produced by [`compare_blobs()`][CompareBlobs::compare_blobs()]. + type Output; + + /// Providing the underlying index `entry`, allow comparing a file in the worktree of size `worktree_blob_size` + /// and allow reading its bytes using `worktree_blob`. + /// If this function returns `None` the `entry` and the `worktree_blob` are assumed to be identical. + /// Use `entry_blob` to obtain the data for the blob referred to by `entry`, allowing comparisons of the data itself. 
+ fn compare_blobs<'a, E>( + &mut self, + entry: &'a gix_index::Entry, + worktree_blob_size: usize, + worktree_blob: impl ReadDataOnce<'a, E>, + entry_blob: impl ReadDataOnce<'a, E>, + ) -> Result<Option<Self::Output>, E>; +} + +/// Lazy borrowed access to blob data. +pub trait ReadDataOnce<'a, E> { + /// Returns the contents of this blob. + /// + /// This potentially performs IO and other expensive operations + /// and should only be called when necessary. + fn read_data(self) -> Result<&'a [u8], E>; +} + +/// Compares to blobs by comparing their size and oid, and only looks at the file if +/// the size matches, therefore it's very fast. +#[derive(Clone)] +pub struct FastEq; + +impl CompareBlobs for FastEq { + type Output = (); + + fn compare_blobs<'a, E>( + &mut self, + entry: &'a Entry, + worktree_blob_size: usize, + worktree_blob: impl ReadDataOnce<'a, E>, + _entry_blob: impl ReadDataOnce<'a, E>, + ) -> Result<Option<Self::Output>, E> { + // make sure to account for racily smudged entries here so that they don't always keep + // showing up as modified even after their contents have changed again, to a potentially + // unmodified state. That means that we want to ignore stat.size == 0 for non_empty_blobs. + if entry.stat.size as usize != worktree_blob_size && (entry.id.is_empty_blob() || entry.stat.size != 0) { + return Ok(Some(())); + } + let blob = worktree_blob.read_data()?; + let file_hash = gix_object::compute_hash(entry.id.kind(), gix_object::Kind::Blob, blob); + Ok((entry.id != file_hash).then_some(())) + } +} + +/// Compares files to blobs by *always* comparing their hashes. +/// +/// Same as [`FastEq`] but does not contain a fast path for files with mismatched files and +/// therefore always returns an OID that can be reused later. 
+#[derive(Clone)] +pub struct HashEq; + +impl CompareBlobs for HashEq { + type Output = ObjectId; + + fn compare_blobs<'a, E>( + &mut self, + entry: &'a Entry, + _worktree_blob_size: usize, + worktree_blob: impl ReadDataOnce<'a, E>, + _entry_blob: impl ReadDataOnce<'a, E>, + ) -> Result<Option<Self::Output>, E> { + let blob = worktree_blob.read_data()?; + let header = loose_header(gix_object::Kind::Blob, blob.len()); + let mut hasher = hash::hasher(entry.id.kind()); + hasher.update(&header); + hasher.update(blob); + let file_hash: ObjectId = hasher.digest().into(); + Ok((entry.id != file_hash).then_some(file_hash)) + } +} diff --git a/vendor/gix-worktree/src/status/function.rs b/vendor/gix-worktree/src/status/function.rs new file mode 100644 index 000000000..5e01628b4 --- /dev/null +++ b/vendor/gix-worktree/src/status/function.rs @@ -0,0 +1,331 @@ +use std::{io, marker::PhantomData, path::Path}; + +use bstr::BStr; +use filetime::FileTime; +use gix_features::parallel::{in_parallel_if, Reduce}; + +use crate::{ + read, + status::{ + content, + content::CompareBlobs, + types::{Error, Options}, + Change, VisitEntry, + }, +}; + +/// Calculates the changes that need to be applied to an `index` to match the state of the `worktree` and makes them +/// observable in `collector`, along with information produced by `compare` which gets to see blobs that may have changes. +/// `options` are used to configure the operation. +/// +/// Note that `index` is updated with the latest seen stat information from the worktree, and its timestamp is adjusted to +/// the current time for which it will be considered fresh. +/// +/// Note that this isn't technically quite what this function does as this also provides some additional information, +/// like whether a file has conflicts, and files that were added with `git add` are shown as a special +/// changes despite not technically requiring a change to the index since `git add` already added the file to the index. 
+pub fn status<'index, T, Find, E>( + index: &'index mut gix_index::State, + worktree: &Path, + collector: &mut impl VisitEntry<'index, ContentChange = T>, + compare: impl CompareBlobs<Output = T> + Send + Clone, + find: Find, + options: Options, +) -> Result<(), Error> +where + T: Send, + E: std::error::Error + Send + Sync + 'static, + Find: for<'a> FnMut(&gix_hash::oid, &'a mut Vec<u8>) -> Result<gix_object::BlobRef<'a>, E> + Send + Clone, +{ + // the order is absolutely critical here we use the old timestamp to detect racy index entries + // (modified at or after the last index update) during the index update we then set those + // entries size to 0 (see below) to ensure they keep showing up as racy and reset the timestamp. + let timestamp = index.timestamp(); + index.set_timestamp(FileTime::now()); + let (chunk_size, thread_limit, _) = gix_features::parallel::optimize_chunk_size_and_thread_limit( + 100, + index.entries().len().into(), + options.thread_limit, + None, + ); + let (entries, path_backing) = index.entries_mut_and_pathbacking(); + in_parallel_if( + || true, // TODO: heuristic: when is parallelization not worth it? 
+ entries.chunks_mut(chunk_size), + thread_limit, + { + let options = &options; + move |_| { + ( + State { + buf: Vec::new(), + odb_buf: Vec::new(), + timestamp, + path_backing, + worktree, + options, + }, + compare.clone(), + find.clone(), + ) + } + }, + |entries, (state, diff, find)| { + entries + .iter_mut() + .filter_map(|entry| state.process(entry, diff, find)) + .collect() + }, + ReduceChange { + collector, + phantom: PhantomData, + }, + ) +} + +struct State<'a, 'b> { + buf: Vec<u8>, + odb_buf: Vec<u8>, + timestamp: FileTime, + // path_cache: fs::Cache TODO path cache + path_backing: &'b [u8], + worktree: &'a Path, + options: &'a Options, +} + +type StatusResult<'index, T> = Result<(&'index gix_index::Entry, &'index BStr, Option<Change<T>>, bool), Error>; + +impl<'index> State<'_, 'index> { + fn process<T, Find, E>( + &mut self, + entry: &'index mut gix_index::Entry, + diff: &mut impl CompareBlobs<Output = T>, + find: &mut Find, + ) -> Option<StatusResult<'index, T>> + where + E: std::error::Error + Send + Sync + 'static, + Find: for<'a> FnMut(&gix_hash::oid, &'a mut Vec<u8>) -> Result<gix_object::BlobRef<'a>, E> + Send + Clone, + { + let conflict = match entry.stage() { + 0 => false, + 1 => true, + _ => return None, + }; + if entry.flags.intersects( + gix_index::entry::Flags::UPTODATE + | gix_index::entry::Flags::SKIP_WORKTREE + | gix_index::entry::Flags::ASSUME_VALID + | gix_index::entry::Flags::FSMONITOR_VALID, + ) { + return None; + } + let path = entry.path_in(self.path_backing); + let status = self.compute_status(&mut *entry, path, diff, find); + Some(status.map(move |status| (&*entry, path, status, conflict))) + } + + /// # On how racy-git is handled here + /// + /// Basically the racy detection is a safety mechanism that ensures we can always just compare the stat + /// information between index and worktree and if they match we don't need to look at the content. 
+ /// This usually just works but if a file updates quickly we could run into the following situation: + /// + /// * save file version `A` from disk into worktree (git add) + /// * file is changed so fast that the mtime doesn't change - *we only looks at seconds by default* + /// * file contents change but file-size stays the same, so `"foo" -> "bar"` has the same size but different content + /// + /// Now both `mtime` and `size`, and all other stat information, is the same but the file has actually changed. + /// This case is called *racily clean*. *The file should show up as changed but due to a data race it doesn't.* + /// This is the racy git problem. + /// + /// To solve this we do the following trick: Whenever we modify the index, which includes `git status`, we save the + /// current timestamp before the modification starts. This timestamp fundamentally represents a checkpoint of sorts. + /// We "promise" ourselves that after the modification finishes all entries modified before this timestamp have the + /// racy git problem resolved. + /// + /// So now when we modify the index we must resolve the racy git problem somehow. To do that we only need to look at + /// unchanged entries. Changed entries are not interesting since they are already showing up as changed anyway so there + /// isn't really a race-condition to worry about. This also explains why removing the `return` here doesn't have an apparent effect. + /// This entire branch here is just the optimization of "don't even look at index entries where the stat hasn't changed". + /// If we don't have this optimization the result shouldn't change, our status implementation will just be super slow :D + + /// We calculate whether this change is `racy_clean`, so if the last `timestamp` is before or the same as the `mtime` of the entry + /// which is what `new_stat.is_racy(..)` does in the branch, and only if we are sure that there is no race condition + /// do we `return` early. 
Since we don't `return` early we just do a full content comparison below, + /// which always yields the correct result, there is no race condition there. + /// + /// If a file showed up as racily clean and didn't change then we don't need to do anything. After this status check is + /// complete and the file won't show up as racily clean anymore, since it's mtime is now before the new timestamp. + /// However if the file did actually change then we really ran into one of those rare race conditions in that case we, + /// and git does the same, set the size of the file in the index to 0. This will always make the file show up as changed. + /// This adds the need to treat all files of size 0 in the index as changed. This is not quite right of course because 0 sized files + /// could be entirely valid and unchanged. Therefore this only applies if the oid doesn't match the oid of an empty file, + /// which is a constant. + /// + /// Adapted from [here](https://github.com/Byron/gitoxide/pull/805#discussion_r1164676777). 
+ fn compute_status<T, Find, E>( + &mut self, + entry: &mut gix_index::Entry, + git_path: &BStr, + diff: &mut impl CompareBlobs<Output = T>, + find: &mut Find, + ) -> Result<Option<Change<T>>, Error> + where + E: std::error::Error + Send + Sync + 'static, + Find: for<'a> FnMut(&gix_hash::oid, &'a mut Vec<u8>) -> Result<gix_object::BlobRef<'a>, E> + Send + Clone, + { + // TODO fs cache + let worktree_path = gix_path::try_from_bstr(git_path).map_err(|_| Error::IllformedUtf8)?; + let worktree_path = self.worktree.join(worktree_path); + let metadata = match worktree_path.symlink_metadata() { + // TODO: check if any parent directory is a symlink + // we need to use fs::Cache for that + Ok(metadata) if metadata.is_dir() => { + // index entries are normally only for files/symlinks + // if a file turned into a directory it was removed + // the only exception here are submodules which are + // part of the index despite being directories + // + // TODO: submodules: + // if entry.mode.contains(Mode::COMMIT) && + // resolve_gitlink_ref(ce->name, "HEAD", &sub)) + return Ok(Some(Change::Removed)); + } + Ok(metadata) => metadata, + Err(err) if err.kind() == io::ErrorKind::NotFound => return Ok(Some(Change::Removed)), + Err(err) => { + return Err(err.into()); + } + }; + if entry.flags.contains(gix_index::entry::Flags::INTENT_TO_ADD) { + return Ok(Some(Change::IntentToAdd)); + } + let new_stat = gix_index::entry::Stat::from_fs(&metadata)?; + let executable_bit_changed = + match entry + .mode + .change_to_match_fs(&metadata, self.options.fs.symlink, self.options.fs.executable_bit) + { + Some(gix_index::entry::mode::Change::Type { .. }) => return Ok(Some(Change::Type)), + Some(gix_index::entry::mode::Change::ExecutableBit) => true, + None => false, + }; + + // Here we implement racy-git. See racy-git.txt in the git documentation for a detailed documentation. + // + // A file is racy if: + // 1. 
its `mtime` is at or after the last index timestamp and its entry stat information + // matches the on-disk file but the file contents are actually modified + // 2. it's size is 0 (set after detecting a file was racy previously) + // + // The first case is detected below by checking the timestamp if the file is marked unmodified. + // The second case is usually detected either because the on-disk file is not empty, hence + // the basic stat match fails, or by checking whether the size doesn't fit the oid. + let mut racy_clean = false; + if !executable_bit_changed + && new_stat.matches(&entry.stat, self.options.stat) + // TODO: find a test for the following line or remove it. Is this more often hit with smudge/clean filters? + && (!entry.id.is_empty_blob() || entry.stat.size == 0) + { + racy_clean = new_stat.is_racy(self.timestamp, self.options.stat); + if !racy_clean { + return Ok(None); + } + } + + let read_file = WorktreeBlob { + buf: &mut self.buf, + path: &worktree_path, + entry, + options: self.options, + }; + let read_blob = OdbBlob { + buf: &mut self.odb_buf, + id: &entry.id, + find, + }; + let content_change = diff.compare_blobs::<Error>(entry, metadata.len() as usize, read_file, read_blob)?; + // This file is racy clean! Set the size to 0 so we keep detecting this as the file is updated. + if content_change.is_some() && racy_clean { + entry.stat.size = 0; + } + if content_change.is_some() || executable_bit_changed { + Ok(Some(Change::Modification { + executable_bit_changed, + content_change, + })) + } else { + // don't diff against this file next time since we know the file is unchanged. 
+ entry.stat = new_stat; + Ok(None) + } + } +} + +struct ReduceChange<'a, 'index, T: VisitEntry<'index>> { + collector: &'a mut T, + phantom: PhantomData<fn(&'index ())>, +} + +impl<'index, T, C: VisitEntry<'index, ContentChange = T>> Reduce for ReduceChange<'_, 'index, C> { + type Input = Vec<StatusResult<'index, T>>; + + type FeedProduce = (); + + type Output = (); + + type Error = Error; + + fn feed(&mut self, items: Self::Input) -> Result<Self::FeedProduce, Self::Error> { + for item in items { + let (entry, path, change, conflict) = item?; + self.collector.visit_entry(entry, path, change, conflict); + } + Ok(()) + } + + fn finalize(self) -> Result<Self::Output, Self::Error> { + Ok(()) + } +} + +struct WorktreeBlob<'a> { + buf: &'a mut Vec<u8>, + path: &'a Path, + entry: &'a gix_index::Entry, + options: &'a Options, +} + +struct OdbBlob<'a, Find, E> +where + E: std::error::Error + Send + Sync + 'static, + Find: FnMut(&gix_hash::oid, &'a mut Vec<u8>) -> Result<gix_object::BlobRef<'a>, E>, +{ + buf: &'a mut Vec<u8>, + id: &'a gix_hash::oid, + find: Find, +} + +impl<'a> content::ReadDataOnce<'a, Error> for WorktreeBlob<'a> { + fn read_data(self) -> Result<&'a [u8], Error> { + let res = read::data_to_buf_with_meta( + self.path, + self.buf, + self.entry.mode == gix_index::entry::Mode::SYMLINK, + &self.options.fs, + )?; + Ok(res) + } +} + +impl<'a, Find, E> content::ReadDataOnce<'a, Error> for OdbBlob<'a, Find, E> +where + E: std::error::Error + Send + Sync + 'static, + Find: FnMut(&gix_hash::oid, &'a mut Vec<u8>) -> Result<gix_object::BlobRef<'a>, E>, +{ + fn read_data(mut self) -> Result<&'a [u8], Error> { + (self.find)(self.id, self.buf) + .map(|b| b.data) + .map_err(move |err| Error::Find(Box::new(err))) + } +} diff --git a/vendor/gix-worktree/src/status/mod.rs b/vendor/gix-worktree/src/status/mod.rs new file mode 100644 index 000000000..8294a54e8 --- /dev/null +++ b/vendor/gix-worktree/src/status/mod.rs @@ -0,0 +1,11 @@ +//! 
Changes between an index and a worktree. +/// +mod types; +pub use types::{Change, Error, Options, VisitEntry}; + +mod recorder; +pub use recorder::Recorder; + +/// +pub mod content; +pub(crate) mod function; diff --git a/vendor/gix-worktree/src/status/recorder.rs b/vendor/gix-worktree/src/status/recorder.rs new file mode 100644 index 000000000..ea10303ae --- /dev/null +++ b/vendor/gix-worktree/src/status/recorder.rs @@ -0,0 +1,27 @@ +use bstr::BStr; +use gix_index as index; + +use crate::status::{Change, VisitEntry}; + +/// Convenience implementation of [`VisitEntry`] that collects all non-trivial changes into a `Vec`. +#[derive(Debug, Default)] +pub struct Recorder<'index, T = ()> { + /// collected changes, index entries without conflicts or changes are excluded. + pub records: Vec<(&'index BStr, Option<Change<T>>, bool)>, +} + +impl<'index, T: Send> VisitEntry<'index> for Recorder<'index, T> { + type ContentChange = T; + + fn visit_entry( + &mut self, + _entry: &'index index::Entry, + rela_path: &'index BStr, + status: Option<Change<Self::ContentChange>>, + conflict: bool, + ) { + if conflict || status.is_some() { + self.records.push((rela_path, status, conflict)) + } + } +} diff --git a/vendor/gix-worktree/src/status/types.rs b/vendor/gix-worktree/src/status/types.rs new file mode 100644 index 000000000..3d488d24e --- /dev/null +++ b/vendor/gix-worktree/src/status/types.rs @@ -0,0 +1,69 @@ +use bstr::BStr; + +/// The error returned by [`status()`][crate::status()]. 
+#[derive(Debug, thiserror::Error)] +#[allow(missing_docs)] +pub enum Error { + #[error("Could not convert path to UTF8")] + IllformedUtf8, + #[error("The clock was off when reading file related metadata after updating a file on disk")] + Time(#[from] std::time::SystemTimeError), + #[error("IO error while writing blob or reading file metadata or changing filetype")] + Io(#[from] std::io::Error), + #[error("Failed to obtain blob from object database")] + Find(#[source] Box<dyn std::error::Error + Send + Sync + 'static>), +} + +#[derive(Clone, Default)] +/// Options that control how the index status with a worktree is computed. +pub struct Options { + /// Capabilities of the file system which affect the status computation. + pub fs: gix_fs::Capabilities, + /// If set, don't use more than this amount of threads. + /// Otherwise, usually use as many threads as there are logical cores. + /// A value of 0 is interpreted as no-limit + pub thread_limit: Option<usize>, + /// Options that control how stat comparisons are made when checking if a file is fresh. + pub stat: gix_index::entry::stat::Options, +} + +/// How an index entry needs to be changed to obtain the destination worktree state, i.e. `entry.apply(this_change) == worktree-entry`. +#[derive(Clone, Copy, PartialEq, Eq, PartialOrd, Ord, Debug)] +pub enum Change<T = ()> { + /// This corresponding file does not exist in the worktree anymore. + Removed, + /// The type of file changed compared to the worktree, i.e. a symlink s now a file. + Type, + /// This worktree file was modified in some form, like a permission change or content change or both, + /// as compared to this entry. + Modification { + /// Indicates that one of the stat changes was an executable bit change + /// which is a significant change itself. + executable_bit_changed: bool, + /// The output of the [`CompareBlobs`][crate::status::content::CompareBlobs] run on this entry. 
+ /// If there is no content change and only the executable bit + /// changed than this is `None`. + content_change: Option<T>, + }, + /// An index entry that correspond to an untracked worktree file marked with `git add --intent-to-add`. + /// + /// This means it's not available in the object database yet or the index was created from, + /// even though now an entry exists that represents the worktree file. + IntentToAdd, +} + +/// Observe changes by comparing an index entry to the worktree or another index. +pub trait VisitEntry<'index> { + /// Data generated by comparing an entry with a file. + type ContentChange; + /// Observe the `change` of `entry` at the repository-relative `rela_path`, indicating whether + /// or not it has a `conflict`. + /// If `change` is `None`, there is no change. + fn visit_entry( + &mut self, + entry: &'index gix_index::Entry, + rela_path: &'index BStr, + change: Option<Change<Self::ContentChange>>, + conflict: bool, + ); +} diff --git a/vendor/gix-worktree/src/untracked.rs b/vendor/gix-worktree/src/untracked.rs new file mode 100644 index 000000000..6e77d7fa3 --- /dev/null +++ b/vendor/gix-worktree/src/untracked.rs @@ -0,0 +1 @@ +// TODO: untracked file detection, needs fs::Cache |