//! lower-level access to filters which are applied to create working tree checkouts or to 'clean' working tree contents for storage in git. use std::borrow::Cow; pub use gix_filter as plumbing; use gix_object::Find; use crate::{ bstr::BStr, config::{ cache::util::{ApplyLeniency, ApplyLeniencyDefaultValue}, tree::Core, }, Repository, }; /// pub mod pipeline { /// pub mod options { use crate::{bstr::BString, config}; /// The error returned by [Pipeline::options()][crate::filter::Pipeline::options()]. #[derive(Debug, thiserror::Error)] #[allow(missing_docs)] pub enum Error { #[error(transparent)] CheckRoundTripEncodings(#[from] config::encoding::Error), #[error(transparent)] SafeCrlf(#[from] config::key::GenericErrorWithValue), #[error("Could not interpret 'filter.{name}.required' configuration")] Driver { name: BString, source: gix_config::value::Error, }, #[error(transparent)] CommandContext(#[from] config::command_context::Error), } } /// pub mod convert_to_git { /// The error returned by [Pipeline::convert_to_git()][crate::filter::Pipeline::convert_to_git()]. #[derive(Debug, thiserror::Error)] #[allow(missing_docs)] pub enum Error { #[error("Failed to prime attributes to the path at which the data resides")] WorktreeCacheAtPath(#[from] std::io::Error), #[error(transparent)] Convert(#[from] gix_filter::pipeline::convert::to_git::Error), } } /// pub mod convert_to_worktree { /// The error returned by [Pipeline::convert_to_worktree()][crate::filter::Pipeline::convert_to_worktree()]. #[derive(Debug, thiserror::Error)] #[allow(missing_docs)] pub enum Error { #[error("Failed to prime attributes to the path at which the data resides")] WorktreeCacheAtPath(#[from] std::io::Error), #[error(transparent)] Convert(#[from] gix_filter::pipeline::convert::to_worktree::Error), } } } /// A git pipeline for transforming data *to-git* and *to-worktree*, based /// [on git configuration and attributes](https://git-scm.com/docs/gitattributes). #[derive(Clone)] pub struct Pipeline<'repo> { inner: gix_filter::Pipeline, cache: gix_worktree::Stack, repo: &'repo Repository, } /// Lifecycle impl<'repo> Pipeline<'repo> { /// Extract options from `repo` that are needed to properly drive a standard git filter pipeline. pub fn options(repo: &'repo Repository) -> Result { let config = &repo.config.resolved; let encodings = Core::CHECK_ROUND_TRIP_ENCODING.try_into_encodings(config.string_by_key("core.checkRoundtripEncoding"))?; let safe_crlf = config .string_by_key("core.safecrlf") .map(|value| Core::SAFE_CRLF.try_into_safecrlf(value)) .transpose() .map(Option::unwrap_or_default) .with_lenient_default_value( repo.config.lenient_config, // in lenient mode, we prefer the safe option, instead of just (trying) to output warnings. gix_filter::pipeline::CrlfRoundTripCheck::Fail, )?; let auto_crlf = config .string_by_key("core.autocrlf") .map(|value| Core::AUTO_CRLF.try_into_autocrlf(value)) .transpose() .with_leniency(repo.config.lenient_config)? .unwrap_or_default(); let eol = config .string_by_key("core.eol") .map(|value| Core::EOL.try_into_eol(value)) .transpose()?; let drivers = extract_drivers(repo)?; Ok(gix_filter::pipeline::Options { drivers, eol_config: gix_filter::eol::Configuration { auto_crlf, eol }, encodings_with_roundtrip_check: encodings, crlf_roundtrip_check: safe_crlf, object_hash: repo.object_hash(), }) } /// Create a new instance by extracting all necessary information and configuration from a `repo` along with `cache` for accessing /// attributes. The `index` is used for some filters which may access it under very specific circumstances. pub fn new(repo: &'repo Repository, cache: gix_worktree::Stack) -> Result { let pipeline = gix_filter::Pipeline::new(repo.command_context()?, Self::options(repo)?); Ok(Pipeline { inner: pipeline, cache, repo, }) } /// Detach the repository and obtain the individual functional parts. pub fn into_parts(self) -> (gix_filter::Pipeline, gix_worktree::Stack) { (self.inner, self.cache) } } /// Conversions impl<'repo> Pipeline<'repo> { /// Convert a `src` stream (to be found at `rela_path`, a repo-relative path) to a representation suitable for storage in `git` /// by using all attributes at `rela_path` and configuration of the repository to know exactly which filters apply. /// `index` is used in particularly rare cases where the CRLF filter in auto-mode tries to determine whether or not to apply itself, /// and it should match the state used when [instantiating this instance][Self::new()]. /// Note that the return-type implements [`std::io::Read`]. pub fn convert_to_git( &mut self, src: R, rela_path: &std::path::Path, index: &gix_index::State, ) -> Result, pipeline::convert_to_git::Error> where R: std::io::Read, { let entry = self.cache.at_path(rela_path, Some(false), &self.repo.objects)?; Ok(self.inner.convert_to_git( src, rela_path, &mut |_, attrs| { entry.matching_attributes(attrs); }, &mut |buf| -> Result<_, gix_object::find::Error> { let entry = match index.entry_by_path(gix_path::into_bstr(rela_path).as_ref()) { None => return Ok(None), Some(entry) => entry, }; let obj = self.repo.objects.try_find(&entry.id, buf)?; Ok(obj.filter(|obj| obj.kind == gix_object::Kind::Blob).map(|_| ())) }, )?) } /// Convert a `src` buffer located at `rela_path` (in the index) from what's in `git` to the worktree representation. /// This method will obtain all attributes and configuration necessary to know exactly which filters to apply. /// Note that the return-type implements [`std::io::Read`]. /// /// Use `can_delay` to tell driver processes that they may delay the return of data. Doing this will require the caller to specifically /// handle delayed files by keeping state and using [`Self::into_parts()`] to get access to the driver state to follow the delayed-files /// protocol. For simplicity, most will want to disallow delayed processing. pub fn convert_to_worktree<'input>( &mut self, src: &'input [u8], rela_path: &BStr, can_delay: gix_filter::driver::apply::Delay, ) -> Result, pipeline::convert_to_worktree::Error> { let entry = self.cache.at_entry(rela_path, Some(false), &self.repo.objects)?; Ok(self.inner.convert_to_worktree( src, rela_path, &mut |_, attrs| { entry.matching_attributes(attrs); }, can_delay, )?) } /// Retrieve the static context that is made available to the process filters. /// /// The context set here is relevant for the [`convert_to_git()`][Self::convert_to_git()] and /// [`convert_to_worktree()`][Self::convert_to_worktree()] methods. pub fn driver_context_mut(&mut self) -> &mut gix_filter::pipeline::Context { self.inner.driver_context_mut() } } /// Obtain a list of all configured driver, but ignore those in sections that we don't trust enough. fn extract_drivers(repo: &Repository) -> Result, pipeline::options::Error> { repo.config .resolved .sections_by_name("filter") .into_iter() .flatten() .filter(|s| repo.filter_config_section()(s.meta())) .filter_map(|s| { s.header().subsection_name().map(|name| { Ok(gix_filter::Driver { name: name.to_owned(), clean: s.value("clean").map(Cow::into_owned), smudge: s.value("smudge").map(Cow::into_owned), process: s.value("process").map(Cow::into_owned), required: s .value("required") .map(|value| gix_config::Boolean::try_from(value.as_ref())) .transpose() .map_err(|err| pipeline::options::Error::Driver { name: name.to_owned(), source: err, })? .unwrap_or_default() .into(), }) }) }) .collect::, pipeline::options::Error>>() }