From 10ee2acdd26a7f1298c6f6d6b7af9b469fe29b87 Mon Sep 17 00:00:00 2001 From: Daniel Baumann Date: Sat, 4 May 2024 14:41:41 +0200 Subject: Merging upstream version 1.70.0+dfsg2. Signed-off-by: Daniel Baumann --- vendor/gix-config/src/file/access/comfort.rs | 274 ++++++ vendor/gix-config/src/file/access/mod.rs | 4 + vendor/gix-config/src/file/access/mutate.rs | 387 +++++++++ vendor/gix-config/src/file/access/raw.rs | 536 ++++++++++++ vendor/gix-config/src/file/access/read_only.rs | 353 ++++++++ vendor/gix-config/src/file/impls.rs | 111 +++ vendor/gix-config/src/file/includes/mod.rs | 319 +++++++ vendor/gix-config/src/file/includes/types.rs | 131 +++ vendor/gix-config/src/file/init/comfort.rs | 159 ++++ vendor/gix-config/src/file/init/from_env.rs | 88 ++ vendor/gix-config/src/file/init/from_paths.rs | 94 +++ vendor/gix-config/src/file/init/mod.rs | 86 ++ vendor/gix-config/src/file/init/types.rs | 47 ++ vendor/gix-config/src/file/meta.rs | 59 ++ vendor/gix-config/src/file/mod.rs | 136 +++ vendor/gix-config/src/file/mutable/mod.rs | 107 +++ vendor/gix-config/src/file/mutable/multi_value.rs | 266 ++++++ vendor/gix-config/src/file/mutable/section.rs | 316 +++++++ vendor/gix-config/src/file/mutable/value.rs | 63 ++ vendor/gix-config/src/file/section/body.rs | 206 +++++ vendor/gix-config/src/file/section/mod.rs | 145 ++++ vendor/gix-config/src/file/tests.rs | 228 +++++ vendor/gix-config/src/file/util.rs | 190 +++++ vendor/gix-config/src/file/write.rs | 93 +++ vendor/gix-config/src/lib.rs | 52 ++ vendor/gix-config/src/lookup.rs | 24 + vendor/gix-config/src/parse/comment.rs | 50 ++ vendor/gix-config/src/parse/error.rs | 64 ++ vendor/gix-config/src/parse/event.rs | 83 ++ vendor/gix-config/src/parse/events.rs | 336 ++++++++ vendor/gix-config/src/parse/key.rs | 35 + vendor/gix-config/src/parse/mod.rs | 116 +++ vendor/gix-config/src/parse/nom/mod.rs | 460 ++++++++++ vendor/gix-config/src/parse/nom/tests.rs | 924 +++++++++++++++++++++ vendor/gix-config/src/parse/section/header.rs 
| 180 ++++ vendor/gix-config/src/parse/section/mod.rs | 187 +++++ vendor/gix-config/src/parse/section/unvalidated.rs | 25 + vendor/gix-config/src/parse/tests.rs | 162 ++++ vendor/gix-config/src/source.rs | 163 ++++ vendor/gix-config/src/types.rs | 124 +++ vendor/gix-config/src/value/mod.rs | 4 + vendor/gix-config/src/value/normalize.rs | 110 +++ 42 files changed, 7497 insertions(+) create mode 100644 vendor/gix-config/src/file/access/comfort.rs create mode 100644 vendor/gix-config/src/file/access/mod.rs create mode 100644 vendor/gix-config/src/file/access/mutate.rs create mode 100644 vendor/gix-config/src/file/access/raw.rs create mode 100644 vendor/gix-config/src/file/access/read_only.rs create mode 100644 vendor/gix-config/src/file/impls.rs create mode 100644 vendor/gix-config/src/file/includes/mod.rs create mode 100644 vendor/gix-config/src/file/includes/types.rs create mode 100644 vendor/gix-config/src/file/init/comfort.rs create mode 100644 vendor/gix-config/src/file/init/from_env.rs create mode 100644 vendor/gix-config/src/file/init/from_paths.rs create mode 100644 vendor/gix-config/src/file/init/mod.rs create mode 100644 vendor/gix-config/src/file/init/types.rs create mode 100644 vendor/gix-config/src/file/meta.rs create mode 100644 vendor/gix-config/src/file/mod.rs create mode 100644 vendor/gix-config/src/file/mutable/mod.rs create mode 100644 vendor/gix-config/src/file/mutable/multi_value.rs create mode 100644 vendor/gix-config/src/file/mutable/section.rs create mode 100644 vendor/gix-config/src/file/mutable/value.rs create mode 100644 vendor/gix-config/src/file/section/body.rs create mode 100644 vendor/gix-config/src/file/section/mod.rs create mode 100644 vendor/gix-config/src/file/tests.rs create mode 100644 vendor/gix-config/src/file/util.rs create mode 100644 vendor/gix-config/src/file/write.rs create mode 100644 vendor/gix-config/src/lib.rs create mode 100644 vendor/gix-config/src/lookup.rs create mode 100644 vendor/gix-config/src/parse/comment.rs 
create mode 100644 vendor/gix-config/src/parse/error.rs create mode 100644 vendor/gix-config/src/parse/event.rs create mode 100644 vendor/gix-config/src/parse/events.rs create mode 100644 vendor/gix-config/src/parse/key.rs create mode 100644 vendor/gix-config/src/parse/mod.rs create mode 100644 vendor/gix-config/src/parse/nom/mod.rs create mode 100644 vendor/gix-config/src/parse/nom/tests.rs create mode 100644 vendor/gix-config/src/parse/section/header.rs create mode 100644 vendor/gix-config/src/parse/section/mod.rs create mode 100644 vendor/gix-config/src/parse/section/unvalidated.rs create mode 100644 vendor/gix-config/src/parse/tests.rs create mode 100644 vendor/gix-config/src/source.rs create mode 100644 vendor/gix-config/src/types.rs create mode 100644 vendor/gix-config/src/value/mod.rs create mode 100644 vendor/gix-config/src/value/normalize.rs (limited to 'vendor/gix-config/src') diff --git a/vendor/gix-config/src/file/access/comfort.rs b/vendor/gix-config/src/file/access/comfort.rs new file mode 100644 index 000000000..b4953c597 --- /dev/null +++ b/vendor/gix-config/src/file/access/comfort.rs @@ -0,0 +1,274 @@ +use std::{borrow::Cow, convert::TryFrom}; + +use bstr::BStr; + +use crate::{file::MetadataFilter, value, File}; + +/// Comfortable API for accessing values +impl<'event> File<'event> { + /// Like [`value()`][File::value()], but returning `None` if the string wasn't found. + /// + /// As strings perform no conversions, this will never fail. + pub fn string( + &self, + section_name: impl AsRef, + subsection_name: Option<&BStr>, + key: impl AsRef, + ) -> Option> { + self.string_filter(section_name, subsection_name, key, &mut |_| true) + } + + /// Like [`string()`][File::string()], but suitable for statically known `key`s like `remote.origin.url`. 
+ pub fn string_by_key<'a>(&self, key: impl Into<&'a BStr>) -> Option> { + self.string_filter_by_key(key, &mut |_| true) + } + + /// Like [`string()`][File::string()], but the section containing the returned value must pass `filter` as well. + pub fn string_filter( + &self, + section_name: impl AsRef, + subsection_name: Option<&BStr>, + key: impl AsRef, + filter: &mut MetadataFilter, + ) -> Option> { + self.raw_value_filter(section_name, subsection_name, key, filter).ok() + } + + /// Like [`string_filter()`][File::string_filter()], but suitable for statically known `key`s like `remote.origin.url`. + pub fn string_filter_by_key<'a>( + &self, + key: impl Into<&'a BStr>, + filter: &mut MetadataFilter, + ) -> Option> { + let key = crate::parse::key(key)?; + self.raw_value_filter(key.section_name, key.subsection_name, key.value_name, filter) + .ok() + } + + /// Like [`value()`][File::value()], but returning `None` if the path wasn't found. + /// + /// Note that this path is not vetted and should only point to resources which can't be used + /// to pose a security risk. Prefer using [`path_filter()`][File::path_filter()] instead. + /// + /// As paths perform no conversions, this will never fail. + pub fn path( + &self, + section_name: impl AsRef, + subsection_name: Option<&BStr>, + key: impl AsRef, + ) -> Option> { + self.path_filter(section_name, subsection_name, key, &mut |_| true) + } + + /// Like [`path()`][File::path()], but suitable for statically known `key`s like `remote.origin.url`. + pub fn path_by_key<'a>(&self, key: impl Into<&'a BStr>) -> Option> { + self.path_filter_by_key(key, &mut |_| true) + } + + /// Like [`path()`][File::path()], but the section containing the returned value must pass `filter` as well. + /// + /// This should be the preferred way of accessing paths as those from untrusted + /// locations can be excluded by having `filter` reject the sections they are defined in. + /// + /// As paths perform no conversions, this will never fail.
+ pub fn path_filter( + &self, + section_name: impl AsRef, + subsection_name: Option<&BStr>, + key: impl AsRef, + filter: &mut MetadataFilter, + ) -> Option> { + self.raw_value_filter(section_name, subsection_name, key, filter) + .ok() + .map(crate::Path::from) + } + + /// Like [`path_filter()`][File::path_filter()], but suitable for statically known `key`s like `remote.origin.url`. + pub fn path_filter_by_key<'a>( + &self, + key: impl Into<&'a BStr>, + filter: &mut MetadataFilter, + ) -> Option> { + let key = crate::parse::key(key)?; + self.path_filter(key.section_name, key.subsection_name, key.value_name, filter) + } + + /// Like [`value()`][File::value()], but returning `None` if the boolean value wasn't found. + pub fn boolean( + &self, + section_name: impl AsRef, + subsection_name: Option<&BStr>, + key: impl AsRef, + ) -> Option> { + self.boolean_filter(section_name, subsection_name, key, &mut |_| true) + } + + /// Like [`boolean()`][File::boolean()], but suitable for statically known `key`s like `remote.origin.url`. + pub fn boolean_by_key<'a>(&self, key: impl Into<&'a BStr>) -> Option> { + self.boolean_filter_by_key(key, &mut |_| true) + } + + /// Like [`boolean()`][File::boolean()], but the section containing the returned value must pass `filter` as well. 
+ pub fn boolean_filter( + &self, + section_name: impl AsRef, + subsection_name: Option<&BStr>, + key: impl AsRef, + filter: &mut MetadataFilter, + ) -> Option> { + let section_name = section_name.as_ref(); + let section_ids = self + .section_ids_by_name_and_subname(section_name, subsection_name) + .ok()?; + let key = key.as_ref(); + for section_id in section_ids.rev() { + let section = self.sections.get(§ion_id).expect("known section id"); + if !filter(section.meta()) { + continue; + } + match section.value_implicit(key) { + Some(Some(v)) => return Some(crate::Boolean::try_from(v).map(|b| b.into())), + Some(None) => return Some(Ok(true)), + None => continue, + } + } + None + } + + /// Like [`boolean_filter()`][File::boolean_filter()], but suitable for statically known `key`s like `remote.origin.url`. + pub fn boolean_filter_by_key<'a>( + &self, + key: impl Into<&'a BStr>, + filter: &mut MetadataFilter, + ) -> Option> { + let key = crate::parse::key(key)?; + self.boolean_filter(key.section_name, key.subsection_name, key.value_name, filter) + } + + /// Like [`value()`][File::value()], but returning an `Option` if the integer wasn't found. + pub fn integer( + &self, + section_name: impl AsRef, + subsection_name: Option<&BStr>, + key: impl AsRef, + ) -> Option> { + self.integer_filter(section_name, subsection_name, key, &mut |_| true) + } + + /// Like [`integer()`][File::integer()], but suitable for statically known `key`s like `remote.origin.url`. + pub fn integer_by_key<'a>(&self, key: impl Into<&'a BStr>) -> Option> { + self.integer_filter_by_key(key, &mut |_| true) + } + + /// Like [`integer()`][File::integer()], but the section containing the returned value must pass `filter` as well. 
+ pub fn integer_filter( + &self, + section_name: impl AsRef, + subsection_name: Option<&BStr>, + key: impl AsRef, + filter: &mut MetadataFilter, + ) -> Option> { + let int = self.raw_value_filter(section_name, subsection_name, key, filter).ok()?; + Some(crate::Integer::try_from(int.as_ref()).and_then(|b| { + b.to_decimal() + .ok_or_else(|| value::Error::new("Integer overflow", int.into_owned())) + })) + } + + /// Like [`integer_filter()`][File::integer_filter()], but suitable for statically known `key`s like `remote.origin.url`. + pub fn integer_filter_by_key<'a>( + &self, + key: impl Into<&'a BStr>, + filter: &mut MetadataFilter, + ) -> Option> { + let key = crate::parse::key(key)?; + self.integer_filter(key.section_name, key.subsection_name, key.value_name, filter) + } + + /// Similar to [`values(…)`][File::values()] but returning strings if at least one of them was found. + pub fn strings( + &self, + section_name: impl AsRef, + subsection_name: Option<&BStr>, + key: impl AsRef, + ) -> Option>> { + self.raw_values(section_name, subsection_name, key).ok() + } + + /// Like [`strings()`][File::strings()], but suitable for statically known `key`s like `remote.origin.url`. + pub fn strings_by_key<'a>(&self, key: impl Into<&'a BStr>) -> Option>> { + let key = crate::parse::key(key)?; + self.strings(key.section_name, key.subsection_name, key.value_name) + } + + /// Similar to [`strings(…)`][File::strings()], but all values are in sections that passed `filter`. + pub fn strings_filter( + &self, + section_name: impl AsRef, + subsection_name: Option<&BStr>, + key: impl AsRef, + filter: &mut MetadataFilter, + ) -> Option>> { + self.raw_values_filter(section_name, subsection_name, key, filter).ok() + } + + /// Like [`strings_filter()`][File::strings_filter()], but suitable for statically known `key`s like `remote.origin.url`. 
+ pub fn strings_filter_by_key<'a>( + &self, + key: impl Into<&'a BStr>, + filter: &mut MetadataFilter, + ) -> Option>> { + let key = crate::parse::key(key)?; + self.strings_filter(key.section_name, key.subsection_name, key.value_name, filter) + } + + /// Similar to [`values(…)`][File::values()] but returning integers if at least one of them was found + /// and if none of them overflows. + pub fn integers( + &self, + section_name: impl AsRef, + subsection_name: Option<&BStr>, + key: impl AsRef, + ) -> Option, value::Error>> { + self.integers_filter(section_name, subsection_name, key, &mut |_| true) + } + + /// Like [`integers()`][File::integers()], but suitable for statically known `key`s like `remote.origin.url`. + pub fn integers_by_key<'a>(&self, key: impl Into<&'a BStr>) -> Option, value::Error>> { + self.integers_filter_by_key(key, &mut |_| true) + } + + /// Similar to [`integers(…)`][File::integers()] but all integers are in sections that passed `filter` + /// and that are not overflowing. + pub fn integers_filter( + &self, + section_name: impl AsRef, + subsection_name: Option<&BStr>, + key: impl AsRef, + filter: &mut MetadataFilter, + ) -> Option, value::Error>> { + self.raw_values_filter(section_name, subsection_name, key, filter) + .ok() + .map(|values| { + values + .into_iter() + .map(|v| { + crate::Integer::try_from(v.as_ref()).and_then(|int| { + int.to_decimal() + .ok_or_else(|| value::Error::new("Integer overflow", v.into_owned())) + }) + }) + .collect() + }) + } + + /// Like [`integers_filter()`][File::integers_filter()], but suitable for statically known `key`s like `remote.origin.url`. 
+ pub fn integers_filter_by_key<'a>( + &self, + key: impl Into<&'a BStr>, + filter: &mut MetadataFilter, + ) -> Option, value::Error>> { + let key = crate::parse::key(key)?; + self.integers_filter(key.section_name, key.subsection_name, key.value_name, filter) + } +} diff --git a/vendor/gix-config/src/file/access/mod.rs b/vendor/gix-config/src/file/access/mod.rs new file mode 100644 index 000000000..d602b5f8b --- /dev/null +++ b/vendor/gix-config/src/file/access/mod.rs @@ -0,0 +1,4 @@ +mod comfort; +mod mutate; +mod raw; +mod read_only; diff --git a/vendor/gix-config/src/file/access/mutate.rs b/vendor/gix-config/src/file/access/mutate.rs new file mode 100644 index 000000000..e1cfc6e1c --- /dev/null +++ b/vendor/gix-config/src/file/access/mutate.rs @@ -0,0 +1,387 @@ +use std::borrow::Cow; + +use bstr::BStr; +use gix_features::threading::OwnShared; + +use crate::{ + file::{self, rename_section, write::ends_with_newline, MetadataFilter, SectionBodyIdsLut, SectionId, SectionMut}, + lookup, + parse::{section, Event, FrontMatterEvents}, + File, +}; + +/// Mutating low-level access methods. +impl<'event> File<'event> { + /// Returns the last mutable section with a given `name` and optional `subsection_name`, _if it exists_. + pub fn section_mut<'a>( + &'a mut self, + name: impl AsRef, + subsection_name: Option<&BStr>, + ) -> Result, lookup::existing::Error> { + let id = self + .section_ids_by_name_and_subname(name.as_ref(), subsection_name)? + .rev() + .next() + .expect("BUG: Section lookup vec was empty"); + let nl = self.detect_newline_style_smallvec(); + Ok(self + .sections + .get_mut(&id) + .expect("BUG: Section did not have id from lookup") + .to_mut(nl)) + } + + /// Returns the last found mutable section with a given `key`, identifying the name and subsection name like `core` or `remote.origin`. 
+ pub fn section_mut_by_key<'a, 'b>( + &'a mut self, + key: impl Into<&'b BStr>, + ) -> Result, lookup::existing::Error> { + let key = section::unvalidated::Key::parse(key).ok_or(lookup::existing::Error::KeyMissing)?; + self.section_mut(key.section_name, key.subsection_name) + } + + /// Return the mutable section identified by `id`, or `None` if it didn't exist. + /// + /// Note that `id` is stable across deletions and insertions. + pub fn section_mut_by_id<'a>(&'a mut self, id: SectionId) -> Option> { + let nl = self.detect_newline_style_smallvec(); + self.sections.get_mut(&id).map(|s| s.to_mut(nl)) + } + + /// Returns the last mutable section with a given `name` and optional `subsection_name`, _if it exists_, or creates a new section. + pub fn section_mut_or_create_new<'a>( + &'a mut self, + name: impl AsRef, + subsection_name: Option<&BStr>, + ) -> Result, section::header::Error> { + self.section_mut_or_create_new_filter(name, subsection_name, &mut |_| true) + } + + /// Returns the last mutable section with a given `name` and optional `subsection_name`, _if it exists_ **and** passes `filter`, or creates + /// a new section. + pub fn section_mut_or_create_new_filter<'a>( + &'a mut self, + name: impl AsRef, + subsection_name: Option<&BStr>, + filter: &mut MetadataFilter, + ) -> Result, section::header::Error> { + let name = name.as_ref(); + match self + .section_ids_by_name_and_subname(name.as_ref(), subsection_name) + .ok() + .and_then(|it| { + it.rev().find(|id| { + let s = &self.sections[id]; + filter(s.meta()) + }) + }) { + Some(id) => { + let nl = self.detect_newline_style_smallvec(); + Ok(self + .sections + .get_mut(&id) + .expect("BUG: Section did not have id from lookup") + .to_mut(nl)) + } + None => self.new_section(name.to_owned(), subsection_name.map(|n| Cow::Owned(n.to_owned()))), + } + } + + /// Returns the last found mutable section with a given `name` and optional `subsection_name`, that matches `filter`, _if it exists_.
+ /// + /// If there are sections matching `name` and `subsection_name` but the `filter` rejects all of them, `Ok(None)` + /// is returned. + pub fn section_mut_filter<'a>( + &'a mut self, + name: impl AsRef, + subsection_name: Option<&BStr>, + filter: &mut MetadataFilter, + ) -> Result>, lookup::existing::Error> { + let id = self + .section_ids_by_name_and_subname(name.as_ref(), subsection_name)? + .rev() + .find(|id| { + let s = &self.sections[id]; + filter(s.meta()) + }); + let nl = self.detect_newline_style_smallvec(); + Ok(id.and_then(move |id| self.sections.get_mut(&id).map(move |s| s.to_mut(nl)))) + } + + /// Like [`section_mut_filter()`][File::section_mut_filter()], but identifies the section with a given `key`, + /// like `core` or `remote.origin`. + pub fn section_mut_filter_by_key<'a, 'b>( + &'a mut self, + key: impl Into<&'b BStr>, + filter: &mut MetadataFilter, + ) -> Result>, lookup::existing::Error> { + let key = section::unvalidated::Key::parse(key).ok_or(lookup::existing::Error::KeyMissing)?; + self.section_mut_filter(key.section_name, key.subsection_name, filter) + } + + /// Adds a new section. If a subsection name was provided, then + /// the generated header will use the modern subsection syntax. + /// Returns a reference to the new section for immediate editing.
+ /// + /// # Examples + /// + /// Creating a new empty section: + /// + /// ``` + /// # use std::borrow::Cow; + /// # use gix_config::File; + /// # use std::convert::TryFrom; + /// let mut gix_config = gix_config::File::default(); + /// let section = gix_config.new_section("hello", Some(Cow::Borrowed("world".into())))?; + /// let nl = section.newline().to_owned(); + /// assert_eq!(gix_config.to_string(), format!("[hello \"world\"]{nl}")); + /// # Ok::<(), Box>(()) + /// ``` + /// + /// Creating a new empty section and adding values to it: + /// + /// ``` + /// # use gix_config::File; + /// # use std::borrow::Cow; + /// # use std::convert::TryFrom; + /// # use bstr::ByteSlice; + /// # use gix_config::parse::section; + /// let mut gix_config = gix_config::File::default(); + /// let mut section = gix_config.new_section("hello", Some(Cow::Borrowed("world".into())))?; + /// section.push(section::Key::try_from("a")?, Some("b".into())); + /// let nl = section.newline().to_owned(); + /// assert_eq!(gix_config.to_string(), format!("[hello \"world\"]{nl}\ta = b{nl}")); + /// let _section = gix_config.new_section("core", None); + /// assert_eq!(gix_config.to_string(), format!("[hello \"world\"]{nl}\ta = b{nl}[core]{nl}")); + /// # Ok::<(), Box>(()) + /// ``` + pub fn new_section( + &mut self, + name: impl Into>, + subsection: impl Into>>, + ) -> Result, section::header::Error> { + let id = self.push_section_internal(file::Section::new(name, subsection, OwnShared::clone(&self.meta))?); + let nl = self.detect_newline_style_smallvec(); + let mut section = self.sections.get_mut(&id).expect("each id yields a section").to_mut(nl); + section.push_newline(); + Ok(section) + } + + /// Removes the section with `name` and `subsection_name`, returning it if there was a matching section. + /// If multiple sections have the same name, then the last one is returned. Note that + /// later sections with the same name have precedence over earlier ones.
+ /// + /// # Examples + /// + /// Creating and removing a section: + /// + /// ``` + /// # use gix_config::File; + /// # use std::convert::TryFrom; + /// let mut gix_config = gix_config::File::try_from( + /// r#"[hello "world"] + /// some-value = 4 + /// "#)?; + /// + /// let section = gix_config.remove_section("hello", Some("world".into())); + /// assert_eq!(gix_config.to_string(), ""); + /// # Ok::<(), Box>(()) + /// ``` + /// + /// Precedence example for removing sections with the same name: + /// + /// ``` + /// # use gix_config::File; + /// # use std::convert::TryFrom; + /// let mut gix_config = gix_config::File::try_from( + /// r#"[hello "world"] + /// some-value = 4 + /// [hello "world"] + /// some-value = 5 + /// "#)?; + /// + /// let section = gix_config.remove_section("hello", Some("world".into())); + /// assert_eq!(gix_config.to_string(), "[hello \"world\"]\n some-value = 4\n"); + /// # Ok::<(), Box>(()) + /// ``` + pub fn remove_section<'a>( + &mut self, + name: &str, + subsection_name: impl Into>, + ) -> Option> { + let id = self + .section_ids_by_name_and_subname(name, subsection_name.into()) + .ok()? + .rev() + .next()?; + self.remove_section_by_id(id) + } + + /// Remove the section identified by `id` if it exists and return it, or return `None` if no such section was present. + /// + /// Note that section ids are unambiguous even in the face of removals and additions of sections. + pub fn remove_section_by_id(&mut self, id: SectionId) -> Option> { + self.section_order + .remove(self.section_order.iter().position(|v| *v == id)?); + let section = self.sections.remove(&id)?; + let lut = self + .section_lookup_tree + .get_mut(§ion.header.name) + .expect("lookup cache still has name to be deleted"); + // NOTE: this leaves empty lists in the data structure which our code now has to deal with. 
+ for entry in lut { + match section.header.subsection_name.as_deref() { + Some(subsection_name) => { + if let SectionBodyIdsLut::NonTerminal(map) = entry { + if let Some(ids) = map.get_mut(subsection_name) { + ids.remove(ids.iter().position(|v| *v == id).expect("present")); + break; + } + } + } + None => { + if let SectionBodyIdsLut::Terminal(ids) = entry { + ids.remove(ids.iter().position(|v| *v == id).expect("present")); + break; + } + } + } + } + Some(section) + } + + /// Removes the section with `name` and `subsection_name` that passed `filter`, returning the removed section + /// if at least one section matched the `filter`. + /// If multiple sections have the same name, then the last one is returned. Note that + /// later sections with the same name have precedent over earlier ones. + pub fn remove_section_filter<'a>( + &mut self, + name: &str, + subsection_name: impl Into>, + filter: &mut MetadataFilter, + ) -> Option> { + let id = self + .section_ids_by_name_and_subname(name, subsection_name.into()) + .ok()? + .rev() + .find(|id| filter(self.sections.get(id).expect("each id has a section").meta()))?; + self.section_order.remove( + self.section_order + .iter() + .position(|v| *v == id) + .expect("known section id"), + ); + self.sections.remove(&id) + } + + /// Adds the provided section to the config, returning a mutable reference + /// to it for immediate editing. + /// Note that its meta-data will remain as is. + pub fn push_section( + &mut self, + section: file::Section<'event>, + ) -> Result, section::header::Error> { + let id = self.push_section_internal(section); + let nl = self.detect_newline_style_smallvec(); + let section = self.sections.get_mut(&id).expect("each id yields a section").to_mut(nl); + Ok(section) + } + + /// Renames the section with `name` and `subsection_name`, modifying the last matching section + /// to use `new_name` and `new_subsection_name`. 
+ pub fn rename_section<'a>( + &mut self, + name: impl AsRef, + subsection_name: impl Into>, + new_name: impl Into>, + new_subsection_name: impl Into>>, + ) -> Result<(), rename_section::Error> { + let id = self + .section_ids_by_name_and_subname(name.as_ref(), subsection_name.into())? + .rev() + .next() + .expect("list of sections were empty, which violates invariant"); + let section = self.sections.get_mut(&id).expect("known section-id"); + section.header = section::Header::new(new_name, new_subsection_name)?; + Ok(()) + } + + /// Renames the section with `name` and `subsection_name`, modifying the last matching section + /// that also passes `filter` to use `new_name` and `new_subsection_name`. + /// + /// Note that the otherwise unused [`lookup::existing::Error::KeyMissing`] variant is used to indicate + /// that the `filter` rejected all candidates, leading to no section being renamed after all. + pub fn rename_section_filter<'a>( + &mut self, + name: impl AsRef, + subsection_name: impl Into>, + new_name: impl Into>, + new_subsection_name: impl Into>>, + filter: &mut MetadataFilter, + ) -> Result<(), rename_section::Error> { + let id = self + .section_ids_by_name_and_subname(name.as_ref(), subsection_name.into())? + .rev() + .find(|id| filter(self.sections.get(id).expect("each id has a section").meta())) + .ok_or(rename_section::Error::Lookup(lookup::existing::Error::KeyMissing))?; + let section = self.sections.get_mut(&id).expect("known section-id"); + section.header = section::Header::new(new_name, new_subsection_name)?; + Ok(()) + } + + /// Append another File to the end of ourselves, without losing any information. + pub fn append(&mut self, other: Self) -> &mut Self { + self.append_or_insert(other, None) + } + + /// Append another File to the end of ourselves, without losing any information. 
+ pub(crate) fn append_or_insert(&mut self, mut other: Self, mut insert_after: Option) -> &mut Self { + let nl = self.detect_newline_style_smallvec(); + fn extend_and_assure_newline<'a>( + lhs: &mut FrontMatterEvents<'a>, + rhs: FrontMatterEvents<'a>, + nl: &impl AsRef<[u8]>, + ) { + if !ends_with_newline(lhs.as_ref(), nl, true) + && !rhs.first().map_or(true, |e| e.to_bstr_lossy().starts_with(nl.as_ref())) + { + lhs.push(Event::Newline(Cow::Owned(nl.as_ref().into()))) + } + lhs.extend(rhs); + } + #[allow(clippy::unnecessary_lazy_evaluations)] + let our_last_section_before_append = + insert_after.or_else(|| (self.section_id_counter != 0).then(|| SectionId(self.section_id_counter - 1))); + + for id in std::mem::take(&mut other.section_order) { + let section = other.sections.remove(&id).expect("present"); + + let new_id = match insert_after { + Some(id) => { + let new_id = self.insert_section_after(section, id); + insert_after = Some(new_id); + new_id + } + None => self.push_section_internal(section), + }; + + if let Some(post_matter) = other.frontmatter_post_section.remove(&id) { + self.frontmatter_post_section.insert(new_id, post_matter); + } + } + + if other.frontmatter_events.is_empty() { + return self; + } + + match our_last_section_before_append { + Some(last_id) => extend_and_assure_newline( + self.frontmatter_post_section.entry(last_id).or_default(), + other.frontmatter_events, + &nl, + ), + None => extend_and_assure_newline(&mut self.frontmatter_events, other.frontmatter_events, &nl), + } + self + } +} diff --git a/vendor/gix-config/src/file/access/raw.rs b/vendor/gix-config/src/file/access/raw.rs new file mode 100644 index 000000000..46f1fb006 --- /dev/null +++ b/vendor/gix-config/src/file/access/raw.rs @@ -0,0 +1,536 @@ +use std::{borrow::Cow, collections::HashMap, convert::TryInto}; + +use bstr::BStr; +use smallvec::ToSmallVec; + +use crate::{ + file::{mutable::multi_value::EntryData, Index, MetadataFilter, MultiValueMut, Size, ValueMut}, + lookup, + 
parse::{section, Event}, + File, +}; + +/// # Raw value API +/// +/// These functions are the raw value API, returning normalized byte strings. +impl<'event> File<'event> { + /// Returns an uninterpreted value given a section, an optional subsection + /// and key. + /// + /// Consider [`Self::raw_values()`] if you want to get all values of + /// a multivar instead. + pub fn raw_value( + &self, + section_name: impl AsRef, + subsection_name: Option<&BStr>, + key: impl AsRef, + ) -> Result, lookup::existing::Error> { + self.raw_value_filter(section_name, subsection_name, key, &mut |_| true) + } + + /// Returns an uninterpreted value given a section, an optional subsection + /// and key, if it passes the `filter`. + /// + /// Consider [`Self::raw_values()`] if you want to get all values of + /// a multivar instead. + pub fn raw_value_filter( + &self, + section_name: impl AsRef, + subsection_name: Option<&BStr>, + key: impl AsRef, + filter: &mut MetadataFilter, + ) -> Result, lookup::existing::Error> { + let section_ids = self.section_ids_by_name_and_subname(section_name.as_ref(), subsection_name)?; + let key = key.as_ref(); + for section_id in section_ids.rev() { + let section = self.sections.get(§ion_id).expect("known section id"); + if !filter(section.meta()) { + continue; + } + if let Some(v) = section.value(key) { + return Ok(v); + } + } + + Err(lookup::existing::Error::KeyMissing) + } + + /// Returns a mutable reference to an uninterpreted value given a section, + /// an optional subsection and key. + /// + /// Consider [`Self::raw_values_mut`] if you want to get mutable + /// references to all values of a multivar instead. 
+ pub fn raw_value_mut<'lookup>( + &mut self, + section_name: impl AsRef, + subsection_name: Option<&'lookup BStr>, + key: &'lookup str, + ) -> Result, lookup::existing::Error> { + self.raw_value_mut_filter(section_name, subsection_name, key, &mut |_| true) + } + + /// Returns a mutable reference to an uninterpreted value given a section, + /// an optional subsection and key, and if it passes `filter`. + /// + /// Consider [`Self::raw_values_mut`] if you want to get mutable + /// references to all values of a multivar instead. + pub fn raw_value_mut_filter<'lookup>( + &mut self, + section_name: impl AsRef, + subsection_name: Option<&'lookup BStr>, + key: &'lookup str, + filter: &mut MetadataFilter, + ) -> Result, lookup::existing::Error> { + let mut section_ids = self + .section_ids_by_name_and_subname(section_name.as_ref(), subsection_name)? + .rev(); + let key = section::Key(Cow::::Borrowed(key.into())); + + while let Some(section_id) = section_ids.next() { + let mut index = 0; + let mut size = 0; + let mut found_key = false; + let section = self.sections.get(§ion_id).expect("known section id"); + if !filter(section.meta()) { + continue; + } + for (i, event) in section.as_ref().iter().enumerate() { + match event { + Event::SectionKey(event_key) if *event_key == key => { + found_key = true; + index = i; + size = 1; + } + Event::Newline(_) | Event::Whitespace(_) | Event::ValueNotDone(_) if found_key => { + size += 1; + } + Event::ValueDone(_) | Event::Value(_) if found_key => { + found_key = false; + size += 1; + } + Event::KeyValueSeparator if found_key => { + size += 1; + } + _ => {} + } + } + + if size == 0 { + continue; + } + + drop(section_ids); + let nl = self.detect_newline_style().to_smallvec(); + return Ok(ValueMut { + section: self.sections.get_mut(§ion_id).expect("known section-id").to_mut(nl), + key, + index: Index(index), + size: Size(size), + }); + } + + Err(lookup::existing::Error::KeyMissing) + } + + /// Returns all uninterpreted values given a 
section, an optional subsection + /// and key, in order of occurrence. + /// + /// The ordering means that the last of the returned values is the one that would be the + /// value used in the single-value case. + /// + /// # Examples + /// + /// If you have the following config: + /// + /// ```text + /// [core] + /// a = b + /// [core] + /// a = c + /// a = d + /// ``` + /// + /// Attempting to get all values of `a` yields the following: + /// + /// ``` + /// # use gix_config::File; + /// # use std::borrow::Cow; + /// # use std::convert::TryFrom; + /// # use bstr::BStr; + /// # let gix_config = gix_config::File::try_from("[core]a=b\n[core]\na=c\na=d").unwrap(); + /// assert_eq!( + /// gix_config.raw_values("core", None, "a").unwrap(), + /// vec![ + /// Cow::::Borrowed("b".into()), + /// Cow::::Borrowed("c".into()), + /// Cow::::Borrowed("d".into()), + /// ], + /// ); + /// ``` + /// + /// Consider [`Self::raw_value`] if you want to get the resolved single + /// value for a given key, if your key does not support multi-valued values. + pub fn raw_values( + &self, + section_name: impl AsRef, + subsection_name: Option<&BStr>, + key: impl AsRef, + ) -> Result>, lookup::existing::Error> { + self.raw_values_filter(section_name, subsection_name, key, &mut |_| true) + } + + /// Returns all uninterpreted values given a section, an optional subsection + /// and key, if the value passes `filter`, in order of occurrence. + /// + /// The ordering means that the last of the returned values is the one that would be the + /// value used in the single-value case. 
+ pub fn raw_values_filter( + &self, + section_name: impl AsRef, + subsection_name: Option<&BStr>, + key: impl AsRef, + filter: &mut MetadataFilter, + ) -> Result>, lookup::existing::Error> { + let mut values = Vec::new(); + let section_ids = self.section_ids_by_name_and_subname(section_name.as_ref(), subsection_name)?; + let key = key.as_ref(); + for section_id in section_ids { + let section = self.sections.get(§ion_id).expect("known section id"); + if !filter(section.meta()) { + continue; + } + values.extend(section.values(key)); + } + + if values.is_empty() { + Err(lookup::existing::Error::KeyMissing) + } else { + Ok(values) + } + } + + /// Returns mutable references to all uninterpreted values given a section, + /// an optional subsection and key. + /// + /// # Examples + /// + /// If you have the following config: + /// + /// ```text + /// [core] + /// a = b + /// [core] + /// a = c + /// a = d + /// ``` + /// + /// Attempting to get all values of `a` yields the following: + /// + /// ``` + /// # use gix_config::File; + /// # use std::borrow::Cow; + /// # use std::convert::TryFrom; + /// # use bstr::BStr; + /// # let mut gix_config = gix_config::File::try_from("[core]a=b\n[core]\na=c\na=d").unwrap(); + /// assert_eq!( + /// gix_config.raw_values("core", None, "a")?, + /// vec![ + /// Cow::::Borrowed("b".into()), + /// Cow::::Borrowed("c".into()), + /// Cow::::Borrowed("d".into()) + /// ] + /// ); + /// + /// gix_config.raw_values_mut("core", None, "a")?.set_all("g"); + /// + /// assert_eq!( + /// gix_config.raw_values("core", None, "a")?, + /// vec![ + /// Cow::::Borrowed("g".into()), + /// Cow::::Borrowed("g".into()), + /// Cow::::Borrowed("g".into()) + /// ], + /// ); + /// # Ok::<(), gix_config::lookup::existing::Error>(()) + /// ``` + /// + /// Consider [`Self::raw_value`] if you want to get the resolved single + /// value for a given key, if your key does not support multi-valued values. 
+ /// + /// Note that this operation is relatively expensive, requiring a full + /// traversal of the config. + pub fn raw_values_mut<'lookup>( + &mut self, + section_name: impl AsRef, + subsection_name: Option<&'lookup BStr>, + key: &'lookup str, + ) -> Result, lookup::existing::Error> { + self.raw_values_mut_filter(section_name, subsection_name, key, &mut |_| true) + } + + /// Returns mutable references to all uninterpreted values given a section, + /// an optional subsection and key, if their sections pass `filter`. + pub fn raw_values_mut_filter<'lookup>( + &mut self, + section_name: impl AsRef, + subsection_name: Option<&'lookup BStr>, + key: &'lookup str, + filter: &mut MetadataFilter, + ) -> Result, lookup::existing::Error> { + let section_ids = self.section_ids_by_name_and_subname(section_name.as_ref(), subsection_name)?; + let key = section::Key(Cow::::Borrowed(key.into())); + + let mut offsets = HashMap::new(); + let mut entries = Vec::new(); + for section_id in section_ids.rev() { + let mut last_boundary = 0; + let mut expect_value = false; + let mut offset_list = Vec::new(); + let mut offset_index = 0; + let section = self.sections.get(§ion_id).expect("known section-id"); + if !filter(section.meta()) { + continue; + } + for (i, event) in section.as_ref().iter().enumerate() { + match event { + Event::SectionKey(event_key) if *event_key == key => { + expect_value = true; + offset_list.push(i - last_boundary); + offset_index += 1; + last_boundary = i; + } + Event::Value(_) | Event::ValueDone(_) if expect_value => { + expect_value = false; + entries.push(EntryData { + section_id, + offset_index, + }); + offset_list.push(i - last_boundary + 1); + offset_index += 1; + last_boundary = i + 1; + } + _ => (), + } + } + offsets.insert(section_id, offset_list); + } + + entries.sort(); + + if entries.is_empty() { + Err(lookup::existing::Error::KeyMissing) + } else { + Ok(MultiValueMut { + section: &mut self.sections, + key, + indices_and_sizes: entries, + offsets, + 
}) + } + } + + /// Sets a value in a given `section_name`, optional `subsection_name`, and `key`. + /// Note sections named `section_name` and `subsection_name` (if not `None`) + /// must exist for this method to work. + /// + /// # Examples + /// + /// Given the config, + /// + /// ```text + /// [core] + /// a = b + /// [core] + /// a = c + /// a = d + /// ``` + /// + /// Setting a new value to the key `core.a` will yield the following: + /// + /// ``` + /// # use gix_config::File; + /// # use std::borrow::Cow; + /// # use bstr::BStr; + /// # use std::convert::TryFrom; + /// # let mut gix_config = gix_config::File::try_from("[core]a=b\n[core]\na=c\na=d").unwrap(); + /// gix_config.set_existing_raw_value("core", None, "a", "e")?; + /// assert_eq!(gix_config.raw_value("core", None, "a")?, Cow::::Borrowed("e".into())); + /// assert_eq!( + /// gix_config.raw_values("core", None, "a")?, + /// vec![ + /// Cow::::Borrowed("b".into()), + /// Cow::::Borrowed("c".into()), + /// Cow::::Borrowed("e".into()) + /// ], + /// ); + /// # Ok::<(), Box>(()) + /// ``` + pub fn set_existing_raw_value<'b>( + &mut self, + section_name: impl AsRef, + subsection_name: Option<&BStr>, + key: impl AsRef, + new_value: impl Into<&'b BStr>, + ) -> Result<(), lookup::existing::Error> { + self.raw_value_mut(section_name, subsection_name, key.as_ref()) + .map(|mut entry| entry.set(new_value)) + } + + /// Sets a value in a given `section_name`, optional `subsection_name`, and `key`. + /// Creates the section if necessary and the key as well, or overwrites the last existing value otherwise. 
+ /// + /// # Examples + /// + /// Given the config, + /// + /// ```text + /// [core] + /// a = b + /// ``` + /// + /// Setting a new value to the key `core.a` will yield the following: + /// + /// ``` + /// # use gix_config::File; + /// # use std::borrow::Cow; + /// # use bstr::BStr; + /// # use std::convert::TryFrom; + /// # let mut gix_config = gix_config::File::try_from("[core]a=b").unwrap(); + /// let prev = gix_config.set_raw_value("core", None, "a", "e")?; + /// gix_config.set_raw_value("core", None, "b", "f")?; + /// assert_eq!(prev.expect("present").as_ref(), "b"); + /// assert_eq!(gix_config.raw_value("core", None, "a")?, Cow::::Borrowed("e".into())); + /// assert_eq!(gix_config.raw_value("core", None, "b")?, Cow::::Borrowed("f".into())); + /// # Ok::<(), Box>(()) + /// ``` + pub fn set_raw_value<'b, Key, E>( + &mut self, + section_name: impl AsRef, + subsection_name: Option<&BStr>, + key: Key, + new_value: impl Into<&'b BStr>, + ) -> Result>, crate::file::set_raw_value::Error> + where + Key: TryInto, Error = E>, + section::key::Error: From, + { + self.set_raw_value_filter(section_name, subsection_name, key, new_value, &mut |_| true) + } + + /// Similar to [`set_raw_value()`][Self::set_raw_value()], but only sets existing values in sections matching + /// `filter`, creating a new section otherwise. + pub fn set_raw_value_filter<'b, Key, E>( + &mut self, + section_name: impl AsRef, + subsection_name: Option<&BStr>, + key: Key, + new_value: impl Into<&'b BStr>, + filter: &mut MetadataFilter, + ) -> Result>, crate::file::set_raw_value::Error> + where + Key: TryInto, Error = E>, + section::key::Error: From, + { + let mut section = self.section_mut_or_create_new_filter(section_name, subsection_name, filter)?; + Ok(section.set(key.try_into().map_err(section::key::Error::from)?, new_value)) + } + + /// Sets a multivar in a given section, optional subsection, and key value. + /// + /// This internally zips together the new values and the existing values. 
+ /// As a result, if more new values are provided than the current amount of + /// multivars, then the latter values are not applied. If there are fewer + /// new values than old ones then the remaining old values are unmodified. + /// + /// **Note**: Mutation order is _not_ guaranteed and is non-deterministic. + /// If you need finer control over which values of the multivar are set, + /// consider using [`raw_values_mut()`][Self::raw_values_mut()], which will let you iterate + /// and check over the values instead. This is best used as a convenience + /// function for setting multivars whose values should be treated as an + /// unordered set. + /// + /// # Examples + /// + /// Let us use the following config for all examples: + /// + /// ```text + /// [core] + /// a = b + /// [core] + /// a = c + /// a = d + /// ``` + /// + /// Setting an equal number of values: + /// + /// ``` + /// # use gix_config::File; + /// # use std::borrow::Cow; + /// # use std::convert::TryFrom; + /// # use bstr::BStr; + /// # let mut gix_config = gix_config::File::try_from("[core]a=b\n[core]\na=c\na=d").unwrap(); + /// let new_values = vec![ + /// "x", + /// "y", + /// "z", + /// ]; + /// gix_config.set_existing_raw_multi_value("core", None, "a", new_values.into_iter())?; + /// let fetched_config = gix_config.raw_values("core", None, "a")?; + /// assert!(fetched_config.contains(&Cow::::Borrowed("x".into()))); + /// assert!(fetched_config.contains(&Cow::::Borrowed("y".into()))); + /// assert!(fetched_config.contains(&Cow::::Borrowed("z".into()))); + /// # Ok::<(), gix_config::lookup::existing::Error>(()) + /// ``` + /// + /// Setting less than the number of present values sets the first ones found: + /// + /// ``` + /// # use gix_config::File; + /// # use std::borrow::Cow; + /// # use std::convert::TryFrom; + /// # use bstr::BStr; + /// # let mut gix_config = gix_config::File::try_from("[core]a=b\n[core]\na=c\na=d").unwrap(); + /// let new_values = vec![ + /// "x", + /// "y", + /// ]; + /// 
gix_config.set_existing_raw_multi_value("core", None, "a", new_values.into_iter())?; + /// let fetched_config = gix_config.raw_values("core", None, "a")?; + /// assert!(fetched_config.contains(&Cow::::Borrowed("x".into()))); + /// assert!(fetched_config.contains(&Cow::::Borrowed("y".into()))); + /// # Ok::<(), gix_config::lookup::existing::Error>(()) + /// ``` + /// + /// Setting more than the number of present values discards the rest: + /// + /// ``` + /// # use gix_config::File; + /// # use std::borrow::Cow; + /// # use std::convert::TryFrom; + /// # use bstr::BStr; + /// # let mut gix_config = gix_config::File::try_from("[core]a=b\n[core]\na=c\na=d").unwrap(); + /// let new_values = vec![ + /// "x", + /// "y", + /// "z", + /// "discarded", + /// ]; + /// gix_config.set_existing_raw_multi_value("core", None, "a", new_values)?; + /// assert!(!gix_config.raw_values("core", None, "a")?.contains(&Cow::::Borrowed("discarded".into()))); + /// # Ok::<(), gix_config::lookup::existing::Error>(()) + /// ``` + pub fn set_existing_raw_multi_value<'a, Iter, Item>( + &mut self, + section_name: impl AsRef, + subsection_name: Option<&BStr>, + key: impl AsRef, + new_values: Iter, + ) -> Result<(), lookup::existing::Error> + where + Iter: IntoIterator, + Item: Into<&'a BStr>, + { + self.raw_values_mut(section_name, subsection_name, key.as_ref()) + .map(|mut v| v.set_values(new_values)) + } +} diff --git a/vendor/gix-config/src/file/access/read_only.rs b/vendor/gix-config/src/file/access/read_only.rs new file mode 100644 index 000000000..5520c6566 --- /dev/null +++ b/vendor/gix-config/src/file/access/read_only.rs @@ -0,0 +1,353 @@ +use std::{borrow::Cow, convert::TryFrom}; + +use bstr::{BStr, ByteSlice}; +use gix_features::threading::OwnShared; +use smallvec::SmallVec; + +use crate::{ + file, + file::{ + write::{extract_newline, platform_newline}, + Metadata, MetadataFilter, SectionId, + }, + lookup, + parse::Event, + File, +}; + +/// Read-only low-level access methods, as it 
requires generics for converting into +/// custom values defined in this crate like [`Integer`][crate::Integer] and +/// [`Color`][crate::Color]. +impl<'event> File<'event> { + /// Returns an interpreted value given a section, an optional subsection and + /// key. + /// + /// It's recommended to use one of the value types provided by this crate + /// as they implement the conversion, but this function is flexible and + /// will accept any type that implements [`TryFrom<&BStr>`][std::convert::TryFrom]. + /// + /// Consider [`Self::values`] if you want to get all values of a multivar instead. + /// + /// If a `string` is desired, use the [`string()`][Self::string()] method instead. + /// + /// # Examples + /// + /// ``` + /// # use gix_config::File; + /// # use gix_config::{Integer, Boolean}; + /// # use std::borrow::Cow; + /// # use std::convert::TryFrom; + /// let config = r#" + /// [core] + /// a = 10k + /// c = false + /// "#; + /// let gix_config = gix_config::File::try_from(config)?; + /// // You can either use the turbofish to determine the type... + /// let a_value = gix_config.value::("core", None, "a")?; + /// // ... or explicitly declare the type to avoid the turbofish + /// let c_value: Boolean = gix_config.value("core", None, "c")?; + /// # Ok::<(), Box>(()) + /// ``` + pub fn value<'a, T: TryFrom>>( + &'a self, + section_name: &str, + subsection_name: Option<&BStr>, + key: &str, + ) -> Result> { + T::try_from(self.raw_value(section_name, subsection_name, key)?).map_err(lookup::Error::FailedConversion) + } + + /// Like [`value()`][File::value()], but returning `None` if the value wasn't found at `section[.subsection].key` + pub fn try_value<'a, T: TryFrom>>( + &'a self, + section_name: &str, + subsection_name: Option<&BStr>, + key: &str, + ) -> Option> { + self.raw_value(section_name, subsection_name, key).ok().map(T::try_from) + } + + /// Returns all interpreted values given a section, an optional subsection + /// and key. 
+ /// + /// It's recommended to use one of the value types provided by this crate + /// as they implement the conversion, but this function is flexible and + /// will accept any type that implements [`TryFrom<&BStr>`][std::convert::TryFrom]. + /// + /// Consider [`Self::value`] if you want to get a single value + /// (following last-one-wins resolution) instead. + /// + /// To access plain strings, use the [`strings()`][Self::strings()] method instead. + /// + /// # Examples + /// + /// ``` + /// # use gix_config::File; + /// # use gix_config::{Integer, Boolean}; + /// # use std::borrow::Cow; + /// # use std::convert::TryFrom; + /// # use bstr::ByteSlice; + /// let config = r#" + /// [core] + /// a = true + /// c + /// [core] + /// a + /// a = false + /// "#; + /// let gix_config = gix_config::File::try_from(config).unwrap(); + /// // You can either use the turbofish to determine the type... + /// let a_value = gix_config.values::("core", None, "a")?; + /// assert_eq!( + /// a_value, + /// vec![ + /// Boolean(true), + /// Boolean(false), + /// Boolean(false), + /// ] + /// ); + /// // ... or explicitly declare the type to avoid the turbofish + /// let c_value: Vec = gix_config.values("core", None, "c").unwrap(); + /// assert_eq!(c_value, vec![Boolean(false)]); + /// # Ok::<(), Box>(()) + /// ``` + /// + /// [`value`]: crate::value + /// [`TryFrom`]: std::convert::TryFrom + pub fn values<'a, T: TryFrom>>( + &'a self, + section_name: &str, + subsection_name: Option<&BStr>, + key: &str, + ) -> Result, lookup::Error> { + self.raw_values(section_name, subsection_name, key)? + .into_iter() + .map(T::try_from) + .collect::, _>>() + .map_err(lookup::Error::FailedConversion) + } + + /// Returns the last found immutable section with a given `name` and optional `subsection_name`. 
+ pub fn section( + &self, + name: impl AsRef, + subsection_name: Option<&BStr>, + ) -> Result<&file::Section<'event>, lookup::existing::Error> { + self.section_filter(name, subsection_name, &mut |_| true)? + .ok_or(lookup::existing::Error::SectionMissing) + } + + /// Returns the last found immutable section with a given `key`, identifying the name and subsection name like `core` + /// or `remote.origin`. + pub fn section_by_key<'a>( + &self, + key: impl Into<&'a BStr>, + ) -> Result<&file::Section<'event>, lookup::existing::Error> { + let key = crate::parse::section::unvalidated::Key::parse(key).ok_or(lookup::existing::Error::KeyMissing)?; + self.section(key.section_name, key.subsection_name) + } + + /// Returns the last found immutable section with a given `name` and optional `subsection_name`, that matches `filter`. + /// + /// If there are sections matching `section_name` and `subsection_name` but the `filter` rejects all of them, `Ok(None)` + /// is returned. + pub fn section_filter<'a>( + &'a self, + name: impl AsRef, + subsection_name: Option<&BStr>, + filter: &mut MetadataFilter, + ) -> Result>, lookup::existing::Error> { + Ok(self + .section_ids_by_name_and_subname(name.as_ref(), subsection_name)? + .rev() + .find_map({ + let sections = &self.sections; + move |id| { + let s = §ions[&id]; + filter(s.meta()).then_some(s) + } + })) + } + + /// Like [`section_filter()`][File::section_filter()], but identifies the section with `key` like `core` or `remote.origin`. + pub fn section_filter_by_key<'a, 'b>( + &'a self, + key: impl Into<&'b BStr>, + filter: &mut MetadataFilter, + ) -> Result>, lookup::existing::Error> { + let key = crate::parse::section::unvalidated::Key::parse(key).ok_or(lookup::existing::Error::KeyMissing)?; + self.section_filter(key.section_name, key.subsection_name, filter) + } + + /// Gets all sections that match the provided `name`, ignoring any subsections. 
+ /// + /// # Examples + /// + /// Provided the following config: + /// + /// ```text + /// [core] + /// a = b + /// [core ""] + /// c = d + /// [core "apple"] + /// e = f + /// ``` + /// + /// Calling this method will yield all sections: + /// + /// ``` + /// # use gix_config::File; + /// # use gix_config::{Integer, Boolean}; + /// # use std::borrow::Cow; + /// # use std::convert::TryFrom; + /// let config = r#" + /// [core] + /// a = b + /// [core ""] + /// c = d + /// [core "apple"] + /// e = f + /// "#; + /// let gix_config = gix_config::File::try_from(config)?; + /// assert_eq!(gix_config.sections_by_name("core").map_or(0, |s|s.count()), 3); + /// # Ok::<(), Box>(()) + /// ``` + #[must_use] + pub fn sections_by_name<'a>(&'a self, name: &'a str) -> Option> + '_> { + self.section_ids_by_name(name).ok().map(move |ids| { + ids.map(move |id| { + self.sections + .get(&id) + .expect("section doesn't have id from from lookup") + }) + }) + } + + /// Similar to [`sections_by_name()`][Self::sections_by_name()], but returns an identifier for this section as well to allow + /// referring to it unambiguously even in the light of deletions. + #[must_use] + pub fn sections_and_ids_by_name<'a>( + &'a self, + name: &'a str, + ) -> Option, SectionId)> + '_> { + self.section_ids_by_name(name).ok().map(move |ids| { + ids.map(move |id| { + ( + self.sections + .get(&id) + .expect("section doesn't have id from from lookup"), + id, + ) + }) + }) + } + + /// Gets all sections that match the provided `name`, ignoring any subsections, and pass the `filter`. 
+ #[must_use] + pub fn sections_by_name_and_filter<'a>( + &'a self, + name: &'a str, + filter: &'a mut MetadataFilter, + ) -> Option> + '_> { + self.section_ids_by_name(name).ok().map(move |ids| { + ids.filter_map(move |id| { + let s = self + .sections + .get(&id) + .expect("section doesn't have id from from lookup"); + filter(s.meta()).then_some(s) + }) + }) + } + + /// Returns the number of values in the config, no matter in which section. + /// + /// For example, a config with multiple empty sections will return 0. + /// This ignores any comments. + #[must_use] + pub fn num_values(&self) -> usize { + self.sections.values().map(|section| section.num_values()).sum() + } + + /// Returns if there are no entries in the config. This will return true + /// if there are only empty sections, with whitespace and comments not being considered + /// void. + #[must_use] + pub fn is_void(&self) -> bool { + self.sections.values().all(|s| s.body.is_void()) + } + + /// Return this file's metadata, typically set when it was first created to indicate its origins. + /// + /// It will be used in all newly created sections to identify them. + /// Change it with [`File::set_meta()`]. + pub fn meta(&self) -> &Metadata { + &self.meta + } + + /// Change the origin of this instance to be the given `meta`data. + /// + /// This is useful to control what origin about-to-be-added sections receive. + pub fn set_meta(&mut self, meta: impl Into>) -> &mut Self { + self.meta = meta.into(); + self + } + + /// Similar to [`meta()`][File::meta()], but with shared ownership. + pub fn meta_owned(&self) -> OwnShared { + OwnShared::clone(&self.meta) + } + + /// Return an iterator over all sections, in order of occurrence in the file itself. + pub fn sections(&self) -> impl Iterator> + '_ { + self.section_order.iter().map(move |id| &self.sections[id]) + } + + /// Return an iterator over all sections and their ids, in order of occurrence in the file itself. 
+ pub fn sections_and_ids(&self) -> impl Iterator, SectionId)> + '_ { + self.section_order.iter().map(move |id| (&self.sections[id], *id)) + } + + /// Return an iterator over all sections along with non-section events that are placed right after them, + /// in order of occurrence in the file itself. + /// + /// This allows to reproduce the look of sections perfectly when serializing them with + /// [`write_to()`][file::Section::write_to()]. + pub fn sections_and_postmatter(&self) -> impl Iterator, Vec<&Event<'event>>)> { + self.section_order.iter().map(move |id| { + let s = &self.sections[id]; + let pm: Vec<_> = self + .frontmatter_post_section + .get(id) + .map(|events| events.iter().collect()) + .unwrap_or_default(); + (s, pm) + }) + } + + /// Return all events which are in front of the first of our sections, or `None` if there are none. + pub fn frontmatter(&self) -> Option>> { + (!self.frontmatter_events.is_empty()).then(|| self.frontmatter_events.iter()) + } + + /// Return the newline characters that have been detected in this config file or the default ones + /// for the current platform. + /// + /// Note that the first found newline is the one we use in the assumption of consistency. 
+ pub fn detect_newline_style(&self) -> &BStr { + self.frontmatter_events + .iter() + .find_map(extract_newline) + .or_else(|| { + self.sections() + .find_map(|s| s.body.as_ref().iter().find_map(extract_newline)) + }) + .unwrap_or_else(|| platform_newline()) + } + + pub(crate) fn detect_newline_style_smallvec(&self) -> SmallVec<[u8; 2]> { + self.detect_newline_style().as_bytes().into() + } +} diff --git a/vendor/gix-config/src/file/impls.rs b/vendor/gix-config/src/file/impls.rs new file mode 100644 index 000000000..c26df5fb8 --- /dev/null +++ b/vendor/gix-config/src/file/impls.rs @@ -0,0 +1,111 @@ +use std::{borrow::Cow, convert::TryFrom, fmt::Display, str::FromStr}; + +use bstr::{BStr, BString, ByteVec}; + +use crate::{ + file::Metadata, + parse, + parse::{section, Event}, + value::normalize, + File, +}; + +impl FromStr for File<'static> { + type Err = parse::Error; + + fn from_str(s: &str) -> Result { + parse::Events::from_bytes_owned(s.as_bytes(), None) + .map(|events| File::from_parse_events_no_includes(events, Metadata::api())) + } +} + +impl<'a> TryFrom<&'a str> for File<'a> { + type Error = parse::Error; + + /// Convenience constructor. Attempts to parse the provided string into a + /// [`File`]. See [`Events::from_str()`][crate::parse::Events::from_str()] for more information. + fn try_from(s: &'a str) -> Result, Self::Error> { + parse::Events::from_str(s).map(|events| Self::from_parse_events_no_includes(events, Metadata::api())) + } +} + +impl<'a> TryFrom<&'a BStr> for File<'a> { + type Error = parse::Error; + + /// Convenience constructor. Attempts to parse the provided byte string into + /// a [`File`]. See [`Events::from_bytes()`][parse::Events::from_bytes()] for more information. 
+ fn try_from(value: &'a BStr) -> Result, Self::Error> { + parse::Events::from_bytes(value, None) + .map(|events| Self::from_parse_events_no_includes(events, Metadata::api())) + } +} + +impl From> for BString { + fn from(c: File<'_>) -> Self { + c.to_bstring() // serialize directly; `c.into()` would dispatch back to this very impl and recurse until the stack overflows + } +} + +impl Display for File<'_> { + fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result { + Display::fmt(&self.to_bstring(), f) + } +} + +impl PartialEq for File<'_> { + fn eq(&self, other: &Self) -> bool { + fn find_key<'a>(mut it: impl Iterator>) -> Option<&'a section::Key<'a>> { + it.find_map(|e| match e { + Event::SectionKey(k) => Some(k), + _ => None, + }) + } + fn collect_value<'a>(it: impl Iterator>) -> Cow<'a, BStr> { + let mut partial_value = BString::default(); + let mut value = None; + + for event in it { + match event { + Event::SectionKey(_) => break, + Event::Value(v) => { + value = v.clone().into(); + break; + } + Event::ValueNotDone(v) => partial_value.push_str(v.as_ref()), + Event::ValueDone(v) => { + partial_value.push_str(v.as_ref()); + value = Some(partial_value.into()); + break; + } + _ => (), + } + } + value.map(normalize).unwrap_or_default() + } + if self.section_order.len() != other.section_order.len() { + return false; + } + + for (lhs, rhs) in self + .section_order + .iter() + .zip(&other.section_order) + .map(|(lhs, rhs)| (&self.sections[lhs], &other.sections[rhs])) + { + if !(lhs.header.name == rhs.header.name && lhs.header.subsection_name == rhs.header.subsection_name) { + return false; + } + + let (mut lhs, mut rhs) = (lhs.body.0.iter(), rhs.body.0.iter()); + while let (Some(lhs_key), Some(rhs_key)) = (find_key(&mut lhs), find_key(&mut rhs)) { + if lhs_key != rhs_key { + return false; + } + if collect_value(&mut lhs) != collect_value(&mut rhs) { + return false; + } + } + } + true + } +} diff --git a/vendor/gix-config/src/file/includes/mod.rs b/vendor/gix-config/src/file/includes/mod.rs new file mode 100644 index 000000000..8fd92725f --- /dev/null +++ 
b/vendor/gix-config/src/file/includes/mod.rs @@ -0,0 +1,319 @@ +use std::{ + borrow::Cow, + path::{Path, PathBuf}, +}; + +use bstr::{BStr, BString, ByteSlice, ByteVec}; +use gix_features::threading::OwnShared; +use gix_ref::Category; + +use crate::{ + file, + file::{includes, init, Metadata, SectionId}, + path, File, +}; + +impl File<'static> { + /// Traverse all `include` and `includeIf` directives found in this instance and follow them, loading the + /// referenced files from their location and adding their content right past the section that included them. + /// + /// # Limitations + /// + /// - Note that this method is _not idempotent_ and calling it multiple times will resolve includes multiple + /// times. Its recommended use is as part of a multi-step bootstrapping which needs fine-grained control, + /// and unless that's given one should prefer one of the other ways of initialization that resolve includes + /// at the right time. + /// - included values are added after the _section_ that included them, not directly after the value. This is + /// a deviation from how git does it, as it technically adds new value right after the include path itself, + /// technically 'splitting' the section. This can only make a difference if the `include` section also has values + /// which later overwrite portions of the included file, which seems unusual as these would be related to `includes`. + /// We can fix this by 'splitting' the include section if needed so the included sections are put into the right place. 
+ pub fn resolve_includes(&mut self, options: init::Options<'_>) -> Result<(), Error> { + if options.includes.max_depth == 0 { + return Ok(()); + } + let mut buf = Vec::new(); + resolve(self, &mut buf, options) + } +} + +pub(crate) fn resolve(config: &mut File<'static>, buf: &mut Vec, options: init::Options<'_>) -> Result<(), Error> { + resolve_includes_recursive(config, 0, buf, options) +} + +fn resolve_includes_recursive( + target_config: &mut File<'static>, + depth: u8, + buf: &mut Vec, + options: init::Options<'_>, +) -> Result<(), Error> { + if depth == options.includes.max_depth { + return if options.includes.err_on_max_depth_exceeded { + Err(Error::IncludeDepthExceeded { + max_depth: options.includes.max_depth, + }) + } else { + Ok(()) + }; + } + + let mut section_ids_and_include_paths = Vec::new(); + for (id, section) in target_config + .section_order + .iter() + .map(|id| (*id, &target_config.sections[id])) + { + let header = §ion.header; + let header_name = header.name.as_ref(); + if header_name == "include" && header.subsection_name.is_none() { + detach_include_paths(&mut section_ids_and_include_paths, section, id) + } else if header_name == "includeIf" { + if let Some(condition) = &header.subsection_name { + let target_config_path = section.meta.path.as_deref(); + if include_condition_match(condition.as_ref(), target_config_path, options.includes)? 
{ + detach_include_paths(&mut section_ids_and_include_paths, section, id) + } + } + } + } + + append_followed_includes_recursively(section_ids_and_include_paths, target_config, depth, options, buf) +} + +fn append_followed_includes_recursively( + section_ids_and_include_paths: Vec<(SectionId, crate::Path<'_>)>, + target_config: &mut File<'static>, + depth: u8, + options: init::Options<'_>, + buf: &mut Vec, +) -> Result<(), Error> { + for (section_id, config_path) in section_ids_and_include_paths { + let meta = OwnShared::clone(&target_config.sections[§ion_id].meta); + let target_config_path = meta.path.as_deref(); + let config_path = match resolve_path(config_path, target_config_path, options.includes)? { + Some(p) => p, + None => continue, + }; + if !config_path.is_file() { + continue; + } + + buf.clear(); + std::io::copy(&mut std::fs::File::open(&config_path)?, buf)?; + let config_meta = Metadata { + path: Some(config_path), + trust: meta.trust, + level: meta.level + 1, + source: meta.source, + }; + let no_follow_options = init::Options { + includes: includes::Options::no_follow(), + ..options + }; + + let mut include_config = + File::from_bytes_owned(buf, config_meta, no_follow_options).map_err(|err| match err { + init::Error::Parse(err) => Error::Parse(err), + init::Error::Interpolate(err) => Error::Interpolate(err), + init::Error::Includes(_) => unreachable!("BUG: {:?} not possible due to no-follow options", err), + })?; + resolve_includes_recursive(&mut include_config, depth + 1, buf, options)?; + + target_config.append_or_insert(include_config, Some(section_id)); + } + Ok(()) +} + +fn detach_include_paths( + include_paths: &mut Vec<(SectionId, crate::Path<'static>)>, + section: &file::Section<'_>, + id: SectionId, +) { + include_paths.extend( + section + .body + .values("path") + .into_iter() + .map(|path| (id, crate::Path::from(Cow::Owned(path.into_owned())))), + ) +} + +fn include_condition_match( + condition: &BStr, + target_config_path: Option<&Path>, + 
options: Options<'_>, +) -> Result { + let mut tokens = condition.splitn(2, |b| *b == b':'); + let (prefix, condition) = match (tokens.next(), tokens.next()) { + (Some(a), Some(b)) => (a, b), + _ => return Ok(false), + }; + let condition = condition.as_bstr(); + match prefix { + b"gitdir" => gitdir_matches( + condition, + target_config_path, + options, + gix_glob::wildmatch::Mode::empty(), + ), + b"gitdir/i" => gitdir_matches( + condition, + target_config_path, + options, + gix_glob::wildmatch::Mode::IGNORE_CASE, + ), + b"onbranch" => Ok(onbranch_matches(condition, options.conditional).is_some()), + _ => Ok(false), + } +} + +fn onbranch_matches( + condition: &BStr, + conditional::Context { branch_name, .. }: conditional::Context<'_>, +) -> Option<()> { + let branch_name = branch_name?; + let (_, branch_name) = branch_name + .category_and_short_name() + .filter(|(cat, _)| *cat == Category::LocalBranch)?; + + let condition = if condition.ends_with(b"/") { + let mut condition: BString = condition.into(); + condition.push_str("**"); + Cow::Owned(condition) + } else { + condition.into() + }; + + gix_glob::wildmatch( + condition.as_ref(), + branch_name, + gix_glob::wildmatch::Mode::NO_MATCH_SLASH_LITERAL, + ) + .then_some(()) +} + +fn gitdir_matches( + condition_path: &BStr, + target_config_path: Option<&Path>, + Options { + conditional: conditional::Context { git_dir, .. }, + interpolate: context, + err_on_interpolation_failure, + err_on_missing_config_path, + .. + }: Options<'_>, + wildmatch_mode: gix_glob::wildmatch::Mode, +) -> Result { + if !err_on_interpolation_failure && git_dir.is_none() { + return Ok(false); + } + let git_dir = gix_path::to_unix_separators_on_windows(gix_path::into_bstr(git_dir.ok_or(Error::MissingGitDir)?)); + + let mut pattern_path: Cow<'_, _> = { + let path = match check_interpolation_result( + err_on_interpolation_failure, + crate::Path::from(Cow::Borrowed(condition_path)).interpolate(context), + )? 
{ + Some(p) => p, + None => return Ok(false), + }; + gix_path::into_bstr(path).into_owned().into() + }; + // NOTE: yes, only if we do path interpolation will the slashes be forced to unix separators on windows + if pattern_path != condition_path { + pattern_path = gix_path::to_unix_separators_on_windows(pattern_path); + } + + if let Some(relative_pattern_path) = pattern_path.strip_prefix(b"./") { + if !err_on_missing_config_path && target_config_path.is_none() { + return Ok(false); + } + let parent_dir = target_config_path + .ok_or(Error::MissingConfigPath)? + .parent() + .expect("config path can never be /"); + let mut joined_path = gix_path::to_unix_separators_on_windows(gix_path::into_bstr(parent_dir)).into_owned(); + joined_path.push(b'/'); + joined_path.extend_from_slice(relative_pattern_path); + pattern_path = joined_path.into(); + } + + // NOTE: this special handling of leading backslash is needed to do it like git does + if pattern_path.iter().next() != Some(&(std::path::MAIN_SEPARATOR as u8)) + && !gix_path::from_bstr(pattern_path.clone()).is_absolute() + { + let mut prefixed = pattern_path.into_owned(); + prefixed.insert_str(0, "**/"); + pattern_path = prefixed.into() + } + if pattern_path.ends_with(b"/") { + let mut suffixed = pattern_path.into_owned(); + suffixed.push_str("**"); + pattern_path = suffixed.into(); + } + + let match_mode = gix_glob::wildmatch::Mode::NO_MATCH_SLASH_LITERAL | wildmatch_mode; + let is_match = gix_glob::wildmatch(pattern_path.as_bstr(), git_dir.as_bstr(), match_mode); + if is_match { + return Ok(true); + } + + let expanded_git_dir = gix_path::into_bstr(gix_path::realpath(gix_path::from_byte_slice(&git_dir))?); + Ok(gix_glob::wildmatch( + pattern_path.as_bstr(), + expanded_git_dir.as_bstr(), + match_mode, + )) +} + +fn check_interpolation_result( + disable: bool, + res: Result, path::interpolate::Error>, +) -> Result>, path::interpolate::Error> { + if disable { + return res.map(Some); + } + match res { + Ok(good) => 
Ok(good.into()), + Err(err) => match err { + path::interpolate::Error::Missing { .. } | path::interpolate::Error::UserInterpolationUnsupported => { + Ok(None) + } + path::interpolate::Error::UsernameConversion(_) | path::interpolate::Error::Utf8Conversion { .. } => { + Err(err) + } + }, + } +} + +fn resolve_path( + path: crate::Path<'_>, + target_config_path: Option<&Path>, + includes::Options { + interpolate: context, + err_on_interpolation_failure, + err_on_missing_config_path, + .. + }: includes::Options<'_>, +) -> Result, Error> { + let path = match check_interpolation_result(err_on_interpolation_failure, path.interpolate(context))? { + Some(p) => p, + None => return Ok(None), + }; + let path: PathBuf = if path.is_relative() { + if !err_on_missing_config_path && target_config_path.is_none() { + return Ok(None); + } + target_config_path + .ok_or(Error::MissingConfigPath)? + .parent() + .expect("path is a config file which naturally lives in a directory") + .join(path) + } else { + path.into() + }; + Ok(Some(path)) +} + +mod types; +pub use types::{conditional, Error, Options}; diff --git a/vendor/gix-config/src/file/includes/types.rs b/vendor/gix-config/src/file/includes/types.rs new file mode 100644 index 000000000..64306bd9c --- /dev/null +++ b/vendor/gix-config/src/file/includes/types.rs @@ -0,0 +1,131 @@ +use crate::{parse, path::interpolate}; + +/// The error returned when following includes. 
+#[derive(Debug, thiserror::Error)] +#[allow(missing_docs)] +pub enum Error { + #[error(transparent)] + Io(#[from] std::io::Error), + #[error(transparent)] + Parse(#[from] parse::Error), + #[error(transparent)] + Interpolate(#[from] interpolate::Error), + #[error("The maximum allowed length {} of the file include chain built by following nested resolve_includes is exceeded", .max_depth)] + IncludeDepthExceeded { max_depth: u8 }, + #[error("Include paths from environment variables must not be relative as no config file paths exists as root")] + MissingConfigPath, + #[error("The git directory must be provided to support `gitdir:` conditional includes")] + MissingGitDir, + #[error(transparent)] + Realpath(#[from] gix_path::realpath::Error), +} + +/// Options to handle includes, like `include.path` or `includeIf..path`, +#[derive(Clone, Copy)] +pub struct Options<'a> { + /// The maximum allowed length of the file include chain built by following nested resolve_includes where base level is depth = 0. + pub max_depth: u8, + /// When max depth is exceeded while following nested includes, + /// return an error if true or silently stop following resolve_includes. + /// + /// Setting this value to false allows to read configuration with cycles, + /// which otherwise always results in an error. + pub err_on_max_depth_exceeded: bool, + /// If true, default false, failing to interpolate paths will result in an error. + /// + /// Interpolation also happens if paths in conditional includes can't be interpolated. + pub err_on_interpolation_failure: bool, + /// If true, default true, configuration not originating from a path will cause errors when trying to resolve + /// relative include paths (which would require the including configuration's path). + pub err_on_missing_config_path: bool, + /// Used during path interpolation, both for include paths before trying to read the file, and for + /// paths used in conditional `gitdir` includes. 
+ pub interpolate: interpolate::Context<'a>, + + /// Additional context for conditional includes to work. + pub conditional: conditional::Context<'a>, +} + +impl<'a> Options<'a> { + /// Provide options to never follow include directives at all. + pub fn no_follow() -> Self { + Options { + max_depth: 0, + err_on_max_depth_exceeded: false, + err_on_interpolation_failure: false, + err_on_missing_config_path: false, + interpolate: Default::default(), + conditional: Default::default(), + } + } + /// Provide options to follow includes like git does, provided the required `conditional` and `interpolate` contexts + /// to support `gitdir` and `onbranch` based `includeIf` directives as well as standard `include.path` resolution. + /// Note that the follow-mode is `git`-style, following at most 10 indirections while + /// producing an error if the depth is exceeded. + pub fn follow(interpolate: interpolate::Context<'a>, conditional: conditional::Context<'a>) -> Self { + Options { + max_depth: 10, + err_on_max_depth_exceeded: true, + err_on_interpolation_failure: false, + err_on_missing_config_path: true, + interpolate, + conditional, + } + } + + /// For use with `follow` type options, cause failure if an include path couldn't be interpolated or the depth limit is exceeded. + pub fn strict(mut self) -> Self { + self.err_on_interpolation_failure = true; + self.err_on_max_depth_exceeded = true; + self.err_on_missing_config_path = true; + self + } + + /// Like [`follow`][Options::follow()], but without information to resolve `includeIf` directories as well as default + /// configuration to allow resolving `~username/` path. `home_dir` is required to resolve `~/` paths if set. + /// Note that `%(prefix)` paths cannot be interpolated with this configuration, use [`follow()`][Options::follow()] + /// instead for complete control. 
+ pub fn follow_without_conditional(home_dir: Option<&'a std::path::Path>) -> Self { + Options { + max_depth: 10, + err_on_max_depth_exceeded: true, + err_on_interpolation_failure: false, + err_on_missing_config_path: true, + interpolate: interpolate::Context { + git_install_dir: None, + home_dir, + home_for_user: Some(interpolate::home_for_user), + }, + conditional: Default::default(), + } + } + + /// Set the context used for interpolation when interpolating paths to include as well as the paths + /// in `gitdir` conditional includes. + pub fn interpolate_with(mut self, context: interpolate::Context<'a>) -> Self { + self.interpolate = context; + self + } +} + +impl Default for Options<'_> { + fn default() -> Self { + Self::no_follow() + } +} + +/// +pub mod conditional { + /// Options to handle conditional includes like `includeIf..path`. + #[derive(Clone, Copy, Default)] + pub struct Context<'a> { + /// The location of the .git directory. If `None`, `gitdir` conditions cause an error. + /// + /// Used for conditional includes, e.g. `includeIf.gitdir:…` or `includeIf:gitdir/i…`. + pub git_dir: Option<&'a std::path::Path>, + /// The name of the branch that is currently checked out. If `None`, `onbranch` conditions cause an error. + /// + /// Used for conditional includes, e.g. `includeIf.onbranch:main.…` + pub branch_name: Option<&'a gix_ref::FullNameRef>, + } +} diff --git a/vendor/gix-config/src/file/init/comfort.rs b/vendor/gix-config/src/file/init/comfort.rs new file mode 100644 index 000000000..ffe859a1a --- /dev/null +++ b/vendor/gix-config/src/file/init/comfort.rs @@ -0,0 +1,159 @@ +use std::path::PathBuf; + +use crate::{ + file::{init, Metadata}, + path, source, File, Source, +}; + +/// Easy-instantiation of typical non-repository git configuration files with all configuration defaulting to typical values. 
+/// +/// ### Limitations +/// +/// Note that `includeIf` conditions in global files will cause failure as the required information +/// to resolve them isn't present without a repository. +/// +/// Also note that relevant information to interpolate paths will be obtained from the environment or other +/// source on unix. +impl File<'static> { + /// Open all global configuration files which involves the following sources: + /// + /// * [system][crate::Source::System] + /// * [git][crate::Source::Git] + /// * [user][crate::Source::User] + /// + /// which excludes repository local configuration, as well as override-configuration from environment variables. + /// + /// Note that the file might [be empty][File::is_void()] in case no configuration file was found. + pub fn from_globals() -> Result, init::from_paths::Error> { + let metas = [source::Kind::System, source::Kind::Global] + .iter() + .flat_map(|kind| kind.sources()) + .filter_map(|source| { + let path = source + .storage_location(&mut |name| std::env::var_os(name)) + .and_then(|p| p.is_file().then_some(p)) + .map(|p| p.into_owned()); + + Metadata { + path, + source: *source, + level: 0, + trust: gix_sec::Trust::Full, + } + .into() + }); + + let home = std::env::var("HOME").ok().map(PathBuf::from); + let options = init::Options { + includes: init::includes::Options::follow_without_conditional(home.as_deref()), + ..Default::default() + }; + File::from_paths_metadata(metas, options).map(Option::unwrap_or_default) + } + + /// Generates a config from `GIT_CONFIG_*` environment variables and return a possibly empty `File`. + /// A typical use of this is to [`append`][File::append()] this configuration to another one with lower + /// precedence to obtain overrides. + /// + /// See [`gix-config`'s documentation] for more information on the environment variables in question. 
+ /// + /// [`gix-config`'s documentation]: https://git-scm.com/docs/gix-config#Documentation/gix-config.txt-GITCONFIGCOUNT + pub fn from_environment_overrides() -> Result, init::from_env::Error> { + let home = std::env::var("HOME").ok().map(PathBuf::from); + let options = init::Options { + includes: init::includes::Options::follow_without_conditional(home.as_deref()), + ..Default::default() + }; + + File::from_env(options).map(Option::unwrap_or_default) + } +} + +/// An easy way to provide complete configuration for a repository. +impl File<'static> { + /// This configuration type includes the following sources, in order of precedence: + /// + /// - globals + /// - repository-local by loading `dir`/config + /// - worktree by loading `dir`/config.worktree + /// - environment + /// + /// Note that `dir` is the `.git` dir to load the configuration from, not the configuration file. + /// + /// Includes will be resolved within limits as some information like the git installation directory is missing to interpolate + /// paths with as well as git repository information like the branch name. 
+ pub fn from_git_dir(dir: impl Into) -> Result, from_git_dir::Error> { + let (mut local, git_dir) = { + let source = Source::Local; + let mut path = dir.into(); + path.push( + source + .storage_location(&mut |n| std::env::var_os(n)) + .expect("location available for local"), + ); + let local = Self::from_path_no_includes(&path, source)?; + path.pop(); + (local, path) + }; + + let worktree = match local.boolean("extensions", None, "worktreeConfig") { + Some(Ok(worktree_config)) => worktree_config.then(|| { + let source = Source::Worktree; + let path = git_dir.join( + source + .storage_location(&mut |n| std::env::var_os(n)) + .expect("location available for worktree"), + ); + Self::from_path_no_includes(path, source) + }), + _ => None, + } + .transpose()?; + + let home = std::env::var("HOME").ok().map(PathBuf::from); + let options = init::Options { + includes: init::includes::Options::follow( + path::interpolate::Context { + home_dir: home.as_deref(), + ..Default::default() + }, + init::includes::conditional::Context { + git_dir: Some(git_dir.as_ref()), + branch_name: None, + }, + ), + lossy: false, + }; + + let mut globals = Self::from_globals()?; + globals.resolve_includes(options)?; + local.resolve_includes(options)?; + + globals.append(local); + if let Some(mut worktree) = worktree { + worktree.resolve_includes(options)?; + globals.append(worktree); + } + globals.append(Self::from_environment_overrides()?); + + Ok(globals) + } +} + +/// +pub mod from_git_dir { + use crate::file::init; + + /// The error returned by [`File::from_git_dir()`][crate::File::from_git_dir()]. 
+ #[derive(Debug, thiserror::Error)] + pub enum Error { + #[error(transparent)] + FromPaths(#[from] init::from_paths::Error), + #[error(transparent)] + FromEnv(#[from] init::from_env::Error), + #[error(transparent)] + Init(#[from] init::Error), + #[error(transparent)] + Includes(#[from] init::includes::Error), + } +} diff --git a/vendor/gix-config/src/file/init/from_env.rs b/vendor/gix-config/src/file/init/from_env.rs new file mode 100644 index 000000000..167d37399 --- /dev/null +++ b/vendor/gix-config/src/file/init/from_env.rs @@ -0,0 +1,88 @@ +use std::convert::TryFrom; + +use bstr::{BStr, ByteSlice}; + +use crate::{file, file::init, parse, parse::section, path::interpolate, File}; + +/// Represents the errors that may occur when calling [`File::from_env()`]. +#[derive(Debug, thiserror::Error)] +#[allow(missing_docs)] +pub enum Error { + #[error("Configuration {kind} at index {index} contained illformed UTF-8")] + IllformedUtf8 { index: usize, kind: &'static str }, + #[error("GIT_CONFIG_COUNT was not a positive integer: {}", .input)] + InvalidConfigCount { input: String }, + #[error("GIT_CONFIG_KEY_{} was not set", .key_id)] + InvalidKeyId { key_id: usize }, + #[error("GIT_CONFIG_KEY_{} was set to an invalid value: {}", .key_id, .key_val)] + InvalidKeyValue { key_id: usize, key_val: String }, + #[error("GIT_CONFIG_VALUE_{} was not set", .value_id)] + InvalidValueId { value_id: usize }, + #[error(transparent)] + PathInterpolationError(#[from] interpolate::Error), + #[error(transparent)] + Includes(#[from] init::includes::Error), + #[error(transparent)] + Section(#[from] section::header::Error), + #[error(transparent)] + Key(#[from] section::key::Error), +} + +/// Instantiation from environment variables +impl File<'static> { + /// Generates a config from `GIT_CONFIG_*` environment variables or returns `Ok(None)` if no configuration was found. + /// See [`gix-config`'s documentation] for more information on the environment variables in question. 
+ /// + /// With `options` configured, it's possible to resolve `include.path` or `includeIf..path` directives as well. + /// + /// [`gix-config`'s documentation]: https://git-scm.com/docs/gix-config#Documentation/gix-config.txt-GITCONFIGCOUNT + pub fn from_env(options: init::Options<'_>) -> Result>, Error> { + use std::env; + let count: usize = match env::var("GIT_CONFIG_COUNT") { + Ok(v) => v.parse().map_err(|_| Error::InvalidConfigCount { input: v })?, + Err(_) => return Ok(None), + }; + + if count == 0 { + return Ok(None); + } + + let meta = file::Metadata { + path: None, + source: crate::Source::Env, + level: 0, + trust: gix_sec::Trust::Full, + }; + let mut config = File::new(meta); + for i in 0..count { + let key = gix_path::os_string_into_bstring( + env::var_os(format!("GIT_CONFIG_KEY_{i}")).ok_or(Error::InvalidKeyId { key_id: i })?, + ) + .map_err(|_| Error::IllformedUtf8 { index: i, kind: "key" })?; + let value = env::var_os(format!("GIT_CONFIG_VALUE_{i}")).ok_or(Error::InvalidValueId { value_id: i })?; + let key = parse::key(<_ as AsRef>::as_ref(&key)).ok_or_else(|| Error::InvalidKeyValue { + key_id: i, + key_val: key.to_string(), + })?; + + config + .section_mut_or_create_new(key.section_name, key.subsection_name)? + .push( + section::Key::try_from(key.value_name.to_owned())?, + Some( + gix_path::os_str_into_bstr(&value) + .map_err(|_| Error::IllformedUtf8 { + index: i, + kind: "value", + })? 
+ .as_bytes() + .into(), + ), + ); + } + + let mut buf = Vec::new(); + init::includes::resolve(&mut config, &mut buf, options)?; + Ok(Some(config)) + } +} diff --git a/vendor/gix-config/src/file/init/from_paths.rs b/vendor/gix-config/src/file/init/from_paths.rs new file mode 100644 index 000000000..5d671b69e --- /dev/null +++ b/vendor/gix-config/src/file/init/from_paths.rs @@ -0,0 +1,94 @@ +use std::collections::BTreeSet; + +use crate::{ + file::{init, init::Options, Metadata}, + File, +}; + +/// The error returned by [`File::from_paths_metadata()`] and [`File::from_path_no_includes()`]. +#[derive(Debug, thiserror::Error)] +#[allow(missing_docs)] +pub enum Error { + #[error(transparent)] + Io(#[from] std::io::Error), + #[error(transparent)] + Init(#[from] init::Error), +} + +/// Instantiation from one or more paths +impl File<'static> { + /// Load the single file at `path` with `source` without following include directives. + /// + /// Note that the path will be checked for ownership to derive trust. + pub fn from_path_no_includes(path: impl Into, source: crate::Source) -> Result { + let path = path.into(); + let trust = gix_sec::Trust::from_path_ownership(&path)?; + + let mut buf = Vec::new(); + std::io::copy(&mut std::fs::File::open(&path)?, &mut buf)?; + + Ok(File::from_bytes_owned( + &mut buf, + Metadata::from(source).at(path).with(trust), + Default::default(), + )?) + } + + /// Constructs a `gix-config` file from the provided metadata, which must include a path to read from or be ignored. + /// Returns `Ok(None)` if there was not a single input path provided, which is a possibility due to + /// [`Metadata::path`] being an `Option`. + /// If an input path doesn't exist, the entire operation will abort. See [`from_paths_metadata_buf()`][Self::from_paths_metadata_buf()] + /// for a more powerful version of this method. 
+ pub fn from_paths_metadata( + path_meta: impl IntoIterator>, + options: Options<'_>, + ) -> Result, Error> { + let mut buf = Vec::with_capacity(512); + let err_on_nonexisting_paths = true; + Self::from_paths_metadata_buf(path_meta, &mut buf, err_on_nonexisting_paths, options) + } + + /// Like [from_paths_metadata()][Self::from_paths_metadata()], but will use `buf` to temporarily store the config file + /// contents for parsing instead of allocating an own buffer. + /// + /// If `err_on_nonexisting_paths` is false, instead of aborting with error, we will continue to the next path instead. + pub fn from_paths_metadata_buf( + path_meta: impl IntoIterator>, + buf: &mut Vec, + err_on_non_existing_paths: bool, + options: Options<'_>, + ) -> Result, Error> { + let mut target = None; + let mut seen = BTreeSet::default(); + for (path, mut meta) in path_meta.into_iter().filter_map(|meta| { + let mut meta = meta.into(); + meta.path.take().map(|p| (p, meta)) + }) { + if !seen.insert(path.clone()) { + continue; + } + + buf.clear(); + std::io::copy( + &mut match std::fs::File::open(&path) { + Ok(f) => f, + Err(err) if !err_on_non_existing_paths && err.kind() == std::io::ErrorKind::NotFound => continue, + Err(err) => return Err(err.into()), + }, + buf, + )?; + meta.path = Some(path); + + let config = Self::from_bytes_owned(buf, meta, options)?; + match &mut target { + None => { + target = Some(config); + } + Some(target) => { + target.append(config); + } + } + } + Ok(target) + } +} diff --git a/vendor/gix-config/src/file/init/mod.rs b/vendor/gix-config/src/file/init/mod.rs new file mode 100644 index 000000000..5b4030241 --- /dev/null +++ b/vendor/gix-config/src/file/init/mod.rs @@ -0,0 +1,86 @@ +use gix_features::threading::OwnShared; + +use crate::{ + file::{includes, section, Metadata}, + parse, File, +}; + +mod types; +pub use types::{Error, Options}; + +mod comfort; +/// +pub mod from_env; +/// +pub mod from_paths; + +impl<'a> File<'a> { + /// Return an empty `File` with 
the given `meta`-data to be attached to all new sections. + pub fn new(meta: impl Into>) -> Self { + Self { + frontmatter_events: Default::default(), + frontmatter_post_section: Default::default(), + section_lookup_tree: Default::default(), + sections: Default::default(), + section_id_counter: 0, + section_order: Default::default(), + meta: meta.into(), + } + } + + /// Instantiate a new `File` from given `input`, associating each section and their values with + /// `meta`-data, while respecting `options`. + pub fn from_bytes_no_includes( + input: &'a [u8], + meta: impl Into>, + options: Options<'_>, + ) -> Result { + let meta = meta.into(); + Ok(Self::from_parse_events_no_includes( + parse::Events::from_bytes(input, options.to_event_filter())?, + meta, + )) + } + + /// Instantiate a new `File` from given `events`, associating each section and their values with + /// `meta`-data. + pub fn from_parse_events_no_includes( + parse::Events { frontmatter, sections }: parse::Events<'a>, + meta: impl Into>, + ) -> Self { + let meta = meta.into(); + let mut this = File::new(OwnShared::clone(&meta)); + + this.frontmatter_events = frontmatter; + + for section in sections { + this.push_section_internal(crate::file::Section { + header: section.header, + body: section::Body(section.events), + meta: OwnShared::clone(&meta), + id: Default::default(), + }); + } + + this + } +} + +impl File<'static> { + /// Instantiate a new fully-owned `File` from given `input` (later reused as buffer when resolving includes), + /// associating each section and their values with `meta`-data, while respecting `options`, and + /// following includes as configured there. 
+ pub fn from_bytes_owned( + input_and_buf: &mut Vec, + meta: impl Into>, + options: Options<'_>, + ) -> Result { + let mut config = Self::from_parse_events_no_includes( + parse::Events::from_bytes_owned(input_and_buf, options.to_event_filter()).map_err(Error::from)?, + meta, + ); + + includes::resolve(&mut config, input_and_buf, options).map_err(Error::from)?; + Ok(config) + } +} diff --git a/vendor/gix-config/src/file/init/types.rs b/vendor/gix-config/src/file/init/types.rs new file mode 100644 index 000000000..fcb17c0ca --- /dev/null +++ b/vendor/gix-config/src/file/init/types.rs @@ -0,0 +1,47 @@ +use crate::{file::init, parse, parse::Event, path::interpolate}; + +/// The error returned by [`File::from_bytes_no_includes()`][crate::File::from_bytes_no_includes()]. +#[derive(Debug, thiserror::Error)] +#[allow(missing_docs)] +pub enum Error { + #[error(transparent)] + Parse(#[from] parse::Error), + #[error(transparent)] + Interpolate(#[from] interpolate::Error), + #[error(transparent)] + Includes(#[from] init::includes::Error), +} + +/// Options when loading git config using [`File::from_paths_metadata()`][crate::File::from_paths_metadata()]. +#[derive(Clone, Copy, Default)] +pub struct Options<'a> { + /// Configure how to follow includes while handling paths. + pub includes: init::includes::Options<'a>, + /// If true, only value-bearing parse events will be kept to reduce memory usage and increase performance. + /// + /// Note that doing so will degenerate [`write_to()`][crate::File::write_to()] and strip it off its comments + /// and additional whitespace entirely, but will otherwise be a valid configuration file. 
+ pub lossy: bool, +} + +impl Options<'_> { + pub(crate) fn to_event_filter(self) -> Option) -> bool> { + if self.lossy { + Some(discard_nonessential_events) + } else { + None + } + } +} + +fn discard_nonessential_events(e: &Event<'_>) -> bool { + match e { + Event::Whitespace(_) | Event::Comment(_) | Event::Newline(_) => false, + Event::SectionHeader(_) + | Event::SectionKey(_) + | Event::KeyValueSeparator + | Event::Value(_) + | Event::ValueNotDone(_) + | Event::ValueDone(_) => true, + } +} diff --git a/vendor/gix-config/src/file/meta.rs b/vendor/gix-config/src/file/meta.rs new file mode 100644 index 000000000..b5229bd7a --- /dev/null +++ b/vendor/gix-config/src/file/meta.rs @@ -0,0 +1,59 @@ +use std::path::PathBuf; + +use crate::{file, file::Metadata, Source}; + +/// Instantiation +impl Metadata { + /// Return metadata indicating the source of a [`File`][crate::File] is from an API user. + pub fn api() -> Self { + file::Metadata { + path: None, + source: Source::Api, + level: 0, + trust: gix_sec::Trust::Full, + } + } + + /// Return metadata as derived from the given `path` at `source`, which will also be used to derive the trust level + /// by checking its ownership. + pub fn try_from_path(path: impl Into, source: Source) -> std::io::Result { + let path = path.into(); + gix_sec::Trust::from_path_ownership(&path).map(|trust| Metadata { + path: path.into(), + source, + level: 0, + trust, + }) + } + + /// Set the trust level of this instance to the given `trust` and return it. + /// + /// Useful in conjunction with `Metadata::from(source)`. + pub fn with(mut self, trust: gix_sec::Trust) -> Self { + self.trust = trust; + self + } + + /// Set the metadata to be located at the given `path`. 
+ pub fn at(mut self, path: impl Into) -> Self { + self.path = Some(path.into()); + self + } +} + +impl Default for Metadata { + fn default() -> Self { + Metadata::api() + } +} + +impl From for Metadata { + fn from(source: Source) -> Self { + file::Metadata { + path: None, + source, + level: 0, + trust: gix_sec::Trust::Full, + } + } +} diff --git a/vendor/gix-config/src/file/mod.rs b/vendor/gix-config/src/file/mod.rs new file mode 100644 index 000000000..2dd8c88fe --- /dev/null +++ b/vendor/gix-config/src/file/mod.rs @@ -0,0 +1,136 @@ +//! A high level wrapper around a single or multiple `gix-config` file, for reading and mutation. +use std::{ + borrow::Cow, + collections::HashMap, + ops::{Add, AddAssign}, + path::PathBuf, +}; + +use bstr::BStr; +use gix_features::threading::OwnShared; + +mod mutable; +pub use mutable::{multi_value::MultiValueMut, section::SectionMut, value::ValueMut}; + +/// +pub mod init; + +mod access; +mod impls; +/// +pub mod includes; +mod meta; +mod util; + +/// +pub mod section; + +/// +pub mod rename_section { + /// The error returned by [`File::rename_section(…)`][crate::File::rename_section()]. + #[derive(Debug, thiserror::Error)] + #[allow(missing_docs)] + pub enum Error { + #[error(transparent)] + Lookup(#[from] crate::lookup::existing::Error), + #[error(transparent)] + Section(#[from] crate::parse::section::header::Error), + } +} + +/// +pub mod set_raw_value { + /// The error returned by [`File::set_raw_value(…)`][crate::File::set_raw_value()]. + #[derive(Debug, thiserror::Error)] + #[allow(missing_docs)] + pub enum Error { + #[error(transparent)] + Header(#[from] crate::parse::section::header::Error), + #[error(transparent)] + Key(#[from] crate::parse::section::key::Error), + } +} + +/// Additional information about a section. +#[derive(Clone, Debug, PartialOrd, PartialEq, Ord, Eq, Hash)] +pub struct Metadata { + /// The file path of the source, if known. + pub path: Option, + /// Where the section is coming from. 
+ pub source: crate::Source, + /// The levels of indirection of the file, with 0 being a section + /// that was directly loaded, and 1 being an `include.path` of a + /// level 0 file. + pub level: u8, + /// The trust-level for the section this meta-data is associated with. + pub trust: gix_sec::Trust, +} + +/// A section in a gix-config file, like `[core]` or `[remote "origin"]`, along with all of its keys. +#[derive(Clone, Eq, PartialEq, Ord, PartialOrd, Hash, Debug)] +pub struct Section<'a> { + header: crate::parse::section::Header<'a>, + body: section::Body<'a>, + meta: OwnShared, + id: SectionId, +} + +/// A function to filter metadata, returning `true` if the corresponding but omitted value can be used. +pub type MetadataFilter = dyn FnMut(&'_ Metadata) -> bool; + +/// A strongly typed index into some range. +#[derive(PartialEq, Eq, Hash, PartialOrd, Ord, Debug, Clone, Copy)] +pub(crate) struct Index(pub(crate) usize); + +impl Add for Index { + type Output = Self; + + fn add(self, rhs: Size) -> Self::Output { + Self(self.0 + rhs.0) + } +} + +/// A strongly typed a size. +#[derive(PartialEq, Eq, Hash, PartialOrd, Ord, Debug, Clone, Copy)] +pub(crate) struct Size(pub(crate) usize); + +impl AddAssign for Size { + fn add_assign(&mut self, rhs: usize) { + self.0 += rhs; + } +} + +/// The section ID is a monotonically increasing ID used to refer to section bodies. +/// This value does not imply any ordering between sections, as new sections +/// with higher section IDs may be in between lower ID sections after `File` mutation. +/// +/// We need to use a section id because `gix-config` permits sections with +/// identical names, making it ambiguous when used in maps, for instance. +/// +/// This id guaranteed to be unique, but not guaranteed to be compact. In other +/// words, it's possible that a section may have an ID of 3 but the next section +/// has an ID of 5 as 4 was deleted. 
+#[derive(PartialEq, Eq, Hash, Copy, Clone, PartialOrd, Ord, Debug)]
+pub struct SectionId(pub(crate) usize);
+
+impl Default for SectionId {
+    fn default() -> Self {
+        SectionId(usize::MAX)
+    }
+}
+
+/// All section body ids referred to by a section name.
+///
+/// Note that order in Vec matters as it represents the order
+/// of section ids with the matched section and name, and is used for precedence
+/// management.
+#[derive(PartialEq, Eq, Clone, Debug)]
+pub(crate) enum SectionBodyIdsLut<'a> {
+    /// The list of section ids to use for obtaining the section body.
+    Terminal(Vec<SectionId>),
+    /// A hashmap from sub-section names to section ids.
+    NonTerminal(HashMap<Cow<'a, BStr>, Vec<SectionId>>),
+}
+#[cfg(test)]
+mod tests;
+mod write;
diff --git a/vendor/gix-config/src/file/mutable/mod.rs b/vendor/gix-config/src/file/mutable/mod.rs
new file mode 100644
index 000000000..ad99e09b9
--- /dev/null
+++ b/vendor/gix-config/src/file/mutable/mod.rs
@@ -0,0 +1,107 @@
+use std::borrow::Cow;
+
+use bstr::{BStr, BString, ByteSlice, ByteVec};
+
+use crate::{file, parse::Event};
+
+pub(crate) mod multi_value;
+pub(crate) mod section;
+pub(crate) mod value;
+
+fn escape_value(value: &BStr) -> BString {
+    let starts_with_whitespace = value.first().map_or(false, |b| b.is_ascii_whitespace());
+    let ends_with_whitespace = value
+        .get(value.len().saturating_sub(1))
+        .map_or(false, |b| b.is_ascii_whitespace());
+    let contains_comment_indicators = value.find_byteset(b";#").is_some();
+    let quote = starts_with_whitespace || ends_with_whitespace || contains_comment_indicators;
+
+    let mut buf: BString = Vec::with_capacity(value.len()).into();
+    if quote {
+        buf.push(b'"');
+    }
+
+    for b in value.iter().copied() {
+        match b {
+            b'\n' => buf.push_str("\\n"),
+            b'\t' => buf.push_str("\\t"),
+            b'"' => buf.push_str("\\\""),
+            b'\\' => buf.push_str("\\\\"),
+            _ => buf.push(b),
+        }
+    }
+
+    if quote {
+        buf.push(b'"');
+    }
+    buf
+}
+
+#[derive(PartialEq, Eq, Hash, PartialOrd, Ord, Debug)]
+struct Whitespace<'a> {
+    pre_key: Option<Cow<'a, BStr>>,
+    pre_sep: Option<Cow<'a, BStr>>,
+    post_sep: Option<Cow<'a, BStr>>,
+}
+
+impl Default for Whitespace<'_> {
+    fn default() -> Self {
+        Whitespace {
+            pre_key: Some(b"\t".as_bstr().into()),
+            pre_sep: Some(b" ".as_bstr().into()),
+            post_sep: Some(b" ".as_bstr().into()),
+        }
+    }
+}
+
+impl<'a> Whitespace<'a> {
+    fn key_value_separators(&self) -> Vec<Event<'a>> {
+        let mut out = Vec::with_capacity(3);
+        if let Some(ws) = &self.pre_sep {
+            out.push(Event::Whitespace(ws.clone()));
+        }
+        out.push(Event::KeyValueSeparator);
+        if let Some(ws) = &self.post_sep {
+            out.push(Event::Whitespace(ws.clone()));
+        }
+        out
+    }
+
+    fn from_body(s: &file::section::Body<'a>) -> Self {
+        let key_pos =
+            s.0.iter()
+                .enumerate()
+                .find_map(|(idx, e)| matches!(e, Event::SectionKey(_)).then_some(idx));
+        key_pos
+            .map(|key_pos| {
+                let pre_key = s.0[..key_pos].last().and_then(|e| match e {
+                    Event::Whitespace(s) => Some(s.clone()),
+                    _ => None,
+                });
+                let from_key = &s.0[key_pos..];
+                let (pre_sep, post_sep) = from_key
+                    .iter()
+                    .enumerate()
+                    .find_map(|(idx, e)| matches!(e, Event::KeyValueSeparator).then_some(idx))
+                    .map(|sep_pos| {
+                        (
+                            from_key.get(sep_pos - 1).and_then(|e| match e {
+                                Event::Whitespace(ws) => Some(ws.clone()),
+                                _ => None,
+                            }),
+                            from_key.get(sep_pos + 1).and_then(|e| match e {
+                                Event::Whitespace(ws) => Some(ws.clone()),
+                                _ => None,
+                            }),
+                        )
+                    })
+                    .unwrap_or_default();
+                Whitespace {
+                    pre_key,
+                    pre_sep,
+                    post_sep,
+                }
+            })
+            .unwrap_or_default()
+    }
+}
diff --git a/vendor/gix-config/src/file/mutable/multi_value.rs b/vendor/gix-config/src/file/mutable/multi_value.rs
new file mode 100644
index 000000000..396b49b6a
--- /dev/null
+++ b/vendor/gix-config/src/file/mutable/multi_value.rs
@@ -0,0 +1,266 @@
+use std::{borrow::Cow, collections::HashMap, ops::DerefMut};
+
+use bstr::{BStr, BString, ByteVec};
+
+use crate::{
+    file::{
+        self,
+        mutable::{escape_value, Whitespace},
+        Section, SectionId,
+    },
+    lookup,
+    parse::{section, Event},
+    value::{normalize_bstr, normalize_bstring},
+};
+
+/// Internal data structure for [`MultiValueMut`]
+#[derive(Copy, Clone, Eq, PartialEq, Ord, PartialOrd, Hash, Debug)]
+pub(crate) struct EntryData {
+    pub(crate) section_id: SectionId,
+    pub(crate) offset_index: usize,
+}
+
+/// An intermediate representation of a mutable multivar obtained from a [`File`][crate::File].
+#[derive(PartialEq, Eq, Debug)]
+pub struct MultiValueMut<'borrow, 'lookup, 'event> {
+    pub(crate) section: &'borrow mut HashMap<SectionId, Section<'event>>,
+    pub(crate) key: section::Key<'lookup>,
+    /// Each entry data struct provides sufficient information to index into
+    /// [`Self::offsets`]. This layer of indirection is used for users to index
+    /// into the offsets rather than leaking the internal data structures.
+    pub(crate) indices_and_sizes: Vec<EntryData>,
+    /// Each offset represents the size of an event slice and whether or not the
+    /// event slice is significant or not. This is used to index into the
+    /// actual section.
+    pub(crate) offsets: HashMap<SectionId, Vec<usize>>,
+}
+
+impl<'borrow, 'lookup, 'event> MultiValueMut<'borrow, 'lookup, 'event> {
+    /// Returns the actual values.
+    pub fn get(&self) -> Result<Vec<Cow<'_, BStr>>, lookup::existing::Error> {
+        let mut expect_value = false;
+        let mut values = Vec::new();
+        let mut concatenated_value = BString::default();
+
+        for EntryData {
+            section_id,
+            offset_index,
+        } in &self.indices_and_sizes
+        {
+            let (offset, size) = MultiValueMut::index_and_size(&self.offsets, *section_id, *offset_index);
+            for event in &self.section.get(section_id).expect("known section id").as_ref()[offset..offset + size] {
+                match event {
+                    Event::SectionKey(section_key) if *section_key == self.key => expect_value = true,
+                    Event::Value(v) if expect_value => {
+                        expect_value = false;
+                        values.push(normalize_bstr(v.as_ref()));
+                    }
+                    Event::ValueNotDone(v) if expect_value => concatenated_value.push_str(v.as_ref()),
+                    Event::ValueDone(v) if expect_value => {
+                        expect_value = false;
+                        concatenated_value.push_str(v.as_ref());
+                        values.push(normalize_bstring(std::mem::take(&mut concatenated_value)));
+                    }
+                    _ => (),
+                }
+            }
+        }
+
+        if values.is_empty() {
+            return Err(lookup::existing::Error::KeyMissing);
+        }
+
+        Ok(values)
+    }
+
+    /// Returns the amount of values within this multivar.
+    #[must_use]
+    pub fn len(&self) -> usize {
+        self.indices_and_sizes.len()
+    }
+
+    /// Returns true if the multivar does not have any values.
+    /// This might occur if the value was deleted but wasn't yet set with a new value.
+    #[must_use]
+    pub fn is_empty(&self) -> bool {
+        self.indices_and_sizes.is_empty()
+    }
+
+    /// Sets the value at the given index.
+    ///
+    /// # Panics
+    ///
+    /// This will panic if the index is out of range.
+    pub fn set_string_at(&mut self, index: usize, value: impl AsRef<str>) {
+        self.set_at(index, value.as_ref());
+    }
+
+    /// Sets the value at the given index.
+    ///
+    /// # Panics
+    ///
+    /// This will panic if the index is out of range.
+    pub fn set_at<'a>(&mut self, index: usize, value: impl Into<&'a BStr>) {
+        let EntryData {
+            section_id,
+            offset_index,
+        } = self.indices_and_sizes[index];
+        MultiValueMut::set_value_inner(
+            &self.key,
+            &mut self.offsets,
+            &mut self.section.get_mut(&section_id).expect("known section id").body,
+            section_id,
+            offset_index,
+            value.into(),
+        );
+    }
+
+    /// Sets all values to the provided ones. Note that this follows [`zip`]
+    /// logic: if the number of values in the input is less than the number of
+    /// values currently existing, then only the first `n` values are modified.
+    /// If more values are provided than there currently are, then the
+    /// remaining values are ignored.
+    ///
+    /// [`zip`]: std::iter::Iterator::zip
+    pub fn set_values<'a, Iter, Item>(&mut self, values: Iter)
+    where
+        Iter: IntoIterator<Item = Item>,
+        Item: Into<&'a BStr>,
+    {
+        for (
+            EntryData {
+                section_id,
+                offset_index,
+            },
+            value,
+        ) in self.indices_and_sizes.iter().zip(values)
+        {
+            Self::set_value_inner(
+                &self.key,
+                &mut self.offsets,
+                &mut self.section.get_mut(section_id).expect("known section id").body,
+                *section_id,
+                *offset_index,
+                value.into(),
+            );
+        }
+    }
+
+    /// Sets all values in this multivar to the provided one without owning the
+    /// provided input.
+    pub fn set_all<'a>(&mut self, input: impl Into<&'a BStr>) {
+        let input = input.into();
+        for EntryData {
+            section_id,
+            offset_index,
+        } in &self.indices_and_sizes
+        {
+            Self::set_value_inner(
+                &self.key,
+                &mut self.offsets,
+                &mut self.section.get_mut(section_id).expect("known section id").body,
+                *section_id,
+                *offset_index,
+                input,
+            );
+        }
+    }
+
+    fn set_value_inner<'a: 'event>(
+        key: &section::Key<'lookup>,
+        offsets: &mut HashMap<SectionId, Vec<usize>>,
+        section: &mut file::section::Body<'event>,
+        section_id: SectionId,
+        offset_index: usize,
+        value: &BStr,
+    ) {
+        let (offset, size) = MultiValueMut::index_and_size(offsets, section_id, offset_index);
+        let whitespace = Whitespace::from_body(section);
+        let section = section.as_mut();
+        section.drain(offset..offset + size);
+
+        let key_sep_events = whitespace.key_value_separators();
+        MultiValueMut::set_offset(offsets, section_id, offset_index, 2 + key_sep_events.len());
+        section.insert(offset, Event::Value(escape_value(value).into()));
+        section.insert_many(offset, key_sep_events); // kept in order: [pre-ws], `=`, [post-ws]
+        section.insert(offset, Event::SectionKey(key.to_owned()));
+    }
+
+    /// Removes the value at the given index. Does nothing when called multiple
+    /// times in succession.
+    ///
+    /// # Panics
+    ///
+    /// This will panic if the index is out of range.
+    pub fn delete(&mut self, index: usize) {
+        let EntryData {
+            section_id,
+            offset_index,
+        } = &self.indices_and_sizes[index];
+        let (offset, size) = MultiValueMut::index_and_size(&self.offsets, *section_id, *offset_index);
+        if size == 0 {
+            return;
+        }
+        self.section
+            .get_mut(section_id)
+            .expect("known section id")
+            .body
+            .as_mut()
+            .drain(offset..offset + size);
+
+        Self::set_offset(&mut self.offsets, *section_id, *offset_index, 0);
+        self.indices_and_sizes.remove(index);
+    }
+
+    /// Removes all values. Does nothing when called multiple times in
+    /// succession.
+    pub fn delete_all(&mut self) {
+        for EntryData {
+            section_id,
+            offset_index,
+        } in &self.indices_and_sizes
+        {
+            let (offset, size) = MultiValueMut::index_and_size(&self.offsets, *section_id, *offset_index);
+            if size == 0 {
+                continue;
+            }
+            self.section
+                .get_mut(section_id)
+                .expect("known section id")
+                .body
+                .as_mut()
+                .drain(offset..offset + size);
+            Self::set_offset(&mut self.offsets, *section_id, *offset_index, 0);
+        }
+        self.indices_and_sizes.clear();
+    }
+
+    fn index_and_size(
+        offsets: &'lookup HashMap<SectionId, Vec<usize>>,
+        section_id: SectionId,
+        offset_index: usize,
+    ) -> (usize, usize) {
+        offsets
+            .get(&section_id)
+            .expect("known section id")
+            .iter()
+            .take(offset_index + 1)
+            .fold((0, 0), |(total_ofs, ofs), size| (total_ofs + ofs, *size))
+    }
+
+    // This must be an associated function rather than a method to allow Rust
+    // to split mutable borrows.
+    fn set_offset(
+        offsets: &mut HashMap<SectionId, Vec<usize>>,
+        section_id: SectionId,
+        offset_index: usize,
+        value: usize,
+    ) {
+        *offsets
+            .get_mut(&section_id)
+            .expect("known section id")
+            .get_mut(offset_index)
+            .unwrap()
+            .deref_mut() = value;
+    }
+}
diff --git a/vendor/gix-config/src/file/mutable/section.rs b/vendor/gix-config/src/file/mutable/section.rs
new file mode 100644
index 000000000..def68ac60
--- /dev/null
+++ b/vendor/gix-config/src/file/mutable/section.rs
@@ -0,0 +1,316 @@
+use std::{
+    borrow::Cow,
+    ops::{Deref, Range},
+};
+
+use bstr::{BStr, BString, ByteSlice, ByteVec};
+use smallvec::SmallVec;
+
+use crate::{
+    file::{
+        self,
+        mutable::{escape_value, Whitespace},
+        Index, Section, Size,
+    },
+    lookup, parse,
+    parse::{section::Key, Event},
+    value::{normalize, normalize_bstr, normalize_bstring},
+};
+
+/// An opaque type that represents a mutable reference to a section.
+#[derive(PartialEq, Eq, Hash, PartialOrd, Ord, Debug)]
+pub struct SectionMut<'a, 'event> {
+    section: &'a mut Section<'event>,
+    implicit_newline: bool,
+    whitespace: Whitespace<'event>,
+    newline: SmallVec<[u8; 2]>,
+}
+
+/// Mutating methods.
+impl<'a, 'event> SectionMut<'a, 'event> {
+    /// Adds an entry with `key` and `value` to the end of this section. If `value` is `None`, no equal sign will be written leaving
+    /// just the key. This is useful for boolean values which are true if merely the key exists.
+    pub fn push<'b>(&mut self, key: Key<'event>, value: Option<&'b BStr>) -> &mut Self {
+        self.push_with_comment_inner(key, value, None);
+        self
+    }
+
+    /// Adds an entry with `key` and `value` to the end of this section. If `value` is `None`, no equal sign will be written leaving
+    /// just the key. This is useful for boolean values which are true if merely the key exists.
+    /// `comment` has to be the text to put right after the value and behind a `#` character. Note that newlines are silently transformed
+    /// into spaces.
+    pub fn push_with_comment<'b, 'c>(
+        &mut self,
+        key: Key<'event>,
+        value: Option<&'b BStr>,
+        comment: impl Into<&'c BStr>,
+    ) -> &mut Self {
+        self.push_with_comment_inner(key, value, comment.into().into());
+        self
+    }
+
+    fn push_with_comment_inner(&mut self, key: Key<'event>, value: Option<&BStr>, comment: Option<&BStr>) {
+        let body = &mut self.section.body.0;
+        if let Some(ws) = &self.whitespace.pre_key {
+            body.push(Event::Whitespace(ws.clone()));
+        }
+
+        body.push(Event::SectionKey(key));
+        match value {
+            Some(value) => {
+                body.extend(self.whitespace.key_value_separators());
+                body.push(Event::Value(escape_value(value).into()));
+            }
+            None => body.push(Event::Value(Cow::Borrowed("".into()))),
+        }
+        if let Some(comment) = comment {
+            body.push(Event::Whitespace(Cow::Borrowed(" ".into())));
+            body.push(Event::Comment(parse::Comment {
+                tag: b'#',
+                text: Cow::Owned({
+                    let mut c = Vec::with_capacity(comment.len());
+                    let mut bytes = comment.iter().peekable();
+                    if !bytes.peek().map_or(true, |b| b.is_ascii_whitespace()) {
+                        c.push(b' ');
+                    }
+                    c.extend(bytes.map(|b| if *b == b'\n' { b' ' } else { *b }));
+                    c.into()
+                }),
+            }));
+        }
+        if self.implicit_newline {
+            body.push(Event::Newline(BString::from(self.newline.to_vec()).into()));
+        }
+    }
+
+    /// Removes all events until a key value pair is removed. This will also
+    /// remove the whitespace preceding the key value pair, if any is found.
+    pub fn pop(&mut self) -> Option<(Key<'_>, Cow<'event, BStr>)> {
+        let mut values = Vec::new();
+        // events are popped in reverse order
+        let body = &mut self.section.body.0;
+        while let Some(e) = body.pop() {
+            match e {
+                Event::SectionKey(k) => {
+                    // pop leading whitespace
+                    if let Some(Event::Whitespace(_)) = body.last() {
+                        body.pop();
+                    }
+
+                    if values.len() == 1 {
+                        let value = values.pop().expect("vec is non-empty but popped to empty value");
+                        return Some((k, normalize(value)));
+                    }
+
+                    return Some((
+                        k,
+                        normalize_bstring({
+                            let mut s = BString::default();
+                            for value in values.into_iter().rev() {
+                                s.push_str(value.as_ref());
+                            }
+                            s
+                        }),
+                    ));
+                }
+                Event::Value(v) | Event::ValueNotDone(v) | Event::ValueDone(v) => values.push(v),
+                _ => (),
+            }
+        }
+        None
+    }
+
+    /// Sets the last key value pair if it exists, or adds the new value.
+    /// Returns the previous value if it replaced a value, or None if it adds
+    /// the value.
+    pub fn set<'b>(&mut self, key: Key<'event>, value: impl Into<&'b BStr>) -> Option<Cow<'event, BStr>> {
+        match self.key_and_value_range_by(&key) {
+            None => {
+                self.push(key, Some(value.into()));
+                None
+            }
+            Some((key_range, value_range)) => {
+                let value_range = value_range.unwrap_or(key_range.end - 1..key_range.end);
+                let range_start = value_range.start;
+                let ret = self.remove_internal(value_range, false);
+                self.section
+                    .body
+                    .0
+                    .insert(range_start, Event::Value(escape_value(value.into()).into()));
+                Some(ret)
+            }
+        }
+    }
+
+    /// Removes the latest value by key and returns it, if it exists.
+    pub fn remove(&mut self, key: impl AsRef<str>) -> Option<Cow<'event, BStr>> {
+        let key = Key::from_str_unchecked(key.as_ref());
+        let (key_range, _value_range) = self.key_and_value_range_by(&key)?;
+        Some(self.remove_internal(key_range, true))
+    }
+
+    /// Adds a new line event. Note that you don't need to call this unless
+    /// you've disabled implicit newlines.
+    pub fn push_newline(&mut self) -> &mut Self {
+        self.section
+            .body
+            .0
+            .push(Event::Newline(Cow::Owned(BString::from(self.newline.to_vec()))));
+        self
+    }
+
+    /// Return the newline used when calling [`push_newline()`][Self::push_newline()].
+    pub fn newline(&self) -> &BStr {
+        self.newline.as_slice().as_bstr()
+    }
+
+    /// Enables or disables automatically adding newline events after adding
+    /// a value. This is _enabled by default_.
+    pub fn set_implicit_newline(&mut self, on: bool) -> &mut Self {
+        self.implicit_newline = on;
+        self
+    }
+
+    /// Sets the exact whitespace to use before each newly created key-value pair,
+    /// with only whitespace characters being permissible.
+    ///
+    /// Defaults to one tab for key-less sections, otherwise to the whitespace (if any) before the first key.
+    /// Set to `None` to disable adding whitespace before a key value.
+    ///
+    /// # Panics
+    ///
+    /// If non-whitespace characters are used. This makes the method only suitable for validated
+    /// or known input.
+    pub fn set_leading_whitespace(&mut self, whitespace: Option<Cow<'event, BStr>>) -> &mut Self {
+        assert!(
+            whitespace
+                .as_deref()
+                .map_or(true, |ws| ws.iter().all(|b| b.is_ascii_whitespace())),
+            "input whitespace must only contain whitespace characters."
+        );
+        self.whitespace.pre_key = whitespace;
+        self
+    }
+
+    /// Returns the whitespace this section will insert before the
+    /// beginning of a key, if any.
+    #[must_use]
+    pub fn leading_whitespace(&self) -> Option<&BStr> {
+        self.whitespace.pre_key.as_deref()
+    }
+
+    /// Returns the whitespace to be used before and after the `=` between the key
+    /// and the value.
+    ///
+    /// For example, `k = v` will have `(Some(" "), Some(" "))`, whereas `k=\tv` will
+    /// have `(None, Some("\t"))`.
+    #[must_use]
+    pub fn separator_whitespace(&self) -> (Option<&BStr>, Option<&BStr>) {
+        (self.whitespace.pre_sep.as_deref(), self.whitespace.post_sep.as_deref())
+    }
+}
+
+// Internal methods that may require exact indices for faster operations.
+impl<'a, 'event> SectionMut<'a, 'event> {
+    pub(crate) fn new(section: &'a mut Section<'event>, newline: SmallVec<[u8; 2]>) -> Self {
+        let whitespace = Whitespace::from_body(&section.body);
+        Self {
+            section,
+            implicit_newline: true,
+            whitespace,
+            newline,
+        }
+    }
+
+    pub(crate) fn get(
+        &self,
+        key: &Key<'_>,
+        start: Index,
+        end: Index,
+    ) -> Result<Cow<'_, BStr>, lookup::existing::Error> {
+        let mut expect_value = false;
+        let mut concatenated_value = BString::default();
+
+        for event in &self.section.0[start.0..end.0] {
+            match event {
+                Event::SectionKey(event_key) if event_key == key => expect_value = true,
+                Event::Value(v) if expect_value => return Ok(normalize_bstr(v.as_ref())),
+                Event::ValueNotDone(v) if expect_value => {
+                    concatenated_value.push_str(v.as_ref());
+                }
+                Event::ValueDone(v) if expect_value => {
+                    concatenated_value.push_str(v.as_ref());
+                    return Ok(normalize_bstring(concatenated_value));
+                }
+                _ => (),
+            }
+        }
+
+        Err(lookup::existing::Error::KeyMissing)
+    }
+
+    pub(crate) fn delete(&mut self, start: Index, end: Index) {
+        self.section.body.0.drain(start.0..end.0);
+    }
+
+    pub(crate) fn set_internal(&mut self, index: Index, key: Key<'event>, value: &BStr) -> Size {
+        let mut size = 0;
+
+        let body = &mut self.section.body.0;
+        body.insert(index.0, Event::Value(escape_value(value).into()));
+        size += 1;
+
+        let sep_events = self.whitespace.key_value_separators();
+        size += sep_events.len();
+        body.insert_many(index.0, sep_events); // kept in order: [pre-ws], `=`, [post-ws]
+
+        body.insert(index.0, Event::SectionKey(key));
+        size += 1;
+
+        Size(size)
+    }
+
+    /// Performs the removal, assuming the range is valid.
+    fn remove_internal(&mut self, range: Range<usize>, fix_whitespace: bool) -> Cow<'event, BStr> {
+        let events = &mut self.section.body.0;
+        if fix_whitespace
+            && events
+                .get(range.end)
+                .map_or(false, |ev| matches!(ev, Event::Newline(_)))
+        {
+            events.remove(range.end);
+        }
+        let value = events
+            .drain(range.clone())
+            .fold(Cow::Owned(BString::default()), |mut acc: Cow<'_, BStr>, e| {
+                if let Event::Value(v) | Event::ValueNotDone(v) | Event::ValueDone(v) = e {
+                    acc.to_mut().extend(&**v);
+                }
+                acc
+            });
+        if fix_whitespace
+            && range
+                .start
+                .checked_sub(1)
+                .and_then(|pos| events.get(pos))
+                .map_or(false, |ev| matches!(ev, Event::Whitespace(_)))
+        {
+            events.remove(range.start - 1);
+        }
+        value
+    }
+}
+
+impl<'event> Deref for SectionMut<'_, 'event> {
+    type Target = file::Section<'event>;
+
+    fn deref(&self) -> &Self::Target {
+        self.section
+    }
+}
+
+impl<'event> file::section::Body<'event> {
+    pub(crate) fn as_mut(&mut self) -> &mut parse::section::Events<'event> {
+        &mut self.0
+    }
+}
diff --git a/vendor/gix-config/src/file/mutable/value.rs b/vendor/gix-config/src/file/mutable/value.rs
new file mode 100644
index 000000000..2bccfd32a
--- /dev/null
+++ b/vendor/gix-config/src/file/mutable/value.rs
@@ -0,0 +1,63 @@
+use std::borrow::Cow;
+
+use bstr::BStr;
+
+use crate::{
+    file,
+    file::{mutable::section::SectionMut, Index, Size},
+    lookup,
+    parse::section,
+};
+
+/// An intermediate representation of a mutable value obtained from a [`File`][crate::File].
+#[derive(PartialEq, Eq, Hash, PartialOrd, Ord, Debug)]
+pub struct ValueMut<'borrow, 'lookup, 'event> {
+    pub(crate) section: SectionMut<'borrow, 'event>,
+    pub(crate) key: section::Key<'lookup>,
+    pub(crate) index: Index,
+    pub(crate) size: Size,
+}
+
+impl<'borrow, 'lookup, 'event> ValueMut<'borrow, 'lookup, 'event> {
+    /// Returns the actual value. This is computed each time this is called
+    /// requiring an allocation for multi-line values.
+    pub fn get(&self) -> Result<Cow<'_, BStr>, lookup::existing::Error> {
+        self.section.get(&self.key, self.index, self.index + self.size)
+    }
+
+    /// Update the value to the provided one. This modifies the value such that
+    /// the Value event(s) are replaced with a single new event containing the
+    /// new value.
+    pub fn set_string(&mut self, input: impl AsRef<str>) {
+        self.set(input.as_ref());
+    }
+
+    /// Update the value to the provided one. This modifies the value such that
+    /// the Value event(s) are replaced with a single new event containing the
+    /// new value.
+    pub fn set<'a>(&mut self, input: impl Into<&'a BStr>) {
+        if self.size.0 > 0 {
+            self.section.delete(self.index, self.index + self.size);
+        }
+        self.size = self.section.set_internal(self.index, self.key.to_owned(), input.into());
+    }
+
+    /// Removes the value. Does nothing when called multiple times in
+    /// succession.
+    pub fn delete(&mut self) {
+        if self.size.0 > 0 {
+            self.section.delete(self.index, self.index + self.size);
+            self.size = Size(0);
+        }
+    }
+
+    /// Return the section containing the value.
+    pub fn section(&self) -> &file::Section<'event> {
+        &self.section
+    }
+
+    /// Convert this value into its owning mutable section.
+    pub fn into_section_mut(self) -> file::SectionMut<'borrow, 'event> {
+        self.section
+    }
+}
diff --git a/vendor/gix-config/src/file/section/body.rs b/vendor/gix-config/src/file/section/body.rs
new file mode 100644
index 000000000..e1a53efd9
--- /dev/null
+++ b/vendor/gix-config/src/file/section/body.rs
@@ -0,0 +1,206 @@
+use std::{borrow::Cow, iter::FusedIterator, ops::Range};
+
+use bstr::{BStr, BString, ByteVec};
+
+use crate::{
+    parse::{section::Key, Event},
+    value::{normalize, normalize_bstr, normalize_bstring},
+};
+
+/// An opaque type that represents a section body.
+#[derive(PartialEq, Eq, Hash, PartialOrd, Ord, Clone, Debug, Default)]
+pub struct Body<'event>(pub(crate) crate::parse::section::Events<'event>);
+
+/// Access
+impl<'event> Body<'event> {
+    /// Retrieves the last matching value in a section with the given key, if present.
+    ///
+    /// Note that we consider values without key separator `=` non-existing.
+    #[must_use]
+    pub fn value(&self, key: impl AsRef<str>) -> Option<Cow<'_, BStr>> {
+        self.value_implicit(key).flatten()
+    }
+
+    /// Retrieves the last matching value in a section with the given key, if present, and indicates an implicit value with `Some(None)`,
+    /// and a non-existing one as `None`.
+    #[must_use]
+    pub fn value_implicit(&self, key: impl AsRef<str>) -> Option<Option<Cow<'_, BStr>>> {
+        let key = Key::from_str_unchecked(key.as_ref());
+        let (_key_range, range) = self.key_and_value_range_by(&key)?;
+        let range = match range {
+            None => return Some(None),
+            Some(range) => range,
+        };
+        let mut concatenated = BString::default();
+
+        for event in &self.0[range] {
+            match event {
+                Event::Value(v) => {
+                    return Some(Some(normalize_bstr(v.as_ref())));
+                }
+                Event::ValueNotDone(v) => {
+                    concatenated.push_str(v.as_ref());
+                }
+                Event::ValueDone(v) => {
+                    concatenated.push_str(v.as_ref());
+                    return Some(Some(normalize_bstring(concatenated)));
+                }
+                _ => (),
+            }
+        }
+        None
+    }
+
+    /// Retrieves all values that have the provided key name. This may return
+    /// an empty vec, which implies there were no values with the provided key.
+    #[must_use]
+    pub fn values(&self, key: impl AsRef<str>) -> Vec<Cow<'_, BStr>> {
+        let key = &Key::from_str_unchecked(key.as_ref());
+        let mut values = Vec::new();
+        let mut expect_value = false;
+        let mut concatenated_value = BString::default();
+
+        for event in &self.0 {
+            match event {
+                Event::SectionKey(event_key) if event_key == key => expect_value = true,
+                Event::Value(v) if expect_value => {
+                    expect_value = false;
+                    values.push(normalize_bstr(v.as_ref()));
+                }
+                Event::ValueNotDone(v) if expect_value => {
+                    concatenated_value.push_str(v.as_ref());
+                }
+                Event::ValueDone(v) if expect_value => {
+                    expect_value = false;
+                    concatenated_value.push_str(v.as_ref());
+                    values.push(normalize_bstring(std::mem::take(&mut concatenated_value)));
+                }
+                _ => (),
+            }
+        }
+
+        values
+    }
+
+    /// Returns an iterator visiting all keys in order.
+    pub fn keys(&self) -> impl Iterator<Item = &Key<'event>> {
+        self.0.iter().filter_map(|e| match e {
+            Event::SectionKey(k) => Some(k),
+            _ => None,
+        })
+    }
+
+    /// Returns true if the section contains the provided key.
+    #[must_use]
+    pub fn contains_key(&self, key: impl AsRef<str>) -> bool {
+        let key = &Key::from_str_unchecked(key.as_ref());
+        self.0.iter().any(|e| {
+            matches!(e,
+                Event::SectionKey(k) if k == key
+            )
+        })
+    }
+
+    /// Returns the number of values in the section.
+    #[must_use]
+    pub fn num_values(&self) -> usize {
+        self.0.iter().filter(|e| matches!(e, Event::SectionKey(_))).count()
+    }
+
+    /// Returns if the section is empty.
+    /// Note that this may count whitespace, see [`num_values()`][Self::num_values()] for
+    /// another way to determine semantic emptiness.
+    #[must_use]
+    pub fn is_void(&self) -> bool {
+        self.0.is_empty()
+    }
+}
+
+impl<'event> Body<'event> {
+    pub(crate) fn as_ref(&self) -> &[Event<'_>] {
+        &self.0
+    }
+
+    /// Returns the range containing the value events for the `key`, with value range being `None` if there is no key-value separator
+    /// and only a 'fake' Value event with an empty string inside.
+    /// If the value is not found, `None` is returned.
+    pub(crate) fn key_and_value_range_by(&self, key: &Key<'_>) -> Option<(Range<usize>, Option<Range<usize>>)> {
+        let mut value_range = Range::default();
+        let mut key_start = None;
+        for (i, e) in self.0.iter().enumerate().rev() {
+            match e {
+                Event::SectionKey(k) => {
+                    if k == key {
+                        key_start = Some(i);
+                        break;
+                    }
+                    value_range = Range::default();
+                }
+                Event::Value(_) => {
+                    (value_range.start, value_range.end) = (i, i);
+                }
+                Event::ValueNotDone(_) | Event::ValueDone(_) => {
+                    if value_range.end == 0 {
+                        value_range.end = i
+                    } else {
+                        value_range.start = i
+                    };
+                }
+                _ => (),
+            }
+        }
+        key_start.map(|key_start| {
+            // value end needs to be offset by one so that the last value's index
+            // is included in the range
+            let value_range = value_range.start..value_range.end + 1;
+            let key_range = key_start..value_range.end;
+            (key_range, (value_range.start != key_start + 1).then_some(value_range))
+        })
+    }
+}
+
+/// An owning iterator of a section body. Created by [`Body::into_iter`], yielding
+/// normalized (`key`, `value`) pairs.
+// TODO: tests
+pub struct BodyIter<'event>(smallvec::IntoIter<[Event<'event>; 64]>);
+
+impl<'event> IntoIterator for Body<'event> {
+    type Item = (Key<'event>, Cow<'event, BStr>);
+
+    type IntoIter = BodyIter<'event>;
+
+    fn into_iter(self) -> Self::IntoIter {
+        BodyIter(self.0.into_iter())
+    }
+}
+
+impl<'event> Iterator for BodyIter<'event> {
+    type Item = (Key<'event>, Cow<'event, BStr>);
+
+    fn next(&mut self) -> Option<Self::Item> {
+        let mut key = None;
+        let mut partial_value = BString::default();
+        let mut value = None;
+
+        for event in self.0.by_ref() {
+            match event {
+                Event::SectionKey(k) => key = Some(k),
+                Event::Value(v) => {
+                    value = Some(v);
+                    break;
+                }
+                Event::ValueNotDone(v) => partial_value.push_str(v.as_ref()),
+                Event::ValueDone(v) => {
+                    partial_value.push_str(v.as_ref());
+                    value = Some(partial_value.into());
+                    break;
+                }
+                _ => (),
+            }
+        }
+
+        key.zip(value.map(normalize))
+    }
+}
+
+impl FusedIterator for BodyIter<'_> {}
diff --git a/vendor/gix-config/src/file/section/mod.rs b/vendor/gix-config/src/file/section/mod.rs
new file mode 100644
index 000000000..e8e331084
--- /dev/null
+++ b/vendor/gix-config/src/file/section/mod.rs
@@ -0,0 +1,145 @@
+use std::{borrow::Cow, ops::Deref};
+
+use bstr::{BStr, BString, ByteSlice};
+use smallvec::SmallVec;
+
+use crate::{
+    file,
+    file::{Metadata, Section, SectionMut},
+    parse,
+    parse::{section, Event},
+};
+
+pub(crate) mod body;
+pub use body::{Body, BodyIter};
+use gix_features::threading::OwnShared;
+
+use crate::file::{
+    write::{extract_newline, platform_newline},
+    SectionId,
+};
+
+impl<'a> Deref for Section<'a> {
+    type Target = Body<'a>;
+
+    fn deref(&self) -> &Self::Target {
+        &self.body
+    }
+}
+
+/// Instantiation and conversion
+impl<'a> Section<'a> {
+    /// Create a new section with the given `name`, an optional `subsection`, `meta`-data and an empty body.
+    pub fn new(
+        name: impl Into<Cow<'a, str>>,
+        subsection: impl Into<Option<Cow<'a, BStr>>>,
+        meta: impl Into<OwnShared<file::Metadata>>,
+    ) -> Result<Self, parse::section::header::Error> {
+        Ok(Section {
+            header: parse::section::Header::new(name, subsection)?,
+            body: Default::default(),
+            meta: meta.into(),
+            id: SectionId::default(),
+        })
+    }
+}
+
+/// Access
+impl<'a> Section<'a> {
+    /// Return our header.
+    pub fn header(&self) -> &section::Header<'a> {
+        &self.header
+    }
+
+    /// Return the unique `id` of the section, for use with the `*_by_id()` family of methods
+    /// in [gix_config::File][crate::File].
+    pub fn id(&self) -> SectionId {
+        self.id
+    }
+
+    /// Return our body, containing all keys and values.
+    pub fn body(&self) -> &Body<'a> {
+        &self.body
+    }
+
+    /// Serialize this type into a `BString` for convenience.
+    ///
+    /// Note that `to_string()` can also be used, but might not be lossless.
+    #[must_use]
+    pub fn to_bstring(&self) -> BString {
+        let mut buf = Vec::new();
+        self.write_to(&mut buf).expect("io error impossible");
+        buf.into()
+    }
+
+    /// Stream ourselves to the given `out`, in order to reproduce this section mostly losslessly
+    /// as it was parsed.
+    pub fn write_to(&self, mut out: impl std::io::Write) -> std::io::Result<()> {
+        self.header.write_to(&mut out)?;
+
+        if self.body.0.is_empty() {
+            return Ok(());
+        }
+
+        let nl = self
+            .body
+            .as_ref()
+            .iter()
+            .find_map(extract_newline)
+            .unwrap_or_else(|| platform_newline());
+
+        if !self
+            .body
+            .as_ref()
+            .iter()
+            .take_while(|e| !matches!(e, Event::SectionKey(_)))
+            .any(|e| e.to_bstr_lossy().contains_str(nl))
+        {
+            out.write_all(nl)?;
+        }
+
+        let mut saw_newline_after_value = true;
+        let mut in_key_value_pair = false;
+        for (idx, event) in self.body.as_ref().iter().enumerate() {
+            match event {
+                Event::SectionKey(_) => {
+                    if !saw_newline_after_value {
+                        out.write_all(nl)?;
+                    }
+                    saw_newline_after_value = false;
+                    in_key_value_pair = true;
+                }
+                Event::Newline(_) if !in_key_value_pair => {
+                    saw_newline_after_value = true;
+                }
+                Event::Value(_) | Event::ValueDone(_) => {
+                    in_key_value_pair = false;
+                }
+                _ => {}
+            }
+            event.write_to(&mut out)?;
+            if let Event::ValueNotDone(_) = event {
+                if self
+                    .body
+                    .0
+                    .get(idx + 1)
+                    .filter(|e| matches!(e, Event::Newline(_)))
+                    .is_none()
+                {
+                    out.write_all(nl)?;
+                }
+            }
+        }
+        Ok(())
+    }
+
+    /// Return additional information about this section's origin.
+    pub fn meta(&self) -> &Metadata {
+        &self.meta
+    }
+
+    /// Returns a mutable version of this section for adjustment of values.
+ pub fn to_mut(&mut self, newline: SmallVec<[u8; 2]>) -> SectionMut<'_, 'a> { + SectionMut::new(self, newline) + } +} diff --git a/vendor/gix-config/src/file/tests.rs b/vendor/gix-config/src/file/tests.rs new file mode 100644 index 000000000..c218dbaac --- /dev/null +++ b/vendor/gix-config/src/file/tests.rs @@ -0,0 +1,228 @@ +use std::collections::HashMap; + +use crate::{ + file::{self, Section, SectionId}, + parse::section, +}; + +mod try_from { + use std::{borrow::Cow, collections::HashMap, convert::TryFrom}; + + use super::{bodies, headers}; + use crate::{ + file::{self, SectionBodyIdsLut, SectionId}, + parse::{ + section, + tests::util::{name_event, newline_event, section_header, value_event}, + Event, + }, + File, + }; + + #[test] + fn empty() { + let config = File::try_from("").unwrap(); + assert_eq!(config.section_id_counter, 0); + assert!(config.section_lookup_tree.is_empty()); + assert!(config.sections.is_empty()); + assert!(config.section_order.is_empty()); + } + + #[test] + fn single_section() { + let mut config = File::try_from("[core]\na=b\nc=d").unwrap(); + let expected_separators = { + let mut map = HashMap::new(); + map.insert(SectionId(0), section_header("core", None)); + map + }; + assert_eq!(headers(&config.sections), expected_separators); + assert_eq!(config.section_id_counter, 1); + let expected_lookup_tree = { + let mut tree = HashMap::new(); + tree.insert( + section::Name(Cow::Borrowed("core".into())), + vec![SectionBodyIdsLut::Terminal(vec![SectionId(0)])], + ); + tree + }; + assert_eq!(config.section_lookup_tree, expected_lookup_tree); + let expected_sections = { + let mut sections = HashMap::new(); + sections.insert( + SectionId(0), + file::section::Body( + vec![ + newline_event(), + name_event("a"), + Event::KeyValueSeparator, + value_event("b"), + newline_event(), + name_event("c"), + Event::KeyValueSeparator, + value_event("d"), + ] + .into(), + ), + ); + sections + }; + assert_eq!(bodies(&config.sections), expected_sections); + 
assert_eq!(config.section_order.make_contiguous(), &[SectionId(0)]); + } + + #[test] + fn single_subsection() { + let mut config = File::try_from("[core.sub]\na=b\nc=d").unwrap(); + let expected_separators = { + let mut map = HashMap::new(); + map.insert(SectionId(0), section_header("core", (".", "sub"))); + map + }; + assert_eq!(headers(&config.sections), expected_separators); + assert_eq!(config.section_id_counter, 1); + let expected_lookup_tree = { + let mut tree = HashMap::new(); + let mut inner_tree = HashMap::new(); + inner_tree.insert(Cow::Borrowed("sub".into()), vec![SectionId(0)]); + tree.insert( + section::Name(Cow::Borrowed("core".into())), + vec![SectionBodyIdsLut::NonTerminal(inner_tree)], + ); + tree + }; + assert_eq!(config.section_lookup_tree, expected_lookup_tree); + let expected_sections = { + let mut sections = HashMap::new(); + sections.insert( + SectionId(0), + file::section::Body( + vec![ + newline_event(), + name_event("a"), + Event::KeyValueSeparator, + value_event("b"), + newline_event(), + name_event("c"), + Event::KeyValueSeparator, + value_event("d"), + ] + .into(), + ), + ); + sections + }; + assert_eq!(bodies(&config.sections), expected_sections); + assert_eq!(config.section_order.make_contiguous(), &[SectionId(0)]); + } + + #[test] + fn multiple_sections() { + let mut config = File::try_from("[core]\na=b\nc=d\n[other]e=f").unwrap(); + let expected_separators = { + let mut map = HashMap::new(); + map.insert(SectionId(0), section_header("core", None)); + map.insert(SectionId(1), section_header("other", None)); + map + }; + assert_eq!(headers(&config.sections), expected_separators); + assert_eq!(config.section_id_counter, 2); + let expected_lookup_tree = { + let mut tree = HashMap::new(); + tree.insert( + section::Name(Cow::Borrowed("core".into())), + vec![SectionBodyIdsLut::Terminal(vec![SectionId(0)])], + ); + tree.insert( + section::Name(Cow::Borrowed("other".into())), + vec![SectionBodyIdsLut::Terminal(vec![SectionId(1)])], + ); + 
tree + }; + assert_eq!(config.section_lookup_tree, expected_lookup_tree); + let expected_sections = { + let mut sections = HashMap::new(); + sections.insert( + SectionId(0), + file::section::Body( + vec![ + newline_event(), + name_event("a"), + Event::KeyValueSeparator, + value_event("b"), + newline_event(), + name_event("c"), + Event::KeyValueSeparator, + value_event("d"), + newline_event(), + ] + .into(), + ), + ); + sections.insert( + SectionId(1), + file::section::Body(vec![name_event("e"), Event::KeyValueSeparator, value_event("f")].into()), + ); + sections + }; + assert_eq!(bodies(&config.sections), expected_sections); + assert_eq!(config.section_order.make_contiguous(), &[SectionId(0), SectionId(1)]); + } + + #[test] + fn multiple_duplicate_sections() { + let mut config = File::try_from("[core]\na=b\nc=d\n[core]e=f").unwrap(); + let expected_separators = { + let mut map = HashMap::new(); + map.insert(SectionId(0), section_header("core", None)); + map.insert(SectionId(1), section_header("core", None)); + map + }; + assert_eq!(headers(&config.sections), expected_separators); + assert_eq!(config.section_id_counter, 2); + let expected_lookup_tree = { + let mut tree = HashMap::new(); + tree.insert( + section::Name(Cow::Borrowed("core".into())), + vec![SectionBodyIdsLut::Terminal(vec![SectionId(0), SectionId(1)])], + ); + tree + }; + assert_eq!(config.section_lookup_tree, expected_lookup_tree); + let expected_sections = { + let mut sections = HashMap::new(); + sections.insert( + SectionId(0), + file::section::Body( + vec![ + newline_event(), + name_event("a"), + Event::KeyValueSeparator, + value_event("b"), + newline_event(), + name_event("c"), + Event::KeyValueSeparator, + value_event("d"), + newline_event(), + ] + .into(), + ), + ); + sections.insert( + SectionId(1), + file::section::Body(vec![name_event("e"), Event::KeyValueSeparator, value_event("f")].into()), + ); + sections + }; + assert_eq!(bodies(&config.sections), expected_sections); + 
assert_eq!(config.section_order.make_contiguous(), &[SectionId(0), SectionId(1)]); + } +} + +fn headers<'a>(sections: &HashMap>) -> HashMap> { + sections.iter().map(|(k, v)| (*k, v.header.clone())).collect() +} + +fn bodies<'a>(sections: &HashMap>) -> HashMap> { + sections.iter().map(|(k, v)| (*k, v.body.clone())).collect() +} diff --git a/vendor/gix-config/src/file/util.rs b/vendor/gix-config/src/file/util.rs new file mode 100644 index 000000000..5c60f1fd5 --- /dev/null +++ b/vendor/gix-config/src/file/util.rs @@ -0,0 +1,190 @@ +use std::{cmp::Ordering, collections::HashMap}; + +use bstr::BStr; + +use crate::{ + file::{self, SectionBodyIdsLut, SectionId}, + lookup, + parse::section, + File, +}; + +/// Private helper functions +impl<'event> File<'event> { + /// Adds a new section to the config file, returning the section id of the newly added section. + pub(crate) fn push_section_internal(&mut self, mut section: file::Section<'event>) -> SectionId { + let new_section_id = SectionId(self.section_id_counter); + section.id = new_section_id; + self.sections.insert(new_section_id, section); + let header = &self.sections[&new_section_id].header; + let lookup = self.section_lookup_tree.entry(header.name.clone()).or_default(); + + let mut found_node = false; + if let Some(subsection_name) = header.subsection_name.clone() { + for node in lookup.iter_mut() { + if let SectionBodyIdsLut::NonTerminal(subsections) = node { + found_node = true; + subsections + .entry(subsection_name.clone()) + .or_default() + .push(new_section_id); + break; + } + } + if !found_node { + let mut map = HashMap::new(); + map.insert(subsection_name, vec![new_section_id]); + lookup.push(SectionBodyIdsLut::NonTerminal(map)); + } + } else { + for node in lookup.iter_mut() { + if let SectionBodyIdsLut::Terminal(vec) = node { + found_node = true; + vec.push(new_section_id); + break; + } + } + if !found_node { + lookup.push(SectionBodyIdsLut::Terminal(vec![new_section_id])); + } + } + 
self.section_order.push_back(new_section_id); + self.section_id_counter += 1; + new_section_id + } + + /// Inserts `section` after the section that comes `before` it, and maintains correct ordering in all of our lookup structures. + pub(crate) fn insert_section_after(&mut self, mut section: file::Section<'event>, before: SectionId) -> SectionId { + let lookup_section_order = { + let section_order = &self.section_order; + move |section_id| { + section_order + .iter() + .enumerate() + .find_map(|(idx, id)| (*id == section_id).then_some(idx)) + .expect("before-section exists") + } + }; + + let before_order = lookup_section_order(before); + let new_section_id = SectionId(self.section_id_counter); + section.id = new_section_id; + self.sections.insert(new_section_id, section); + let header = &self.sections[&new_section_id].header; + let lookup = self.section_lookup_tree.entry(header.name.clone()).or_default(); + + let mut found_node = false; + if let Some(subsection_name) = header.subsection_name.clone() { + for node in lookup.iter_mut() { + if let SectionBodyIdsLut::NonTerminal(subsections) = node { + found_node = true; + let sections_with_name_and_subsection_name = + subsections.entry(subsection_name.clone()).or_default(); + let insert_pos = find_insert_pos_by_order( + sections_with_name_and_subsection_name, + before_order, + lookup_section_order, + ); + sections_with_name_and_subsection_name.insert(insert_pos, new_section_id); + break; + } + } + if !found_node { + let mut map = HashMap::new(); + map.insert(subsection_name, vec![new_section_id]); + lookup.push(SectionBodyIdsLut::NonTerminal(map)); + } + } else { + for node in lookup.iter_mut() { + if let SectionBodyIdsLut::Terminal(sections_with_name) = node { + found_node = true; + let insert_pos = find_insert_pos_by_order(sections_with_name, before_order, lookup_section_order); + sections_with_name.insert(insert_pos, new_section_id); + break; + } + } + if !found_node { + 
lookup.push(SectionBodyIdsLut::Terminal(vec![new_section_id])); + } + } + + self.section_order.insert(before_order + 1, new_section_id); + self.section_id_counter += 1; + new_section_id + } + + /// Returns the mapping between section and subsection name to section ids. + pub(crate) fn section_ids_by_name_and_subname<'a>( + &'a self, + section_name: &'a str, + subsection_name: Option<&BStr>, + ) -> Result + ExactSizeIterator + DoubleEndedIterator + '_, lookup::existing::Error> + { + let section_name = section::Name::from_str_unchecked(section_name); + let section_ids = self + .section_lookup_tree + .get(§ion_name) + .ok_or(lookup::existing::Error::SectionMissing)?; + let mut maybe_ids = None; + if let Some(subsection_name) = subsection_name { + for node in section_ids { + if let SectionBodyIdsLut::NonTerminal(subsection_lookup) = node { + maybe_ids = subsection_lookup.get(subsection_name).map(|v| v.iter().copied()); + break; + } + } + } else { + for node in section_ids { + if let SectionBodyIdsLut::Terminal(subsection_lookup) = node { + maybe_ids = Some(subsection_lookup.iter().copied()); + break; + } + } + } + maybe_ids.ok_or(lookup::existing::Error::SubSectionMissing) + } + + pub(crate) fn section_ids_by_name<'a>( + &'a self, + section_name: &'a str, + ) -> Result + '_, lookup::existing::Error> { + let section_name = section::Name::from_str_unchecked(section_name); + match self.section_lookup_tree.get(§ion_name) { + Some(lookup) => { + let mut lut = Vec::with_capacity(self.section_order.len()); + for node in lookup { + match node { + SectionBodyIdsLut::Terminal(v) => lut.extend(v.iter().copied()), + SectionBodyIdsLut::NonTerminal(v) => lut.extend(v.values().flatten().copied()), + } + } + + Ok(self.section_order.iter().filter(move |a| lut.contains(a)).copied()) + } + None => Err(lookup::existing::Error::SectionMissing), + } + } +} + +fn find_insert_pos_by_order( + sections_with_name: &[SectionId], + before_order: usize, + lookup_section_order: impl Fn(SectionId) -> 
usize, +) -> usize { + let mut insert_pos = sections_with_name.len(); // push back by default + for (idx, candidate_id) in sections_with_name.iter().enumerate() { + let candidate_order = lookup_section_order(*candidate_id); + match candidate_order.cmp(&before_order) { + Ordering::Less => {} + Ordering::Equal => { + insert_pos = idx + 1; // insert right after this one + break; + } + Ordering::Greater => { + insert_pos = idx; // insert before this one + break; + } + } + } + insert_pos +} diff --git a/vendor/gix-config/src/file/write.rs b/vendor/gix-config/src/file/write.rs new file mode 100644 index 000000000..29024170d --- /dev/null +++ b/vendor/gix-config/src/file/write.rs @@ -0,0 +1,93 @@ +use bstr::{BStr, BString, ByteSlice}; + +use crate::{file::Section, parse::Event, File}; + +impl File<'_> { + /// Serialize this type into a `BString` for convenience. + /// + /// Note that `to_string()` can also be used, but might not be lossless. + #[must_use] + pub fn to_bstring(&self) -> BString { + let mut buf = Vec::new(); + self.write_to(&mut buf).expect("io error impossible"); + buf.into() + } + + /// Stream ourselves to the given `out` in order to reproduce this file mostly losslessly + /// as it was parsed, while writing only sections for which `filter` returns true. 
+    pub fn write_to_filter(
+        &self,
+        mut out: impl std::io::Write,
+        mut filter: impl FnMut(&Section<'_>) -> bool,
+    ) -> std::io::Result<()> {
+        let nl = self.detect_newline_style();
+
+        {
+            for event in self.frontmatter_events.as_ref() {
+                event.write_to(&mut out)?;
+            }
+
+            // Only separate the frontmatter from the sections if at least one section passes `filter`.
+            if !ends_with_newline(self.frontmatter_events.as_ref(), nl, true) && self.sections.values().any(&mut filter)
+            {
+                out.write_all(nl)?;
+            }
+        }
+
+        let mut prev_section_ended_with_newline = true;
+        for section_id in &self.section_order {
+            let section = self.sections.get(section_id).expect("known section-id");
+            if !filter(section) {
+                continue;
+            }
+            // Write the separator only once we know this section is actually written.
+            // The flag is only updated after a section was written, so emitting the newline
+            // before consulting `filter` would repeat it for every skipped section.
+            if !prev_section_ended_with_newline {
+                out.write_all(nl)?;
+            }
+            section.write_to(&mut out)?;
+
+            prev_section_ended_with_newline = ends_with_newline(section.body.0.as_ref(), nl, false);
+            if let Some(post_matter) = self.frontmatter_post_section.get(section_id) {
+                if !prev_section_ended_with_newline {
+                    out.write_all(nl)?;
+                }
+                for event in post_matter {
+                    event.write_to(&mut out)?;
+                }
+                // Here the output ends with a newline (the section did, or one was just written),
+                // so `true` is the correct answer should there be no post-section events at all.
+                prev_section_ended_with_newline = ends_with_newline(post_matter, nl, true);
+            }
+        }
+
+        // Terminate the last written line if it isn't terminated yet.
+        if !prev_section_ended_with_newline {
+            out.write_all(nl)?;
+        }
+
+        Ok(())
+    }
+
+    /// Stream ourselves to the given `out`, in order to reproduce this file mostly losslessly
+    /// as it was parsed.
+ pub fn write_to(&self, out: impl std::io::Write) -> std::io::Result<()> { + self.write_to_filter(out, |_| true) + } +} + +pub(crate) fn ends_with_newline(e: &[crate::parse::Event<'_>], nl: impl AsRef<[u8]>, default: bool) -> bool { + if e.is_empty() { + return default; + } + e.iter() + .rev() + .take_while(|e| e.to_bstr_lossy().iter().all(|b| b.is_ascii_whitespace())) + .find_map(|e| e.to_bstr_lossy().contains_str(nl.as_ref()).then_some(true)) + .unwrap_or(false) +} + +pub(crate) fn extract_newline<'a>(e: &'a Event<'_>) -> Option<&'a BStr> { + match e { + Event::Newline(b) => b.as_ref().into(), + _ => None, + } +} + +pub(crate) fn platform_newline() -> &'static BStr { + if cfg!(windows) { "\r\n" } else { "\n" }.into() +} diff --git a/vendor/gix-config/src/lib.rs b/vendor/gix-config/src/lib.rs new file mode 100644 index 000000000..9b2afd692 --- /dev/null +++ b/vendor/gix-config/src/lib.rs @@ -0,0 +1,52 @@ +//! # `gix_config` +//! +//! This crate is a high performance `gix-config` file reader and writer. It +//! exposes a high level API to parse, read, and write [`gix-config` files]. +//! +//! This crate has a few primary offerings and various accessory functions. The +//! table below gives a brief explanation of all offerings, loosely in order +//! from the highest to lowest abstraction. +//! +//! | Offering | Description | Zero-copy? | +//! | ------------- | --------------------------------------------------- | ----------------- | +//! | [`File`] | Accelerated wrapper for reading and writing values. | On some reads[^1] | +//! | [`parse::State`] | Syntactic events for `gix-config` files. | Yes | +//! | value wrappers | Wrappers for `gix-config` value types. | Yes | +//! +//! This crate also exposes efficient value normalization which unescapes +//! characters and removes quotes through the `normalize_*` family of functions, +//! located in the [`value`] module. +//! +//! # Known differences to the `git config` specification +//! +//! 
- Legacy headers like `[section.subsection]` are supposed to be turned into to lower case and compared +//! case-sensitively. We keep its case and compare case-insensitively. +//! +//! [^1]: When read values do not need normalization and it wasn't parsed in 'owned' mode. +//! +//! [`gix-config` files]: https://git-scm.com/docs/gix-config#_configuration_file +//! [`File`]: crate::File +//! [`parse::State`]: crate::parse::Events +//! [`nom`]: https://github.com/Geal/nom +//! +//! ## Feature Flags +#![cfg_attr( + feature = "document-features", + cfg_attr(doc, doc = ::document_features::document_features!()) +)] +#![cfg_attr(docsrs, feature(doc_cfg, doc_auto_cfg))] +#![deny(missing_docs, rust_2018_idioms, unsafe_code)] + +pub mod file; + +/// +pub mod lookup; +pub mod parse; +/// +pub mod value; +pub use gix_config_value::{color, integer, path, Boolean, Color, Integer, Path}; + +mod types; +pub use types::{File, Source}; +/// +pub mod source; diff --git a/vendor/gix-config/src/lookup.rs b/vendor/gix-config/src/lookup.rs new file mode 100644 index 000000000..781497812 --- /dev/null +++ b/vendor/gix-config/src/lookup.rs @@ -0,0 +1,24 @@ +/// The error when looking up a value, for example via [`File::try_value()`][crate::File::try_value()]. +#[derive(Debug, thiserror::Error)] +#[allow(missing_docs)] +pub enum Error { + #[error(transparent)] + ValueMissing(#[from] existing::Error), + #[error(transparent)] + FailedConversion(E), +} + +/// +pub mod existing { + /// The error when looking up a value that doesn't exist, for example via [`File::value()`][crate::File::value()]. 
+ #[derive(Debug, thiserror::Error)] + #[allow(missing_docs)] + pub enum Error { + #[error("The requested section does not exist")] + SectionMissing, + #[error("The requested subsection does not exist")] + SubSectionMissing, + #[error("The key does not exist in the requested section")] + KeyMissing, + } +} diff --git a/vendor/gix-config/src/parse/comment.rs b/vendor/gix-config/src/parse/comment.rs new file mode 100644 index 000000000..6d4bb15ff --- /dev/null +++ b/vendor/gix-config/src/parse/comment.rs @@ -0,0 +1,50 @@ +use std::{borrow::Cow, fmt::Display}; + +use bstr::BString; + +use crate::parse::Comment; + +impl Comment<'_> { + /// Turn this instance into a fully owned one with `'static` lifetime. + #[must_use] + pub fn to_owned(&self) -> Comment<'static> { + Comment { + tag: self.tag, + text: Cow::Owned(self.text.as_ref().into()), + } + } + + /// Serialize this type into a `BString` for convenience. + /// + /// Note that `to_string()` can also be used, but might not be lossless. + #[must_use] + pub fn to_bstring(&self) -> BString { + let mut buf = Vec::new(); + self.write_to(&mut buf).expect("io error impossible"); + buf.into() + } + + /// Stream ourselves to the given `out`, in order to reproduce this comment losslessly. 
+    pub fn write_to(&self, mut out: impl std::io::Write) -> std::io::Result<()> {
+        // The comment marker byte comes first, followed by the comment text.
+        out.write_all(&[self.tag])?;
+        out.write_all(self.text.as_ref())
+    }
+}
+
+impl Display for Comment<'_> {
+    fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
+        Display::fmt(&self.to_bstring(), f)
+    }
+}
+
+impl From<Comment<'_>> for BString {
+    fn from(c: Comment<'_>) -> Self {
+        // Not `c.into()`: via the blanket `Into` impl that resolves to this very
+        // function and recurses until the stack overflows.
+        c.to_bstring()
+    }
+}
+
+impl From<&Comment<'_>> for BString {
+    fn from(c: &Comment<'_>) -> Self {
+        c.to_bstring()
+    }
+}
diff --git a/vendor/gix-config/src/parse/error.rs b/vendor/gix-config/src/parse/error.rs
new file mode 100644
index 000000000..1f469ee4c
--- /dev/null
+++ b/vendor/gix-config/src/parse/error.rs
@@ -0,0 +1,64 @@
+use std::fmt::Display;
+
+use crate::parse::Error;
+
+/// A list of parsers that parsing can fail on. This is used for pretty-printing errors
+#[derive(PartialEq, Debug, Clone, Copy)]
+pub(crate) enum ParseNode {
+    SectionHeader,
+    Name,
+    Value,
+}
+
+impl Display for ParseNode {
+    fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
+        match self {
+            Self::SectionHeader => write!(f, "section header"),
+            Self::Name => write!(f, "name"),
+            Self::Value => write!(f, "value"),
+        }
+    }
+}
+
+impl Error {
+    /// The one-indexed line number where the error occurred. This is determined
+    /// by the number of newlines that were successfully parsed.
+    #[must_use]
+    pub const fn line_number(&self) -> usize {
+        self.line_number + 1
+    }
+
+    /// The data that was left unparsed, which contains the cause of the parse error.
+ #[must_use] + pub fn remaining_data(&self) -> &[u8] { + &self.parsed_until + } +} + +impl Display for Error { + fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result { + write!( + f, + "Got an unexpected token on line {} while trying to parse a {}: ", + self.line_number + 1, + self.last_attempted_parser, + )?; + + let data_size = self.parsed_until.len(); + let data = std::str::from_utf8(&self.parsed_until); + match (data, data_size) { + (Ok(data), _) if data_size > 10 => { + write!( + f, + "'{}' ... ({} characters omitted)", + &data.chars().take(10).collect::(), + data_size - 10 + ) + } + (Ok(data), _) => write!(f, "'{data}'"), + (Err(_), _) => self.parsed_until.fmt(f), + } + } +} + +impl std::error::Error for Error {} diff --git a/vendor/gix-config/src/parse/event.rs b/vendor/gix-config/src/parse/event.rs new file mode 100644 index 000000000..b7b96934d --- /dev/null +++ b/vendor/gix-config/src/parse/event.rs @@ -0,0 +1,83 @@ +use std::{borrow::Cow, fmt::Display}; + +use bstr::{BStr, BString}; + +use crate::parse::Event; + +impl Event<'_> { + /// Serialize this type into a `BString` for convenience. + /// + /// Note that `to_string()` can also be used, but might not be lossless. + #[must_use] + pub fn to_bstring(&self) -> BString { + let mut buf = Vec::new(); + self.write_to(&mut buf).expect("io error impossible"); + buf.into() + } + + /// Turn ourselves into the text we represent, lossy. + /// + /// Note that this will be partial in case of `ValueNotDone` which doesn't include the backslash, and `SectionHeader` will only + /// provide their name, lacking the sub-section name. 
+    pub fn to_bstr_lossy(&self) -> &BStr {
+        match self {
+            // Events that carry their text directly return it unchanged.
+            Self::ValueNotDone(e) | Self::Whitespace(e) | Self::Newline(e) | Self::Value(e) | Self::ValueDone(e) => {
+                e.as_ref()
+            }
+            Self::KeyValueSeparator => "=".into(),
+            Self::SectionKey(k) => k.0.as_ref(),
+            // Only the section name is returned here.
+            Self::SectionHeader(h) => h.name.0.as_ref(),
+            // Only the comment's text is returned here, not its `tag`.
+            Self::Comment(c) => c.text.as_ref(),
+        }
+    }
+
+    /// Stream ourselves to the given `out`, in order to reproduce this event mostly losslessly
+    /// as it was parsed.
+    ///
+    /// # Errors
+    ///
+    /// Returns the error of the first write to `out` that fails.
+    pub fn write_to(&self, mut out: impl std::io::Write) -> std::io::Result<()> {
+        match self {
+            Self::ValueNotDone(e) => {
+                // The event's text does not contain the trailing backslash, so it is written separately.
+                out.write_all(e.as_ref())?;
+                out.write_all(b"\\")
+            }
+            Self::Whitespace(e) | Self::Newline(e) | Self::Value(e) | Self::ValueDone(e) => out.write_all(e.as_ref()),
+            Self::KeyValueSeparator => out.write_all(b"="),
+            Self::SectionKey(k) => out.write_all(k.0.as_ref()),
+            // Headers and comments delegate to their own `write_to()` implementations.
+            Self::SectionHeader(h) => h.write_to(&mut out),
+            Self::Comment(c) => c.write_to(&mut out),
+        }
+    }
+
+    /// Turn this instance into a fully owned one with `'static` lifetime.
+    #[must_use]
+    pub fn to_owned(&self) -> Event<'static> {
+        // Every variant is rebuilt from owned data so nothing borrows from the parsed input anymore.
+        match self {
+            Event::Comment(e) => Event::Comment(e.to_owned()),
+            Event::SectionHeader(e) => Event::SectionHeader(e.to_owned()),
+            Event::SectionKey(e) => Event::SectionKey(e.to_owned()),
+            Event::Value(e) => Event::Value(Cow::Owned(e.clone().into_owned())),
+            Event::ValueNotDone(e) => Event::ValueNotDone(Cow::Owned(e.clone().into_owned())),
+            Event::ValueDone(e) => Event::ValueDone(Cow::Owned(e.clone().into_owned())),
+            Event::Newline(e) => Event::Newline(Cow::Owned(e.clone().into_owned())),
+            Event::Whitespace(e) => Event::Whitespace(Cow::Owned(e.clone().into_owned())),
+            Event::KeyValueSeparator => Event::KeyValueSeparator,
+        }
+    }
+}
+
+impl Display for Event<'_> {
+    fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
+        Display::fmt(&self.to_bstring(), f)
+    }
+}
+
+impl From<Event<'_>> for BString {
+    fn from(event: Event<'_>) -> Self {
+        // Not `event.into()`: via the blanket `Into` impl that resolves to this very
+        // function and recurses until the stack overflows.
+        event.to_bstring()
+    }
+}
+
+impl From<&Event<'_>> for BString {
+    fn from(event: &Event<'_>) -> Self {
+        event.to_bstring()
+    }
+}
diff --git a/vendor/gix-config/src/parse/events.rs b/vendor/gix-config/src/parse/events.rs
new file mode 100644
index 000000000..62f621b52
--- /dev/null
+++ b/vendor/gix-config/src/parse/events.rs
@@ -0,0 +1,336 @@
+use std::convert::TryFrom;
+
+use smallvec::SmallVec;
+
+use crate::{
+    parse,
+    parse::{section, Event, Section},
+};
+
+/// A type to store all events that typically precede the first section, keeping up to 8 of them
+/// inline without a heap allocation.
+pub type FrontMatterEvents<'a> = SmallVec<[Event<'a>; 8]>;
+
+/// A zero-copy `gix-config` file parser.
+///
+/// This parser exposes low-level syntactic events from a `gix-config` file.
+/// Generally speaking, you'll want to use [`File`] as it wraps
+/// around the parser to provide a higher-level abstraction to a `gix-config`
+/// file, including querying, modifying, and updating values.
+///
+/// This parser guarantees that the events emitted are sufficient to
+/// reconstruct a `gix-config` file identical to the source `gix-config`
+/// when writing it.
+///
+/// # Differences from a `.ini` parser
+///
+/// While the `gix-config` format closely resembles the [`.ini` file format],
+/// there are subtle differences that make them incompatible. For one, the file
+/// format is not well defined, and there exists no formal specification to
+/// adhere to.
+///
+/// For concrete examples, some notable differences are:
+/// - `gix-config` sections permit subsections via either a quoted string
+///   (`[some-section "subsection"]`) or via the deprecated dot notation
+///   (`[some-section.subsection]`). Successfully parsing these section names is not
+///   well defined in typical `.ini` parsers. This parser will handle these cases
+///   perfectly.
+/// - Comment markers are not strictly defined either. This parser will always
+///   and only handle a semicolon or octothorpe (also known as a hash or number
+///   sign).
+/// - Global properties may be allowed in `.ini` parsers, but are strictly
+///   disallowed by this parser.
+/// - Only `\t`, `\n`, `\b`, `\\` are valid escape characters.
+/// - Quoted and semi-quoted values will be parsed (but quotes will be included
+///   in event outputs). An example of a semi-quoted value is `5"hello world"`,
+///   which should be interpreted as `5hello world` after
+///   [normalization][crate::value::normalize()].
+/// - Line continuations via a `\` character are supported (inside or outside of quotes).
+/// - Whitespace handling similarly follows the `gix-config` specification as
+///   closely as possible, where excess whitespace after a non-quoted value is
+///   trimmed, and line continuations onto a new line with excess spaces are kept.
+/// - Only equal signs (optionally padded by spaces) are valid name/value
+///   delimiters.
+///
+/// Note that things such as case-sensitivity or duplicate sections are
+/// _not_ handled.
This parser is a low level _syntactic_ interpreter +/// and higher level wrappers around this parser, which may +/// or may not be zero-copy, should handle _semantic_ values. This also means +/// that string-like values are not interpreted. For example, `hello"world"` +/// would be read at a high level as `helloworld` but this parser will return +/// the former instead, with the extra quotes. This is because it is not the +/// responsibility of the parser to interpret these values, and doing so would +/// necessarily require a copy, which this parser avoids. +/// +/// # Trait Implementations +/// +/// - This struct does _not_ implement [`FromStr`] due to lifetime +/// constraints implied on the required `from_str` method. Instead, it provides +/// [`From<&'_ str>`]. +/// +/// # Idioms +/// +/// If you do want to use this parser, there are some idioms that may help you +/// with interpreting sequences of events. +/// +/// ## `Value` events do not immediately follow `Key` events +/// +/// Consider the following `gix-config` example: +/// +/// ```text +/// [core] +/// autocrlf = input +/// ``` +/// +/// Because this parser guarantees perfect reconstruction, there are many +/// non-significant events that occur in addition to the ones you may expect: +/// +/// ``` +/// # use gix_config::parse::{Event, Events, section}; +/// # use std::borrow::Cow; +/// # use std::convert::TryFrom; +/// # let section_header = section::Header::new("core", None).unwrap(); +/// # let section_data = "[core]\n autocrlf = input"; +/// # assert_eq!(Events::from_str(section_data).unwrap().into_vec(), vec![ +/// Event::SectionHeader(section_header), +/// Event::Newline(Cow::Borrowed("\n".into())), +/// Event::Whitespace(Cow::Borrowed(" ".into())), +/// Event::SectionKey(section::Key::try_from("autocrlf")?), +/// Event::Whitespace(Cow::Borrowed(" ".into())), +/// Event::KeyValueSeparator, +/// Event::Whitespace(Cow::Borrowed(" ".into())), +/// Event::Value(Cow::Borrowed("input".into())), +/// # 
]); +/// # Ok::<_, Box>(()) +/// ``` +/// +/// Note the two whitespace events between the key and value pair! Those two +/// events actually refer to the whitespace between the name and value and the +/// equal sign. So if the config instead had `autocrlf=input`, those whitespace +/// events would no longer be present. +/// +/// ## `KeyValueSeparator` event is not guaranteed to emit +/// +/// Consider the following `gix-config` example: +/// +/// ```text +/// [core] +/// autocrlf +/// ``` +/// +/// This is a valid config with a `autocrlf` key having an implicit `true` +/// value. This means that there is not a `=` separating the key and value, +/// which means that the corresponding event won't appear either: +/// +/// ``` +/// # use gix_config::parse::{Event, Events, section}; +/// # use std::borrow::Cow; +/// # use std::convert::TryFrom; +/// # let section_header = section::Header::new("core", None).unwrap(); +/// # let section_data = "[core]\n autocrlf"; +/// # assert_eq!(Events::from_str(section_data).unwrap().into_vec(), vec![ +/// Event::SectionHeader(section_header), +/// Event::Newline(Cow::Borrowed("\n".into())), +/// Event::Whitespace(Cow::Borrowed(" ".into())), +/// Event::SectionKey(section::Key::try_from("autocrlf")?), +/// Event::Value(Cow::Borrowed("".into())), +/// # ]); +/// # Ok::<_, Box>(()) +/// ``` +/// +/// ## Quoted values are not unquoted +/// +/// Consider the following `gix-config` example: +/// +/// ```text +/// [core] +/// autocrlf=true"" +/// filemode=fa"lse" +/// ``` +/// +/// Both these events, when fully processed, should normally be `true` and +/// `false`. However, because this parser is zero-copy, we cannot process +/// partially quoted values, such as the `false` example. As a result, to +/// maintain consistency, the parser will just take all values as literals. 
The +/// relevant event stream emitted is thus emitted as: +/// +/// ``` +/// # use gix_config::parse::{Event, Events, section}; +/// # use std::borrow::Cow; +/// # use std::convert::TryFrom; +/// # let section_header = section::Header::new("core", None).unwrap(); +/// # let section_data = "[core]\nautocrlf=true\"\"\nfilemode=fa\"lse\""; +/// # assert_eq!(Events::from_str(section_data).unwrap().into_vec(), vec![ +/// Event::SectionHeader(section_header), +/// Event::Newline(Cow::Borrowed("\n".into())), +/// Event::SectionKey(section::Key::try_from("autocrlf")?), +/// Event::KeyValueSeparator, +/// Event::Value(Cow::Borrowed(r#"true"""#.into())), +/// Event::Newline(Cow::Borrowed("\n".into())), +/// Event::SectionKey(section::Key::try_from("filemode")?), +/// Event::KeyValueSeparator, +/// Event::Value(Cow::Borrowed(r#"fa"lse""#.into())), +/// # ]); +/// # Ok::<_, Box>(()) +/// ``` +/// +/// ## Whitespace after line continuations are part of the value +/// +/// Consider the following `gix-config` example: +/// +/// ```text +/// [some-section] +/// file=a\ +/// c +/// ``` +/// +/// Because how `gix-config` treats continuations, the whitespace preceding `c` +/// are in fact part of the value of `file`. The fully interpreted key/value +/// pair is actually `file=a c`. 
As a result, the parser will provide this +/// split value accordingly: +/// +/// ``` +/// # use gix_config::parse::{Event, Events, section}; +/// # use std::borrow::Cow; +/// # use std::convert::TryFrom; +/// # let section_header = section::Header::new("some-section", None).unwrap(); +/// # let section_data = "[some-section]\nfile=a\\\n c"; +/// # assert_eq!(Events::from_str(section_data).unwrap().into_vec(), vec![ +/// Event::SectionHeader(section_header), +/// Event::Newline(Cow::Borrowed("\n".into())), +/// Event::SectionKey(section::Key::try_from("file")?), +/// Event::KeyValueSeparator, +/// Event::ValueNotDone(Cow::Borrowed("a".into())), +/// Event::Newline(Cow::Borrowed("\n".into())), +/// Event::ValueDone(Cow::Borrowed(" c".into())), +/// # ]); +/// # Ok::<_, Box>(()) +/// ``` +/// +/// [`File`]: crate::File +/// [`.ini` file format]: https://en.wikipedia.org/wiki/INI_file +/// [`git`'s documentation]: https://git-scm.com/docs/gix-config#_configuration_file +/// [`FromStr`]: std::str::FromStr +/// [`From<&'_ str>`]: std::convert::From +#[derive(Clone, Eq, PartialEq, Ord, PartialOrd, Hash, Debug, Default)] +pub struct Events<'a> { + /// Events seen before the first section. + pub frontmatter: FrontMatterEvents<'a>, + /// All parsed sections. + pub sections: Vec>, +} + +impl Events<'static> { + /// Parses the provided bytes, returning an [`Events`] that contains allocated + /// and owned events. This is similar to [`Events::from_bytes()`], but performance + /// is degraded as it requires allocation for every event. + /// + /// Use `filter` to only include those events for which it returns true. + pub fn from_bytes_owned<'a>( + input: &'a [u8], + filter: Option) -> bool>, + ) -> Result, parse::Error> { + from_bytes(input, |e| e.to_owned(), filter) + } +} + +impl<'a> Events<'a> { + /// Attempt to zero-copy parse the provided bytes. 
On success, returns a + /// [`Events`] that provides methods to accessing leading comments and sections + /// of a `gix-config` file and can be converted into an iterator of [`Event`] + /// for higher level processing. + /// + /// Use `filter` to only include those events for which it returns true. + pub fn from_bytes(input: &'a [u8], filter: Option) -> bool>) -> Result, parse::Error> { + from_bytes(input, std::convert::identity, filter) + } + + /// Attempt to zero-copy parse the provided `input` string. + /// + /// Prefer the [`from_bytes()`][Self::from_bytes()] method if UTF8 encoding + /// isn't guaranteed. + #[allow(clippy::should_implement_trait)] + pub fn from_str(input: &'a str) -> Result, parse::Error> { + Self::from_bytes(input.as_bytes(), None) + } + + /// Consumes the parser to produce an iterator of all contained events. + #[must_use = "iterators are lazy and do nothing unless consumed"] + #[allow(clippy::should_implement_trait)] + pub fn into_iter(self) -> impl Iterator> + std::iter::FusedIterator { + self.frontmatter.into_iter().chain( + self.sections + .into_iter() + .flat_map(|section| std::iter::once(parse::Event::SectionHeader(section.header)).chain(section.events)), + ) + } + + /// Place all contained events into a single `Vec`. 
+ pub fn into_vec(self) -> Vec> { + self.into_iter().collect() + } +} + +impl<'a> TryFrom<&'a str> for Events<'a> { + type Error = parse::Error; + + fn try_from(value: &'a str) -> Result { + Self::from_str(value) + } +} + +impl<'a> TryFrom<&'a [u8]> for Events<'a> { + type Error = parse::Error; + + fn try_from(value: &'a [u8]) -> Result { + Events::from_bytes(value, None) + } +} + +fn from_bytes<'a, 'b>( + input: &'a [u8], + convert: impl Fn(Event<'a>) -> Event<'b>, + filter: Option) -> bool>, +) -> Result, parse::Error> { + let mut header = None; + let mut events = section::Events::default(); + let mut frontmatter = FrontMatterEvents::default(); + let mut sections = Vec::new(); + parse::from_bytes(input, |e: Event<'_>| match e { + Event::SectionHeader(next_header) => { + match header.take() { + None => { + frontmatter = std::mem::take(&mut events).into_iter().collect(); + } + Some(prev_header) => { + sections.push(parse::Section { + header: prev_header, + events: std::mem::take(&mut events), + }); + } + }; + header = match convert(Event::SectionHeader(next_header)) { + Event::SectionHeader(h) => h, + _ => unreachable!("BUG: convert must not change the event type, just the lifetime"), + } + .into(); + } + event => { + if filter.map_or(true, |f| f(&event)) { + events.push(convert(event)) + } + } + })?; + + match header { + None => { + frontmatter = events.into_iter().collect(); + } + Some(prev_header) => { + sections.push(parse::Section { + header: prev_header, + events: std::mem::take(&mut events), + }); + } + } + Ok(Events { frontmatter, sections }) +} diff --git a/vendor/gix-config/src/parse/key.rs b/vendor/gix-config/src/parse/key.rs new file mode 100644 index 000000000..b0e0376be --- /dev/null +++ b/vendor/gix-config/src/parse/key.rs @@ -0,0 +1,35 @@ +use bstr::{BStr, ByteSlice}; + +/// An unvalidated parse result of parsing input like `remote.origin.url` or `core.bare`. 
+#[derive(Debug, PartialEq, Ord, PartialOrd, Eq, Hash, Clone, Copy)] +pub struct Key<'a> { + /// The name of the section, like `core` in `core.bare`. + pub section_name: &'a str, + /// The name of the sub-section, like `origin` in `remote.origin.url`. + pub subsection_name: Option<&'a BStr>, + /// The name of the section key, like `url` in `remote.origin.url`. + pub value_name: &'a str, +} + +/// Parse `input` like `core.bare` or `remote.origin.url` as a `Key` to make its fields available, +/// or `None` if there were not at least 2 tokens separated by `.`. +/// Note that `input` isn't validated, and is `str` as ascii is a subset of UTF-8 which is required for any valid keys. +pub fn parse_unvalidated<'a>(input: impl Into<&'a BStr>) -> Option> { + let input = input.into(); + let mut tokens = input.splitn(2, |b| *b == b'.'); + let section_name = tokens.next()?; + let subsection_or_key = tokens.next()?; + let mut tokens = subsection_or_key.rsplitn(2, |b| *b == b'.'); + let (subsection_name, value_name) = match (tokens.next(), tokens.next()) { + (Some(key), Some(subsection)) => (Some(subsection.into()), key), + (Some(key), None) => (None, key), + (None, Some(_)) => unreachable!("iterator can't restart producing items"), + (None, None) => return None, + }; + + Some(Key { + section_name: section_name.to_str().ok()?, + subsection_name, + value_name: value_name.to_str().ok()?, + }) +} diff --git a/vendor/gix-config/src/parse/mod.rs b/vendor/gix-config/src/parse/mod.rs new file mode 100644 index 000000000..50363873c --- /dev/null +++ b/vendor/gix-config/src/parse/mod.rs @@ -0,0 +1,116 @@ +//! This module handles parsing a `gix-config` file. Generally speaking, you +//! want to use a higher abstraction such as [`File`] unless you have some +//! explicit reason to work with events instead. +//! +//! The workflow for interacting with this is to use +//! [`from_bytes()`] to obtain all parse events or tokens of the given input. +//! +//! 
On a higher level, one can use [`Events`] to parse all events into a set +//! of easily interpretable data type, similar to what [`File`] does. +//! +//! [`File`]: crate::File + +use std::{borrow::Cow, hash::Hash}; + +use bstr::BStr; + +mod nom; +pub use self::nom::from_bytes; +mod event; +#[path = "events.rs"] +mod events_type; +pub use events_type::{Events, FrontMatterEvents}; +mod comment; +mod error; +/// +pub mod section; + +/// +mod key; +pub use key::{parse_unvalidated as key, Key}; + +#[cfg(test)] +pub(crate) mod tests; + +/// Syntactic events that occurs in the config. Despite all these variants +/// holding a [`Cow`] instead over a simple reference, the parser will only emit +/// borrowed `Cow` variants. +/// +/// The `Cow` is used here for ease of inserting new, typically owned events as used +/// in the [`File`] struct when adding values, allowing a mix of owned and borrowed +/// values. +/// +/// [`Cow`]: std::borrow::Cow +/// [`File`]: crate::File +#[derive(Clone, Eq, PartialEq, Ord, PartialOrd, Hash, Debug)] +pub enum Event<'a> { + /// A comment with a comment tag and the comment itself. Note that the + /// comment itself may contain additional whitespace and comment markers + /// at the beginning, like `# comment` or `; comment`. + Comment(Comment<'a>), + /// A section header containing the section name and a subsection, if it + /// exists. For instance, `remote "origin"` is parsed to `remote` as section + /// name and `origin` as subsection name. + SectionHeader(section::Header<'a>), + /// A name to a value in a section, like `url` in `remote.origin.url`. + SectionKey(section::Key<'a>), + /// A completed value. This may be any single-line string, including the empty string + /// if an implicit boolean value is used. + /// Note that these values may contain spaces and any special character. This value is + /// also unprocessed, so it it may contain double quotes that should be + /// [normalized][crate::value::normalize()] before interpretation. 
+ Value(Cow<'a, BStr>), + /// Represents any token used to signify a newline character. On Unix + /// platforms, this is typically just `\n`, but can be any valid newline + /// sequence. Multiple newlines (such as `\n\n`) will be merged as a single + /// newline event containing a string of multiple newline characters. + Newline(Cow<'a, BStr>), + /// Any value that isn't completed. This occurs when the value is continued + /// onto the next line by ending it with a backslash. + /// A [`Newline`][Self::Newline] event is guaranteed after, followed by + /// either a ValueDone, a Whitespace, or another ValueNotDone. + ValueNotDone(Cow<'a, BStr>), + /// The last line of a value which was continued onto another line. + /// With this it's possible to obtain the complete value by concatenating + /// the prior [`ValueNotDone`][Self::ValueNotDone] events. + ValueDone(Cow<'a, BStr>), + /// A continuous section of insignificant whitespace. + /// + /// Note that values with internal whitespace will not be separated by this event, + /// hence interior whitespace there is always part of the value. + Whitespace(Cow<'a, BStr>), + /// This event is emitted when the parser counters a valid `=` character + /// separating the key and value. + /// This event is necessary as it eliminates the ambiguity for whitespace + /// events between a key and value event. + KeyValueSeparator, +} + +/// A parsed section containing the header and the section events, typically +/// comprising the keys and their values. +#[derive(Clone, Eq, PartialEq, Ord, PartialOrd, Hash, Debug)] +pub struct Section<'a> { + /// The section name and subsection name, if any. + pub header: section::Header<'a>, + /// The syntactic events found in this section. + pub events: section::Events<'a>, +} + +/// A parsed comment containing the comment marker and comment. +#[derive(Clone, Eq, PartialEq, Ord, PartialOrd, Hash, Debug, Default)] +pub struct Comment<'a> { + /// The comment marker used. 
This is either a semicolon or octothorpe/hash. + pub tag: u8, + /// The parsed comment. + pub text: Cow<'a, BStr>, +} + +/// A parser error reports the one-indexed line number where the parsing error +/// occurred, as well as the last parser node and the remaining data to be +/// parsed. +#[derive(PartialEq, Debug)] +pub struct Error { + line_number: usize, + last_attempted_parser: error::ParseNode, + parsed_until: bstr::BString, +} diff --git a/vendor/gix-config/src/parse/nom/mod.rs b/vendor/gix-config/src/parse/nom/mod.rs new file mode 100644 index 000000000..11d1dea6b --- /dev/null +++ b/vendor/gix-config/src/parse/nom/mod.rs @@ -0,0 +1,460 @@ +use std::borrow::Cow; + +use bstr::{BStr, BString, ByteSlice, ByteVec}; +use nom::{ + branch::alt, + bytes::complete::{tag, take_till, take_while}, + character::{ + complete::{char, one_of}, + is_space, + }, + combinator::{map, opt}, + error::{Error as NomError, ErrorKind}, + multi::{fold_many0, fold_many1}, + sequence::delimited, + IResult, +}; + +use crate::parse::{error::ParseNode, section, Comment, Error, Event}; + +/// Attempt to zero-copy parse the provided bytes, passing results to `dispatch`. +pub fn from_bytes<'a>(input: &'a [u8], mut dispatch: impl FnMut(Event<'a>)) -> Result<(), Error> { + let bom = unicode_bom::Bom::from(input); + let mut newlines = 0; + let (i, _) = fold_many0( + alt(( + map(comment, Event::Comment), + map(take_spaces, |whitespace| Event::Whitespace(Cow::Borrowed(whitespace))), + map(take_newlines, |(newline, counter)| { + newlines += counter; + Event::Newline(Cow::Borrowed(newline)) + }), + )), + || (), + |_acc, event| dispatch(event), + )(&input[bom.len()..]) + // I don't think this can panic. many0 errors if the child parser returns + // a success where the input was not consumed, but alt will only return Ok + // if one of its children succeed. However, all of it's children are + // guaranteed to consume something if they succeed, so the Ok(i) == i case + // can never occur. 
+ .expect("many0(alt(...)) panicked. Likely a bug in one of the children parsers."); + + if i.is_empty() { + return Ok(()); + } + + let mut node = ParseNode::SectionHeader; + + let res = fold_many1( + |i| section(i, &mut node, &mut dispatch), + || (), + |_acc, additional_newlines| { + newlines += additional_newlines; + }, + )(i); + let (i, _) = res.map_err(|_| Error { + line_number: newlines, + last_attempted_parser: node, + parsed_until: i.as_bstr().into(), + })?; + + // This needs to happen after we collect sections, otherwise the line number + // will be off. + if !i.is_empty() { + return Err(Error { + line_number: newlines, + last_attempted_parser: node, + parsed_until: i.as_bstr().into(), + }); + } + + Ok(()) +} + +fn comment(i: &[u8]) -> IResult<&[u8], Comment<'_>> { + let (i, comment_tag) = one_of(";#")(i)?; + let (i, comment) = take_till(|c| c == b'\n')(i)?; + Ok(( + i, + Comment { + tag: comment_tag as u8, + text: Cow::Borrowed(comment.as_bstr()), + }, + )) +} + +#[cfg(test)] +mod tests; + +fn section<'a>(i: &'a [u8], node: &mut ParseNode, dispatch: &mut impl FnMut(Event<'a>)) -> IResult<&'a [u8], usize> { + let (mut i, header) = section_header(i)?; + dispatch(Event::SectionHeader(header)); + + let mut newlines = 0; + + // This would usually be a many0(alt(...)), the manual loop allows us to + // optimize vec insertions + loop { + let old_i = i; + + if let Ok((new_i, v)) = take_spaces(i) { + if old_i != new_i { + i = new_i; + dispatch(Event::Whitespace(Cow::Borrowed(v.as_bstr()))); + } + } + + if let Ok((new_i, (v, new_newlines))) = take_newlines(i) { + if old_i != new_i { + i = new_i; + newlines += new_newlines; + dispatch(Event::Newline(Cow::Borrowed(v.as_bstr()))); + } + } + + if let Ok((new_i, new_newlines)) = key_value_pair(i, node, dispatch) { + if old_i != new_i { + i = new_i; + newlines += new_newlines; + } + } + + if let Ok((new_i, comment)) = comment(i) { + if old_i != new_i { + i = new_i; + dispatch(Event::Comment(comment)); + } + } + + if old_i 
== i { + break; + } + } + + Ok((i, newlines)) +} + +fn section_header(i: &[u8]) -> IResult<&[u8], section::Header<'_>> { + let (i, _) = char('[')(i)?; + // No spaces must be between section name and section start + let (i, name) = take_while(|c: u8| c.is_ascii_alphanumeric() || c == b'-' || c == b'.')(i)?; + + let name = name.as_bstr(); + if let Ok((i, _)) = char::<_, NomError<&[u8]>>(']')(i) { + // Either section does not have a subsection or using deprecated + // subsection syntax at this point. + let header = match memchr::memrchr(b'.', name.as_bytes()) { + Some(index) => section::Header { + name: section::Name(Cow::Borrowed(name[..index].as_bstr())), + separator: name.get(index..=index).map(|s| Cow::Borrowed(s.as_bstr())), + subsection_name: name.get(index + 1..).map(|s| Cow::Borrowed(s.as_bstr())), + }, + None => section::Header { + name: section::Name(Cow::Borrowed(name.as_bstr())), + separator: None, + subsection_name: None, + }, + }; + + if header.name.is_empty() { + return Err(nom::Err::Error(NomError { + input: i, + code: ErrorKind::NoneOf, + })); + } + return Ok((i, header)); + } + + // Section header must be using modern subsection syntax at this point. 
+ + let (i, whitespace) = take_spaces(i)?; + let (i, subsection_name) = delimited(char('"'), opt(sub_section), tag("\"]"))(i)?; + + Ok(( + i, + section::Header { + name: section::Name(Cow::Borrowed(name)), + separator: Some(Cow::Borrowed(whitespace)), + subsection_name, + }, + )) +} + +fn sub_section(i: &[u8]) -> IResult<&[u8], Cow<'_, BStr>> { + let (rest, (found_escape, consumed)) = sub_section_delegate(i, &mut |_| ())?; + if found_escape { + let mut buf = BString::default(); + sub_section_delegate(i, &mut |b| buf.push_byte(b)).map(|(i, _)| (i, buf.into())) + } else { + Ok((rest, i[..consumed].as_bstr().into())) + } +} + +fn sub_section_delegate<'a>(i: &'a [u8], push_byte: &mut dyn FnMut(u8)) -> IResult<&'a [u8], (bool, usize)> { + let mut cursor = 0; + let mut bytes = i.iter().copied(); + let mut found_terminator = false; + let mut found_escape = false; + while let Some(mut b) = bytes.next() { + cursor += 1; + if b == b'\n' || b == 0 { + return Err(nom::Err::Error(NomError { + input: &i[cursor..], + code: ErrorKind::NonEmpty, + })); + } + if b == b'"' { + found_terminator = true; + break; + } + if b == b'\\' { + b = bytes.next().ok_or_else(|| { + nom::Err::Error(NomError { + input: &i[cursor..], + code: ErrorKind::NonEmpty, + }) + })?; + found_escape = true; + cursor += 1; + if b == b'\n' { + return Err(nom::Err::Error(NomError { + input: &i[cursor..], + code: ErrorKind::NonEmpty, + })); + } + } + push_byte(b); + } + + if !found_terminator { + return Err(nom::Err::Error(NomError { + input: &i[cursor..], + code: ErrorKind::NonEmpty, + })); + } + + Ok((&i[cursor - 1..], (found_escape, cursor - 1))) +} + +fn key_value_pair<'a>( + i: &'a [u8], + node: &mut ParseNode, + dispatch: &mut impl FnMut(Event<'a>), +) -> IResult<&'a [u8], usize> { + *node = ParseNode::Name; + let (i, name) = config_name(i)?; + + dispatch(Event::SectionKey(section::Key(Cow::Borrowed(name)))); + + let (i, whitespace) = opt(take_spaces)(i)?; + if let Some(whitespace) = whitespace { + 
dispatch(Event::Whitespace(Cow::Borrowed(whitespace))); + } + + *node = ParseNode::Value; + let (i, newlines) = config_value(i, dispatch)?; + Ok((i, newlines)) +} + +/// Parses the config name of a config pair. Assumes the input has already been +/// trimmed of any leading whitespace. +fn config_name(i: &[u8]) -> IResult<&[u8], &BStr> { + if i.is_empty() { + return Err(nom::Err::Error(NomError { + input: i, + code: ErrorKind::NonEmpty, + })); + } + + if !i[0].is_ascii_alphabetic() { + return Err(nom::Err::Error(NomError { + input: i, + code: ErrorKind::Alpha, + })); + } + + let (i, name) = take_while(|c: u8| c.is_ascii_alphanumeric() || c == b'-')(i)?; + Ok((i, name.as_bstr())) +} + +fn config_value<'a>(i: &'a [u8], dispatch: &mut impl FnMut(Event<'a>)) -> IResult<&'a [u8], usize> { + if let (i, Some(_)) = opt(char('='))(i)? { + dispatch(Event::KeyValueSeparator); + let (i, whitespace) = opt(take_spaces)(i)?; + if let Some(whitespace) = whitespace { + dispatch(Event::Whitespace(Cow::Borrowed(whitespace))); + } + let (i, newlines) = value_impl(i, dispatch)?; + Ok((i, newlines)) + } else { + // This is a special way of denoting 'empty' values which a lot of code depends on. + // Hence, rather to fix this everywhere else, leave it here and fix it where it matters, namely + // when it's about differentiating between a missing key-value separator, and one followed by emptiness. + dispatch(Event::Value(Cow::Borrowed("".into()))); + Ok((i, 0)) + } +} + +/// Handles parsing of known-to-be values. This function handles both single +/// line values as well as values that are continuations. 
+fn value_impl<'a>(i: &'a [u8], dispatch: &mut impl FnMut(Event<'a>)) -> IResult<&'a [u8], usize> { + let (i, value_end, newlines, mut dispatch) = { + let new_err = |code| nom::Err::Error(NomError { input: i, code }); + let mut value_end = None::; + let mut value_start: usize = 0; + let mut newlines = 0; + + let mut prev_char_was_backslash = false; + // This is required to ignore comment markers if they're in a quote. + let mut is_in_quotes = false; + // Used to determine if we return a Value or Value{Not,}Done + let mut partial_value_found = false; + let mut last_value_index: usize = 0; + + let mut bytes = i.iter(); + while let Some(mut c) = bytes.next() { + if prev_char_was_backslash { + prev_char_was_backslash = false; + let mut consumed = 1; + if *c == b'\r' { + c = bytes.next().ok_or_else(|| new_err(ErrorKind::Escaped))?; + if *c != b'\n' { + return Err(new_err(ErrorKind::Tag)); + } + consumed += 1; + } + + match c { + b'\n' => { + partial_value_found = true; + let backslash = 1; + dispatch(Event::ValueNotDone(Cow::Borrowed( + i[value_start..last_value_index - backslash].as_bstr(), + ))); + let nl_end = last_value_index + consumed; + dispatch(Event::Newline(Cow::Borrowed(i[last_value_index..nl_end].as_bstr()))); + value_start = nl_end; + value_end = None; + newlines += 1; + + last_value_index += consumed; + } + b'n' | b't' | b'\\' | b'b' | b'"' => { + last_value_index += 1; + } + _ => { + return Err(new_err(ErrorKind::Escaped)); + } + } + } else { + match c { + b'\n' => { + value_end = last_value_index.into(); + break; + } + b';' | b'#' if !is_in_quotes => { + value_end = last_value_index.into(); + break; + } + b'\\' => prev_char_was_backslash = true, + b'"' => is_in_quotes = !is_in_quotes, + _ => {} + } + last_value_index += 1; + } + } + + if prev_char_was_backslash { + return Err(new_err(ErrorKind::Escaped)); + } + + if is_in_quotes { + return Err(new_err(ErrorKind::Tag)); + } + + let value_end = match value_end { + None => { + if last_value_index == 0 { + 
dispatch(Event::Value(Cow::Borrowed("".into()))); + return Ok((&i[0..], newlines)); + } else { + i.len() + } + } + Some(idx) => idx, + }; + + let dispatch = move |value: &'a [u8]| { + if partial_value_found { + dispatch(Event::ValueDone(Cow::Borrowed(value.as_bstr()))); + } else { + dispatch(Event::Value(Cow::Borrowed(value.as_bstr()))); + } + }; + (&i[value_start..], value_end - value_start, newlines, dispatch) + }; + + let (i, remainder_value) = { + let value_end_no_trailing_whitespace = i[..value_end] + .iter() + .enumerate() + .rev() + .find_map(|(idx, b)| (!b.is_ascii_whitespace()).then_some(idx + 1)) + .unwrap_or(0); + ( + &i[value_end_no_trailing_whitespace..], + &i[..value_end_no_trailing_whitespace], + ) + }; + + dispatch(remainder_value); + + Ok((i, newlines)) +} + +fn take_spaces(i: &[u8]) -> IResult<&[u8], &BStr> { + let (i, v) = take_while(|c: u8| c.is_ascii() && is_space(c))(i)?; + if v.is_empty() { + Err(nom::Err::Error(NomError { + input: i, + code: ErrorKind::Eof, + })) + } else { + Ok((i, v.as_bstr())) + } +} + +fn take_newlines(i: &[u8]) -> IResult<&[u8], (&BStr, usize)> { + let mut counter = 0; + let mut consumed_bytes = 0; + let mut next_must_be_newline = false; + for b in i.iter().copied() { + if !b.is_ascii() { + break; + }; + if b == b'\r' { + if next_must_be_newline { + break; + } + next_must_be_newline = true; + continue; + }; + if b == b'\n' { + counter += 1; + consumed_bytes += if next_must_be_newline { 2 } else { 1 }; + next_must_be_newline = false; + } else { + break; + } + } + let (v, i) = i.split_at(consumed_bytes); + if v.is_empty() { + Err(nom::Err::Error(NomError { + input: i, + code: ErrorKind::Eof, + })) + } else { + Ok((i, (v.as_bstr(), counter))) + } +} diff --git a/vendor/gix-config/src/parse/nom/tests.rs b/vendor/gix-config/src/parse/nom/tests.rs new file mode 100644 index 000000000..f6e8c3d92 --- /dev/null +++ b/vendor/gix-config/src/parse/nom/tests.rs @@ -0,0 +1,924 @@ +use super::*; + +mod section_headers { + use 
super::section_header; + use crate::parse::tests::util::{fully_consumed, section_header as parsed_section_header}; + + #[test] + fn no_subsection() { + assert_eq!( + section_header(b"[hello]").unwrap(), + fully_consumed(parsed_section_header("hello", None)), + ); + } + + #[test] + fn modern_subsection() { + assert_eq!( + section_header(br#"[hello "world"]"#).unwrap(), + fully_consumed(parsed_section_header("hello", (" ", "world"))), + ); + } + + #[test] + fn escaped_subsection() { + assert_eq!( + section_header(br#"[hello "foo\\bar\""]"#).unwrap(), + fully_consumed(parsed_section_header("hello", (" ", r#"foo\bar""#))), + ); + } + + #[test] + fn deprecated_subsection() { + assert_eq!( + section_header(br#"[hello.world]"#).unwrap(), + fully_consumed(parsed_section_header("hello", (".", "world"))) + ); + assert_eq!( + section_header(br#"[Hello.World]"#).unwrap(), + fully_consumed(parsed_section_header("Hello", (".", "World"))) + ); + } + + #[test] + fn empty_legacy_subsection_name() { + assert_eq!( + section_header(br#"[hello-world.]"#).unwrap(), + fully_consumed(parsed_section_header("hello-world", (".", ""))) + ); + } + + #[test] + fn empty_modern_subsection_name() { + assert_eq!( + section_header(br#"[hello ""]"#).unwrap(), + fully_consumed(parsed_section_header("hello", (" ", ""))) + ); + } + + #[test] + fn backslashes_in_subsections_do_not_escape_newlines_or_tabs() { + assert_eq!( + section_header(br#"[hello "single \ \\ \t \n \0"]"#).unwrap(), + fully_consumed(parsed_section_header("hello", (" ", r#"single \ t n 0"#))) + ); + } + + #[test] + fn newline_in_header() { + assert!(section_header(b"[hello\n]").is_err()); + } + + #[test] + fn newline_in_sub_section() { + assert!(section_header(b"[hello \"hello\n\"]").is_err()); + } + + #[test] + fn null_byt_in_sub_section() { + assert!(section_header(b"[hello \"hello\0\"]").is_err()); + } + + #[test] + fn escaped_newline_in_sub_section() { + assert!(section_header(b"[hello \"hello\\\n\"]").is_err()); + } + + #[test] + 
fn eof_after_escape_in_sub_section() { + assert!(section_header(b"[hello \"hello\\").is_err()); + } + + #[test] + fn null_byte_in_header() { + assert!(section_header(b"[hello\0]").is_err()); + } + + #[test] + fn invalid_characters_in_section() { + assert!(section_header(b"[$]").is_err()); + } + #[test] + fn invalid_characters_in_legacy_sub_section() { + assert!(section_header(b"[hello.$]").is_err()); + assert!(section_header(b"[hello. world]").is_err()); + } + + #[test] + fn right_brace_in_subsection_name() { + assert_eq!( + section_header(br#"[hello "]"]"#).unwrap(), + fully_consumed(parsed_section_header("hello", (" ", "]"))) + ); + } +} + +mod sub_section { + use std::borrow::Cow; + + use super::sub_section; + + #[test] + fn zero_copy_simple() { + let actual = sub_section(b"name\"").unwrap().1; + assert_eq!(actual.as_ref(), "name"); + assert!(matches!(actual, Cow::Borrowed(_))); + } + + #[test] + fn escapes_need_allocation() { + let actual = sub_section(br#"\x\t\n\0\\\"""#).unwrap().1; + assert_eq!(actual.as_ref(), r#"xtn0\""#); + assert!(matches!(actual, Cow::Owned(_))); + } +} + +mod config_name { + use nom::combinator::all_consuming; + + use super::config_name; + use crate::parse::tests::util::fully_consumed; + + #[test] + fn just_name() { + assert_eq!(config_name(b"name").unwrap(), fully_consumed("name".into())); + } + + #[test] + fn must_start_with_alphabetic() { + assert!(config_name(b"4aaa").is_err()); + assert!(config_name(b"-aaa").is_err()); + } + + #[test] + fn only_a_subset_of_characters_is_allowed() { + assert!(all_consuming(config_name)(b"Name$_").is_err()); + assert!(all_consuming(config_name)(b"other#").is_err()); + } + + #[test] + fn cannot_be_empty() { + assert!(config_name(b"").is_err()); + } +} + +mod section { + use crate::parse::{ + error::ParseNode, + section, + tests::util::{ + comment_event, fully_consumed, name_event, newline_custom_event, newline_event, + section_header as parsed_section_header, value_done_event, value_event, 
value_not_done_event, + whitespace_event, + }, + Event, Section, + }; + + fn section<'a>(i: &'a [u8], node: &mut ParseNode) -> nom::IResult<&'a [u8], (Section<'a>, usize)> { + let mut header = None; + let mut events = section::Events::default(); + super::section(i, node, &mut |e| match &header { + None => { + header = Some(e); + } + Some(_) => events.push(e), + }) + .map(|(i, o)| { + ( + i, + ( + Section { + header: match header.expect("header set") { + Event::SectionHeader(header) => header, + _ => unreachable!("unexpected"), + }, + events, + }, + o, + ), + ) + }) + } + + #[test] + fn empty_value_with_windows_newlines() { + let mut node = ParseNode::SectionHeader; + assert_eq!( + section(b"[a] k = \r\n", &mut node).unwrap(), + fully_consumed(( + Section { + header: parsed_section_header("a", None), + events: vec![ + whitespace_event(" "), + name_event("k"), + whitespace_event(" "), + Event::KeyValueSeparator, + whitespace_event(" "), + value_event(""), + newline_custom_event("\r\n") + ] + .into(), + }, + 1 + )), + ); + } + + #[test] + fn simple_value_with_windows_newlines() { + let mut node = ParseNode::SectionHeader; + assert_eq!( + section(b"[a] k = v\r\n", &mut node).unwrap(), + fully_consumed(( + Section { + header: parsed_section_header("a", None), + events: vec![ + whitespace_event(" "), + name_event("k"), + whitespace_event(" "), + Event::KeyValueSeparator, + whitespace_event(" "), + value_event("v"), + newline_custom_event("\r\n") + ] + .into(), + }, + 1 + )), + ); + assert_eq!( + section(b"[a] k = \r\n", &mut node).unwrap(), + fully_consumed(( + Section { + header: parsed_section_header("a", None), + events: vec![ + whitespace_event(" "), + name_event("k"), + whitespace_event(" "), + Event::KeyValueSeparator, + whitespace_event(" "), + value_event(""), + newline_custom_event("\r\n") + ] + .into(), + }, + 1 + )), + ); + } + + #[test] + fn empty_section() { + let mut node = ParseNode::SectionHeader; + assert_eq!( + section(b"[test]", &mut node).unwrap(), + 
fully_consumed(( + Section { + header: parsed_section_header("test", None), + events: Default::default() + }, + 0 + )), + ); + } + + #[test] + fn simple_section() { + let mut node = ParseNode::SectionHeader; + let section_data = br#"[hello] + a = b + c + d = "lol""#; + assert_eq!( + section(section_data, &mut node).unwrap(), + fully_consumed(( + Section { + header: parsed_section_header("hello", None), + events: vec![ + newline_event(), + whitespace_event(" "), + name_event("a"), + whitespace_event(" "), + Event::KeyValueSeparator, + whitespace_event(" "), + value_event("b"), + newline_event(), + whitespace_event(" "), + name_event("c"), + value_event(""), + newline_event(), + whitespace_event(" "), + name_event("d"), + whitespace_event(" "), + Event::KeyValueSeparator, + whitespace_event(" "), + value_event("\"lol\"") + ] + .into() + }, + 3 + )) + ); + } + + #[test] + fn section_with_empty_value_simplified() { + let mut node = ParseNode::SectionHeader; + let section_data = b"[a] k="; + assert_eq!( + section(section_data, &mut node).unwrap(), + fully_consumed(( + Section { + header: parsed_section_header("a", None), + events: vec![ + whitespace_event(" "), + name_event("k"), + Event::KeyValueSeparator, + value_event(""), + ] + .into() + }, + 0 + )) + ); + + let section_data = b"[a] k=\n"; + assert_eq!( + section(section_data, &mut node).unwrap(), + fully_consumed(( + Section { + header: parsed_section_header("a", None), + events: vec![ + whitespace_event(" "), + name_event("k"), + Event::KeyValueSeparator, + value_event(""), + newline_event(), + ] + .into() + }, + 1 + )) + ); + } + + #[test] + fn section_with_empty_value() { + let mut node = ParseNode::SectionHeader; + let section_data = br#"[hello] + a = b + c= + d = "lol""#; + assert_eq!( + section(section_data, &mut node).unwrap(), + fully_consumed(( + Section { + header: parsed_section_header("hello", None), + events: vec![ + newline_event(), + whitespace_event(" "), + name_event("a"), + whitespace_event(" "), 
+ Event::KeyValueSeparator, + whitespace_event(" "), + value_event("b"), + newline_event(), + whitespace_event(" "), + name_event("c"), + Event::KeyValueSeparator, + value_event(""), + newline_event(), + whitespace_event(" "), + name_event("d"), + whitespace_event(" "), + Event::KeyValueSeparator, + whitespace_event(" "), + value_event("\"lol\"") + ] + .into() + }, + 3 + )) + ); + } + + #[test] + fn section_implicit_value() { + let mut node = ParseNode::SectionHeader; + assert_eq!( + section(b"[hello] c", &mut node).unwrap(), + fully_consumed(( + Section { + header: parsed_section_header("hello", None), + events: vec![whitespace_event(" "), name_event("c"), value_event("")].into() + }, + 0 + )) + ); + + assert_eq!( + section(b"[hello] c\nd", &mut node).unwrap(), + fully_consumed(( + Section { + header: parsed_section_header("hello", None), + events: vec![ + whitespace_event(" "), + name_event("c"), + value_event(""), + newline_event(), + name_event("d"), + value_event("") + ] + .into() + }, + 1 + )) + ); + } + + #[test] + fn section_very_commented() { + let mut node = ParseNode::SectionHeader; + let section_data = br#"[hello] ; commentA + a = b # commentB + ; commentC + ; commentD + c = d"#; + assert_eq!( + section(section_data, &mut node).unwrap(), + fully_consumed(( + Section { + header: parsed_section_header("hello", None), + events: vec![ + whitespace_event(" "), + comment_event(';', " commentA"), + newline_event(), + whitespace_event(" "), + name_event("a"), + whitespace_event(" "), + Event::KeyValueSeparator, + whitespace_event(" "), + value_event("b"), + whitespace_event(" "), + comment_event('#', " commentB"), + newline_event(), + whitespace_event(" "), + comment_event(';', " commentC"), + newline_event(), + whitespace_event(" "), + comment_event(';', " commentD"), + newline_event(), + whitespace_event(" "), + name_event("c"), + whitespace_event(" "), + Event::KeyValueSeparator, + whitespace_event(" "), + value_event("d"), + ] + .into() + }, + 4 + )) + ); + 
} + + #[test] + fn complex_continuation() { + let mut node = ParseNode::SectionHeader; + // This test is absolute hell. Good luck if this fails. + assert_eq!( + section(b"[section] a = 1 \"\\\"\\\na ; e \"\\\"\\\nd # \"b\t ; c", &mut node).unwrap(), + fully_consumed(( + Section { + header: parsed_section_header("section", None), + events: vec![ + whitespace_event(" "), + name_event("a"), + whitespace_event(" "), + Event::KeyValueSeparator, + whitespace_event(" "), + value_not_done_event(r#"1 "\""#), + newline_event(), + value_not_done_event(r#"a ; e "\""#), + newline_event(), + value_done_event("d"), + whitespace_event(" "), + comment_event('#', " \"b\t ; c"), + ] + .into() + }, + 2 + )) + ); + } + + #[test] + fn quote_split_over_two_lines() { + let mut node = ParseNode::SectionHeader; + assert_eq!( + section(b"[section \"a\"] b =\"\\\n;\";a", &mut node).unwrap(), + fully_consumed(( + Section { + header: parsed_section_header("section", (" ", "a")), + events: vec![ + whitespace_event(" "), + name_event("b"), + whitespace_event(" "), + Event::KeyValueSeparator, + value_not_done_event("\""), + newline_event(), + value_done_event(";\""), + comment_event(';', "a"), + ] + .into() + }, + 1 + )) + ); + } + + #[test] + fn section_handles_extraneous_whitespace_before_comment() { + let mut node = ParseNode::SectionHeader; + assert_eq!( + section(b"[s]hello #world", &mut node).unwrap(), + fully_consumed(( + Section { + header: parsed_section_header("s", None), + events: vec![ + name_event("hello"), + whitespace_event(" "), + value_event(""), + comment_event('#', "world"), + ] + .into() + }, + 0 + )) + ); + } +} + +mod value_continuation { + use bstr::ByteSlice; + + use crate::parse::{ + section, + tests::util::{into_events, newline_custom_event, newline_event, value_done_event, value_not_done_event}, + }; + + pub fn value_impl<'a>(i: &'a [u8], events: &mut section::Events<'a>) -> nom::IResult<&'a [u8], ()> { + super::value_impl(i, &mut |e| events.push(e)).map(|t| (t.0, ())) + 
} + + #[test] + fn simple_continuation() { + let mut events = section::Events::default(); + assert_eq!(value_impl(b"hello\\\nworld", &mut events).unwrap().0, b""); + assert_eq!( + events, + into_events(vec![ + value_not_done_event("hello"), + newline_event(), + value_done_event("world") + ]) + ); + } + + #[test] + fn continuation_with_whitespace() { + let mut events = section::Events::default(); + assert_eq!(value_impl(b"hello\\\n world", &mut events).unwrap().0, b""); + assert_eq!( + events, + into_events(vec![ + value_not_done_event("hello"), + newline_event(), + value_done_event(" world") + ]) + ); + + let mut events = section::Events::default(); + assert_eq!(value_impl(b"hello\\\r\n world", &mut events).unwrap().0, b""); + assert_eq!( + events, + into_events(vec![ + value_not_done_event("hello"), + newline_custom_event("\r\n"), + value_done_event(" world") + ]) + ); + + let mut events = section::Events::default(); + assert!( + value_impl(b"hello\\\r\r\n world", &mut events).is_err(), + "\\r must be followed by \\n" + ); + } + + #[test] + fn complex_continuation_with_leftover_comment() { + let mut events = section::Events::default(); + assert_eq!( + value_impl(b"1 \"\\\"\\\na ; e \"\\\"\\\nd # \"b\t ; c", &mut events) + .unwrap() + .0, + b" # \"b\t ; c" + ); + assert_eq!( + events, + into_events(vec![ + value_not_done_event(r#"1 "\""#), + newline_event(), + value_not_done_event(r#"a ; e "\""#), + newline_event(), + value_done_event("d") + ]) + ); + } + + #[test] + fn quote_split_over_two_lines_with_leftover_comment() { + let mut events = section::Events::default(); + assert_eq!(value_impl(b"\"\\\n;\";a", &mut events).unwrap().0, b";a"); + assert_eq!( + events, + into_events(vec![ + value_not_done_event("\""), + newline_event(), + value_done_event(";\"") + ]) + ); + + let mut events = section::Events::default(); + assert_eq!(value_impl(b"\"a\\\r\nb;\";c", &mut events).unwrap().0, b";c"); + assert_eq!( + events, + into_events(vec![ + value_not_done_event("\"a"), + 
newline_custom_event("\r\n"), + value_done_event("b;\"") + ]) + ); + } + + #[test] + fn quote_split_over_multiple_lines_without_surrounding_quotes_but_inner_quotes() { + let mut events = section::Events::default(); + assert_eq!( + value_impl( + br#"1\ +"2" a\ +\"3 b\"\ +4 ; comment "#, + &mut events + ) + .unwrap() + .0 + .as_bstr(), + b" ; comment ".as_bstr() + ); + assert_eq!( + events, + into_events(vec![ + value_not_done_event("1"), + newline_event(), + value_not_done_event("\"2\" a"), + newline_event(), + value_not_done_event("\\\"3 b\\\""), + newline_event(), + value_done_event("4") + ]) + ); + } + + #[test] + fn quote_split_over_multiple_lines_with_surrounding_quotes() { + let mut events = section::Events::default(); + assert_eq!( + value_impl( + br#""1\ +"2" a\ +\"3 b\"\ +4 " ; comment "#, + &mut events + ) + .unwrap() + .0 + .as_bstr(), + b" ; comment ".as_bstr() + ); + assert_eq!( + events, + into_events(vec![ + value_not_done_event("\"1"), + newline_event(), + value_not_done_event("\"2\" a"), + newline_event(), + value_not_done_event("\\\"3 b\\\""), + newline_event(), + value_done_event("4 \"") + ]) + ); + } +} + +mod value_no_continuation { + use super::value_continuation::value_impl; + use crate::parse::{ + section, + tests::util::{into_events, value_event}, + }; + + #[test] + fn no_comment() { + let mut events = section::Events::default(); + assert_eq!(value_impl(b"hello", &mut events).unwrap().0, b""); + assert_eq!(events, into_events(vec![value_event("hello")])); + } + + #[test] + fn windows_newline() { + let mut events = section::Events::default(); + assert_eq!(value_impl(b"hi\r\nrest", &mut events).unwrap().0, b"\r\nrest"); + assert_eq!(events, into_events(vec![value_event("hi")])); + + events.clear(); + assert_eq!(value_impl(b"hi\r\r\r\nrest", &mut events).unwrap().0, b"\r\r\r\nrest"); + assert_eq!(events, into_events(vec![value_event("hi")])); + } + + #[test] + fn no_comment_newline() { + let mut events = section::Events::default(); + 
assert_eq!(value_impl(b"hello\na", &mut events).unwrap().0, b"\na"); + assert_eq!(events, into_events(vec![value_event("hello")])); + } + + #[test] + fn semicolon_comment_not_consumed() { + let mut events = section::Events::default(); + assert_eq!(value_impl(b"hello;world", &mut events).unwrap().0, b";world"); + assert_eq!(events, into_events(vec![value_event("hello")])); + } + + #[test] + fn octothorpe_comment_not_consumed() { + let mut events = section::Events::default(); + assert_eq!(value_impl(b"hello#world", &mut events).unwrap().0, b"#world"); + assert_eq!(events, into_events(vec![value_event("hello")])); + } + + #[test] + fn values_with_extraneous_whitespace_without_comment() { + let mut events = section::Events::default(); + assert_eq!( + value_impl(b"hello ", &mut events).unwrap().0, + b" " + ); + assert_eq!(events, into_events(vec![value_event("hello")])); + } + + #[test] + fn values_with_extraneous_whitespace_before_comment() { + let mut events = section::Events::default(); + assert_eq!( + value_impl(b"hello #world", &mut events).unwrap().0, + b" #world" + ); + assert_eq!(events, into_events(vec![value_event("hello")])); + + let mut events = section::Events::default(); + assert_eq!( + value_impl(b"hello ;world", &mut events).unwrap().0, + b" ;world" + ); + assert_eq!(events, into_events(vec![value_event("hello")])); + } + + #[test] + fn trans_escaped_comment_marker_not_consumed() { + let mut events = section::Events::default(); + assert_eq!(value_impl(br##"hello"#"world; a"##, &mut events).unwrap().0, b"; a"); + assert_eq!(events, into_events(vec![value_event(r##"hello"#"world"##)])); + } + + #[test] + fn complex_test() { + let mut events = section::Events::default(); + assert_eq!(value_impl(br#"value";";ahhhh"#, &mut events).unwrap().0, b";ahhhh"); + assert_eq!(events, into_events(vec![value_event(r#"value";""#)])); + } + + #[test] + fn garbage_after_continuation_is_err() { + assert!(value_impl(b"hello \\afwjdls", &mut Default::default()).is_err()); + } 
+ + #[test] + fn invalid_escape() { + assert!(value_impl(br#"\x"#, &mut Default::default()).is_err()); + } + + #[test] + fn incomplete_quote() { + assert!(value_impl(br#"hello "world"#, &mut Default::default()).is_err()); + } + + #[test] + fn incomplete_escape() { + assert!(value_impl(br#"hello world\"#, &mut Default::default()).is_err()); + } +} + +mod key_value_pair { + use crate::parse::{ + error::ParseNode, + section, + tests::util::{into_events, name_event, value_event, whitespace_event}, + Event, + }; + + fn key_value<'a>( + i: &'a [u8], + node: &mut ParseNode, + events: &mut section::Events<'a>, + ) -> nom::IResult<&'a [u8], ()> { + super::key_value_pair(i, node, &mut |e| events.push(e)).map(|t| (t.0, ())) + } + + #[test] + fn nonascii_is_allowed_for_values_but_not_for_keys() { + let mut node = ParseNode::SectionHeader; + let mut vec = Default::default(); + assert!(key_value("你好".as_bytes(), &mut node, &mut vec).is_err()); + assert!(key_value("a = 你好 ".as_bytes(), &mut node, &mut vec).is_ok()); + assert_eq!( + vec, + into_events(vec![ + name_event("a"), + whitespace_event(" "), + Event::KeyValueSeparator, + whitespace_event(" "), + value_event("你好") + ]) + ); + } + + #[test] + fn whitespace_is_not_ambiguous() { + let mut node = ParseNode::SectionHeader; + let mut vec = Default::default(); + assert!(key_value(b"a =b", &mut node, &mut vec).is_ok()); + assert_eq!( + vec, + into_events(vec![ + name_event("a"), + whitespace_event(" "), + Event::KeyValueSeparator, + value_event("b") + ]) + ); + + let mut vec = Default::default(); + assert!(key_value(b"a= b", &mut node, &mut vec).is_ok()); + assert_eq!( + vec, + into_events(vec![ + name_event("a"), + Event::KeyValueSeparator, + whitespace_event(" "), + value_event("b") + ]) + ); + } +} + +mod comment { + use super::comment; + use crate::parse::tests::util::{comment as parsed_comment, fully_consumed}; + + #[test] + fn semicolon() { + assert_eq!( + comment(b"; this is a semicolon comment").unwrap(), + 
fully_consumed(parsed_comment(';', " this is a semicolon comment")), + ); + } + + #[test] + fn octothorpe() { + assert_eq!( + comment(b"# this is an octothorpe comment").unwrap(), + fully_consumed(parsed_comment('#', " this is an octothorpe comment")), + ); + } + + #[test] + fn multiple_markers() { + assert_eq!( + comment(b"###### this is an octothorpe comment").unwrap(), + fully_consumed(parsed_comment('#', "##### this is an octothorpe comment")), + ); + } +} diff --git a/vendor/gix-config/src/parse/section/header.rs b/vendor/gix-config/src/parse/section/header.rs new file mode 100644 index 000000000..341edcdd5 --- /dev/null +++ b/vendor/gix-config/src/parse/section/header.rs @@ -0,0 +1,180 @@ +use std::{borrow::Cow, fmt::Display}; + +use bstr::{BStr, BString, ByteSlice, ByteVec}; + +use crate::parse::{ + section::{into_cow_bstr, Header, Name}, + Event, +}; + +/// The error returned by [`Header::new(…)`][super::Header::new()]. +#[derive(Debug, PartialOrd, PartialEq, Eq, thiserror::Error)] +#[allow(missing_docs)] +pub enum Error { + #[error("section names can only be ascii, '-'")] + InvalidName, + #[error("sub-section names must not contain newlines or null bytes")] + InvalidSubSection, +} + +impl<'a> Header<'a> { + /// Instantiate a new header either with a section `name`, e.g. "core" serializing to `[core]` + /// or `[remote "origin"]` for `subsection` being "origin" and `name` being "remote". Fails with [`Error`] if either part contains disallowed bytes. + pub fn new( + name: impl Into<Cow<'a, str>>, + subsection: impl Into<Option<Cow<'a, BStr>>>, + ) -> Result<Header<'a>, Error> { + let name = Name(validated_name(into_cow_bstr(name.into()))?); + if let Some(subsection_name) = subsection.into() { + Ok(Header { + name, + separator: Some(Cow::Borrowed(" ".into())), + subsection_name: Some(validated_subsection(subsection_name)?), + }) + } else { + Ok(Header { + name, + separator: None, + subsection_name: None, + }) + } + } +} + +/// Return true if `name` is valid as subsection name, like `origin` in `[remote "origin"]`. 
+pub fn is_valid_subsection(name: &BStr) -> bool { + name.find_byteset(b"\n\0").is_none() +} + +fn validated_subsection(name: Cow<'_, BStr>) -> Result, Error> { + is_valid_subsection(name.as_ref()) + .then_some(name) + .ok_or(Error::InvalidSubSection) +} + +fn validated_name(name: Cow<'_, BStr>) -> Result, Error> { + name.iter() + .all(|b| b.is_ascii_alphanumeric() || *b == b'-') + .then_some(name) + .ok_or(Error::InvalidName) +} + +#[cfg(test)] +mod tests { + use super::*; + + #[test] + fn empty_header_names_are_legal() { + assert!(Header::new("", None).is_ok(), "yes, git allows this, so do we"); + } + + #[test] + fn empty_header_sub_names_are_legal() { + assert!( + Header::new("remote", Some(Cow::Borrowed("".into()))).is_ok(), + "yes, git allows this, so do we" + ); + } +} + +impl Header<'_> { + ///Return true if this is a header like `[legacy.subsection]`, or false otherwise. + pub fn is_legacy(&self) -> bool { + self.separator.as_deref().map_or(false, |n| n == ".") + } + + /// Return the subsection name, if present, i.e. "origin" in `[remote "origin"]`. + /// + /// It is parsed without quotes, and with escapes folded + /// into their resulting characters. + /// Thus during serialization, escapes and quotes must be re-added. + /// This makes it possible to use [`Event`] data for lookups directly. + pub fn subsection_name(&self) -> Option<&BStr> { + self.subsection_name.as_deref() + } + + /// Return the name of the header, like "remote" in `[remote "origin"]`. + pub fn name(&self) -> &BStr { + &self.name + } + + /// Serialize this type into a `BString` for convenience. + /// + /// Note that `to_string()` can also be used, but might not be lossless. + #[must_use] + pub fn to_bstring(&self) -> BString { + let mut buf = Vec::new(); + self.write_to(&mut buf).expect("io error impossible"); + buf.into() + } + + /// Stream ourselves to the given `out`, in order to reproduce this header mostly losslessly + /// as it was parsed. 
+ pub fn write_to(&self, mut out: impl std::io::Write) -> std::io::Result<()> { + out.write_all(b"[")?; + out.write_all(&self.name)?; + + if let (Some(sep), Some(subsection)) = (&self.separator, &self.subsection_name) { + let sep = sep.as_ref(); + out.write_all(sep)?; + if sep == "." { + out.write_all(subsection.as_ref())?; + } else { + out.write_all(b"\"")?; + out.write_all(escape_subsection(subsection.as_ref()).as_ref())?; + out.write_all(b"\"")?; + } + } + + out.write_all(b"]") + } + + /// Turn this instance into a fully owned one with `'static` lifetime. + #[must_use] + pub fn to_owned(&self) -> Header<'static> { + Header { + name: self.name.to_owned(), + separator: self.separator.clone().map(|v| Cow::Owned(v.into_owned())), + subsection_name: self.subsection_name.clone().map(|v| Cow::Owned(v.into_owned())), + } + } +} + +fn escape_subsection(name: &BStr) -> Cow<'_, BStr> { + if name.find_byteset(b"\\\"").is_none() { + return name.into(); + } + let mut buf = Vec::with_capacity(name.len()); + for b in name.iter().copied() { + match b { + b'\\' => buf.push_str(br#"\\"#), + b'"' => buf.push_str(br#"\""#), + _ => buf.push(b), + } + } + BString::from(buf).into() +} + +impl Display for Header<'_> { + fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result { + Display::fmt(&self.to_bstring(), f) + } +} + +impl From> for BString { + fn from(header: Header<'_>) -> Self { + header.into() + } +} + +impl From<&Header<'_>> for BString { + fn from(header: &Header<'_>) -> Self { + header.to_bstring() + } +} + +impl<'a> From> for Event<'a> { + fn from(header: Header<'_>) -> Event<'_> { + Event::SectionHeader(header) + } +} diff --git a/vendor/gix-config/src/parse/section/mod.rs b/vendor/gix-config/src/parse/section/mod.rs new file mode 100644 index 000000000..7ba08b87d --- /dev/null +++ b/vendor/gix-config/src/parse/section/mod.rs @@ -0,0 +1,187 @@ +use std::{borrow::Cow, fmt::Display}; + +use bstr::BStr; +use smallvec::SmallVec; + +use crate::parse::{Event, 
Section}; + +/// +pub mod header; + +pub(crate) mod unvalidated; + +/// A container for events, avoiding heap allocations in typical files. +pub type Events<'a> = SmallVec<[Event<'a>; 64]>; + +/// A parsed section header, containing a name and optionally a subsection name. +#[derive(Clone, Eq, PartialEq, Ord, PartialOrd, Hash, Debug)] +pub struct Header<'a> { + /// The name of the header. + pub(crate) name: Name<'a>, + /// The separator used to determine if the section contains a subsection. + /// This is either a period `.` or a string of whitespace. Note that + /// reconstruction of subsection format is dependent on this value. If this + /// is all whitespace, then the subsection name needs to be surrounded by + /// quotes to have perfect reconstruction. + pub(crate) separator: Option<Cow<'a, BStr>>, + pub(crate) subsection_name: Option<Cow<'a, BStr>>, // the subsection name without surrounding quotes, if there is one +} + +impl Section<'_> { + /// Turn this instance into a fully owned one with `'static` lifetime. + #[must_use] + pub fn to_owned(&self) -> Section<'static> { + Section { + header: self.header.to_owned(), + events: self.events.iter().map(Event::to_owned).collect(), + } + } +} + +impl Display for Section<'_> { + fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result { + write!(f, "{}", self.header)?; + for event in &self.events { + event.fmt(f)?; + } + Ok(()) + } +} + +mod types { + macro_rules! generate_case_insensitive { + ($name:ident, $module:ident, $err_doc:literal, $validate:ident, $cow_inner_type:ty, $comment:literal) => { + /// + pub mod $module { + /// The error returned when `TryFrom` is invoked to create an instance. 
+ #[derive(Debug, thiserror::Error, Copy, Clone)] + #[error($err_doc)] + pub struct Error; + } + + #[doc = $comment] + #[derive(Clone, Eq, Debug, Default)] + pub struct $name<'a>(pub(crate) std::borrow::Cow<'a, $cow_inner_type>); + + impl<'a> $name<'a> { + pub(crate) fn from_str_unchecked(s: &'a str) -> Self { + $name(std::borrow::Cow::Borrowed(s.into())) + } + /// Turn this instance into a fully owned one with `'static` lifetime. + #[must_use] + pub fn to_owned(&self) -> $name<'static> { + $name(std::borrow::Cow::Owned(self.0.clone().into_owned())) + } + } + + impl PartialEq for $name<'_> { + fn eq(&self, other: &Self) -> bool { + self.0.eq_ignore_ascii_case(&other.0) + } + } + + impl std::fmt::Display for $name<'_> { + fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result { + self.0.fmt(f) + } + } + + impl PartialOrd for $name<'_> { + fn partial_cmp(&self, other: &Self) -> Option<std::cmp::Ordering> { + self.cmp(other).into() + } + } + + impl Ord for $name<'_> { + fn cmp(&self, other: &Self) -> std::cmp::Ordering { + let a = self.0.iter().map(|c| c.to_ascii_lowercase()); + let b = other.0.iter().map(|c| c.to_ascii_lowercase()); + a.cmp(b) + } + } + + impl std::hash::Hash for $name<'_> { + fn hash<H: std::hash::Hasher>(&self, state: &mut H) { + for b in self.0.iter() { // hash lowercased bytes so `Hash` agrees with the case-insensitive `PartialEq` + b.to_ascii_lowercase().hash(state); + } + } + } + + impl<'a> std::convert::TryFrom<&'a str> for $name<'a> { + type Error = $module::Error; + + fn try_from(s: &'a str) -> Result<Self, Self::Error> { + Self::try_from(std::borrow::Cow::Borrowed(bstr::ByteSlice::as_bstr(s.as_bytes()))) + } + } + + impl<'a> std::convert::TryFrom<String> for $name<'a> { + type Error = $module::Error; + + fn try_from(s: String) -> Result<Self, Self::Error> { + Self::try_from(std::borrow::Cow::Owned(bstr::BString::from(s))) + } + } + + impl<'a> std::convert::TryFrom<std::borrow::Cow<'a, bstr::BStr>> for $name<'a> { + type Error = $module::Error; + + fn try_from(s: std::borrow::Cow<'a, bstr::BStr>) -> Result<Self, Self::Error> { + if $validate(s.as_ref()) { + Ok(Self(s)) + } else { + Err($module::Error) + } + } + } + + impl<'a> std::ops::Deref for 
$name<'a> { + type Target = $cow_inner_type; + + fn deref(&self) -> &Self::Target { + &self.0 + } + } + + impl<'a> std::convert::AsRef<str> for $name<'a> { + fn as_ref(&self) -> &str { + std::str::from_utf8(self.0.as_ref()).expect("only valid UTF8 makes it through our validation") + } + } + }; + } + + fn is_valid_name(n: &bstr::BStr) -> bool { + !n.is_empty() && n.iter().all(|b| b.is_ascii_alphanumeric() || *b == b'-') + } + fn is_valid_key(n: &bstr::BStr) -> bool { + is_valid_name(n) && n[0].is_ascii_alphabetic() // indexing cannot panic: `is_valid_name()` already rejected empty input + } + + generate_case_insensitive!( + Name, + name, + "Valid names consist of alphanumeric characters or dashes.", + is_valid_name, + bstr::BStr, + "Wrapper struct for section header names, like `remote`, since these are case-insensitive." + ); + + generate_case_insensitive!( + Key, + key, + "Valid keys consist alphanumeric characters or dashes, starting with an alphabetic character.", + is_valid_key, + bstr::BStr, + "Wrapper struct for key names, like `path` in `include.path`, since keys are case-insensitive." + ); +} +pub use types::{key, name, Key, Name}; + +pub(crate) fn into_cow_bstr(c: Cow<'_, str>) -> Cow<'_, BStr> { + match c { + Cow::Borrowed(s) => Cow::Borrowed(s.into()), + Cow::Owned(s) => Cow::Owned(s.into()), + } +} diff --git a/vendor/gix-config/src/parse/section/unvalidated.rs b/vendor/gix-config/src/parse/section/unvalidated.rs new file mode 100644 index 000000000..1710837fe --- /dev/null +++ b/vendor/gix-config/src/parse/section/unvalidated.rs @@ -0,0 +1,25 @@ +use bstr::{BStr, ByteSlice}; + +/// An unvalidated parse result of a key for a section, parsing input like `remote.origin` or `core`. +#[derive(Debug, PartialEq, Ord, PartialOrd, Eq, Hash, Clone, Copy)] +pub struct Key<'a> { + /// The name of the section, like `remote` in `remote.origin`. + pub section_name: &'a str, + /// The name of the sub-section, like `origin` in `remote.origin`. 
+ pub subsection_name: Option<&'a BStr>, +} + +impl<'a> Key<'a> { + /// Parse `input` like `remote.origin` or `core` as a `Key` to make its section specific fields available, + /// or `None` if the section name isn't valid UTF-8. Only the first `.` separates; everything after it is the subsection name. + /// Note that `input` isn't validated, and is `str` as ascii is a subset of UTF-8 which is required for any valid keys. + pub fn parse(input: impl Into<&'a BStr>) -> Option<Self> { + let input = input.into(); + let mut tokens = input.splitn(2, |b| *b == b'.'); + + Some(Key { + section_name: tokens.next()?.to_str().ok()?, + subsection_name: tokens.next().map(Into::into), + }) + } +} diff --git a/vendor/gix-config/src/parse/tests.rs b/vendor/gix-config/src/parse/tests.rs new file mode 100644 index 000000000..2a2853c4c --- /dev/null +++ b/vendor/gix-config/src/parse/tests.rs @@ -0,0 +1,162 @@ +mod section { + + mod header { + mod unvalidated { + use crate::parse::section::unvalidated::Key; + + #[test] + fn section_name_only() { + assert_eq!( + Key::parse("core").unwrap(), + Key { + section_name: "core", + subsection_name: None + } + ); + } + + #[test] + fn section_name_and_subsection() { + assert_eq!( + Key::parse("core.bare").unwrap(), + Key { + section_name: "core", + subsection_name: Some("bare".into()) + } + ); + } + + #[test] + fn section_name_and_subsection_with_separators() { + assert_eq!( + Key::parse("remote.https:///home/user.git").unwrap(), + Key { + section_name: "remote", + subsection_name: Some("https:///home/user.git".into()) + } + ); + } + } + + mod write_to { + use std::borrow::Cow; + + use crate::parse::section; + + fn header(name: &str, subsection: impl Into<Option<(&'static str, &'static str)>>) -> section::Header<'_> { + let name = section::Name(Cow::Borrowed(name.into())); + if let Some((separator, subsection_name)) = subsection.into() { + section::Header { + name, + separator: Some(Cow::Borrowed(separator.into())), + subsection_name: Some(Cow::Borrowed(subsection_name.into())), + } + } else { + section::Header { + name, + separator: None, + 
subsection_name: None, + } + } + } + + #[test] + fn legacy_subsection_format_does_not_use_escapes() { + let invalid = header("invalid", Some((".", "\\ \""))); + assert_eq!( + invalid.to_bstring(), + "[invalid.\\ \"]", + "no escaping happens for legacy subsections" + ); + assert!(invalid.is_legacy()); + } + + #[test] + fn subsections_escape_two_characters_only() { + let invalid = header("invalid", Some((" ", "\\ \"\npost newline"))); + assert_eq!( + invalid.to_bstring(), + "[invalid \"\\\\ \\\"\npost newline\"]", + "newlines are actually invalid in subsection, but they are possible due to unvalidated instance creation" + ); + assert!(!invalid.is_legacy()); + } + } + } +} + +pub(crate) mod util { + //! This module is only included for tests, and contains common unit test helper + //! functions. + + use std::{borrow::Cow, convert::TryFrom}; + + use crate::parse::{section, Comment, Event}; + + pub fn into_events(events: Vec<Event<'_>>) -> section::Events<'_> { + events.into() + } + + pub fn section_header( + name: &str, + subsection: impl Into<Option<(&'static str, &'static str)>>, + ) -> section::Header<'_> { + let name = section::Name::try_from(name).unwrap(); + if let Some((separator, subsection_name)) = subsection.into() { + section::Header { + name, + separator: Some(Cow::Borrowed(separator.into())), + subsection_name: Some(Cow::Borrowed(subsection_name.into())), + } + } else { + section::Header { + name, + separator: None, + subsection_name: None, + } + } + } + + pub(crate) fn name_event(name: &'static str) -> Event<'static> { + Event::SectionKey(section::Key(Cow::Borrowed(name.into()))) + } + + pub(crate) fn value_event(value: &'static str) -> Event<'static> { + Event::Value(Cow::Borrowed(value.into())) + } + + pub(crate) fn value_not_done_event(value: &'static str) -> Event<'static> { + Event::ValueNotDone(Cow::Borrowed(value.into())) + } + + pub(crate) fn value_done_event(value: &'static str) -> Event<'static> { + Event::ValueDone(Cow::Borrowed(value.into())) + } + + pub(crate) fn newline_event() -> 
Event<'static> { + newline_custom_event("\n") + } + + pub(crate) fn newline_custom_event(value: &'static str) -> Event<'static> { + Event::Newline(Cow::Borrowed(value.into())) + } + + pub(crate) fn whitespace_event(value: &'static str) -> Event<'static> { + Event::Whitespace(Cow::Borrowed(value.into())) + } + + pub(crate) fn comment_event(tag: char, msg: &'static str) -> Event<'static> { + Event::Comment(comment(tag, msg)) + } + + pub(crate) fn comment(comment_tag: char, comment: &'static str) -> Comment<'static> { + Comment { + tag: comment_tag as u8, + text: Cow::Borrowed(comment.into()), + } + } + + pub(crate) const fn fully_consumed(t: T) -> (&'static [u8], T) { + (&[], t) + } +} diff --git a/vendor/gix-config/src/source.rs b/vendor/gix-config/src/source.rs new file mode 100644 index 000000000..b1991e6b4 --- /dev/null +++ b/vendor/gix-config/src/source.rs @@ -0,0 +1,163 @@ +use std::{ + borrow::Cow, + ffi::OsString, + path::{Path, PathBuf}, +}; + +use crate::Source; + +/// The category of a [`Source`], in order of ascending precedence. +#[derive(Clone, Copy, Debug, Eq, PartialEq, Hash, Ord, PartialOrd)] +pub enum Kind { + /// A special configuration file that ships with the git installation, and is thus tied to the used git binary. + GitInstallation, + /// A source shared for the entire system. + System, + /// Application specific configuration unique for each user of the `System`. + Global, + /// Configuration relevant only to the repository, possibly including the worktree. + Repository, + /// Configuration specified after all other configuration was loaded for the purpose of overrides. + Override, +} + +impl Kind { + /// Return a list of sources associated with this `Kind` of source, in order of ascending precedence. 
+ pub fn sources(self) -> &'static [Source] { + let src = match self { + Kind::GitInstallation => &[Source::GitInstallation] as &[_], + Kind::System => &[Source::System], + Kind::Global => &[Source::Git, Source::User], + Kind::Repository => &[Source::Local, Source::Worktree], + Kind::Override => &[Source::Env, Source::Cli, Source::Api], // NOTE(review): `Source::kind()` also maps `EnvOverride` to `Kind::Override`, yet it is not listed here - confirm that is intended + }; + debug_assert!( + src.iter().all(|src| src.kind() == self), + "BUG: classification of source has to match the ordering here, see `Source::kind()`" + ); + src + } +} + +impl Source { + /// Return the [`Kind`], i.e. the category, that this source belongs to. + pub const fn kind(self) -> Kind { + use Source::*; + match self { + GitInstallation => Kind::GitInstallation, + System => Kind::System, + Git | User => Kind::Global, + Local | Worktree => Kind::Repository, + Env | Cli | Api | EnvOverride => Kind::Override, + } + } + + /// Returns the location at which a file of this type would be stored, or `None` if + /// there is no notion of persistent storage for this source, with `env_var` to obtain environment variables. + /// Note that the location can be relative for repository-local sources like `Local` and `Worktree`, + /// and the caller has to know which base it is relative to, namely the `common_dir` in the `Local` case + /// and the `git_dir` in the `Worktree` case. + /// Be aware that depending on environment overrides, multiple scopes might return the same path, which should + /// only be loaded once nonetheless. + /// + /// With `env_var` it becomes possible to prevent accessing environment variables entirely to comply with `gix-sec` + /// permissions for example. 
+ pub fn storage_location(self, env_var: &mut dyn FnMut(&str) -> Option) -> Option> { + use Source::*; + match self { + GitInstallation => git::install_config_path().map(gix_path::from_bstr), + System => env_var("GIT_CONFIG_NO_SYSTEM") + .is_none() + .then(|| PathBuf::from(env_var("GIT_CONFIG_SYSTEM").unwrap_or_else(|| "/etc/gitconfig".into())).into()), + Git => match env_var("GIT_CONFIG_GLOBAL") { + Some(global_override) => Some(PathBuf::from(global_override).into()), + None => env_var("XDG_CONFIG_HOME") + .map(|home| { + let mut p = PathBuf::from(home); + p.push("git"); + p.push("config"); + p + }) + .or_else(|| { + env_var("HOME").map(|home| { + let mut p = PathBuf::from(home); + p.push(".config"); + p.push("git"); + p.push("config"); + p + }) + }) + .map(Cow::Owned), + }, + User => env_var("GIT_CONFIG_GLOBAL") + .map(|global_override| PathBuf::from(global_override).into()) + .or_else(|| { + env_var("HOME").map(|home| { + let mut p = PathBuf::from(home); + p.push(".gitconfig"); + p.into() + }) + }), + Local => Some(Path::new("config").into()), + Worktree => Some(Path::new("config.worktree").into()), + Env | Cli | Api | EnvOverride => None, + } + } +} + +/// Environment information involving the `git` program itself. +mod git { + use std::process::{Command, Stdio}; + + use bstr::{BStr, BString, ByteSlice}; + + /// Returns the file that contains git configuration coming with the installation of the `git` file in the current `PATH`, or `None` + /// if no `git` executable was found or there were other errors during execution. 
+ pub fn install_config_path() -> Option<&'static BStr> { + static PATH: once_cell::sync::Lazy> = once_cell::sync::Lazy::new(|| { + let mut cmd = Command::new(if cfg!(windows) { "git.exe" } else { "git" }); + cmd.args(["config", "-l", "--show-origin"]) + .stdin(Stdio::null()) + .stderr(Stdio::null()); + first_file_from_config_with_origin(cmd.output().ok()?.stdout.as_slice().into()).map(ToOwned::to_owned) + }); + PATH.as_ref().map(|b| b.as_ref()) + } + + fn first_file_from_config_with_origin(source: &BStr) -> Option<&BStr> { + let file = source.strip_prefix(b"file:")?; + let end_pos = file.find_byte(b'\t')?; + file[..end_pos].as_bstr().into() + } + + #[cfg(test)] + mod tests { + #[test] + fn first_file_from_config_with_origin() { + let macos = "file:/Applications/Xcode.app/Contents/Developer/usr/share/git-core/gitconfig credential.helper=osxkeychain\nfile:/Users/byron/.gitconfig push.default=simple\n"; + let win_msys = + "file:C:/git-sdk-64/etc/gitconfig core.symlinks=false\r\nfile:C:/git-sdk-64/etc/gitconfig core.autocrlf=true"; + let win_cmd = "file:C:/Program Files/Git/etc/gitconfig diff.astextplain.textconv=astextplain\r\nfile:C:/Program Files/Git/etc/gitconfig filter.lfs.clean=gix-lfs clean -- %f\r\n"; + let linux = "file:/home/parallels/.gitconfig core.excludesfile=~/.gitignore\n"; + let bogus = "something unexpected"; + let empty = ""; + + for (source, expected) in [ + ( + macos, + Some("/Applications/Xcode.app/Contents/Developer/usr/share/git-core/gitconfig"), + ), + (win_msys, Some("C:/git-sdk-64/etc/gitconfig")), + (win_cmd, Some("C:/Program Files/Git/etc/gitconfig")), + (linux, Some("/home/parallels/.gitconfig")), + (bogus, None), + (empty, None), + ] { + assert_eq!( + super::first_file_from_config_with_origin(source.into()), + expected.map(Into::into) + ); + } + } + } +} diff --git a/vendor/gix-config/src/types.rs b/vendor/gix-config/src/types.rs new file mode 100644 index 000000000..7110906b8 --- /dev/null +++ b/vendor/gix-config/src/types.rs @@ -0,0 
+1,124 @@ +use std::collections::{HashMap, VecDeque}; + +use gix_features::threading::OwnShared; + +use crate::{ + file, + file::{Metadata, SectionBodyIdsLut, SectionId}, + parse::section, +}; + +/// A list of known sources for git configuration in order of ascending precedence. +/// +/// This means values from the first one will be overridden by values in the second one, and so forth. +/// Note that included files via `include.path` and `includeIf.<condition>.path` inherit +/// their source. +#[derive(Clone, Copy, Debug, Eq, PartialEq, Hash, Ord, PartialOrd)] +pub enum Source { + /// A special configuration file that ships with the git installation, and is thus tied to the used git binary. + GitInstallation, + /// System-wide configuration path. This is defined as + /// `$(prefix)/etc/gitconfig` (where prefix is the git-installation directory). + System, + /// A platform defined location for where a user's git application configuration should be located. + /// If `$XDG_CONFIG_HOME` is not set or empty, `$HOME/.config/git/config` will be used + /// on unix. + Git, + /// This is usually `~/.gitconfig` on unix. + User, + /// The configuration of the repository itself, located in `.git/config`. + Local, + /// Configuration specific to a worktree as created with `git worktree` and + /// typically located in `$GIT_DIR/config.worktree` if `extensions.worktreeConfig` + /// is enabled. + Worktree, + /// Values parsed from the environment using `GIT_CONFIG_COUNT`, + /// `GIT_CONFIG_KEY_N` and `GIT_CONFIG_VALUE_N` where `N` is incremented from 0 up to the + /// value of `GIT_CONFIG_COUNT`. + Env, + /// Values set from the command-line, typically controlled by the user running a program. + Cli, + /// Entirely internal from a programmatic source, and can be used to have (near final) say in configuration values. + Api, + /// Values obtained from specific environment variables that override values in the git configuration. 
+ /// + /// For example, `HTTP_PROXY` overrides `http.proxy`, no matter where it is specified, and thus + /// controls the value similar to how it's done in `git`. + EnvOverride, +} + +/// High level `gix-config` reader and writer. +/// +/// This is the full-featured implementation that can deserialize, serialize, +/// and edit `gix-config` files without loss of whitespace or comments. +/// +/// # 'multivar' behavior +/// +/// `git` is flexible enough to allow users to set a key multiple times in +/// any number of identically named sections. When this is the case, the key +/// is known as a _"multivar"_. In this case, [`raw_value()`] follows the +/// "last one wins". +/// +/// Concretely, the following config has a multivar, `a`, with the values +/// of `b`, `c`, and `d`, while `e` is a single variable with the value +/// `f g h`. +/// +/// ```text +/// [core] +/// a = b +/// a = c +/// [core] +/// a = d +/// e = f g h +/// ``` +/// +/// Calling methods that fetch or set only one value (such as [`raw_value()`]) +/// key `a` with the above config will fetch `d` or replace `d`, since the last +/// valid config key/value pair is `a = d`: +/// +/// # Filtering +/// +/// All methods exist in a `*_filter(…, filter)` version to allow skipping sections by +/// their metadata. That way it's possible to select values based on their `gix_sec::Trust` +/// for example, or by their location. +/// +/// Note that the filter may be executed even on sections that don't contain the key in question, +/// even though the section will have matched the `name` and `subsection_name` respectively. +/// +/// ``` +/// # use std::borrow::Cow; +/// # use std::convert::TryFrom; +/// # let gix_config = gix_config::File::try_from("[core]a=b\n[core]\na=c\na=d").unwrap(); +/// assert_eq!(gix_config.raw_value("core", None, "a").unwrap().as_ref(), "d"); +/// ``` +/// +/// Consider the `multi` variants of the methods instead, if you want to work +/// with all values. 
+/// +/// # Equality +/// +/// In order to make it useful, equality will ignore all non-value bearing information, hence compare +/// only sections and their names, as well as all of their values. The ordering matters, of course. +/// +/// [`raw_value()`]: Self::raw_value +#[derive(Eq, Clone, Debug, Default)] +pub struct File<'event> { + /// The list of events that occur before any section. Since a + /// `gix-config` file prohibits global values, this vec is limited to only + /// comment, newline, and whitespace events. + pub(crate) frontmatter_events: crate::parse::FrontMatterEvents<'event>, + /// Frontmatter events to be placed after the given section. + pub(crate) frontmatter_post_section: HashMap>, + /// Section name to section id lookup tree, with section bodies for subsections being in a non-terminal + /// variant of `SectionBodyIds`. + pub(crate) section_lookup_tree: HashMap, Vec>>, + /// This indirection with the SectionId as the key is critical to flexibly + /// supporting `gix-config` sections, as duplicated keys are permitted. + pub(crate) sections: HashMap>, + /// Internal monotonically increasing counter for section ids. + pub(crate) section_id_counter: usize, + /// Section order for output ordering. + pub(crate) section_order: VecDeque, + /// The source of the File itself, which is attached to new sections automatically. 
+ pub(crate) meta: OwnShared, +} diff --git a/vendor/gix-config/src/value/mod.rs b/vendor/gix-config/src/value/mod.rs new file mode 100644 index 000000000..94f2f9758 --- /dev/null +++ b/vendor/gix-config/src/value/mod.rs @@ -0,0 +1,4 @@ +pub use gix_config_value::Error; + +mod normalize; +pub use normalize::{normalize, normalize_bstr, normalize_bstring}; diff --git a/vendor/gix-config/src/value/normalize.rs b/vendor/gix-config/src/value/normalize.rs new file mode 100644 index 000000000..1f16b05d8 --- /dev/null +++ b/vendor/gix-config/src/value/normalize.rs @@ -0,0 +1,110 @@ +use std::borrow::Cow; + +use bstr::{BStr, BString, ByteSlice}; + +/// Removes quotes, if any, from the provided inputs, and transforms +/// the 3 escape sequences `\n`, `\t` and `\b` into newline and tab +/// respectively, while `\b` will remove the previous character. +/// +/// It assumes the input contains a even number of unescaped quotes, +/// and will unescape escaped quotes and everything else (even though the latter +/// would have been rejected in the parsing stage). +/// +/// The return values should be safe for value interpretation. +/// +/// This has optimizations for fully-quoted values, where the returned value +/// will be a borrowed reference if the only mutation necessary is to unquote +/// the value. +/// +/// This is the function used to normalize raw values from higher level +/// abstractions. Generally speaking these +/// high level abstractions will handle normalization for you, and you do not +/// need to call this yourself. However, if you're directly handling events +/// from the parser, you may want to use this to help with value interpretation. +/// +/// Generally speaking, you'll want to use one of the variants of this function, +/// such as [`normalize_bstr`] or [`normalize_bstring`]. +/// +/// # Examples +/// +/// Values don't need modification are returned borrowed, without allocation. 
+/// +/// ``` +/// # use std::borrow::Cow; +/// # use bstr::ByteSlice; +/// # use gix_config::value::normalize_bstr; +/// assert!(matches!(normalize_bstr("hello world"), Cow::Borrowed(_))); +/// ``` +/// +/// Internally quoted values are turned into owned variant with quotes removed. +/// +/// ``` +/// # use std::borrow::Cow; +/// # use bstr::{BStr, BString}; +/// # use gix_config::value::{normalize_bstr}; +/// assert_eq!(normalize_bstr("hello \"world\""), Cow::::Owned(BString::from("hello world"))); +/// ``` +/// +/// Escaped quotes are unescaped. +/// +/// ``` +/// # use std::borrow::Cow; +/// # use bstr::{BStr, BString}; +/// # use gix_config::value::normalize_bstr; +/// assert_eq!(normalize_bstr(r#"hello "world\"""#), Cow::::Owned(BString::from(r#"hello world""#))); +/// ``` +#[must_use] +pub fn normalize(input: Cow<'_, BStr>) -> Cow<'_, BStr> { + if input.as_ref() == "\"\"" { + return Cow::Borrowed("".into()); + } + + let size = input.len(); + if size >= 3 && input[0] == b'"' && input[size - 1] == b'"' && input[size - 2] != b'\\' { + match input { + Cow::Borrowed(input) => return normalize_bstr(&input[1..size - 1]), + Cow::Owned(mut input) => { + input.pop(); + input.remove(0); + return normalize_bstring(input); + } + } + } + + if input.find_byteset(b"\\\"").is_none() { + return input; + } + + let mut out: BString = Vec::with_capacity(input.len()).into(); + let mut bytes = input.iter().copied(); + while let Some(c) = bytes.next() { + match c { + b'\\' => match bytes.next() { + Some(b'n') => out.push(b'\n'), + Some(b't') => out.push(b'\t'), + Some(b'b') => { + out.pop(); + } + Some(c) => { + out.push(c); + } + None => break, + }, + b'"' => {} + _ => out.push(c), + } + } + Cow::Owned(out) +} + +/// `&[u8]` variant of [`normalize`]. +#[must_use] +pub fn normalize_bstr<'a>(input: impl Into<&'a BStr>) -> Cow<'a, BStr> { + normalize(Cow::Borrowed(input.into())) +} + +/// `Vec[u8]` variant of [`normalize`]. 
+#[must_use] +pub fn normalize_bstring(input: impl Into) -> Cow<'static, BStr> { + normalize(Cow::Owned(input.into())) +} -- cgit v1.2.3