summaryrefslogtreecommitdiffstats
path: root/vendor/gix-config/src/parse
diff options
context:
space:
mode:
Diffstat (limited to 'vendor/gix-config/src/parse')
-rw-r--r--vendor/gix-config/src/parse/comment.rs50
-rw-r--r--vendor/gix-config/src/parse/error.rs64
-rw-r--r--vendor/gix-config/src/parse/event.rs83
-rw-r--r--vendor/gix-config/src/parse/events.rs336
-rw-r--r--vendor/gix-config/src/parse/key.rs35
-rw-r--r--vendor/gix-config/src/parse/mod.rs116
-rw-r--r--vendor/gix-config/src/parse/nom/mod.rs460
-rw-r--r--vendor/gix-config/src/parse/nom/tests.rs924
-rw-r--r--vendor/gix-config/src/parse/section/header.rs180
-rw-r--r--vendor/gix-config/src/parse/section/mod.rs187
-rw-r--r--vendor/gix-config/src/parse/section/unvalidated.rs25
-rw-r--r--vendor/gix-config/src/parse/tests.rs162
12 files changed, 2622 insertions, 0 deletions
diff --git a/vendor/gix-config/src/parse/comment.rs b/vendor/gix-config/src/parse/comment.rs
new file mode 100644
index 000000000..6d4bb15ff
--- /dev/null
+++ b/vendor/gix-config/src/parse/comment.rs
@@ -0,0 +1,50 @@
+use std::{borrow::Cow, fmt::Display};
+
+use bstr::BString;
+
+use crate::parse::Comment;
+
+impl Comment<'_> {
+    /// Create a copy of this comment that owns its text and thus has a `'static` lifetime.
+    #[must_use]
+    pub fn to_owned(&self) -> Comment<'static> {
+        let owned_text: BString = self.text.as_ref().into();
+        Comment {
+            tag: self.tag,
+            text: Cow::Owned(owned_text),
+        }
+    }
+
+    /// Render this comment, marker included, into a freshly allocated `BString`.
+    ///
+    /// `to_string()` is available as well, but it might not be lossless.
+    #[must_use]
+    pub fn to_bstring(&self) -> BString {
+        let mut bytes: Vec<u8> = Vec::new();
+        self.write_to(&mut bytes).expect("io error impossible");
+        BString::from(bytes)
+    }
+
+    /// Write the comment marker followed by the comment text to `out`, reproducing
+    /// the comment losslessly.
+    pub fn write_to(&self, mut out: impl std::io::Write) -> std::io::Result<()> {
+        let marker = [self.tag];
+        out.write_all(&marker)?;
+        out.write_all(self.text.as_ref())
+    }
+}
+
+impl Display for Comment<'_> {
+    /// Format the comment by displaying its serialized form, see `to_bstring()`.
+    fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
+        let serialized = self.to_bstring();
+        Display::fmt(&serialized, f)
+    }
+}
+
+impl From<Comment<'_>> for BString {
+    /// Serialize the comment, marker included, into a `BString`.
+    fn from(c: Comment<'_>) -> Self {
+        // Calling `c.into()` here would go through the blanket `Into` implementation
+        // right back into this function and recurse without end, so serialize directly.
+        c.to_bstring()
+    }
+}
+
+impl From<&Comment<'_>> for BString {
+    /// Serialize the borrowed comment, marker included, into a `BString`.
+    fn from(c: &Comment<'_>) -> Self {
+        Comment::to_bstring(c)
+    }
+}
diff --git a/vendor/gix-config/src/parse/error.rs b/vendor/gix-config/src/parse/error.rs
new file mode 100644
index 000000000..1f469ee4c
--- /dev/null
+++ b/vendor/gix-config/src/parse/error.rs
@@ -0,0 +1,64 @@
+use std::fmt::Display;
+
+use crate::parse::Error;
+
+/// A list of parsers that parsing can fail on. This is used for pretty-printing errors,
+/// where each variant is rendered as the human-readable name of what was being parsed.
+#[derive(PartialEq, Debug, Clone, Copy)]
+pub(crate) enum ParseNode {
+ /// Displayed as `section header`.
+ SectionHeader,
+ /// Displayed as `name`.
+ Name,
+ /// Displayed as `value`.
+ Value,
+}
+
+impl Display for ParseNode {
+    /// Write the lower-case, human-readable name of the parser node.
+    fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
+        let label = match self {
+            Self::SectionHeader => "section header",
+            Self::Name => "name",
+            Self::Value => "value",
+        };
+        f.write_str(label)
+    }
+}
+
+impl Error {
+    /// Return the line on which the error occurred, counting from one.
+    ///
+    /// The stored value is the number of newlines that were successfully parsed,
+    /// hence the offset of one.
+    #[must_use]
+    pub const fn line_number(&self) -> usize {
+        1 + self.line_number
+    }
+
+    /// Return the data that was left unparsed, which contains the cause of the parse error.
+    #[must_use]
+    pub fn remaining_data(&self) -> &[u8] {
+        let unparsed: &[u8] = &self.parsed_until;
+        unparsed
+    }
+}
+
+impl Display for Error {
+    /// Describe where parsing failed and show the beginning of the unparsed input.
+    ///
+    /// If the unparsed input is valid UTF-8, at most its first ten characters are shown
+    /// in quotes, followed by the number of characters that were left out. Input that
+    /// isn't valid UTF-8 is shown in full using its own `Display` implementation.
+    fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
+        // Maximum number of characters of the unparsed input to show verbatim.
+        const MAX_SHOWN_CHARS: usize = 10;
+
+        write!(
+            f,
+            "Got an unexpected token on line {} while trying to parse a {}: ",
+            self.line_number(),
+            self.last_attempted_parser,
+        )?;
+
+        match std::str::from_utf8(&self.parsed_until) {
+            Ok(data) => {
+                // Count characters, not bytes: the preview is cut by characters, so a byte
+                // count would misreport what was omitted for multi-byte UTF-8 input.
+                let total_chars = data.chars().count();
+                if total_chars > MAX_SHOWN_CHARS {
+                    let shown: String = data.chars().take(MAX_SHOWN_CHARS).collect();
+                    write!(
+                        f,
+                        "'{shown}' ... ({} characters omitted)",
+                        total_chars - MAX_SHOWN_CHARS
+                    )
+                } else {
+                    write!(f, "'{data}'")
+                }
+            }
+            Err(_) => self.parsed_until.fmt(f),
+        }
+    }
+}
+
+// `Error` implements both `Debug` and `Display`, so the default trait methods are all that is needed.
+impl std::error::Error for Error {}
diff --git a/vendor/gix-config/src/parse/event.rs b/vendor/gix-config/src/parse/event.rs
new file mode 100644
index 000000000..b7b96934d
--- /dev/null
+++ b/vendor/gix-config/src/parse/event.rs
@@ -0,0 +1,83 @@
+use std::{borrow::Cow, fmt::Display};
+
+use bstr::{BStr, BString};
+
+use crate::parse::Event;
+
+impl Event<'_> {
+    /// Render this event into a freshly allocated `BString`.
+    ///
+    /// `to_string()` is available as well, but it might not be lossless.
+    #[must_use]
+    pub fn to_bstring(&self) -> BString {
+        let mut bytes: Vec<u8> = Vec::new();
+        self.write_to(&mut bytes).expect("io error impossible");
+        BString::from(bytes)
+    }
+
+    /// Borrow the text this event represents, which may be lossy.
+    ///
+    /// A `ValueNotDone` is returned without its trailing backslash, a `SectionHeader` yields only
+    /// its name without the sub-section name, and a `Comment` yields its text without the marker.
+    pub fn to_bstr_lossy(&self) -> &BStr {
+        match self {
+            Self::Comment(comment) => comment.text.as_ref(),
+            Self::SectionHeader(header) => header.name.0.as_ref(),
+            Self::SectionKey(key) => key.0.as_ref(),
+            Self::KeyValueSeparator => BStr::new("="),
+            Self::Value(text)
+            | Self::ValueDone(text)
+            | Self::ValueNotDone(text)
+            | Self::Newline(text)
+            | Self::Whitespace(text) => text.as_ref(),
+        }
+    }
+
+    /// Write this event to `out` so that it is reproduced mostly losslessly as it was parsed.
+    pub fn write_to(&self, mut out: impl std::io::Write) -> std::io::Result<()> {
+        match self {
+            Self::Comment(comment) => comment.write_to(&mut out),
+            Self::SectionHeader(header) => header.write_to(&mut out),
+            Self::SectionKey(key) => out.write_all(key.0.as_ref()),
+            Self::KeyValueSeparator => out.write_all(b"="),
+            Self::ValueNotDone(partial) => {
+                // The continuation backslash isn't part of the stored value, so add it back.
+                out.write_all(partial.as_ref())?;
+                out.write_all(b"\\")
+            }
+            Self::Value(text) | Self::ValueDone(text) | Self::Newline(text) | Self::Whitespace(text) => {
+                out.write_all(text.as_ref())
+            }
+        }
+    }
+
+    /// Create a copy of this event that owns all of its data and thus has a `'static` lifetime.
+    #[must_use]
+    pub fn to_owned(&self) -> Event<'static> {
+        /// Produce an owned copy of `text`, no matter if it was borrowed or owned before.
+        fn owned(text: &Cow<'_, BStr>) -> Cow<'static, BStr> {
+            Cow::Owned(text.clone().into_owned())
+        }
+
+        match self {
+            Event::KeyValueSeparator => Event::KeyValueSeparator,
+            Event::Whitespace(text) => Event::Whitespace(owned(text)),
+            Event::Newline(text) => Event::Newline(owned(text)),
+            Event::Value(text) => Event::Value(owned(text)),
+            Event::ValueNotDone(text) => Event::ValueNotDone(owned(text)),
+            Event::ValueDone(text) => Event::ValueDone(owned(text)),
+            Event::SectionKey(key) => Event::SectionKey(key.to_owned()),
+            Event::SectionHeader(header) => Event::SectionHeader(header.to_owned()),
+            Event::Comment(comment) => Event::Comment(comment.to_owned()),
+        }
+    }
+}
+
+impl Display for Event<'_> {
+    /// Format the event by displaying its serialized form, see `to_bstring()`.
+    fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
+        let serialized = self.to_bstring();
+        Display::fmt(&serialized, f)
+    }
+}
+
+impl From<Event<'_>> for BString {
+    /// Serialize the event into a `BString`.
+    fn from(event: Event<'_>) -> Self {
+        // Calling `event.into()` here would go through the blanket `Into` implementation
+        // right back into this function and recurse without end, so serialize directly.
+        event.to_bstring()
+    }
+}
+
+impl From<&Event<'_>> for BString {
+    /// Serialize the borrowed event into a `BString`.
+    fn from(event: &Event<'_>) -> Self {
+        Event::to_bstring(event)
+    }
+}
diff --git a/vendor/gix-config/src/parse/events.rs b/vendor/gix-config/src/parse/events.rs
new file mode 100644
index 000000000..62f621b52
--- /dev/null
+++ b/vendor/gix-config/src/parse/events.rs
@@ -0,0 +1,336 @@
+use std::convert::TryFrom;
+
+use smallvec::SmallVec;
+
+use crate::{
+ parse,
+ parse::{section, Event, Section},
+};
+
+/// A collection for all events that typically precede the first section.
+///
+/// Up to 8 events are stored inline, without a heap allocation.
+pub type FrontMatterEvents<'a> = SmallVec<[Event<'a>; 8]>;
+
+/// A zero-copy `gix-config` file parser.
+///
+/// This parser exposes low-level syntactic events from a `gix-config` file.
+/// Generally speaking, you'll want to use [`File`] as it wraps
+/// around the parser to provide a higher-level abstraction to a `gix-config`
+/// file, including querying, modifying, and updating values.
+///
+/// This parser guarantees that the events emitted are sufficient to
+/// reconstruct a `gix-config` file identical to the source `gix-config`
+/// when writing it.
+///
+/// # Differences from a `.ini` parser
+///
+/// While the `gix-config` format closely resembles the [`.ini` file format],
+/// there are subtle differences that make them incompatible. For one, the file
+/// format is not well defined, and there exists no formal specification to
+/// adhere to.
+///
+/// For concrete examples, some notable differences are:
+/// - `gix-config` sections permit subsections via either a quoted string
+/// (`[some-section "subsection"]`) or via the deprecated dot notation
+/// (`[some-section.subsection]`). Successful parsing these section names is not
+/// well defined in typical `.ini` parsers. This parser will handle these cases
+/// perfectly.
+/// - Comment markers are not strictly defined either. This parser will always
+/// and only handle a semicolon or octothorpe (also known as a hash or number
+/// sign).
+/// - Global properties may be allowed in `.ini` parsers, but is strictly
+/// disallowed by this parser.
+/// - Only `\t`, `\n`, `\b` `\\` are valid escape characters.
+/// - Quoted and semi-quoted values will be parsed (but quotes will be included
+/// in event outputs). An example of a semi-quoted value is `5"hello world"`,
+/// which should be interpreted as `5hello world` after
+/// [normalization][crate::value::normalize()].
+/// - Line continuations via a `\` character is supported (inside or outside of quotes)
+/// - Whitespace handling similarly follows the `gix-config` specification as
+/// closely as possible, where excess whitespace after a non-quoted value are
+/// trimmed, and line continuations onto a new line with excess spaces are kept.
+/// - Only equal signs (optionally padded by spaces) are valid name/value
+/// delimiters.
+///
+/// Note that things such as case-sensitivity or duplicate sections are
+/// _not_ handled. This parser is a low level _syntactic_ interpreter
+/// and higher level wrappers around this parser, which may
+/// or may not be zero-copy, should handle _semantic_ values. This also means
+/// that string-like values are not interpreted. For example, `hello"world"`
+/// would be read at a high level as `helloworld` but this parser will return
+/// the former instead, with the extra quotes. This is because it is not the
+/// responsibility of the parser to interpret these values, and doing so would
+/// necessarily require a copy, which this parser avoids.
+///
+/// # Trait Implementations
+///
+/// - This struct does _not_ implement [`FromStr`] due to lifetime
+/// constraints implied on the required `from_str` method. Instead, it provides
+/// [`TryFrom<&'_ str>`](std::convert::TryFrom) as well as an inherent `from_str()` method.
+///
+/// # Idioms
+///
+/// If you do want to use this parser, there are some idioms that may help you
+/// with interpreting sequences of events.
+///
+/// ## `Value` events do not immediately follow `Key` events
+///
+/// Consider the following `gix-config` example:
+///
+/// ```text
+/// [core]
+/// autocrlf = input
+/// ```
+///
+/// Because this parser guarantees perfect reconstruction, there are many
+/// non-significant events that occur in addition to the ones you may expect:
+///
+/// ```
+/// # use gix_config::parse::{Event, Events, section};
+/// # use std::borrow::Cow;
+/// # use std::convert::TryFrom;
+/// # let section_header = section::Header::new("core", None).unwrap();
+/// # let section_data = "[core]\n autocrlf = input";
+/// # assert_eq!(Events::from_str(section_data).unwrap().into_vec(), vec![
+/// Event::SectionHeader(section_header),
+/// Event::Newline(Cow::Borrowed("\n".into())),
+/// Event::Whitespace(Cow::Borrowed(" ".into())),
+/// Event::SectionKey(section::Key::try_from("autocrlf")?),
+/// Event::Whitespace(Cow::Borrowed(" ".into())),
+/// Event::KeyValueSeparator,
+/// Event::Whitespace(Cow::Borrowed(" ".into())),
+/// Event::Value(Cow::Borrowed("input".into())),
+/// # ]);
+/// # Ok::<_, Box<dyn std::error::Error>>(())
+/// ```
+///
+/// Note the two whitespace events between the key and value pair! Those two
+/// events actually refer to the whitespace between the name and value and the
+/// equal sign. So if the config instead had `autocrlf=input`, those whitespace
+/// events would no longer be present.
+///
+/// ## `KeyValueSeparator` event is not guaranteed to emit
+///
+/// Consider the following `gix-config` example:
+///
+/// ```text
+/// [core]
+/// autocrlf
+/// ```
+///
+/// This is a valid config with a `autocrlf` key having an implicit `true`
+/// value. This means that there is not a `=` separating the key and value,
+/// which means that the corresponding event won't appear either:
+///
+/// ```
+/// # use gix_config::parse::{Event, Events, section};
+/// # use std::borrow::Cow;
+/// # use std::convert::TryFrom;
+/// # let section_header = section::Header::new("core", None).unwrap();
+/// # let section_data = "[core]\n autocrlf";
+/// # assert_eq!(Events::from_str(section_data).unwrap().into_vec(), vec![
+/// Event::SectionHeader(section_header),
+/// Event::Newline(Cow::Borrowed("\n".into())),
+/// Event::Whitespace(Cow::Borrowed(" ".into())),
+/// Event::SectionKey(section::Key::try_from("autocrlf")?),
+/// Event::Value(Cow::Borrowed("".into())),
+/// # ]);
+/// # Ok::<_, Box<dyn std::error::Error>>(())
+/// ```
+///
+/// ## Quoted values are not unquoted
+///
+/// Consider the following `gix-config` example:
+///
+/// ```text
+/// [core]
+/// autocrlf=true""
+/// filemode=fa"lse"
+/// ```
+///
+/// Both these events, when fully processed, should normally be `true` and
+/// `false`. However, because this parser is zero-copy, we cannot process
+/// partially quoted values, such as the `false` example. As a result, to
+/// maintain consistency, the parser will just take all values as literals. The
+/// relevant event stream emitted is thus emitted as:
+///
+/// ```
+/// # use gix_config::parse::{Event, Events, section};
+/// # use std::borrow::Cow;
+/// # use std::convert::TryFrom;
+/// # let section_header = section::Header::new("core", None).unwrap();
+/// # let section_data = "[core]\nautocrlf=true\"\"\nfilemode=fa\"lse\"";
+/// # assert_eq!(Events::from_str(section_data).unwrap().into_vec(), vec![
+/// Event::SectionHeader(section_header),
+/// Event::Newline(Cow::Borrowed("\n".into())),
+/// Event::SectionKey(section::Key::try_from("autocrlf")?),
+/// Event::KeyValueSeparator,
+/// Event::Value(Cow::Borrowed(r#"true"""#.into())),
+/// Event::Newline(Cow::Borrowed("\n".into())),
+/// Event::SectionKey(section::Key::try_from("filemode")?),
+/// Event::KeyValueSeparator,
+/// Event::Value(Cow::Borrowed(r#"fa"lse""#.into())),
+/// # ]);
+/// # Ok::<_, Box<dyn std::error::Error>>(())
+/// ```
+///
+/// ## Whitespace after line continuations are part of the value
+///
+/// Consider the following `gix-config` example:
+///
+/// ```text
+/// [some-section]
+/// file=a\
+/// c
+/// ```
+///
+/// Because how `gix-config` treats continuations, the whitespace preceding `c`
+/// are in fact part of the value of `file`. The fully interpreted key/value
+/// pair is actually `file=a c`. As a result, the parser will provide this
+/// split value accordingly:
+///
+/// ```
+/// # use gix_config::parse::{Event, Events, section};
+/// # use std::borrow::Cow;
+/// # use std::convert::TryFrom;
+/// # let section_header = section::Header::new("some-section", None).unwrap();
+/// # let section_data = "[some-section]\nfile=a\\\n c";
+/// # assert_eq!(Events::from_str(section_data).unwrap().into_vec(), vec![
+/// Event::SectionHeader(section_header),
+/// Event::Newline(Cow::Borrowed("\n".into())),
+/// Event::SectionKey(section::Key::try_from("file")?),
+/// Event::KeyValueSeparator,
+/// Event::ValueNotDone(Cow::Borrowed("a".into())),
+/// Event::Newline(Cow::Borrowed("\n".into())),
+/// Event::ValueDone(Cow::Borrowed(" c".into())),
+/// # ]);
+/// # Ok::<_, Box<dyn std::error::Error>>(())
+/// ```
+///
+/// [`File`]: crate::File
+/// [`.ini` file format]: https://en.wikipedia.org/wiki/INI_file
+/// [`git`'s documentation]: https://git-scm.com/docs/git-config#_configuration_file
+/// [`FromStr`]: std::str::FromStr
+/// [`From<&'_ str>`]: std::convert::From
+#[derive(Clone, Eq, PartialEq, Ord, PartialOrd, Hash, Debug, Default)]
+pub struct Events<'a> {
+ /// Events seen before the first section header.
+ pub frontmatter: FrontMatterEvents<'a>,
+ /// All parsed sections, each holding its header along with the events
+ /// that were seen after it and before the next section header.
+ pub sections: Vec<Section<'a>>,
+}
+
+impl Events<'static> {
+    /// Parse `input` into an [`Events`] instance whose events are all allocated and owned.
+    ///
+    /// This works like [`Events::from_bytes()`], but is slower as every event needs an allocation.
+    ///
+    /// If `filter` is set, only those non-header events for which it returns `true` are kept.
+    pub fn from_bytes_owned<'a>(
+        input: &'a [u8],
+        filter: Option<fn(&Event<'a>) -> bool>,
+    ) -> Result<Events<'static>, parse::Error> {
+        let make_owned = |event: Event<'a>| event.to_owned();
+        from_bytes(input, make_owned, filter)
+    }
+}
+
+impl<'a> Events<'a> {
+    /// Parse `input` without copying it, so that all events borrow from the given bytes.
+    ///
+    /// On success, the returned [`Events`] provides access to the events preceding the first
+    /// section as well as to all sections of a `gix-config` file, and it can be turned into
+    /// an iterator of [`Event`] for higher level processing.
+    ///
+    /// If `filter` is set, only those non-header events for which it returns `true` are kept.
+    pub fn from_bytes(input: &'a [u8], filter: Option<fn(&Event<'a>) -> bool>) -> Result<Events<'a>, parse::Error> {
+        from_bytes(input, |event| event, filter)
+    }
+
+    /// Parse the `input` string without copying it.
+    ///
+    /// Use [`from_bytes()`][Self::from_bytes()] instead if the input isn't
+    /// guaranteed to be encoded as UTF8.
+    #[allow(clippy::should_implement_trait)]
+    pub fn from_str(input: &'a str) -> Result<Events<'a>, parse::Error> {
+        let bytes = input.as_bytes();
+        Events::from_bytes(bytes, None)
+    }
+
+    /// Consume this instance and iterate over all of its events: first the frontmatter,
+    /// then for each section its header followed by the section's events.
+    #[must_use = "iterators are lazy and do nothing unless consumed"]
+    #[allow(clippy::should_implement_trait)]
+    pub fn into_iter(self) -> impl Iterator<Item = parse::Event<'a>> + std::iter::FusedIterator {
+        let Events { frontmatter, sections } = self;
+        let section_events = sections.into_iter().flat_map(|parse::Section { header, events }| {
+            std::iter::once(parse::Event::SectionHeader(header)).chain(events)
+        });
+        frontmatter.into_iter().chain(section_events)
+    }
+
+    /// Consume this instance and collect all of its events into a single `Vec`.
+    pub fn into_vec(self) -> Vec<parse::Event<'a>> {
+        let all_events = self.into_iter();
+        all_events.collect()
+    }
+}
+
+impl<'a> TryFrom<&'a str> for Events<'a> {
+    type Error = parse::Error;
+
+    /// Parse `value` without copying it, see [`Events::from_str()`].
+    fn try_from(value: &'a str) -> Result<Self, Self::Error> {
+        Events::from_str(value)
+    }
+}
+
+impl<'a> TryFrom<&'a [u8]> for Events<'a> {
+    type Error = parse::Error;
+
+    /// Parse `value` without copying it and without filtering any event,
+    /// see [`Events::from_bytes()`].
+    fn try_from(value: &'a [u8]) -> Result<Self, Self::Error> {
+        let no_filter = None;
+        Self::from_bytes(value, no_filter)
+    }
+}
+
+/// Run the low-level parser over `input` and group the events it dispatches into
+/// frontmatter, i.e. everything before the first section header, and sections.
+///
+/// Every event that is kept is passed through `convert`, which must return the same kind
+/// of event it was given and may only change its lifetime.
+/// If `filter` is set, it decides which events are kept. Note that section headers are
+/// never passed to `filter`, they are always kept.
+///
+/// Errors of the low-level parser are returned as is.
+fn from_bytes<'a, 'b>(
+ input: &'a [u8],
+ convert: impl Fn(Event<'a>) -> Event<'b>,
+ filter: Option<fn(&Event<'a>) -> bool>,
+) -> Result<Events<'b>, parse::Error> {
+ // The header of the section whose events are currently being collected,
+ // or `None` as long as no section header was seen.
+ let mut header = None;
+ // The events collected since the most recent section header, or since the start of the input.
+ let mut events = section::Events::default();
+ let mut frontmatter = FrontMatterEvents::default();
+ let mut sections = Vec::new();
+ parse::from_bytes(input, |e: Event<'_>| match e {
+ Event::SectionHeader(next_header) => {
+ // A new header ends whatever was collected so far.
+ match header.take() {
+ None => {
+ // No header was seen before, so the collected events are the frontmatter.
+ frontmatter = std::mem::take(&mut events).into_iter().collect();
+ }
+ Some(prev_header) => {
+ sections.push(parse::Section {
+ header: prev_header,
+ events: std::mem::take(&mut events),
+ });
+ }
+ };
+ // Convert the header by wrapping it into an event and unwrapping the result again,
+ // with the trailing `.into()` turning it into `Some(header)`.
+ header = match convert(Event::SectionHeader(next_header)) {
+ Event::SectionHeader(h) => h,
+ _ => unreachable!("BUG: convert must not change the event type, just the lifetime"),
+ }
+ .into();
+ }
+ event => {
+ // Without a filter, every event is kept.
+ if filter.map_or(true, |f| f(&event)) {
+ events.push(convert(event))
+ }
+ }
+ })?;
+
+ // Flush the events collected after the last section header, or all of them as frontmatter
+ // if the input didn't contain any section header.
+ match header {
+ None => {
+ frontmatter = events.into_iter().collect();
+ }
+ Some(prev_header) => {
+ sections.push(parse::Section {
+ header: prev_header,
+ events: std::mem::take(&mut events),
+ });
+ }
+ }
+ Ok(Events { frontmatter, sections })
+}
diff --git a/vendor/gix-config/src/parse/key.rs b/vendor/gix-config/src/parse/key.rs
new file mode 100644
index 000000000..b0e0376be
--- /dev/null
+++ b/vendor/gix-config/src/parse/key.rs
@@ -0,0 +1,35 @@
+use bstr::{BStr, ByteSlice};
+
+/// An unvalidated parse result of parsing input like `remote.origin.url` or `core.bare`.
+///
+/// All fields borrow from the input that was parsed.
+#[derive(Debug, PartialEq, Ord, PartialOrd, Eq, Hash, Clone, Copy)]
+pub struct Key<'a> {
+ /// The name of the section, like `core` in `core.bare`.
+ pub section_name: &'a str,
+ /// The name of the sub-section, like `origin` in `remote.origin.url`,
+ /// or `None` if the input had no sub-section, like `core.bare`.
+ pub subsection_name: Option<&'a BStr>,
+ /// The name of the section key, like `url` in `remote.origin.url`.
+ pub value_name: &'a str,
+}
+
+/// Split `input`, like `core.bare` or `remote.origin.url`, into the fields of a [`Key`].
+///
+/// The first `.` ends the section name and the last `.` starts the value name. Whatever remains
+/// in between, dots included, becomes the subsection name.
+///
+/// Returns `None` if `input` contains no `.` at all, or if the section name or the value name
+/// aren't valid UTF-8. Apart from that, `input` isn't validated. Requiring `str` for these names
+/// is fine as ascii, which is required for any valid key, is a subset of UTF-8.
+pub fn parse_unvalidated<'a>(input: impl Into<&'a BStr>) -> Option<Key<'a>> {
+    let input = input.into();
+    let is_dot = |byte: &u8| *byte == b'.';
+
+    // Everything up to the first dot is the section name.
+    let mut head_and_rest = input.splitn(2, is_dot);
+    let section_name = head_and_rest.next()?;
+    let rest = head_and_rest.next()?;
+
+    // Splitting from the right yields the value name first, followed by the optional subsection.
+    let mut from_right = rest.rsplitn(2, is_dot);
+    let value_name = from_right.next()?;
+    let subsection_name: Option<&'a BStr> = from_right.next().map(Into::into);
+
+    let section_name = section_name.to_str().ok()?;
+    let value_name = value_name.to_str().ok()?;
+    Some(Key {
+        section_name,
+        subsection_name,
+        value_name,
+    })
+}
diff --git a/vendor/gix-config/src/parse/mod.rs b/vendor/gix-config/src/parse/mod.rs
new file mode 100644
index 000000000..50363873c
--- /dev/null
+++ b/vendor/gix-config/src/parse/mod.rs
@@ -0,0 +1,116 @@
+//! This module handles parsing a `gix-config` file. Generally speaking, you
+//! want to use a higher abstraction such as [`File`] unless you have some
+//! explicit reason to work with events instead.
+//!
+//! The workflow for interacting with this is to use
+//! [`from_bytes()`] to obtain all parse events or tokens of the given input.
+//!
+//! On a higher level, one can use [`Events`] to parse all events into a set
+//! of easily interpretable data type, similar to what [`File`] does.
+//!
+//! [`File`]: crate::File
+
+use std::{borrow::Cow, hash::Hash};
+
+use bstr::BStr;
+
+mod nom;
+pub use self::nom::from_bytes;
+mod event;
+#[path = "events.rs"]
+mod events_type;
+pub use events_type::{Events, FrontMatterEvents};
+mod comment;
+mod error;
+///
+pub mod section;
+
+///
+mod key;
+pub use key::{parse_unvalidated as key, Key};
+
+#[cfg(test)]
+pub(crate) mod tests;
+
+/// Syntactic events that occur in the config. Despite all these variants
+/// holding a [`Cow`] instead of a simple reference, the parser will only emit
+/// borrowed `Cow` variants.
+///
+/// The `Cow` is used here for ease of inserting new, typically owned events as used
+/// in the [`File`] struct when adding values, allowing a mix of owned and borrowed
+/// values.
+///
+/// [`Cow`]: std::borrow::Cow
+/// [`File`]: crate::File
+#[derive(Clone, Eq, PartialEq, Ord, PartialOrd, Hash, Debug)]
+pub enum Event<'a> {
+ /// A comment with a comment tag and the comment itself. Note that the
+ /// comment itself may contain additional whitespace and comment markers
+ /// at the beginning, like `# comment` or `; comment`.
+ Comment(Comment<'a>),
+ /// A section header containing the section name and a subsection, if it
+ /// exists. For instance, `remote "origin"` is parsed to `remote` as section
+ /// name and `origin` as subsection name.
+ SectionHeader(section::Header<'a>),
+ /// A name to a value in a section, like `url` in `remote.origin.url`.
+ SectionKey(section::Key<'a>),
+ /// A completed value. This may be any single-line string, including the empty string
+ /// if an implicit boolean value is used.
+ /// Note that these values may contain spaces and any special character. This value is
+ /// also unprocessed, so it may contain double quotes that should be
+ /// [normalized][crate::value::normalize()] before interpretation.
+ Value(Cow<'a, BStr>),
+ /// Represents any token used to signify a newline character. On Unix
+ /// platforms, this is typically just `\n`, but can be any valid newline
+ /// sequence. Multiple newlines (such as `\n\n`) will be merged as a single
+ /// newline event containing a string of multiple newline characters.
+ Newline(Cow<'a, BStr>),
+ /// Any value that isn't completed. This occurs when the value is continued
+ /// onto the next line by ending it with a backslash.
+ /// A [`Newline`][Self::Newline] event is guaranteed after, followed by
+ /// either a ValueDone, a Whitespace, or another ValueNotDone.
+ ValueNotDone(Cow<'a, BStr>),
+ /// The last line of a value which was continued onto another line.
+ /// With this it's possible to obtain the complete value by concatenating
+ /// the prior [`ValueNotDone`][Self::ValueNotDone] events.
+ ValueDone(Cow<'a, BStr>),
+ /// A continuous section of insignificant whitespace.
+ ///
+ /// Note that values with internal whitespace will not be separated by this event,
+ /// hence interior whitespace there is always part of the value.
+ Whitespace(Cow<'a, BStr>),
+ /// This event is emitted when the parser encounters a valid `=` character
+ /// separating the key and value.
+ /// This event is necessary as it eliminates the ambiguity for whitespace
+ /// events between a key and value event.
+ KeyValueSeparator,
+}
+
+/// A parsed section containing the header and the section events, typically
+/// comprising the keys and their values.
+#[derive(Clone, Eq, PartialEq, Ord, PartialOrd, Hash, Debug)]
+pub struct Section<'a> {
+ /// The section name and subsection name, if any.
+ pub header: section::Header<'a>,
+ /// The syntactic events found in this section, not including the section header itself.
+ pub events: section::Events<'a>,
+}
+
+/// A parsed comment containing the comment marker and comment.
+#[derive(Clone, Eq, PartialEq, Ord, PartialOrd, Hash, Debug, Default)]
+pub struct Comment<'a> {
+ /// The comment marker used, as a single byte. This is either a semicolon or octothorpe/hash.
+ pub tag: u8,
+ /// The parsed comment, which is everything that followed the comment marker.
+ pub text: Cow<'a, BStr>,
+}
+
+/// A parser error reports the one-indexed line number where the parsing error
+/// occurred, as well as the last parser node and the remaining data to be
+/// parsed.
+#[derive(PartialEq, Debug)]
+pub struct Error {
+ // The number of newlines that were parsed successfully, i.e. a zero-based line number.
+ // `line_number()` adds one to it to make it one-indexed.
+ line_number: usize,
+ // What the parser was trying to parse when it failed, used for the error message.
+ last_attempted_parser: error::ParseNode,
+ // Despite its name, this is the data that was left unparsed, as exposed by `remaining_data()`.
+ parsed_until: bstr::BString,
+}
diff --git a/vendor/gix-config/src/parse/nom/mod.rs b/vendor/gix-config/src/parse/nom/mod.rs
new file mode 100644
index 000000000..11d1dea6b
--- /dev/null
+++ b/vendor/gix-config/src/parse/nom/mod.rs
@@ -0,0 +1,460 @@
+use std::borrow::Cow;
+
+use bstr::{BStr, BString, ByteSlice, ByteVec};
+use nom::{
+ branch::alt,
+ bytes::complete::{tag, take_till, take_while},
+ character::{
+ complete::{char, one_of},
+ is_space,
+ },
+ combinator::{map, opt},
+ error::{Error as NomError, ErrorKind},
+ multi::{fold_many0, fold_many1},
+ sequence::delimited,
+ IResult,
+};
+
+use crate::parse::{error::ParseNode, section, Comment, Error, Event};
+
+/// Attempt to zero-copy parse the provided bytes, passing results to `dispatch`.
+///
+/// A byte-order mark at the start of `input`, as detected by `unicode_bom`, is skipped.
+/// Comments, whitespace and newlines preceding the first section are dispatched first,
+/// followed by the events of each section.
+///
+/// # Errors
+///
+/// Returns an [`Error`] if input remains after the leading comments and whitespace but no
+/// section could be parsed from it, or if unparsed input is left after the last section.
+/// Events dispatched up to that point are not taken back.
+pub fn from_bytes<'a>(input: &'a [u8], mut dispatch: impl FnMut(Event<'a>)) -> Result<(), Error> {
+ let bom = unicode_bom::Bom::from(input);
+ // Running count of newlines consumed so far, stored as `line_number` if an error is created.
+ let mut newlines = 0;
+ // Consume everything preceding the first section, which can only be comments, whitespace and newlines.
+ let (i, _) = fold_many0(
+ alt((
+ map(comment, Event::Comment),
+ map(take_spaces, |whitespace| Event::Whitespace(Cow::Borrowed(whitespace))),
+ map(take_newlines, |(newline, counter)| {
+ newlines += counter;
+ Event::Newline(Cow::Borrowed(newline))
+ }),
+ )),
+ || (),
+ |_acc, event| dispatch(event),
+ )(&input[bom.len()..])
+ // I don't think this can panic. many0 errors if the child parser returns
+ // a success where the input was not consumed, but alt will only return Ok
+ // if one of its children succeeds. However, all of its children are
+ // guaranteed to consume something if they succeed, so the Ok(i) == i case
+ // can never occur.
+ .expect("many0(alt(...)) panicked. Likely a bug in one of the children parsers.");
+
+ // Input without any section is valid, it is either empty or consists of comments and whitespace only.
+ if i.is_empty() {
+ return Ok(());
+ }
+
+ // Updated by the section parsers to the kind of item they attempt next, for use in errors.
+ let mut node = ParseNode::SectionHeader;
+
+ let res = fold_many1(
+ |i| section(i, &mut node, &mut dispatch),
+ || (),
+ |_acc, additional_newlines| {
+ newlines += additional_newlines;
+ },
+ )(i);
+ // Note that `i` in the closure is still the input as it was before the first section,
+ // as the binding created by this statement isn't in scope yet.
+ let (i, _) = res.map_err(|_| Error {
+ line_number: newlines,
+ last_attempted_parser: node,
+ parsed_until: i.as_bstr().into(),
+ })?;
+
+ // This needs to happen after we collect sections, otherwise the line number
+ // will be off.
+ if !i.is_empty() {
+ return Err(Error {
+ line_number: newlines,
+ last_attempted_parser: node,
+ parsed_until: i.as_bstr().into(),
+ });
+ }
+
+ Ok(())
+}
+
+/// Parse a single comment, which is a `;` or `#` marker followed by everything up to,
+/// but excluding, the next newline byte.
+///
+/// The text of the returned comment borrows from `i` and may be empty.
+/// Fails if `i` doesn't start with one of the two markers.
+fn comment(i: &[u8]) -> IResult<&[u8], Comment<'_>> {
+ let (i, comment_tag) = one_of(";#")(i)?;
+ // The newline itself is left in the remaining input.
+ let (i, comment) = take_till(|c| c == b'\n')(i)?;
+ Ok((
+ i,
+ Comment {
+ // Lossless, as the marker is one of the two ASCII characters matched above.
+ tag: comment_tag as u8,
+ text: Cow::Borrowed(comment.as_bstr()),
+ },
+ ))
+}
+
+#[cfg(test)]
+mod tests;
+
+/// Parse a section header followed by any amount of whitespace, newlines, key-value pairs
+/// and comments, passing each parsed item to `dispatch` as an event.
+///
+/// `node` is updated by the key-value parser to the kind of item it attempts to parse.
+///
+/// Returns the remaining input along with the number of newlines encountered after the header.
+/// Fails only if the section header can't be parsed, anything unparseable after it ends the
+/// section and is left in the remaining input.
+fn section<'a>(i: &'a [u8], node: &mut ParseNode, dispatch: &mut impl FnMut(Event<'a>)) -> IResult<&'a [u8], usize> {
+ let (mut i, header) = section_header(i)?;
+ dispatch(Event::SectionHeader(header));
+
+ let mut newlines = 0;
+
+ // This would usually be a many0(alt(...)), the manual loop allows us to
+ // optimize vec insertions
+ loop {
+ // The input at the start of this iteration, used to detect whether any parser made progress.
+ let old_i = i;
+
+ // Each of the parsers below is tried once per iteration, in this order,
+ // with failures being ignored.
+ if let Ok((new_i, v)) = take_spaces(i) {
+ if old_i != new_i {
+ i = new_i;
+ dispatch(Event::Whitespace(Cow::Borrowed(v.as_bstr())));
+ }
+ }
+
+ if let Ok((new_i, (v, new_newlines))) = take_newlines(i) {
+ if old_i != new_i {
+ i = new_i;
+ newlines += new_newlines;
+ dispatch(Event::Newline(Cow::Borrowed(v.as_bstr())));
+ }
+ }
+
+ // Key-value pairs dispatch their events themselves.
+ if let Ok((new_i, new_newlines)) = key_value_pair(i, node, dispatch) {
+ if old_i != new_i {
+ i = new_i;
+ newlines += new_newlines;
+ }
+ }
+
+ if let Ok((new_i, comment)) = comment(i) {
+ if old_i != new_i {
+ i = new_i;
+ dispatch(Event::Comment(comment));
+ }
+ }
+
+ // Nothing was consumed, which means we reached the end of the input, the next section,
+ // or something that can't be parsed.
+ if old_i == i {
+ break;
+ }
+ }
+
+ Ok((i, newlines))
+}
+
+/// Parse a section header in one of these forms:
+///
+/// * `[name]` without subsection
+/// * `[name.subsection]`, the deprecated subsection syntax
+/// * `[name "subsection"]`, the modern subsection syntax
+///
+/// The `separator` of the returned header is the `.` or the whitespace between the name and
+/// the subsection, and is `None` if there is no subsection.
+fn section_header(i: &[u8]) -> IResult<&[u8], section::Header<'_>> {
+ let (i, _) = char('[')(i)?;
+ // No spaces must be between section name and section start
+ // Dots are accepted here as they separate name and subsection in the deprecated syntax.
+ let (i, name) = take_while(|c: u8| c.is_ascii_alphanumeric() || c == b'-' || c == b'.')(i)?;
+
+ let name = name.as_bstr();
+ if let Ok((i, _)) = char::<_, NomError<&[u8]>>(']')(i) {
+ // Either section does not have a subsection or using deprecated
+ // subsection syntax at this point.
+ // The last dot separates the name from the subsection, hence the name itself may contain dots.
+ let header = match memchr::memrchr(b'.', name.as_bytes()) {
+ Some(index) => section::Header {
+ name: section::Name(Cow::Borrowed(name[..index].as_bstr())),
+ separator: name.get(index..=index).map(|s| Cow::Borrowed(s.as_bstr())),
+ subsection_name: name.get(index + 1..).map(|s| Cow::Borrowed(s.as_bstr())),
+ },
+ None => section::Header {
+ name: section::Name(Cow::Borrowed(name.as_bstr())),
+ separator: None,
+ subsection_name: None,
+ },
+ };
+
+ // NOTE(review): empty names are rejected only here, the modern syntax below has no such check.
+ if header.name.is_empty() {
+ return Err(nom::Err::Error(NomError {
+ input: i,
+ code: ErrorKind::NoneOf,
+ }));
+ }
+ return Ok((i, header));
+ }
+
+ // Section header must be using modern subsection syntax at this point.
+
+ // At least one whitespace byte is required between the name and the quoted subsection.
+ let (i, whitespace) = take_spaces(i)?;
+ // `sub_section` stops right before the closing quote, which is consumed here along with the `]`.
+ let (i, subsection_name) = delimited(char('"'), opt(sub_section), tag("\"]"))(i)?;
+
+ Ok((
+ i,
+ section::Header {
+ name: section::Name(Cow::Borrowed(name)),
+ separator: Some(Cow::Borrowed(whitespace)),
+ subsection_name,
+ },
+ ))
+}
+
+/// Parse the name of a subsection in modern syntax, with `i` starting right after the opening quote.
+///
+/// The remaining input starts at the closing quote. The name is borrowed from `i` unless it contains
+/// backslash escapes, in which case an owned version without the escaping backslashes is returned.
+fn sub_section(i: &[u8]) -> IResult<&[u8], Cow<'_, BStr>> {
+ // The first pass discards all bytes and merely validates and measures the name.
+ let (rest, (found_escape, consumed)) = sub_section_delegate(i, &mut |_| ())?;
+ if found_escape {
+ // Only now we know that an allocation is needed, so parse once more to collect the unescaped bytes.
+ let mut buf = BString::default();
+ sub_section_delegate(i, &mut |b| buf.push_byte(b)).map(|(i, _)| (i, buf.into()))
+ } else {
+ Ok((rest, i[..consumed].as_bstr().into()))
+ }
+}
+
+/// Scan a subsection name up to its closing quote, passing each byte of the unescaped name to `push_byte`.
+///
+/// A backslash causes the byte following it to be passed on as is, while the backslash itself is dropped.
+///
+/// Returns the input starting at the closing quote, along with a tuple of whether an escape was found
+/// and the number of bytes in front of the closing quote.
+/// Fails on newlines, on unescaped null bytes, and if the input ends before the closing quote was seen.
+fn sub_section_delegate<'a>(i: &'a [u8], push_byte: &mut dyn FnMut(u8)) -> IResult<&'a [u8], (bool, usize)> {
+ // The number of bytes taken from `bytes` so far.
+ let mut cursor = 0;
+ let mut bytes = i.iter().copied();
+ let mut found_terminator = false;
+ let mut found_escape = false;
+ while let Some(mut b) = bytes.next() {
+ cursor += 1;
+ if b == b'\n' || b == 0 {
+ return Err(nom::Err::Error(NomError {
+ input: &i[cursor..],
+ code: ErrorKind::NonEmpty,
+ }));
+ }
+ if b == b'"' {
+ found_terminator = true;
+ break;
+ }
+ if b == b'\\' {
+ // Replace the backslash with the byte it escapes, failing if there is none.
+ b = bytes.next().ok_or_else(|| {
+ nom::Err::Error(NomError {
+ input: &i[cursor..],
+ code: ErrorKind::NonEmpty,
+ })
+ })?;
+ found_escape = true;
+ cursor += 1;
+ // Only newlines are rejected after a backslash, any other byte is taken literally.
+ if b == b'\n' {
+ return Err(nom::Err::Error(NomError {
+ input: &i[cursor..],
+ code: ErrorKind::NonEmpty,
+ }));
+ }
+ }
+ push_byte(b);
+ }
+
+ if !found_terminator {
+ return Err(nom::Err::Error(NomError {
+ input: &i[cursor..],
+ code: ErrorKind::NonEmpty,
+ }));
+ }
+
+ // The closing quote was counted by `cursor`, but is neither part of the name nor consumed.
+ Ok((&i[cursor - 1..], (found_escape, cursor - 1)))
+}
+
+/// Parse a key, optional whitespace, and the optional `=` separator with its value,
+/// passing an event for each of these to `dispatch`.
+///
+/// `node` is set to the kind of item that is about to be parsed, which is the name first and
+/// the value after that.
+///
+/// Returns the remaining input along with the number of newlines consumed as part of the value.
+/// Note that the key and whitespace events are dispatched even if parsing the value fails afterwards.
+fn key_value_pair<'a>(
+ i: &'a [u8],
+ node: &mut ParseNode,
+ dispatch: &mut impl FnMut(Event<'a>),
+) -> IResult<&'a [u8], usize> {
+ *node = ParseNode::Name;
+ let (i, name) = config_name(i)?;
+
+ dispatch(Event::SectionKey(section::Key(Cow::Borrowed(name))));
+
+ // Whitespace between the key and the separator, or whatever else follows the key.
+ let (i, whitespace) = opt(take_spaces)(i)?;
+ if let Some(whitespace) = whitespace {
+ dispatch(Event::Whitespace(Cow::Borrowed(whitespace)));
+ }
+
+ *node = ParseNode::Value;
+ let (i, newlines) = config_value(i, dispatch)?;
+ Ok((i, newlines))
+}
+
+/// Parses the config name of a config pair. Assumes the input has already been
+/// trimmed of any leading whitespace.
+///
+/// A name starts with an ASCII letter and continues with ASCII letters, digits or `-`.
+/// Fails if the input is empty or doesn't start with an ASCII letter.
+fn config_name(i: &[u8]) -> IResult<&[u8], &BStr> {
+ if i.is_empty() {
+ return Err(nom::Err::Error(NomError {
+ input: i,
+ code: ErrorKind::NonEmpty,
+ }));
+ }
+
+ // Digits and dashes are valid within the name, but not as its first character.
+ if !i[0].is_ascii_alphabetic() {
+ return Err(nom::Err::Error(NomError {
+ input: i,
+ code: ErrorKind::Alpha,
+ }));
+ }
+
+ // The name ends at the first byte that isn't allowed, which is left in the remaining input.
+ let (i, name) = take_while(|c: u8| c.is_ascii_alphanumeric() || c == b'-')(i)?;
+ Ok((i, name.as_bstr()))
+}
+
+/// Parse what follows a key and the whitespace after it, which is either a `=` separator,
+/// optional whitespace and a value, or nothing at all.
+///
+/// Without a separator, an empty value is dispatched and no input is consumed.
+///
+/// Returns the remaining input along with the number of newlines consumed as part of the value.
+fn config_value<'a>(i: &'a [u8], dispatch: &mut impl FnMut(Event<'a>)) -> IResult<&'a [u8], usize> {
+ if let (i, Some(_)) = opt(char('='))(i)? {
+ dispatch(Event::KeyValueSeparator);
+ let (i, whitespace) = opt(take_spaces)(i)?;
+ if let Some(whitespace) = whitespace {
+ dispatch(Event::Whitespace(Cow::Borrowed(whitespace)));
+ }
+ let (i, newlines) = value_impl(i, dispatch)?;
+ Ok((i, newlines))
+ } else {
+ // This is a special way of denoting 'empty' values which a lot of code depends on.
+ // Hence, rather than fixing this everywhere else, leave it here and fix it where it matters, namely
+ // when it's about differentiating between a missing key-value separator, and one followed by emptiness.
+ dispatch(Event::Value(Cow::Borrowed("".into())));
+ Ok((i, 0))
+ }
+}
+
+/// Handles parsing of known-to-be values. This function handles both single
+/// line values as well as values that are continuations.
+///
+/// A value ends at an unescaped newline, at a `;` or `#` outside of double quotes, or at the end
+/// of the input. A backslash followed by a newline, optionally preceded by a carriage return,
+/// continues the value on the next line.
+///
+/// A value without continuation is dispatched as a single `Value` event. Otherwise each continued
+/// line is dispatched as `ValueNotDone` followed by a `Newline`, and the last line as `ValueDone`.
+/// Trailing ASCII whitespace isn't part of the last or only value and is left in the remaining input.
+///
+/// Returns the remaining input along with the number of line continuations.
+/// Fails on unknown escape sequences, on a backslash at the end of the input, and on unbalanced quotes.
+fn value_impl<'a>(i: &'a [u8], dispatch: &mut impl FnMut(Event<'a>)) -> IResult<&'a [u8], usize> {
+ let (i, value_end, newlines, mut dispatch) = {
+ let new_err = |code| nom::Err::Error(NomError { input: i, code });
+ // The index at which the value ends, if a terminating byte was found.
+ let mut value_end = None::<usize>;
+ // The index at which the current line of the value starts, which changes with each continuation.
+ let mut value_start: usize = 0;
+ let mut newlines = 0;
+
+ let mut prev_char_was_backslash = false;
+ // This is required to ignore comment markers if they're in a quote.
+ let mut is_in_quotes = false;
+ // Used to determine if we return a Value or Value{Not,}Done
+ let mut partial_value_found = false;
+ // The index of the byte that is currently being looked at, or the input length once all bytes were seen.
+ let mut last_value_index: usize = 0;
+
+ let mut bytes = i.iter();
+ while let Some(mut c) = bytes.next() {
+ if prev_char_was_backslash {
+ prev_char_was_backslash = false;
+ // The amount of bytes making up the newline of a continuation, 1 for `\n` and 2 for `\r\n`.
+ let mut consumed = 1;
+ if *c == b'\r' {
+ c = bytes.next().ok_or_else(|| new_err(ErrorKind::Escaped))?;
+ if *c != b'\n' {
+ return Err(new_err(ErrorKind::Tag));
+ }
+ consumed += 1;
+ }
+
+ match c {
+ b'\n' => {
+ // A line continuation: emit the line so far, without the backslash, and its newline.
+ partial_value_found = true;
+ let backslash = 1;
+ dispatch(Event::ValueNotDone(Cow::Borrowed(
+ i[value_start..last_value_index - backslash].as_bstr(),
+ )));
+ let nl_end = last_value_index + consumed;
+ dispatch(Event::Newline(Cow::Borrowed(i[last_value_index..nl_end].as_bstr())));
+ value_start = nl_end;
+ value_end = None;
+ newlines += 1;
+
+ last_value_index += consumed;
+ }
+ // Valid escape sequences remain in the value as they are.
+ b'n' | b't' | b'\\' | b'b' | b'"' => {
+ last_value_index += 1;
+ }
+ _ => {
+ return Err(new_err(ErrorKind::Escaped));
+ }
+ }
+ } else {
+ match c {
+ // The terminating byte isn't part of the value and isn't consumed either.
+ b'\n' => {
+ value_end = last_value_index.into();
+ break;
+ }
+ b';' | b'#' if !is_in_quotes => {
+ value_end = last_value_index.into();
+ break;
+ }
+ b'\\' => prev_char_was_backslash = true,
+ b'"' => is_in_quotes = !is_in_quotes,
+ _ => {}
+ }
+ last_value_index += 1;
+ }
+ }
+
+ // The input ended right after a backslash.
+ if prev_char_was_backslash {
+ return Err(new_err(ErrorKind::Escaped));
+ }
+
+ // The value ended while a quote was still open.
+ if is_in_quotes {
+ return Err(new_err(ErrorKind::Tag));
+ }
+
+ let value_end = match value_end {
+ None => {
+ if last_value_index == 0 {
+ // There was no input at all, which is treated as empty value.
+ dispatch(Event::Value(Cow::Borrowed("".into())));
+ return Ok((&i[0..], newlines));
+ } else {
+ // No terminating byte was found, hence the value extends to the end of the input.
+ i.len()
+ }
+ }
+ Some(idx) => idx,
+ };
+
+ let dispatch = move |value: &'a [u8]| {
+ if partial_value_found {
+ dispatch(Event::ValueDone(Cow::Borrowed(value.as_bstr())));
+ } else {
+ dispatch(Event::Value(Cow::Borrowed(value.as_bstr())));
+ }
+ };
+ // From here on, the input and the end of the value are relative to the start of the last line.
+ (&i[value_start..], value_end - value_start, newlines, dispatch)
+ };
+
+ let (i, remainder_value) = {
+ // One past the index of the last byte of the value that isn't ASCII whitespace,
+ // or 0 if there is no such byte.
+ let value_end_no_trailing_whitespace = i[..value_end]
+ .iter()
+ .enumerate()
+ .rev()
+ .find_map(|(idx, b)| (!b.is_ascii_whitespace()).then_some(idx + 1))
+ .unwrap_or(0);
+ (
+ &i[value_end_no_trailing_whitespace..],
+ &i[..value_end_no_trailing_whitespace],
+ )
+ };
+
+ dispatch(remainder_value);
+
+ Ok((i, newlines))
+}
+
+/// Consume one or more leading bytes that nom's `is_space` accepts, i.e. spaces and tabs.
+///
+/// Fails if `i` doesn't start with such a byte, which means that successful calls always consume input.
+fn take_spaces(i: &[u8]) -> IResult<&[u8], &BStr> {
+ let (i, v) = take_while(|c: u8| c.is_ascii() && is_space(c))(i)?;
+ if v.is_empty() {
+ Err(nom::Err::Error(NomError {
+ input: i,
+ code: ErrorKind::Eof,
+ }))
+ } else {
+ Ok((i, v.as_bstr()))
+ }
+}
+
+/// Consume one or more leading newlines, each of which is either `\n` or `\r\n`.
+///
+/// Returns the remaining input along with a tuple of the consumed bytes and the number of newlines in them.
+/// Fails if `i` doesn't start with a newline, which means that successful calls always consume input.
+fn take_newlines(i: &[u8]) -> IResult<&[u8], (&BStr, usize)> {
+ let mut counter = 0;
+ // Counts the bytes of complete newlines only, hence a carriage return that isn't followed
+ // by `\n` is never consumed.
+ let mut consumed_bytes = 0;
+ // Set after seeing a carriage return, which is valid only if directly followed by `\n`.
+ let mut next_must_be_newline = false;
+ for b in i.iter().copied() {
+ if !b.is_ascii() {
+ break;
+ };
+ if b == b'\r' {
+ if next_must_be_newline {
+ break;
+ }
+ next_must_be_newline = true;
+ continue;
+ };
+ if b == b'\n' {
+ counter += 1;
+ consumed_bytes += if next_must_be_newline { 2 } else { 1 };
+ next_must_be_newline = false;
+ } else {
+ break;
+ }
+ }
+ let (v, i) = i.split_at(consumed_bytes);
+ if v.is_empty() {
+ Err(nom::Err::Error(NomError {
+ input: i,
+ code: ErrorKind::Eof,
+ }))
+ } else {
+ Ok((i, (v.as_bstr(), counter)))
+ }
+}
diff --git a/vendor/gix-config/src/parse/nom/tests.rs b/vendor/gix-config/src/parse/nom/tests.rs
new file mode 100644
index 000000000..f6e8c3d92
--- /dev/null
+++ b/vendor/gix-config/src/parse/nom/tests.rs
@@ -0,0 +1,924 @@
+use super::*;
+
+mod section_headers {
+ use super::section_header;
+ use crate::parse::tests::util::{fully_consumed, section_header as parsed_section_header};
+
+ #[test]
+ fn no_subsection() {
+ assert_eq!(
+ section_header(b"[hello]").unwrap(),
+ fully_consumed(parsed_section_header("hello", None)),
+ );
+ }
+
+ #[test]
+ fn modern_subsection() {
+ assert_eq!(
+ section_header(br#"[hello "world"]"#).unwrap(),
+ fully_consumed(parsed_section_header("hello", (" ", "world"))),
+ );
+ }
+
+ #[test]
+ fn escaped_subsection() {
+ assert_eq!(
+ section_header(br#"[hello "foo\\bar\""]"#).unwrap(),
+ fully_consumed(parsed_section_header("hello", (" ", r#"foo\bar""#))),
+ );
+ }
+
+ #[test]
+ fn deprecated_subsection() {
+ assert_eq!(
+ section_header(br#"[hello.world]"#).unwrap(),
+ fully_consumed(parsed_section_header("hello", (".", "world")))
+ );
+ assert_eq!(
+ section_header(br#"[Hello.World]"#).unwrap(),
+ fully_consumed(parsed_section_header("Hello", (".", "World")))
+ );
+ }
+
+ #[test]
+ fn empty_legacy_subsection_name() {
+ assert_eq!(
+ section_header(br#"[hello-world.]"#).unwrap(),
+ fully_consumed(parsed_section_header("hello-world", (".", "")))
+ );
+ }
+
+ #[test]
+ fn empty_modern_subsection_name() {
+ assert_eq!(
+ section_header(br#"[hello ""]"#).unwrap(),
+ fully_consumed(parsed_section_header("hello", (" ", "")))
+ );
+ }
+
+ #[test]
+ fn backslashes_in_subsections_do_not_escape_newlines_or_tabs() {
+ assert_eq!(
+ section_header(br#"[hello "single \ \\ \t \n \0"]"#).unwrap(),
+ fully_consumed(parsed_section_header("hello", (" ", r#"single \ t n 0"#)))
+ );
+ }
+
+ #[test]
+ fn newline_in_header() {
+ assert!(section_header(b"[hello\n]").is_err());
+ }
+
+ #[test]
+ fn newline_in_sub_section() {
+ assert!(section_header(b"[hello \"hello\n\"]").is_err());
+ }
+
+ #[test]
+ fn null_byt_in_sub_section() {
+ assert!(section_header(b"[hello \"hello\0\"]").is_err());
+ }
+
+ #[test]
+ fn escaped_newline_in_sub_section() {
+ assert!(section_header(b"[hello \"hello\\\n\"]").is_err());
+ }
+
+ #[test]
+ fn eof_after_escape_in_sub_section() {
+ assert!(section_header(b"[hello \"hello\\").is_err());
+ }
+
+ #[test]
+ fn null_byte_in_header() {
+ assert!(section_header(b"[hello\0]").is_err());
+ }
+
+ #[test]
+ fn invalid_characters_in_section() {
+ assert!(section_header(b"[$]").is_err());
+ }
+ #[test]
+ fn invalid_characters_in_legacy_sub_section() {
+ assert!(section_header(b"[hello.$]").is_err());
+ assert!(section_header(b"[hello. world]").is_err());
+ }
+
+ #[test]
+ fn right_brace_in_subsection_name() {
+ assert_eq!(
+ section_header(br#"[hello "]"]"#).unwrap(),
+ fully_consumed(parsed_section_header("hello", (" ", "]")))
+ );
+ }
+}
+
+// Tests for the `sub_section` parser, whose input starts right after the opening quote.
+mod sub_section {
+ use std::borrow::Cow;
+
+ use super::sub_section;
+
+ // Without escapes, the name is a slice of the input.
+ #[test]
+ fn zero_copy_simple() {
+ let actual = sub_section(b"name\"").unwrap().1;
+ assert_eq!(actual.as_ref(), "name");
+ assert!(matches!(actual, Cow::Borrowed(_)));
+ }
+
+ // With escapes, the backslashes are removed which requires an owned name.
+ // Note that the escaped characters are taken literally, `\t` yields `t` and not a tab.
+ #[test]
+ fn escapes_need_allocation() {
+ let actual = sub_section(br#"\x\t\n\0\\\"""#).unwrap().1;
+ assert_eq!(actual.as_ref(), r#"xtn0\""#);
+ assert!(matches!(actual, Cow::Owned(_)));
+ }
+}
+
+// Tests for the `config_name` parser, which parses the key of a key-value pair.
+mod config_name {
+ use nom::combinator::all_consuming;
+
+ use super::config_name;
+ use crate::parse::tests::util::fully_consumed;
+
+ #[test]
+ fn just_name() {
+ assert_eq!(config_name(b"name").unwrap(), fully_consumed("name".into()));
+ }
+
+ // Digits and dashes are allowed in names, but not as their first character.
+ #[test]
+ fn must_start_with_alphabetic() {
+ assert!(config_name(b"4aaa").is_err());
+ assert!(config_name(b"-aaa").is_err());
+ }
+
+ // The parser stops at the first invalid character instead of failing,
+ // hence `all_consuming` is needed to turn the leftover input into an error.
+ #[test]
+ fn only_a_subset_of_characters_is_allowed() {
+ assert!(all_consuming(config_name)(b"Name$_").is_err());
+ assert!(all_consuming(config_name)(b"other#").is_err());
+ }
+
+ #[test]
+ fn cannot_be_empty() {
+ assert!(config_name(b"").is_err());
+ }
+}
+
+mod section {
+ use crate::parse::{
+ error::ParseNode,
+ section,
+ tests::util::{
+ comment_event, fully_consumed, name_event, newline_custom_event, newline_event,
+ section_header as parsed_section_header, value_done_event, value_event, value_not_done_event,
+ whitespace_event,
+ },
+ Event, Section,
+ };
+
+ fn section<'a>(i: &'a [u8], node: &mut ParseNode) -> nom::IResult<&'a [u8], (Section<'a>, usize)> {
+ let mut header = None;
+ let mut events = section::Events::default();
+ super::section(i, node, &mut |e| match &header {
+ None => {
+ header = Some(e);
+ }
+ Some(_) => events.push(e),
+ })
+ .map(|(i, o)| {
+ (
+ i,
+ (
+ Section {
+ header: match header.expect("header set") {
+ Event::SectionHeader(header) => header,
+ _ => unreachable!("unexpected"),
+ },
+ events,
+ },
+ o,
+ ),
+ )
+ })
+ }
+
+ #[test]
+ fn empty_value_with_windows_newlines() {
+ let mut node = ParseNode::SectionHeader;
+ assert_eq!(
+ section(b"[a] k = \r\n", &mut node).unwrap(),
+ fully_consumed((
+ Section {
+ header: parsed_section_header("a", None),
+ events: vec![
+ whitespace_event(" "),
+ name_event("k"),
+ whitespace_event(" "),
+ Event::KeyValueSeparator,
+ whitespace_event(" "),
+ value_event(""),
+ newline_custom_event("\r\n")
+ ]
+ .into(),
+ },
+ 1
+ )),
+ );
+ }
+
+ #[test]
+ fn simple_value_with_windows_newlines() {
+ let mut node = ParseNode::SectionHeader;
+ assert_eq!(
+ section(b"[a] k = v\r\n", &mut node).unwrap(),
+ fully_consumed((
+ Section {
+ header: parsed_section_header("a", None),
+ events: vec![
+ whitespace_event(" "),
+ name_event("k"),
+ whitespace_event(" "),
+ Event::KeyValueSeparator,
+ whitespace_event(" "),
+ value_event("v"),
+ newline_custom_event("\r\n")
+ ]
+ .into(),
+ },
+ 1
+ )),
+ );
+ assert_eq!(
+ section(b"[a] k = \r\n", &mut node).unwrap(),
+ fully_consumed((
+ Section {
+ header: parsed_section_header("a", None),
+ events: vec![
+ whitespace_event(" "),
+ name_event("k"),
+ whitespace_event(" "),
+ Event::KeyValueSeparator,
+ whitespace_event(" "),
+ value_event(""),
+ newline_custom_event("\r\n")
+ ]
+ .into(),
+ },
+ 1
+ )),
+ );
+ }
+
+ #[test]
+ fn empty_section() {
+ let mut node = ParseNode::SectionHeader;
+ assert_eq!(
+ section(b"[test]", &mut node).unwrap(),
+ fully_consumed((
+ Section {
+ header: parsed_section_header("test", None),
+ events: Default::default()
+ },
+ 0
+ )),
+ );
+ }
+
+ #[test]
+ fn simple_section() {
+ let mut node = ParseNode::SectionHeader;
+ let section_data = br#"[hello]
+ a = b
+ c
+ d = "lol""#;
+ assert_eq!(
+ section(section_data, &mut node).unwrap(),
+ fully_consumed((
+ Section {
+ header: parsed_section_header("hello", None),
+ events: vec![
+ newline_event(),
+ whitespace_event(" "),
+ name_event("a"),
+ whitespace_event(" "),
+ Event::KeyValueSeparator,
+ whitespace_event(" "),
+ value_event("b"),
+ newline_event(),
+ whitespace_event(" "),
+ name_event("c"),
+ value_event(""),
+ newline_event(),
+ whitespace_event(" "),
+ name_event("d"),
+ whitespace_event(" "),
+ Event::KeyValueSeparator,
+ whitespace_event(" "),
+ value_event("\"lol\"")
+ ]
+ .into()
+ },
+ 3
+ ))
+ );
+ }
+
+ #[test]
+ fn section_with_empty_value_simplified() {
+ let mut node = ParseNode::SectionHeader;
+ let section_data = b"[a] k=";
+ assert_eq!(
+ section(section_data, &mut node).unwrap(),
+ fully_consumed((
+ Section {
+ header: parsed_section_header("a", None),
+ events: vec![
+ whitespace_event(" "),
+ name_event("k"),
+ Event::KeyValueSeparator,
+ value_event(""),
+ ]
+ .into()
+ },
+ 0
+ ))
+ );
+
+ let section_data = b"[a] k=\n";
+ assert_eq!(
+ section(section_data, &mut node).unwrap(),
+ fully_consumed((
+ Section {
+ header: parsed_section_header("a", None),
+ events: vec![
+ whitespace_event(" "),
+ name_event("k"),
+ Event::KeyValueSeparator,
+ value_event(""),
+ newline_event(),
+ ]
+ .into()
+ },
+ 1
+ ))
+ );
+ }
+
+ #[test]
+ fn section_with_empty_value() {
+ let mut node = ParseNode::SectionHeader;
+ let section_data = br#"[hello]
+ a = b
+ c=
+ d = "lol""#;
+ assert_eq!(
+ section(section_data, &mut node).unwrap(),
+ fully_consumed((
+ Section {
+ header: parsed_section_header("hello", None),
+ events: vec![
+ newline_event(),
+ whitespace_event(" "),
+ name_event("a"),
+ whitespace_event(" "),
+ Event::KeyValueSeparator,
+ whitespace_event(" "),
+ value_event("b"),
+ newline_event(),
+ whitespace_event(" "),
+ name_event("c"),
+ Event::KeyValueSeparator,
+ value_event(""),
+ newline_event(),
+ whitespace_event(" "),
+ name_event("d"),
+ whitespace_event(" "),
+ Event::KeyValueSeparator,
+ whitespace_event(" "),
+ value_event("\"lol\"")
+ ]
+ .into()
+ },
+ 3
+ ))
+ );
+ }
+
+ #[test]
+ fn section_implicit_value() {
+ let mut node = ParseNode::SectionHeader;
+ assert_eq!(
+ section(b"[hello] c", &mut node).unwrap(),
+ fully_consumed((
+ Section {
+ header: parsed_section_header("hello", None),
+ events: vec![whitespace_event(" "), name_event("c"), value_event("")].into()
+ },
+ 0
+ ))
+ );
+
+ assert_eq!(
+ section(b"[hello] c\nd", &mut node).unwrap(),
+ fully_consumed((
+ Section {
+ header: parsed_section_header("hello", None),
+ events: vec![
+ whitespace_event(" "),
+ name_event("c"),
+ value_event(""),
+ newline_event(),
+ name_event("d"),
+ value_event("")
+ ]
+ .into()
+ },
+ 1
+ ))
+ );
+ }
+
+ #[test]
+ fn section_very_commented() {
+ let mut node = ParseNode::SectionHeader;
+ let section_data = br#"[hello] ; commentA
+ a = b # commentB
+ ; commentC
+ ; commentD
+ c = d"#;
+ assert_eq!(
+ section(section_data, &mut node).unwrap(),
+ fully_consumed((
+ Section {
+ header: parsed_section_header("hello", None),
+ events: vec![
+ whitespace_event(" "),
+ comment_event(';', " commentA"),
+ newline_event(),
+ whitespace_event(" "),
+ name_event("a"),
+ whitespace_event(" "),
+ Event::KeyValueSeparator,
+ whitespace_event(" "),
+ value_event("b"),
+ whitespace_event(" "),
+ comment_event('#', " commentB"),
+ newline_event(),
+ whitespace_event(" "),
+ comment_event(';', " commentC"),
+ newline_event(),
+ whitespace_event(" "),
+ comment_event(';', " commentD"),
+ newline_event(),
+ whitespace_event(" "),
+ name_event("c"),
+ whitespace_event(" "),
+ Event::KeyValueSeparator,
+ whitespace_event(" "),
+ value_event("d"),
+ ]
+ .into()
+ },
+ 4
+ ))
+ );
+ }
+
+ #[test]
+ fn complex_continuation() {
+ let mut node = ParseNode::SectionHeader;
+ // This test is absolute hell. Good luck if this fails.
+ assert_eq!(
+ section(b"[section] a = 1 \"\\\"\\\na ; e \"\\\"\\\nd # \"b\t ; c", &mut node).unwrap(),
+ fully_consumed((
+ Section {
+ header: parsed_section_header("section", None),
+ events: vec![
+ whitespace_event(" "),
+ name_event("a"),
+ whitespace_event(" "),
+ Event::KeyValueSeparator,
+ whitespace_event(" "),
+ value_not_done_event(r#"1 "\""#),
+ newline_event(),
+ value_not_done_event(r#"a ; e "\""#),
+ newline_event(),
+ value_done_event("d"),
+ whitespace_event(" "),
+ comment_event('#', " \"b\t ; c"),
+ ]
+ .into()
+ },
+ 2
+ ))
+ );
+ }
+
+ #[test]
+ fn quote_split_over_two_lines() {
+ let mut node = ParseNode::SectionHeader;
+ assert_eq!(
+ section(b"[section \"a\"] b =\"\\\n;\";a", &mut node).unwrap(),
+ fully_consumed((
+ Section {
+ header: parsed_section_header("section", (" ", "a")),
+ events: vec![
+ whitespace_event(" "),
+ name_event("b"),
+ whitespace_event(" "),
+ Event::KeyValueSeparator,
+ value_not_done_event("\""),
+ newline_event(),
+ value_done_event(";\""),
+ comment_event(';', "a"),
+ ]
+ .into()
+ },
+ 1
+ ))
+ );
+ }
+
+ #[test]
+ fn section_handles_extraneous_whitespace_before_comment() {
+ let mut node = ParseNode::SectionHeader;
+ assert_eq!(
+ section(b"[s]hello #world", &mut node).unwrap(),
+ fully_consumed((
+ Section {
+ header: parsed_section_header("s", None),
+ events: vec![
+ name_event("hello"),
+ whitespace_event(" "),
+ value_event(""),
+ comment_event('#', "world"),
+ ]
+ .into()
+ },
+ 0
+ ))
+ );
+ }
+}
+
+mod value_continuation {
+ use bstr::ByteSlice;
+
+ use crate::parse::{
+ section,
+ tests::util::{into_events, newline_custom_event, newline_event, value_done_event, value_not_done_event},
+ };
+
+ pub fn value_impl<'a>(i: &'a [u8], events: &mut section::Events<'a>) -> nom::IResult<&'a [u8], ()> {
+ super::value_impl(i, &mut |e| events.push(e)).map(|t| (t.0, ()))
+ }
+
+ #[test]
+ fn simple_continuation() {
+ let mut events = section::Events::default();
+ assert_eq!(value_impl(b"hello\\\nworld", &mut events).unwrap().0, b"");
+ assert_eq!(
+ events,
+ into_events(vec![
+ value_not_done_event("hello"),
+ newline_event(),
+ value_done_event("world")
+ ])
+ );
+ }
+
+ #[test]
+ fn continuation_with_whitespace() {
+ let mut events = section::Events::default();
+ assert_eq!(value_impl(b"hello\\\n world", &mut events).unwrap().0, b"");
+ assert_eq!(
+ events,
+ into_events(vec![
+ value_not_done_event("hello"),
+ newline_event(),
+ value_done_event(" world")
+ ])
+ );
+
+ let mut events = section::Events::default();
+ assert_eq!(value_impl(b"hello\\\r\n world", &mut events).unwrap().0, b"");
+ assert_eq!(
+ events,
+ into_events(vec![
+ value_not_done_event("hello"),
+ newline_custom_event("\r\n"),
+ value_done_event(" world")
+ ])
+ );
+
+ let mut events = section::Events::default();
+ assert!(
+ value_impl(b"hello\\\r\r\n world", &mut events).is_err(),
+ "\\r must be followed by \\n"
+ );
+ }
+
+ #[test]
+ fn complex_continuation_with_leftover_comment() {
+ let mut events = section::Events::default();
+ assert_eq!(
+ value_impl(b"1 \"\\\"\\\na ; e \"\\\"\\\nd # \"b\t ; c", &mut events)
+ .unwrap()
+ .0,
+ b" # \"b\t ; c"
+ );
+ assert_eq!(
+ events,
+ into_events(vec![
+ value_not_done_event(r#"1 "\""#),
+ newline_event(),
+ value_not_done_event(r#"a ; e "\""#),
+ newline_event(),
+ value_done_event("d")
+ ])
+ );
+ }
+
+ #[test]
+ fn quote_split_over_two_lines_with_leftover_comment() {
+ let mut events = section::Events::default();
+ assert_eq!(value_impl(b"\"\\\n;\";a", &mut events).unwrap().0, b";a");
+ assert_eq!(
+ events,
+ into_events(vec![
+ value_not_done_event("\""),
+ newline_event(),
+ value_done_event(";\"")
+ ])
+ );
+
+ let mut events = section::Events::default();
+ assert_eq!(value_impl(b"\"a\\\r\nb;\";c", &mut events).unwrap().0, b";c");
+ assert_eq!(
+ events,
+ into_events(vec![
+ value_not_done_event("\"a"),
+ newline_custom_event("\r\n"),
+ value_done_event("b;\"")
+ ])
+ );
+ }
+
+ #[test]
+ fn quote_split_over_multiple_lines_without_surrounding_quotes_but_inner_quotes() {
+ let mut events = section::Events::default();
+ assert_eq!(
+ value_impl(
+ br#"1\
+"2" a\
+\"3 b\"\
+4 ; comment "#,
+ &mut events
+ )
+ .unwrap()
+ .0
+ .as_bstr(),
+ b" ; comment ".as_bstr()
+ );
+ assert_eq!(
+ events,
+ into_events(vec![
+ value_not_done_event("1"),
+ newline_event(),
+ value_not_done_event("\"2\" a"),
+ newline_event(),
+ value_not_done_event("\\\"3 b\\\""),
+ newline_event(),
+ value_done_event("4")
+ ])
+ );
+ }
+
+ #[test]
+ fn quote_split_over_multiple_lines_with_surrounding_quotes() {
+ let mut events = section::Events::default();
+ assert_eq!(
+ value_impl(
+ br#""1\
+"2" a\
+\"3 b\"\
+4 " ; comment "#,
+ &mut events
+ )
+ .unwrap()
+ .0
+ .as_bstr(),
+ b" ; comment ".as_bstr()
+ );
+ assert_eq!(
+ events,
+ into_events(vec![
+ value_not_done_event("\"1"),
+ newline_event(),
+ value_not_done_event("\"2\" a"),
+ newline_event(),
+ value_not_done_event("\\\"3 b\\\""),
+ newline_event(),
+ value_done_event("4 \"")
+ ])
+ );
+ }
+}
+
+mod value_no_continuation {
+ use super::value_continuation::value_impl;
+ use crate::parse::{
+ section,
+ tests::util::{into_events, value_event},
+ };
+
+ #[test]
+ fn no_comment() {
+ let mut events = section::Events::default();
+ assert_eq!(value_impl(b"hello", &mut events).unwrap().0, b"");
+ assert_eq!(events, into_events(vec![value_event("hello")]));
+ }
+
+ #[test]
+ fn windows_newline() {
+ let mut events = section::Events::default();
+ assert_eq!(value_impl(b"hi\r\nrest", &mut events).unwrap().0, b"\r\nrest");
+ assert_eq!(events, into_events(vec![value_event("hi")]));
+
+ events.clear();
+ assert_eq!(value_impl(b"hi\r\r\r\nrest", &mut events).unwrap().0, b"\r\r\r\nrest");
+ assert_eq!(events, into_events(vec![value_event("hi")]));
+ }
+
+ #[test]
+ fn no_comment_newline() {
+ let mut events = section::Events::default();
+ assert_eq!(value_impl(b"hello\na", &mut events).unwrap().0, b"\na");
+ assert_eq!(events, into_events(vec![value_event("hello")]));
+ }
+
+ #[test]
+ fn semicolon_comment_not_consumed() {
+ let mut events = section::Events::default();
+ assert_eq!(value_impl(b"hello;world", &mut events).unwrap().0, b";world");
+ assert_eq!(events, into_events(vec![value_event("hello")]));
+ }
+
+ #[test]
+ fn octothorpe_comment_not_consumed() {
+ let mut events = section::Events::default();
+ assert_eq!(value_impl(b"hello#world", &mut events).unwrap().0, b"#world");
+ assert_eq!(events, into_events(vec![value_event("hello")]));
+ }
+
+ #[test]
+ fn values_with_extraneous_whitespace_without_comment() {
+ let mut events = section::Events::default();
+ assert_eq!(
+ value_impl(b"hello ", &mut events).unwrap().0,
+ b" "
+ );
+ assert_eq!(events, into_events(vec![value_event("hello")]));
+ }
+
+ #[test]
+ fn values_with_extraneous_whitespace_before_comment() {
+ let mut events = section::Events::default();
+ assert_eq!(
+ value_impl(b"hello #world", &mut events).unwrap().0,
+ b" #world"
+ );
+ assert_eq!(events, into_events(vec![value_event("hello")]));
+
+ let mut events = section::Events::default();
+ assert_eq!(
+ value_impl(b"hello ;world", &mut events).unwrap().0,
+ b" ;world"
+ );
+ assert_eq!(events, into_events(vec![value_event("hello")]));
+ }
+
+ #[test]
+ fn trans_escaped_comment_marker_not_consumed() {
+ let mut events = section::Events::default();
+ assert_eq!(value_impl(br##"hello"#"world; a"##, &mut events).unwrap().0, b"; a");
+ assert_eq!(events, into_events(vec![value_event(r##"hello"#"world"##)]));
+ }
+
+ #[test]
+ fn complex_test() {
+ let mut events = section::Events::default();
+ assert_eq!(value_impl(br#"value";";ahhhh"#, &mut events).unwrap().0, b";ahhhh");
+ assert_eq!(events, into_events(vec![value_event(r#"value";""#)]));
+ }
+
+ #[test]
+ fn garbage_after_continuation_is_err() {
+ assert!(value_impl(b"hello \\afwjdls", &mut Default::default()).is_err());
+ }
+
+ #[test]
+ fn invalid_escape() {
+ assert!(value_impl(br#"\x"#, &mut Default::default()).is_err());
+ }
+
+ #[test]
+ fn incomplete_quote() {
+ assert!(value_impl(br#"hello "world"#, &mut Default::default()).is_err());
+ }
+
+ #[test]
+ fn incomplete_escape() {
+ assert!(value_impl(br#"hello world\"#, &mut Default::default()).is_err());
+ }
+}
+
+mod key_value_pair {
+ use crate::parse::{
+ error::ParseNode,
+ section,
+ tests::util::{into_events, name_event, value_event, whitespace_event},
+ Event,
+ };
+
+ fn key_value<'a>(
+ i: &'a [u8],
+ node: &mut ParseNode,
+ events: &mut section::Events<'a>,
+ ) -> nom::IResult<&'a [u8], ()> {
+ super::key_value_pair(i, node, &mut |e| events.push(e)).map(|t| (t.0, ()))
+ }
+
+ #[test]
+ fn nonascii_is_allowed_for_values_but_not_for_keys() {
+ let mut node = ParseNode::SectionHeader;
+ let mut vec = Default::default();
+ assert!(key_value("你好".as_bytes(), &mut node, &mut vec).is_err());
+ assert!(key_value("a = 你好 ".as_bytes(), &mut node, &mut vec).is_ok());
+ assert_eq!(
+ vec,
+ into_events(vec![
+ name_event("a"),
+ whitespace_event(" "),
+ Event::KeyValueSeparator,
+ whitespace_event(" "),
+ value_event("你好")
+ ])
+ );
+ }
+
+ #[test]
+ fn whitespace_is_not_ambiguous() {
+ let mut node = ParseNode::SectionHeader;
+ let mut vec = Default::default();
+ assert!(key_value(b"a =b", &mut node, &mut vec).is_ok());
+ assert_eq!(
+ vec,
+ into_events(vec![
+ name_event("a"),
+ whitespace_event(" "),
+ Event::KeyValueSeparator,
+ value_event("b")
+ ])
+ );
+
+ let mut vec = Default::default();
+ assert!(key_value(b"a= b", &mut node, &mut vec).is_ok());
+ assert_eq!(
+ vec,
+ into_events(vec![
+ name_event("a"),
+ Event::KeyValueSeparator,
+ whitespace_event(" "),
+ value_event("b")
+ ])
+ );
+ }
+}
+
+// Tests for the `comment` parser.
+mod comment {
+ use super::comment;
+ use crate::parse::tests::util::{comment as parsed_comment, fully_consumed};
+
+ #[test]
+ fn semicolon() {
+ assert_eq!(
+ comment(b"; this is a semicolon comment").unwrap(),
+ fully_consumed(parsed_comment(';', " this is a semicolon comment")),
+ );
+ }
+
+ #[test]
+ fn octothorpe() {
+ assert_eq!(
+ comment(b"# this is an octothorpe comment").unwrap(),
+ fully_consumed(parsed_comment('#', " this is an octothorpe comment")),
+ );
+ }
+
+ // Only the first marker is the tag, all following ones are part of the comment text.
+ #[test]
+ fn multiple_markers() {
+ assert_eq!(
+ comment(b"###### this is an octothorpe comment").unwrap(),
+ fully_consumed(parsed_comment('#', "##### this is an octothorpe comment")),
+ );
+ }
+}
diff --git a/vendor/gix-config/src/parse/section/header.rs b/vendor/gix-config/src/parse/section/header.rs
new file mode 100644
index 000000000..341edcdd5
--- /dev/null
+++ b/vendor/gix-config/src/parse/section/header.rs
@@ -0,0 +1,180 @@
+use std::{borrow::Cow, fmt::Display};
+
+use bstr::{BStr, BString, ByteSlice, ByteVec};
+
+use crate::parse::{
+ section::{into_cow_bstr, Header, Name},
+ Event,
+};
+
+/// The error returned by [`Header::new(…)`][super::Header::new()].
+#[derive(Debug, PartialOrd, PartialEq, Eq, thiserror::Error)]
+#[allow(missing_docs)]
+pub enum Error {
+ /// The section name contained a byte that is neither ASCII alphanumeric nor `-`.
+ #[error("section names can only be ascii, '-'")]
+ InvalidName,
+ /// The subsection name contained a newline (`\n`) or a null byte (`\0`).
+ #[error("sub-section names must not contain newlines or null bytes")]
+ InvalidSubSection,
+}
+
+impl<'a> Header<'a> {
+    /// Create a header from a section `name` and an optional `subsection`.
+    ///
+    /// A `name` of "core" without subsection serializes to `[core]`, whereas a `name` of "remote"
+    /// with a `subsection` of "origin" serializes to `[remote "origin"]`. Headers created here
+    /// always use a single space as separator, i.e. they are never in legacy format.
+    ///
+    /// # Errors
+    ///
+    /// * [`Error::InvalidName`] if `name` contains bytes other than ASCII alphanumerics and `-`.
+    /// * [`Error::InvalidSubSection`] if `subsection` contains a newline or a null byte.
+    pub fn new(
+        name: impl Into<Cow<'a, str>>,
+        subsection: impl Into<Option<Cow<'a, BStr>>>,
+    ) -> Result<Header<'a>, Error> {
+        // The name is validated first so that its error takes precedence.
+        let name = Name(validated_name(into_cow_bstr(name.into()))?);
+        let subsection_name = subsection.into().map(validated_subsection).transpose()?;
+        Ok(Header {
+            name,
+            // A separator is present exactly if there is a subsection.
+            separator: subsection_name.as_ref().map(|_| Cow::Borrowed(" ".into())),
+            subsection_name,
+        })
+    }
+}
+
+/// Return true if `name` can be used as subsection name, like `origin` in `[remote "origin"]`,
+/// which is the case if it contains neither a newline nor a null byte.
+pub fn is_valid_subsection(name: &BStr) -> bool {
+    !name.iter().any(|&byte| byte == b'\n' || byte == b'\0')
+}
+
+fn validated_subsection(name: Cow<'_, BStr>) -> Result<Cow<'_, BStr>, Error> {
+ is_valid_subsection(name.as_ref())
+ .then_some(name)
+ .ok_or(Error::InvalidSubSection)
+}
+
+/// Pass `name` through if it consists only of ASCII alphanumerics and `-` (which includes the
+/// empty name), or fail with [`Error::InvalidName`].
+fn validated_name(name: Cow<'_, BStr>) -> Result<Cow<'_, BStr>, Error> {
+    let has_invalid_byte = name
+        .iter()
+        .any(|byte| !(byte.is_ascii_alphanumeric() || *byte == b'-'));
+    if has_invalid_byte {
+        Err(Error::InvalidName)
+    } else {
+        Ok(name)
+    }
+}
+
+#[cfg(test)]
+mod tests {
+    use super::*;
+
+    /// An empty section name passes validation.
+    #[test]
+    fn empty_header_names_are_legal() {
+        let header = Header::new("", None);
+        assert!(header.is_ok(), "yes, git allows this, so do we");
+    }
+
+    /// An empty subsection name passes validation.
+    #[test]
+    fn empty_header_sub_names_are_legal() {
+        let header = Header::new("remote", Some(Cow::Borrowed("".into())));
+        assert!(header.is_ok(), "yes, git allows this, so do we");
+    }
+}
+
+impl Header<'_> {
+    /// Return true if this header uses the legacy format like `[legacy.subsection]`, that is
+    /// if its separator is exactly `.`, or false otherwise.
+    pub fn is_legacy(&self) -> bool {
+        matches!(self.separator.as_deref(), Some(separator) if separator == ".")
+    }
+
+    /// Return the subsection name, if present, i.e. "origin" in `[remote "origin"]`.
+    ///
+    /// The name is stored without surrounding quotes and with escapes already folded into the
+    /// characters they represent, which is why quotes and escapes have to be re-added
+    /// when serializing. This makes it possible to use [`Event`] data for lookups directly.
+    pub fn subsection_name(&self) -> Option<&BStr> {
+        self.subsection_name.as_ref().map(|name| &**name)
+    }
+
+    /// Return the name of the header, like "remote" in `[remote "origin"]`.
+    pub fn name(&self) -> &BStr {
+        &self.name.0
+    }
+
+    /// Serialize this type into a `BString` for convenience.
+    ///
+    /// Note that `to_string()` can also be used, but might not be lossless.
+    #[must_use]
+    pub fn to_bstring(&self) -> BString {
+        let mut serialized: Vec<u8> = Vec::new();
+        self.write_to(&mut serialized).expect("io error impossible");
+        BString::from(serialized)
+    }
+
+    /// Stream ourselves to the given `out`, in order to reproduce this header mostly losslessly
+    /// as it was parsed.
+    ///
+    /// The subsection is only written if both a separator and a subsection name are present.
+    /// Legacy subsections are written verbatim, all others are quoted and escaped.
+    pub fn write_to(&self, mut out: impl std::io::Write) -> std::io::Result<()> {
+        out.write_all(b"[")?;
+        out.write_all(&self.name)?;
+
+        let parts = (self.separator.as_deref(), self.subsection_name.as_deref());
+        if let (Some(separator), Some(subsection)) = parts {
+            out.write_all(separator)?;
+            if separator == "." {
+                // The legacy format has no quotes and thus no escaping either.
+                out.write_all(subsection)?;
+            } else {
+                let escaped = escape_subsection(subsection);
+                out.write_all(b"\"")?;
+                out.write_all(&escaped)?;
+                out.write_all(b"\"")?;
+            }
+        }
+
+        out.write_all(b"]")
+    }
+
+    /// Turn this instance into a fully owned one with `'static` lifetime.
+    #[must_use]
+    pub fn to_owned(&self) -> Header<'static> {
+        let separator = self
+            .separator
+            .as_ref()
+            .map(|separator| Cow::Owned(separator.clone().into_owned()));
+        let subsection_name = self
+            .subsection_name
+            .as_ref()
+            .map(|subsection| Cow::Owned(subsection.clone().into_owned()));
+        Header {
+            name: self.name.to_owned(),
+            separator,
+            subsection_name,
+        }
+    }
+}
+
+/// Escape `name` for use within a quoted subsection by prefixing each backslash and each
+/// double quote with a backslash. Names without these bytes are returned borrowed.
+fn escape_subsection(name: &BStr) -> Cow<'_, BStr> {
+    match name.find_byteset(b"\\\"") {
+        None => Cow::Borrowed(name),
+        Some(_) => {
+            let mut escaped: Vec<u8> = Vec::with_capacity(name.len());
+            for &byte in name.iter() {
+                if byte == b'\\' {
+                    escaped.push_str(br"\\");
+                } else if byte == b'"' {
+                    escaped.push_str(br#"\""#);
+                } else {
+                    escaped.push(byte);
+                }
+            }
+            Cow::Owned(BString::from(escaped))
+        }
+    }
+}
+
+impl Display for Header<'_> {
+    /// Format the serialized form of this header as produced by `to_bstring()`.
+    fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
+        let serialized = self.to_bstring();
+        Display::fmt(&serialized, f)
+    }
+}
+
+impl From<Header<'_>> for BString {
+    /// Serialize `header` into its textual form, like `[remote "origin"]`.
+    fn from(header: Header<'_>) -> Self {
+        // Serialize explicitly: `header.into()` would resolve through the blanket `Into`
+        // implementation right back to this function and recurse until the stack overflows.
+        header.to_bstring()
+    }
+}
+
+impl From<&Header<'_>> for BString {
+    /// Serialize the borrowed `header` using `Header::to_bstring()`.
+    fn from(header: &Header<'_>) -> Self {
+        Header::to_bstring(header)
+    }
+}
+
+impl<'a> From<Header<'a>> for Event<'a> {
+    /// Wrap `header` into a section header event.
+    fn from(header: Header<'a>) -> Self {
+        Self::SectionHeader(header)
+    }
+}
diff --git a/vendor/gix-config/src/parse/section/mod.rs b/vendor/gix-config/src/parse/section/mod.rs
new file mode 100644
index 000000000..7ba08b87d
--- /dev/null
+++ b/vendor/gix-config/src/parse/section/mod.rs
@@ -0,0 +1,187 @@
+use std::{borrow::Cow, fmt::Display};
+
+use bstr::BStr;
+use smallvec::SmallVec;
+
+use crate::parse::{Event, Section};
+
+/// Creation, validation and serialization of section headers.
+pub mod header;
+
+// Parsing of section keys like `remote.origin` without validating their content.
+pub(crate) mod unvalidated;
+
+/// A container for events, avoiding heap allocations in typical files.
+///
+/// It has inline capacity for 64 events.
+pub type Events<'a> = SmallVec<[Event<'a>; 64]>;
+
+/// A parsed section header, containing a name and optionally a subsection name.
+#[derive(Clone, Eq, PartialEq, Ord, PartialOrd, Hash, Debug)]
+pub struct Header<'a> {
+ /// The name of the header.
+ pub(crate) name: Name<'a>,
+ /// The separator used to determine if the section contains a subsection.
+ /// This is either a period `.` or a string of whitespace. Note that
+ /// reconstruction of subsection format is dependent on this value. If this
+ /// is all whitespace, then the subsection name needs to be surrounded by
+ /// quotes to have perfect reconstruction.
+ pub(crate) separator: Option<Cow<'a, BStr>>,
+ /// The name of the subsection, if there is one, like `origin` in `[remote "origin"]`.
+ /// It is kept without surrounding quotes; these are re-added during serialization
+ /// unless the separator is a period.
+ pub(crate) subsection_name: Option<Cow<'a, BStr>>,
+}
+
+impl Section<'_> {
+    /// Turn this instance into a fully owned one with `'static` lifetime by converting
+    /// the header and every event into their owned counterparts.
+    #[must_use]
+    pub fn to_owned(&self) -> Section<'static> {
+        let header = self.header.to_owned();
+        let events = self.events.iter().map(|event| Event::to_owned(event)).collect();
+        Section { header, events }
+    }
+}
+
+impl Display for Section<'_> {
+    /// Write the header followed by all events of this section in order.
+    fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
+        write!(f, "{}", self.header)?;
+        self.events.iter().try_for_each(|event| event.fmt(f))
+    }
+}
+
+mod types {
+ // Generates a newtype `$name` around `Cow<'a, $cow_inner_type>` whose equality, ordering
+ // and hashing ignore ASCII case, along with a `$module` containing the error type that is
+ // returned if `$validate` rejects the input of a `TryFrom` conversion.
+ //
+ // `$err_doc` is the error's display message and `$comment` becomes the documentation
+ // of the generated type.
+ macro_rules! generate_case_insensitive {
+ ($name:ident, $module:ident, $err_doc:literal, $validate:ident, $cow_inner_type:ty, $comment:literal) => {
+ ///
+ pub mod $module {
+ /// The error returned when `TryFrom` is invoked to create an instance.
+ #[derive(Debug, thiserror::Error, Copy, Clone)]
+ #[error($err_doc)]
+ pub struct Error;
+ }
+
+ #[doc = $comment]
+ #[derive(Clone, Eq, Debug, Default)]
+ pub struct $name<'a>(pub(crate) std::borrow::Cow<'a, $cow_inner_type>);
+
+ impl<'a> $name<'a> {
+ // Wrap `s` without running `$validate` on it.
+ pub(crate) fn from_str_unchecked(s: &'a str) -> Self {
+ $name(std::borrow::Cow::Borrowed(s.into()))
+ }
+ /// Turn this instance into a fully owned one with `'static` lifetime.
+ #[must_use]
+ pub fn to_owned(&self) -> $name<'static> {
+ $name(std::borrow::Cow::Owned(self.0.clone().into_owned()))
+ }
+ }
+
+ // Equality ignores ASCII case, which is why `PartialEq` is not derived.
+ impl PartialEq for $name<'_> {
+ fn eq(&self, other: &Self) -> bool {
+ self.0.eq_ignore_ascii_case(&other.0)
+ }
+ }
+
+ // Displays the wrapped value as is, i.e. in its original case.
+ impl std::fmt::Display for $name<'_> {
+ fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
+ self.0.fmt(f)
+ }
+ }
+
+ // Delegates to `Ord` so both orderings always agree.
+ impl PartialOrd for $name<'_> {
+ fn partial_cmp(&self, other: &Self) -> Option<std::cmp::Ordering> {
+ self.cmp(other).into()
+ }
+ }
+
+ // Compares the ASCII-lowercased bytes, keeping the ordering consistent with `PartialEq`.
+ impl Ord for $name<'_> {
+ fn cmp(&self, other: &Self) -> std::cmp::Ordering {
+ let a = self.0.iter().map(|c| c.to_ascii_lowercase());
+ let b = other.0.iter().map(|c| c.to_ascii_lowercase());
+ a.cmp(b)
+ }
+ }
+
+ // Hashes the ASCII-lowercased bytes so that values which compare equal hash equally.
+ impl std::hash::Hash for $name<'_> {
+ fn hash<H: std::hash::Hasher>(&self, state: &mut H) {
+ for b in self.0.iter() {
+ b.to_ascii_lowercase().hash(state);
+ }
+ }
+ }
+
+ // Validating conversion from a borrowed string, without copying it.
+ impl<'a> std::convert::TryFrom<&'a str> for $name<'a> {
+ type Error = $module::Error;
+
+ fn try_from(s: &'a str) -> Result<Self, Self::Error> {
+ Self::try_from(std::borrow::Cow::Borrowed(bstr::ByteSlice::as_bstr(s.as_bytes())))
+ }
+ }
+
+ // Validating conversion from an owned string.
+ impl<'a> std::convert::TryFrom<String> for $name<'a> {
+ type Error = $module::Error;
+
+ fn try_from(s: String) -> Result<Self, Self::Error> {
+ Self::try_from(std::borrow::Cow::Owned(bstr::BString::from(s)))
+ }
+ }
+
+ // The conversion that performs the actual validation, which the others delegate to.
+ impl<'a> std::convert::TryFrom<std::borrow::Cow<'a, bstr::BStr>> for $name<'a> {
+ type Error = $module::Error;
+
+ fn try_from(s: std::borrow::Cow<'a, bstr::BStr>) -> Result<Self, Self::Error> {
+ if $validate(s.as_ref()) {
+ Ok(Self(s))
+ } else {
+ Err($module::Error)
+ }
+ }
+ }
+
+ // Provides access to the wrapped value and its methods.
+ impl<'a> std::ops::Deref for $name<'a> {
+ type Target = $cow_inner_type;
+
+ fn deref(&self) -> &Self::Target {
+ &self.0
+ }
+ }
+
+ // Panics if the wrapped bytes are not valid UTF-8.
+ // NOTE(review): instances created by writing the `pub(crate)` field directly bypass
+ // `$validate` - confirm that all such call sites only pass valid UTF-8.
+ impl<'a> std::convert::AsRef<str> for $name<'a> {
+ fn as_ref(&self) -> &str {
+ std::str::from_utf8(self.0.as_ref()).expect("only valid UTF8 makes it through our validation")
+ }
+ }
+ };
+ }
+
+ /// Return true if `n` is not empty and consists only of ASCII alphanumerics and `-`.
+ fn is_valid_name(n: &bstr::BStr) -> bool {
+ !n.is_empty() && n.iter().all(|b| b.is_ascii_alphanumeric() || *b == b'-')
+ }
+ /// Return true if `n` is a valid name that additionally starts with an ASCII letter.
+ fn is_valid_key(n: &bstr::BStr) -> bool {
+ // Indexing is safe as `is_valid_name()` rejects empty input before `n[0]` is evaluated.
+ is_valid_name(n) && n[0].is_ascii_alphabetic()
+ }
+
+ generate_case_insensitive!(
+ Name,
+ name,
+ "Valid names consist of alphanumeric characters or dashes.",
+ is_valid_name,
+ bstr::BStr,
+ "Wrapper struct for section header names, like `remote`, since these are case-insensitive."
+ );
+
+ generate_case_insensitive!(
+ Key,
+ key,
+ "Valid keys consist alphanumeric characters or dashes, starting with an alphabetic character.",
+ is_valid_key,
+ bstr::BStr,
+ "Wrapper struct for key names, like `path` in `include.path`, since keys are case-insensitive."
+ );
+}
+pub use types::{key, name, Key, Name};
+
+/// Convert a `Cow` of `str` into a `Cow` of `BStr`, keeping borrowed data borrowed
+/// and owned data owned.
+pub(crate) fn into_cow_bstr(c: Cow<'_, str>) -> Cow<'_, BStr> {
+    match c {
+        Cow::Owned(owned) => Cow::Owned(bstr::BString::from(owned)),
+        Cow::Borrowed(borrowed) => Cow::Borrowed(<&BStr>::from(borrowed)),
+    }
+}
diff --git a/vendor/gix-config/src/parse/section/unvalidated.rs b/vendor/gix-config/src/parse/section/unvalidated.rs
new file mode 100644
index 000000000..1710837fe
--- /dev/null
+++ b/vendor/gix-config/src/parse/section/unvalidated.rs
@@ -0,0 +1,25 @@
+use bstr::{BStr, ByteSlice};
+
+/// An unvalidated parse result of a key for a section, parsing input like `remote.origin` or `core`.
+///
+/// Both fields borrow from the parsed input.
+#[derive(Debug, PartialEq, Ord, PartialOrd, Eq, Hash, Clone, Copy)]
+pub struct Key<'a> {
+ /// The name of the section, like `remote` in `remote.origin`.
+ pub section_name: &'a str,
+ /// The name of the sub-section, like `origin` in `remote.origin`.
+ pub subsection_name: Option<&'a BStr>,
+}
+
+impl<'a> Key<'a> {
+    /// Parse `input` like `remote.origin` or `core` as a `Key` to make its section specific fields available.
+    ///
+    /// The input is split at the first `.` only, so everything after it becomes the subsection
+    /// name even if it contains further dots. Without a `.`, there is no subsection name.
+    ///
+    /// Returns `None` if the section name is not valid UTF-8.
+    /// Note that `input` isn't validated beyond that.
+    pub fn parse(input: impl Into<&'a BStr>) -> Option<Self> {
+        let input = input.into();
+        let mut parts = input.splitn(2, |byte| *byte == b'.');
+
+        let section_name = parts.next()?.to_str().ok()?;
+        let subsection_name = parts.next().map(|rest| rest.as_bstr());
+        Some(Key {
+            section_name,
+            subsection_name,
+        })
+    }
+}
diff --git a/vendor/gix-config/src/parse/tests.rs b/vendor/gix-config/src/parse/tests.rs
new file mode 100644
index 000000000..2a2853c4c
--- /dev/null
+++ b/vendor/gix-config/src/parse/tests.rs
@@ -0,0 +1,162 @@
+mod section {
+
+    mod header {
+        mod unvalidated {
+            use crate::parse::section::unvalidated::Key;
+
+            /// Input without a dot yields a section name only.
+            #[test]
+            fn section_name_only() {
+                let expected = Key {
+                    section_name: "core",
+                    subsection_name: None,
+                };
+                assert_eq!(Key::parse("core").unwrap(), expected);
+            }
+
+            /// Input with a single dot is split into section and subsection name.
+            #[test]
+            fn section_name_and_subsection() {
+                let expected = Key {
+                    section_name: "core",
+                    subsection_name: Some("bare".into()),
+                };
+                assert_eq!(Key::parse("core.bare").unwrap(), expected);
+            }
+
+            /// Only the first dot separates, further dots are part of the subsection name.
+            #[test]
+            fn section_name_and_subsection_with_separators() {
+                let expected = Key {
+                    section_name: "remote",
+                    subsection_name: Some("https:///home/user.git".into()),
+                };
+                assert_eq!(Key::parse("remote.https:///home/user.git").unwrap(), expected);
+            }
+        }
+
+        mod write_to {
+            use std::borrow::Cow;
+
+            use crate::parse::section;
+
+            /// Create a header from `name` and an optional `(separator, subsection_name)` pair
+            /// without any validation.
+            fn header(name: &str, subsection: impl Into<Option<(&'static str, &'static str)>>) -> section::Header<'_> {
+                let parts = subsection.into();
+                section::Header {
+                    name: section::Name(Cow::Borrowed(name.into())),
+                    separator: parts.map(|(separator, _)| Cow::Borrowed(separator.into())),
+                    subsection_name: parts.map(|(_, subsection_name)| Cow::Borrowed(subsection_name.into())),
+                }
+            }
+
+            #[test]
+            fn legacy_subsection_format_does_not_use_escapes() {
+                let legacy = header("invalid", Some((".", "\\ \"")));
+                let serialized = legacy.to_bstring();
+                assert_eq!(
+                    serialized, "[invalid.\\ \"]",
+                    "no escaping happens for legacy subsections"
+                );
+                assert!(legacy.is_legacy());
+            }
+
+            #[test]
+            fn subsections_escape_two_characters_only() {
+                let quoted = header("invalid", Some((" ", "\\ \"\npost newline")));
+                let serialized = quoted.to_bstring();
+                assert_eq!(
+                    serialized,
+                    "[invalid \"\\\\ \\\"\npost newline\"]",
+                    "newlines are actually invalid in subsection, but they are possible due to unvalidated instance creation"
+                );
+                assert!(!quoted.is_legacy());
+            }
+        }
+    }
+}
+
+pub(crate) mod util {
+    //! This module is only included for tests, and contains common unit test helper
+    //! functions to build expected parser output concisely.
+
+    use std::{borrow::Cow, convert::TryFrom};
+
+    use crate::parse::{section, Comment, Event};
+
+    /// Convert a vector of `events` into the container type used for events.
+    pub fn into_events(events: Vec<Event<'_>>) -> section::Events<'_> {
+        section::Events::from(events)
+    }
+
+    /// Create a header from a validated `name` and an optional `(separator, subsection_name)` pair.
+    ///
+    /// Panics if `name` isn't a valid section name.
+    pub fn section_header(
+        name: &str,
+        subsection: impl Into<Option<(&'static str, &'static str)>>,
+    ) -> section::Header<'_> {
+        let parts = subsection.into();
+        section::Header {
+            name: section::Name::try_from(name).unwrap(),
+            separator: parts.map(|(separator, _)| Cow::Borrowed(separator.into())),
+            subsection_name: parts.map(|(_, subsection_name)| Cow::Borrowed(subsection_name.into())),
+        }
+    }
+
+    /// A section key event for `name`, which is not validated.
+    pub(crate) fn name_event(name: &'static str) -> Event<'static> {
+        let key = section::Key(Cow::Borrowed(name.into()));
+        Event::SectionKey(key)
+    }
+
+    /// An event for a complete `value`.
+    pub(crate) fn value_event(value: &'static str) -> Event<'static> {
+        Event::Value(Cow::Borrowed(value.into()))
+    }
+
+    /// An event for the `value` part of a value that isn't complete yet.
+    pub(crate) fn value_not_done_event(value: &'static str) -> Event<'static> {
+        Event::ValueNotDone(Cow::Borrowed(value.into()))
+    }
+
+    /// An event for the `value` part that completes a value.
+    pub(crate) fn value_done_event(value: &'static str) -> Event<'static> {
+        Event::ValueDone(Cow::Borrowed(value.into()))
+    }
+
+    /// A newline event consisting of a single `\n`.
+    pub(crate) fn newline_event() -> Event<'static> {
+        Event::Newline(Cow::Borrowed("\n".into()))
+    }
+
+    /// A newline event with `value` as its content.
+    pub(crate) fn newline_custom_event(value: &'static str) -> Event<'static> {
+        Event::Newline(Cow::Borrowed(value.into()))
+    }
+
+    /// A whitespace event with `value` as its content.
+    pub(crate) fn whitespace_event(value: &'static str) -> Event<'static> {
+        Event::Whitespace(Cow::Borrowed(value.into()))
+    }
+
+    /// A comment event starting with `tag` and containing `msg`.
+    pub(crate) fn comment_event(tag: char, msg: &'static str) -> Event<'static> {
+        let parsed = comment(tag, msg);
+        Event::Comment(parsed)
+    }
+
+    /// A comment starting with `comment_tag`, truncated to a byte, and containing `comment`.
+    pub(crate) fn comment(comment_tag: char, comment: &'static str) -> Comment<'static> {
+        let tag = comment_tag as u8;
+        Comment {
+            tag,
+            text: Cow::Borrowed(comment.into()),
+        }
+    }
+
+    /// Pair `t` with an empty remainder, which is what a parser returns if it consumed all input.
+    pub(crate) const fn fully_consumed<T>(t: T) -> (&'static [u8], T) {
+        let remainder: &'static [u8] = &[];
+        (remainder, t)
+    }
+}