diff options
Diffstat (limited to 'vendor/gix-config/src/parse')
-rw-r--r-- | vendor/gix-config/src/parse/comment.rs | 50 | ||||
-rw-r--r-- | vendor/gix-config/src/parse/error.rs | 64 | ||||
-rw-r--r-- | vendor/gix-config/src/parse/event.rs | 83 | ||||
-rw-r--r-- | vendor/gix-config/src/parse/events.rs | 336 | ||||
-rw-r--r-- | vendor/gix-config/src/parse/key.rs | 35 | ||||
-rw-r--r-- | vendor/gix-config/src/parse/mod.rs | 116 | ||||
-rw-r--r-- | vendor/gix-config/src/parse/nom/mod.rs | 460 | ||||
-rw-r--r-- | vendor/gix-config/src/parse/nom/tests.rs | 924 | ||||
-rw-r--r-- | vendor/gix-config/src/parse/section/header.rs | 180 | ||||
-rw-r--r-- | vendor/gix-config/src/parse/section/mod.rs | 187 | ||||
-rw-r--r-- | vendor/gix-config/src/parse/section/unvalidated.rs | 25 | ||||
-rw-r--r-- | vendor/gix-config/src/parse/tests.rs | 162 |
12 files changed, 2622 insertions, 0 deletions
diff --git a/vendor/gix-config/src/parse/comment.rs b/vendor/gix-config/src/parse/comment.rs new file mode 100644 index 000000000..6d4bb15ff --- /dev/null +++ b/vendor/gix-config/src/parse/comment.rs @@ -0,0 +1,50 @@ +use std::{borrow::Cow, fmt::Display}; + +use bstr::BString; + +use crate::parse::Comment; + +impl Comment<'_> { + /// Turn this instance into a fully owned one with `'static` lifetime. + #[must_use] + pub fn to_owned(&self) -> Comment<'static> { + Comment { + tag: self.tag, + text: Cow::Owned(self.text.as_ref().into()), + } + } + + /// Serialize this type into a `BString` for convenience. + /// + /// Note that `to_string()` can also be used, but might not be lossless. + #[must_use] + pub fn to_bstring(&self) -> BString { + let mut buf = Vec::new(); + self.write_to(&mut buf).expect("io error impossible"); + buf.into() + } + + /// Stream ourselves to the given `out`, in order to reproduce this comment losslessly. + pub fn write_to(&self, mut out: impl std::io::Write) -> std::io::Result<()> { + out.write_all(&[self.tag])?; + out.write_all(self.text.as_ref()) + } +} + +impl Display for Comment<'_> { + fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result { + Display::fmt(&self.to_bstring(), f) + } +} + +impl From<Comment<'_>> for BString { + fn from(c: Comment<'_>) -> Self { + c.into() + } +} + +impl From<&Comment<'_>> for BString { + fn from(c: &Comment<'_>) -> Self { + c.to_bstring() + } +} diff --git a/vendor/gix-config/src/parse/error.rs b/vendor/gix-config/src/parse/error.rs new file mode 100644 index 000000000..1f469ee4c --- /dev/null +++ b/vendor/gix-config/src/parse/error.rs @@ -0,0 +1,64 @@ +use std::fmt::Display; + +use crate::parse::Error; + +/// A list of parsers that parsing can fail on. 
This is used for pretty-printing errors +#[derive(PartialEq, Debug, Clone, Copy)] +pub(crate) enum ParseNode { + SectionHeader, + Name, + Value, +} + +impl Display for ParseNode { + fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result { + match self { + Self::SectionHeader => write!(f, "section header"), + Self::Name => write!(f, "name"), + Self::Value => write!(f, "value"), + } + } +} + +impl Error { + /// The one-indexed line number where the error occurred. This is determined + /// by the number of newlines that were successfully parsed. + #[must_use] + pub const fn line_number(&self) -> usize { + self.line_number + 1 + } + + /// The data that was left unparsed, which contains the cause of the parse error. + #[must_use] + pub fn remaining_data(&self) -> &[u8] { + &self.parsed_until + } +} + +impl Display for Error { + fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result { + write!( + f, + "Got an unexpected token on line {} while trying to parse a {}: ", + self.line_number + 1, + self.last_attempted_parser, + )?; + + let data_size = self.parsed_until.len(); + let data = std::str::from_utf8(&self.parsed_until); + match (data, data_size) { + (Ok(data), _) if data_size > 10 => { + write!( + f, + "'{}' ... ({} characters omitted)", + &data.chars().take(10).collect::<String>(), + data_size - 10 + ) + } + (Ok(data), _) => write!(f, "'{data}'"), + (Err(_), _) => self.parsed_until.fmt(f), + } + } +} + +impl std::error::Error for Error {} diff --git a/vendor/gix-config/src/parse/event.rs b/vendor/gix-config/src/parse/event.rs new file mode 100644 index 000000000..b7b96934d --- /dev/null +++ b/vendor/gix-config/src/parse/event.rs @@ -0,0 +1,83 @@ +use std::{borrow::Cow, fmt::Display}; + +use bstr::{BStr, BString}; + +use crate::parse::Event; + +impl Event<'_> { + /// Serialize this type into a `BString` for convenience. + /// + /// Note that `to_string()` can also be used, but might not be lossless. 
+ #[must_use] + pub fn to_bstring(&self) -> BString { + let mut buf = Vec::new(); + self.write_to(&mut buf).expect("io error impossible"); + buf.into() + } + + /// Turn ourselves into the text we represent, lossy. + /// + /// Note that this will be partial in case of `ValueNotDone` which doesn't include the backslash, and `SectionHeader` will only + /// provide their name, lacking the sub-section name. + pub fn to_bstr_lossy(&self) -> &BStr { + match self { + Self::ValueNotDone(e) | Self::Whitespace(e) | Self::Newline(e) | Self::Value(e) | Self::ValueDone(e) => { + e.as_ref() + } + Self::KeyValueSeparator => "=".into(), + Self::SectionKey(k) => k.0.as_ref(), + Self::SectionHeader(h) => h.name.0.as_ref(), + Self::Comment(c) => c.text.as_ref(), + } + } + + /// Stream ourselves to the given `out`, in order to reproduce this event mostly losslessly + /// as it was parsed. + pub fn write_to(&self, mut out: impl std::io::Write) -> std::io::Result<()> { + match self { + Self::ValueNotDone(e) => { + out.write_all(e.as_ref())?; + out.write_all(b"\\") + } + Self::Whitespace(e) | Self::Newline(e) | Self::Value(e) | Self::ValueDone(e) => out.write_all(e.as_ref()), + Self::KeyValueSeparator => out.write_all(b"="), + Self::SectionKey(k) => out.write_all(k.0.as_ref()), + Self::SectionHeader(h) => h.write_to(&mut out), + Self::Comment(c) => c.write_to(&mut out), + } + } + + /// Turn this instance into a fully owned one with `'static` lifetime. 
+ #[must_use] + pub fn to_owned(&self) -> Event<'static> { + match self { + Event::Comment(e) => Event::Comment(e.to_owned()), + Event::SectionHeader(e) => Event::SectionHeader(e.to_owned()), + Event::SectionKey(e) => Event::SectionKey(e.to_owned()), + Event::Value(e) => Event::Value(Cow::Owned(e.clone().into_owned())), + Event::ValueNotDone(e) => Event::ValueNotDone(Cow::Owned(e.clone().into_owned())), + Event::ValueDone(e) => Event::ValueDone(Cow::Owned(e.clone().into_owned())), + Event::Newline(e) => Event::Newline(Cow::Owned(e.clone().into_owned())), + Event::Whitespace(e) => Event::Whitespace(Cow::Owned(e.clone().into_owned())), + Event::KeyValueSeparator => Event::KeyValueSeparator, + } + } +} + +impl Display for Event<'_> { + fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result { + Display::fmt(&self.to_bstring(), f) + } +} + +impl From<Event<'_>> for BString { + fn from(event: Event<'_>) -> Self { + event.to_bstring() // not `event.into()`: that resolves back to this very impl and recurses without end + } +} + +impl From<&Event<'_>> for BString { + fn from(event: &Event<'_>) -> Self { + event.to_bstring() + } +} diff --git a/vendor/gix-config/src/parse/events.rs b/vendor/gix-config/src/parse/events.rs new file mode 100644 index 000000000..62f621b52 --- /dev/null +++ b/vendor/gix-config/src/parse/events.rs @@ -0,0 +1,336 @@ +use std::convert::TryFrom; + +use smallvec::SmallVec; + +use crate::{ + parse, + parse::{section, Event, Section}, +}; + +/// A type to store, without allocation, all events that typically precede the first section. +pub type FrontMatterEvents<'a> = SmallVec<[Event<'a>; 8]>; + +/// A zero-copy `gix-config` file parser. +/// +/// This parser exposes low-level syntactic events from a `gix-config` file. +/// Generally speaking, you'll want to use [`File`] as it wraps +/// around the parser to provide a higher-level abstraction to a `gix-config` +/// file, including querying, modifying, and updating values. 
+/// +/// This parser guarantees that the events emitted are sufficient to +/// reconstruct a `gix-config` file identical to the source `gix-config` +/// when writing it. +/// +/// # Differences from a `.ini` parser +/// +/// While the `gix-config` format closely resembles the [`.ini` file format], +/// there are subtle differences that make them incompatible. For one, the file +/// format is not well defined, and there exists no formal specification to +/// adhere to. +/// +/// For concrete examples, some notable differences are: +/// - `gix-config` sections permit subsections via either a quoted string +/// (`[some-section "subsection"]`) or via the deprecated dot notation +/// (`[some-section.subsection]`). Successfully parsing these section names is not +/// well defined in typical `.ini` parsers. This parser will handle these cases +/// perfectly. +/// - Comment markers are not strictly defined either. This parser will always +/// and only handle a semicolon or octothorpe (also known as a hash or number +/// sign). +/// - Global properties may be allowed in `.ini` parsers, but are strictly +/// disallowed by this parser. +/// - Only `\t`, `\n`, `\b`, `\\` and `\"` are valid escape characters. +/// - Quoted and semi-quoted values will be parsed (but quotes will be included +/// in event outputs). An example of a semi-quoted value is `5"hello world"`, +/// which should be interpreted as `5hello world` after +/// [normalization][crate::value::normalize()]. +/// - Line continuations via a `\` character are supported (inside or outside of quotes) +/// - Whitespace handling similarly follows the `gix-config` specification as +/// closely as possible, where excess whitespace after a non-quoted value is +/// trimmed, and line continuations onto a new line with excess spaces are kept. +/// - Only equal signs (optionally padded by spaces) are valid name/value +/// delimiters. +/// +/// Note that things such as case-sensitivity or duplicate sections are +/// _not_ handled. 
This parser is a low-level _syntactic_ interpreter +/// and higher-level wrappers around this parser, which may +/// or may not be zero-copy, should handle _semantic_ values. This also means +/// that string-like values are not interpreted. For example, `hello"world"` +/// would be read at a high level as `helloworld` but this parser will return +/// the former instead, with the extra quotes. This is because it is not the +/// responsibility of the parser to interpret these values, and doing so would +/// necessarily require a copy, which this parser avoids. +/// +/// # Trait Implementations +/// +/// - This struct does _not_ implement [`FromStr`] due to lifetime +/// constraints implied on the required `from_str` method. Instead, it provides +/// [`TryFrom<&'_ str>`][std::convert::TryFrom]. +/// +/// # Idioms +/// +/// If you do want to use this parser, there are some idioms that may help you +/// with interpreting sequences of events. +/// +/// ## `Value` events do not immediately follow `Key` events +/// +/// Consider the following `gix-config` example: +/// +/// ```text +/// [core] +/// autocrlf = input +/// ``` +/// +/// Because this parser guarantees perfect reconstruction, there are many +/// non-significant events that occur in addition to the ones you may expect: +/// +/// ``` +/// # use gix_config::parse::{Event, Events, section}; +/// # use std::borrow::Cow; +/// # use std::convert::TryFrom; +/// # let section_header = section::Header::new("core", None).unwrap(); +/// # let section_data = "[core]\n autocrlf = input"; +/// # assert_eq!(Events::from_str(section_data).unwrap().into_vec(), vec![ +/// Event::SectionHeader(section_header), +/// Event::Newline(Cow::Borrowed("\n".into())), +/// Event::Whitespace(Cow::Borrowed(" ".into())), +/// Event::SectionKey(section::Key::try_from("autocrlf")?), +/// Event::Whitespace(Cow::Borrowed(" ".into())), +/// Event::KeyValueSeparator, +/// Event::Whitespace(Cow::Borrowed(" ".into())), +/// Event::Value(Cow::Borrowed("input".into())), +/// # 
]); +/// # Ok::<_, Box<dyn std::error::Error>>(()) +/// ``` +/// +/// Note the two whitespace events between the key and value pair! Those two +/// events actually refer to the whitespace between the name and value and the +/// equal sign. So if the config instead had `autocrlf=input`, those whitespace +/// events would no longer be present. +/// +/// ## `KeyValueSeparator` event is not guaranteed to emit +/// +/// Consider the following `gix-config` example: +/// +/// ```text +/// [core] +/// autocrlf +/// ``` +/// +/// This is a valid config with a `autocrlf` key having an implicit `true` +/// value. This means that there is not a `=` separating the key and value, +/// which means that the corresponding event won't appear either: +/// +/// ``` +/// # use gix_config::parse::{Event, Events, section}; +/// # use std::borrow::Cow; +/// # use std::convert::TryFrom; +/// # let section_header = section::Header::new("core", None).unwrap(); +/// # let section_data = "[core]\n autocrlf"; +/// # assert_eq!(Events::from_str(section_data).unwrap().into_vec(), vec![ +/// Event::SectionHeader(section_header), +/// Event::Newline(Cow::Borrowed("\n".into())), +/// Event::Whitespace(Cow::Borrowed(" ".into())), +/// Event::SectionKey(section::Key::try_from("autocrlf")?), +/// Event::Value(Cow::Borrowed("".into())), +/// # ]); +/// # Ok::<_, Box<dyn std::error::Error>>(()) +/// ``` +/// +/// ## Quoted values are not unquoted +/// +/// Consider the following `gix-config` example: +/// +/// ```text +/// [core] +/// autocrlf=true"" +/// filemode=fa"lse" +/// ``` +/// +/// Both these events, when fully processed, should normally be `true` and +/// `false`. However, because this parser is zero-copy, we cannot process +/// partially quoted values, such as the `false` example. As a result, to +/// maintain consistency, the parser will just take all values as literals. 
The +/// relevant event stream is thus emitted as: +/// +/// ``` +/// # use gix_config::parse::{Event, Events, section}; +/// # use std::borrow::Cow; +/// # use std::convert::TryFrom; +/// # let section_header = section::Header::new("core", None).unwrap(); +/// # let section_data = "[core]\nautocrlf=true\"\"\nfilemode=fa\"lse\""; +/// # assert_eq!(Events::from_str(section_data).unwrap().into_vec(), vec![ +/// Event::SectionHeader(section_header), +/// Event::Newline(Cow::Borrowed("\n".into())), +/// Event::SectionKey(section::Key::try_from("autocrlf")?), +/// Event::KeyValueSeparator, +/// Event::Value(Cow::Borrowed(r#"true"""#.into())), +/// Event::Newline(Cow::Borrowed("\n".into())), +/// Event::SectionKey(section::Key::try_from("filemode")?), +/// Event::KeyValueSeparator, +/// Event::Value(Cow::Borrowed(r#"fa"lse""#.into())), +/// # ]); +/// # Ok::<_, Box<dyn std::error::Error>>(()) +/// ``` +/// +/// ## Whitespace after line continuations is part of the value +/// +/// Consider the following `gix-config` example: +/// +/// ```text +/// [some-section] +/// file=a\ +/// c +/// ``` +/// +/// Because of how `gix-config` treats continuations, the whitespace preceding `c` +/// is in fact part of the value of `file`. The fully interpreted key/value +/// pair is actually `file=a c`. 
As a result, the parser will provide this +/// split value accordingly: +/// +/// ``` +/// # use gix_config::parse::{Event, Events, section}; +/// # use std::borrow::Cow; +/// # use std::convert::TryFrom; +/// # let section_header = section::Header::new("some-section", None).unwrap(); +/// # let section_data = "[some-section]\nfile=a\\\n c"; +/// # assert_eq!(Events::from_str(section_data).unwrap().into_vec(), vec![ +/// Event::SectionHeader(section_header), +/// Event::Newline(Cow::Borrowed("\n".into())), +/// Event::SectionKey(section::Key::try_from("file")?), +/// Event::KeyValueSeparator, +/// Event::ValueNotDone(Cow::Borrowed("a".into())), +/// Event::Newline(Cow::Borrowed("\n".into())), +/// Event::ValueDone(Cow::Borrowed(" c".into())), +/// # ]); +/// # Ok::<_, Box<dyn std::error::Error>>(()) +/// ``` +/// +/// [`File`]: crate::File +/// [`.ini` file format]: https://en.wikipedia.org/wiki/INI_file +/// [`git`'s documentation]: https://git-scm.com/docs/git-config#_configuration_file +/// [`FromStr`]: std::str::FromStr +/// [`From<&'_ str>`]: std::convert::From +#[derive(Clone, Eq, PartialEq, Ord, PartialOrd, Hash, Debug, Default)] +pub struct Events<'a> { + /// Events seen before the first section. + pub frontmatter: FrontMatterEvents<'a>, + /// All parsed sections. + pub sections: Vec<Section<'a>>, +} + +impl Events<'static> { + /// Parses the provided bytes, returning an [`Events`] that contains allocated + /// and owned events. This is similar to [`Events::from_bytes()`], but performance + /// is degraded as it requires allocation for every event. + /// + /// Use `filter` to only include those events for which it returns true. + pub fn from_bytes_owned<'a>( + input: &'a [u8], + filter: Option<fn(&Event<'a>) -> bool>, + ) -> Result<Events<'static>, parse::Error> { + from_bytes(input, |e| e.to_owned(), filter) + } +} + +impl<'a> Events<'a> { + /// Attempt to zero-copy parse the provided bytes. 
On success, returns an + /// [`Events`] that provides access to leading comments and sections + /// of a `gix-config` file and can be converted into an iterator of [`Event`] + /// for higher-level processing. + /// + /// Use `filter` to only include those events for which it returns true. + pub fn from_bytes(input: &'a [u8], filter: Option<fn(&Event<'a>) -> bool>) -> Result<Events<'a>, parse::Error> { + from_bytes(input, std::convert::identity, filter) + } + + /// Attempt to zero-copy parse the provided `input` string. + /// + /// Prefer the [`from_bytes()`][Self::from_bytes()] method if UTF-8 encoding + /// isn't guaranteed. + #[allow(clippy::should_implement_trait)] + pub fn from_str(input: &'a str) -> Result<Events<'a>, parse::Error> { + Self::from_bytes(input.as_bytes(), None) + } + + /// Consumes the parser to produce an iterator of all contained events. + #[must_use = "iterators are lazy and do nothing unless consumed"] + #[allow(clippy::should_implement_trait)] + pub fn into_iter(self) -> impl Iterator<Item = parse::Event<'a>> + std::iter::FusedIterator { + self.frontmatter.into_iter().chain( + self.sections + .into_iter() + .flat_map(|section| std::iter::once(parse::Event::SectionHeader(section.header)).chain(section.events)), + ) + } + + /// Place all contained events into a single `Vec`. 
+ pub fn into_vec(self) -> Vec<parse::Event<'a>> { + self.into_iter().collect() + } +} + +impl<'a> TryFrom<&'a str> for Events<'a> { + type Error = parse::Error; + + fn try_from(value: &'a str) -> Result<Self, Self::Error> { + Self::from_str(value) + } +} + +impl<'a> TryFrom<&'a [u8]> for Events<'a> { + type Error = parse::Error; + + fn try_from(value: &'a [u8]) -> Result<Self, Self::Error> { + Events::from_bytes(value, None) + } +} + +fn from_bytes<'a, 'b>( + input: &'a [u8], + convert: impl Fn(Event<'a>) -> Event<'b>, + filter: Option<fn(&Event<'a>) -> bool>, +) -> Result<Events<'b>, parse::Error> { + let mut header = None; + let mut events = section::Events::default(); + let mut frontmatter = FrontMatterEvents::default(); + let mut sections = Vec::new(); + parse::from_bytes(input, |e: Event<'_>| match e { + Event::SectionHeader(next_header) => { + match header.take() { + None => { + frontmatter = std::mem::take(&mut events).into_iter().collect(); + } + Some(prev_header) => { + sections.push(parse::Section { + header: prev_header, + events: std::mem::take(&mut events), + }); + } + }; + header = match convert(Event::SectionHeader(next_header)) { + Event::SectionHeader(h) => h, + _ => unreachable!("BUG: convert must not change the event type, just the lifetime"), + } + .into(); + } + event => { + if filter.map_or(true, |f| f(&event)) { + events.push(convert(event)) + } + } + })?; + + match header { + None => { + frontmatter = events.into_iter().collect(); + } + Some(prev_header) => { + sections.push(parse::Section { + header: prev_header, + events: std::mem::take(&mut events), + }); + } + } + Ok(Events { frontmatter, sections }) +} diff --git a/vendor/gix-config/src/parse/key.rs b/vendor/gix-config/src/parse/key.rs new file mode 100644 index 000000000..b0e0376be --- /dev/null +++ b/vendor/gix-config/src/parse/key.rs @@ -0,0 +1,35 @@ +use bstr::{BStr, ByteSlice}; + +/// An unvalidated parse result of parsing input like `remote.origin.url` or `core.bare`. 
+#[derive(Debug, PartialEq, Ord, PartialOrd, Eq, Hash, Clone, Copy)] +pub struct Key<'a> { + /// The name of the section, like `core` in `core.bare`. + pub section_name: &'a str, + /// The name of the sub-section, like `origin` in `remote.origin.url`. + pub subsection_name: Option<&'a BStr>, + /// The name of the section key, like `url` in `remote.origin.url`. + pub value_name: &'a str, +} + +/// Parse `input` like `core.bare` or `remote.origin.url` as a `Key` to make its fields available, +/// or `None` if there were not at least 2 tokens separated by `.`. +/// Note that `input` isn't validated, and is `str` as ascii is a subset of UTF-8 which is required for any valid keys. +pub fn parse_unvalidated<'a>(input: impl Into<&'a BStr>) -> Option<Key<'a>> { + let input = input.into(); + let mut tokens = input.splitn(2, |b| *b == b'.'); + let section_name = tokens.next()?; + let subsection_or_key = tokens.next()?; + let mut tokens = subsection_or_key.rsplitn(2, |b| *b == b'.'); + let (subsection_name, value_name) = match (tokens.next(), tokens.next()) { + (Some(key), Some(subsection)) => (Some(subsection.into()), key), + (Some(key), None) => (None, key), + (None, Some(_)) => unreachable!("iterator can't restart producing items"), + (None, None) => return None, + }; + + Some(Key { + section_name: section_name.to_str().ok()?, + subsection_name, + value_name: value_name.to_str().ok()?, + }) +} diff --git a/vendor/gix-config/src/parse/mod.rs b/vendor/gix-config/src/parse/mod.rs new file mode 100644 index 000000000..50363873c --- /dev/null +++ b/vendor/gix-config/src/parse/mod.rs @@ -0,0 +1,116 @@ +//! This module handles parsing a `gix-config` file. Generally speaking, you +//! want to use a higher abstraction such as [`File`] unless you have some +//! explicit reason to work with events instead. +//! +//! The workflow for interacting with this is to use +//! [`from_bytes()`] to obtain all parse events or tokens of the given input. +//! +//! 
On a higher level, one can use [`Events`] to parse all events into a set +//! of easily interpretable data types, similar to what [`File`] does. +//! +//! [`File`]: crate::File + +use std::{borrow::Cow, hash::Hash}; + +use bstr::BStr; + +mod nom; +pub use self::nom::from_bytes; +mod event; +#[path = "events.rs"] +mod events_type; +pub use events_type::{Events, FrontMatterEvents}; +mod comment; +mod error; +/// +pub mod section; + +/// +mod key; +pub use key::{parse_unvalidated as key, Key}; + +#[cfg(test)] +pub(crate) mod tests; + +/// Syntactic events that occur in the config. Despite all these variants +/// holding a [`Cow`] instead of a simple reference, the parser will only emit +/// borrowed `Cow` variants. +/// +/// The `Cow` is used here for ease of inserting new, typically owned events as used +/// in the [`File`] struct when adding values, allowing a mix of owned and borrowed +/// values. +/// +/// [`Cow`]: std::borrow::Cow +/// [`File`]: crate::File +#[derive(Clone, Eq, PartialEq, Ord, PartialOrd, Hash, Debug)] +pub enum Event<'a> { + /// A comment with a comment tag and the comment itself. Note that the + /// comment itself may contain additional whitespace and comment markers + /// at the beginning, like `# comment` or `; comment`. + Comment(Comment<'a>), + /// A section header containing the section name and a subsection, if it + /// exists. For instance, `remote "origin"` is parsed to `remote` as section + /// name and `origin` as subsection name. + SectionHeader(section::Header<'a>), + /// A name to a value in a section, like `url` in `remote.origin.url`. + SectionKey(section::Key<'a>), + /// A completed value. This may be any single-line string, including the empty string + /// if an implicit boolean value is used. + /// Note that these values may contain spaces and any special character. This value is + /// also unprocessed, so it may contain double quotes that should be + /// [normalized][crate::value::normalize()] before interpretation. 
+ Value(Cow<'a, BStr>), + /// Represents any token used to signify a newline character. On Unix + /// platforms, this is typically just `\n`, but can be any valid newline + /// sequence. Multiple newlines (such as `\n\n`) will be merged as a single + /// newline event containing a string of multiple newline characters. + Newline(Cow<'a, BStr>), + /// Any value that isn't completed. This occurs when the value is continued + /// onto the next line by ending it with a backslash. + /// A [`Newline`][Self::Newline] event is guaranteed after, followed by + /// either a `ValueDone`, a `Whitespace`, or another `ValueNotDone`. + ValueNotDone(Cow<'a, BStr>), + /// The last line of a value which was continued onto another line. + /// With this it's possible to obtain the complete value by concatenating + /// the prior [`ValueNotDone`][Self::ValueNotDone] events. + ValueDone(Cow<'a, BStr>), + /// A continuous section of insignificant whitespace. + /// + /// Note that values with internal whitespace will not be separated by this event, + /// hence interior whitespace there is always part of the value. + Whitespace(Cow<'a, BStr>), + /// This event is emitted when the parser encounters a valid `=` character + /// separating the key and value. + /// This event is necessary as it eliminates the ambiguity for whitespace + /// events between a key and value event. + KeyValueSeparator, +} + +/// A parsed section containing the header and the section events, typically +/// comprising the keys and their values. +#[derive(Clone, Eq, PartialEq, Ord, PartialOrd, Hash, Debug)] +pub struct Section<'a> { + /// The section name and subsection name, if any. + pub header: section::Header<'a>, + /// The syntactic events found in this section. + pub events: section::Events<'a>, +} + +/// A parsed comment containing the comment marker and comment. +#[derive(Clone, Eq, PartialEq, Ord, PartialOrd, Hash, Debug, Default)] +pub struct Comment<'a> { + /// The comment marker used. 
This is either a semicolon or octothorpe/hash. + pub tag: u8, + /// The parsed comment. + pub text: Cow<'a, BStr>, +} + +/// A parser error reports the one-indexed line number where the parsing error +/// occurred, as well as the last parser node and the remaining data to be +/// parsed. +#[derive(PartialEq, Debug)] +pub struct Error { + line_number: usize, + last_attempted_parser: error::ParseNode, + parsed_until: bstr::BString, +} diff --git a/vendor/gix-config/src/parse/nom/mod.rs b/vendor/gix-config/src/parse/nom/mod.rs new file mode 100644 index 000000000..11d1dea6b --- /dev/null +++ b/vendor/gix-config/src/parse/nom/mod.rs @@ -0,0 +1,460 @@ +use std::borrow::Cow; + +use bstr::{BStr, BString, ByteSlice, ByteVec}; +use nom::{ + branch::alt, + bytes::complete::{tag, take_till, take_while}, + character::{ + complete::{char, one_of}, + is_space, + }, + combinator::{map, opt}, + error::{Error as NomError, ErrorKind}, + multi::{fold_many0, fold_many1}, + sequence::delimited, + IResult, +}; + +use crate::parse::{error::ParseNode, section, Comment, Error, Event}; + +/// Attempt to zero-copy parse the provided bytes, passing results to `dispatch`. +pub fn from_bytes<'a>(input: &'a [u8], mut dispatch: impl FnMut(Event<'a>)) -> Result<(), Error> { + let bom = unicode_bom::Bom::from(input); + let mut newlines = 0; + let (i, _) = fold_many0( + alt(( + map(comment, Event::Comment), + map(take_spaces, |whitespace| Event::Whitespace(Cow::Borrowed(whitespace))), + map(take_newlines, |(newline, counter)| { + newlines += counter; + Event::Newline(Cow::Borrowed(newline)) + }), + )), + || (), + |_acc, event| dispatch(event), + )(&input[bom.len()..]) + // I don't think this can panic. many0 errors if the child parser returns + // a success where the input was not consumed, but alt will only return Ok + // if one of its children succeed. However, all of it's children are + // guaranteed to consume something if they succeed, so the Ok(i) == i case + // can never occur. 
+ .expect("many0(alt(...)) panicked. Likely a bug in one of the children parsers."); + + if i.is_empty() { + return Ok(()); + } + + let mut node = ParseNode::SectionHeader; + + let res = fold_many1( + |i| section(i, &mut node, &mut dispatch), + || (), + |_acc, additional_newlines| { + newlines += additional_newlines; + }, + )(i); + let (i, _) = res.map_err(|_| Error { + line_number: newlines, + last_attempted_parser: node, + parsed_until: i.as_bstr().into(), + })?; + + // This needs to happen after we collect sections, otherwise the line number + // will be off. + if !i.is_empty() { + return Err(Error { + line_number: newlines, + last_attempted_parser: node, + parsed_until: i.as_bstr().into(), + }); + } + + Ok(()) +} + +fn comment(i: &[u8]) -> IResult<&[u8], Comment<'_>> { + let (i, comment_tag) = one_of(";#")(i)?; + let (i, comment) = take_till(|c| c == b'\n')(i)?; + Ok(( + i, + Comment { + tag: comment_tag as u8, + text: Cow::Borrowed(comment.as_bstr()), + }, + )) +} + +#[cfg(test)] +mod tests; + +fn section<'a>(i: &'a [u8], node: &mut ParseNode, dispatch: &mut impl FnMut(Event<'a>)) -> IResult<&'a [u8], usize> { + let (mut i, header) = section_header(i)?; + dispatch(Event::SectionHeader(header)); + + let mut newlines = 0; + + // This would usually be a many0(alt(...)), the manual loop allows us to + // optimize vec insertions + loop { + let old_i = i; + + if let Ok((new_i, v)) = take_spaces(i) { + if old_i != new_i { + i = new_i; + dispatch(Event::Whitespace(Cow::Borrowed(v.as_bstr()))); + } + } + + if let Ok((new_i, (v, new_newlines))) = take_newlines(i) { + if old_i != new_i { + i = new_i; + newlines += new_newlines; + dispatch(Event::Newline(Cow::Borrowed(v.as_bstr()))); + } + } + + if let Ok((new_i, new_newlines)) = key_value_pair(i, node, dispatch) { + if old_i != new_i { + i = new_i; + newlines += new_newlines; + } + } + + if let Ok((new_i, comment)) = comment(i) { + if old_i != new_i { + i = new_i; + dispatch(Event::Comment(comment)); + } + } + + if old_i 
== i { + break; + } + } + + Ok((i, newlines)) +} + +fn section_header(i: &[u8]) -> IResult<&[u8], section::Header<'_>> { + let (i, _) = char('[')(i)?; + // No spaces must be between section name and section start + let (i, name) = take_while(|c: u8| c.is_ascii_alphanumeric() || c == b'-' || c == b'.')(i)?; + + let name = name.as_bstr(); + if let Ok((i, _)) = char::<_, NomError<&[u8]>>(']')(i) { + // Either section does not have a subsection or using deprecated + // subsection syntax at this point. + let header = match memchr::memrchr(b'.', name.as_bytes()) { + Some(index) => section::Header { + name: section::Name(Cow::Borrowed(name[..index].as_bstr())), + separator: name.get(index..=index).map(|s| Cow::Borrowed(s.as_bstr())), + subsection_name: name.get(index + 1..).map(|s| Cow::Borrowed(s.as_bstr())), + }, + None => section::Header { + name: section::Name(Cow::Borrowed(name.as_bstr())), + separator: None, + subsection_name: None, + }, + }; + + if header.name.is_empty() { + return Err(nom::Err::Error(NomError { + input: i, + code: ErrorKind::NoneOf, + })); + } + return Ok((i, header)); + } + + // Section header must be using modern subsection syntax at this point. 
+ + let (i, whitespace) = take_spaces(i)?; + let (i, subsection_name) = delimited(char('"'), opt(sub_section), tag("\"]"))(i)?; + + Ok(( + i, + section::Header { + name: section::Name(Cow::Borrowed(name)), + separator: Some(Cow::Borrowed(whitespace)), + subsection_name, + }, + )) +} + +fn sub_section(i: &[u8]) -> IResult<&[u8], Cow<'_, BStr>> { + let (rest, (found_escape, consumed)) = sub_section_delegate(i, &mut |_| ())?; + if found_escape { + let mut buf = BString::default(); + sub_section_delegate(i, &mut |b| buf.push_byte(b)).map(|(i, _)| (i, buf.into())) + } else { + Ok((rest, i[..consumed].as_bstr().into())) + } +} + +fn sub_section_delegate<'a>(i: &'a [u8], push_byte: &mut dyn FnMut(u8)) -> IResult<&'a [u8], (bool, usize)> { + let mut cursor = 0; + let mut bytes = i.iter().copied(); + let mut found_terminator = false; + let mut found_escape = false; + while let Some(mut b) = bytes.next() { + cursor += 1; + if b == b'\n' || b == 0 { + return Err(nom::Err::Error(NomError { + input: &i[cursor..], + code: ErrorKind::NonEmpty, + })); + } + if b == b'"' { + found_terminator = true; + break; + } + if b == b'\\' { + b = bytes.next().ok_or_else(|| { + nom::Err::Error(NomError { + input: &i[cursor..], + code: ErrorKind::NonEmpty, + }) + })?; + found_escape = true; + cursor += 1; + if b == b'\n' { + return Err(nom::Err::Error(NomError { + input: &i[cursor..], + code: ErrorKind::NonEmpty, + })); + } + } + push_byte(b); + } + + if !found_terminator { + return Err(nom::Err::Error(NomError { + input: &i[cursor..], + code: ErrorKind::NonEmpty, + })); + } + + Ok((&i[cursor - 1..], (found_escape, cursor - 1))) +} + +fn key_value_pair<'a>( + i: &'a [u8], + node: &mut ParseNode, + dispatch: &mut impl FnMut(Event<'a>), +) -> IResult<&'a [u8], usize> { + *node = ParseNode::Name; + let (i, name) = config_name(i)?; + + dispatch(Event::SectionKey(section::Key(Cow::Borrowed(name)))); + + let (i, whitespace) = opt(take_spaces)(i)?; + if let Some(whitespace) = whitespace { + 
dispatch(Event::Whitespace(Cow::Borrowed(whitespace))); + } + + *node = ParseNode::Value; + let (i, newlines) = config_value(i, dispatch)?; + Ok((i, newlines)) +} + +/// Parses the config name of a config pair. Assumes the input has already been +/// trimmed of any leading whitespace. +fn config_name(i: &[u8]) -> IResult<&[u8], &BStr> { + if i.is_empty() { + return Err(nom::Err::Error(NomError { + input: i, + code: ErrorKind::NonEmpty, + })); + } + + if !i[0].is_ascii_alphabetic() { + return Err(nom::Err::Error(NomError { + input: i, + code: ErrorKind::Alpha, + })); + } + + let (i, name) = take_while(|c: u8| c.is_ascii_alphanumeric() || c == b'-')(i)?; + Ok((i, name.as_bstr())) +} + +fn config_value<'a>(i: &'a [u8], dispatch: &mut impl FnMut(Event<'a>)) -> IResult<&'a [u8], usize> { + if let (i, Some(_)) = opt(char('='))(i)? { + dispatch(Event::KeyValueSeparator); + let (i, whitespace) = opt(take_spaces)(i)?; + if let Some(whitespace) = whitespace { + dispatch(Event::Whitespace(Cow::Borrowed(whitespace))); + } + let (i, newlines) = value_impl(i, dispatch)?; + Ok((i, newlines)) + } else { + // This is a special way of denoting 'empty' values which a lot of code depends on. + // Hence, rather to fix this everywhere else, leave it here and fix it where it matters, namely + // when it's about differentiating between a missing key-value separator, and one followed by emptiness. + dispatch(Event::Value(Cow::Borrowed("".into()))); + Ok((i, 0)) + } +} + +/// Handles parsing of known-to-be values. This function handles both single +/// line values as well as values that are continuations. 
+fn value_impl<'a>(i: &'a [u8], dispatch: &mut impl FnMut(Event<'a>)) -> IResult<&'a [u8], usize> { + let (i, value_end, newlines, mut dispatch) = { + let new_err = |code| nom::Err::Error(NomError { input: i, code }); + let mut value_end = None::<usize>; + let mut value_start: usize = 0; + let mut newlines = 0; + + let mut prev_char_was_backslash = false; + // This is required to ignore comment markers if they're in a quote. + let mut is_in_quotes = false; + // Used to determine if we return a Value or Value{Not,}Done + let mut partial_value_found = false; + let mut last_value_index: usize = 0; + + let mut bytes = i.iter(); + while let Some(mut c) = bytes.next() { + if prev_char_was_backslash { + prev_char_was_backslash = false; + let mut consumed = 1; + if *c == b'\r' { + c = bytes.next().ok_or_else(|| new_err(ErrorKind::Escaped))?; + if *c != b'\n' { + return Err(new_err(ErrorKind::Tag)); + } + consumed += 1; + } + + match c { + b'\n' => { + partial_value_found = true; + let backslash = 1; + dispatch(Event::ValueNotDone(Cow::Borrowed( + i[value_start..last_value_index - backslash].as_bstr(), + ))); + let nl_end = last_value_index + consumed; + dispatch(Event::Newline(Cow::Borrowed(i[last_value_index..nl_end].as_bstr()))); + value_start = nl_end; + value_end = None; + newlines += 1; + + last_value_index += consumed; + } + b'n' | b't' | b'\\' | b'b' | b'"' => { + last_value_index += 1; + } + _ => { + return Err(new_err(ErrorKind::Escaped)); + } + } + } else { + match c { + b'\n' => { + value_end = last_value_index.into(); + break; + } + b';' | b'#' if !is_in_quotes => { + value_end = last_value_index.into(); + break; + } + b'\\' => prev_char_was_backslash = true, + b'"' => is_in_quotes = !is_in_quotes, + _ => {} + } + last_value_index += 1; + } + } + + if prev_char_was_backslash { + return Err(new_err(ErrorKind::Escaped)); + } + + if is_in_quotes { + return Err(new_err(ErrorKind::Tag)); + } + + let value_end = match value_end { + None => { + if last_value_index == 
0 { + dispatch(Event::Value(Cow::Borrowed("".into()))); + return Ok((&i[0..], newlines)); + } else { + i.len() + } + } + Some(idx) => idx, + }; + + let dispatch = move |value: &'a [u8]| { + if partial_value_found { + dispatch(Event::ValueDone(Cow::Borrowed(value.as_bstr()))); + } else { + dispatch(Event::Value(Cow::Borrowed(value.as_bstr()))); + } + }; + (&i[value_start..], value_end - value_start, newlines, dispatch) + }; + + let (i, remainder_value) = { + let value_end_no_trailing_whitespace = i[..value_end] + .iter() + .enumerate() + .rev() + .find_map(|(idx, b)| (!b.is_ascii_whitespace()).then_some(idx + 1)) + .unwrap_or(0); + ( + &i[value_end_no_trailing_whitespace..], + &i[..value_end_no_trailing_whitespace], + ) + }; + + dispatch(remainder_value); + + Ok((i, newlines)) +} + +fn take_spaces(i: &[u8]) -> IResult<&[u8], &BStr> { + let (i, v) = take_while(|c: u8| c.is_ascii() && is_space(c))(i)?; + if v.is_empty() { + Err(nom::Err::Error(NomError { + input: i, + code: ErrorKind::Eof, + })) + } else { + Ok((i, v.as_bstr())) + } +} + +fn take_newlines(i: &[u8]) -> IResult<&[u8], (&BStr, usize)> { + let mut counter = 0; + let mut consumed_bytes = 0; + let mut next_must_be_newline = false; + for b in i.iter().copied() { + if !b.is_ascii() { + break; + }; + if b == b'\r' { + if next_must_be_newline { + break; + } + next_must_be_newline = true; + continue; + }; + if b == b'\n' { + counter += 1; + consumed_bytes += if next_must_be_newline { 2 } else { 1 }; + next_must_be_newline = false; + } else { + break; + } + } + let (v, i) = i.split_at(consumed_bytes); + if v.is_empty() { + Err(nom::Err::Error(NomError { + input: i, + code: ErrorKind::Eof, + })) + } else { + Ok((i, (v.as_bstr(), counter))) + } +} diff --git a/vendor/gix-config/src/parse/nom/tests.rs b/vendor/gix-config/src/parse/nom/tests.rs new file mode 100644 index 000000000..f6e8c3d92 --- /dev/null +++ b/vendor/gix-config/src/parse/nom/tests.rs @@ -0,0 +1,924 @@ +use super::*; + +mod section_headers { + use 
super::section_header; + use crate::parse::tests::util::{fully_consumed, section_header as parsed_section_header}; + + #[test] + fn no_subsection() { + assert_eq!( + section_header(b"[hello]").unwrap(), + fully_consumed(parsed_section_header("hello", None)), + ); + } + + #[test] + fn modern_subsection() { + assert_eq!( + section_header(br#"[hello "world"]"#).unwrap(), + fully_consumed(parsed_section_header("hello", (" ", "world"))), + ); + } + + #[test] + fn escaped_subsection() { + assert_eq!( + section_header(br#"[hello "foo\\bar\""]"#).unwrap(), + fully_consumed(parsed_section_header("hello", (" ", r#"foo\bar""#))), + ); + } + + #[test] + fn deprecated_subsection() { + assert_eq!( + section_header(br#"[hello.world]"#).unwrap(), + fully_consumed(parsed_section_header("hello", (".", "world"))) + ); + assert_eq!( + section_header(br#"[Hello.World]"#).unwrap(), + fully_consumed(parsed_section_header("Hello", (".", "World"))) + ); + } + + #[test] + fn empty_legacy_subsection_name() { + assert_eq!( + section_header(br#"[hello-world.]"#).unwrap(), + fully_consumed(parsed_section_header("hello-world", (".", ""))) + ); + } + + #[test] + fn empty_modern_subsection_name() { + assert_eq!( + section_header(br#"[hello ""]"#).unwrap(), + fully_consumed(parsed_section_header("hello", (" ", ""))) + ); + } + + #[test] + fn backslashes_in_subsections_do_not_escape_newlines_or_tabs() { + assert_eq!( + section_header(br#"[hello "single \ \\ \t \n \0"]"#).unwrap(), + fully_consumed(parsed_section_header("hello", (" ", r#"single \ t n 0"#))) + ); + } + + #[test] + fn newline_in_header() { + assert!(section_header(b"[hello\n]").is_err()); + } + + #[test] + fn newline_in_sub_section() { + assert!(section_header(b"[hello \"hello\n\"]").is_err()); + } + + #[test] + fn null_byt_in_sub_section() { + assert!(section_header(b"[hello \"hello\0\"]").is_err()); + } + + #[test] + fn escaped_newline_in_sub_section() { + assert!(section_header(b"[hello \"hello\\\n\"]").is_err()); + } + + #[test] + 
fn eof_after_escape_in_sub_section() { + assert!(section_header(b"[hello \"hello\\").is_err()); + } + + #[test] + fn null_byte_in_header() { + assert!(section_header(b"[hello\0]").is_err()); + } + + #[test] + fn invalid_characters_in_section() { + assert!(section_header(b"[$]").is_err()); + } + #[test] + fn invalid_characters_in_legacy_sub_section() { + assert!(section_header(b"[hello.$]").is_err()); + assert!(section_header(b"[hello. world]").is_err()); + } + + #[test] + fn right_brace_in_subsection_name() { + assert_eq!( + section_header(br#"[hello "]"]"#).unwrap(), + fully_consumed(parsed_section_header("hello", (" ", "]"))) + ); + } +} + +mod sub_section { + use std::borrow::Cow; + + use super::sub_section; + + #[test] + fn zero_copy_simple() { + let actual = sub_section(b"name\"").unwrap().1; + assert_eq!(actual.as_ref(), "name"); + assert!(matches!(actual, Cow::Borrowed(_))); + } + + #[test] + fn escapes_need_allocation() { + let actual = sub_section(br#"\x\t\n\0\\\"""#).unwrap().1; + assert_eq!(actual.as_ref(), r#"xtn0\""#); + assert!(matches!(actual, Cow::Owned(_))); + } +} + +mod config_name { + use nom::combinator::all_consuming; + + use super::config_name; + use crate::parse::tests::util::fully_consumed; + + #[test] + fn just_name() { + assert_eq!(config_name(b"name").unwrap(), fully_consumed("name".into())); + } + + #[test] + fn must_start_with_alphabetic() { + assert!(config_name(b"4aaa").is_err()); + assert!(config_name(b"-aaa").is_err()); + } + + #[test] + fn only_a_subset_of_characters_is_allowed() { + assert!(all_consuming(config_name)(b"Name$_").is_err()); + assert!(all_consuming(config_name)(b"other#").is_err()); + } + + #[test] + fn cannot_be_empty() { + assert!(config_name(b"").is_err()); + } +} + +mod section { + use crate::parse::{ + error::ParseNode, + section, + tests::util::{ + comment_event, fully_consumed, name_event, newline_custom_event, newline_event, + section_header as parsed_section_header, value_done_event, value_event, 
value_not_done_event, + whitespace_event, + }, + Event, Section, + }; + + fn section<'a>(i: &'a [u8], node: &mut ParseNode) -> nom::IResult<&'a [u8], (Section<'a>, usize)> { + let mut header = None; + let mut events = section::Events::default(); + super::section(i, node, &mut |e| match &header { + None => { + header = Some(e); + } + Some(_) => events.push(e), + }) + .map(|(i, o)| { + ( + i, + ( + Section { + header: match header.expect("header set") { + Event::SectionHeader(header) => header, + _ => unreachable!("unexpected"), + }, + events, + }, + o, + ), + ) + }) + } + + #[test] + fn empty_value_with_windows_newlines() { + let mut node = ParseNode::SectionHeader; + assert_eq!( + section(b"[a] k = \r\n", &mut node).unwrap(), + fully_consumed(( + Section { + header: parsed_section_header("a", None), + events: vec![ + whitespace_event(" "), + name_event("k"), + whitespace_event(" "), + Event::KeyValueSeparator, + whitespace_event(" "), + value_event(""), + newline_custom_event("\r\n") + ] + .into(), + }, + 1 + )), + ); + } + + #[test] + fn simple_value_with_windows_newlines() { + let mut node = ParseNode::SectionHeader; + assert_eq!( + section(b"[a] k = v\r\n", &mut node).unwrap(), + fully_consumed(( + Section { + header: parsed_section_header("a", None), + events: vec![ + whitespace_event(" "), + name_event("k"), + whitespace_event(" "), + Event::KeyValueSeparator, + whitespace_event(" "), + value_event("v"), + newline_custom_event("\r\n") + ] + .into(), + }, + 1 + )), + ); + assert_eq!( + section(b"[a] k = \r\n", &mut node).unwrap(), + fully_consumed(( + Section { + header: parsed_section_header("a", None), + events: vec![ + whitespace_event(" "), + name_event("k"), + whitespace_event(" "), + Event::KeyValueSeparator, + whitespace_event(" "), + value_event(""), + newline_custom_event("\r\n") + ] + .into(), + }, + 1 + )), + ); + } + + #[test] + fn empty_section() { + let mut node = ParseNode::SectionHeader; + assert_eq!( + section(b"[test]", &mut node).unwrap(), + 
fully_consumed(( + Section { + header: parsed_section_header("test", None), + events: Default::default() + }, + 0 + )), + ); + } + + #[test] + fn simple_section() { + let mut node = ParseNode::SectionHeader; + let section_data = br#"[hello] + a = b + c + d = "lol""#; + assert_eq!( + section(section_data, &mut node).unwrap(), + fully_consumed(( + Section { + header: parsed_section_header("hello", None), + events: vec![ + newline_event(), + whitespace_event(" "), + name_event("a"), + whitespace_event(" "), + Event::KeyValueSeparator, + whitespace_event(" "), + value_event("b"), + newline_event(), + whitespace_event(" "), + name_event("c"), + value_event(""), + newline_event(), + whitespace_event(" "), + name_event("d"), + whitespace_event(" "), + Event::KeyValueSeparator, + whitespace_event(" "), + value_event("\"lol\"") + ] + .into() + }, + 3 + )) + ); + } + + #[test] + fn section_with_empty_value_simplified() { + let mut node = ParseNode::SectionHeader; + let section_data = b"[a] k="; + assert_eq!( + section(section_data, &mut node).unwrap(), + fully_consumed(( + Section { + header: parsed_section_header("a", None), + events: vec![ + whitespace_event(" "), + name_event("k"), + Event::KeyValueSeparator, + value_event(""), + ] + .into() + }, + 0 + )) + ); + + let section_data = b"[a] k=\n"; + assert_eq!( + section(section_data, &mut node).unwrap(), + fully_consumed(( + Section { + header: parsed_section_header("a", None), + events: vec![ + whitespace_event(" "), + name_event("k"), + Event::KeyValueSeparator, + value_event(""), + newline_event(), + ] + .into() + }, + 1 + )) + ); + } + + #[test] + fn section_with_empty_value() { + let mut node = ParseNode::SectionHeader; + let section_data = br#"[hello] + a = b + c= + d = "lol""#; + assert_eq!( + section(section_data, &mut node).unwrap(), + fully_consumed(( + Section { + header: parsed_section_header("hello", None), + events: vec![ + newline_event(), + whitespace_event(" "), + name_event("a"), + whitespace_event(" "), 
+ Event::KeyValueSeparator, + whitespace_event(" "), + value_event("b"), + newline_event(), + whitespace_event(" "), + name_event("c"), + Event::KeyValueSeparator, + value_event(""), + newline_event(), + whitespace_event(" "), + name_event("d"), + whitespace_event(" "), + Event::KeyValueSeparator, + whitespace_event(" "), + value_event("\"lol\"") + ] + .into() + }, + 3 + )) + ); + } + + #[test] + fn section_implicit_value() { + let mut node = ParseNode::SectionHeader; + assert_eq!( + section(b"[hello] c", &mut node).unwrap(), + fully_consumed(( + Section { + header: parsed_section_header("hello", None), + events: vec![whitespace_event(" "), name_event("c"), value_event("")].into() + }, + 0 + )) + ); + + assert_eq!( + section(b"[hello] c\nd", &mut node).unwrap(), + fully_consumed(( + Section { + header: parsed_section_header("hello", None), + events: vec![ + whitespace_event(" "), + name_event("c"), + value_event(""), + newline_event(), + name_event("d"), + value_event("") + ] + .into() + }, + 1 + )) + ); + } + + #[test] + fn section_very_commented() { + let mut node = ParseNode::SectionHeader; + let section_data = br#"[hello] ; commentA + a = b # commentB + ; commentC + ; commentD + c = d"#; + assert_eq!( + section(section_data, &mut node).unwrap(), + fully_consumed(( + Section { + header: parsed_section_header("hello", None), + events: vec![ + whitespace_event(" "), + comment_event(';', " commentA"), + newline_event(), + whitespace_event(" "), + name_event("a"), + whitespace_event(" "), + Event::KeyValueSeparator, + whitespace_event(" "), + value_event("b"), + whitespace_event(" "), + comment_event('#', " commentB"), + newline_event(), + whitespace_event(" "), + comment_event(';', " commentC"), + newline_event(), + whitespace_event(" "), + comment_event(';', " commentD"), + newline_event(), + whitespace_event(" "), + name_event("c"), + whitespace_event(" "), + Event::KeyValueSeparator, + whitespace_event(" "), + value_event("d"), + ] + .into() + }, + 4 + )) + ); + 
} + + #[test] + fn complex_continuation() { + let mut node = ParseNode::SectionHeader; + // This test is absolute hell. Good luck if this fails. + assert_eq!( + section(b"[section] a = 1 \"\\\"\\\na ; e \"\\\"\\\nd # \"b\t ; c", &mut node).unwrap(), + fully_consumed(( + Section { + header: parsed_section_header("section", None), + events: vec![ + whitespace_event(" "), + name_event("a"), + whitespace_event(" "), + Event::KeyValueSeparator, + whitespace_event(" "), + value_not_done_event(r#"1 "\""#), + newline_event(), + value_not_done_event(r#"a ; e "\""#), + newline_event(), + value_done_event("d"), + whitespace_event(" "), + comment_event('#', " \"b\t ; c"), + ] + .into() + }, + 2 + )) + ); + } + + #[test] + fn quote_split_over_two_lines() { + let mut node = ParseNode::SectionHeader; + assert_eq!( + section(b"[section \"a\"] b =\"\\\n;\";a", &mut node).unwrap(), + fully_consumed(( + Section { + header: parsed_section_header("section", (" ", "a")), + events: vec![ + whitespace_event(" "), + name_event("b"), + whitespace_event(" "), + Event::KeyValueSeparator, + value_not_done_event("\""), + newline_event(), + value_done_event(";\""), + comment_event(';', "a"), + ] + .into() + }, + 1 + )) + ); + } + + #[test] + fn section_handles_extraneous_whitespace_before_comment() { + let mut node = ParseNode::SectionHeader; + assert_eq!( + section(b"[s]hello #world", &mut node).unwrap(), + fully_consumed(( + Section { + header: parsed_section_header("s", None), + events: vec![ + name_event("hello"), + whitespace_event(" "), + value_event(""), + comment_event('#', "world"), + ] + .into() + }, + 0 + )) + ); + } +} + +mod value_continuation { + use bstr::ByteSlice; + + use crate::parse::{ + section, + tests::util::{into_events, newline_custom_event, newline_event, value_done_event, value_not_done_event}, + }; + + pub fn value_impl<'a>(i: &'a [u8], events: &mut section::Events<'a>) -> nom::IResult<&'a [u8], ()> { + super::value_impl(i, &mut |e| events.push(e)).map(|t| (t.0, ())) + 
} + + #[test] + fn simple_continuation() { + let mut events = section::Events::default(); + assert_eq!(value_impl(b"hello\\\nworld", &mut events).unwrap().0, b""); + assert_eq!( + events, + into_events(vec![ + value_not_done_event("hello"), + newline_event(), + value_done_event("world") + ]) + ); + } + + #[test] + fn continuation_with_whitespace() { + let mut events = section::Events::default(); + assert_eq!(value_impl(b"hello\\\n world", &mut events).unwrap().0, b""); + assert_eq!( + events, + into_events(vec![ + value_not_done_event("hello"), + newline_event(), + value_done_event(" world") + ]) + ); + + let mut events = section::Events::default(); + assert_eq!(value_impl(b"hello\\\r\n world", &mut events).unwrap().0, b""); + assert_eq!( + events, + into_events(vec![ + value_not_done_event("hello"), + newline_custom_event("\r\n"), + value_done_event(" world") + ]) + ); + + let mut events = section::Events::default(); + assert!( + value_impl(b"hello\\\r\r\n world", &mut events).is_err(), + "\\r must be followed by \\n" + ); + } + + #[test] + fn complex_continuation_with_leftover_comment() { + let mut events = section::Events::default(); + assert_eq!( + value_impl(b"1 \"\\\"\\\na ; e \"\\\"\\\nd # \"b\t ; c", &mut events) + .unwrap() + .0, + b" # \"b\t ; c" + ); + assert_eq!( + events, + into_events(vec![ + value_not_done_event(r#"1 "\""#), + newline_event(), + value_not_done_event(r#"a ; e "\""#), + newline_event(), + value_done_event("d") + ]) + ); + } + + #[test] + fn quote_split_over_two_lines_with_leftover_comment() { + let mut events = section::Events::default(); + assert_eq!(value_impl(b"\"\\\n;\";a", &mut events).unwrap().0, b";a"); + assert_eq!( + events, + into_events(vec![ + value_not_done_event("\""), + newline_event(), + value_done_event(";\"") + ]) + ); + + let mut events = section::Events::default(); + assert_eq!(value_impl(b"\"a\\\r\nb;\";c", &mut events).unwrap().0, b";c"); + assert_eq!( + events, + into_events(vec![ + value_not_done_event("\"a"), + 
newline_custom_event("\r\n"), + value_done_event("b;\"") + ]) + ); + } + + #[test] + fn quote_split_over_multiple_lines_without_surrounding_quotes_but_inner_quotes() { + let mut events = section::Events::default(); + assert_eq!( + value_impl( + br#"1\ +"2" a\ +\"3 b\"\ +4 ; comment "#, + &mut events + ) + .unwrap() + .0 + .as_bstr(), + b" ; comment ".as_bstr() + ); + assert_eq!( + events, + into_events(vec![ + value_not_done_event("1"), + newline_event(), + value_not_done_event("\"2\" a"), + newline_event(), + value_not_done_event("\\\"3 b\\\""), + newline_event(), + value_done_event("4") + ]) + ); + } + + #[test] + fn quote_split_over_multiple_lines_with_surrounding_quotes() { + let mut events = section::Events::default(); + assert_eq!( + value_impl( + br#""1\ +"2" a\ +\"3 b\"\ +4 " ; comment "#, + &mut events + ) + .unwrap() + .0 + .as_bstr(), + b" ; comment ".as_bstr() + ); + assert_eq!( + events, + into_events(vec![ + value_not_done_event("\"1"), + newline_event(), + value_not_done_event("\"2\" a"), + newline_event(), + value_not_done_event("\\\"3 b\\\""), + newline_event(), + value_done_event("4 \"") + ]) + ); + } +} + +mod value_no_continuation { + use super::value_continuation::value_impl; + use crate::parse::{ + section, + tests::util::{into_events, value_event}, + }; + + #[test] + fn no_comment() { + let mut events = section::Events::default(); + assert_eq!(value_impl(b"hello", &mut events).unwrap().0, b""); + assert_eq!(events, into_events(vec![value_event("hello")])); + } + + #[test] + fn windows_newline() { + let mut events = section::Events::default(); + assert_eq!(value_impl(b"hi\r\nrest", &mut events).unwrap().0, b"\r\nrest"); + assert_eq!(events, into_events(vec![value_event("hi")])); + + events.clear(); + assert_eq!(value_impl(b"hi\r\r\r\nrest", &mut events).unwrap().0, b"\r\r\r\nrest"); + assert_eq!(events, into_events(vec![value_event("hi")])); + } + + #[test] + fn no_comment_newline() { + let mut events = section::Events::default(); + 
assert_eq!(value_impl(b"hello\na", &mut events).unwrap().0, b"\na"); + assert_eq!(events, into_events(vec![value_event("hello")])); + } + + #[test] + fn semicolon_comment_not_consumed() { + let mut events = section::Events::default(); + assert_eq!(value_impl(b"hello;world", &mut events).unwrap().0, b";world"); + assert_eq!(events, into_events(vec![value_event("hello")])); + } + + #[test] + fn octothorpe_comment_not_consumed() { + let mut events = section::Events::default(); + assert_eq!(value_impl(b"hello#world", &mut events).unwrap().0, b"#world"); + assert_eq!(events, into_events(vec![value_event("hello")])); + } + + #[test] + fn values_with_extraneous_whitespace_without_comment() { + let mut events = section::Events::default(); + assert_eq!( + value_impl(b"hello ", &mut events).unwrap().0, + b" " + ); + assert_eq!(events, into_events(vec![value_event("hello")])); + } + + #[test] + fn values_with_extraneous_whitespace_before_comment() { + let mut events = section::Events::default(); + assert_eq!( + value_impl(b"hello #world", &mut events).unwrap().0, + b" #world" + ); + assert_eq!(events, into_events(vec![value_event("hello")])); + + let mut events = section::Events::default(); + assert_eq!( + value_impl(b"hello ;world", &mut events).unwrap().0, + b" ;world" + ); + assert_eq!(events, into_events(vec![value_event("hello")])); + } + + #[test] + fn trans_escaped_comment_marker_not_consumed() { + let mut events = section::Events::default(); + assert_eq!(value_impl(br##"hello"#"world; a"##, &mut events).unwrap().0, b"; a"); + assert_eq!(events, into_events(vec![value_event(r##"hello"#"world"##)])); + } + + #[test] + fn complex_test() { + let mut events = section::Events::default(); + assert_eq!(value_impl(br#"value";";ahhhh"#, &mut events).unwrap().0, b";ahhhh"); + assert_eq!(events, into_events(vec![value_event(r#"value";""#)])); + } + + #[test] + fn garbage_after_continuation_is_err() { + assert!(value_impl(b"hello \\afwjdls", &mut Default::default()).is_err()); + } 
+ + #[test] + fn invalid_escape() { + assert!(value_impl(br#"\x"#, &mut Default::default()).is_err()); + } + + #[test] + fn incomplete_quote() { + assert!(value_impl(br#"hello "world"#, &mut Default::default()).is_err()); + } + + #[test] + fn incomplete_escape() { + assert!(value_impl(br#"hello world\"#, &mut Default::default()).is_err()); + } +} + +mod key_value_pair { + use crate::parse::{ + error::ParseNode, + section, + tests::util::{into_events, name_event, value_event, whitespace_event}, + Event, + }; + + fn key_value<'a>( + i: &'a [u8], + node: &mut ParseNode, + events: &mut section::Events<'a>, + ) -> nom::IResult<&'a [u8], ()> { + super::key_value_pair(i, node, &mut |e| events.push(e)).map(|t| (t.0, ())) + } + + #[test] + fn nonascii_is_allowed_for_values_but_not_for_keys() { + let mut node = ParseNode::SectionHeader; + let mut vec = Default::default(); + assert!(key_value("你好".as_bytes(), &mut node, &mut vec).is_err()); + assert!(key_value("a = 你好 ".as_bytes(), &mut node, &mut vec).is_ok()); + assert_eq!( + vec, + into_events(vec![ + name_event("a"), + whitespace_event(" "), + Event::KeyValueSeparator, + whitespace_event(" "), + value_event("你好") + ]) + ); + } + + #[test] + fn whitespace_is_not_ambiguous() { + let mut node = ParseNode::SectionHeader; + let mut vec = Default::default(); + assert!(key_value(b"a =b", &mut node, &mut vec).is_ok()); + assert_eq!( + vec, + into_events(vec![ + name_event("a"), + whitespace_event(" "), + Event::KeyValueSeparator, + value_event("b") + ]) + ); + + let mut vec = Default::default(); + assert!(key_value(b"a= b", &mut node, &mut vec).is_ok()); + assert_eq!( + vec, + into_events(vec![ + name_event("a"), + Event::KeyValueSeparator, + whitespace_event(" "), + value_event("b") + ]) + ); + } +} + +mod comment { + use super::comment; + use crate::parse::tests::util::{comment as parsed_comment, fully_consumed}; + + #[test] + fn semicolon() { + assert_eq!( + comment(b"; this is a semicolon comment").unwrap(), + 
fully_consumed(parsed_comment(';', " this is a semicolon comment")), + ); + } + + #[test] + fn octothorpe() { + assert_eq!( + comment(b"# this is an octothorpe comment").unwrap(), + fully_consumed(parsed_comment('#', " this is an octothorpe comment")), + ); + } + + #[test] + fn multiple_markers() { + assert_eq!( + comment(b"###### this is an octothorpe comment").unwrap(), + fully_consumed(parsed_comment('#', "##### this is an octothorpe comment")), + ); + } +} diff --git a/vendor/gix-config/src/parse/section/header.rs b/vendor/gix-config/src/parse/section/header.rs new file mode 100644 index 000000000..341edcdd5 --- /dev/null +++ b/vendor/gix-config/src/parse/section/header.rs @@ -0,0 +1,180 @@ +use std::{borrow::Cow, fmt::Display}; + +use bstr::{BStr, BString, ByteSlice, ByteVec}; + +use crate::parse::{ + section::{into_cow_bstr, Header, Name}, + Event, +}; + +/// The error returned by [`Header::new(…)`][super::Header::new()]. +#[derive(Debug, PartialOrd, PartialEq, Eq, thiserror::Error)] +#[allow(missing_docs)] +pub enum Error { + #[error("section names can only be ascii, '-'")] + InvalidName, + #[error("sub-section names must not contain newlines or null bytes")] + InvalidSubSection, +} + +impl<'a> Header<'a> { + /// Instantiate a new header either with a section `name` only, e.g. "core" serializing to `[core]`, + /// or with an additional `subsection`, e.g. `[remote "origin"]` for `subsection` being "origin" and `name` being "remote". + /// + /// Headers with a subsection always use a single space as separator, i.e. the modern quoted syntax. + /// + /// Fails with [`Error::InvalidName`] if `name` contains bytes other than ASCII alphanumerics or `-`, + /// and with [`Error::InvalidSubSection`] if `subsection` contains a newline or a null byte. + pub fn new( + name: impl Into<Cow<'a, str>>, + subsection: impl Into<Option<Cow<'a, BStr>>>, + ) -> Result<Header<'a>, Error> { + let name = Name(validated_name(into_cow_bstr(name.into()))?); + if let Some(subsection_name) = subsection.into() { + Ok(Header { + name, + separator: Some(Cow::Borrowed(" ".into())), + subsection_name: Some(validated_subsection(subsection_name)?), + }) + } else { + Ok(Header { + name, + separator: None, + subsection_name: None, + }) + } + } +} + +/// Return true if `name` is valid as subsection name, like `origin` in `[remote "origin"]`. 
+pub fn is_valid_subsection(name: &BStr) -> bool { + name.find_byteset(b"\n\0").is_none() +} + +fn validated_subsection(name: Cow<'_, BStr>) -> Result<Cow<'_, BStr>, Error> { + is_valid_subsection(name.as_ref()) + .then_some(name) + .ok_or(Error::InvalidSubSection) +} + +fn validated_name(name: Cow<'_, BStr>) -> Result<Cow<'_, BStr>, Error> { + name.iter() + .all(|b| b.is_ascii_alphanumeric() || *b == b'-') + .then_some(name) + .ok_or(Error::InvalidName) +} + +#[cfg(test)] +mod tests { + use super::*; + + #[test] + fn empty_header_names_are_legal() { + assert!(Header::new("", None).is_ok(), "yes, git allows this, so do we"); + } + + #[test] + fn empty_header_sub_names_are_legal() { + assert!( + Header::new("remote", Some(Cow::Borrowed("".into()))).is_ok(), + "yes, git allows this, so do we" + ); + } +} + +impl Header<'_> { + ///Return true if this is a header like `[legacy.subsection]`, or false otherwise. + pub fn is_legacy(&self) -> bool { + self.separator.as_deref().map_or(false, |n| n == ".") + } + + /// Return the subsection name, if present, i.e. "origin" in `[remote "origin"]`. + /// + /// It is parsed without quotes, and with escapes folded + /// into their resulting characters. + /// Thus during serialization, escapes and quotes must be re-added. + /// This makes it possible to use [`Event`] data for lookups directly. + pub fn subsection_name(&self) -> Option<&BStr> { + self.subsection_name.as_deref() + } + + /// Return the name of the header, like "remote" in `[remote "origin"]`. + pub fn name(&self) -> &BStr { + &self.name + } + + /// Serialize this type into a `BString` for convenience. + /// + /// Note that `to_string()` can also be used, but might not be lossless. + #[must_use] + pub fn to_bstring(&self) -> BString { + let mut buf = Vec::new(); + self.write_to(&mut buf).expect("io error impossible"); + buf.into() + } + + /// Stream ourselves to the given `out`, in order to reproduce this header mostly losslessly + /// as it was parsed. 
+ pub fn write_to(&self, mut out: impl std::io::Write) -> std::io::Result<()> { + out.write_all(b"[")?; + out.write_all(&self.name)?; + + if let (Some(sep), Some(subsection)) = (&self.separator, &self.subsection_name) { + let sep = sep.as_ref(); + out.write_all(sep)?; + if sep == "." { + out.write_all(subsection.as_ref())?; + } else { + out.write_all(b"\"")?; + out.write_all(escape_subsection(subsection.as_ref()).as_ref())?; + out.write_all(b"\"")?; + } + } + + out.write_all(b"]") + } + + /// Turn this instance into a fully owned one with `'static` lifetime. + #[must_use] + pub fn to_owned(&self) -> Header<'static> { + Header { + name: self.name.to_owned(), + separator: self.separator.clone().map(|v| Cow::Owned(v.into_owned())), + subsection_name: self.subsection_name.clone().map(|v| Cow::Owned(v.into_owned())), + } + } +} + +fn escape_subsection(name: &BStr) -> Cow<'_, BStr> { + if name.find_byteset(b"\\\"").is_none() { + return name.into(); + } + let mut buf = Vec::with_capacity(name.len()); + for b in name.iter().copied() { + match b { + b'\\' => buf.push_str(br#"\\"#), + b'"' => buf.push_str(br#"\""#), + _ => buf.push(b), + } + } + BString::from(buf).into() +} + +impl Display for Header<'_> { + fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result { + Display::fmt(&self.to_bstring(), f) + } +} + +/// Serialize an owned header into a `BString`, exactly like the conversion from `&Header` does. +impl From<Header<'_>> for BString { + fn from(header: Header<'_>) -> Self { + // Must call `to_bstring()` directly: `header.into()` resolves through the blanket `Into` impl + // back to this very function and would recurse until the stack overflows. + header.to_bstring() + } +} + +impl From<&Header<'_>> for BString { + fn from(header: &Header<'_>) -> Self { + header.to_bstring() + } +} + +impl<'a> From<Header<'a>> for Event<'a> { + fn from(header: Header<'_>) -> Event<'_> { + Event::SectionHeader(header) + } +} diff --git a/vendor/gix-config/src/parse/section/mod.rs b/vendor/gix-config/src/parse/section/mod.rs new file mode 100644 index 000000000..7ba08b87d --- /dev/null +++ b/vendor/gix-config/src/parse/section/mod.rs @@ -0,0 +1,187 @@ +use std::{borrow::Cow, fmt::Display}; + +use bstr::BStr; +use smallvec::SmallVec; + +use 
crate::parse::{Event, Section}; + +/// +pub mod header; + +pub(crate) mod unvalidated; + +/// A container for events, avoiding heap allocations in typical files. +pub type Events<'a> = SmallVec<[Event<'a>; 64]>; + +/// A parsed section header, containing a name and optionally a subsection name. +#[derive(Clone, Eq, PartialEq, Ord, PartialOrd, Hash, Debug)] +pub struct Header<'a> { + /// The name of the header. + pub(crate) name: Name<'a>, + /// The separator used to determine if the section contains a subsection. + /// This is either a period `.` or a string of whitespace. Note that + /// reconstruction of subsection format is dependent on this value. If this + /// is all whitespace, then the subsection name needs to be surrounded by + /// quotes to have perfect reconstruction. + pub(crate) separator: Option<Cow<'a, BStr>>, + pub(crate) subsection_name: Option<Cow<'a, BStr>>, +} + +impl Section<'_> { + /// Turn this instance into a fully owned one with `'static` lifetime. + #[must_use] + pub fn to_owned(&self) -> Section<'static> { + Section { + header: self.header.to_owned(), + events: self.events.iter().map(Event::to_owned).collect(), + } + } +} + +impl Display for Section<'_> { + fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result { + write!(f, "{}", self.header)?; + for event in &self.events { + event.fmt(f)?; + } + Ok(()) + } +} + +mod types { + macro_rules! generate_case_insensitive { + ($name:ident, $module:ident, $err_doc:literal, $validate:ident, $cow_inner_type:ty, $comment:literal) => { + /// + pub mod $module { + /// The error returned when `TryFrom` is invoked to create an instance. 
+ #[derive(Debug, thiserror::Error, Copy, Clone)] + #[error($err_doc)] + pub struct Error; + } + + #[doc = $comment] + #[derive(Clone, Eq, Debug, Default)] + pub struct $name<'a>(pub(crate) std::borrow::Cow<'a, $cow_inner_type>); + + impl<'a> $name<'a> { + pub(crate) fn from_str_unchecked(s: &'a str) -> Self { + $name(std::borrow::Cow::Borrowed(s.into())) + } + /// Turn this instance into a fully owned one with `'static` lifetime. + #[must_use] + pub fn to_owned(&self) -> $name<'static> { + $name(std::borrow::Cow::Owned(self.0.clone().into_owned())) + } + } + + impl PartialEq for $name<'_> { + fn eq(&self, other: &Self) -> bool { + self.0.eq_ignore_ascii_case(&other.0) + } + } + + impl std::fmt::Display for $name<'_> { + fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result { + self.0.fmt(f) + } + } + + impl PartialOrd for $name<'_> { + fn partial_cmp(&self, other: &Self) -> Option<std::cmp::Ordering> { + self.cmp(other).into() + } + } + + impl Ord for $name<'_> { + fn cmp(&self, other: &Self) -> std::cmp::Ordering { + let a = self.0.iter().map(|c| c.to_ascii_lowercase()); + let b = other.0.iter().map(|c| c.to_ascii_lowercase()); + a.cmp(b) + } + } + + impl std::hash::Hash for $name<'_> { + fn hash<H: std::hash::Hasher>(&self, state: &mut H) { + for b in self.0.iter() { + b.to_ascii_lowercase().hash(state); + } + } + } + + impl<'a> std::convert::TryFrom<&'a str> for $name<'a> { + type Error = $module::Error; + + fn try_from(s: &'a str) -> Result<Self, Self::Error> { + Self::try_from(std::borrow::Cow::Borrowed(bstr::ByteSlice::as_bstr(s.as_bytes()))) + } + } + + impl<'a> std::convert::TryFrom<String> for $name<'a> { + type Error = $module::Error; + + fn try_from(s: String) -> Result<Self, Self::Error> { + Self::try_from(std::borrow::Cow::Owned(bstr::BString::from(s))) + } + } + + impl<'a> std::convert::TryFrom<std::borrow::Cow<'a, bstr::BStr>> for $name<'a> { + type Error = $module::Error; + + fn try_from(s: std::borrow::Cow<'a, bstr::BStr>) -> 
Result<Self, Self::Error> { + if $validate(s.as_ref()) { + Ok(Self(s)) + } else { + Err($module::Error) + } + } + } + + impl<'a> std::ops::Deref for $name<'a> { + type Target = $cow_inner_type; + + fn deref(&self) -> &Self::Target { + &self.0 + } + } + + impl<'a> std::convert::AsRef<str> for $name<'a> { + fn as_ref(&self) -> &str { + std::str::from_utf8(self.0.as_ref()).expect("only valid UTF8 makes it through our validation") + } + } + }; + } + + fn is_valid_name(n: &bstr::BStr) -> bool { + !n.is_empty() && n.iter().all(|b| b.is_ascii_alphanumeric() || *b == b'-') + } + fn is_valid_key(n: &bstr::BStr) -> bool { + is_valid_name(n) && n[0].is_ascii_alphabetic() + } + + generate_case_insensitive!( + Name, + name, + "Valid names consist of alphanumeric characters or dashes.", + is_valid_name, + bstr::BStr, + "Wrapper struct for section header names, like `remote`, since these are case-insensitive." + ); + + generate_case_insensitive!( + Key, + key, + "Valid keys consist alphanumeric characters or dashes, starting with an alphabetic character.", + is_valid_key, + bstr::BStr, + "Wrapper struct for key names, like `path` in `include.path`, since keys are case-insensitive." + ); +} +pub use types::{key, name, Key, Name}; + +pub(crate) fn into_cow_bstr(c: Cow<'_, str>) -> Cow<'_, BStr> { + match c { + Cow::Borrowed(s) => Cow::Borrowed(s.into()), + Cow::Owned(s) => Cow::Owned(s.into()), + } +} diff --git a/vendor/gix-config/src/parse/section/unvalidated.rs b/vendor/gix-config/src/parse/section/unvalidated.rs new file mode 100644 index 000000000..1710837fe --- /dev/null +++ b/vendor/gix-config/src/parse/section/unvalidated.rs @@ -0,0 +1,25 @@ +use bstr::{BStr, ByteSlice}; + +/// An unvalidated parse result of a key for a section, parsing input like `remote.origin` or `core`. +#[derive(Debug, PartialEq, Ord, PartialOrd, Eq, Hash, Clone, Copy)] +pub struct Key<'a> { + /// The name of the section, like `remote` in `remote.origin`. 
+ pub section_name: &'a str, + /// The name of the sub-section, like `origin` in `remote.origin`. + pub subsection_name: Option<&'a BStr>, +} + +impl<'a> Key<'a> { + /// Parse `input` like `remote.origin` or `core` as a `Key` to make its section specific fields available, + /// or `None` if there were not one or two tokens separated by `.`. + /// Note that `input` isn't validated, and is `str` as ascii is a subset of UTF-8 which is required for any valid keys. + pub fn parse(input: impl Into<&'a BStr>) -> Option<Self> { + let input = input.into(); + let mut tokens = input.splitn(2, |b| *b == b'.'); + + Some(Key { + section_name: tokens.next()?.to_str().ok()?, + subsection_name: tokens.next().map(Into::into), + }) + } +} diff --git a/vendor/gix-config/src/parse/tests.rs b/vendor/gix-config/src/parse/tests.rs new file mode 100644 index 000000000..2a2853c4c --- /dev/null +++ b/vendor/gix-config/src/parse/tests.rs @@ -0,0 +1,162 @@ +mod section { + + mod header { + mod unvalidated { + use crate::parse::section::unvalidated::Key; + + #[test] + fn section_name_only() { + assert_eq!( + Key::parse("core").unwrap(), + Key { + section_name: "core", + subsection_name: None + } + ); + } + + #[test] + fn section_name_and_subsection() { + assert_eq!( + Key::parse("core.bare").unwrap(), + Key { + section_name: "core", + subsection_name: Some("bare".into()) + } + ); + } + + #[test] + fn section_name_and_subsection_with_separators() { + assert_eq!( + Key::parse("remote.https:///home/user.git").unwrap(), + Key { + section_name: "remote", + subsection_name: Some("https:///home/user.git".into()) + } + ); + } + } + + mod write_to { + use std::borrow::Cow; + + use crate::parse::section; + + fn header(name: &str, subsection: impl Into<Option<(&'static str, &'static str)>>) -> section::Header<'_> { + let name = section::Name(Cow::Borrowed(name.into())); + if let Some((separator, subsection_name)) = subsection.into() { + section::Header { + name, + separator: 
Some(Cow::Borrowed(separator.into())), + subsection_name: Some(Cow::Borrowed(subsection_name.into())), + } + } else { + section::Header { + name, + separator: None, + subsection_name: None, + } + } + } + + #[test] + fn legacy_subsection_format_does_not_use_escapes() { + let invalid = header("invalid", Some((".", "\\ \""))); + assert_eq!( + invalid.to_bstring(), + "[invalid.\\ \"]", + "no escaping happens for legacy subsections" + ); + assert!(invalid.is_legacy()); + } + + #[test] + fn subsections_escape_two_characters_only() { + let invalid = header("invalid", Some((" ", "\\ \"\npost newline"))); + assert_eq!( + invalid.to_bstring(), + "[invalid \"\\\\ \\\"\npost newline\"]", + "newlines are actually invalid in subsection, but they are possible due to unvalidated instance creation" + ); + assert!(!invalid.is_legacy()); + } + } + } +} + +pub(crate) mod util { + //! This module is only included for tests, and contains common unit test helper + //! functions. + + use std::{borrow::Cow, convert::TryFrom}; + + use crate::parse::{section, Comment, Event}; + + pub fn into_events(events: Vec<Event<'_>>) -> section::Events<'_> { + events.into() + } + + pub fn section_header( + name: &str, + subsection: impl Into<Option<(&'static str, &'static str)>>, + ) -> section::Header<'_> { + let name = section::Name::try_from(name).unwrap(); + if let Some((separator, subsection_name)) = subsection.into() { + section::Header { + name, + separator: Some(Cow::Borrowed(separator.into())), + subsection_name: Some(Cow::Borrowed(subsection_name.into())), + } + } else { + section::Header { + name, + separator: None, + subsection_name: None, + } + } + } + + pub(crate) fn name_event(name: &'static str) -> Event<'static> { + Event::SectionKey(section::Key(Cow::Borrowed(name.into()))) + } + + pub(crate) fn value_event(value: &'static str) -> Event<'static> { + Event::Value(Cow::Borrowed(value.into())) + } + + pub(crate) fn value_not_done_event(value: &'static str) -> Event<'static> { + 
Event::ValueNotDone(Cow::Borrowed(value.into())) + } + + pub(crate) fn value_done_event(value: &'static str) -> Event<'static> { + Event::ValueDone(Cow::Borrowed(value.into())) + } + + pub(crate) fn newline_event() -> Event<'static> { + newline_custom_event("\n") + } + + pub(crate) fn newline_custom_event(value: &'static str) -> Event<'static> { + Event::Newline(Cow::Borrowed(value.into())) + } + + pub(crate) fn whitespace_event(value: &'static str) -> Event<'static> { + Event::Whitespace(Cow::Borrowed(value.into())) + } + + pub(crate) fn comment_event(tag: char, msg: &'static str) -> Event<'static> { + Event::Comment(comment(tag, msg)) + } + + pub(crate) fn comment(comment_tag: char, comment: &'static str) -> Comment<'static> { + Comment { + tag: comment_tag as u8, + text: Cow::Borrowed(comment.into()), + } + } + + pub(crate) const fn fully_consumed<T>(t: T) -> (&'static [u8], T) { + (&[], t) + } +} |