diff options
Diffstat (limited to 'vendor/gix-config/src/parse/events.rs')
-rw-r--r-- | vendor/gix-config/src/parse/events.rs | 336 |
1 files changed, 336 insertions, 0 deletions
diff --git a/vendor/gix-config/src/parse/events.rs b/vendor/gix-config/src/parse/events.rs new file mode 100644 index 000000000..62f621b52 --- /dev/null +++ b/vendor/gix-config/src/parse/events.rs @@ -0,0 +1,336 @@ +use std::convert::TryFrom; + +use smallvec::SmallVec; + +use crate::{ + parse, + parse::{section, Event, Section}, +}; + +/// A type store without allocation all events that are typically preceding the first section. +pub type FrontMatterEvents<'a> = SmallVec<[Event<'a>; 8]>; + +/// A zero-copy `gix-config` file parser. +/// +/// This is parser exposes low-level syntactic events from a `gix-config` file. +/// Generally speaking, you'll want to use [`File`] as it wraps +/// around the parser to provide a higher-level abstraction to a `gix-config` +/// file, including querying, modifying, and updating values. +/// +/// This parser guarantees that the events emitted are sufficient to +/// reconstruct a `gix-config` file identical to the source `gix-config` +/// when writing it. +/// +/// # Differences between a `.ini` parser +/// +/// While the `gix-config` format closely resembles the [`.ini` file format], +/// there are subtle differences that make them incompatible. For one, the file +/// format is not well defined, and there exists no formal specification to +/// adhere to. +/// +/// For concrete examples, some notable differences are: +/// - `gix-config` sections permit subsections via either a quoted string +/// (`[some-section "subsection"]`) or via the deprecated dot notation +/// (`[some-section.subsection]`). Successful parsing these section names is not +/// well defined in typical `.ini` parsers. This parser will handle these cases +/// perfectly. +/// - Comment markers are not strictly defined either. This parser will always +/// and only handle a semicolon or octothorpe (also known as a hash or number +/// sign). +/// - Global properties may be allowed in `.ini` parsers, but is strictly +/// disallowed by this parser. +/// - Only `\t`, `\n`, `\b` `\\` are valid escape characters. +/// - Quoted and semi-quoted values will be parsed (but quotes will be included +/// in event outputs). An example of a semi-quoted value is `5"hello world"`, +/// which should be interpreted as `5hello world` after +/// [normalization][crate::value::normalize()]. +/// - Line continuations via a `\` character is supported (inside or outside of quotes) +/// - Whitespace handling similarly follows the `gix-config` specification as +/// closely as possible, where excess whitespace after a non-quoted value are +/// trimmed, and line continuations onto a new line with excess spaces are kept. +/// - Only equal signs (optionally padded by spaces) are valid name/value +/// delimiters. +/// +/// Note that that things such as case-sensitivity or duplicate sections are +/// _not_ handled. This parser is a low level _syntactic_ interpreter +/// and higher level wrappers around this parser, which may +/// or may not be zero-copy, should handle _semantic_ values. This also means +/// that string-like values are not interpreted. For example, `hello"world"` +/// would be read at a high level as `helloworld` but this parser will return +/// the former instead, with the extra quotes. This is because it is not the +/// responsibility of the parser to interpret these values, and doing so would +/// necessarily require a copy, which this parser avoids. +/// +/// # Trait Implementations +/// +/// - This struct does _not_ implement [`FromStr`] due to lifetime +/// constraints implied on the required `from_str` method. Instead, it provides +/// [`From<&'_ str>`]. +/// +/// # Idioms +/// +/// If you do want to use this parser, there are some idioms that may help you +/// with interpreting sequences of events. +/// +/// ## `Value` events do not immediately follow `Key` events +/// +/// Consider the following `gix-config` example: +/// +/// ```text +/// [core] +/// autocrlf = input +/// ``` +/// +/// Because this parser guarantees perfect reconstruction, there are many +/// non-significant events that occur in addition to the ones you may expect: +/// +/// ``` +/// # use gix_config::parse::{Event, Events, section}; +/// # use std::borrow::Cow; +/// # use std::convert::TryFrom; +/// # let section_header = section::Header::new("core", None).unwrap(); +/// # let section_data = "[core]\n autocrlf = input"; +/// # assert_eq!(Events::from_str(section_data).unwrap().into_vec(), vec![ +/// Event::SectionHeader(section_header), +/// Event::Newline(Cow::Borrowed("\n".into())), +/// Event::Whitespace(Cow::Borrowed(" ".into())), +/// Event::SectionKey(section::Key::try_from("autocrlf")?), +/// Event::Whitespace(Cow::Borrowed(" ".into())), +/// Event::KeyValueSeparator, +/// Event::Whitespace(Cow::Borrowed(" ".into())), +/// Event::Value(Cow::Borrowed("input".into())), +/// # ]); +/// # Ok::<_, Box<dyn std::error::Error>>(()) +/// ``` +/// +/// Note the two whitespace events between the key and value pair! Those two +/// events actually refer to the whitespace between the name and value and the +/// equal sign. So if the config instead had `autocrlf=input`, those whitespace +/// events would no longer be present. +/// +/// ## `KeyValueSeparator` event is not guaranteed to emit +/// +/// Consider the following `gix-config` example: +/// +/// ```text +/// [core] +/// autocrlf +/// ``` +/// +/// This is a valid config with a `autocrlf` key having an implicit `true` +/// value. This means that there is not a `=` separating the key and value, +/// which means that the corresponding event won't appear either: +/// +/// ``` +/// # use gix_config::parse::{Event, Events, section}; +/// # use std::borrow::Cow; +/// # use std::convert::TryFrom; +/// # let section_header = section::Header::new("core", None).unwrap(); +/// # let section_data = "[core]\n autocrlf"; +/// # assert_eq!(Events::from_str(section_data).unwrap().into_vec(), vec![ +/// Event::SectionHeader(section_header), +/// Event::Newline(Cow::Borrowed("\n".into())), +/// Event::Whitespace(Cow::Borrowed(" ".into())), +/// Event::SectionKey(section::Key::try_from("autocrlf")?), +/// Event::Value(Cow::Borrowed("".into())), +/// # ]); +/// # Ok::<_, Box<dyn std::error::Error>>(()) +/// ``` +/// +/// ## Quoted values are not unquoted +/// +/// Consider the following `gix-config` example: +/// +/// ```text +/// [core] +/// autocrlf=true"" +/// filemode=fa"lse" +/// ``` +/// +/// Both these events, when fully processed, should normally be `true` and +/// `false`. However, because this parser is zero-copy, we cannot process +/// partially quoted values, such as the `false` example. As a result, to +/// maintain consistency, the parser will just take all values as literals. The +/// relevant event stream emitted is thus emitted as: +/// +/// ``` +/// # use gix_config::parse::{Event, Events, section}; +/// # use std::borrow::Cow; +/// # use std::convert::TryFrom; +/// # let section_header = section::Header::new("core", None).unwrap(); +/// # let section_data = "[core]\nautocrlf=true\"\"\nfilemode=fa\"lse\""; +/// # assert_eq!(Events::from_str(section_data).unwrap().into_vec(), vec![ +/// Event::SectionHeader(section_header), +/// Event::Newline(Cow::Borrowed("\n".into())), +/// Event::SectionKey(section::Key::try_from("autocrlf")?), +/// Event::KeyValueSeparator, +/// Event::Value(Cow::Borrowed(r#"true"""#.into())), +/// Event::Newline(Cow::Borrowed("\n".into())), +/// Event::SectionKey(section::Key::try_from("filemode")?), +/// Event::KeyValueSeparator, +/// Event::Value(Cow::Borrowed(r#"fa"lse""#.into())), +/// # ]); +/// # Ok::<_, Box<dyn std::error::Error>>(()) +/// ``` +/// +/// ## Whitespace after line continuations are part of the value +/// +/// Consider the following `gix-config` example: +/// +/// ```text +/// [some-section] +/// file=a\ +/// c +/// ``` +/// +/// Because how `gix-config` treats continuations, the whitespace preceding `c` +/// are in fact part of the value of `file`. The fully interpreted key/value +/// pair is actually `file=a c`. As a result, the parser will provide this +/// split value accordingly: +/// +/// ``` +/// # use gix_config::parse::{Event, Events, section}; +/// # use std::borrow::Cow; +/// # use std::convert::TryFrom; +/// # let section_header = section::Header::new("some-section", None).unwrap(); +/// # let section_data = "[some-section]\nfile=a\\\n c"; +/// # assert_eq!(Events::from_str(section_data).unwrap().into_vec(), vec![ +/// Event::SectionHeader(section_header), +/// Event::Newline(Cow::Borrowed("\n".into())), +/// Event::SectionKey(section::Key::try_from("file")?), +/// Event::KeyValueSeparator, +/// Event::ValueNotDone(Cow::Borrowed("a".into())), +/// Event::Newline(Cow::Borrowed("\n".into())), +/// Event::ValueDone(Cow::Borrowed(" c".into())), +/// # ]); +/// # Ok::<_, Box<dyn std::error::Error>>(()) +/// ``` +/// +/// [`File`]: crate::File +/// [`.ini` file format]: https://en.wikipedia.org/wiki/INI_file +/// [`git`'s documentation]: https://git-scm.com/docs/gix-config#_configuration_file +/// [`FromStr`]: std::str::FromStr +/// [`From<&'_ str>`]: std::convert::From +#[derive(Clone, Eq, PartialEq, Ord, PartialOrd, Hash, Debug, Default)] +pub struct Events<'a> { + /// Events seen before the first section. + pub frontmatter: FrontMatterEvents<'a>, + /// All parsed sections. + pub sections: Vec<Section<'a>>, +} + +impl Events<'static> { + /// Parses the provided bytes, returning an [`Events`] that contains allocated + /// and owned events. This is similar to [`Events::from_bytes()`], but performance + /// is degraded as it requires allocation for every event. + /// + /// Use `filter` to only include those events for which it returns true. + pub fn from_bytes_owned<'a>( + input: &'a [u8], + filter: Option<fn(&Event<'a>) -> bool>, + ) -> Result<Events<'static>, parse::Error> { + from_bytes(input, |e| e.to_owned(), filter) + } +} + +impl<'a> Events<'a> { + /// Attempt to zero-copy parse the provided bytes. On success, returns a + /// [`Events`] that provides methods to accessing leading comments and sections + /// of a `gix-config` file and can be converted into an iterator of [`Event`] + /// for higher level processing. + /// + /// Use `filter` to only include those events for which it returns true. + pub fn from_bytes(input: &'a [u8], filter: Option<fn(&Event<'a>) -> bool>) -> Result<Events<'a>, parse::Error> { + from_bytes(input, std::convert::identity, filter) + } + + /// Attempt to zero-copy parse the provided `input` string. + /// + /// Prefer the [`from_bytes()`][Self::from_bytes()] method if UTF8 encoding + /// isn't guaranteed. + #[allow(clippy::should_implement_trait)] + pub fn from_str(input: &'a str) -> Result<Events<'a>, parse::Error> { + Self::from_bytes(input.as_bytes(), None) + } + + /// Consumes the parser to produce an iterator of all contained events. + #[must_use = "iterators are lazy and do nothing unless consumed"] + #[allow(clippy::should_implement_trait)] + pub fn into_iter(self) -> impl Iterator<Item = parse::Event<'a>> + std::iter::FusedIterator { + self.frontmatter.into_iter().chain( + self.sections + .into_iter() + .flat_map(|section| std::iter::once(parse::Event::SectionHeader(section.header)).chain(section.events)), + ) + } + + /// Place all contained events into a single `Vec`. + pub fn into_vec(self) -> Vec<parse::Event<'a>> { + self.into_iter().collect() + } +} + +impl<'a> TryFrom<&'a str> for Events<'a> { + type Error = parse::Error; + + fn try_from(value: &'a str) -> Result<Self, Self::Error> { + Self::from_str(value) + } +} + +impl<'a> TryFrom<&'a [u8]> for Events<'a> { + type Error = parse::Error; + + fn try_from(value: &'a [u8]) -> Result<Self, Self::Error> { + Events::from_bytes(value, None) + } +} + +fn from_bytes<'a, 'b>( + input: &'a [u8], + convert: impl Fn(Event<'a>) -> Event<'b>, + filter: Option<fn(&Event<'a>) -> bool>, +) -> Result<Events<'b>, parse::Error> { + let mut header = None; + let mut events = section::Events::default(); + let mut frontmatter = FrontMatterEvents::default(); + let mut sections = Vec::new(); + parse::from_bytes(input, |e: Event<'_>| match e { + Event::SectionHeader(next_header) => { + match header.take() { + None => { + frontmatter = std::mem::take(&mut events).into_iter().collect(); + } + Some(prev_header) => { + sections.push(parse::Section { + header: prev_header, + events: std::mem::take(&mut events), + }); + } + }; + header = match convert(Event::SectionHeader(next_header)) { + Event::SectionHeader(h) => h, + _ => unreachable!("BUG: convert must not change the event type, just the lifetime"), + } + .into(); + } + event => { + if filter.map_or(true, |f| f(&event)) { + events.push(convert(event)) + } + } + })?; + + match header { + None => { + frontmatter = events.into_iter().collect(); + } + Some(prev_header) => { + sections.push(parse::Section { + header: prev_header, + events: std::mem::take(&mut events), + }); + } + } + Ok(Events { frontmatter, sections }) +} |