diff options
author | Daniel Baumann <daniel.baumann@progress-linux.org> | 2024-05-04 12:41:41 +0000 |
---|---|---|
committer | Daniel Baumann <daniel.baumann@progress-linux.org> | 2024-05-04 12:41:41 +0000 |
commit | 10ee2acdd26a7f1298c6f6d6b7af9b469fe29b87 (patch) | |
tree | bdffd5d80c26cf4a7a518281a204be1ace85b4c1 /vendor/gix-config/src/parse/section | |
parent | Releasing progress-linux version 1.70.0+dfsg1-9~progress7.99u1. (diff) | |
download | rustc-10ee2acdd26a7f1298c6f6d6b7af9b469fe29b87.tar.xz rustc-10ee2acdd26a7f1298c6f6d6b7af9b469fe29b87.zip |
Merging upstream version 1.70.0+dfsg2.
Signed-off-by: Daniel Baumann <daniel.baumann@progress-linux.org>
Diffstat (limited to 'vendor/gix-config/src/parse/section')
-rw-r--r-- | vendor/gix-config/src/parse/section/header.rs | 180 | ||||
-rw-r--r-- | vendor/gix-config/src/parse/section/mod.rs | 187 | ||||
-rw-r--r-- | vendor/gix-config/src/parse/section/unvalidated.rs | 25 |
3 files changed, 392 insertions, 0 deletions
diff --git a/vendor/gix-config/src/parse/section/header.rs b/vendor/gix-config/src/parse/section/header.rs new file mode 100644 index 000000000..341edcdd5 --- /dev/null +++ b/vendor/gix-config/src/parse/section/header.rs @@ -0,0 +1,180 @@ +use std::{borrow::Cow, fmt::Display}; + +use bstr::{BStr, BString, ByteSlice, ByteVec}; + +use crate::parse::{ + section::{into_cow_bstr, Header, Name}, + Event, +}; + +/// The error returned by [`Header::new(…)`][super::Header::new()]. +#[derive(Debug, PartialOrd, PartialEq, Eq, thiserror::Error)] +#[allow(missing_docs)] +pub enum Error { + #[error("section names can only be ascii, '-'")] + InvalidName, + #[error("sub-section names must not contain newlines or null bytes")] + InvalidSubSection, +} + +impl<'a> Header<'a> { + /// Instantiate a new header either with a section `name`, e.g. "core" serializing to `[core]` + /// or `[remote "origin"]` for `subsection` being "origin" and `name` being "remote". + /// + /// # Errors + /// + /// Returns [`Error::InvalidName`] if `name` contains bytes other than ASCII alphanumerics or `-`, + /// and [`Error::InvalidSubSection`] if `subsection` contains a newline or a null byte. + pub fn new( + name: impl Into<Cow<'a, str>>, + subsection: impl Into<Option<Cow<'a, BStr>>>, + ) -> Result<Header<'a>, Error> { + let name = Name(validated_name(into_cow_bstr(name.into()))?); + if let Some(subsection_name) = subsection.into() { + Ok(Header { + name, + separator: Some(Cow::Borrowed(" ".into())), + subsection_name: Some(validated_subsection(subsection_name)?), + }) + } else { + Ok(Header { + name, + separator: None, + subsection_name: None, + }) + } + } +} + +/// Return true if `name` is valid as subsection name, like `origin` in `[remote "origin"]`. 
+pub fn is_valid_subsection(name: &BStr) -> bool { + name.find_byteset(b"\n\0").is_none() +} + +fn validated_subsection(name: Cow<'_, BStr>) -> Result<Cow<'_, BStr>, Error> { + is_valid_subsection(name.as_ref()) + .then_some(name) + .ok_or(Error::InvalidSubSection) +} + +fn validated_name(name: Cow<'_, BStr>) -> Result<Cow<'_, BStr>, Error> { + name.iter() + .all(|b| b.is_ascii_alphanumeric() || *b == b'-') + .then_some(name) + .ok_or(Error::InvalidName) +} + +#[cfg(test)] +mod tests { + use super::*; + + #[test] + fn empty_header_names_are_legal() { + assert!(Header::new("", None).is_ok(), "yes, git allows this, so do we"); + } + + #[test] + fn empty_header_sub_names_are_legal() { + assert!( + Header::new("remote", Some(Cow::Borrowed("".into()))).is_ok(), + "yes, git allows this, so do we" + ); + } +} + +impl Header<'_> { + /// Return true if this is a header like `[legacy.subsection]`, i.e. if its separator is `.`, or false otherwise. + pub fn is_legacy(&self) -> bool { + self.separator.as_deref().map_or(false, |n| n == ".") + } + + /// Return the subsection name, if present, i.e. "origin" in `[remote "origin"]`. + /// + /// It is parsed without quotes, and with escapes folded + /// into their resulting characters. + /// Thus during serialization, escapes and quotes must be re-added. + /// This makes it possible to use [`Event`] data for lookups directly. + pub fn subsection_name(&self) -> Option<&BStr> { + self.subsection_name.as_deref() + } + + /// Return the name of the header, like "remote" in `[remote "origin"]`. + pub fn name(&self) -> &BStr { + &self.name + } + + /// Serialize this type into a `BString` for convenience. + /// + /// Note that `to_string()` can also be used, but might not be lossless. + #[must_use] + pub fn to_bstring(&self) -> BString { + let mut buf = Vec::new(); + self.write_to(&mut buf).expect("io error impossible"); + buf.into() + } + + /// Stream ourselves to the given `out`, in order to reproduce this header mostly losslessly + /// as it was parsed. 
+ pub fn write_to(&self, mut out: impl std::io::Write) -> std::io::Result<()> { + out.write_all(b"[")?; + out.write_all(&self.name)?; + + if let (Some(sep), Some(subsection)) = (&self.separator, &self.subsection_name) { + let sep = sep.as_ref(); + out.write_all(sep)?; + if sep == "." { + out.write_all(subsection.as_ref())?; + } else { + out.write_all(b"\"")?; + out.write_all(escape_subsection(subsection.as_ref()).as_ref())?; + out.write_all(b"\"")?; + } + } + + out.write_all(b"]") + } + + /// Turn this instance into a fully owned one with `'static` lifetime. + #[must_use] + pub fn to_owned(&self) -> Header<'static> { + Header { + name: self.name.to_owned(), + separator: self.separator.clone().map(|v| Cow::Owned(v.into_owned())), + subsection_name: self.subsection_name.clone().map(|v| Cow::Owned(v.into_owned())), + } + } +} + +/// Prefix every `\` and `"` in `name` with a backslash, borrowing `name` unchanged if it contains neither. +fn escape_subsection(name: &BStr) -> Cow<'_, BStr> { + if name.find_byteset(b"\\\"").is_none() { + return name.into(); + } + let mut buf = Vec::with_capacity(name.len()); + for b in name.iter().copied() { + match b { + b'\\' => buf.push_str(br#"\\"#), + b'"' => buf.push_str(br#"\""#), + _ => buf.push(b), + } + } + BString::from(buf).into() +} + +impl Display for Header<'_> { + fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result { + Display::fmt(&self.to_bstring(), f) + } +} + +impl From<Header<'_>> for BString { + fn from(header: Header<'_>) -> Self { + // Serialize explicitly: `header.into()` would resolve back to this very impl and recurse forever. + header.to_bstring() + } +} + +impl From<&Header<'_>> for BString { + fn from(header: &Header<'_>) -> Self { + header.to_bstring() + } +} + +impl<'a> From<Header<'a>> for Event<'a> { + fn from(header: Header<'_>) -> Event<'_> { + Event::SectionHeader(header) + } +} diff --git a/vendor/gix-config/src/parse/section/mod.rs b/vendor/gix-config/src/parse/section/mod.rs new file mode 100644 index 000000000..7ba08b87d --- /dev/null +++ b/vendor/gix-config/src/parse/section/mod.rs @@ -0,0 +1,187 @@ +use std::{borrow::Cow, fmt::Display}; + +use bstr::BStr; +use smallvec::SmallVec; + +use 
crate::parse::{Event, Section}; + +/// +pub mod header; + +pub(crate) mod unvalidated; + +/// A container for events, avoiding heap allocations in typical files. +pub type Events<'a> = SmallVec<[Event<'a>; 64]>; + +/// A parsed section header, containing a name and optionally a subsection name. +#[derive(Clone, Eq, PartialEq, Ord, PartialOrd, Hash, Debug)] +pub struct Header<'a> { + /// The name of the header. + pub(crate) name: Name<'a>, + /// The separator used to determine if the section contains a subsection. + /// This is either a period `.` or a string of whitespace. Note that + /// reconstruction of subsection format is dependent on this value. If this + /// is all whitespace, then the subsection name needs to be surrounded by + /// quotes to have perfect reconstruction. + pub(crate) separator: Option<Cow<'a, BStr>>, + /// The subsection name, if there is one, stored without the surrounding quotes. + /// Serialization re-adds quotes and escapes unless the separator is `.`. + pub(crate) subsection_name: Option<Cow<'a, BStr>>, +} + +impl Section<'_> { + /// Turn this instance into a fully owned one with `'static` lifetime. + #[must_use] + pub fn to_owned(&self) -> Section<'static> { + Section { + header: self.header.to_owned(), + events: self.events.iter().map(Event::to_owned).collect(), + } + } +} + +impl Display for Section<'_> { + fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result { + // The header comes first, followed by every event in the order stored. + write!(f, "{}", self.header)?; + for event in &self.events { + event.fmt(f)?; + } + Ok(()) + } +} + +mod types { + macro_rules! generate_case_insensitive { + ($name:ident, $module:ident, $err_doc:literal, $validate:ident, $cow_inner_type:ty, $comment:literal) => { + /// + pub mod $module { + /// The error returned when `TryFrom` is invoked to create an instance. 
+ #[derive(Debug, thiserror::Error, Copy, Clone)] + #[error($err_doc)] + pub struct Error; + } + + #[doc = $comment] + #[derive(Clone, Eq, Debug, Default)] + pub struct $name<'a>(pub(crate) std::borrow::Cow<'a, $cow_inner_type>); + + impl<'a> $name<'a> { + pub(crate) fn from_str_unchecked(s: &'a str) -> Self { + $name(std::borrow::Cow::Borrowed(s.into())) + } + /// Turn this instance into a fully owned one with `'static` lifetime. + #[must_use] + pub fn to_owned(&self) -> $name<'static> { + $name(std::borrow::Cow::Owned(self.0.clone().into_owned())) + } + } + + // Equality ignores ASCII case; `Ord` and `Hash` below are written to agree with it. + impl PartialEq for $name<'_> { + fn eq(&self, other: &Self) -> bool { + self.0.eq_ignore_ascii_case(&other.0) + } + } + + impl std::fmt::Display for $name<'_> { + fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result { + self.0.fmt(f) + } + } + + impl PartialOrd for $name<'_> { + fn partial_cmp(&self, other: &Self) -> Option<std::cmp::Ordering> { + self.cmp(other).into() + } + } + + // Orders by the ASCII-lowercased bytes, so values that compare equal also order as equal. + impl Ord for $name<'_> { + fn cmp(&self, other: &Self) -> std::cmp::Ordering { + let a = self.0.iter().map(|c| c.to_ascii_lowercase()); + let b = other.0.iter().map(|c| c.to_ascii_lowercase()); + a.cmp(b) + } + } + + // Hashes the ASCII-lowercased bytes, so values that compare equal also hash identically. + impl std::hash::Hash for $name<'_> { + fn hash<H: std::hash::Hasher>(&self, state: &mut H) { + for b in self.0.iter() { + b.to_ascii_lowercase().hash(state); + } + } + } + + impl<'a> std::convert::TryFrom<&'a str> for $name<'a> { + type Error = $module::Error; + + fn try_from(s: &'a str) -> Result<Self, Self::Error> { + Self::try_from(std::borrow::Cow::Borrowed(bstr::ByteSlice::as_bstr(s.as_bytes()))) + } + } + + impl<'a> std::convert::TryFrom<String> for $name<'a> { + type Error = $module::Error; + + fn try_from(s: String) -> Result<Self, Self::Error> { + Self::try_from(std::borrow::Cow::Owned(bstr::BString::from(s))) + } + } + + impl<'a> std::convert::TryFrom<std::borrow::Cow<'a, bstr::BStr>> for $name<'a> { + type Error = $module::Error; + + fn try_from(s: std::borrow::Cow<'a, bstr::BStr>) -> 
Result<Self, Self::Error> { + if $validate(s.as_ref()) { + Ok(Self(s)) + } else { + Err($module::Error) + } + } + } + + impl<'a> std::ops::Deref for $name<'a> { + type Target = $cow_inner_type; + + fn deref(&self) -> &Self::Target { + &self.0 + } + } + + impl<'a> std::convert::AsRef<str> for $name<'a> { + fn as_ref(&self) -> &str { + std::str::from_utf8(self.0.as_ref()).expect("only valid UTF8 makes it through our validation") + } + } + }; + } + + /// Return true if `n` is non-empty and consists only of ASCII alphanumerics or `-`. + fn is_valid_name(n: &bstr::BStr) -> bool { + !n.is_empty() && n.iter().all(|b| b.is_ascii_alphanumeric() || *b == b'-') + } + /// Return true if `n` is a valid name that additionally starts with an ASCII letter. + fn is_valid_key(n: &bstr::BStr) -> bool { + // `is_valid_name` rejects empty input and `&&` short-circuits, so indexing the first byte cannot panic. + is_valid_name(n) && n[0].is_ascii_alphabetic() + } + + generate_case_insensitive!( + Name, + name, + "Valid names consist of alphanumeric characters or dashes.", + is_valid_name, + bstr::BStr, + "Wrapper struct for section header names, like `remote`, since these are case-insensitive." + ); + + generate_case_insensitive!( + Key, + key, + "Valid keys consist of alphanumeric characters or dashes, starting with an alphabetic character.", + is_valid_key, + bstr::BStr, + "Wrapper struct for key names, like `path` in `include.path`, since keys are case-insensitive." + ); +} +pub use types::{key, name, Key, Name}; + +/// Convert a `Cow` over `str` into one over `BStr`, keeping borrowed data borrowed and owned data owned. +pub(crate) fn into_cow_bstr(c: Cow<'_, str>) -> Cow<'_, BStr> { + match c { + Cow::Borrowed(s) => Cow::Borrowed(s.into()), + Cow::Owned(s) => Cow::Owned(s.into()), + } +} diff --git a/vendor/gix-config/src/parse/section/unvalidated.rs b/vendor/gix-config/src/parse/section/unvalidated.rs new file mode 100644 index 000000000..1710837fe --- /dev/null +++ b/vendor/gix-config/src/parse/section/unvalidated.rs @@ -0,0 +1,25 @@ +use bstr::{BStr, ByteSlice}; + +/// An unvalidated parse result of a key for a section, parsing input like `remote.origin` or `core`. +#[derive(Debug, PartialEq, Ord, PartialOrd, Eq, Hash, Clone, Copy)] +pub struct Key<'a> { + /// The name of the section, like `remote` in `remote.origin`. 
+ pub section_name: &'a str, + /// The name of the sub-section, like `origin` in `remote.origin`. + pub subsection_name: Option<&'a BStr>, +} + +impl<'a> Key<'a> { + /// Parse `input` like `remote.origin` or `core` as a `Key` to make its section specific fields available. + /// + /// `input` is split at the first `.` only: the part before it becomes the section name and everything + /// after it, further dots included, becomes the subsection name. Without a `.` there is no subsection name. + /// + /// Returns `None` if the section name part is not valid UTF-8. Apart from that, `input` isn't validated. + pub fn parse(input: impl Into<&'a BStr>) -> Option<Self> { + let input = input.into(); + let mut tokens = input.splitn(2, |b| *b == b'.'); + + Some(Key { + section_name: tokens.next()?.to_str().ok()?, + subsection_name: tokens.next().map(Into::into), + }) + } +} |