author | Daniel Baumann <daniel.baumann@progress-linux.org> | 2024-05-04 12:41:35 +0000
committer | Daniel Baumann <daniel.baumann@progress-linux.org> | 2024-05-04 12:41:35 +0000
commit | 7e5d7eea9c580ef4b41a765bde624af431942b96 (patch)
tree | 2c0d9ca12878fc4525650aa4e54d77a81a07cc09 /vendor/gix-object/src
parent | Adding debian version 1.70.0+dfsg1-9. (diff)
download | rustc-7e5d7eea9c580ef4b41a765bde624af431942b96.tar.xz rustc-7e5d7eea9c580ef4b41a765bde624af431942b96.zip
Merging upstream version 1.70.0+dfsg2.
Signed-off-by: Daniel Baumann <daniel.baumann@progress-linux.org>
Diffstat (limited to 'vendor/gix-object/src')
24 files changed, 2999 insertions, 0 deletions
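The files added below define the crate's object-decoding entry points (`CommitRef::from_bytes`, `MessageRef::summary`, trailer iteration over the last message paragraph). As orientation for reviewing the diff, here is a minimal sketch of how those pieces fit together; the helper function name and sample usage are illustrative only and are not part of the vendored sources:

```rust
use gix_object::CommitRef;

// Illustrative helper (not part of the vendored sources): parse a commit's
// serialized bytes and print its summary line plus any message trailers.
fn print_commit_info(data: &[u8]) -> Result<(), gix_object::decode::Error> {
    // Borrowing parse of the commit, as defined in vendor/gix-object/src/commit/mod.rs.
    let commit = CommitRef::from_bytes(data)?;

    // message() splits the raw message into a title and an optional body;
    // summary() folds the title's intermediate whitespace into single spaces.
    let message = commit.message();
    println!("summary: {}", message.summary());

    // Trailers such as "Signed-off-by: ..." are parsed from the last paragraph.
    for trailer in commit.message_trailers() {
        println!("trailer: {}: {}", trailer.token, trailer.value);
    }
    Ok(())
}
```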
diff --git a/vendor/gix-object/src/blob.rs b/vendor/gix-object/src/blob.rs new file mode 100644 index 000000000..ff2eeafc8 --- /dev/null +++ b/vendor/gix-object/src/blob.rs @@ -0,0 +1,47 @@ +use std::{convert::Infallible, io}; + +use crate::{Blob, BlobRef, Kind}; + +impl<'a> crate::WriteTo for BlobRef<'a> { + /// Write the blobs data to `out` verbatim. + fn write_to(&self, mut out: impl io::Write) -> io::Result<()> { + out.write_all(self.data) + } + + fn size(&self) -> usize { + self.data.len() + } + + fn kind(&self) -> Kind { + Kind::Blob + } +} + +impl crate::WriteTo for Blob { + /// Write the blobs data to `out` verbatim. + fn write_to(&self, out: impl io::Write) -> io::Result<()> { + self.to_ref().write_to(out) + } + + fn size(&self) -> usize { + self.to_ref().size() + } + + fn kind(&self) -> Kind { + Kind::Blob + } +} + +impl Blob { + /// Provide a `BlobRef` to this owned blob + pub fn to_ref(&self) -> BlobRef<'_> { + BlobRef { data: &self.data } + } +} + +impl<'a> BlobRef<'a> { + /// Instantiate a `Blob` from the given `data`, which is used as-is. + pub fn from_bytes(data: &[u8]) -> Result<BlobRef<'_>, Infallible> { + Ok(BlobRef { data }) + } +} diff --git a/vendor/gix-object/src/commit/decode.rs b/vendor/gix-object/src/commit/decode.rs new file mode 100644 index 000000000..821feaabb --- /dev/null +++ b/vendor/gix-object/src/commit/decode.rs @@ -0,0 +1,71 @@ +use std::borrow::Cow; + +use nom::{ + branch::alt, + bytes::complete::{is_not, tag}, + combinator::{all_consuming, opt}, + error::{context, ContextError, ParseError}, + multi::many0, + IResult, Parser, +}; +use smallvec::SmallVec; + +use crate::{parse, parse::NL, BStr, ByteSlice, CommitRef}; + +pub fn message<'a, E: ParseError<&'a [u8]> + ContextError<&'a [u8]>>(i: &'a [u8]) -> IResult<&'a [u8], &'a BStr, E> { + if i.is_empty() { + // newline + [message] + return Err(nom::Err::Error(E::add_context( + i, + "newline + <message>", + E::from_error_kind(i, nom::error::ErrorKind::Eof), + ))); + } + let (i, _) = context("a newline separates headers from the message", tag(NL))(i)?; + Ok((&[], i.as_bstr())) +} + +pub fn commit<'a, E: ParseError<&'a [u8]> + ContextError<&'a [u8]>>( + i: &'a [u8], +) -> IResult<&'a [u8], CommitRef<'_>, E> { + let (i, tree) = context("tree <40 lowercase hex char>", |i| { + parse::header_field(i, b"tree", parse::hex_hash) + })(i)?; + let (i, parents) = context( + "zero or more 'parent <40 lowercase hex char>'", + many0(|i| parse::header_field(i, b"parent", parse::hex_hash)), + )(i)?; + let (i, author) = context("author <signature>", |i| { + parse::header_field(i, b"author", parse::signature) + })(i)?; + let (i, committer) = context("committer <signature>", |i| { + parse::header_field(i, b"committer", parse::signature) + })(i)?; + let (i, encoding) = context( + "encoding <encoding>", + opt(|i| parse::header_field(i, b"encoding", is_not(NL))), + )(i)?; + let (i, extra_headers) = context( + "<field> <single-line|multi-line>", + many0(alt(( + parse::any_header_field_multi_line.map(|(k, o)| (k.as_bstr(), Cow::Owned(o))), + |i| { + parse::any_header_field(i, is_not(NL)).map(|(i, (k, o))| (i, (k.as_bstr(), Cow::Borrowed(o.as_bstr())))) + }, + ))), + )(i)?; + let (i, message) = all_consuming(message)(i)?; + + Ok(( + i, + CommitRef { + tree, + parents: SmallVec::from(parents), + author, + committer, + encoding: encoding.map(ByteSlice::as_bstr), + message, + extra_headers, + }, + )) +} diff --git a/vendor/gix-object/src/commit/message/body.rs b/vendor/gix-object/src/commit/message/body.rs new file mode 100644 index 
000000000..27175a477 --- /dev/null +++ b/vendor/gix-object/src/commit/message/body.rs @@ -0,0 +1,152 @@ +use std::ops::Deref; + +use nom::{ + bytes::complete::{tag, take_until1}, + combinator::all_consuming, + error::{ErrorKind, ParseError}, + sequence::terminated, + IResult, +}; + +use crate::{ + bstr::{BStr, ByteSlice}, + commit::message::BodyRef, +}; + +/// An iterator over trailers as parsed from a commit message body. +/// +/// lines with parsing failures will be skipped +pub struct Trailers<'a> { + pub(crate) cursor: &'a [u8], +} + +/// A trailer as parsed from the commit message body. +#[derive(PartialEq, Eq, Debug, Hash, Ord, PartialOrd, Clone, Copy)] +#[cfg_attr(feature = "serde1", derive(serde::Serialize, serde::Deserialize))] +pub struct TrailerRef<'a> { + /// The name of the trailer, like "Signed-off-by", up to the separator ": " + #[cfg_attr(feature = "serde1", serde(borrow))] + pub token: &'a BStr, + /// The value right after the separator ": ", with leading and trailing whitespace trimmed. + /// Note that multi-line values aren't currently supported. + pub value: &'a BStr, +} + +fn parse_single_line_trailer<'a, E: ParseError<&'a [u8]>>(i: &'a [u8]) -> IResult<&'a [u8], (&'a BStr, &'a BStr), E> { + let (value, token) = terminated(take_until1(b":".as_ref()), tag(b": "))(i.trim_end())?; + if token.trim_end().len() != token.len() || value.trim_start().len() != value.len() { + Err(nom::Err::Failure(E::from_error_kind(i, ErrorKind::Fail))) + } else { + Ok((&[], (token.as_bstr(), value.as_bstr()))) + } +} + +impl<'a> Iterator for Trailers<'a> { + type Item = TrailerRef<'a>; + + fn next(&mut self) -> Option<Self::Item> { + if self.cursor.is_empty() { + return None; + } + for line in self.cursor.lines_with_terminator() { + self.cursor = &self.cursor[line.len()..]; + if let Some(trailer) = + all_consuming(parse_single_line_trailer::<()>)(line) + .ok() + .map(|(_, (token, value))| TrailerRef { + token: token.trim().as_bstr(), + value: value.trim().as_bstr(), + }) + { + return Some(trailer); + } + } + None + } +} + +impl<'a> BodyRef<'a> { + /// Parse `body` bytes into the trailer and the actual body. + pub fn from_bytes(body: &'a [u8]) -> Self { + body.rfind(b"\n\n") + .map(|pos| (2, pos)) + .or_else(|| body.rfind(b"\r\n\r\n").map(|pos| (4, pos))) + .and_then(|(sep_len, pos)| { + let trailer = &body[pos + sep_len..]; + let body = &body[..pos]; + Trailers { cursor: trailer }.next().map(|_| BodyRef { + body_without_trailer: body.as_bstr(), + start_of_trailer: trailer, + }) + }) + .unwrap_or_else(|| BodyRef { + body_without_trailer: body.as_bstr(), + start_of_trailer: &[], + }) + } + + /// Returns the body with the trailers stripped. + /// + /// You can iterate trailers with the [`trailers()`][BodyRef::trailers()] method. + pub fn without_trailer(&self) -> &'a BStr { + self.body_without_trailer + } + + /// Return an iterator over the trailers parsed from the last paragraph of the body. May be empty. 
+ pub fn trailers(&self) -> Trailers<'a> { + Trailers { + cursor: self.start_of_trailer, + } + } +} + +impl<'a> AsRef<BStr> for BodyRef<'a> { + fn as_ref(&self) -> &BStr { + self.body_without_trailer + } +} + +impl<'a> Deref for BodyRef<'a> { + type Target = BStr; + + fn deref(&self) -> &Self::Target { + self.body_without_trailer + } +} +#[cfg(test)] +mod test_parse_trailer { + use super::*; + + fn parse(input: &str) -> (&BStr, &BStr) { + parse_single_line_trailer::<()>(input.as_bytes()).unwrap().1 + } + + #[test] + fn simple_newline() { + assert_eq!(parse("foo: bar\n"), ("foo".into(), "bar".into())); + } + + #[test] + fn simple_non_ascii_no_newline() { + assert_eq!(parse("🤗: 🎉"), ("🤗".into(), "🎉".into())); + } + + #[test] + fn with_lots_of_whitespace_newline() { + assert_eq!( + parse("hello foo: bar there \n"), + ("hello foo".into(), "bar there".into()) + ); + } + + #[test] + fn extra_whitespace_before_token_or_value_is_error() { + assert!(parse_single_line_trailer::<()>(b"foo : bar").is_err()); + assert!(parse_single_line_trailer::<()>(b"foo: bar").is_err()) + } + + #[test] + fn simple_newline_windows() { + assert_eq!(parse("foo: bar\r\n"), ("foo".into(), "bar".into())); + } +} diff --git a/vendor/gix-object/src/commit/message/decode.rs b/vendor/gix-object/src/commit/message/decode.rs new file mode 100644 index 000000000..6224909bd --- /dev/null +++ b/vendor/gix-object/src/commit/message/decode.rs @@ -0,0 +1,57 @@ +use nom::{ + branch::alt, + bytes::complete::{tag, take_till1}, + combinator::all_consuming, + error::ParseError, + sequence::pair, + IResult, +}; + +use crate::bstr::{BStr, ByteSlice}; + +pub(crate) fn newline<'a, E: ParseError<&'a [u8]>>(i: &'a [u8]) -> IResult<&'a [u8], &'a [u8], E> { + alt((tag(b"\r\n"), tag(b"\n")))(i) +} + +fn subject_and_body<'a, E: ParseError<&'a [u8]>>(i: &'a [u8]) -> IResult<&'a [u8], (&'a BStr, Option<&'a BStr>), E> { + let mut c = i; + let mut consumed_bytes = 0; + while !c.is_empty() { + c = match take_till1::<_, _, E>(|c| c == b'\n' || c == b'\r')(c) { + Ok((i1, segment)) => { + consumed_bytes += segment.len(); + match pair::<_, _, _, E, _, _>(newline, newline)(i1) { + Ok((body, _)) => { + return Ok(( + &[], + ( + i[0usize..consumed_bytes].as_bstr(), + (!body.is_empty()).then(|| body.as_bstr()), + ), + )); + } + Err(_) => match i1.get(1..) { + Some(next) => { + consumed_bytes += 1; + next + } + None => break, + }, + } + } + Err(_) => match c.get(1..) { + Some(next) => { + consumed_bytes += 1; + next + } + None => break, + }, + }; + } + Ok((&[], (i.as_bstr(), None))) +} + +/// Returns title and body, without separator +pub fn message(input: &[u8]) -> (&BStr, Option<&BStr>) { + all_consuming(subject_and_body::<()>)(input).expect("cannot fail").1 +} diff --git a/vendor/gix-object/src/commit/message/mod.rs b/vendor/gix-object/src/commit/message/mod.rs new file mode 100644 index 000000000..1d5fd2944 --- /dev/null +++ b/vendor/gix-object/src/commit/message/mod.rs @@ -0,0 +1,100 @@ +use std::borrow::Cow; + +use crate::{ + bstr::{BStr, BString, ByteSlice, ByteVec}, + commit::MessageRef, + CommitRef, +}; + +/// +pub mod body; +mod decode; + +impl<'a> CommitRef<'a> { + /// Return exactly the same message as [`MessageRef::summary()`]. + pub fn message_summary(&self) -> Cow<'a, BStr> { + summary(self.message) + } + + /// Return an iterator over message trailers as obtained from the last paragraph of the commit message. + /// May be empty. 
+ pub fn message_trailers(&self) -> body::Trailers<'a> { + BodyRef::from_bytes(self.message).trailers() + } +} + +impl<'a> MessageRef<'a> { + /// Parse the given `input` as message. + /// + /// Note that this cannot fail as everything will be interpreted as title if there is no body separator. + pub fn from_bytes(input: &'a [u8]) -> Self { + let (title, body) = decode::message(input); + MessageRef { title, body } + } + + /// Produce a short commit summary for the message title. + /// + /// This means the following + /// + /// * Take the subject line which is delimited by two newlines (\n\n) + /// * transform intermediate consecutive whitespace including \r into one space + /// + /// The resulting summary will have folded whitespace before a newline into spaces and stopped that process + /// once two consecutive newlines are encountered. + pub fn summary(&self) -> Cow<'a, BStr> { + summary(self.title) + } + + /// Further parse the body into into non-trailer and trailers, which can be iterated from the returned [`BodyRef`]. + pub fn body(&self) -> Option<BodyRef<'a>> { + self.body.map(|b| BodyRef::from_bytes(b)) + } +} + +pub(crate) fn summary(message: &BStr) -> Cow<'_, BStr> { + let message = message.trim(); + match message.find_byte(b'\n') { + Some(mut pos) => { + let mut out = BString::default(); + let mut previous_pos = None; + loop { + if let Some(previous_pos) = previous_pos { + if previous_pos + 1 == pos { + let len_after_trim = out.trim_end().len(); + out.resize(len_after_trim, 0); + break out.into(); + } + } + let message_to_newline = &message[previous_pos.map(|p| p + 1).unwrap_or(0)..pos]; + + if let Some(pos_before_whitespace) = message_to_newline.rfind_not_byteset(b"\t\n\x0C\r ") { + out.extend_from_slice(&message_to_newline[..pos_before_whitespace + 1]); + } + out.push_byte(b' '); + previous_pos = Some(pos); + match message.get(pos + 1..).and_then(|i| i.find_byte(b'\n')) { + Some(next_nl_pos) => pos += next_nl_pos + 1, + None => { + if let Some(slice) = message.get((pos + 1)..) { + out.extend_from_slice(slice); + } + break out.into(); + } + } + } + } + None => message.as_bstr().into(), + } +} + +/// A reference to a message body, further parsed to only contain the non-trailer parts. +/// +/// See [git-interpret-trailers](https://git-scm.com/docs/git-interpret-trailers) for more information +/// on what constitutes trailers and not that this implementation is only good for typical sign-off footer or key-value parsing. +/// +/// Note that we only parse trailers from the bottom of the body. +#[derive(PartialEq, Eq, Debug, Hash, Ord, PartialOrd, Clone, Copy)] +pub struct BodyRef<'a> { + body_without_trailer: &'a BStr, + start_of_trailer: &'a [u8], +} diff --git a/vendor/gix-object/src/commit/mod.rs b/vendor/gix-object/src/commit/mod.rs new file mode 100644 index 000000000..94e8e6ec0 --- /dev/null +++ b/vendor/gix-object/src/commit/mod.rs @@ -0,0 +1,119 @@ +use bstr::{BStr, ByteSlice}; + +use crate::{Commit, CommitRef, TagRef}; + +mod decode; +/// +pub mod message; + +/// A parsed commit message that assumes a title separated from the body by two consecutive newlines. +/// +/// Titles can have any amount of whitespace +#[derive(PartialEq, Eq, Debug, Hash, Ord, PartialOrd, Clone, Copy)] +#[cfg_attr(feature = "serde1", derive(serde::Serialize, serde::Deserialize))] +pub struct MessageRef<'a> { + /// The title of the commit, as separated from the body with two consecutive newlines. The newlines are not included. 
+ #[cfg_attr(feature = "serde1", serde(borrow))] + pub title: &'a BStr, + /// All bytes not consumed by the title, excluding the separating newlines. + /// + /// The body is `None` if there was now title separation or the body was empty after the separator. + pub body: Option<&'a BStr>, +} + +/// +pub mod ref_iter; + +mod write; + +impl<'a> CommitRef<'a> { + /// Deserialize a commit from the given `data` bytes while avoiding most allocations. + pub fn from_bytes(data: &'a [u8]) -> Result<CommitRef<'a>, crate::decode::Error> { + decode::commit(data).map(|(_, t)| t).map_err(crate::decode::Error::from) + } + /// Return the `tree` fields hash digest. + pub fn tree(&self) -> gix_hash::ObjectId { + gix_hash::ObjectId::from_hex(self.tree).expect("prior validation of tree hash during parsing") + } + + /// Returns an iterator of parent object ids + pub fn parents(&self) -> impl Iterator<Item = gix_hash::ObjectId> + '_ { + self.parents + .iter() + .map(|hex_hash| gix_hash::ObjectId::from_hex(hex_hash).expect("prior validation of hashes during parsing")) + } + + /// Returns a convenient iterator over all extra headers. + pub fn extra_headers(&self) -> crate::commit::ExtraHeaders<impl Iterator<Item = (&BStr, &BStr)>> { + crate::commit::ExtraHeaders::new(self.extra_headers.iter().map(|(k, v)| (*k, v.as_ref()))) + } + + /// Return the author, with whitespace trimmed. + /// + /// This is different from the `author` field which may contain whitespace. + pub fn author(&self) -> gix_actor::SignatureRef<'a> { + self.author.trim() + } + + /// Return the committer, with whitespace trimmed. + /// + /// This is different from the `committer` field which may contain whitespace. + pub fn committer(&self) -> gix_actor::SignatureRef<'a> { + self.committer.trim() + } + + /// Returns a partially parsed message from which more information can be derived. + pub fn message(&self) -> MessageRef<'a> { + MessageRef::from_bytes(self.message) + } + + /// Returns the time at which this commit was created. + pub fn time(&self) -> gix_actor::Time { + self.committer.time + } +} + +impl Commit { + /// Returns a convenient iterator over all extra headers. + pub fn extra_headers(&self) -> ExtraHeaders<impl Iterator<Item = (&BStr, &BStr)>> { + ExtraHeaders::new(self.extra_headers.iter().map(|(k, v)| (k.as_bstr(), v.as_bstr()))) + } +} + +/// An iterator over extra headers in [owned][crate::Commit] and [borrowed][crate::CommitRef] commits. +pub struct ExtraHeaders<I> { + inner: I, +} + +/// Instantiation and convenience. +impl<'a, I> ExtraHeaders<I> +where + I: Iterator<Item = (&'a BStr, &'a BStr)>, +{ + /// Create a new instance from an iterator over tuples of (name, value) pairs. + pub fn new(iter: I) -> Self { + ExtraHeaders { inner: iter } + } + /// Find the _value_ of the _first_ header with the given `name`. + pub fn find(mut self, name: &str) -> Option<&'a BStr> { + self.inner + .find_map(move |(k, v)| if k == name.as_bytes().as_bstr() { Some(v) } else { None }) + } + /// Return an iterator over all _values_ of headers with the given `name`. + pub fn find_all(self, name: &'a str) -> impl Iterator<Item = &'a BStr> { + self.inner + .filter_map(move |(k, v)| if k == name.as_bytes().as_bstr() { Some(v) } else { None }) + } + /// Return an iterator over all git mergetags. + /// + /// A merge tag is a tag object embedded within the respective header field of a commit, making + /// it a child object of sorts. 
+ pub fn mergetags(self) -> impl Iterator<Item = Result<TagRef<'a>, crate::decode::Error>> { + self.find_all("mergetag").map(|b| TagRef::from_bytes(b)) + } + + /// Return the cryptographic signature provided by gpg/pgp verbatim. + pub fn pgp_signature(self) -> Option<&'a BStr> { + self.find("gpgsig") + } +} diff --git a/vendor/gix-object/src/commit/ref_iter.rs b/vendor/gix-object/src/commit/ref_iter.rs new file mode 100644 index 000000000..d502bea73 --- /dev/null +++ b/vendor/gix-object/src/commit/ref_iter.rs @@ -0,0 +1,278 @@ +use std::borrow::Cow; + +use bstr::BStr; +use gix_hash::{oid, ObjectId}; +use nom::{ + branch::alt, + bytes::complete::is_not, + combinator::{all_consuming, opt}, + error::context, +}; + +use crate::{bstr::ByteSlice, commit::decode, parse, parse::NL, CommitRefIter}; + +#[derive(Copy, Clone)] +pub(crate) enum SignatureKind { + Author, + Committer, +} + +#[derive(Copy, Clone)] +pub(crate) enum State { + Tree, + Parents, + Signature { of: SignatureKind }, + Encoding, + ExtraHeaders, + Message, +} + +impl Default for State { + fn default() -> Self { + State::Tree + } +} + +impl<'a> CommitRefIter<'a> { + /// Create a commit iterator from data. + pub fn from_bytes(data: &'a [u8]) -> CommitRefIter<'a> { + CommitRefIter { + data, + state: State::default(), + } + } + + /// Returns the object id of this commits tree if it is the first function called and if there is no error in decoding + /// the data. + /// + /// Note that this method must only be called once or else will always return None while consuming a single token. + /// Errors are coerced into options, hiding whether there was an error or not. The caller should assume an error if they + /// call the method as intended. Such a squelched error cannot be recovered unless the objects data is retrieved and parsed again. + /// `next()`. + pub fn tree_id(&mut self) -> Result<ObjectId, crate::decode::Error> { + let tree_id = self.next().ok_or_else(missing_field)??; + Token::try_into_id(tree_id).ok_or_else(missing_field) + } + + /// Return all parent_ids as iterator. + /// + /// Parsing errors are ignored quietly. + pub fn parent_ids(self) -> impl Iterator<Item = gix_hash::ObjectId> + 'a { + self.filter_map(|t| match t { + Ok(Token::Parent { id }) => Some(id), + _ => None, + }) + } + + /// Returns all signatures, first the author, then the committer, if there is no decoding error. + /// + /// Errors are coerced into options, hiding whether there was an error or not. The caller knows if there was an error or not + /// if not exactly two signatures were iterable. + /// Errors are not the common case - if an error needs to be detectable, use this instance as iterator. + pub fn signatures(self) -> impl Iterator<Item = gix_actor::SignatureRef<'a>> + 'a { + self.filter_map(|t| match t { + Ok(Token::Author { signature }) | Ok(Token::Committer { signature }) => Some(signature), + _ => None, + }) + } + + /// Returns the committer signature if there is no decoding error. + pub fn committer(mut self) -> Result<gix_actor::SignatureRef<'a>, crate::decode::Error> { + self.find_map(|t| match t { + Ok(Token::Committer { signature }) => Some(Ok(signature)), + Err(err) => Some(Err(err)), + _ => None, + }) + .ok_or_else(missing_field)? + } + + /// Returns the author signature if there is no decoding error. + /// + /// It may contain white space surrounding it, and is exactly as parsed. 
+ pub fn author(mut self) -> Result<gix_actor::SignatureRef<'a>, crate::decode::Error> { + self.find_map(|t| match t { + Ok(Token::Author { signature }) => Some(Ok(signature)), + Err(err) => Some(Err(err)), + _ => None, + }) + .ok_or_else(missing_field)? + } + + /// Returns the message if there is no decoding error. + /// + /// It may contain white space surrounding it, and is exactly as + // parsed. + pub fn message(mut self) -> Result<&'a BStr, crate::decode::Error> { + self.find_map(|t| match t { + Ok(Token::Message(msg)) => Some(Ok(msg)), + Err(err) => Some(Err(err)), + _ => None, + }) + .transpose() + .map(|msg| msg.unwrap_or_default()) + } +} + +fn missing_field() -> crate::decode::Error { + crate::decode::empty_error() +} + +impl<'a> CommitRefIter<'a> { + fn next_inner(i: &'a [u8], state: &mut State) -> Result<(&'a [u8], Token<'a>), crate::decode::Error> { + use State::*; + Ok(match state { + Tree => { + let (i, tree) = context("tree <40 lowercase hex char>", |i| { + parse::header_field(i, b"tree", parse::hex_hash) + })(i)?; + *state = State::Parents; + ( + i, + Token::Tree { + id: ObjectId::from_hex(tree).expect("parsing validation"), + }, + ) + } + Parents => { + let (i, parent) = context( + "commit <40 lowercase hex char>", + opt(|i| parse::header_field(i, b"parent", parse::hex_hash)), + )(i)?; + match parent { + Some(parent) => ( + i, + Token::Parent { + id: ObjectId::from_hex(parent).expect("parsing validation"), + }, + ), + None => { + *state = State::Signature { + of: SignatureKind::Author, + }; + return Self::next_inner(i, state); + } + } + } + Signature { ref mut of } => { + let who = *of; + let (field_name, err_msg) = match of { + SignatureKind::Author => { + *of = SignatureKind::Committer; + (&b"author"[..], "author <signature>") + } + SignatureKind::Committer => { + *state = State::Encoding; + (&b"committer"[..], "committer <signature>") + } + }; + let (i, signature) = context(err_msg, |i| parse::header_field(i, field_name, parse::signature))(i)?; + ( + i, + match who { + SignatureKind::Author => Token::Author { signature }, + SignatureKind::Committer => Token::Committer { signature }, + }, + ) + } + Encoding => { + let (i, encoding) = context( + "encoding <encoding>", + opt(|i| parse::header_field(i, b"encoding", is_not(NL))), + )(i)?; + *state = State::ExtraHeaders; + match encoding { + Some(encoding) => (i, Token::Encoding(encoding.as_bstr())), + None => return Self::next_inner(i, state), + } + } + ExtraHeaders => { + let (i, extra_header) = context( + "<field> <single-line|multi-line>", + opt(alt(( + |i| parse::any_header_field_multi_line(i).map(|(i, (k, o))| (i, (k.as_bstr(), Cow::Owned(o)))), + |i| { + parse::any_header_field(i, is_not(NL)) + .map(|(i, (k, o))| (i, (k.as_bstr(), Cow::Borrowed(o.as_bstr())))) + }, + ))), + )(i)?; + match extra_header { + Some(extra_header) => (i, Token::ExtraHeader(extra_header)), + None => { + *state = State::Message; + return Self::next_inner(i, state); + } + } + } + Message => { + let (i, message) = all_consuming(decode::message)(i)?; + debug_assert!( + i.is_empty(), + "we should have consumed all data - otherwise iter may go forever" + ); + return Ok((i, Token::Message(message))); + } + }) + } +} + +impl<'a> Iterator for CommitRefIter<'a> { + type Item = Result<Token<'a>, crate::decode::Error>; + + fn next(&mut self) -> Option<Self::Item> { + if self.data.is_empty() { + return None; + } + match Self::next_inner(self.data, &mut self.state) { + Ok((data, token)) => { + self.data = data; + Some(Ok(token)) + } + Err(err) => { + 
self.data = &[]; + Some(Err(err)) + } + } + } +} + +/// A token returned by the [commit iterator][CommitRefIter]. +#[allow(missing_docs)] +#[derive(PartialEq, Eq, Debug, Hash, Ord, PartialOrd, Clone)] +pub enum Token<'a> { + Tree { + id: ObjectId, + }, + Parent { + id: ObjectId, + }, + /// A person who authored the content of the commit. + Author { + signature: gix_actor::SignatureRef<'a>, + }, + /// A person who committed the authors work to the repository. + Committer { + signature: gix_actor::SignatureRef<'a>, + }, + Encoding(&'a BStr), + ExtraHeader((&'a BStr, Cow<'a, BStr>)), + Message(&'a BStr), +} + +impl<'a> Token<'a> { + /// Return the object id of this token if its a [tree][Token::Tree] or a [parent commit][Token::Parent]. + pub fn id(&self) -> Option<&oid> { + match self { + Token::Tree { id } | Token::Parent { id } => Some(id.as_ref()), + _ => None, + } + } + + /// Return the owned object id of this token if its a [tree][Token::Tree] or a [parent commit][Token::Parent]. + pub fn try_into_id(self) -> Option<ObjectId> { + match self { + Token::Tree { id } | Token::Parent { id } => Some(id), + _ => None, + } + } +} diff --git a/vendor/gix-object/src/commit/write.rs b/vendor/gix-object/src/commit/write.rs new file mode 100644 index 000000000..40362e5ae --- /dev/null +++ b/vendor/gix-object/src/commit/write.rs @@ -0,0 +1,99 @@ +use std::io; + +use bstr::ByteSlice; + +use crate::{encode, encode::NL, Commit, CommitRef, Kind}; + +impl crate::WriteTo for Commit { + /// Serializes this instance to `out` in the git serialization format. + fn write_to(&self, mut out: impl io::Write) -> io::Result<()> { + encode::trusted_header_id(b"tree", &self.tree, &mut out)?; + for parent in &self.parents { + encode::trusted_header_id(b"parent", parent, &mut out)?; + } + encode::trusted_header_signature(b"author", &self.author.to_ref(), &mut out)?; + encode::trusted_header_signature(b"committer", &self.committer.to_ref(), &mut out)?; + if let Some(encoding) = self.encoding.as_ref() { + encode::header_field(b"encoding", encoding, &mut out)?; + } + for (name, value) in &self.extra_headers { + encode::header_field_multi_line(name, value, &mut out)?; + } + out.write_all(NL)?; + out.write_all(&self.message) + } + + fn kind(&self) -> Kind { + Kind::Commit + } + + fn size(&self) -> usize { + let hash_in_hex = self.tree.kind().len_in_hex(); + b"tree".len() + 1 /*space*/ + hash_in_hex + 1 /* nl */ + + self.parents.iter().count() * (b"parent".len() + 1 + hash_in_hex + 1) + + b"author".len() + 1 /* space */ + self.author.size() + 1 /* nl */ + + b"committer".len() + 1 /* space */ + self.committer.size() + 1 /* nl */ + + self + .encoding + .as_ref() + .map(|e| b"encoding".len() + 1 /* space */ + e.len() + 1 /* nl */) + .unwrap_or(0) + + self + .extra_headers + .iter() + .map(|(name, value)| { + // each header *value* is preceded by a space and followed by a newline + name.len() + value.split_str("\n").map(|s| s.len() + 2).sum::<usize>() + }) + .sum::<usize>() + + 1 /* nl */ + + self.message.len() + } +} + +impl<'a> crate::WriteTo for CommitRef<'a> { + /// Serializes this instance to `out` in the git serialization format. 
+ fn write_to(&self, mut out: impl io::Write) -> io::Result<()> { + encode::trusted_header_id(b"tree", &self.tree(), &mut out)?; + for parent in self.parents() { + encode::trusted_header_id(b"parent", &parent, &mut out)?; + } + encode::trusted_header_signature(b"author", &self.author, &mut out)?; + encode::trusted_header_signature(b"committer", &self.committer, &mut out)?; + if let Some(encoding) = self.encoding.as_ref() { + encode::header_field(b"encoding", encoding, &mut out)?; + } + for (name, value) in &self.extra_headers { + encode::header_field_multi_line(name, value, &mut out)?; + } + out.write_all(NL)?; + out.write_all(self.message) + } + + fn kind(&self) -> Kind { + Kind::Commit + } + + fn size(&self) -> usize { + let hash_in_hex = self.tree().kind().len_in_hex(); + b"tree".len() + 1 /* space */ + hash_in_hex + 1 /* nl */ + + self.parents.iter().count() * (b"parent".len() + 1 /* space */ + hash_in_hex + 1 /* nl */) + + b"author".len() + 1 /* space */ + self.author.size() + 1 /* nl */ + + b"committer".len() + 1 /* space */ + self.committer.size() + 1 /* nl */ + + self + .encoding + .as_ref() + .map(|e| b"encoding".len() + 1 /* space */ + e.len() + 1 /* nl */) + .unwrap_or(0) + + self + .extra_headers + .iter() + .map(|(name, value)| { + // each header *value* is preceded by a space and followed by a newline + name.len() + value.split_str("\n").map(|s| s.len() + 2).sum::<usize>() + }) + .sum::<usize>() + + 1 /* nl */ + + self.message.len() + } +} diff --git a/vendor/gix-object/src/data.rs b/vendor/gix-object/src/data.rs new file mode 100644 index 000000000..abf5e1377 --- /dev/null +++ b/vendor/gix-object/src/data.rs @@ -0,0 +1,96 @@ +//! Contains a borrowed Object bound to a buffer holding its decompressed data. + +use crate::{BlobRef, CommitRef, CommitRefIter, Data, Kind, ObjectRef, TagRef, TagRefIter, TreeRef, TreeRefIter}; + +impl<'a> Data<'a> { + /// Constructs a new data object from `kind` and `data`. + pub fn new(kind: Kind, data: &'a [u8]) -> Data<'a> { + Data { kind, data } + } + /// Decodes the data in the backing slice into a [`ObjectRef`], allowing to access all of its data + /// conveniently. The cost of parsing an object is negligible. + /// + /// **Note** that [mutable, decoded objects][crate::Object] can be created from [`Data`] + /// using [`crate::ObjectRef::into_owned()`]. + pub fn decode(&self) -> Result<ObjectRef<'a>, crate::decode::Error> { + Ok(match self.kind { + Kind::Tree => ObjectRef::Tree(TreeRef::from_bytes(self.data)?), + Kind::Blob => ObjectRef::Blob(BlobRef { data: self.data }), + Kind::Commit => ObjectRef::Commit(CommitRef::from_bytes(self.data)?), + Kind::Tag => ObjectRef::Tag(TagRef::from_bytes(self.data)?), + }) + } + + /// Returns this object as tree iterator to parse entries one at a time to avoid allocations, or + /// `None` if this is not a tree object. + pub fn try_into_tree_iter(self) -> Option<TreeRefIter<'a>> { + match self.kind { + Kind::Tree => Some(TreeRefIter::from_bytes(self.data)), + _ => None, + } + } + + /// Returns this object as commit iterator to parse tokens one at a time to avoid allocations, or + /// `None` if this is not a commit object. + pub fn try_into_commit_iter(self) -> Option<CommitRefIter<'a>> { + match self.kind { + Kind::Commit => Some(CommitRefIter::from_bytes(self.data)), + _ => None, + } + } + + /// Returns this object as tag iterator to parse tokens one at a time to avoid allocations, or + /// `None` if this is not a tag object. 
+ pub fn try_into_tag_iter(self) -> Option<TagRefIter<'a>> { + match self.kind { + Kind::Tag => Some(TagRefIter::from_bytes(self.data)), + _ => None, + } + } +} + +/// Types supporting object hash verification +pub mod verify { + + /// Returned by [`crate::Data::verify_checksum()`] + #[derive(Debug, thiserror::Error)] + #[allow(missing_docs)] + pub enum Error { + #[error("Object expected to have id {desired}, but actual id was {actual}")] + ChecksumMismatch { + desired: gix_hash::ObjectId, + actual: gix_hash::ObjectId, + }, + } + + impl crate::Data<'_> { + /// Compute the checksum of `self` and compare it with the `desired` hash. + /// If the hashes do not match, an [`Error`] is returned, containing the actual + /// hash of `self`. + pub fn verify_checksum(&self, desired: impl AsRef<gix_hash::oid>) -> Result<(), Error> { + let desired = desired.as_ref(); + let mut hasher = gix_features::hash::hasher(desired.kind()); + hasher.update(&crate::encode::loose_header(self.kind, self.data.len())); + hasher.update(self.data); + + let actual_id = gix_hash::ObjectId::from(hasher.digest()); + if desired != actual_id { + return Err(Error::ChecksumMismatch { + desired: desired.into(), + actual: actual_id, + }); + } + Ok(()) + } + } +} + +#[cfg(test)] +mod tests { + use super::*; + + #[test] + fn size_of_object() { + assert_eq!(std::mem::size_of::<Data<'_>>(), 24, "this shouldn't change unnoticed"); + } +} diff --git a/vendor/gix-object/src/encode.rs b/vendor/gix-object/src/encode.rs new file mode 100644 index 000000000..6d291c92a --- /dev/null +++ b/vendor/gix-object/src/encode.rs @@ -0,0 +1,84 @@ +//! Encoding utilities +use std::io::{self, Write}; + +use bstr::{BString, ByteSlice}; + +/// An error returned when object encoding fails. +#[derive(Debug, thiserror::Error)] +#[allow(missing_docs)] +pub enum Error { + #[error("Newlines are not allowed in header values: {value:?}")] + NewlineInHeaderValue { value: BString }, + #[error("Header values must not be empty")] + EmptyValue, +} + +macro_rules! 
check { + ($e: expr) => { + $e.expect("Writing to a Vec should never fail.") + }; +} +/// Generates a loose header buffer +pub fn loose_header(kind: crate::Kind, size: usize) -> smallvec::SmallVec<[u8; 28]> { + let mut v = smallvec::SmallVec::new(); + check!(v.write_all(kind.as_bytes())); + check!(v.write_all(SPACE)); + check!(v.write_all(itoa::Buffer::new().format(size).as_bytes())); + check!(v.write_all(b"\0")); + v +} + +impl From<Error> for io::Error { + fn from(other: Error) -> io::Error { + io::Error::new(io::ErrorKind::Other, other) + } +} + +pub(crate) fn header_field_multi_line(name: &[u8], value: &[u8], mut out: impl io::Write) -> io::Result<()> { + let mut lines = value.as_bstr().split_str(b"\n"); + trusted_header_field(name, lines.next().ok_or(Error::EmptyValue)?, &mut out)?; + for line in lines { + out.write_all(SPACE)?; + out.write_all(line)?; + out.write_all(NL)?; + } + Ok(()) +} + +pub(crate) fn trusted_header_field(name: &[u8], value: &[u8], mut out: impl io::Write) -> io::Result<()> { + out.write_all(name)?; + out.write_all(SPACE)?; + out.write_all(value)?; + out.write_all(NL) +} + +pub(crate) fn trusted_header_signature( + name: &[u8], + value: &gix_actor::SignatureRef<'_>, + mut out: impl io::Write, +) -> io::Result<()> { + out.write_all(name)?; + out.write_all(SPACE)?; + value.write_to(&mut out)?; + out.write_all(NL) +} + +pub(crate) fn trusted_header_id(name: &[u8], value: &gix_hash::ObjectId, mut out: impl io::Write) -> io::Result<()> { + out.write_all(name)?; + out.write_all(SPACE)?; + value.write_hex_to(&mut out)?; + out.write_all(NL) +} + +pub(crate) fn header_field(name: &[u8], value: &[u8], out: impl io::Write) -> io::Result<()> { + if value.is_empty() { + return Err(Error::EmptyValue.into()); + } + if value.find(NL).is_some() { + return Err(Error::NewlineInHeaderValue { value: value.into() }.into()); + } + trusted_header_field(name, value, out) +} + +pub(crate) const NL: &[u8; 1] = b"\n"; +pub(crate) const SPACE: &[u8; 1] = b" "; diff --git a/vendor/gix-object/src/kind.rs b/vendor/gix-object/src/kind.rs new file mode 100644 index 000000000..86df251bf --- /dev/null +++ b/vendor/gix-object/src/kind.rs @@ -0,0 +1,40 @@ +use std::fmt; + +use crate::Kind; + +/// The Error used in [`Kind::from_bytes()`]. +#[derive(Debug, Clone, thiserror::Error)] +#[allow(missing_docs)] +pub enum Error { + #[error("Unknown object kind: {kind:?}")] + InvalidObjectKind { kind: bstr::BString }, +} + +impl Kind { + /// Parse a `Kind` from its serialized loose git objects. + pub fn from_bytes(s: &[u8]) -> Result<Kind, Error> { + Ok(match s { + b"tree" => Kind::Tree, + b"blob" => Kind::Blob, + b"commit" => Kind::Commit, + b"tag" => Kind::Tag, + _ => return Err(Error::InvalidObjectKind { kind: s.into() }), + }) + } + + /// Return the name of `self` for use in serialized loose git objects. + pub fn as_bytes(&self) -> &[u8] { + match self { + Kind::Tree => b"tree", + Kind::Commit => b"commit", + Kind::Blob => b"blob", + Kind::Tag => b"tag", + } + } +} + +impl fmt::Display for Kind { + fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result { + f.write_str(std::str::from_utf8(self.as_bytes()).expect("Converting Kind name to utf8")) + } +} diff --git a/vendor/gix-object/src/lib.rs b/vendor/gix-object/src/lib.rs new file mode 100644 index 000000000..80273af9f --- /dev/null +++ b/vendor/gix-object/src/lib.rs @@ -0,0 +1,377 @@ +//! This crate provides types for [read-only git objects][crate::ObjectRef] backed by bytes provided in git's serialization format +//! 
as well as [mutable versions][Object] of these. Both types of objects can be encoded. +//! ## Feature Flags +#![cfg_attr( + feature = "document-features", + cfg_attr(doc, doc = ::document_features::document_features!()) +)] +#![cfg_attr(docsrs, feature(doc_cfg, doc_auto_cfg))] +#![deny(missing_docs, rust_2018_idioms)] +#![forbid(unsafe_code)] + +use std::borrow::Cow; + +/// For convenience to allow using `bstr` without adding it to own cargo manifest. +pub use bstr; +use bstr::{BStr, BString, ByteSlice}; +use smallvec::SmallVec; + +/// +pub mod commit; +mod object; +/// +pub mod tag; +/// +pub mod tree; + +mod blob; +/// +pub mod data; + +mod traits; +pub use traits::WriteTo; + +pub mod encode; +pub(crate) mod parse; + +/// +pub mod kind; + +/// The four types of objects that git differentiates. #[derive(PartialEq, Eq, Debug, Hash, Ord, PartialOrd, Clone, Copy)] +#[cfg_attr(feature = "serde1", derive(serde::Serialize, serde::Deserialize))] +#[derive(PartialEq, Eq, Debug, Hash, Ord, PartialOrd, Clone, Copy)] +#[allow(missing_docs)] +pub enum Kind { + Tree, + Blob, + Commit, + Tag, +} +/// A chunk of any [`data`][BlobRef::data]. +#[derive(PartialEq, Eq, Debug, Hash, Ord, PartialOrd, Clone)] +#[cfg_attr(feature = "serde1", derive(serde::Serialize, serde::Deserialize))] +pub struct BlobRef<'a> { + /// The bytes themselves. + pub data: &'a [u8], +} + +/// A mutable chunk of any [`data`][Blob::data]. +#[derive(PartialEq, Eq, Debug, Hash, Ord, PartialOrd, Clone)] +#[cfg_attr(feature = "serde1", derive(serde::Serialize, serde::Deserialize))] +pub struct Blob { + /// The data itself. + pub data: Vec<u8>, +} + +/// A git commit parsed using [`from_bytes()`][CommitRef::from_bytes()]. +/// +/// A commit encapsulates information about a point in time at which the state of the repository is recorded, usually after a +/// change which is documented in the commit `message`. +#[derive(PartialEq, Eq, Debug, Hash, Ord, PartialOrd, Clone)] +#[cfg_attr(feature = "serde1", derive(serde::Serialize, serde::Deserialize))] +pub struct CommitRef<'a> { + /// HEX hash of tree object we point to. Usually 40 bytes long. + /// + /// Use [`tree()`][CommitRef::tree()] to obtain a decoded version of it. + #[cfg_attr(feature = "serde1", serde(borrow))] + pub tree: &'a BStr, + /// HEX hash of each parent commit. Empty for first commit in repository. + pub parents: SmallVec<[&'a BStr; 1]>, + /// Who wrote this commit. Name and email might contain whitespace and are not trimmed to ensure round-tripping. + /// + /// Use the [`author()`][CommitRef::author()] method to received a trimmed version of it. + pub author: gix_actor::SignatureRef<'a>, + /// Who committed this commit. Name and email might contain whitespace and are not trimmed to ensure round-tripping. + /// + /// Use the [`committer()`][CommitRef::committer()] method to received a trimmed version of it. + /// + /// This may be different from the `author` in case the author couldn't write to the repository themselves and + /// is commonly encountered with contributed commits. + pub committer: gix_actor::SignatureRef<'a>, + /// The name of the message encoding, otherwise [UTF-8 should be assumed](https://github.com/git/git/blob/e67fbf927dfdf13d0b21dc6ea15dc3c7ef448ea0/commit.c#L1493:L1493). + pub encoding: Option<&'a BStr>, + /// The commit message documenting the change. + pub message: &'a BStr, + /// Extra header fields, in order of them being encountered, made accessible with the iterator returned by [`extra_headers()`][CommitRef::extra_headers()]. 
+ pub extra_headers: Vec<(&'a BStr, Cow<'a, BStr>)>, +} + +/// Like [`CommitRef`][crate::CommitRef], but as `Iterator` to support (up to) entirely allocation free parsing. +/// It's particularly useful to traverse the commit graph without ever allocating arrays for parents. +#[derive(Copy, Clone)] +pub struct CommitRefIter<'a> { + data: &'a [u8], + state: commit::ref_iter::State, +} + +/// A mutable git commit, representing an annotated state of a working tree along with a reference to its historical commits. +#[derive(PartialEq, Eq, Debug, Hash, Ord, PartialOrd, Clone)] +#[cfg_attr(feature = "serde1", derive(serde::Serialize, serde::Deserialize))] +pub struct Commit { + /// The hash of recorded working tree state. + pub tree: gix_hash::ObjectId, + /// Hash of each parent commit. Empty for the first commit in repository. + pub parents: SmallVec<[gix_hash::ObjectId; 1]>, + /// Who wrote this commit. + pub author: gix_actor::Signature, + /// Who committed this commit. + /// + /// This may be different from the `author` in case the author couldn't write to the repository themselves and + /// is commonly encountered with contributed commits. + pub committer: gix_actor::Signature, + /// The name of the message encoding, otherwise [UTF-8 should be assumed](https://github.com/git/git/blob/e67fbf927dfdf13d0b21dc6ea15dc3c7ef448ea0/commit.c#L1493:L1493). + pub encoding: Option<BString>, + /// The commit message documenting the change. + pub message: BString, + /// Extra header fields, in order of them being encountered, made accessible with the iterator returned + /// by [`extra_headers()`][Commit::extra_headers()]. + pub extra_headers: Vec<(BString, BString)>, +} + +/// Represents a git tag, commonly indicating a software release. +#[derive(PartialEq, Eq, Debug, Hash, Ord, PartialOrd, Clone)] +#[cfg_attr(feature = "serde1", derive(serde::Serialize, serde::Deserialize))] +pub struct TagRef<'a> { + /// The hash in hexadecimal being the object this tag points to. Use [`target()`][TagRef::target()] to obtain a byte representation. + #[cfg_attr(feature = "serde1", serde(borrow))] + pub target: &'a BStr, + /// The kind of object that `target` points to. + pub target_kind: Kind, + /// The name of the tag, e.g. "v1.0". + pub name: &'a BStr, + /// The author of the tag. + pub tagger: Option<gix_actor::SignatureRef<'a>>, + /// The message describing this release. + pub message: &'a BStr, + /// A cryptographic signature over the entire content of the serialized tag object thus far. + pub pgp_signature: Option<&'a BStr>, +} + +/// Like [`TagRef`], but as `Iterator` to support entirely allocation free parsing. +/// It's particularly useful to dereference only the target chain. +#[derive(Copy, Clone)] +pub struct TagRefIter<'a> { + data: &'a [u8], + state: tag::ref_iter::State, +} + +/// A mutable git tag. +#[derive(PartialEq, Eq, Debug, Hash, Ord, PartialOrd, Clone)] +#[cfg_attr(feature = "serde1", derive(serde::Serialize, serde::Deserialize))] +pub struct Tag { + /// The hash this tag is pointing to. + pub target: gix_hash::ObjectId, + /// The kind of object this tag is pointing to. + pub target_kind: Kind, + /// The name of the tag, e.g. "v1.0". + pub name: BString, + /// The tags author. + pub tagger: Option<gix_actor::Signature>, + /// The message describing the tag. + pub message: BString, + /// A pgp signature over all bytes of the encoded tag, excluding the pgp signature itself. 
+ pub pgp_signature: Option<BString>, +} + +/// Immutable objects are read-only structures referencing most data from [a byte slice][crate::ObjectRef::from_bytes()]. +/// +/// Immutable objects are expected to be deserialized from bytes that acts as backing store, and they +/// cannot be mutated or serialized. Instead, one will [convert][crate::ObjectRef::into_owned()] them into their [`mutable`][Object] counterparts +/// which support mutation and serialization. +/// +/// An `ObjectRef` is representing [`Trees`][TreeRef], [`Blobs`][BlobRef], [`Commits`][CommitRef], or [`Tags`][TagRef]. +#[derive(PartialEq, Eq, Debug, Hash, Ord, PartialOrd, Clone)] +#[cfg_attr(feature = "serde1", derive(serde::Serialize, serde::Deserialize))] +#[allow(missing_docs)] +pub enum ObjectRef<'a> { + #[cfg_attr(feature = "serde1", serde(borrow))] + Tree(TreeRef<'a>), + Blob(BlobRef<'a>), + Commit(CommitRef<'a>), + Tag(TagRef<'a>), +} + +/// Mutable objects with each field being separately allocated and changeable. +/// +/// Mutable objects are Commits, Trees, Blobs and Tags that can be changed and serialized. +/// +/// They either created using object [construction][Object] or by [deserializing existing objects][ObjectRef::from_bytes()] +/// and converting these [into mutable copies][ObjectRef::into_owned()] for adjustments. +/// +/// An `Object` is representing [`Trees`][Tree], [`Blobs`][Blob], [`Commits`][Commit] or [`Tags`][Tag]. +#[derive(PartialEq, Eq, Debug, Hash, Ord, PartialOrd, Clone)] +#[cfg_attr(feature = "serde1", derive(serde::Serialize, serde::Deserialize))] +#[allow(clippy::large_enum_variant, missing_docs)] +pub enum Object { + Tree(Tree), + Blob(Blob), + Commit(Commit), + Tag(Tag), +} +/// A directory snapshot containing files (blobs), directories (trees) and submodules (commits). +#[derive(PartialEq, Eq, Debug, Hash, Ord, PartialOrd, Clone)] +#[cfg_attr(feature = "serde1", derive(serde::Serialize, serde::Deserialize))] +pub struct TreeRef<'a> { + /// The directories and files contained in this tree. + #[cfg_attr(feature = "serde1", serde(borrow))] + pub entries: Vec<tree::EntryRef<'a>>, +} + +/// A directory snapshot containing files (blobs), directories (trees) and submodules (commits), lazily evaluated. +#[derive(Default, PartialEq, Eq, Debug, Hash, Ord, PartialOrd, Clone)] +pub struct TreeRefIter<'a> { + /// The directories and files contained in this tree. + data: &'a [u8], +} + +/// A mutable Tree, containing other trees, blobs or commits. +#[derive(PartialEq, Eq, Debug, Hash, Ord, PartialOrd, Clone)] +#[cfg_attr(feature = "serde1", derive(serde::Serialize, serde::Deserialize))] +pub struct Tree { + /// The directories and files contained in this tree. They must be and remain sorted by [`filename`][tree::Entry::filename]. + pub entries: Vec<tree::Entry>, +} + +impl Tree { + /// Return an empty tree which serializes to a well-known hash + pub fn empty() -> Self { + Tree { entries: Vec::new() } + } +} + +/// A borrowed object using a slice as backing buffer, or in other words a bytes buffer that knows the kind of object it represents. +#[derive(PartialEq, Eq, Debug, Hash, Ord, PartialOrd, Clone)] +pub struct Data<'a> { + /// kind of object + pub kind: Kind, + /// decoded, decompressed data, owned by a backing store. + pub data: &'a [u8], +} + +/// +pub mod decode { + #[cfg(feature = "verbose-object-parsing-errors")] + mod _decode { + use crate::bstr::{BString, ByteSlice}; + + /// The type to be used for parse errors. 
+ pub type ParseError<'a> = nom::error::VerboseError<&'a [u8]>; + /// The owned type to be used for parse errors. + pub type ParseErrorOwned = nom::error::VerboseError<BString>; + + pub(crate) fn empty_error() -> Error { + Error { + inner: nom::error::VerboseError::<BString> { errors: Vec::new() }, + } + } + + /// A type to indicate errors during parsing and to abstract away details related to `nom`. + #[derive(Debug, Clone)] + pub struct Error { + /// The actual error + pub inner: ParseErrorOwned, + } + + impl<'a> From<nom::Err<ParseError<'a>>> for Error { + fn from(v: nom::Err<ParseError<'a>>) -> Self { + Error { + inner: match v { + nom::Err::Error(err) | nom::Err::Failure(err) => nom::error::VerboseError { + errors: err + .errors + .into_iter() + .map(|(i, v)| (i.as_bstr().to_owned(), v)) + .collect(), + }, + nom::Err::Incomplete(_) => unreachable!("we don't have streaming parsers"), + }, + } + } + } + + impl std::fmt::Display for Error { + fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result { + self.inner.fmt(f) + } + } + } + + /// + #[cfg(not(feature = "verbose-object-parsing-errors"))] + mod _decode { + /// The type to be used for parse errors, discards everything and is zero size + pub type ParseError<'a> = (); + /// The owned type to be used for parse errors, discards everything and is zero size + pub type ParseErrorOwned = (); + + pub(crate) fn empty_error() -> Error { + Error { inner: () } + } + + /// A type to indicate errors during parsing and to abstract away details related to `nom`. + #[derive(Debug, Clone)] + pub struct Error { + /// The actual error + pub inner: ParseErrorOwned, + } + + impl<'a> From<nom::Err<ParseError<'a>>> for Error { + fn from(v: nom::Err<ParseError<'a>>) -> Self { + Error { + inner: match v { + nom::Err::Error(err) | nom::Err::Failure(err) => err, + nom::Err::Incomplete(_) => unreachable!("we don't have streaming parsers"), + }, + } + } + } + + impl std::fmt::Display for Error { + fn fmt(&self, _f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result { + Ok(()) + } + } + } + pub(crate) use _decode::empty_error; + pub use _decode::{Error, ParseError, ParseErrorOwned}; + impl std::error::Error for Error {} + + /// Returned by [`loose_header()`] + #[derive(Debug, thiserror::Error)] + #[allow(missing_docs)] + pub enum LooseHeaderDecodeError { + #[error("{message}: {number:?}")] + ParseIntegerError { + source: btoi::ParseIntegerError, + message: &'static str, + number: bstr::BString, + }, + #[error("{message}")] + InvalidHeader { message: &'static str }, + #[error("The object header contained an unknown object kind.")] + ObjectHeader(#[from] super::kind::Error), + } + + use bstr::ByteSlice; + /// Decode a loose object header, being `<kind> <size>\0`, returns + /// ([`kind`](super::Kind), `size`, `consumed bytes`). + /// + /// `size` is the uncompressed size of the payload in bytes. 
+ pub fn loose_header(input: &[u8]) -> Result<(super::Kind, usize, usize), LooseHeaderDecodeError> { + use LooseHeaderDecodeError::*; + let kind_end = input.find_byte(0x20).ok_or(InvalidHeader { + message: "Expected '<type> <size>'", + })?; + let kind = super::Kind::from_bytes(&input[..kind_end])?; + let size_end = input.find_byte(0x0).ok_or(InvalidHeader { + message: "Did not find 0 byte in header", + })?; + let size_bytes = &input[kind_end + 1..size_end]; + let size = btoi::btoi(size_bytes).map_err(|source| ParseIntegerError { + source, + message: "Object size in header could not be parsed", + number: size_bytes.into(), + })?; + Ok((kind, size, size_end + 1)) + } +} diff --git a/vendor/gix-object/src/object/convert.rs b/vendor/gix-object/src/object/convert.rs new file mode 100644 index 000000000..5e6e63486 --- /dev/null +++ b/vendor/gix-object/src/object/convert.rs @@ -0,0 +1,228 @@ +use std::convert::TryFrom; + +use crate::{tree, Blob, BlobRef, Commit, CommitRef, Object, ObjectRef, Tag, TagRef, Tree, TreeRef}; + +impl From<TagRef<'_>> for Tag { + fn from(other: TagRef<'_>) -> Tag { + let TagRef { + target, + name, + target_kind, + message, + tagger: signature, + pgp_signature, + } = other; + Tag { + target: gix_hash::ObjectId::from_hex(target).expect("prior parser validation"), + name: name.to_owned(), + target_kind, + message: message.to_owned(), + tagger: signature.map(Into::into), + pgp_signature: pgp_signature.map(ToOwned::to_owned), + } + } +} + +impl From<CommitRef<'_>> for Commit { + fn from(other: CommitRef<'_>) -> Commit { + let CommitRef { + tree, + parents, + author, + committer, + encoding, + message, + extra_headers, + } = other; + Commit { + tree: gix_hash::ObjectId::from_hex(tree).expect("prior parser validation"), + parents: parents + .iter() + .map(|parent| gix_hash::ObjectId::from_hex(parent).expect("prior parser validation")) + .collect(), + author: author.into(), + committer: committer.into(), + encoding: encoding.map(ToOwned::to_owned), + message: message.to_owned(), + extra_headers: extra_headers + .into_iter() + .map(|(k, v)| (k.into(), v.into_owned())) + .collect(), + } + } +} + +impl<'a> From<BlobRef<'a>> for Blob { + fn from(v: BlobRef<'a>) -> Self { + Blob { + data: v.data.to_owned(), + } + } +} + +impl From<TreeRef<'_>> for Tree { + fn from(other: TreeRef<'_>) -> Tree { + let TreeRef { entries } = other; + Tree { + entries: entries.into_iter().map(Into::into).collect(), + } + } +} + +impl From<tree::EntryRef<'_>> for tree::Entry { + fn from(other: tree::EntryRef<'_>) -> tree::Entry { + let tree::EntryRef { mode, filename, oid } = other; + tree::Entry { + mode, + filename: filename.to_owned(), + oid: oid.into(), + } + } +} + +impl<'a> From<ObjectRef<'a>> for Object { + fn from(v: ObjectRef<'_>) -> Self { + match v { + ObjectRef::Tree(v) => Object::Tree(v.into()), + ObjectRef::Blob(v) => Object::Blob(v.into()), + ObjectRef::Commit(v) => Object::Commit(v.into()), + ObjectRef::Tag(v) => Object::Tag(v.into()), + } + } +} + +impl From<Tag> for Object { + fn from(v: Tag) -> Self { + Object::Tag(v) + } +} + +impl From<Commit> for Object { + fn from(v: Commit) -> Self { + Object::Commit(v) + } +} + +impl From<Tree> for Object { + fn from(v: Tree) -> Self { + Object::Tree(v) + } +} + +impl From<Blob> for Object { + fn from(v: Blob) -> Self { + Object::Blob(v) + } +} + +impl TryFrom<Object> for Tag { + type Error = Object; + + fn try_from(value: Object) -> Result<Self, Self::Error> { + Ok(match value { + Object::Tag(v) => v, + _ => return Err(value), + }) + } +} + +impl 
TryFrom<Object> for Commit { + type Error = Object; + + fn try_from(value: Object) -> Result<Self, Self::Error> { + Ok(match value { + Object::Commit(v) => v, + _ => return Err(value), + }) + } +} + +impl TryFrom<Object> for Tree { + type Error = Object; + + fn try_from(value: Object) -> Result<Self, Self::Error> { + Ok(match value { + Object::Tree(v) => v, + _ => return Err(value), + }) + } +} + +impl TryFrom<Object> for Blob { + type Error = Object; + + fn try_from(value: Object) -> Result<Self, Self::Error> { + Ok(match value { + Object::Blob(v) => v, + _ => return Err(value), + }) + } +} + +impl<'a> From<TagRef<'a>> for ObjectRef<'a> { + fn from(v: TagRef<'a>) -> Self { + ObjectRef::Tag(v) + } +} + +impl<'a> From<CommitRef<'a>> for ObjectRef<'a> { + fn from(v: CommitRef<'a>) -> Self { + ObjectRef::Commit(v) + } +} + +impl<'a> From<TreeRef<'a>> for ObjectRef<'a> { + fn from(v: TreeRef<'a>) -> Self { + ObjectRef::Tree(v) + } +} + +impl<'a> From<BlobRef<'a>> for ObjectRef<'a> { + fn from(v: BlobRef<'a>) -> Self { + ObjectRef::Blob(v) + } +} + +impl<'a> TryFrom<ObjectRef<'a>> for TagRef<'a> { + type Error = ObjectRef<'a>; + + fn try_from(value: ObjectRef<'a>) -> Result<Self, Self::Error> { + Ok(match value { + ObjectRef::Tag(v) => v, + _ => return Err(value), + }) + } +} + +impl<'a> TryFrom<ObjectRef<'a>> for CommitRef<'a> { + type Error = ObjectRef<'a>; + + fn try_from(value: ObjectRef<'a>) -> Result<Self, Self::Error> { + Ok(match value { + ObjectRef::Commit(v) => v, + _ => return Err(value), + }) + } +} + +impl<'a> TryFrom<ObjectRef<'a>> for TreeRef<'a> { + type Error = ObjectRef<'a>; + + fn try_from(value: ObjectRef<'a>) -> Result<Self, Self::Error> { + Ok(match value { + ObjectRef::Tree(v) => v, + _ => return Err(value), + }) + } +} + +impl<'a> TryFrom<ObjectRef<'a>> for BlobRef<'a> { + type Error = ObjectRef<'a>; + + fn try_from(value: ObjectRef<'a>) -> Result<Self, Self::Error> { + Ok(match value { + ObjectRef::Blob(v) => v, + _ => return Err(value), + }) + } +} diff --git a/vendor/gix-object/src/object/mod.rs b/vendor/gix-object/src/object/mod.rs new file mode 100644 index 000000000..c0f9dcd52 --- /dev/null +++ b/vendor/gix-object/src/object/mod.rs @@ -0,0 +1,296 @@ +use crate::{Blob, Commit, Object, Tag, Tree}; + +mod convert; + +mod write { + use std::io; + + use crate::{Kind, Object, ObjectRef, WriteTo}; + + /// Serialization + impl<'a> WriteTo for ObjectRef<'a> { + /// Write the contained object to `out` in the git serialization format. + fn write_to(&self, out: impl io::Write) -> io::Result<()> { + use crate::ObjectRef::*; + match self { + Tree(v) => v.write_to(out), + Blob(v) => v.write_to(out), + Commit(v) => v.write_to(out), + Tag(v) => v.write_to(out), + } + } + + fn size(&self) -> usize { + use crate::ObjectRef::*; + match self { + Tree(v) => v.size(), + Blob(v) => v.size(), + Commit(v) => v.size(), + Tag(v) => v.size(), + } + } + + fn kind(&self) -> Kind { + self.kind() + } + } + + /// Serialization + impl WriteTo for Object { + /// Write the contained object to `out` in the git serialization format. 
+ fn write_to(&self, out: impl io::Write) -> io::Result<()> { + use crate::Object::*; + match self { + Tree(v) => v.write_to(out), + Blob(v) => v.write_to(out), + Commit(v) => v.write_to(out), + Tag(v) => v.write_to(out), + } + } + + fn size(&self) -> usize { + use crate::Object::*; + match self { + Tree(v) => v.size(), + Blob(v) => v.size(), + Commit(v) => v.size(), + Tag(v) => v.size(), + } + } + + fn kind(&self) -> Kind { + self.kind() + } + } +} + +/// Convenient extraction of typed object. +impl Object { + /// Turns this instance into a [`Blob`][Blob], panic otherwise. + pub fn into_blob(self) -> Blob { + match self { + Object::Blob(v) => v, + _ => panic!("BUG: not a blob"), + } + } + /// Turns this instance into a [`Commit`][Commit] panic otherwise. + pub fn into_commit(self) -> Commit { + match self { + Object::Commit(v) => v, + _ => panic!("BUG: not a commit"), + } + } + /// Turns this instance into a [`Tree`][Tree] panic otherwise. + pub fn into_tree(self) -> Tree { + match self { + Object::Tree(v) => v, + _ => panic!("BUG: not a tree"), + } + } + /// Turns this instance into a [`Tag`][Tag] panic otherwise. + pub fn into_tag(self) -> Tag { + match self { + Object::Tag(v) => v, + _ => panic!("BUG: not a tag"), + } + } + /// Turns this instance into a [`Blob`][Blob] if it is one. + #[allow(clippy::result_large_err)] + pub fn try_into_blob(self) -> Result<Blob, Self> { + match self { + Object::Blob(v) => Ok(v), + _ => Err(self), + } + } + /// Turns this instance into a [`BlobRef`][BlobRef] if it is a blob. + pub fn try_into_blob_ref(&self) -> Option<BlobRef<'_>> { + match self { + Object::Blob(v) => Some(v.to_ref()), + _ => None, + } + } + /// Turns this instance into a [`Commit`][Commit] if it is one. + #[allow(clippy::result_large_err)] + pub fn try_into_commit(self) -> Result<Commit, Self> { + match self { + Object::Commit(v) => Ok(v), + _ => Err(self), + } + } + /// Turns this instance into a [`Tree`][Tree] if it is one. + #[allow(clippy::result_large_err)] + pub fn try_into_tree(self) -> Result<Tree, Self> { + match self { + Object::Tree(v) => Ok(v), + _ => Err(self), + } + } + /// Turns this instance into a [`Tag`][Tag] if it is one. + #[allow(clippy::result_large_err)] + pub fn try_into_tag(self) -> Result<Tag, Self> { + match self { + Object::Tag(v) => Ok(v), + _ => Err(self), + } + } + + /// Returns a [`Blob`][Blob] if it is one. + pub fn as_blob(&self) -> Option<&Blob> { + match self { + Object::Blob(v) => Some(v), + _ => None, + } + } + /// Returns a [`Commit`][Commit] if it is one. + pub fn as_commit(&self) -> Option<&Commit> { + match self { + Object::Commit(v) => Some(v), + _ => None, + } + } + /// Returns a [`Tree`][Tree] if it is one. + pub fn as_tree(&self) -> Option<&Tree> { + match self { + Object::Tree(v) => Some(v), + _ => None, + } + } + /// Returns a [`Tag`][Tag] if it is one. + pub fn as_tag(&self) -> Option<&Tag> { + match self { + Object::Tag(v) => Some(v), + _ => None, + } + } + /// Returns the kind of object stored in this instance. 
+ pub fn kind(&self) -> crate::Kind { + match self { + Object::Tree(_) => crate::Kind::Tree, + Object::Blob(_) => crate::Kind::Blob, + Object::Commit(_) => crate::Kind::Commit, + Object::Tag(_) => crate::Kind::Tag, + } + } +} + +use crate::{ + decode::{loose_header, Error as DecodeError, LooseHeaderDecodeError}, + BlobRef, CommitRef, Kind, ObjectRef, TagRef, TreeRef, +}; + +#[derive(Debug, thiserror::Error)] +#[allow(missing_docs)] +pub enum LooseDecodeError { + #[error(transparent)] + InvalidHeader(#[from] LooseHeaderDecodeError), + #[error(transparent)] + InvalidContent(#[from] DecodeError), +} + +impl<'a> ObjectRef<'a> { + /// Deserialize an object from a loose serialisation + pub fn from_loose(data: &'a [u8]) -> Result<ObjectRef<'a>, LooseDecodeError> { + let (kind, size, offset) = loose_header(data)?; + + let body = &data[offset..] + .get(..size) + .ok_or(LooseHeaderDecodeError::InvalidHeader { + message: "object data was shorter than its size declared in the header", + })?; + + Ok(Self::from_bytes(kind, body)?) + } + + /// Deserialize an object of `kind` from the given `data`. + pub fn from_bytes(kind: Kind, data: &'a [u8]) -> Result<ObjectRef<'a>, crate::decode::Error> { + Ok(match kind { + Kind::Tree => ObjectRef::Tree(TreeRef::from_bytes(data)?), + Kind::Blob => ObjectRef::Blob(BlobRef { data }), + Kind::Commit => ObjectRef::Commit(CommitRef::from_bytes(data)?), + Kind::Tag => ObjectRef::Tag(TagRef::from_bytes(data)?), + }) + } + + /// Convert the immutable object into a mutable version, consuming the source in the process. + /// + /// Note that this is an expensive operation. + pub fn into_owned(self) -> Object { + self.into() + } + + /// Convert this immutable object into its mutable counterpart. + /// + /// Note that this is an expensive operation. + pub fn to_owned(&self) -> Object { + self.clone().into() + } +} + +/// Convenient access to contained objects. +impl<'a> ObjectRef<'a> { + /// Interpret this object as blob. + pub fn as_blob(&self) -> Option<&BlobRef<'a>> { + match self { + ObjectRef::Blob(v) => Some(v), + _ => None, + } + } + /// Interpret this object as blob, chainable. + pub fn into_blob(self) -> Option<BlobRef<'a>> { + match self { + ObjectRef::Blob(v) => Some(v), + _ => None, + } + } + /// Interpret this object as commit. + pub fn as_commit(&self) -> Option<&CommitRef<'a>> { + match self { + ObjectRef::Commit(v) => Some(v), + _ => None, + } + } + /// Interpret this object as commit, chainable. + pub fn into_commit(self) -> Option<CommitRef<'a>> { + match self { + ObjectRef::Commit(v) => Some(v), + _ => None, + } + } + /// Interpret this object as tree. + pub fn as_tree(&self) -> Option<&TreeRef<'a>> { + match self { + ObjectRef::Tree(v) => Some(v), + _ => None, + } + } + /// Interpret this object as tree, chainable + pub fn into_tree(self) -> Option<TreeRef<'a>> { + match self { + ObjectRef::Tree(v) => Some(v), + _ => None, + } + } + /// Interpret this object as tag. + pub fn as_tag(&self) -> Option<&TagRef<'a>> { + match self { + ObjectRef::Tag(v) => Some(v), + _ => None, + } + } + /// Interpret this object as tag, chainable. + pub fn into_tag(self) -> Option<TagRef<'a>> { + match self { + ObjectRef::Tag(v) => Some(v), + _ => None, + } + } + /// Return the kind of object. 
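// Illustrative sketch (editor's note, not part of the vendored source): decoding
// an already-inflated loose object with a hypothetical buffer `buf: Vec<u8>`
// holding "<kind> <size>\0<body>":
//
//     let obj = ObjectRef::from_loose(&buf)?;   // validates header and body length
//     if let Some(commit) = obj.as_commit() {
//         println!("tree {}", commit.tree);
//     }
//
// `from_bytes()` is the header-less variant: the kind is passed in separately and
// only the body is parsed.
//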
+ pub fn kind(&self) -> Kind { + match self { + ObjectRef::Tree(_) => Kind::Tree, + ObjectRef::Blob(_) => Kind::Blob, + ObjectRef::Commit(_) => Kind::Commit, + ObjectRef::Tag(_) => Kind::Tag, + } + } +} diff --git a/vendor/gix-object/src/parse.rs b/vendor/gix-object/src/parse.rs new file mode 100644 index 000000000..20dd443c0 --- /dev/null +++ b/vendor/gix-object/src/parse.rs @@ -0,0 +1,81 @@ +use bstr::{BStr, BString, ByteVec}; +use nom::{ + bytes::complete::{is_not, tag, take_until, take_while_m_n}, + combinator::{peek, recognize}, + error::{context, ContextError, ParseError}, + multi::many1_count, + sequence::{preceded, terminated, tuple}, + IResult, +}; + +use crate::ByteSlice; + +pub(crate) const NL: &[u8] = b"\n"; +pub(crate) const SPACE: &[u8] = b" "; +const SPACE_OR_NL: &[u8] = b" \n"; + +pub(crate) fn any_header_field_multi_line<'a, E: ParseError<&'a [u8]> + ContextError<&'a [u8]>>( + i: &'a [u8], +) -> IResult<&'a [u8], (&'a [u8], BString), E> { + let (i, (k, o)) = context( + "name <multi-line-value>", + peek(tuple(( + terminated(is_not(SPACE_OR_NL), tag(SPACE)), + recognize(tuple(( + is_not(NL), + tag(NL), + many1_count(terminated(tuple((tag(SPACE), take_until(NL))), tag(NL))), + ))), + ))), + )(i)?; + assert!(!o.is_empty(), "we have parsed more than one value here"); + let end = &o[o.len() - 1] as *const u8 as usize; + let start_input = &i[0] as *const u8 as usize; + + let bytes = o[..o.len() - 1].as_bstr(); + let mut out = BString::from(Vec::with_capacity(bytes.len())); + let mut lines = bytes.lines(); + out.push_str(lines.next().expect("first line")); + for line in lines { + out.push(b'\n'); + out.push_str(&line[1..]); // cut leading space + } + Ok((&i[end - start_input + 1..], (k, out))) +} + +pub(crate) fn header_field<'a, T, E: ParseError<&'a [u8]>>( + i: &'a [u8], + name: &'static [u8], + parse_value: impl Fn(&'a [u8]) -> IResult<&'a [u8], T, E>, +) -> IResult<&'a [u8], T, E> { + terminated(preceded(terminated(tag(name), tag(SPACE)), parse_value), tag(NL))(i) +} + +pub(crate) fn any_header_field<'a, T, E: ParseError<&'a [u8]>>( + i: &'a [u8], + parse_value: impl Fn(&'a [u8]) -> IResult<&'a [u8], T, E>, +) -> IResult<&'a [u8], (&'a [u8], T), E> { + terminated( + tuple((terminated(is_not(SPACE_OR_NL), tag(SPACE)), parse_value)), + tag(NL), + )(i) +} + +fn is_hex_digit_lc(b: u8) -> bool { + matches!(b, b'0'..=b'9' | b'a'..=b'f') +} + +pub fn hex_hash<'a, E: ParseError<&'a [u8]>>(i: &'a [u8]) -> IResult<&'a [u8], &'a BStr, E> { + take_while_m_n( + gix_hash::Kind::shortest().len_in_hex(), + gix_hash::Kind::longest().len_in_hex(), + is_hex_digit_lc, + )(i) + .map(|(i, hex)| (i, hex.as_bstr())) +} + +pub(crate) fn signature<'a, E: ParseError<&'a [u8]> + ContextError<&'a [u8]>>( + i: &'a [u8], +) -> IResult<&'a [u8], gix_actor::SignatureRef<'a>, E> { + gix_actor::signature::decode(i) +} diff --git a/vendor/gix-object/src/tag/decode.rs b/vendor/gix-object/src/tag/decode.rs new file mode 100644 index 000000000..ba9460af9 --- /dev/null +++ b/vendor/gix-object/src/tag/decode.rs @@ -0,0 +1,90 @@ +use nom::{ + branch::alt, + bytes::complete::{tag, take_until, take_while, take_while1}, + character::is_alphabetic, + combinator::{all_consuming, opt, recognize}, + error::{context, ContextError, ParseError}, + sequence::{preceded, tuple}, + IResult, +}; + +use crate::{parse, parse::NL, BStr, ByteSlice, TagRef}; + +pub fn git_tag<'a, E: ParseError<&'a [u8]> + ContextError<&'a [u8]>>(i: &'a [u8]) -> IResult<&[u8], TagRef<'a>, E> { + let (i, target) = context("object <40 lowercase hex char>", 
|i| { + parse::header_field(i, b"object", parse::hex_hash) + })(i)?; + + let (i, kind) = context("type <object kind>", |i| { + parse::header_field(i, b"type", take_while1(is_alphabetic)) + })(i)?; + let kind = crate::Kind::from_bytes(kind) + .map_err(|_| nom::Err::Error(E::from_error_kind(i, nom::error::ErrorKind::MapRes)))?; + + let (i, tag_version) = context("tag <version>", |i| { + parse::header_field(i, b"tag", take_while1(|b| b != NL[0])) + })(i)?; + + let (i, signature) = context( + "tagger <signature>", + opt(|i| parse::header_field(i, b"tagger", parse::signature)), + )(i)?; + let (i, (message, pgp_signature)) = all_consuming(message)(i)?; + Ok(( + i, + TagRef { + target, + name: tag_version.as_bstr(), + target_kind: kind, + message, + tagger: signature, + pgp_signature, + }, + )) +} + +pub fn message<'a, E: ParseError<&'a [u8]>>(i: &'a [u8]) -> IResult<&'a [u8], (&'a BStr, Option<&'a BStr>), E> { + const PGP_SIGNATURE_BEGIN: &[u8] = b"\n-----BEGIN PGP SIGNATURE-----"; + const PGP_SIGNATURE_END: &[u8] = b"-----END PGP SIGNATURE-----"; + + if i.is_empty() { + return Ok((i, (i.as_bstr(), None))); + } + let (i, _) = tag(NL)(i)?; + fn all_to_end<'a, E: ParseError<&'a [u8]>>(i: &'a [u8]) -> IResult<&'a [u8], (&'a [u8], &'a [u8]), E> { + if i.is_empty() { + // Empty message. That's OK. + return Ok((&[], (&[], &[]))); + } + // an empty signature message signals that there is none - the function signature is needed + // to work with 'alt(…)'. PGP signatures are never empty + Ok((&[], (i, &[]))) + } + let (i, (message, signature)) = alt(( + tuple(( + take_until(PGP_SIGNATURE_BEGIN), + preceded( + tag(NL), + recognize(tuple(( + tag(&PGP_SIGNATURE_BEGIN[1..]), + take_until(PGP_SIGNATURE_END), + tag(PGP_SIGNATURE_END), + take_while(|_| true), + ))), + ), + )), + all_to_end, + ))(i)?; + let (i, _) = opt(tag(NL))(i)?; + Ok(( + i, + ( + message.as_bstr(), + if signature.is_empty() { + None + } else { + Some(signature.as_bstr()) + }, + ), + )) +} diff --git a/vendor/gix-object/src/tag/mod.rs b/vendor/gix-object/src/tag/mod.rs new file mode 100644 index 000000000..1cd353ffb --- /dev/null +++ b/vendor/gix-object/src/tag/mod.rs @@ -0,0 +1,22 @@ +use crate::TagRef; + +mod decode; + +/// +pub mod write; + +/// +pub mod ref_iter; + +impl<'a> TagRef<'a> { + /// Deserialize a tag from `data`. + pub fn from_bytes(data: &'a [u8]) -> Result<TagRef<'a>, crate::decode::Error> { + decode::git_tag(data) + .map(|(_, t)| t) + .map_err(crate::decode::Error::from) + } + /// The object this tag points to as `Id`. + pub fn target(&self) -> gix_hash::ObjectId { + gix_hash::ObjectId::from_hex(self.target).expect("prior validation") + } +} diff --git a/vendor/gix-object/src/tag/ref_iter.rs b/vendor/gix-object/src/tag/ref_iter.rs new file mode 100644 index 000000000..f9409ebbe --- /dev/null +++ b/vendor/gix-object/src/tag/ref_iter.rs @@ -0,0 +1,173 @@ +use bstr::BStr; +use gix_hash::{oid, ObjectId}; +use nom::{ + bytes::complete::take_while1, + character::is_alphabetic, + combinator::{all_consuming, opt}, + error::{context, ParseError}, +}; + +use crate::{bstr::ByteSlice, parse, parse::NL, tag::decode, Kind, TagRefIter}; + +#[derive(Copy, Clone)] +pub(crate) enum State { + Target, + TargetKind, + Name, + Tagger, + Message, +} + +impl Default for State { + fn default() -> Self { + State::Target + } +} + +impl<'a> TagRefIter<'a> { + /// Create a tag iterator from data. 
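// Illustrative sketch (editor's note, not part of the vendored source): with a
// hypothetical raw tag body `data: &[u8]`, the eager and lazy paths compare as
//
//     let id = TagRef::from_bytes(data)?.target();          // parses the whole tag
//     let id = TagRefIter::from_bytes(data).target_id()?;   // parses only the first field
//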
+ pub fn from_bytes(data: &'a [u8]) -> TagRefIter<'a> { + TagRefIter { + data, + state: State::default(), + } + } + + /// Returns the target id of this tag if it is the first function called and if there is no error in decoding + /// the data. + /// + /// Note that this method must only be called once or else will always return None while consuming a single token. + /// Errors are coerced into options, hiding whether there was an error or not. The caller should assume an error if they + /// call the method as intended. Such a squelched error cannot be recovered unless the objects data is retrieved and parsed again. + /// `next()`. + pub fn target_id(mut self) -> Result<ObjectId, crate::decode::Error> { + let token = self.next().ok_or_else(missing_field)??; + Token::into_id(token).ok_or_else(missing_field) + } + + /// Returns the taggers signature if there is no decoding error, and if this field exists. + /// Errors are coerced into options, hiding whether there was an error or not. The caller knows if there was an error or not. + pub fn tagger(mut self) -> Result<Option<gix_actor::SignatureRef<'a>>, crate::decode::Error> { + self.find_map(|t| match t { + Ok(Token::Tagger(signature)) => Some(Ok(signature)), + Err(err) => Some(Err(err)), + _ => None, + }) + .ok_or_else(missing_field)? + } +} + +fn missing_field() -> crate::decode::Error { + crate::decode::empty_error() +} + +impl<'a> TagRefIter<'a> { + fn next_inner(i: &'a [u8], state: &mut State) -> Result<(&'a [u8], Token<'a>), crate::decode::Error> { + use State::*; + Ok(match state { + Target => { + let (i, target) = context("object <40 lowercase hex char>", |i| { + parse::header_field(i, b"object", parse::hex_hash) + })(i)?; + *state = TargetKind; + ( + i, + Token::Target { + id: ObjectId::from_hex(target).expect("parsing validation"), + }, + ) + } + TargetKind => { + let (i, kind) = context("type <object kind>", |i| { + parse::header_field(i, b"type", take_while1(is_alphabetic)) + })(i)?; + let kind = Kind::from_bytes(kind).map_err(|_| { + #[allow(clippy::let_unit_value)] + { + let err = crate::decode::ParseError::from_error_kind(i, nom::error::ErrorKind::MapRes); + nom::Err::Error(err) + } + })?; + *state = Name; + (i, Token::TargetKind(kind)) + } + Name => { + let (i, tag_version) = context("tag <version>", |i| { + parse::header_field(i, b"tag", take_while1(|b| b != NL[0])) + })(i)?; + *state = Tagger; + (i, Token::Name(tag_version.as_bstr())) + } + Tagger => { + let (i, signature) = context( + "tagger <signature>", + opt(|i| parse::header_field(i, b"tagger", parse::signature)), + )(i)?; + *state = Message; + (i, Token::Tagger(signature)) + } + Message => { + let (i, (message, pgp_signature)) = all_consuming(decode::message)(i)?; + debug_assert!( + i.is_empty(), + "we should have consumed all data - otherwise iter may go forever" + ); + return Ok((i, Token::Body { message, pgp_signature })); + } + }) + } +} + +impl<'a> Iterator for TagRefIter<'a> { + type Item = Result<Token<'a>, crate::decode::Error>; + + fn next(&mut self) -> Option<Self::Item> { + if self.data.is_empty() { + return None; + } + match Self::next_inner(self.data, &mut self.state) { + Ok((data, token)) => { + self.data = data; + Some(Ok(token)) + } + Err(err) => { + self.data = &[]; + Some(Err(err)) + } + } + } +} + +/// A token returned by the [tag iterator][TagRefIter]. 
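// Illustrative sketch (editor's note, not part of the vendored source): walking
// the tokens produced by the state machine above, with a hypothetical `data: &[u8]`:
//
//     for token in TagRefIter::from_bytes(data) {
//         match token? {
//             Token::Target { id } => println!("object {}", id),
//             Token::Name(name)    => println!("tag {}", name),
//             Token::Body { .. }   => break,  // the message is always the final token
//             _ => {}
//         }
//     }
//
// After yielding an error the iterator clears its buffer, so it never loops forever.
//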
+#[allow(missing_docs)] +#[derive(PartialEq, Eq, Debug, Hash, Ord, PartialOrd, Clone)] +pub enum Token<'a> { + Target { + id: ObjectId, + }, + TargetKind(Kind), + Name(&'a BStr), + Tagger(Option<gix_actor::SignatureRef<'a>>), + Body { + message: &'a BStr, + pgp_signature: Option<&'a BStr>, + }, +} + +impl<'a> Token<'a> { + /// Return the object id of this token if its a [Target][Token::Target]. + pub fn id(&self) -> Option<&oid> { + match self { + Token::Target { id } => Some(id.as_ref()), + _ => None, + } + } + + /// Return the owned object id of this token if its a [Target][Token::Target]. + pub fn into_id(self) -> Option<ObjectId> { + match self { + Token::Target { id } => Some(id), + _ => None, + } + } +} diff --git a/vendor/gix-object/src/tag/write.rs b/vendor/gix-object/src/tag/write.rs new file mode 100644 index 000000000..7ccb4f39e --- /dev/null +++ b/vendor/gix-object/src/tag/write.rs @@ -0,0 +1,108 @@ +use std::io; + +use bstr::BStr; + +use crate::{encode, encode::NL, Kind, Tag, TagRef}; + +/// An Error used in [`Tag::write_to()`][crate::WriteTo::write_to()]. +#[derive(Debug, thiserror::Error)] +#[allow(missing_docs)] +pub enum Error { + #[error("Tags must not start with a dash: '-'")] + StartsWithDash, + #[error("The tag name was no valid reference name")] + InvalidRefName(#[from] gix_validate::tag::name::Error), +} + +impl From<Error> for io::Error { + fn from(err: Error) -> Self { + io::Error::new(io::ErrorKind::Other, err) + } +} + +impl crate::WriteTo for Tag { + fn write_to(&self, mut out: impl io::Write) -> io::Result<()> { + encode::trusted_header_id(b"object", &self.target, &mut out)?; + encode::trusted_header_field(b"type", self.target_kind.as_bytes(), &mut out)?; + encode::header_field(b"tag", validated_name(self.name.as_ref())?, &mut out)?; + if let Some(tagger) = &self.tagger { + encode::trusted_header_signature(b"tagger", &tagger.to_ref(), &mut out)?; + } + + out.write_all(NL)?; + if !self.message.is_empty() { + out.write_all(self.message.as_ref())?; + } + if let Some(message) = &self.pgp_signature { + out.write_all(NL)?; + out.write_all(message.as_ref())?; + } + Ok(()) + } + + fn kind(&self) -> Kind { + Kind::Tag + } + + fn size(&self) -> usize { + b"object".len() + 1 /* space */ + self.target.kind().len_in_hex() + 1 /* nl */ + + b"type".len() + 1 /* space */ + self.target_kind.as_bytes().len() + 1 /* nl */ + + b"tag".len() + 1 /* space */ + self.name.len() + 1 /* nl */ + + self + .tagger + .as_ref() + .map(|t| b"tagger".len() + 1 /* space */ + t.size() + 1 /* nl */) + .unwrap_or(0) + + 1 /* nl */ + self.message.len() + + self.pgp_signature.as_ref().map(|m| 1 /* nl */ + m.len()).unwrap_or(0) + } +} + +impl<'a> crate::WriteTo for TagRef<'a> { + fn write_to(&self, mut out: impl io::Write) -> io::Result<()> { + encode::trusted_header_field(b"object", self.target, &mut out)?; + encode::trusted_header_field(b"type", self.target_kind.as_bytes(), &mut out)?; + encode::header_field(b"tag", validated_name(self.name)?, &mut out)?; + if let Some(tagger) = &self.tagger { + encode::trusted_header_signature(b"tagger", tagger, &mut out)?; + } + + out.write_all(NL)?; + if !self.message.is_empty() { + out.write_all(self.message)?; + } + if let Some(message) = self.pgp_signature { + out.write_all(NL)?; + out.write_all(message)?; + } + Ok(()) + } + + fn kind(&self) -> Kind { + Kind::Tag + } + + fn size(&self) -> usize { + b"object".len() + 1 /* space */ + self.target().kind().len_in_hex() + 1 /* nl */ + + b"type".len() + 1 /* space */ + self.target_kind.as_bytes().len() + 1 /* nl */ 
+ + b"tag".len() + 1 /* space */ + self.name.len() + 1 /* nl */ + + self + .tagger + .as_ref() + .map(|t| b"tagger".len() + 1 /* space */ + t.size() + 1 /* nl */) + .unwrap_or(0) + + 1 /* nl */ + self.message.len() + + self.pgp_signature.as_ref().map(|m| 1 /* nl */ + m.len()).unwrap_or(0) + } +} + +fn validated_name(name: &BStr) -> Result<&BStr, Error> { + gix_validate::tag::name(name)?; + if name[0] == b'-' { + return Err(Error::StartsWithDash); + } + Ok(name) +} + +#[cfg(test)] +mod tests; diff --git a/vendor/gix-object/src/tag/write/tests.rs b/vendor/gix-object/src/tag/write/tests.rs new file mode 100644 index 000000000..d0cb5aa57 --- /dev/null +++ b/vendor/gix-object/src/tag/write/tests.rs @@ -0,0 +1,29 @@ +mod validated_name { + mod invalid { + use bstr::ByteSlice; + + use super::super::super::*; + + #[test] + fn only_dash() { + assert!(validated_name(b"-".as_bstr()).is_err()) + } + #[test] + fn leading_dash() { + assert!(validated_name(b"-hello".as_bstr()).is_err()) + } + } + + mod valid { + use bstr::ByteSlice; + + use super::super::super::*; + + #[test] + fn version() { + for version in &["v1.0.0", "0.2.1", "0-alpha1"] { + assert!(validated_name(version.as_bytes().as_bstr()).is_ok()) + } + } + } +} diff --git a/vendor/gix-object/src/traits.rs b/vendor/gix-object/src/traits.rs new file mode 100644 index 000000000..193cd78c3 --- /dev/null +++ b/vendor/gix-object/src/traits.rs @@ -0,0 +1,43 @@ +use std::io::Write; + +use crate::Kind; + +/// Writing of objects to a `Write` implementation +pub trait WriteTo { + /// Write a representation of this instance to `out`. + fn write_to(&self, out: impl std::io::Write) -> std::io::Result<()>; + + /// Returns the type of this object. + fn kind(&self) -> Kind; + + /// Returns the size of this object's representation (the amount + /// of data which would be written by [`write_to`](Self::write_to)). + /// + /// [`size`](Self::size)'s value has no bearing on the validity of + /// the object, as such it's possible for [`size`](Self::size) to + /// return a sensible value but [`write_to`](Self::write_to) to + /// fail because the object was not actually valid in some way. + fn size(&self) -> usize; + + /// Returns a loose object header based on the object's data + fn loose_header(&self) -> smallvec::SmallVec<[u8; 28]> { + crate::encode::loose_header(self.kind(), self.size()) + } +} + +impl<T> WriteTo for &T +where + T: WriteTo, +{ + fn write_to(&self, out: impl Write) -> std::io::Result<()> { + <T as WriteTo>::write_to(self, out) + } + + fn size(&self) -> usize { + <T as WriteTo>::size(self) + } + + fn kind(&self) -> Kind { + <T as WriteTo>::kind(self) + } +} diff --git a/vendor/gix-object/src/tree/mod.rs b/vendor/gix-object/src/tree/mod.rs new file mode 100644 index 000000000..688689c08 --- /dev/null +++ b/vendor/gix-object/src/tree/mod.rs @@ -0,0 +1,136 @@ +use std::cmp::Ordering; + +use crate::{ + bstr::{BStr, BString}, + tree, +}; + +mod ref_iter; +/// +pub mod write; + +/// The mode of items storable in a tree, similar to the file mode on a unix file system. +/// +/// Used in [mutable::Entry][crate::tree::Entry] and [EntryRef]. 
+#[derive(Clone, Copy, PartialEq, Eq, Debug, Ord, PartialOrd, Hash)] +#[repr(u16)] +#[cfg_attr(feature = "serde1", derive(serde::Serialize, serde::Deserialize))] +pub enum EntryMode { + /// A tree, or directory + Tree = 0o040000u16, + /// A file that is not executable + Blob = 0o100644, + /// A file that is executable + BlobExecutable = 0o100755, + /// A symbolic link + Link = 0o120000, + /// A commit of a git submodule + Commit = 0o160000, +} + +impl EntryMode { + /// Return true if this entry mode represents a Tree/directory + pub fn is_tree(&self) -> bool { + *self == EntryMode::Tree + } + + /// Return true if this entry mode represents anything BUT Tree/directory + pub fn is_no_tree(&self) -> bool { + *self != EntryMode::Tree + } + + /// Return true if the entry is any kind of blob. + pub fn is_blob(&self) -> bool { + matches!(self, EntryMode::Blob | EntryMode::BlobExecutable) + } + + /// Return true if the entry is any kind of blob or symlink. + pub fn is_blob_or_symlink(&self) -> bool { + matches!(self, EntryMode::Blob | EntryMode::BlobExecutable | EntryMode::Link) + } + + /// Represent the mode as descriptive string. + pub fn as_str(&self) -> &'static str { + use EntryMode::*; + match self { + Tree => "tree", + Blob => "blob", + BlobExecutable => "exe", + Link => "link", + Commit => "commit", + } + } +} + +/// An element of a [`TreeRef`][crate::TreeRef::entries]. +#[derive(PartialEq, Eq, Debug, Hash, Clone)] +#[cfg_attr(feature = "serde1", derive(serde::Serialize, serde::Deserialize))] +pub struct EntryRef<'a> { + /// The kind of object to which `oid` is pointing. + pub mode: tree::EntryMode, + /// The name of the file in the parent tree. + pub filename: &'a BStr, + /// The id of the object representing the entry. + // TODO: figure out how these should be called. id or oid? It's inconsistent around the codebase. + // Answer: make it 'id', as in `git2` + #[cfg_attr(feature = "serde1", serde(borrow))] + pub oid: &'a gix_hash::oid, +} + +impl<'a> PartialOrd for EntryRef<'a> { + fn partial_cmp(&self, other: &Self) -> Option<Ordering> { + Some(self.cmp(other)) + } +} + +impl<'a> Ord for EntryRef<'a> { + /// Entries compare by the common portion of the filename. This is critical for proper functioning of algorithms working on trees. + /// Doing it like this is needed for compatibility with older, potentially broken(?) trees. + fn cmp(&self, other: &Self) -> Ordering { + let len = self.filename.len().min(other.filename.len()); + self.filename[..len].cmp(&other.filename[..len]) + } +} + +/// An entry in a [`Tree`][crate::Tree], similar to an entry in a directory. +#[derive(PartialEq, Eq, Debug, Hash, Clone)] +#[cfg_attr(feature = "serde1", derive(serde::Serialize, serde::Deserialize))] +pub struct Entry { + /// The kind of object to which `oid` is pointing to. + pub mode: EntryMode, + /// The name of the file in the parent tree. + pub filename: BString, + /// The id of the object representing the entry. + pub oid: gix_hash::ObjectId, +} + +impl PartialOrd for Entry { + fn partial_cmp(&self, other: &Self) -> Option<Ordering> { + Some(self.cmp(other)) + } +} + +impl Ord for Entry { + /// Entries compare by the common portion of the filename. This is critical for proper functioning of algorithms working on trees. 
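// Illustrative sketch (editor's note, not part of the vendored source): with the
// comparison below, names are ordered by their shared prefix first and by length
// only as a tie breaker, e.g.
//
//     "a"   < "a.b"   // equal 1-byte prefix, shorter name first
//     "a.b" < "ab"    // '.' < 'b' within the 2-byte common prefix
//
// (`EntryRef` above compares the shared prefix only, without the length tie breaker.)
//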
+ fn cmp(&self, other: &Self) -> Ordering { + let common_len = self.filename.len().min(other.filename.len()); + self.filename[..common_len] + .cmp(&other.filename[..common_len]) + .then_with(|| self.filename.len().cmp(&other.filename.len())) + } +} + +/// Serialization +impl EntryMode { + /// Return the representation as used in the git internal format. + pub fn as_bytes(&self) -> &'static [u8] { + use EntryMode::*; + match self { + Tree => b"40000", + Blob => b"100644", + BlobExecutable => b"100755", + Link => b"120000", + Commit => b"160000", + } + } +} diff --git a/vendor/gix-object/src/tree/ref_iter.rs b/vendor/gix-object/src/tree/ref_iter.rs new file mode 100644 index 000000000..fb3ba2dfc --- /dev/null +++ b/vendor/gix-object/src/tree/ref_iter.rs @@ -0,0 +1,162 @@ +use std::convert::TryFrom; + +use nom::error::ParseError; + +use crate::{tree, tree::EntryRef, TreeRef, TreeRefIter}; + +impl<'a> TreeRefIter<'a> { + /// Instantiate an iterator from the given tree data. + pub fn from_bytes(data: &'a [u8]) -> TreeRefIter<'a> { + TreeRefIter { data } + } +} + +impl<'a> TreeRef<'a> { + /// Deserialize a Tree from `data`. + pub fn from_bytes(data: &'a [u8]) -> Result<TreeRef<'a>, crate::decode::Error> { + decode::tree(data).map(|(_, t)| t).map_err(crate::decode::Error::from) + } + + /// Create an instance of the empty tree. + /// + /// It's particularly useful as static part of a program. + pub const fn empty() -> TreeRef<'static> { + TreeRef { entries: Vec::new() } + } +} + +impl<'a> TreeRefIter<'a> { + /// Consume self and return all parsed entries. + pub fn entries(self) -> Result<Vec<EntryRef<'a>>, crate::decode::Error> { + self.collect() + } +} + +impl<'a> Iterator for TreeRefIter<'a> { + type Item = Result<EntryRef<'a>, crate::decode::Error>; + + fn next(&mut self) -> Option<Self::Item> { + if self.data.is_empty() { + return None; + } + match decode::fast_entry(self.data) { + Some((data_left, entry)) => { + self.data = data_left; + Some(Ok(entry)) + } + None => { + self.data = &[]; + #[allow(clippy::unit_arg)] + Some(Err(nom::Err::Error(crate::decode::ParseError::from_error_kind( + &[] as &[u8], + nom::error::ErrorKind::MapRes, + )) + .into())) + } + } + } +} + +impl<'a> TryFrom<&'a [u8]> for tree::EntryMode { + type Error = &'a [u8]; + + fn try_from(mode: &'a [u8]) -> Result<Self, Self::Error> { + Ok(match mode { + b"40000" => tree::EntryMode::Tree, + b"100644" => tree::EntryMode::Blob, + b"100755" => tree::EntryMode::BlobExecutable, + b"120000" => tree::EntryMode::Link, + b"160000" => tree::EntryMode::Commit, + b"100664" => tree::EntryMode::Blob, // rare and found in the linux kernel + b"100640" => tree::EntryMode::Blob, // rare and found in the Rust repo + _ => return Err(mode), + }) + } +} + +impl TryFrom<u32> for tree::EntryMode { + type Error = u32; + + fn try_from(mode: u32) -> Result<Self, Self::Error> { + Ok(match mode { + 0o40000 => tree::EntryMode::Tree, + 0o100644 => tree::EntryMode::Blob, + 0o100755 => tree::EntryMode::BlobExecutable, + 0o120000 => tree::EntryMode::Link, + 0o160000 => tree::EntryMode::Commit, + 0o100664 => tree::EntryMode::Blob, // rare and found in the linux kernel + 0o100640 => tree::EntryMode::Blob, // rare and found in the Rust repo + _ => return Err(mode), + }) + } +} + +mod decode { + use std::convert::TryFrom; + + use bstr::ByteSlice; + use nom::{ + bytes::complete::{tag, take, take_while1, take_while_m_n}, + character::is_digit, + combinator::all_consuming, + error::ParseError, + multi::many0, + sequence::terminated, + IResult, + }; + + use 
crate::{parse::SPACE, tree, tree::EntryRef, TreeRef}; + + const NULL: &[u8] = b"\0"; + + pub fn fast_entry(i: &[u8]) -> Option<(&[u8], EntryRef<'_>)> { + let mut mode = 0u32; + let mut spacer_pos = 1; + for b in i.iter().take_while(|b| **b != b' ') { + if *b < b'0' || *b > b'7' { + return None; + } + mode = (mode << 3) + (b - b'0') as u32; + spacer_pos += 1; + } + let (_, i) = i.split_at(spacer_pos); + let mode = tree::EntryMode::try_from(mode).ok()?; + let (filename, i) = i.split_at(i.find_byte(0)?); + let i = &i[1..]; + const HASH_LEN_FIXME: usize = 20; // TODO: know actual /desired length or we may overshoot + let (oid, i) = match i.len() { + len if len < HASH_LEN_FIXME => return None, + _ => i.split_at(20), + }; + Some(( + i, + EntryRef { + mode, + filename: filename.as_bstr(), + oid: gix_hash::oid::try_from_bytes(oid).expect("we counted exactly 20 bytes"), + }, + )) + } + + pub fn entry<'a, E: ParseError<&'a [u8]>>(i: &'a [u8]) -> IResult<&[u8], EntryRef<'_>, E> { + let (i, mode) = terminated(take_while_m_n(5, 6, is_digit), tag(SPACE))(i)?; + let mode = tree::EntryMode::try_from(mode) + .map_err(|invalid| nom::Err::Error(E::from_error_kind(invalid, nom::error::ErrorKind::MapRes)))?; + let (i, filename) = terminated(take_while1(|b| b != NULL[0]), tag(NULL))(i)?; + let (i, oid) = take(20u8)(i)?; // TODO: make this compatible with other hash lengths + + Ok(( + i, + EntryRef { + mode, + filename: filename.as_bstr(), + oid: gix_hash::oid::try_from_bytes(oid).expect("we counted exactly 20 bytes"), + }, + )) + } + + pub fn tree<'a, E: ParseError<&'a [u8]>>(i: &'a [u8]) -> IResult<&'a [u8], TreeRef<'a>, E> { + let (i, entries) = all_consuming(many0(entry))(i)?; + Ok((i, TreeRef { entries })) + } +} diff --git a/vendor/gix-object/src/tree/write.rs b/vendor/gix-object/src/tree/write.rs new file mode 100644 index 000000000..1e8edc024 --- /dev/null +++ b/vendor/gix-object/src/tree/write.rs @@ -0,0 +1,111 @@ +use std::io; + +use bstr::{BString, ByteSlice}; + +use crate::{ + encode::SPACE, + tree::{Entry, EntryRef}, + Kind, Tree, TreeRef, +}; + +/// The Error used in [`Tree::write_to()`][crate::WriteTo::write_to()]. +#[derive(Debug, thiserror::Error)] +#[allow(missing_docs)] +pub enum Error { + #[error("Newlines are invalid in file paths: {name:?}")] + NewlineInFilename { name: BString }, +} + +impl From<Error> for io::Error { + fn from(err: Error) -> Self { + io::Error::new(io::ErrorKind::Other, err) + } +} + +/// Serialization +impl crate::WriteTo for Tree { + /// Serialize this tree to `out` in the git internal format. + fn write_to(&self, mut out: impl io::Write) -> io::Result<()> { + debug_assert_eq!( + &{ + let mut entries_sorted = self.entries.clone(); + entries_sorted.sort(); + entries_sorted + }, + &self.entries, + "entries for serialization must be sorted by filename" + ); + for Entry { mode, filename, oid } in &self.entries { + out.write_all(mode.as_bytes())?; + out.write_all(SPACE)?; + + if filename.find_byte(b'\n').is_some() { + return Err(Error::NewlineInFilename { + name: (*filename).to_owned(), + } + .into()); + } + out.write_all(filename)?; + out.write_all(&[b'\0'])?; + + out.write_all(oid.as_bytes())?; + } + Ok(()) + } + + fn size(&self) -> usize { + self.entries + .iter() + .map(|Entry { mode, filename, oid }| mode.as_bytes().len() + 1 + filename.len() + 1 + oid.as_bytes().len()) + .sum() + } + + fn kind(&self) -> Kind { + Kind::Tree + } +} + +/// Serialization +impl<'a> crate::WriteTo for TreeRef<'a> { + /// Serialize this tree to `out` in the git internal format. 
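// Illustrative sketch (editor's note, not part of the vendored source): as with
// the owned `Tree` above, entries must already be sorted (checked only by a debug
// assertion) and filenames must not contain newlines. With hypothetical entries
// `a` and `b`:
//
//     let mut tree = Tree { entries: vec![b, a] };
//     tree.entries.sort();          // Ord from tree/mod.rs
//     let mut buf = Vec::new();
//     tree.write_to(&mut buf)?;     // "<mode> <name>\0<raw oid bytes>" per entry
//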
+ fn write_to(&self, mut out: impl io::Write) -> io::Result<()> { + debug_assert_eq!( + &{ + let mut entries_sorted = self.entries.clone(); + entries_sorted.sort(); + entries_sorted + }, + &self.entries, + "entries for serialization must be sorted by filename" + ); + for EntryRef { mode, filename, oid } in &self.entries { + out.write_all(mode.as_bytes())?; + out.write_all(SPACE)?; + + if filename.find_byte(b'\n').is_some() { + return Err(Error::NewlineInFilename { + name: (*filename).to_owned(), + } + .into()); + } + out.write_all(filename)?; + out.write_all(&[b'\0'])?; + + out.write_all(oid.as_bytes())?; + } + Ok(()) + } + + fn size(&self) -> usize { + self.entries + .iter() + .map(|EntryRef { mode, filename, oid }| { + mode.as_bytes().len() + 1 + filename.len() + 1 + oid.as_bytes().len() + }) + .sum() + } + + fn kind(&self) -> Kind { + Kind::Tree + } +} |
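A sketch of how the pieces added in this patch fit together (not part of the patch itself; it assumes the crate-root re-exports implied by the `use crate::{…}` imports above, and that the loose layout "<kind> <size>\0<body>" produced by `loose_header()` is what `ObjectRef::from_loose()` consumes):

    use gix_object::{Blob, Object, ObjectRef, WriteTo};

    fn roundtrip() -> Result<(), Box<dyn std::error::Error>> {
        let obj = Object::Blob(Blob { data: b"hello".to_vec() });

        // Assemble the loose representation: header first, then the body verbatim.
        let mut buf = obj.loose_header().to_vec();
        obj.write_to(&mut buf)?;

        // Parse it back; the declared size is checked against the actual body.
        let parsed = ObjectRef::from_loose(&buf)?;
        assert_eq!(parsed.kind(), gix_object::Kind::Blob);
        assert_eq!(parsed.into_blob().map(|b| b.data), Some(&b"hello"[..]));
        Ok(())
    }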