diff options
Diffstat (limited to 'vendor/gix-object/src/tree')
-rw-r--r-- | vendor/gix-object/src/tree/mod.rs | 136 | ||||
-rw-r--r-- | vendor/gix-object/src/tree/ref_iter.rs | 162 | ||||
-rw-r--r-- | vendor/gix-object/src/tree/write.rs | 111 |
3 files changed, 409 insertions, 0 deletions
diff --git a/vendor/gix-object/src/tree/mod.rs b/vendor/gix-object/src/tree/mod.rs new file mode 100644 index 000000000..688689c08 --- /dev/null +++ b/vendor/gix-object/src/tree/mod.rs @@ -0,0 +1,136 @@ +use std::cmp::Ordering; + +use crate::{ + bstr::{BStr, BString}, + tree, +}; + +mod ref_iter; +/// +pub mod write; + +/// The mode of items storable in a tree, similar to the file mode on a unix file system. +/// +/// Used in [mutable::Entry][crate::tree::Entry] and [EntryRef]. +#[derive(Clone, Copy, PartialEq, Eq, Debug, Ord, PartialOrd, Hash)] +#[repr(u16)] +#[cfg_attr(feature = "serde1", derive(serde::Serialize, serde::Deserialize))] +pub enum EntryMode { + /// A tree, or directory + Tree = 0o040000u16, + /// A file that is not executable + Blob = 0o100644, + /// A file that is executable + BlobExecutable = 0o100755, + /// A symbolic link + Link = 0o120000, + /// A commit of a git submodule + Commit = 0o160000, +} + +impl EntryMode { + /// Return true if this entry mode represents a Tree/directory + pub fn is_tree(&self) -> bool { + *self == EntryMode::Tree + } + + /// Return true if this entry mode represents anything BUT Tree/directory + pub fn is_no_tree(&self) -> bool { + *self != EntryMode::Tree + } + + /// Return true if the entry is any kind of blob. + pub fn is_blob(&self) -> bool { + matches!(self, EntryMode::Blob | EntryMode::BlobExecutable) + } + + /// Return true if the entry is any kind of blob or symlink. + pub fn is_blob_or_symlink(&self) -> bool { + matches!(self, EntryMode::Blob | EntryMode::BlobExecutable | EntryMode::Link) + } + + /// Represent the mode as descriptive string. + pub fn as_str(&self) -> &'static str { + use EntryMode::*; + match self { + Tree => "tree", + Blob => "blob", + BlobExecutable => "exe", + Link => "link", + Commit => "commit", + } + } +} + +/// An element of a [`TreeRef`][crate::TreeRef::entries]. +#[derive(PartialEq, Eq, Debug, Hash, Clone)] +#[cfg_attr(feature = "serde1", derive(serde::Serialize, serde::Deserialize))] +pub struct EntryRef<'a> { + /// The kind of object to which `oid` is pointing. + pub mode: tree::EntryMode, + /// The name of the file in the parent tree. + pub filename: &'a BStr, + /// The id of the object representing the entry. + // TODO: figure out how these should be called. id or oid? It's inconsistent around the codebase. + // Answer: make it 'id', as in `git2` + #[cfg_attr(feature = "serde1", serde(borrow))] + pub oid: &'a gix_hash::oid, +} + +impl<'a> PartialOrd for EntryRef<'a> { + fn partial_cmp(&self, other: &Self) -> Option<Ordering> { + Some(self.cmp(other)) + } +} + +impl<'a> Ord for EntryRef<'a> { + /// Entries compare by the common portion of the filename. This is critical for proper functioning of algorithms working on trees. + /// Doing it like this is needed for compatibility with older, potentially broken(?) trees. + fn cmp(&self, other: &Self) -> Ordering { + let len = self.filename.len().min(other.filename.len()); + self.filename[..len].cmp(&other.filename[..len]) + } +} + +/// An entry in a [`Tree`][crate::Tree], similar to an entry in a directory. +#[derive(PartialEq, Eq, Debug, Hash, Clone)] +#[cfg_attr(feature = "serde1", derive(serde::Serialize, serde::Deserialize))] +pub struct Entry { + /// The kind of object to which `oid` is pointing to. + pub mode: EntryMode, + /// The name of the file in the parent tree. + pub filename: BString, + /// The id of the object representing the entry. + pub oid: gix_hash::ObjectId, +} + +impl PartialOrd for Entry { + fn partial_cmp(&self, other: &Self) -> Option<Ordering> { + Some(self.cmp(other)) + } +} + +impl Ord for Entry { + /// Entries compare by the common portion of the filename. This is critical for proper functioning of algorithms working on trees. + fn cmp(&self, other: &Self) -> Ordering { + let common_len = self.filename.len().min(other.filename.len()); + self.filename[..common_len] + .cmp(&other.filename[..common_len]) + .then_with(|| self.filename.len().cmp(&other.filename.len())) + } +} + +/// Serialization +impl EntryMode { + /// Return the representation as used in the git internal format. + pub fn as_bytes(&self) -> &'static [u8] { + use EntryMode::*; + match self { + Tree => b"40000", + Blob => b"100644", + BlobExecutable => b"100755", + Link => b"120000", + Commit => b"160000", + } + } +} diff --git a/vendor/gix-object/src/tree/ref_iter.rs b/vendor/gix-object/src/tree/ref_iter.rs new file mode 100644 index 000000000..fb3ba2dfc --- /dev/null +++ b/vendor/gix-object/src/tree/ref_iter.rs @@ -0,0 +1,162 @@ +use std::convert::TryFrom; + +use nom::error::ParseError; + +use crate::{tree, tree::EntryRef, TreeRef, TreeRefIter}; + +impl<'a> TreeRefIter<'a> { + /// Instantiate an iterator from the given tree data. + pub fn from_bytes(data: &'a [u8]) -> TreeRefIter<'a> { + TreeRefIter { data } + } +} + +impl<'a> TreeRef<'a> { + /// Deserialize a Tree from `data`. + pub fn from_bytes(data: &'a [u8]) -> Result<TreeRef<'a>, crate::decode::Error> { + decode::tree(data).map(|(_, t)| t).map_err(crate::decode::Error::from) + } + + /// Create an instance of the empty tree. + /// + /// It's particularly useful as static part of a program. + pub const fn empty() -> TreeRef<'static> { + TreeRef { entries: Vec::new() } + } +} + +impl<'a> TreeRefIter<'a> { + /// Consume self and return all parsed entries. + pub fn entries(self) -> Result<Vec<EntryRef<'a>>, crate::decode::Error> { + self.collect() + } +} + +impl<'a> Iterator for TreeRefIter<'a> { + type Item = Result<EntryRef<'a>, crate::decode::Error>; + + fn next(&mut self) -> Option<Self::Item> { + if self.data.is_empty() { + return None; + } + match decode::fast_entry(self.data) { + Some((data_left, entry)) => { + self.data = data_left; + Some(Ok(entry)) + } + None => { + self.data = &[]; + #[allow(clippy::unit_arg)] + Some(Err(nom::Err::Error(crate::decode::ParseError::from_error_kind( + &[] as &[u8], + nom::error::ErrorKind::MapRes, + )) + .into())) + } + } + } +} + +impl<'a> TryFrom<&'a [u8]> for tree::EntryMode { + type Error = &'a [u8]; + + fn try_from(mode: &'a [u8]) -> Result<Self, Self::Error> { + Ok(match mode { + b"40000" => tree::EntryMode::Tree, + b"100644" => tree::EntryMode::Blob, + b"100755" => tree::EntryMode::BlobExecutable, + b"120000" => tree::EntryMode::Link, + b"160000" => tree::EntryMode::Commit, + b"100664" => tree::EntryMode::Blob, // rare and found in the linux kernel + b"100640" => tree::EntryMode::Blob, // rare and found in the Rust repo + _ => return Err(mode), + }) + } +} + +impl TryFrom<u32> for tree::EntryMode { + type Error = u32; + + fn try_from(mode: u32) -> Result<Self, Self::Error> { + Ok(match mode { + 0o40000 => tree::EntryMode::Tree, + 0o100644 => tree::EntryMode::Blob, + 0o100755 => tree::EntryMode::BlobExecutable, + 0o120000 => tree::EntryMode::Link, + 0o160000 => tree::EntryMode::Commit, + 0o100664 => tree::EntryMode::Blob, // rare and found in the linux kernel + 0o100640 => tree::EntryMode::Blob, // rare and found in the Rust repo + _ => return Err(mode), + }) + } +} + +mod decode { + use std::convert::TryFrom; + + use bstr::ByteSlice; + use nom::{ + bytes::complete::{tag, take, take_while1, take_while_m_n}, + character::is_digit, + combinator::all_consuming, + error::ParseError, + multi::many0, + sequence::terminated, + IResult, + }; + + use crate::{parse::SPACE, tree, tree::EntryRef, TreeRef}; + + const NULL: &[u8] = b"\0"; + + pub fn fast_entry(i: &[u8]) -> Option<(&[u8], EntryRef<'_>)> { + let mut mode = 0u32; + let mut spacer_pos = 1; + for b in i.iter().take_while(|b| **b != b' ') { + if *b < b'0' || *b > b'7' { + return None; + } + mode = (mode << 3) + (b - b'0') as u32; + spacer_pos += 1; + } + let (_, i) = i.split_at(spacer_pos); + let mode = tree::EntryMode::try_from(mode).ok()?; + let (filename, i) = i.split_at(i.find_byte(0)?); + let i = &i[1..]; + const HASH_LEN_FIXME: usize = 20; // TODO: know actual /desired length or we may overshoot + let (oid, i) = match i.len() { + len if len < HASH_LEN_FIXME => return None, + _ => i.split_at(20), + }; + Some(( + i, + EntryRef { + mode, + filename: filename.as_bstr(), + oid: gix_hash::oid::try_from_bytes(oid).expect("we counted exactly 20 bytes"), + }, + )) + } + + pub fn entry<'a, E: ParseError<&'a [u8]>>(i: &'a [u8]) -> IResult<&[u8], EntryRef<'_>, E> { + let (i, mode) = terminated(take_while_m_n(5, 6, is_digit), tag(SPACE))(i)?; + let mode = tree::EntryMode::try_from(mode) + .map_err(|invalid| nom::Err::Error(E::from_error_kind(invalid, nom::error::ErrorKind::MapRes)))?; + let (i, filename) = terminated(take_while1(|b| b != NULL[0]), tag(NULL))(i)?; + let (i, oid) = take(20u8)(i)?; // TODO: make this compatible with other hash lengths + + Ok(( + i, + EntryRef { + mode, + filename: filename.as_bstr(), + oid: gix_hash::oid::try_from_bytes(oid).expect("we counted exactly 20 bytes"), + }, + )) + } + + pub fn tree<'a, E: ParseError<&'a [u8]>>(i: &'a [u8]) -> IResult<&'a [u8], TreeRef<'a>, E> { + let (i, entries) = all_consuming(many0(entry))(i)?; + Ok((i, TreeRef { entries })) + } +} diff --git a/vendor/gix-object/src/tree/write.rs b/vendor/gix-object/src/tree/write.rs new file mode 100644 index 000000000..1e8edc024 --- /dev/null +++ b/vendor/gix-object/src/tree/write.rs @@ -0,0 +1,111 @@ +use std::io; + +use bstr::{BString, ByteSlice}; + +use crate::{ + encode::SPACE, + tree::{Entry, EntryRef}, + Kind, Tree, TreeRef, +}; + +/// The Error used in [`Tree::write_to()`][crate::WriteTo::write_to()]. +#[derive(Debug, thiserror::Error)] +#[allow(missing_docs)] +pub enum Error { + #[error("Newlines are invalid in file paths: {name:?}")] + NewlineInFilename { name: BString }, +} + +impl From<Error> for io::Error { + fn from(err: Error) -> Self { + io::Error::new(io::ErrorKind::Other, err) + } +} + +/// Serialization +impl crate::WriteTo for Tree { + /// Serialize this tree to `out` in the git internal format. + fn write_to(&self, mut out: impl io::Write) -> io::Result<()> { + debug_assert_eq!( + &{ + let mut entries_sorted = self.entries.clone(); + entries_sorted.sort(); + entries_sorted + }, + &self.entries, + "entries for serialization must be sorted by filename" + ); + for Entry { mode, filename, oid } in &self.entries { + out.write_all(mode.as_bytes())?; + out.write_all(SPACE)?; + + if filename.find_byte(b'\n').is_some() { + return Err(Error::NewlineInFilename { + name: (*filename).to_owned(), + } + .into()); + } + out.write_all(filename)?; + out.write_all(&[b'\0'])?; + + out.write_all(oid.as_bytes())?; + } + Ok(()) + } + + fn size(&self) -> usize { + self.entries + .iter() + .map(|Entry { mode, filename, oid }| mode.as_bytes().len() + 1 + filename.len() + 1 + oid.as_bytes().len()) + .sum() + } + + fn kind(&self) -> Kind { + Kind::Tree + } +} + +/// Serialization +impl<'a> crate::WriteTo for TreeRef<'a> { + /// Serialize this tree to `out` in the git internal format. + fn write_to(&self, mut out: impl io::Write) -> io::Result<()> { + debug_assert_eq!( + &{ + let mut entries_sorted = self.entries.clone(); + entries_sorted.sort(); + entries_sorted + }, + &self.entries, + "entries for serialization must be sorted by filename" + ); + for EntryRef { mode, filename, oid } in &self.entries { + out.write_all(mode.as_bytes())?; + out.write_all(SPACE)?; + + if filename.find_byte(b'\n').is_some() { + return Err(Error::NewlineInFilename { + name: (*filename).to_owned(), + } + .into()); + } + out.write_all(filename)?; + out.write_all(&[b'\0'])?; + + out.write_all(oid.as_bytes())?; + } + Ok(()) + } + + fn size(&self) -> usize { + self.entries + .iter() + .map(|EntryRef { mode, filename, oid }| { + mode.as_bytes().len() + 1 + filename.len() + 1 + oid.as_bytes().len() + }) + .sum() + } + + fn kind(&self) -> Kind { + Kind::Tree + } +} |