summaryrefslogtreecommitdiffstats
path: root/vendor/gix-commitgraph/src
diff options
context:
space:
mode:
Diffstat (limited to 'vendor/gix-commitgraph/src')
-rw-r--r--vendor/gix-commitgraph/src/access.rs97
-rw-r--r--vendor/gix-commitgraph/src/file/access.rs138
-rw-r--r--vendor/gix-commitgraph/src/file/commit.rs258
-rw-r--r--vendor/gix-commitgraph/src/file/init.rs257
-rw-r--r--vendor/gix-commitgraph/src/file/mod.rs46
-rw-r--r--vendor/gix-commitgraph/src/file/verify.rs173
-rw-r--r--vendor/gix-commitgraph/src/init.rs130
-rw-r--r--vendor/gix-commitgraph/src/lib.rs77
-rw-r--r--vendor/gix-commitgraph/src/verify.rs205
9 files changed, 1381 insertions, 0 deletions
diff --git a/vendor/gix-commitgraph/src/access.rs b/vendor/gix-commitgraph/src/access.rs
new file mode 100644
index 000000000..ce3688f78
--- /dev/null
+++ b/vendor/gix-commitgraph/src/access.rs
@@ -0,0 +1,97 @@
+use crate::{file, file::Commit, File, Graph, Position};
+
+/// Access
+impl Graph {
+ /// Returns the commit at the given position `pos`.
+ ///
+ /// # Panics
+ /// If `pos` is greater or equal to [`num_commits()`][Graph::num_commits()].
+ pub fn commit_at(&self, pos: Position) -> Commit<'_> {
+ let r = self.lookup_by_pos(pos);
+ r.file.commit_at(r.pos)
+ }
+
+ /// Returns the commit matching the given `id`.
+ pub fn commit_by_id(&self, id: impl AsRef<gix_hash::oid>) -> Option<Commit<'_>> {
+ let r = self.lookup_by_id(id.as_ref())?;
+ Some(r.file.commit_at(r.file_pos))
+ }
+
+ /// Returns the `hash` at the given position `pos`.
+ ///
+ /// # Panics
+ /// If `pos` is greater or equal to [`num_commits()`][Graph::num_commits()].
+ pub fn id_at(&self, pos: Position) -> &gix_hash::oid {
+ let r = self.lookup_by_pos(pos);
+ r.file.id_at(r.pos)
+ }
+
+ /// Iterate over commits in unsorted order.
+ pub fn iter_commits(&self) -> impl Iterator<Item = Commit<'_>> {
+ self.files.iter().flat_map(|file| file.iter_commits())
+ }
+
+ /// Iterate over commit IDs in unsorted order.
+ pub fn iter_ids(&self) -> impl Iterator<Item = &gix_hash::oid> {
+ self.files.iter().flat_map(|file| file.iter_ids())
+ }
+
+ /// Translate the given `id` to its position in the file.
+ pub fn lookup(&self, id: impl AsRef<gix_hash::oid>) -> Option<Position> {
+ Some(self.lookup_by_id(id.as_ref())?.graph_pos)
+ }
+
+ /// Returns the number of commits stored in this file.
+ pub fn num_commits(&self) -> u32 {
+ self.files.iter().map(|f| f.num_commits()).sum()
+ }
+}
+
+/// Access fundamentals
+impl Graph {
+ fn lookup_by_id(&self, id: &gix_hash::oid) -> Option<LookupByIdResult<'_>> {
+ let mut current_file_start = 0;
+ for file in &self.files {
+ if let Some(lex_pos) = file.lookup(id) {
+ return Some(LookupByIdResult {
+ file,
+ file_pos: lex_pos,
+ graph_pos: Position(current_file_start + lex_pos.0),
+ });
+ }
+ current_file_start += file.num_commits();
+ }
+ None
+ }
+
+ fn lookup_by_pos(&self, pos: Position) -> LookupByPositionResult<'_> {
+ let mut remaining = pos.0;
+ for (file_index, file) in self.files.iter().enumerate() {
+ match remaining.checked_sub(file.num_commits()) {
+ Some(v) => remaining = v,
+ None => {
+ return LookupByPositionResult {
+ file,
+ _file_index: file_index,
+ pos: file::Position(remaining),
+ }
+ }
+ }
+ }
+ panic!("graph position too large: {}", pos.0);
+ }
+}
+
+#[derive(Clone)]
+struct LookupByIdResult<'a> {
+ pub file: &'a File,
+ pub graph_pos: Position,
+ pub file_pos: file::Position,
+}
+
+#[derive(Clone)]
+struct LookupByPositionResult<'a> {
+ pub file: &'a File,
+ pub _file_index: usize,
+ pub pos: file::Position,
+}
diff --git a/vendor/gix-commitgraph/src/file/access.rs b/vendor/gix-commitgraph/src/file/access.rs
new file mode 100644
index 000000000..d3c32d8bb
--- /dev/null
+++ b/vendor/gix-commitgraph/src/file/access.rs
@@ -0,0 +1,138 @@
+use std::{
+ convert::TryInto,
+ fmt::{Debug, Formatter},
+ path::Path,
+};
+
+use crate::{
+ file::{self, commit::Commit, COMMIT_DATA_ENTRY_SIZE_SANS_HASH},
+ File,
+};
+
+/// Access
+impl File {
+ /// The number of base graphs that this file depends on.
+ pub fn base_graph_count(&self) -> u8 {
+ self.base_graph_count
+ }
+
+ /// Returns the commit data for the commit located at the given lexigraphical position.
+ ///
+ /// `pos` must range from 0 to `self.num_commits()`.
+ ///
+ /// # Panics
+ ///
+ /// Panics if `pos` is out of bounds.
+ pub fn commit_at(&self, pos: file::Position) -> Commit<'_> {
+ Commit::new(self, pos)
+ }
+
+ /// The kind of hash used in this File.
+ ///
+ /// Note that it is always conforming to the hash used in the owning repository.
+ pub fn object_hash(&self) -> gix_hash::Kind {
+ self.object_hash
+ }
+
+ /// Returns an object id at the given index in our list of (sorted) hashes.
+ /// The position ranges from 0 to `self.num_commits()`
+ // copied from gix-odb/src/pack/index/ext
+ pub fn id_at(&self, pos: file::Position) -> &gix_hash::oid {
+ assert!(
+ pos.0 < self.num_commits(),
+ "expected lexigraphical position less than {}, got {}",
+ self.num_commits(),
+ pos.0
+ );
+ let pos: usize = pos
+ .0
+ .try_into()
+ .expect("an architecture able to hold 32 bits of integer");
+ let start = self.oid_lookup_offset + (pos * self.hash_len);
+ gix_hash::oid::from_bytes_unchecked(&self.data[start..][..self.hash_len])
+ }
+
+ /// Return an iterator over all object hashes stored in the base graph.
+ pub fn iter_base_graph_ids(&self) -> impl Iterator<Item = &gix_hash::oid> {
+ let start = self.base_graphs_list_offset.unwrap_or(0);
+ let base_graphs_list = &self.data[start..][..self.hash_len * usize::from(self.base_graph_count)];
+ base_graphs_list
+ .chunks(self.hash_len)
+ .map(gix_hash::oid::from_bytes_unchecked)
+ }
+
+ /// return an iterator over all commits in this file.
+ pub fn iter_commits(&self) -> impl Iterator<Item = Commit<'_>> {
+ (0..self.num_commits()).map(move |i| self.commit_at(file::Position(i)))
+ }
+
+ /// Return an iterator over all object hashes stored in this file.
+ pub fn iter_ids(&self) -> impl Iterator<Item = &gix_hash::oid> {
+ (0..self.num_commits()).map(move |i| self.id_at(file::Position(i)))
+ }
+
+ /// Translate the given object hash to its position within this file, if present.
+ // copied from gix-odb/src/pack/index/ext
+ pub fn lookup(&self, id: impl AsRef<gix_hash::oid>) -> Option<file::Position> {
+ let id = id.as_ref();
+ let first_byte = usize::from(id.first_byte());
+ let mut upper_bound = self.fan[first_byte];
+ let mut lower_bound = if first_byte != 0 { self.fan[first_byte - 1] } else { 0 };
+
+ while lower_bound < upper_bound {
+ let mid = (lower_bound + upper_bound) / 2;
+ let mid_sha = self.id_at(file::Position(mid));
+
+ use std::cmp::Ordering::*;
+ match id.cmp(mid_sha) {
+ Less => upper_bound = mid,
+ Equal => return Some(file::Position(mid)),
+ Greater => lower_bound = mid + 1,
+ }
+ }
+ None
+ }
+
+ /// Returns the number of commits in this graph file.
+ ///
+ /// The maximum valid `file::Position` that can be used with this file is one less than
+ /// `num_commits()`.
+ pub fn num_commits(&self) -> u32 {
+ self.fan[255]
+ }
+
+ /// Returns the path to this file.
+ pub fn path(&self) -> &Path {
+ &self.path
+ }
+}
+
+impl File {
+ /// Returns the byte slice for the given commit in this file's Commit Data (CDAT) chunk.
+ pub(crate) fn commit_data_bytes(&self, pos: file::Position) -> &[u8] {
+ assert!(
+ pos.0 < self.num_commits(),
+ "expected lexigraphical position less than {}, got {}",
+ self.num_commits(),
+ pos.0
+ );
+ let pos: usize = pos
+ .0
+ .try_into()
+ .expect("an architecture able to hold 32 bits of integer");
+ let entry_size = self.hash_len + COMMIT_DATA_ENTRY_SIZE_SANS_HASH;
+ let start = self.commit_data_offset + (pos * entry_size);
+ &self.data[start..][..entry_size]
+ }
+
+ /// Returns the byte slice for this file's entire Extra Edge List (EDGE) chunk.
+ pub(crate) fn extra_edges_data(&self) -> Option<&[u8]> {
+ Some(&self.data[self.extra_edges_list_range.clone()?])
+ }
+}
+
+impl Debug for File {
+ fn fmt(&self, f: &mut Formatter<'_>) -> std::fmt::Result {
+ write!(f, r#"File("{:?}")"#, self.path.display())
+ }
+}
diff --git a/vendor/gix-commitgraph/src/file/commit.rs b/vendor/gix-commitgraph/src/file/commit.rs
new file mode 100644
index 000000000..14fe15e84
--- /dev/null
+++ b/vendor/gix-commitgraph/src/file/commit.rs
@@ -0,0 +1,258 @@
+//! Low-level operations on individual commits.
+use std::{
+ convert::TryInto,
+ fmt::{Debug, Formatter},
+ slice::Chunks,
+};
+
+use crate::{
+ file::{self, EXTENDED_EDGES_MASK, LAST_EXTENDED_EDGE_MASK, NO_PARENT},
+ File, Position,
+};
+
+/// The error used in the [`file::commit`][self] module.
+#[derive(thiserror::Error, Debug)]
+#[allow(missing_docs)]
+pub enum Error {
+ #[error("commit {0}'s extra edges overflows the commit-graph file's extra edges list")]
+ ExtraEdgesListOverflow(gix_hash::ObjectId),
+ #[error("commit {0}'s first parent is an extra edge index, which is invalid")]
+ FirstParentIsExtraEdgeIndex(gix_hash::ObjectId),
+ #[error("commit {0} has extra edges, but commit-graph file has no extra edges list")]
+ MissingExtraEdgesList(gix_hash::ObjectId),
+ #[error("commit {0} has a second parent but not a first parent")]
+ SecondParentWithoutFirstParent(gix_hash::ObjectId),
+}
+
+/// A commit as stored in a [`File`].
+#[derive(Copy, Clone)]
+pub struct Commit<'a> {
+ file: &'a File,
+ pos: file::Position,
+ // We can parse the below fields lazily if needed.
+ commit_timestamp: u64,
+ generation: u32,
+ parent1: ParentEdge,
+ parent2: ParentEdge,
+ root_tree_id: &'a gix_hash::oid,
+}
+
+#[inline]
+fn read_u32(b: &[u8]) -> u32 {
+ u32::from_be_bytes(b.try_into().unwrap())
+}
+
+impl<'a> Commit<'a> {
+ pub(crate) fn new(file: &'a File, pos: file::Position) -> Self {
+ let bytes = file.commit_data_bytes(pos);
+ Commit {
+ file,
+ pos,
+ root_tree_id: gix_hash::oid::from_bytes_unchecked(&bytes[..file.hash_len]),
+ parent1: ParentEdge::from_raw(read_u32(&bytes[file.hash_len..][..4])),
+ parent2: ParentEdge::from_raw(read_u32(&bytes[file.hash_len + 4..][..4])),
+ // TODO: Add support for corrected commit date offset overflow.
+ // See https://github.com/git/git/commit/e8b63005c48696a26f976f5f9b0ccaf1983e439d and
+ // https://github.com/git/git/commit/f90fca638e99a031dce8e3aca72427b2f9b4bb38 for more details and hints at a test.
+ generation: read_u32(&bytes[file.hash_len + 8..][..4]) >> 2,
+ commit_timestamp: u64::from_be_bytes(bytes[file.hash_len + 8..][..8].try_into().unwrap())
+ & 0x0003_ffff_ffff,
+ }
+ }
+
+ /// Returns the committer timestamp of this commit.
+ ///
+ /// The value is the number of seconds since 1970-01-01 00:00:00 UTC.
+ pub fn committer_timestamp(&self) -> u64 {
+ self.commit_timestamp
+ }
+
+ /// Returns the generation number of this commit.
+ ///
+ /// Commits without parents have generation number 1. Commits with parents have a generation
+ /// number that is the max of their parents' generation numbers + 1.
+ pub fn generation(&self) -> u32 {
+ self.generation
+ }
+
+ /// Returns an iterator over the parent positions for lookup in the owning [Graph][crate::Graph].
+ pub fn iter_parents(self) -> Parents<'a> {
+ // I didn't find a combinator approach that a) was as strict as ParentIterator, b) supported
+ // fuse-after-first-error behavior, and b) was significantly shorter or more understandable
+ // than ParentIterator. So here we are.
+ Parents {
+ commit_data: self,
+ state: ParentIteratorState::First,
+ }
+ }
+
+ /// Returns the hash of this commit.
+ pub fn id(&self) -> &'a gix_hash::oid {
+ self.file.id_at(self.pos)
+ }
+
+ /// Returns the first parent of this commit.
+ pub fn parent1(&self) -> Result<Option<Position>, Error> {
+ self.iter_parents().next().transpose()
+ }
+
+ /// Returns the position at which this commit is stored in the parent [File].
+ pub fn position(&self) -> file::Position {
+ self.pos
+ }
+
+ /// Return the hash of the tree this commit points to.
+ pub fn root_tree_id(&self) -> &gix_hash::oid {
+ self.root_tree_id
+ }
+}
+
+impl<'a> Debug for Commit<'a> {
+ fn fmt(&self, f: &mut Formatter<'_>) -> std::fmt::Result {
+ write!(
+ f,
+ "Commit {{ id: {}, lex_pos: {}, generation: {}, root_tree_id: {}, parent1: {:?}, parent2: {:?} }}",
+ self.id(),
+ self.pos,
+ self.generation(),
+ self.root_tree_id(),
+ self.parent1,
+ self.parent2,
+ )
+ }
+}
+
+impl<'a> Eq for Commit<'a> {}
+
+impl<'a> PartialEq for Commit<'a> {
+ fn eq(&self, other: &Self) -> bool {
+ std::ptr::eq(self.file, other.file) && self.pos == other.pos
+ }
+}
+
+/// An iterator over parents of a [`Commit`].
+pub struct Parents<'a> {
+ commit_data: Commit<'a>,
+ state: ParentIteratorState<'a>,
+}
+
+impl<'a> Iterator for Parents<'a> {
+ type Item = Result<Position, Error>;
+
+ fn next(&mut self) -> Option<Self::Item> {
+ let state = std::mem::replace(&mut self.state, ParentIteratorState::Exhausted);
+ match state {
+ ParentIteratorState::First => match self.commit_data.parent1 {
+ ParentEdge::None => match self.commit_data.parent2 {
+ ParentEdge::None => None,
+ _ => Some(Err(Error::SecondParentWithoutFirstParent(self.commit_data.id().into()))),
+ },
+ ParentEdge::GraphPosition(pos) => {
+ self.state = ParentIteratorState::Second;
+ Some(Ok(pos))
+ }
+ ParentEdge::ExtraEdgeIndex(_) => {
+ Some(Err(Error::FirstParentIsExtraEdgeIndex(self.commit_data.id().into())))
+ }
+ },
+ ParentIteratorState::Second => match self.commit_data.parent2 {
+ ParentEdge::None => None,
+ ParentEdge::GraphPosition(pos) => Some(Ok(pos)),
+ ParentEdge::ExtraEdgeIndex(extra_edge_index) => {
+ if let Some(extra_edges_list) = self.commit_data.file.extra_edges_data() {
+ let start_offset: usize = extra_edge_index
+ .try_into()
+ .expect("an architecture able to hold 32 bits of integer");
+ let start_offset = start_offset
+ .checked_mul(4)
+ .expect("an extended edge index small enough to fit in usize");
+ if let Some(tail) = extra_edges_list.get(start_offset..) {
+ self.state = ParentIteratorState::Extra(tail.chunks(4));
+ // This recursive call is what blocks me from replacing ParentIterator
+ // with a std::iter::from_fn closure.
+ self.next()
+ } else {
+ Some(Err(Error::ExtraEdgesListOverflow(self.commit_data.id().into())))
+ }
+ } else {
+ Some(Err(Error::MissingExtraEdgesList(self.commit_data.id().into())))
+ }
+ }
+ },
+ ParentIteratorState::Extra(mut chunks) => {
+ if let Some(chunk) = chunks.next() {
+ let extra_edge = read_u32(chunk);
+ match ExtraEdge::from_raw(extra_edge) {
+ ExtraEdge::Internal(pos) => {
+ self.state = ParentIteratorState::Extra(chunks);
+ Some(Ok(pos))
+ }
+ ExtraEdge::Last(pos) => Some(Ok(pos)),
+ }
+ } else {
+ Some(Err(Error::ExtraEdgesListOverflow(self.commit_data.id().into())))
+ }
+ }
+ ParentIteratorState::Exhausted => None,
+ }
+ }
+
+ fn size_hint(&self) -> (usize, Option<usize>) {
+ match (&self.state, self.commit_data.parent1, self.commit_data.parent2) {
+ (ParentIteratorState::First, ParentEdge::None, ParentEdge::None) => (0, Some(0)),
+ (ParentIteratorState::First, ParentEdge::None, _) => (1, Some(1)),
+ (ParentIteratorState::First, ParentEdge::GraphPosition(_), ParentEdge::None) => (1, Some(1)),
+ (ParentIteratorState::First, ParentEdge::GraphPosition(_), ParentEdge::GraphPosition(_)) => (2, Some(2)),
+ (ParentIteratorState::First, ParentEdge::GraphPosition(_), ParentEdge::ExtraEdgeIndex(_)) => (3, None),
+ (ParentIteratorState::First, ParentEdge::ExtraEdgeIndex(_), _) => (1, Some(1)),
+ (ParentIteratorState::Second, _, ParentEdge::None) => (0, Some(0)),
+ (ParentIteratorState::Second, _, ParentEdge::GraphPosition(_)) => (1, Some(1)),
+ (ParentIteratorState::Second, _, ParentEdge::ExtraEdgeIndex(_)) => (2, None),
+ (ParentIteratorState::Extra(_), _, _) => (1, None),
+ (ParentIteratorState::Exhausted, _, _) => (0, Some(0)),
+ }
+ }
+}
+
+#[derive(Debug)]
+enum ParentIteratorState<'a> {
+ First,
+ Second,
+ Extra(Chunks<'a, u8>),
+ Exhausted,
+}
+
+#[derive(Clone, Copy, Debug)]
+enum ParentEdge {
+ None,
+ GraphPosition(Position),
+ ExtraEdgeIndex(u32),
+}
+
+impl ParentEdge {
+ pub fn from_raw(raw: u32) -> ParentEdge {
+ if raw == NO_PARENT {
+ return ParentEdge::None;
+ }
+ if raw & EXTENDED_EDGES_MASK != 0 {
+ ParentEdge::ExtraEdgeIndex(raw & !EXTENDED_EDGES_MASK)
+ } else {
+ ParentEdge::GraphPosition(Position(raw))
+ }
+ }
+}
+
+enum ExtraEdge {
+ Internal(Position),
+ Last(Position),
+}
+
+impl ExtraEdge {
+ pub fn from_raw(raw: u32) -> Self {
+ if raw & LAST_EXTENDED_EDGE_MASK != 0 {
+ Self::Last(Position(raw & !LAST_EXTENDED_EDGE_MASK))
+ } else {
+ Self::Internal(Position(raw))
+ }
+ }
+}
diff --git a/vendor/gix-commitgraph/src/file/init.rs b/vendor/gix-commitgraph/src/file/init.rs
new file mode 100644
index 000000000..1d8f52e3f
--- /dev/null
+++ b/vendor/gix-commitgraph/src/file/init.rs
@@ -0,0 +1,257 @@
+use std::{
+ convert::{TryFrom, TryInto},
+ path::Path,
+};
+
+use bstr::ByteSlice;
+use memmap2::Mmap;
+
+use crate::{
+ file::{
+ ChunkId, BASE_GRAPHS_LIST_CHUNK_ID, COMMIT_DATA_CHUNK_ID, COMMIT_DATA_ENTRY_SIZE_SANS_HASH,
+ EXTENDED_EDGES_LIST_CHUNK_ID, FAN_LEN, HEADER_LEN, OID_FAN_CHUNK_ID, OID_LOOKUP_CHUNK_ID, SIGNATURE,
+ },
+ File,
+};
+
+/// The error used in [`File::at()`].
+#[derive(thiserror::Error, Debug)]
+#[allow(missing_docs)]
+pub enum Error {
+ #[error("Commit-graph {:?} chunk contains {from_chunk} base graphs, but commit-graph file header claims {from_header} base graphs", BASE_GRAPHS_LIST_CHUNK_ID.as_bstr())]
+ BaseGraphMismatch { from_header: u8, from_chunk: u32 },
+ #[error("Commit-graph {:?} chunk contains {chunk1_commits} commits, but {:?} chunk contains {chunk2_commits} commits", .chunk1_id.as_bstr(), .chunk2_id.as_bstr())]
+ CommitCountMismatch {
+ chunk1_id: ChunkId,
+ chunk1_commits: u32,
+ chunk2_id: ChunkId,
+ chunk2_commits: u32,
+ },
+ #[error("{0}")]
+ Corrupt(String),
+ // This error case is disabled, as git allows extra garbage in the extra edges list?
+ // #[error("The last entry in commit-graph's extended edges list does is not marked as being terminal")]
+ // ExtraEdgesOverflow,
+ #[error("Could not open commit-graph file at '{}'", .path.display())]
+ Io {
+ #[source]
+ err: std::io::Error,
+ path: std::path::PathBuf,
+ },
+ #[error("{0}")]
+ Trailer(String),
+ #[error("Commit-graph file uses unsupported hash version: {0}")]
+ UnsupportedHashVersion(u8),
+ #[error("Unsupported commit-graph file version: {0}")]
+ UnsupportedVersion(u8),
+ #[error(transparent)]
+ ChunkFileDecode(#[from] gix_chunk::file::decode::Error),
+ #[error(transparent)]
+ MissingChunk(#[from] gix_chunk::file::index::offset_by_kind::Error),
+ #[error("Commit-graph chunk {:?} has invalid size: {msg}", .id.as_bstr())]
+ InvalidChunkSize { id: ChunkId, msg: String },
+}
+
+const MIN_FILE_SIZE: usize = HEADER_LEN
+ + gix_chunk::file::Index::size_for_entries(3 /*OIDF, OIDL, CDAT*/)
+ + FAN_LEN * 4 /* FANOUT TABLE CHUNK OIDF */
+ + gix_hash::Kind::shortest().len_in_bytes();
+
+impl File {
+ /// Try to parse the commit graph file at `path`.
+ pub fn at(path: impl AsRef<Path>) -> Result<File, Error> {
+ Self::try_from(path.as_ref())
+ }
+}
+
+impl TryFrom<&Path> for File {
+ type Error = Error;
+
+ fn try_from(path: &Path) -> Result<Self, Self::Error> {
+ let data = std::fs::File::open(path)
+ .and_then(|file| {
+ // SAFETY: we have to take the risk of somebody changing the file underneath. Git never writes into the same file.
+ #[allow(unsafe_code)]
+ unsafe {
+ Mmap::map(&file)
+ }
+ })
+ .map_err(|e| Error::Io {
+ err: e,
+ path: path.to_owned(),
+ })?;
+ let data_size = data.len();
+ if data_size < MIN_FILE_SIZE {
+ return Err(Error::Corrupt(
+ "Commit-graph file too small even for an empty graph".to_owned(),
+ ));
+ }
+
+ let mut ofs = 0;
+ if &data[ofs..ofs + SIGNATURE.len()] != SIGNATURE {
+ return Err(Error::Corrupt(
+ "Commit-graph file does not start with expected signature".to_owned(),
+ ));
+ }
+ ofs += SIGNATURE.len();
+
+ match data[ofs] {
+ 1 => (),
+ x => {
+ return Err(Error::UnsupportedVersion(x));
+ }
+ };
+ ofs += 1;
+
+ let object_hash = gix_hash::Kind::try_from(data[ofs]).map_err(Error::UnsupportedHashVersion)?;
+ ofs += 1;
+
+ let chunk_count = data[ofs];
+ // Can assert chunk_count >= MIN_CHUNKS here, but later OIDF+OIDL+CDAT presence checks make
+ // it redundant.
+ ofs += 1;
+
+ let base_graph_count = data[ofs];
+ ofs += 1;
+
+ let chunks = gix_chunk::file::Index::from_bytes(&data, ofs, chunk_count as u32)?;
+
+ let base_graphs_list_offset = chunks
+ .validated_usize_offset_by_id(BASE_GRAPHS_LIST_CHUNK_ID, |chunk_range| {
+ let chunk_size = chunk_range.len();
+ if chunk_size % object_hash.len_in_bytes() != 0 {
+ return Err(Error::InvalidChunkSize {
+ id: BASE_GRAPHS_LIST_CHUNK_ID,
+ msg: format!(
+ "chunk size {} is not a multiple of {}",
+ chunk_size,
+ object_hash.len_in_bytes()
+ ),
+ });
+ }
+ let chunk_base_graph_count: u32 = (chunk_size / object_hash.len_in_bytes())
+ .try_into()
+ .expect("base graph count to fit in 32-bits");
+ if chunk_base_graph_count != u32::from(base_graph_count) {
+ return Err(Error::BaseGraphMismatch {
+ from_chunk: chunk_base_graph_count,
+ from_header: base_graph_count,
+ });
+ }
+ Ok(chunk_range.start)
+ })
+ .ok()
+ .transpose()?;
+
+ let (commit_data_offset, commit_data_count) =
+ chunks.validated_usize_offset_by_id(COMMIT_DATA_CHUNK_ID, |chunk_range| {
+ let chunk_size = chunk_range.len();
+
+ let entry_size = object_hash.len_in_bytes() + COMMIT_DATA_ENTRY_SIZE_SANS_HASH;
+ if chunk_size % entry_size != 0 {
+ return Err(Error::InvalidChunkSize {
+ id: COMMIT_DATA_CHUNK_ID,
+ msg: format!("chunk size {chunk_size} is not a multiple of {entry_size}"),
+ });
+ }
+ Ok((
+ chunk_range.start,
+ (chunk_size / entry_size)
+ .try_into()
+ .expect("number of commits in CDAT chunk to fit in 32 bits"),
+ ))
+ })??;
+
+ let fan_offset = chunks.validated_usize_offset_by_id(OID_FAN_CHUNK_ID, |chunk_range| {
+ let chunk_size = chunk_range.len();
+
+ let expected_size = 4 * FAN_LEN;
+ if chunk_size != expected_size {
+ return Err(Error::InvalidChunkSize {
+ id: OID_FAN_CHUNK_ID,
+ msg: format!("expected chunk length {expected_size}, got {chunk_size}"),
+ });
+ }
+ Ok(chunk_range.start)
+ })??;
+
+ let (oid_lookup_offset, oid_lookup_count) =
+ chunks.validated_usize_offset_by_id(OID_LOOKUP_CHUNK_ID, |chunk_range| {
+ let chunk_size = chunk_range.len();
+
+ if chunk_size % object_hash.len_in_bytes() != 0 {
+ return Err(Error::InvalidChunkSize {
+ id: OID_LOOKUP_CHUNK_ID,
+ msg: format!(
+ "chunk size {} is not a multiple of {}",
+ chunk_size,
+ object_hash.len_in_bytes()
+ ),
+ });
+ }
+ Ok((
+ chunk_range.start,
+ (chunk_size / object_hash.len_in_bytes())
+ .try_into()
+ .expect("number of commits in OIDL chunk to fit in 32 bits"),
+ ))
+ })??;
+
+ let extra_edges_list_range = chunks.usize_offset_by_id(EXTENDED_EDGES_LIST_CHUNK_ID).ok();
+
+ let trailer = &data[chunks.highest_offset() as usize..];
+ if trailer.len() != object_hash.len_in_bytes() {
+ return Err(Error::Trailer(format!(
+ "Expected commit-graph trailer to contain {} bytes, got {}",
+ object_hash.len_in_bytes(),
+ trailer.len()
+ )));
+ }
+
+ if base_graph_count > 0 && base_graphs_list_offset.is_none() {
+ return Err(gix_chunk::file::index::offset_by_kind::Error {
+ kind: BASE_GRAPHS_LIST_CHUNK_ID,
+ }
+ .into());
+ }
+
+ let (fan, _) = read_fan(&data[fan_offset..]);
+ if oid_lookup_count != fan[255] {
+ return Err(Error::CommitCountMismatch {
+ chunk1_id: OID_FAN_CHUNK_ID,
+ chunk1_commits: fan[255],
+ chunk2_id: OID_LOOKUP_CHUNK_ID,
+ chunk2_commits: oid_lookup_count,
+ });
+ }
+ if commit_data_count != fan[255] {
+ return Err(Error::CommitCountMismatch {
+ chunk1_id: OID_FAN_CHUNK_ID,
+ chunk1_commits: fan[255],
+ chunk2_id: COMMIT_DATA_CHUNK_ID,
+ chunk2_commits: commit_data_count,
+ });
+ }
+ Ok(File {
+ base_graph_count,
+ base_graphs_list_offset,
+ commit_data_offset,
+ data,
+ extra_edges_list_range,
+ fan,
+ oid_lookup_offset,
+ path: path.to_owned(),
+ hash_len: object_hash.len_in_bytes(),
+ object_hash,
+ })
+ }
+}
+
+// Copied from gix-odb/pack/index/init.rs
+fn read_fan(d: &[u8]) -> ([u32; FAN_LEN], usize) {
+ let mut fan = [0; FAN_LEN];
+ for (c, f) in d.chunks(4).zip(fan.iter_mut()) {
+ *f = u32::from_be_bytes(c.try_into().unwrap());
+ }
+ (fan, FAN_LEN * 4)
+}
diff --git a/vendor/gix-commitgraph/src/file/mod.rs b/vendor/gix-commitgraph/src/file/mod.rs
new file mode 100644
index 000000000..528cd69fb
--- /dev/null
+++ b/vendor/gix-commitgraph/src/file/mod.rs
@@ -0,0 +1,46 @@
+//! Operations on a single commit-graph file.
+
+use std::fmt::{Display, Formatter};
+
+pub use self::{commit::Commit, init::Error};
+
+mod access;
+pub mod commit;
+mod init;
+pub mod verify;
+
+const COMMIT_DATA_ENTRY_SIZE_SANS_HASH: usize = 16;
+pub(crate) const FAN_LEN: usize = 256;
+const HEADER_LEN: usize = 8;
+
+const SIGNATURE: &[u8] = b"CGPH";
+
+type ChunkId = gix_chunk::Id;
+const BASE_GRAPHS_LIST_CHUNK_ID: ChunkId = *b"BASE";
+const COMMIT_DATA_CHUNK_ID: ChunkId = *b"CDAT";
+const EXTENDED_EDGES_LIST_CHUNK_ID: ChunkId = *b"EDGE";
+const OID_FAN_CHUNK_ID: ChunkId = *b"OIDF";
+const OID_LOOKUP_CHUNK_ID: ChunkId = *b"OIDL";
+
+// Note that git's commit-graph-format.txt as of v2.28.0 gives an incorrect value 0x0700_0000 for
+// NO_PARENT. Fixed in https://github.com/git/git/commit/4d515253afcef985e94400adbfed7044959f9121 .
+const NO_PARENT: u32 = 0x7000_0000;
+const EXTENDED_EDGES_MASK: u32 = 0x8000_0000;
+const LAST_EXTENDED_EDGE_MASK: u32 = 0x8000_0000;
+
+/// The position of a given commit within a graph file, starting at 0.
+///
+/// Commits within a graph file are sorted in lexicographical order by OID; a commit's lexicographical position
+/// is its position in this ordering. If a commit graph spans multiple files, each file's commits
+/// start at lexicographical position 0, so it is unique across a single file but is not unique across
+/// the whole commit graph. Each commit also has a graph position ([`Position`][crate::Position]),
+/// which is unique across the whole commit graph.
+/// In order to avoid accidentally mixing lexicographical positions with graph positions, distinct types are used for each.
+#[derive(Clone, Copy, Debug, Eq, Hash, Ord, PartialEq, PartialOrd)]
+pub struct Position(pub u32);
+
+impl Display for Position {
+ fn fmt(&self, f: &mut Formatter<'_>) -> std::fmt::Result {
+ self.0.fmt(f)
+ }
+}
diff --git a/vendor/gix-commitgraph/src/file/verify.rs b/vendor/gix-commitgraph/src/file/verify.rs
new file mode 100644
index 000000000..4f4f76829
--- /dev/null
+++ b/vendor/gix-commitgraph/src/file/verify.rs
@@ -0,0 +1,173 @@
+//! Auxiliary types used in commit graph file verification methods.
+use std::{
+ cmp::{max, min},
+ collections::HashMap,
+ path::Path,
+};
+
+use crate::{file, File, GENERATION_NUMBER_INFINITY, GENERATION_NUMBER_MAX};
+
+/// The error used in [`File::traverse()`].
+#[derive(thiserror::Error, Debug)]
+#[allow(missing_docs)]
+pub enum Error<E: std::error::Error + 'static> {
+ #[error(transparent)]
+ Commit(#[from] file::commit::Error),
+ #[error("commit at file position {pos} has invalid ID {id}")]
+ CommitId {
+ id: gix_hash::ObjectId,
+ pos: file::Position,
+ },
+ #[error("commit at file position {pos} with ID {id} is out of order relative to its predecessor with ID {predecessor_id}")]
+ CommitsOutOfOrder {
+ id: gix_hash::ObjectId,
+ pos: file::Position,
+ predecessor_id: gix_hash::ObjectId,
+ },
+ #[error("commit-graph filename should be {0}")]
+ Filename(String),
+ #[error("commit {id} has invalid generation {generation}")]
+ Generation { generation: u32, id: gix_hash::ObjectId },
+ #[error("checksum mismatch: expected {expected}, got {actual}")]
+ Mismatch {
+ actual: gix_hash::ObjectId,
+ expected: gix_hash::ObjectId,
+ },
+ #[error("{0}")]
+ Processor(#[source] E),
+ #[error("commit {id} has invalid root tree ID {root_tree_id}")]
+ RootTreeId {
+ id: gix_hash::ObjectId,
+ root_tree_id: gix_hash::ObjectId,
+ },
+}
+
+/// The positive result of [`File::traverse()`] providing some statistical information.
+#[derive(Clone, Debug, Eq, PartialEq)]
+#[cfg_attr(feature = "serde", derive(serde::Deserialize, serde::Serialize))]
+pub struct Outcome {
+ /// The largest encountered [`file::Commit`] generation number.
+ pub max_generation: u32,
+ /// The smallest encountered [`file::Commit`] generation number.
+ pub min_generation: u32,
+ /// The largest number of parents in a single [`file::Commit`].
+ pub max_parents: u32,
+ /// The total number of [`commits`][file::Commit]s seen in the iteration.
+ pub num_commits: u32,
+ /// A mapping of `N -> number of commits with N parents`.
+ pub parent_counts: HashMap<u32, u32>,
+}
+
+/// Verification
+impl File {
+ /// Returns the trailing checksum over the entire content of this file.
+ pub fn checksum(&self) -> &gix_hash::oid {
+ gix_hash::oid::from_bytes_unchecked(&self.data[self.data.len() - self.hash_len..])
+ }
+
+ /// Traverse all [commits][file::Commit] stored in this file and call `processor(commit) -> Result<(), Error>` on it.
+ ///
+ /// If the `processor` fails, the iteration will be stopped and the entire call results in the respective error.
+ pub fn traverse<'a, E, Processor>(&'a self, mut processor: Processor) -> Result<Outcome, Error<E>>
+ where
+ E: std::error::Error + 'static,
+ Processor: FnMut(&file::Commit<'a>) -> Result<(), E>,
+ {
+ self.verify_checksum()
+ .map_err(|(actual, expected)| Error::Mismatch { actual, expected })?;
+ verify_split_chain_filename_hash(&self.path, self.checksum()).map_err(Error::Filename)?;
+
+ let null_id = self.object_hash().null_ref();
+
+ let mut stats = Outcome {
+ max_generation: 0,
+ max_parents: 0,
+ min_generation: GENERATION_NUMBER_INFINITY,
+ num_commits: self.num_commits(),
+ parent_counts: HashMap::new(),
+ };
+
+ // TODO: Verify self.fan values as we go.
+ let mut prev_id: &gix_hash::oid = null_id;
+ for commit in self.iter_commits() {
+ if commit.id() <= prev_id {
+ if commit.id() == null_id {
+ return Err(Error::CommitId {
+ pos: commit.position(),
+ id: commit.id().into(),
+ });
+ }
+ return Err(Error::CommitsOutOfOrder {
+ pos: commit.position(),
+ id: commit.id().into(),
+ predecessor_id: prev_id.into(),
+ });
+ }
+ if commit.root_tree_id() == null_id {
+ return Err(Error::RootTreeId {
+ id: commit.id().into(),
+ root_tree_id: commit.root_tree_id().into(),
+ });
+ }
+ if commit.generation() > GENERATION_NUMBER_MAX {
+ return Err(Error::Generation {
+ generation: commit.generation(),
+ id: commit.id().into(),
+ });
+ }
+
+ processor(&commit).map_err(Error::Processor)?;
+
+ stats.max_generation = max(stats.max_generation, commit.generation());
+ stats.min_generation = min(stats.min_generation, commit.generation());
+ let parent_count = commit
+ .iter_parents()
+ .try_fold(0u32, |acc, pos| pos.map(|_| acc + 1))
+ .map_err(Error::Commit)?;
+ *stats.parent_counts.entry(parent_count).or_insert(0) += 1;
+ prev_id = commit.id();
+ }
+
+ if stats.min_generation == GENERATION_NUMBER_INFINITY {
+ stats.min_generation = 0;
+ }
+
+ Ok(stats)
+ }
+
+ /// Assure the [`checksum`][File::checksum()] matches the actual checksum over all content of this file, excluding the trailing
+ /// checksum itself.
+ ///
+ /// Return the actual checksum on success or `(actual checksum, expected checksum)` if there is a mismatch.
+ pub fn verify_checksum(&self) -> Result<gix_hash::ObjectId, (gix_hash::ObjectId, gix_hash::ObjectId)> {
+ // Even though we could use gix_features::hash::bytes_of_file(…), this would require using our own
+ // Error type to support io::Error and Mismatch. As we only gain progress, there probably isn't much value
+ // as these files are usually small enough to process them in less than a second, even for the large ones.
+ // But it's possible, once a progress instance is passed.
+ let data_len_without_trailer = self.data.len() - self.hash_len;
+ let mut hasher = gix_features::hash::hasher(self.object_hash());
+ hasher.update(&self.data[..data_len_without_trailer]);
+ let actual = gix_hash::ObjectId::from(hasher.digest().as_ref());
+
+ let expected = self.checksum();
+ if actual == expected {
+ Ok(actual)
+ } else {
+ Err((actual, expected.into()))
+ }
+ }
+}
+
+/// If the given path's filename matches "graph-{hash}.graph", check that `hash` matches the
+/// expected hash.
+fn verify_split_chain_filename_hash(path: impl AsRef<Path>, expected: &gix_hash::oid) -> Result<(), String> {
+ let path = path.as_ref();
+ path.file_name()
+ .and_then(|filename| filename.to_str())
+ .and_then(|filename| filename.strip_suffix(".graph"))
+ .and_then(|stem| stem.strip_prefix("graph-"))
+ .map_or(Ok(()), |hex| match gix_hash::ObjectId::from_hex(hex.as_bytes()) {
+ Ok(actual) if actual == expected => Ok(()),
+ _ => Err(format!("graph-{}.graph", expected.to_hex())),
+ })
+}
diff --git a/vendor/gix-commitgraph/src/init.rs b/vendor/gix-commitgraph/src/init.rs
new file mode 100644
index 000000000..f964aade0
--- /dev/null
+++ b/vendor/gix-commitgraph/src/init.rs
@@ -0,0 +1,130 @@
+use std::{
+ convert::TryFrom,
+ io::{BufRead, BufReader},
+ path::{Path, PathBuf},
+};
+
+use crate::{file, File, Graph, MAX_COMMITS};
+
+/// The error returned by initializations functions like [`Graph::at()`].
+#[derive(thiserror::Error, Debug)]
+#[allow(missing_docs)]
+pub enum Error {
+ #[error("{}", .path.display())]
+ File {
+ #[source]
+ err: file::Error,
+ path: PathBuf,
+ },
+ #[error("Commit-graph files mismatch: '{}' uses hash {hash1:?}, but '{}' uses hash {hash2:?}", .path1.display(), .path2.display())]
+ HashVersionMismatch {
+ path1: PathBuf,
+ hash1: gix_hash::Kind,
+ path2: PathBuf,
+ hash2: gix_hash::Kind,
+ },
+ #[error("Did not find any files that look like commit graphs at '{}'", .0.display())]
+ InvalidPath(PathBuf),
+ #[error("Could not open commit-graph file at '{}'", .path.display())]
+ Io {
+ #[source]
+ err: std::io::Error,
+ path: PathBuf,
+ },
+ #[error(
+ "Commit-graph files contain {0} commits altogether, but only {} commits are allowed",
+ MAX_COMMITS
+ )]
+ TooManyCommits(u64),
+}
+
+/// Instantiate a `Graph` from various sources.
+impl Graph {
+ /// Instantiate a commit graph from `path` which may be a directory containing graph files or the graph file itself.
+ pub fn at(path: impl AsRef<Path>) -> Result<Self, Error> {
+ Self::try_from(path.as_ref())
+ }
+
+ /// Instantiate a commit graph from the directory containing all of its files.
+ pub fn from_commit_graphs_dir(path: impl AsRef<Path>) -> Result<Self, Error> {
+ let commit_graphs_dir = path.as_ref();
+ let chain_file_path = commit_graphs_dir.join("commit-graph-chain");
+ let chain_file = std::fs::File::open(&chain_file_path).map_err(|e| Error::Io {
+ err: e,
+ path: chain_file_path.clone(),
+ })?;
+ let mut files = Vec::new();
+ for line in BufReader::new(chain_file).lines() {
+ let hash = line.map_err(|e| Error::Io {
+ err: e,
+ path: chain_file_path.clone(),
+ })?;
+ let graph_file_path = commit_graphs_dir.join(format!("graph-{hash}.graph"));
+ files.push(File::at(&graph_file_path).map_err(|e| Error::File {
+ err: e,
+ path: graph_file_path.clone(),
+ })?);
+ }
+ Self::new(files)
+ }
+
+ /// Instantiate a commit graph from a `.git/objects/info/commit-graph` or
+ /// `.git/objects/info/commit-graphs/graph-*.graph` file.
+ pub fn from_file(path: impl AsRef<Path>) -> Result<Self, Error> {
+ let path = path.as_ref();
+ let file = File::at(path).map_err(|e| Error::File {
+ err: e,
+ path: path.to_owned(),
+ })?;
+ Self::new(vec![file])
+ }
+
+ /// Instantiate a commit graph from an `.git/objects/info` directory.
+ pub fn from_info_dir(info_dir: impl AsRef<Path>) -> Result<Self, Error> {
+ Self::from_file(info_dir.as_ref().join("commit-graph"))
+ .or_else(|_| Self::from_commit_graphs_dir(info_dir.as_ref().join("commit-graphs")))
+ }
+
+ /// Create a new commit graph from a list of `files`.
+ pub fn new(files: Vec<File>) -> Result<Self, Error> {
+ let num_commits: u64 = files.iter().map(|f| u64::from(f.num_commits())).sum();
+ if num_commits > u64::from(MAX_COMMITS) {
+ return Err(Error::TooManyCommits(num_commits));
+ }
+
+ for window in files.windows(2) {
+ let f1 = &window[0];
+ let f2 = &window[1];
+ if f1.object_hash() != f2.object_hash() {
+ return Err(Error::HashVersionMismatch {
+ path1: f1.path().to_owned(),
+ hash1: f1.object_hash(),
+ path2: f2.path().to_owned(),
+ hash2: f2.object_hash(),
+ });
+ }
+ }
+
+ Ok(Self { files })
+ }
+}
+
+impl TryFrom<&Path> for Graph {
+ type Error = Error;
+
+ fn try_from(path: &Path) -> Result<Self, Self::Error> {
+ if path.is_file() {
+ // Assume we are looking at `.git/objects/info/commit-graph` or
+ // `.git/objects/info/commit-graphs/graph-*.graph`.
+ Self::from_file(path)
+ } else if path.is_dir() {
+ if path.join("commit-graph-chain").is_file() {
+ Self::from_commit_graphs_dir(path)
+ } else {
+ Self::from_info_dir(path)
+ }
+ } else {
+ Err(Error::InvalidPath(path.to_owned()))
+ }
+ }
+}
diff --git a/vendor/gix-commitgraph/src/lib.rs b/vendor/gix-commitgraph/src/lib.rs
new file mode 100644
index 000000000..231c11c6f
--- /dev/null
+++ b/vendor/gix-commitgraph/src/lib.rs
@@ -0,0 +1,77 @@
+//! Read, verify, and traverse git commit graphs.
+//!
+//! A [commit graph][Graph] is an index of commits in the git commit history.
+//! The [Graph] stores commit data in a way that accelerates lookups considerably compared to
+//! traversing the git history by usual means.
+//!
+//! As generating the full commit graph from scratch can take some time, git may write new commits
+//! to separate [files][File] instead of overwriting the original file.
+//! Eventually, git will merge these files together as the number of files grows.
+//! ## Feature Flags
+#![cfg_attr(
+ feature = "document-features",
+ cfg_attr(doc, doc = ::document_features::document_features!())
+)]
+#![cfg_attr(docsrs, feature(doc_cfg, doc_auto_cfg))]
+#![deny(missing_docs, rust_2018_idioms, unsafe_code)]
+
+use std::path::Path;
+
+/// A single commit-graph file.
+///
+/// All operations on a `File` are local to that graph file. Since a commit graph can span multiple
+/// files, all interesting graph operations belong on [`Graph`][crate::Graph].
+pub struct File {
+ base_graph_count: u8,
+ base_graphs_list_offset: Option<usize>,
+ commit_data_offset: usize,
+ data: memmap2::Mmap,
+ extra_edges_list_range: Option<std::ops::Range<usize>>,
+ fan: [u32; file::FAN_LEN],
+ oid_lookup_offset: usize,
+ path: std::path::PathBuf,
+ hash_len: usize,
+ object_hash: gix_hash::Kind,
+}
+
+/// A complete commit graph.
+///
+/// The data in the commit graph may come from a monolithic `objects/info/commit-graph` file, or it
+/// may come from one or more `objects/info/commit-graphs/graph-*.graph` files. These files are
+/// generated via `git commit-graph write ...` commands.
+pub struct Graph {
+ files: Vec<File>,
+}
+
+/// Instantiate a commit graph from an `.git/objects/info` directory, or one of the various commit-graph files.
+pub fn at(path: impl AsRef<Path>) -> Result<Graph, init::Error> {
+ Graph::at(path)
+}
+
+mod access;
+pub mod file;
+///
+pub mod init;
+pub mod verify;
+
+/// The number of generations that are considered 'infinite' commit history.
+pub const GENERATION_NUMBER_INFINITY: u32 = 0xffff_ffff;
+/// The largest valid generation number.
+///
+/// If a commit's real generation number is larger than this, the commit graph will cap the value to
+/// this number.
+/// The largest distinct generation number is `GENERATION_NUMBER_MAX - 1`.
+pub const GENERATION_NUMBER_MAX: u32 = 0x3fff_ffff;
+
+/// The maximum number of commits that can be stored in a commit graph.
+pub const MAX_COMMITS: u32 = (1 << 30) + (1 << 29) + (1 << 28) - 1;
+
+/// A generalized position for use in [`Graph`].
+#[derive(Clone, Copy, Debug, Eq, Ord, PartialEq, PartialOrd, Hash)]
+pub struct Position(pub u32);
+
+impl std::fmt::Display for Position {
+ fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
+ self.0.fmt(f)
+ }
+}
diff --git a/vendor/gix-commitgraph/src/verify.rs b/vendor/gix-commitgraph/src/verify.rs
new file mode 100644
index 000000000..2d863e648
--- /dev/null
+++ b/vendor/gix-commitgraph/src/verify.rs
@@ -0,0 +1,205 @@
+//! Auxiliary types used by graph verification methods.
+use std::{
+ cmp::{max, min},
+ collections::BTreeMap,
+ convert::TryInto,
+ path::PathBuf,
+};
+
+use crate::{
+ file::{self, commit},
+ Graph, Position, GENERATION_NUMBER_MAX,
+};
+
+/// The error used in [`verify_integrity()`][Graph::verify_integrity].
+#[derive(thiserror::Error, Debug)]
+#[allow(missing_docs)]
+pub enum Error<E: std::error::Error + 'static> {
+ #[error("'{}' should have {expected} base graphs, but claims {actual} base graphs", .path.display())]
+ BaseGraphCount { actual: u8, expected: u8, path: PathBuf },
+ #[error("'{}' base graph at index {index} should have ID {expected} but is {actual}", .path.display())]
+ BaseGraphId {
+ actual: gix_hash::ObjectId,
+ expected: gix_hash::ObjectId,
+ index: u8,
+ path: PathBuf,
+ },
+ #[error(transparent)]
+ Commit(#[from] commit::Error),
+ #[error("{}: {err}", .path.display())]
+ File {
+ // Use zero-size error type. We will never return
+ // `graph::verify::Error::File(file::verify::Error::Processor(...))`, because we are the
+ // file's processor, and we convert`file::verify::Error::Processor<graph::verify::Error>`
+ // variants into direct `graph::verify::Error` values.
+ err: file::verify::Error<std::convert::Infallible>,
+ path: PathBuf,
+ },
+ #[error("Commit {id}'s generation should be {expected} but is {actual}")]
+ Generation {
+ actual: u32,
+ expected: u32,
+ id: gix_hash::ObjectId,
+ },
+ #[error(
+ "Commit {id} has parent position {parent_pos} that is out of range (should be in range 0-{max_valid_pos})"
+ )]
+ ParentOutOfRange {
+ id: gix_hash::ObjectId,
+ max_valid_pos: Position,
+ parent_pos: Position,
+ },
+ #[error("{0}")]
+ Processor(#[source] E),
+ #[error("Commit-graph should be composed of at most 256 files but actually contains {0} files")]
+ TooManyFiles(usize),
+}
+
+/// Statistics gathered while verifying the integrity of the graph as returned by [`Graph::verify_integrity()`].
+#[derive(Clone, Debug, Eq, PartialEq)]
+#[cfg_attr(feature = "serde", derive(serde::Deserialize, serde::Serialize))]
+pub struct Outcome {
+ /// The length of the longest path between any two commits in this graph.
+ ///
+ /// For example, this will be `Some(9)` for a commit graph containing 10 linear commits.
+ /// This will be `Some(0)` for a commit graph containing 0 or 1 commits.
+ /// If the longest path length is too large to fit in a [u32], then this will be [None].
+ pub longest_path_length: Option<u32>,
+ /// The total number of commits traversed.
+ pub num_commits: u32,
+ /// A mapping of `N -> number of commits with N parents`.
+ pub parent_counts: BTreeMap<u32, u32>,
+}
+
+impl Graph {
+ /// Traverse all commits in the graph and call `processor(&commit) -> Result<(), E>` on it while verifying checksums.
+ ///
+ /// When `processor` returns an error, the entire verification is stopped and the error returned.
+ pub fn verify_integrity<E>(
+ &self,
+ mut processor: impl FnMut(&file::Commit<'_>) -> Result<(), E>,
+ ) -> Result<Outcome, Error<E>>
+ where
+ E: std::error::Error + 'static,
+ {
+ if self.files.len() > 256 {
+ // A file in a split chain can only have up to 255 base files.
+ return Err(Error::TooManyFiles(self.files.len()));
+ }
+
+ let mut stats = Outcome {
+ longest_path_length: None,
+ num_commits: 0,
+ parent_counts: BTreeMap::new(),
+ };
+ let mut max_generation = 0u32;
+
+ // TODO: Detect duplicate commit IDs across different files. Not sure how to do this without
+ // a separate loop, e.g. self.iter_sorted_ids().
+
+ let mut file_start_pos = Position(0);
+ for (file_index, file) in self.files.iter().enumerate() {
+ if usize::from(file.base_graph_count()) != file_index {
+ return Err(Error::BaseGraphCount {
+ actual: file.base_graph_count(),
+ expected: file_index
+ .try_into()
+ .expect("files.len() check to protect against this"),
+ path: file.path().to_owned(),
+ });
+ }
+
+ for (base_graph_index, (expected, actual)) in self.files[..file_index]
+ .iter()
+ .map(|base_file| base_file.checksum())
+ .zip(file.iter_base_graph_ids())
+ .enumerate()
+ {
+ if actual != expected {
+ return Err(Error::BaseGraphId {
+ actual: actual.into(),
+ expected: expected.into(),
+ index: base_graph_index
+ .try_into()
+ .expect("files.len() check to protect against this"),
+ path: file.path().to_owned(),
+ });
+ }
+ }
+
+ let next_file_start_pos = Position(file_start_pos.0 + file.num_commits());
+ let file_stats = file
+ .traverse(|commit| {
+ let mut max_parent_generation = 0u32;
+ for parent_pos in commit.iter_parents() {
+ let parent_pos = parent_pos.map_err(Error::Commit)?;
+ if parent_pos >= next_file_start_pos {
+ return Err(Error::ParentOutOfRange {
+ parent_pos,
+ id: commit.id().into(),
+ max_valid_pos: Position(next_file_start_pos.0 - 1),
+ });
+ }
+ let parent = self.commit_at(parent_pos);
+ max_parent_generation = max(max_parent_generation, parent.generation());
+ }
+
+ // If the max parent generation is GENERATION_NUMBER_MAX, then this commit's
+ // generation should be GENERATION_NUMBER_MAX too.
+ let expected_generation = min(max_parent_generation + 1, GENERATION_NUMBER_MAX);
+ if commit.generation() != expected_generation {
+ return Err(Error::Generation {
+ actual: commit.generation(),
+ expected: expected_generation,
+ id: commit.id().into(),
+ });
+ }
+
+ processor(commit).map_err(Error::Processor)?;
+
+ Ok(())
+ })
+ .map_err(|err| Error::File {
+ err: match err {
+ file::verify::Error::Processor(e) => return e,
+ file::verify::Error::RootTreeId { id, root_tree_id } => {
+ file::verify::Error::RootTreeId { id, root_tree_id }
+ }
+ file::verify::Error::Mismatch { actual, expected } => {
+ file::verify::Error::Mismatch { actual, expected }
+ }
+ file::verify::Error::Generation { generation, id } => {
+ file::verify::Error::Generation { generation, id }
+ }
+ file::verify::Error::Filename(expected) => file::verify::Error::Filename(expected),
+ file::verify::Error::Commit(err) => file::verify::Error::Commit(err),
+ file::verify::Error::CommitId { id, pos } => file::verify::Error::CommitId { id, pos },
+ file::verify::Error::CommitsOutOfOrder {
+ id,
+ pos,
+ predecessor_id,
+ } => file::verify::Error::CommitsOutOfOrder {
+ id,
+ pos,
+ predecessor_id,
+ },
+ },
+ path: file.path().to_owned(),
+ })?;
+
+ max_generation = max(max_generation, file_stats.max_generation);
+ stats.num_commits += file_stats.num_commits;
+ for (key, value) in file_stats.parent_counts.into_iter() {
+ *stats.parent_counts.entry(key).or_insert(0) += value;
+ }
+ file_start_pos = next_file_start_pos;
+ }
+
+ stats.longest_path_length = if max_generation < GENERATION_NUMBER_MAX {
+ Some(max_generation.saturating_sub(1))
+ } else {
+ None
+ };
+ Ok(stats)
+ }
+}