diff options
author | Daniel Baumann <daniel.baumann@progress-linux.org> | 2024-05-30 03:57:31 +0000 |
---|---|---|
committer | Daniel Baumann <daniel.baumann@progress-linux.org> | 2024-05-30 03:57:31 +0000 |
commit | dc0db358abe19481e475e10c32149b53370f1a1c (patch) | |
tree | ab8ce99c4b255ce46f99ef402c27916055b899ee /vendor/gix-commitgraph/src | |
parent | Releasing progress-linux version 1.71.1+dfsg1-2~progress7.99u1. (diff) | |
download | rustc-dc0db358abe19481e475e10c32149b53370f1a1c.tar.xz rustc-dc0db358abe19481e475e10c32149b53370f1a1c.zip |
Merging upstream version 1.72.1+dfsg1.
Signed-off-by: Daniel Baumann <daniel.baumann@progress-linux.org>
Diffstat (limited to 'vendor/gix-commitgraph/src')
-rw-r--r-- | vendor/gix-commitgraph/src/access.rs | 97 | ||||
-rw-r--r-- | vendor/gix-commitgraph/src/file/access.rs | 138 | ||||
-rw-r--r-- | vendor/gix-commitgraph/src/file/commit.rs | 258 | ||||
-rw-r--r-- | vendor/gix-commitgraph/src/file/init.rs | 257 | ||||
-rw-r--r-- | vendor/gix-commitgraph/src/file/mod.rs | 46 | ||||
-rw-r--r-- | vendor/gix-commitgraph/src/file/verify.rs | 173 | ||||
-rw-r--r-- | vendor/gix-commitgraph/src/init.rs | 130 | ||||
-rw-r--r-- | vendor/gix-commitgraph/src/lib.rs | 77 | ||||
-rw-r--r-- | vendor/gix-commitgraph/src/verify.rs | 205 |
9 files changed, 1381 insertions, 0 deletions
diff --git a/vendor/gix-commitgraph/src/access.rs b/vendor/gix-commitgraph/src/access.rs new file mode 100644 index 000000000..ce3688f78 --- /dev/null +++ b/vendor/gix-commitgraph/src/access.rs @@ -0,0 +1,97 @@ +use crate::{file, file::Commit, File, Graph, Position}; + +/// Access +impl Graph { + /// Returns the commit at the given position `pos`. + /// + /// # Panics + /// If `pos` is greater or equal to [`num_commits()`][Graph::num_commits()]. + pub fn commit_at(&self, pos: Position) -> Commit<'_> { + let r = self.lookup_by_pos(pos); + r.file.commit_at(r.pos) + } + + /// Returns the commit matching the given `id`. + pub fn commit_by_id(&self, id: impl AsRef<gix_hash::oid>) -> Option<Commit<'_>> { + let r = self.lookup_by_id(id.as_ref())?; + Some(r.file.commit_at(r.file_pos)) + } + + /// Returns the `hash` at the given position `pos`. + /// + /// # Panics + /// If `pos` is greater or equal to [`num_commits()`][Graph::num_commits()]. + pub fn id_at(&self, pos: Position) -> &gix_hash::oid { + let r = self.lookup_by_pos(pos); + r.file.id_at(r.pos) + } + + /// Iterate over commits in unsorted order. + pub fn iter_commits(&self) -> impl Iterator<Item = Commit<'_>> { + self.files.iter().flat_map(|file| file.iter_commits()) + } + + /// Iterate over commit IDs in unsorted order. + pub fn iter_ids(&self) -> impl Iterator<Item = &gix_hash::oid> { + self.files.iter().flat_map(|file| file.iter_ids()) + } + + /// Translate the given `id` to its position in the file. + pub fn lookup(&self, id: impl AsRef<gix_hash::oid>) -> Option<Position> { + Some(self.lookup_by_id(id.as_ref())?.graph_pos) + } + + /// Returns the number of commits stored in this file. 
+ pub fn num_commits(&self) -> u32 { + self.files.iter().map(|f| f.num_commits()).sum() + } +} + +/// Access fundamentals +impl Graph { + fn lookup_by_id(&self, id: &gix_hash::oid) -> Option<LookupByIdResult<'_>> { + let mut current_file_start = 0; + for file in &self.files { + if let Some(lex_pos) = file.lookup(id) { + return Some(LookupByIdResult { + file, + file_pos: lex_pos, + graph_pos: Position(current_file_start + lex_pos.0), + }); + } + current_file_start += file.num_commits(); + } + None + } + + fn lookup_by_pos(&self, pos: Position) -> LookupByPositionResult<'_> { + let mut remaining = pos.0; + for (file_index, file) in self.files.iter().enumerate() { + match remaining.checked_sub(file.num_commits()) { + Some(v) => remaining = v, + None => { + return LookupByPositionResult { + file, + _file_index: file_index, + pos: file::Position(remaining), + } + } + } + } + panic!("graph position too large: {}", pos.0); + } +} + +#[derive(Clone)] +struct LookupByIdResult<'a> { + pub file: &'a File, + pub graph_pos: Position, + pub file_pos: file::Position, +} + +#[derive(Clone)] +struct LookupByPositionResult<'a> { + pub file: &'a File, + pub _file_index: usize, + pub pos: file::Position, +} diff --git a/vendor/gix-commitgraph/src/file/access.rs b/vendor/gix-commitgraph/src/file/access.rs new file mode 100644 index 000000000..d3c32d8bb --- /dev/null +++ b/vendor/gix-commitgraph/src/file/access.rs @@ -0,0 +1,138 @@ +use std::{ + convert::TryInto, + fmt::{Debug, Formatter}, + path::Path, +}; + +use crate::{ + file::{self, commit::Commit, COMMIT_DATA_ENTRY_SIZE_SANS_HASH}, + File, +}; + +/// Access +impl File { + /// The number of base graphs that this file depends on. + pub fn base_graph_count(&self) -> u8 { + self.base_graph_count + } + + /// Returns the commit data for the commit located at the given lexigraphical position. + /// + /// `pos` must range from 0 to `self.num_commits()`. + /// + /// # Panics + /// + /// Panics if `pos` is out of bounds. 
+ pub fn commit_at(&self, pos: file::Position) -> Commit<'_> { + Commit::new(self, pos) + } + + /// The kind of hash used in this File. + /// + /// Note that it is always conforming to the hash used in the owning repository. + pub fn object_hash(&self) -> gix_hash::Kind { + self.object_hash + } + + /// Returns an object id at the given index in our list of (sorted) hashes. + /// The position ranges from 0 to `self.num_commits()` + // copied from gix-odb/src/pack/index/ext + pub fn id_at(&self, pos: file::Position) -> &gix_hash::oid { + assert!( + pos.0 < self.num_commits(), + "expected lexigraphical position less than {}, got {}", + self.num_commits(), + pos.0 + ); + let pos: usize = pos + .0 + .try_into() + .expect("an architecture able to hold 32 bits of integer"); + let start = self.oid_lookup_offset + (pos * self.hash_len); + gix_hash::oid::from_bytes_unchecked(&self.data[start..][..self.hash_len]) + } + + /// Return an iterator over all object hashes stored in the base graph. + pub fn iter_base_graph_ids(&self) -> impl Iterator<Item = &gix_hash::oid> { + let start = self.base_graphs_list_offset.unwrap_or(0); + let base_graphs_list = &self.data[start..][..self.hash_len * usize::from(self.base_graph_count)]; + base_graphs_list + .chunks(self.hash_len) + .map(gix_hash::oid::from_bytes_unchecked) + } + + /// return an iterator over all commits in this file. + pub fn iter_commits(&self) -> impl Iterator<Item = Commit<'_>> { + (0..self.num_commits()).map(move |i| self.commit_at(file::Position(i))) + } + + /// Return an iterator over all object hashes stored in this file. + pub fn iter_ids(&self) -> impl Iterator<Item = &gix_hash::oid> { + (0..self.num_commits()).map(move |i| self.id_at(file::Position(i))) + } + + /// Translate the given object hash to its position within this file, if present. 
+ // copied from gix-odb/src/pack/index/ext + pub fn lookup(&self, id: impl AsRef<gix_hash::oid>) -> Option<file::Position> { + let id = id.as_ref(); + let first_byte = usize::from(id.first_byte()); + let mut upper_bound = self.fan[first_byte]; + let mut lower_bound = if first_byte != 0 { self.fan[first_byte - 1] } else { 0 }; + + while lower_bound < upper_bound { + let mid = (lower_bound + upper_bound) / 2; + let mid_sha = self.id_at(file::Position(mid)); + + use std::cmp::Ordering::*; + match id.cmp(mid_sha) { + Less => upper_bound = mid, + Equal => return Some(file::Position(mid)), + Greater => lower_bound = mid + 1, + } + } + None + } + + /// Returns the number of commits in this graph file. + /// + /// The maximum valid `file::Position` that can be used with this file is one less than + /// `num_commits()`. + pub fn num_commits(&self) -> u32 { + self.fan[255] + } + + /// Returns the path to this file. + pub fn path(&self) -> &Path { + &self.path + } +} + +impl File { + /// Returns the byte slice for the given commit in this file's Commit Data (CDAT) chunk. + pub(crate) fn commit_data_bytes(&self, pos: file::Position) -> &[u8] { + assert!( + pos.0 < self.num_commits(), + "expected lexigraphical position less than {}, got {}", + self.num_commits(), + pos.0 + ); + let pos: usize = pos + .0 + .try_into() + .expect("an architecture able to hold 32 bits of integer"); + let entry_size = self.hash_len + COMMIT_DATA_ENTRY_SIZE_SANS_HASH; + let start = self.commit_data_offset + (pos * entry_size); + &self.data[start..][..entry_size] + } + + /// Returns the byte slice for this file's entire Extra Edge List (EDGE) chunk. 
+ pub(crate) fn extra_edges_data(&self) -> Option<&[u8]> { + Some(&self.data[self.extra_edges_list_range.clone()?]) + } +} + +impl Debug for File { + fn fmt(&self, f: &mut Formatter<'_>) -> std::fmt::Result { + write!(f, r#"File("{:?}")"#, self.path.display()) + } +} diff --git a/vendor/gix-commitgraph/src/file/commit.rs b/vendor/gix-commitgraph/src/file/commit.rs new file mode 100644 index 000000000..14fe15e84 --- /dev/null +++ b/vendor/gix-commitgraph/src/file/commit.rs @@ -0,0 +1,258 @@ +//! Low-level operations on individual commits. +use std::{ + convert::TryInto, + fmt::{Debug, Formatter}, + slice::Chunks, +}; + +use crate::{ + file::{self, EXTENDED_EDGES_MASK, LAST_EXTENDED_EDGE_MASK, NO_PARENT}, + File, Position, +}; + +/// The error used in the [`file::commit`][self] module. +#[derive(thiserror::Error, Debug)] +#[allow(missing_docs)] +pub enum Error { + #[error("commit {0}'s extra edges overflows the commit-graph file's extra edges list")] + ExtraEdgesListOverflow(gix_hash::ObjectId), + #[error("commit {0}'s first parent is an extra edge index, which is invalid")] + FirstParentIsExtraEdgeIndex(gix_hash::ObjectId), + #[error("commit {0} has extra edges, but commit-graph file has no extra edges list")] + MissingExtraEdgesList(gix_hash::ObjectId), + #[error("commit {0} has a second parent but not a first parent")] + SecondParentWithoutFirstParent(gix_hash::ObjectId), +} + +/// A commit as stored in a [`File`]. +#[derive(Copy, Clone)] +pub struct Commit<'a> { + file: &'a File, + pos: file::Position, + // We can parse the below fields lazily if needed. 
+ commit_timestamp: u64, + generation: u32, + parent1: ParentEdge, + parent2: ParentEdge, + root_tree_id: &'a gix_hash::oid, +} + +#[inline] +fn read_u32(b: &[u8]) -> u32 { + u32::from_be_bytes(b.try_into().unwrap()) +} + +impl<'a> Commit<'a> { + pub(crate) fn new(file: &'a File, pos: file::Position) -> Self { + let bytes = file.commit_data_bytes(pos); + Commit { + file, + pos, + root_tree_id: gix_hash::oid::from_bytes_unchecked(&bytes[..file.hash_len]), + parent1: ParentEdge::from_raw(read_u32(&bytes[file.hash_len..][..4])), + parent2: ParentEdge::from_raw(read_u32(&bytes[file.hash_len + 4..][..4])), + // TODO: Add support for corrected commit date offset overflow. + // See https://github.com/git/git/commit/e8b63005c48696a26f976f5f9b0ccaf1983e439d and + // https://github.com/git/git/commit/f90fca638e99a031dce8e3aca72427b2f9b4bb38 for more details and hints at a test. + generation: read_u32(&bytes[file.hash_len + 8..][..4]) >> 2, + commit_timestamp: u64::from_be_bytes(bytes[file.hash_len + 8..][..8].try_into().unwrap()) + & 0x0003_ffff_ffff, + } + } + + /// Returns the committer timestamp of this commit. + /// + /// The value is the number of seconds since 1970-01-01 00:00:00 UTC. + pub fn committer_timestamp(&self) -> u64 { + self.commit_timestamp + } + + /// Returns the generation number of this commit. + /// + /// Commits without parents have generation number 1. Commits with parents have a generation + /// number that is the max of their parents' generation numbers + 1. + pub fn generation(&self) -> u32 { + self.generation + } + + /// Returns an iterator over the parent positions for lookup in the owning [Graph][crate::Graph]. + pub fn iter_parents(self) -> Parents<'a> { + // I didn't find a combinator approach that a) was as strict as ParentIterator, b) supported + // fuse-after-first-error behavior, and b) was significantly shorter or more understandable + // than ParentIterator. So here we are. 
+ Parents { + commit_data: self, + state: ParentIteratorState::First, + } + } + + /// Returns the hash of this commit. + pub fn id(&self) -> &'a gix_hash::oid { + self.file.id_at(self.pos) + } + + /// Returns the first parent of this commit. + pub fn parent1(&self) -> Result<Option<Position>, Error> { + self.iter_parents().next().transpose() + } + + /// Returns the position at which this commit is stored in the parent [File]. + pub fn position(&self) -> file::Position { + self.pos + } + + /// Return the hash of the tree this commit points to. + pub fn root_tree_id(&self) -> &gix_hash::oid { + self.root_tree_id + } +} + +impl<'a> Debug for Commit<'a> { + fn fmt(&self, f: &mut Formatter<'_>) -> std::fmt::Result { + write!( + f, + "Commit {{ id: {}, lex_pos: {}, generation: {}, root_tree_id: {}, parent1: {:?}, parent2: {:?} }}", + self.id(), + self.pos, + self.generation(), + self.root_tree_id(), + self.parent1, + self.parent2, + ) + } +} + +impl<'a> Eq for Commit<'a> {} + +impl<'a> PartialEq for Commit<'a> { + fn eq(&self, other: &Self) -> bool { + std::ptr::eq(self.file, other.file) && self.pos == other.pos + } +} + +/// An iterator over parents of a [`Commit`]. 
+pub struct Parents<'a> { + commit_data: Commit<'a>, + state: ParentIteratorState<'a>, +} + +impl<'a> Iterator for Parents<'a> { + type Item = Result<Position, Error>; + + fn next(&mut self) -> Option<Self::Item> { + let state = std::mem::replace(&mut self.state, ParentIteratorState::Exhausted); + match state { + ParentIteratorState::First => match self.commit_data.parent1 { + ParentEdge::None => match self.commit_data.parent2 { + ParentEdge::None => None, + _ => Some(Err(Error::SecondParentWithoutFirstParent(self.commit_data.id().into()))), + }, + ParentEdge::GraphPosition(pos) => { + self.state = ParentIteratorState::Second; + Some(Ok(pos)) + } + ParentEdge::ExtraEdgeIndex(_) => { + Some(Err(Error::FirstParentIsExtraEdgeIndex(self.commit_data.id().into()))) + } + }, + ParentIteratorState::Second => match self.commit_data.parent2 { + ParentEdge::None => None, + ParentEdge::GraphPosition(pos) => Some(Ok(pos)), + ParentEdge::ExtraEdgeIndex(extra_edge_index) => { + if let Some(extra_edges_list) = self.commit_data.file.extra_edges_data() { + let start_offset: usize = extra_edge_index + .try_into() + .expect("an architecture able to hold 32 bits of integer"); + let start_offset = start_offset + .checked_mul(4) + .expect("an extended edge index small enough to fit in usize"); + if let Some(tail) = extra_edges_list.get(start_offset..) { + self.state = ParentIteratorState::Extra(tail.chunks(4)); + // This recursive call is what blocks me from replacing ParentIterator + // with a std::iter::from_fn closure. 
+ self.next() + } else { + Some(Err(Error::ExtraEdgesListOverflow(self.commit_data.id().into()))) + } + } else { + Some(Err(Error::MissingExtraEdgesList(self.commit_data.id().into()))) + } + } + }, + ParentIteratorState::Extra(mut chunks) => { + if let Some(chunk) = chunks.next() { + let extra_edge = read_u32(chunk); + match ExtraEdge::from_raw(extra_edge) { + ExtraEdge::Internal(pos) => { + self.state = ParentIteratorState::Extra(chunks); + Some(Ok(pos)) + } + ExtraEdge::Last(pos) => Some(Ok(pos)), + } + } else { + Some(Err(Error::ExtraEdgesListOverflow(self.commit_data.id().into()))) + } + } + ParentIteratorState::Exhausted => None, + } + } + + fn size_hint(&self) -> (usize, Option<usize>) { + match (&self.state, self.commit_data.parent1, self.commit_data.parent2) { + (ParentIteratorState::First, ParentEdge::None, ParentEdge::None) => (0, Some(0)), + (ParentIteratorState::First, ParentEdge::None, _) => (1, Some(1)), + (ParentIteratorState::First, ParentEdge::GraphPosition(_), ParentEdge::None) => (1, Some(1)), + (ParentIteratorState::First, ParentEdge::GraphPosition(_), ParentEdge::GraphPosition(_)) => (2, Some(2)), + (ParentIteratorState::First, ParentEdge::GraphPosition(_), ParentEdge::ExtraEdgeIndex(_)) => (3, None), + (ParentIteratorState::First, ParentEdge::ExtraEdgeIndex(_), _) => (1, Some(1)), + (ParentIteratorState::Second, _, ParentEdge::None) => (0, Some(0)), + (ParentIteratorState::Second, _, ParentEdge::GraphPosition(_)) => (1, Some(1)), + (ParentIteratorState::Second, _, ParentEdge::ExtraEdgeIndex(_)) => (2, None), + (ParentIteratorState::Extra(_), _, _) => (1, None), + (ParentIteratorState::Exhausted, _, _) => (0, Some(0)), + } + } +} + +#[derive(Debug)] +enum ParentIteratorState<'a> { + First, + Second, + Extra(Chunks<'a, u8>), + Exhausted, +} + +#[derive(Clone, Copy, Debug)] +enum ParentEdge { + None, + GraphPosition(Position), + ExtraEdgeIndex(u32), +} + +impl ParentEdge { + pub fn from_raw(raw: u32) -> ParentEdge { + if raw == NO_PARENT { + 
return ParentEdge::None; + } + if raw & EXTENDED_EDGES_MASK != 0 { + ParentEdge::ExtraEdgeIndex(raw & !EXTENDED_EDGES_MASK) + } else { + ParentEdge::GraphPosition(Position(raw)) + } + } +} + +enum ExtraEdge { + Internal(Position), + Last(Position), +} + +impl ExtraEdge { + pub fn from_raw(raw: u32) -> Self { + if raw & LAST_EXTENDED_EDGE_MASK != 0 { + Self::Last(Position(raw & !LAST_EXTENDED_EDGE_MASK)) + } else { + Self::Internal(Position(raw)) + } + } +} diff --git a/vendor/gix-commitgraph/src/file/init.rs b/vendor/gix-commitgraph/src/file/init.rs new file mode 100644 index 000000000..1d8f52e3f --- /dev/null +++ b/vendor/gix-commitgraph/src/file/init.rs @@ -0,0 +1,257 @@ +use std::{ + convert::{TryFrom, TryInto}, + path::Path, +}; + +use bstr::ByteSlice; +use memmap2::Mmap; + +use crate::{ + file::{ + ChunkId, BASE_GRAPHS_LIST_CHUNK_ID, COMMIT_DATA_CHUNK_ID, COMMIT_DATA_ENTRY_SIZE_SANS_HASH, + EXTENDED_EDGES_LIST_CHUNK_ID, FAN_LEN, HEADER_LEN, OID_FAN_CHUNK_ID, OID_LOOKUP_CHUNK_ID, SIGNATURE, + }, + File, +}; + +/// The error used in [`File::at()`]. +#[derive(thiserror::Error, Debug)] +#[allow(missing_docs)] +pub enum Error { + #[error("Commit-graph {:?} chunk contains {from_chunk} base graphs, but commit-graph file header claims {from_header} base graphs", BASE_GRAPHS_LIST_CHUNK_ID.as_bstr())] + BaseGraphMismatch { from_header: u8, from_chunk: u32 }, + #[error("Commit-graph {:?} chunk contains {chunk1_commits} commits, but {:?} chunk contains {chunk2_commits} commits", .chunk1_id.as_bstr(), .chunk2_id.as_bstr())] + CommitCountMismatch { + chunk1_id: ChunkId, + chunk1_commits: u32, + chunk2_id: ChunkId, + chunk2_commits: u32, + }, + #[error("{0}")] + Corrupt(String), + // This error case is disabled, as git allows extra garbage in the extra edges list? 
+ // #[error("The last entry in commit-graph's extended edges list does is not marked as being terminal")] + // ExtraEdgesOverflow, + #[error("Could not open commit-graph file at '{}'", .path.display())] + Io { + #[source] + err: std::io::Error, + path: std::path::PathBuf, + }, + #[error("{0}")] + Trailer(String), + #[error("Commit-graph file uses unsupported hash version: {0}")] + UnsupportedHashVersion(u8), + #[error("Unsupported commit-graph file version: {0}")] + UnsupportedVersion(u8), + #[error(transparent)] + ChunkFileDecode(#[from] gix_chunk::file::decode::Error), + #[error(transparent)] + MissingChunk(#[from] gix_chunk::file::index::offset_by_kind::Error), + #[error("Commit-graph chunk {:?} has invalid size: {msg}", .id.as_bstr())] + InvalidChunkSize { id: ChunkId, msg: String }, +} + +const MIN_FILE_SIZE: usize = HEADER_LEN + + gix_chunk::file::Index::size_for_entries(3 /*OIDF, OIDL, CDAT*/) + + FAN_LEN * 4 /* FANOUT TABLE CHUNK OIDF */ + + gix_hash::Kind::shortest().len_in_bytes(); + +impl File { + /// Try to parse the commit graph file at `path`. + pub fn at(path: impl AsRef<Path>) -> Result<File, Error> { + Self::try_from(path.as_ref()) + } +} + +impl TryFrom<&Path> for File { + type Error = Error; + + fn try_from(path: &Path) -> Result<Self, Self::Error> { + let data = std::fs::File::open(path) + .and_then(|file| { + // SAFETY: we have to take the risk of somebody changing the file underneath. Git never writes into the same file. 
+ #[allow(unsafe_code)] + unsafe { + Mmap::map(&file) + } + }) + .map_err(|e| Error::Io { + err: e, + path: path.to_owned(), + })?; + let data_size = data.len(); + if data_size < MIN_FILE_SIZE { + return Err(Error::Corrupt( + "Commit-graph file too small even for an empty graph".to_owned(), + )); + } + + let mut ofs = 0; + if &data[ofs..ofs + SIGNATURE.len()] != SIGNATURE { + return Err(Error::Corrupt( + "Commit-graph file does not start with expected signature".to_owned(), + )); + } + ofs += SIGNATURE.len(); + + match data[ofs] { + 1 => (), + x => { + return Err(Error::UnsupportedVersion(x)); + } + }; + ofs += 1; + + let object_hash = gix_hash::Kind::try_from(data[ofs]).map_err(Error::UnsupportedHashVersion)?; + ofs += 1; + + let chunk_count = data[ofs]; + // Can assert chunk_count >= MIN_CHUNKS here, but later OIDF+OIDL+CDAT presence checks make + // it redundant. + ofs += 1; + + let base_graph_count = data[ofs]; + ofs += 1; + + let chunks = gix_chunk::file::Index::from_bytes(&data, ofs, chunk_count as u32)?; + + let base_graphs_list_offset = chunks + .validated_usize_offset_by_id(BASE_GRAPHS_LIST_CHUNK_ID, |chunk_range| { + let chunk_size = chunk_range.len(); + if chunk_size % object_hash.len_in_bytes() != 0 { + return Err(Error::InvalidChunkSize { + id: BASE_GRAPHS_LIST_CHUNK_ID, + msg: format!( + "chunk size {} is not a multiple of {}", + chunk_size, + object_hash.len_in_bytes() + ), + }); + } + let chunk_base_graph_count: u32 = (chunk_size / object_hash.len_in_bytes()) + .try_into() + .expect("base graph count to fit in 32-bits"); + if chunk_base_graph_count != u32::from(base_graph_count) { + return Err(Error::BaseGraphMismatch { + from_chunk: chunk_base_graph_count, + from_header: base_graph_count, + }); + } + Ok(chunk_range.start) + }) + .ok() + .transpose()?; + + let (commit_data_offset, commit_data_count) = + chunks.validated_usize_offset_by_id(COMMIT_DATA_CHUNK_ID, |chunk_range| { + let chunk_size = chunk_range.len(); + + let entry_size = 
object_hash.len_in_bytes() + COMMIT_DATA_ENTRY_SIZE_SANS_HASH; + if chunk_size % entry_size != 0 { + return Err(Error::InvalidChunkSize { + id: COMMIT_DATA_CHUNK_ID, + msg: format!("chunk size {chunk_size} is not a multiple of {entry_size}"), + }); + } + Ok(( + chunk_range.start, + (chunk_size / entry_size) + .try_into() + .expect("number of commits in CDAT chunk to fit in 32 bits"), + )) + })??; + + let fan_offset = chunks.validated_usize_offset_by_id(OID_FAN_CHUNK_ID, |chunk_range| { + let chunk_size = chunk_range.len(); + + let expected_size = 4 * FAN_LEN; + if chunk_size != expected_size { + return Err(Error::InvalidChunkSize { + id: OID_FAN_CHUNK_ID, + msg: format!("expected chunk length {expected_size}, got {chunk_size}"), + }); + } + Ok(chunk_range.start) + })??; + + let (oid_lookup_offset, oid_lookup_count) = + chunks.validated_usize_offset_by_id(OID_LOOKUP_CHUNK_ID, |chunk_range| { + let chunk_size = chunk_range.len(); + + if chunk_size % object_hash.len_in_bytes() != 0 { + return Err(Error::InvalidChunkSize { + id: OID_LOOKUP_CHUNK_ID, + msg: format!( + "chunk size {} is not a multiple of {}", + chunk_size, + object_hash.len_in_bytes() + ), + }); + } + Ok(( + chunk_range.start, + (chunk_size / object_hash.len_in_bytes()) + .try_into() + .expect("number of commits in OIDL chunk to fit in 32 bits"), + )) + })??; + + let extra_edges_list_range = chunks.usize_offset_by_id(EXTENDED_EDGES_LIST_CHUNK_ID).ok(); + + let trailer = &data[chunks.highest_offset() as usize..]; + if trailer.len() != object_hash.len_in_bytes() { + return Err(Error::Trailer(format!( + "Expected commit-graph trailer to contain {} bytes, got {}", + object_hash.len_in_bytes(), + trailer.len() + ))); + } + + if base_graph_count > 0 && base_graphs_list_offset.is_none() { + return Err(gix_chunk::file::index::offset_by_kind::Error { + kind: BASE_GRAPHS_LIST_CHUNK_ID, + } + .into()); + } + + let (fan, _) = read_fan(&data[fan_offset..]); + if oid_lookup_count != fan[255] { + return 
Err(Error::CommitCountMismatch { + chunk1_id: OID_FAN_CHUNK_ID, + chunk1_commits: fan[255], + chunk2_id: OID_LOOKUP_CHUNK_ID, + chunk2_commits: oid_lookup_count, + }); + } + if commit_data_count != fan[255] { + return Err(Error::CommitCountMismatch { + chunk1_id: OID_FAN_CHUNK_ID, + chunk1_commits: fan[255], + chunk2_id: COMMIT_DATA_CHUNK_ID, + chunk2_commits: commit_data_count, + }); + } + Ok(File { + base_graph_count, + base_graphs_list_offset, + commit_data_offset, + data, + extra_edges_list_range, + fan, + oid_lookup_offset, + path: path.to_owned(), + hash_len: object_hash.len_in_bytes(), + object_hash, + }) + } +} + +// Copied from gix-odb/pack/index/init.rs +fn read_fan(d: &[u8]) -> ([u32; FAN_LEN], usize) { + let mut fan = [0; FAN_LEN]; + for (c, f) in d.chunks(4).zip(fan.iter_mut()) { + *f = u32::from_be_bytes(c.try_into().unwrap()); + } + (fan, FAN_LEN * 4) +} diff --git a/vendor/gix-commitgraph/src/file/mod.rs b/vendor/gix-commitgraph/src/file/mod.rs new file mode 100644 index 000000000..528cd69fb --- /dev/null +++ b/vendor/gix-commitgraph/src/file/mod.rs @@ -0,0 +1,46 @@ +//! Operations on a single commit-graph file. + +use std::fmt::{Display, Formatter}; + +pub use self::{commit::Commit, init::Error}; + +mod access; +pub mod commit; +mod init; +pub mod verify; + +const COMMIT_DATA_ENTRY_SIZE_SANS_HASH: usize = 16; +pub(crate) const FAN_LEN: usize = 256; +const HEADER_LEN: usize = 8; + +const SIGNATURE: &[u8] = b"CGPH"; + +type ChunkId = gix_chunk::Id; +const BASE_GRAPHS_LIST_CHUNK_ID: ChunkId = *b"BASE"; +const COMMIT_DATA_CHUNK_ID: ChunkId = *b"CDAT"; +const EXTENDED_EDGES_LIST_CHUNK_ID: ChunkId = *b"EDGE"; +const OID_FAN_CHUNK_ID: ChunkId = *b"OIDF"; +const OID_LOOKUP_CHUNK_ID: ChunkId = *b"OIDL"; + +// Note that git's commit-graph-format.txt as of v2.28.0 gives an incorrect value 0x0700_0000 for +// NO_PARENT. Fixed in https://github.com/git/git/commit/4d515253afcef985e94400adbfed7044959f9121 . 
+const NO_PARENT: u32 = 0x7000_0000; +const EXTENDED_EDGES_MASK: u32 = 0x8000_0000; +const LAST_EXTENDED_EDGE_MASK: u32 = 0x8000_0000; + +/// The position of a given commit within a graph file, starting at 0. +/// +/// Commits within a graph file are sorted in lexicographical order by OID; a commit's lexicographical position +/// is its position in this ordering. If a commit graph spans multiple files, each file's commits +/// start at lexicographical position 0, so it is unique across a single file but is not unique across +/// the whole commit graph. Each commit also has a graph position ([`Position`][crate::Position]), +/// which is unique across the whole commit graph. +/// In order to avoid accidentally mixing lexicographical positions with graph positions, distinct types are used for each. +#[derive(Clone, Copy, Debug, Eq, Hash, Ord, PartialEq, PartialOrd)] +pub struct Position(pub u32); + +impl Display for Position { + fn fmt(&self, f: &mut Formatter<'_>) -> std::fmt::Result { + self.0.fmt(f) + } +} diff --git a/vendor/gix-commitgraph/src/file/verify.rs b/vendor/gix-commitgraph/src/file/verify.rs new file mode 100644 index 000000000..4f4f76829 --- /dev/null +++ b/vendor/gix-commitgraph/src/file/verify.rs @@ -0,0 +1,173 @@ +//! Auxiliary types used in commit graph file verification methods. +use std::{ + cmp::{max, min}, + collections::HashMap, + path::Path, +}; + +use crate::{file, File, GENERATION_NUMBER_INFINITY, GENERATION_NUMBER_MAX}; + +/// The error used in [`File::traverse()`]. 
+#[derive(thiserror::Error, Debug)] +#[allow(missing_docs)] +pub enum Error<E: std::error::Error + 'static> { + #[error(transparent)] + Commit(#[from] file::commit::Error), + #[error("commit at file position {pos} has invalid ID {id}")] + CommitId { + id: gix_hash::ObjectId, + pos: file::Position, + }, + #[error("commit at file position {pos} with ID {id} is out of order relative to its predecessor with ID {predecessor_id}")] + CommitsOutOfOrder { + id: gix_hash::ObjectId, + pos: file::Position, + predecessor_id: gix_hash::ObjectId, + }, + #[error("commit-graph filename should be {0}")] + Filename(String), + #[error("commit {id} has invalid generation {generation}")] + Generation { generation: u32, id: gix_hash::ObjectId }, + #[error("checksum mismatch: expected {expected}, got {actual}")] + Mismatch { + actual: gix_hash::ObjectId, + expected: gix_hash::ObjectId, + }, + #[error("{0}")] + Processor(#[source] E), + #[error("commit {id} has invalid root tree ID {root_tree_id}")] + RootTreeId { + id: gix_hash::ObjectId, + root_tree_id: gix_hash::ObjectId, + }, +} + +/// The positive result of [`File::traverse()`] providing some statistical information. +#[derive(Clone, Debug, Eq, PartialEq)] +#[cfg_attr(feature = "serde", derive(serde::Deserialize, serde::Serialize))] +pub struct Outcome { + /// The largest encountered [`file::Commit`] generation number. + pub max_generation: u32, + /// The smallest encountered [`file::Commit`] generation number. + pub min_generation: u32, + /// The largest number of parents in a single [`file::Commit`]. + pub max_parents: u32, + /// The total number of [`commits`][file::Commit]s seen in the iteration. + pub num_commits: u32, + /// A mapping of `N -> number of commits with N parents`. + pub parent_counts: HashMap<u32, u32>, +} + +/// Verification +impl File { + /// Returns the trailing checksum over the entire content of this file. 
    pub fn checksum(&self) -> &gix_hash::oid {
        // The trailing checksum occupies the last `hash_len` bytes of the mapped file.
        gix_hash::oid::from_bytes_unchecked(&self.data[self.data.len() - self.hash_len..])
    }

    /// Traverse all [commits][file::Commit] stored in this file and call `processor(commit) -> Result<(), Error>` on it.
    ///
    /// If the `processor` fails, the iteration will be stopped and the entire call results in the respective error.
    pub fn traverse<'a, E, Processor>(&'a self, mut processor: Processor) -> Result<Outcome, Error<E>>
    where
        E: std::error::Error + 'static,
        Processor: FnMut(&file::Commit<'a>) -> Result<(), E>,
    {
        // Validate the file itself before trusting any of its content.
        self.verify_checksum()
            .map_err(|(actual, expected)| Error::Mismatch { actual, expected })?;
        verify_split_chain_filename_hash(&self.path, self.checksum()).map_err(Error::Filename)?;

        // The all-zero id is invalid both as a commit id and as a root tree id.
        let null_id = self.object_hash().null_ref();

        let mut stats = Outcome {
            max_generation: 0,
            max_parents: 0,
            // Start at the maximum so `min()` below can only shrink it; fixed up
            // after the loop for the case of a file without commits.
            min_generation: GENERATION_NUMBER_INFINITY,
            num_commits: self.num_commits(),
            parent_counts: HashMap::new(),
        };

        // TODO: Verify self.fan values as we go.
        let mut prev_id: &gix_hash::oid = null_id;
        for commit in self.iter_commits() {
            // Commit ids must be strictly increasing; a null id gets its own error.
            if commit.id() <= prev_id {
                if commit.id() == null_id {
                    return Err(Error::CommitId {
                        pos: commit.position(),
                        id: commit.id().into(),
                    });
                }
                return Err(Error::CommitsOutOfOrder {
                    pos: commit.position(),
                    id: commit.id().into(),
                    predecessor_id: prev_id.into(),
                });
            }
            if commit.root_tree_id() == null_id {
                return Err(Error::RootTreeId {
                    id: commit.id().into(),
                    root_tree_id: commit.root_tree_id().into(),
                });
            }
            if commit.generation() > GENERATION_NUMBER_MAX {
                return Err(Error::Generation {
                    generation: commit.generation(),
                    id: commit.id().into(),
                });
            }

            processor(&commit).map_err(Error::Processor)?;

            stats.max_generation = max(stats.max_generation, commit.generation());
            stats.min_generation = min(stats.min_generation, commit.generation());
            // Count parents while propagating any parent-decoding error.
            let parent_count = commit
                .iter_parents()
                .try_fold(0u32, |acc, pos| pos.map(|_| acc + 1))
                .map_err(Error::Commit)?;
            *stats.parent_counts.entry(parent_count).or_insert(0) += 1;
            prev_id = commit.id();
        }

        // No commits at all: report 0 instead of the 'infinity' sentinel.
        if stats.min_generation == GENERATION_NUMBER_INFINITY {
            stats.min_generation = 0;
        }

        Ok(stats)
    }

    /// Assure the [`checksum`][File::checksum()] matches the actual checksum over all content of this file, excluding the trailing
    /// checksum itself.
    ///
    /// Return the actual checksum on success or `(actual checksum, expected checksum)` if there is a mismatch.
    pub fn verify_checksum(&self) -> Result<gix_hash::ObjectId, (gix_hash::ObjectId, gix_hash::ObjectId)> {
        // Even though we could use gix_features::hash::bytes_of_file(…), this would require using our own
        // Error type to support io::Error and Mismatch. As we only gain progress, there probably isn't much value
        // as these files are usually small enough to process them in less than a second, even for the large ones.
        // But it's possible, once a progress instance is passed.
        let data_len_without_trailer = self.data.len() - self.hash_len;
        let mut hasher = gix_features::hash::hasher(self.object_hash());
        hasher.update(&self.data[..data_len_without_trailer]);
        let actual = gix_hash::ObjectId::from(hasher.digest().as_ref());

        let expected = self.checksum();
        if actual == expected {
            Ok(actual)
        } else {
            Err((actual, expected.into()))
        }
    }
}

/// If the given path's filename matches "graph-{hash}.graph", check that `hash` matches the
/// expected hash.
fn verify_split_chain_filename_hash(path: impl AsRef<Path>, expected: &gix_hash::oid) -> Result<(), String> {
    let path = path.as_ref();
    // Non-UTF-8 names or names not matching the pattern are deliberately ignored:
    // only a well-formed "graph-<hex>.graph" name makes a claim worth checking.
    path.file_name()
        .and_then(|filename| filename.to_str())
        .and_then(|filename| filename.strip_suffix(".graph"))
        .and_then(|stem| stem.strip_prefix("graph-"))
        .map_or(Ok(()), |hex| match gix_hash::ObjectId::from_hex(hex.as_bytes()) {
            Ok(actual) if actual == expected => Ok(()),
            _ => Err(format!("graph-{}.graph", expected.to_hex())),
        })
}
diff --git a/vendor/gix-commitgraph/src/init.rs b/vendor/gix-commitgraph/src/init.rs
new file mode 100644
index 000000000..f964aade0
--- /dev/null
+++ b/vendor/gix-commitgraph/src/init.rs
@@ -0,0 +1,130 @@
use std::{
    convert::TryFrom,
    io::{BufRead, BufReader},
    path::{Path, PathBuf},
};

use crate::{file, File, Graph, MAX_COMMITS};

/// The error returned by initialization functions like [`Graph::at()`].
#[derive(thiserror::Error, Debug)]
#[allow(missing_docs)]
pub enum Error {
    /// A commit-graph file could not be opened or parsed.
    #[error("{}", .path.display())]
    File {
        #[source]
        err: file::Error,
        path: PathBuf,
    },
    /// Two files of one graph disagree on the kind of object hash they use.
    #[error("Commit-graph files mismatch: '{}' uses hash {hash1:?}, but '{}' uses hash {hash2:?}", .path1.display(), .path2.display())]
    HashVersionMismatch {
        path1: PathBuf,
        hash1: gix_hash::Kind,
        path2: PathBuf,
        hash2: gix_hash::Kind,
    },
    /// The given path was neither a commit-graph file nor a directory containing one.
    #[error("Did not find any files that look like commit graphs at '{}'", .0.display())]
    InvalidPath(PathBuf),
    /// Reading a commit-graph or chain file from disk failed.
    #[error("Could not open commit-graph file at '{}'", .path.display())]
    Io {
        #[source]
        err: std::io::Error,
        path: PathBuf,
    },
    /// The combined files contain more commits than a commit graph can represent.
    #[error(
        "Commit-graph files contain {0} commits altogether, but only {} commits are allowed",
        MAX_COMMITS
    )]
    TooManyCommits(u64),
}

/// Instantiate a `Graph` from various sources.
impl Graph {
    /// Instantiate a commit graph from `path` which may be a directory containing graph files or the graph file itself.
    pub fn at(path: impl AsRef<Path>) -> Result<Self, Error> {
        Self::try_from(path.as_ref())
    }

    /// Instantiate a commit graph from the directory containing all of its files.
    pub fn from_commit_graphs_dir(path: impl AsRef<Path>) -> Result<Self, Error> {
        let commit_graphs_dir = path.as_ref();
        // The chain file lists one graph-file hash per line, in order.
        let chain_file_path = commit_graphs_dir.join("commit-graph-chain");
        let chain_file = std::fs::File::open(&chain_file_path).map_err(|e| Error::Io {
            err: e,
            path: chain_file_path.clone(),
        })?;
        let mut files = Vec::new();
        for line in BufReader::new(chain_file).lines() {
            let hash = line.map_err(|e| Error::Io {
                err: e,
                path: chain_file_path.clone(),
            })?;
            // Each listed hash names a `graph-<hash>.graph` file next to the chain file.
            let graph_file_path = commit_graphs_dir.join(format!("graph-{hash}.graph"));
            files.push(File::at(&graph_file_path).map_err(|e| Error::File {
                err: e,
                path: graph_file_path.clone(),
            })?);
        }
        Self::new(files)
    }

    /// Instantiate a commit graph from a `.git/objects/info/commit-graph` or
    /// `.git/objects/info/commit-graphs/graph-*.graph` file.
    pub fn from_file(path: impl AsRef<Path>) -> Result<Self, Error> {
        let path = path.as_ref();
        let file = File::at(path).map_err(|e| Error::File {
            err: e,
            path: path.to_owned(),
        })?;
        Self::new(vec![file])
    }

    /// Instantiate a commit graph from a `.git/objects/info` directory.
    pub fn from_info_dir(info_dir: impl AsRef<Path>) -> Result<Self, Error> {
        // Prefer the monolithic file, then fall back to a split chain of graph files.
        Self::from_file(info_dir.as_ref().join("commit-graph"))
            .or_else(|_| Self::from_commit_graphs_dir(info_dir.as_ref().join("commit-graphs")))
    }

    /// Create a new commit graph from a list of `files`.
    pub fn new(files: Vec<File>) -> Result<Self, Error> {
        // Sum in u64 so the per-file u32 counts cannot overflow while checking the cap.
        let num_commits: u64 = files.iter().map(|f| u64::from(f.num_commits())).sum();
        if num_commits > u64::from(MAX_COMMITS) {
            return Err(Error::TooManyCommits(num_commits));
        }

        // All files of one graph must use the same object hash; comparing adjacent
        // pairs is sufficient as equality is transitive.
        for window in files.windows(2) {
            let f1 = &window[0];
            let f2 = &window[1];
            if f1.object_hash() != f2.object_hash() {
                return Err(Error::HashVersionMismatch {
                    path1: f1.path().to_owned(),
                    hash1: f1.object_hash(),
                    path2: f2.path().to_owned(),
                    hash2: f2.object_hash(),
                });
            }
        }

        Ok(Self { files })
    }
}

impl TryFrom<&Path> for Graph {
    type Error = Error;

    fn try_from(path: &Path) -> Result<Self, Self::Error> {
        if path.is_file() {
            // Assume we are looking at `.git/objects/info/commit-graph` or
            // `.git/objects/info/commit-graphs/graph-*.graph`.
            Self::from_file(path)
        } else if path.is_dir() {
            // A directory with a chain file is the `commit-graphs` directory itself;
            // otherwise treat it as an `objects/info` directory holding either layout.
            if path.join("commit-graph-chain").is_file() {
                Self::from_commit_graphs_dir(path)
            } else {
                Self::from_info_dir(path)
            }
        } else {
            Err(Error::InvalidPath(path.to_owned()))
        }
    }
}
diff --git a/vendor/gix-commitgraph/src/lib.rs b/vendor/gix-commitgraph/src/lib.rs
new file mode 100644
index 000000000..231c11c6f
--- /dev/null
+++ b/vendor/gix-commitgraph/src/lib.rs
@@ -0,0 +1,77 @@
//! Read, verify, and traverse git commit graphs.
//!
//! A [commit graph][Graph] is an index of commits in the git commit history.
//! The [Graph] stores commit data in a way that accelerates lookups considerably compared to
//! traversing the git history by usual means.
//!
//! As generating the full commit graph from scratch can take some time, git may write new commits
//! to separate [files][File] instead of overwriting the original file.
//! Eventually, git will merge these files together as the number of files grows.
//!
//! ## Feature Flags
#![cfg_attr(
    feature = "document-features",
    cfg_attr(doc, doc = ::document_features::document_features!())
)]
#![cfg_attr(docsrs, feature(doc_cfg, doc_auto_cfg))]
#![deny(missing_docs, rust_2018_idioms, unsafe_code)]

use std::path::Path;

/// A single commit-graph file.
///
/// All operations on a `File` are local to that graph file. Since a commit graph can span multiple
/// files, all interesting graph operations belong on [`Graph`][crate::Graph].
pub struct File {
    // How many base graph files this file builds upon.
    base_graph_count: u8,
    // Offset of the optional base-graphs list within `data`, if present.
    base_graphs_list_offset: Option<usize>,
    // Offset of the commit-data section within `data`.
    commit_data_offset: usize,
    // The entire file, memory-mapped.
    data: memmap2::Mmap,
    // Byte range of the optional extra-edges list within `data`, if present.
    extra_edges_list_range: Option<std::ops::Range<usize>>,
    // The fan-out table with `FAN_LEN` entries.
    fan: [u32; file::FAN_LEN],
    // Offset of the object-id lookup table within `data`.
    oid_lookup_offset: usize,
    // Path this file was loaded from, kept for error reporting.
    path: std::path::PathBuf,
    // Length in bytes of a single hash of kind `object_hash`.
    hash_len: usize,
    // The kind of object hash used throughout this file.
    object_hash: gix_hash::Kind,
}

/// A complete commit graph.
///
/// The data in the commit graph may come from a monolithic `objects/info/commit-graph` file, or it
/// may come from one or more `objects/info/commit-graphs/graph-*.graph` files. These files are
/// generated via `git commit-graph write ...` commands.
pub struct Graph {
    // The files making up this graph, in chain order.
    files: Vec<File>,
}

/// Instantiate a commit graph from a `.git/objects/info` directory, or one of the various commit-graph files.
pub fn at(path: impl AsRef<Path>) -> Result<Graph, init::Error> {
    Graph::at(path)
}

mod access;
pub mod file;
/// Instantiation of a [`Graph`] from files or directories.
pub mod init;
pub mod verify;

/// The number of generations that are considered 'infinite' commit history.
pub const GENERATION_NUMBER_INFINITY: u32 = 0xffff_ffff;
/// The largest valid generation number.
///
/// If a commit's real generation number is larger than this, the commit graph will cap the value to
/// this number.
/// The largest distinct generation number is `GENERATION_NUMBER_MAX - 1`.
pub const GENERATION_NUMBER_MAX: u32 = 0x3fff_ffff;

/// The maximum number of commits that can be stored in a commit graph.
pub const MAX_COMMITS: u32 = (1 << 30) + (1 << 29) + (1 << 28) - 1;

/// A generalized position for use in [`Graph`].
#[derive(Clone, Copy, Debug, Eq, Ord, PartialEq, PartialOrd, Hash)]
pub struct Position(pub u32);

impl std::fmt::Display for Position {
    fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
        // Display as the plain wrapped number.
        self.0.fmt(f)
    }
}
diff --git a/vendor/gix-commitgraph/src/verify.rs b/vendor/gix-commitgraph/src/verify.rs
new file mode 100644
index 000000000..2d863e648
--- /dev/null
+++ b/vendor/gix-commitgraph/src/verify.rs
@@ -0,0 +1,205 @@
//! Auxiliary types used by graph verification methods.
use std::{
    cmp::{max, min},
    collections::BTreeMap,
    convert::TryInto,
    path::PathBuf,
};

use crate::{
    file::{self, commit},
    Graph, Position, GENERATION_NUMBER_MAX,
};

/// The error used in [`verify_integrity()`][Graph::verify_integrity].
#[derive(thiserror::Error, Debug)]
#[allow(missing_docs)]
pub enum Error<E: std::error::Error + 'static> {
    #[error("'{}' should have {expected} base graphs, but claims {actual} base graphs", .path.display())]
    BaseGraphCount { actual: u8, expected: u8, path: PathBuf },
    #[error("'{}' base graph at index {index} should have ID {expected} but is {actual}", .path.display())]
    BaseGraphId {
        actual: gix_hash::ObjectId,
        expected: gix_hash::ObjectId,
        index: u8,
        path: PathBuf,
    },
    #[error(transparent)]
    Commit(#[from] commit::Error),
    #[error("{}: {err}", .path.display())]
    File {
        // Use zero-size error type. We will never return
        // `graph::verify::Error::File(file::verify::Error::Processor(...))`, because we are the
        // file's processor, and we convert `file::verify::Error::Processor<graph::verify::Error>`
        // variants into direct `graph::verify::Error` values.
        err: file::verify::Error<std::convert::Infallible>,
        path: PathBuf,
    },
    #[error("Commit {id}'s generation should be {expected} but is {actual}")]
    Generation {
        actual: u32,
        expected: u32,
        id: gix_hash::ObjectId,
    },
    #[error(
        "Commit {id} has parent position {parent_pos} that is out of range (should be in range 0-{max_valid_pos})"
    )]
    ParentOutOfRange {
        id: gix_hash::ObjectId,
        max_valid_pos: Position,
        parent_pos: Position,
    },
    #[error("{0}")]
    Processor(#[source] E),
    #[error("Commit-graph should be composed of at most 256 files but actually contains {0} files")]
    TooManyFiles(usize),
}

/// Statistics gathered while verifying the integrity of the graph as returned by [`Graph::verify_integrity()`].
#[derive(Clone, Debug, Eq, PartialEq)]
#[cfg_attr(feature = "serde", derive(serde::Deserialize, serde::Serialize))]
pub struct Outcome {
    /// The length of the longest path between any two commits in this graph.
    ///
    /// For example, this will be `Some(9)` for a commit graph containing 10 linear commits.
    /// This will be `Some(0)` for a commit graph containing 0 or 1 commits.
    /// If the longest path length is too large to fit in a [u32], then this will be [None].
    pub longest_path_length: Option<u32>,
    /// The total number of commits traversed.
    pub num_commits: u32,
    /// A mapping of `N -> number of commits with N parents`.
    pub parent_counts: BTreeMap<u32, u32>,
}

impl Graph {
    /// Traverse all commits in the graph and call `processor(&commit) -> Result<(), E>` on it while verifying checksums.
    ///
    /// When `processor` returns an error, the entire verification is stopped and the error returned.
    pub fn verify_integrity<E>(
        &self,
        mut processor: impl FnMut(&file::Commit<'_>) -> Result<(), E>,
    ) -> Result<Outcome, Error<E>>
    where
        E: std::error::Error + 'static,
    {
        if self.files.len() > 256 {
            // A file in a split chain can only have up to 255 base files.
            return Err(Error::TooManyFiles(self.files.len()));
        }

        let mut stats = Outcome {
            longest_path_length: None,
            num_commits: 0,
            parent_counts: BTreeMap::new(),
        };
        let mut max_generation = 0u32;

        // TODO: Detect duplicate commit IDs across different files. Not sure how to do this without
        // a separate loop, e.g. self.iter_sorted_ids().

        let mut file_start_pos = Position(0);
        for (file_index, file) in self.files.iter().enumerate() {
            // Each file must declare exactly as many base graphs as files precede it in the chain.
            if usize::from(file.base_graph_count()) != file_index {
                return Err(Error::BaseGraphCount {
                    actual: file.base_graph_count(),
                    expected: file_index
                        .try_into()
                        .expect("files.len() check to protect against this"),
                    path: file.path().to_owned(),
                });
            }

            // The base graph IDs stored in this file must match the checksums of all
            // preceding files, in order.
            for (base_graph_index, (expected, actual)) in self.files[..file_index]
                .iter()
                .map(|base_file| base_file.checksum())
                .zip(file.iter_base_graph_ids())
                .enumerate()
            {
                if actual != expected {
                    return Err(Error::BaseGraphId {
                        actual: actual.into(),
                        expected: expected.into(),
                        index: base_graph_index
                            .try_into()
                            .expect("files.len() check to protect against this"),
                        path: file.path().to_owned(),
                    });
                }
            }

            let next_file_start_pos = Position(file_start_pos.0 + file.num_commits());
            let file_stats = file
                .traverse(|commit| {
                    let mut max_parent_generation = 0u32;
                    for parent_pos in commit.iter_parents() {
                        let parent_pos = parent_pos.map_err(Error::Commit)?;
                        // Parents may only point at commits in this file or its base files.
                        if parent_pos >= next_file_start_pos {
                            return Err(Error::ParentOutOfRange {
                                parent_pos,
                                id: commit.id().into(),
                                max_valid_pos: Position(next_file_start_pos.0 - 1),
                            });
                        }
                        let parent = self.commit_at(parent_pos);
                        max_parent_generation = max(max_parent_generation, parent.generation());
                    }

                    // If the max parent generation is GENERATION_NUMBER_MAX, then this commit's
                    // generation should be GENERATION_NUMBER_MAX too.
                    let expected_generation = min(max_parent_generation + 1, GENERATION_NUMBER_MAX);
                    if commit.generation() != expected_generation {
                        return Err(Error::Generation {
                            actual: commit.generation(),
                            expected: expected_generation,
                            id: commit.id().into(),
                        });
                    }

                    processor(commit).map_err(Error::Processor)?;

                    Ok(())
                })
                .map_err(|err| Error::File {
                    // Re-wrap the per-file error, changing its type parameter to
                    // `Infallible`; our own processor errors are returned directly instead.
                    err: match err {
                        file::verify::Error::Processor(e) => return e,
                        file::verify::Error::RootTreeId { id, root_tree_id } => {
                            file::verify::Error::RootTreeId { id, root_tree_id }
                        }
                        file::verify::Error::Mismatch { actual, expected } => {
                            file::verify::Error::Mismatch { actual, expected }
                        }
                        file::verify::Error::Generation { generation, id } => {
                            file::verify::Error::Generation { generation, id }
                        }
                        file::verify::Error::Filename(expected) => file::verify::Error::Filename(expected),
                        file::verify::Error::Commit(err) => file::verify::Error::Commit(err),
                        file::verify::Error::CommitId { id, pos } => file::verify::Error::CommitId { id, pos },
                        file::verify::Error::CommitsOutOfOrder {
                            id,
                            pos,
                            predecessor_id,
                        } => file::verify::Error::CommitsOutOfOrder {
                            id,
                            pos,
                            predecessor_id,
                        },
                    },
                    path: file.path().to_owned(),
                })?;

            max_generation = max(max_generation, file_stats.max_generation);
            stats.num_commits += file_stats.num_commits;
            for (key, value) in file_stats.parent_counts.into_iter() {
                *stats.parent_counts.entry(key).or_insert(0) += value;
            }
            file_start_pos = next_file_start_pos;
        }

        // A max generation of GENERATION_NUMBER_MAX may itself be a capped value
        // (see `GENERATION_NUMBER_MAX`), so the true longest path is unknown then.
        stats.longest_path_length = if max_generation < GENERATION_NUMBER_MAX {
            Some(max_generation.saturating_sub(1))
        } else {
            None
        };
        Ok(stats)
    }
}