//! Auxiliary types used by graph verification methods. use std::{ cmp::{max, min}, collections::BTreeMap, convert::TryInto, path::PathBuf, }; use crate::{ file::{self, commit}, Graph, Position, GENERATION_NUMBER_MAX, }; /// The error used in [`verify_integrity()`][Graph::verify_integrity]. #[derive(thiserror::Error, Debug)] #[allow(missing_docs)] pub enum Error { #[error("'{}' should have {expected} base graphs, but claims {actual} base graphs", .path.display())] BaseGraphCount { actual: u8, expected: u8, path: PathBuf }, #[error("'{}' base graph at index {index} should have ID {expected} but is {actual}", .path.display())] BaseGraphId { actual: gix_hash::ObjectId, expected: gix_hash::ObjectId, index: u8, path: PathBuf, }, #[error(transparent)] Commit(#[from] commit::Error), #[error("{}: {err}", .path.display())] File { // Use zero-size error type. We will never return // `graph::verify::Error::File(file::verify::Error::Processor(...))`, because we are the // file's processor, and we convert`file::verify::Error::Processor` // variants into direct `graph::verify::Error` values. err: file::verify::Error, path: PathBuf, }, #[error("Commit {id}'s generation should be {expected} but is {actual}")] Generation { actual: u32, expected: u32, id: gix_hash::ObjectId, }, #[error( "Commit {id} has parent position {parent_pos} that is out of range (should be in range 0-{max_valid_pos})" )] ParentOutOfRange { id: gix_hash::ObjectId, max_valid_pos: Position, parent_pos: Position, }, #[error("{0}")] Processor(#[source] E), #[error("Commit-graph should be composed of at most 256 files but actually contains {0} files")] TooManyFiles(usize), } /// Statistics gathered while verifying the integrity of the graph as returned by [`Graph::verify_integrity()`]. #[derive(Clone, Debug, Eq, PartialEq)] #[cfg_attr(feature = "serde", derive(serde::Deserialize, serde::Serialize))] pub struct Outcome { /// The length of the longest path between any two commits in this graph. /// /// For example, this will be `Some(9)` for a commit graph containing 10 linear commits. /// This will be `Some(0)` for a commit graph containing 0 or 1 commits. /// If the longest path length is too large to fit in a [u32], then this will be [None]. pub longest_path_length: Option, /// The total number of commits traversed. pub num_commits: u32, /// A mapping of `N -> number of commits with N parents`. pub parent_counts: BTreeMap, } impl Graph { /// Traverse all commits in the graph and call `processor(&commit) -> Result<(), E>` on it while verifying checksums. /// /// When `processor` returns an error, the entire verification is stopped and the error returned. pub fn verify_integrity( &self, mut processor: impl FnMut(&file::Commit<'_>) -> Result<(), E>, ) -> Result> where E: std::error::Error + 'static, { if self.files.len() > 256 { // A file in a split chain can only have up to 255 base files. return Err(Error::TooManyFiles(self.files.len())); } let mut stats = Outcome { longest_path_length: None, num_commits: 0, parent_counts: BTreeMap::new(), }; let mut max_generation = 0u32; // TODO: Detect duplicate commit IDs across different files. Not sure how to do this without // a separate loop, e.g. self.iter_sorted_ids(). let mut file_start_pos = Position(0); for (file_index, file) in self.files.iter().enumerate() { if usize::from(file.base_graph_count()) != file_index { return Err(Error::BaseGraphCount { actual: file.base_graph_count(), expected: file_index .try_into() .expect("files.len() check to protect against this"), path: file.path().to_owned(), }); } for (base_graph_index, (expected, actual)) in self.files[..file_index] .iter() .map(|base_file| base_file.checksum()) .zip(file.iter_base_graph_ids()) .enumerate() { if actual != expected { return Err(Error::BaseGraphId { actual: actual.into(), expected: expected.into(), index: base_graph_index .try_into() .expect("files.len() check to protect against this"), path: file.path().to_owned(), }); } } let next_file_start_pos = Position(file_start_pos.0 + file.num_commits()); let file_stats = file .traverse(|commit| { let mut max_parent_generation = 0u32; for parent_pos in commit.iter_parents() { let parent_pos = parent_pos.map_err(Error::Commit)?; if parent_pos >= next_file_start_pos { return Err(Error::ParentOutOfRange { parent_pos, id: commit.id().into(), max_valid_pos: Position(next_file_start_pos.0 - 1), }); } let parent = self.commit_at(parent_pos); max_parent_generation = max(max_parent_generation, parent.generation()); } // If the max parent generation is GENERATION_NUMBER_MAX, then this commit's // generation should be GENERATION_NUMBER_MAX too. let expected_generation = min(max_parent_generation + 1, GENERATION_NUMBER_MAX); if commit.generation() != expected_generation { return Err(Error::Generation { actual: commit.generation(), expected: expected_generation, id: commit.id().into(), }); } processor(commit).map_err(Error::Processor)?; Ok(()) }) .map_err(|err| Error::File { err: match err { file::verify::Error::Processor(e) => return e, file::verify::Error::RootTreeId { id, root_tree_id } => { file::verify::Error::RootTreeId { id, root_tree_id } } file::verify::Error::Mismatch { actual, expected } => { file::verify::Error::Mismatch { actual, expected } } file::verify::Error::Generation { generation, id } => { file::verify::Error::Generation { generation, id } } file::verify::Error::Filename(expected) => file::verify::Error::Filename(expected), file::verify::Error::Commit(err) => file::verify::Error::Commit(err), file::verify::Error::CommitId { id, pos } => file::verify::Error::CommitId { id, pos }, file::verify::Error::CommitsOutOfOrder { id, pos, predecessor_id, } => file::verify::Error::CommitsOutOfOrder { id, pos, predecessor_id, }, }, path: file.path().to_owned(), })?; max_generation = max(max_generation, file_stats.max_generation); stats.num_commits += file_stats.num_commits; for (key, value) in file_stats.parent_counts.into_iter() { *stats.parent_counts.entry(key).or_insert(0) += value; } file_start_pos = next_file_start_pos; } stats.longest_path_length = if max_generation < GENERATION_NUMBER_MAX { Some(max_generation.saturating_sub(1)) } else { None }; Ok(stats) } }