use std::{cmp::Ordering, sync::atomic::AtomicBool, time::Instant};
use gix_features::progress::Progress;
use crate::{index, multi_index::File};
///
pub mod integrity {
use crate::multi_index::EntryIndex;
/// Returned by [`multi_index::File::verify_integrity()`][crate::multi_index::File::verify_integrity()].
#[derive(thiserror::Error, Debug)]
#[allow(missing_docs)]
pub enum Error {
#[error("Object {id} should be at pack-offset {expected_pack_offset} but was found at {actual_pack_offset}")]
PackOffsetMismatch {
id: gix_hash::ObjectId,
expected_pack_offset: u64,
actual_pack_offset: u64,
},
#[error(transparent)]
MultiIndexChecksum(#[from] crate::multi_index::verify::checksum::Error),
#[error(transparent)]
IndexIntegrity(#[from] crate::index::verify::integrity::Error),
#[error(transparent)]
BundleInit(#[from] crate::bundle::init::Error),
#[error("Counted {actual} objects, but expected {expected} as per multi-index")]
UnexpectedObjectCount { actual: usize, expected: usize },
#[error("{id} wasn't found in the index referenced in the multi-pack index")]
OidNotFound { id: gix_hash::ObjectId },
#[error("The object id at multi-index entry {index} wasn't in order")]
OutOfOrder { index: EntryIndex },
#[error("The fan at index {index} is out of order as it's larger then the following value.")]
Fan { index: usize },
#[error("The multi-index claims to have no objects")]
Empty,
#[error("Interrupted")]
Interrupted,
}
/// Returned by [`multi_index::File::verify_integrity()`][crate::multi_index::File::verify_integrity()].
pub struct Outcome
{
/// The computed checksum of the multi-index which matched the stored one.
pub actual_index_checksum: gix_hash::ObjectId,
/// The for each entry in [`index_names()`][super::File::index_names()] provide the corresponding pack traversal outcome.
pub pack_traverse_statistics: Vec,
/// The provided progress instance.
pub progress: P,
}
/// The progress ids used in [`multi_index::File::verify_integrity()`][crate::multi_index::File::verify_integrity()].
///
/// Use this information to selectively extract the progress of interest in case the parent application has custom visualization.
#[derive(Debug, Copy, Clone)]
pub enum ProgressId {
/// The amount of bytes read to verify the multi-index checksum.
ChecksumBytes,
/// The amount of objects whose offset has been checked.
ObjectOffsets,
}
impl From for gix_features::progress::Id {
fn from(v: ProgressId) -> Self {
match v {
ProgressId::ChecksumBytes => *b"MVCK",
ProgressId::ObjectOffsets => *b"MVOF",
}
}
}
}
/// Error type used when verifying the checksum of a multi-pack index file.
pub mod checksum {
/// Returned by [`multi_index::File::verify_checksum()`][crate::multi_index::File::verify_checksum()].
///
/// This is an alias for the crate-wide checksum verification error.
pub type Error = crate::verify::checksum::Error;
}
impl File {
/// Validate that our [`checksum()`][File::checksum()] matches the actual contents
/// of this index file, and return it if it does.
pub fn verify_checksum(
&self,
progress: impl Progress,
should_interrupt: &AtomicBool,
) -> Result {
crate::verify::checksum_on_disk_or_mmap(
self.path(),
&self.data,
self.checksum(),
self.object_hash,
progress,
should_interrupt,
)
}
/// Similar to [`verify_integrity()`][File::verify_integrity()] but without any deep inspection of objects.
///
/// Instead we only validate the contents of the multi-index itself.
///
/// On success, the verified checksum of the multi-index is returned along with the `progress`
/// instance that was passed in.
///
/// # Errors
///
/// Returns an [`integrity::Error`] describing the first problem that was found.
pub fn verify_integrity_fast<P>(
    &self,
    progress: P,
    should_interrupt: &AtomicBool,
) -> Result<(gix_hash::ObjectId, P), integrity::Error>
where
    P: Progress,
{
    self.verify_integrity_inner(
        progress,
        should_interrupt,
        false, // no deep check: packs are never traversed
        index::verify::integrity::Options::default(),
    )
    // Without a deep check, every failure is reported through the `Processor` variant,
    // so it can be unwrapped into the plain integrity error.
    .map_err(|err| match err {
        index::traverse::Error::Processor(err) => err,
        _ => unreachable!("BUG: no other error type is possible"),
    })
    .map(|o| (o.actual_index_checksum, o.progress))
}
/// Similar to [`crate::Bundle::verify_integrity()`] but checks all contained indices and their packs.
///
/// Note that it's considered a failure if an index doesn't have a corresponding pack.
pub fn verify_integrity(
&self,
progress: P,
should_interrupt: &AtomicBool,
options: index::verify::integrity::Options,
) -> Result, index::traverse::Error>
where
P: Progress,
C: crate::cache::DecodeEntry,
F: Fn() -> C + Send + Clone,
{
self.verify_integrity_inner(progress, should_interrupt, true, options)
}
fn verify_integrity_inner(
&self,
mut progress: P,
should_interrupt: &AtomicBool,
deep_check: bool,
options: index::verify::integrity::Options,
) -> Result, index::traverse::Error>
where
P: Progress,
C: crate::cache::DecodeEntry,
F: Fn() -> C + Send + Clone,
{
let parent = self.path.parent().expect("must be in a directory");
let actual_index_checksum = self
.verify_checksum(
progress.add_child_with_id(
format!("{}: checksum", self.path.display()),
integrity::ProgressId::ChecksumBytes.into(),
),
should_interrupt,
)
.map_err(integrity::Error::from)
.map_err(index::traverse::Error::Processor)?;
if let Some(first_invalid) = crate::verify::fan(&self.fan) {
return Err(index::traverse::Error::Processor(integrity::Error::Fan {
index: first_invalid,
}));
}
if self.num_objects == 0 {
return Err(index::traverse::Error::Processor(integrity::Error::Empty));
}
let mut pack_traverse_statistics = Vec::new();
let operation_start = Instant::now();
let mut total_objects_checked = 0;
let mut pack_ids_and_offsets = Vec::with_capacity(self.num_objects as usize);
{
let order_start = Instant::now();
let mut progress = progress.add_child_with_id("checking oid order", gix_features::progress::UNKNOWN);
progress.init(
Some(self.num_objects as usize),
gix_features::progress::count("objects"),
);
for entry_index in 0..(self.num_objects - 1) {
let lhs = self.oid_at_index(entry_index);
let rhs = self.oid_at_index(entry_index + 1);
if rhs.cmp(lhs) != Ordering::Greater {
return Err(index::traverse::Error::Processor(integrity::Error::OutOfOrder {
index: entry_index,
}));
}
let (pack_id, _) = self.pack_id_and_pack_offset_at_index(entry_index);
pack_ids_and_offsets.push((pack_id, entry_index));
progress.inc();
}
{
let entry_index = self.num_objects - 1;
let (pack_id, _) = self.pack_id_and_pack_offset_at_index(entry_index);
pack_ids_and_offsets.push((pack_id, entry_index));
}
// sort by pack-id to allow handling all indices matching a pack while its open.
pack_ids_and_offsets.sort_by(|l, r| l.0.cmp(&r.0));
progress.show_throughput(order_start);
};
progress.init(
Some(self.num_indices as usize),
gix_features::progress::count("indices"),
);
let mut pack_ids_slice = pack_ids_and_offsets.as_slice();
for (pack_id, index_file_name) in self.index_names.iter().enumerate() {
progress.set_name(index_file_name.display().to_string());
progress.inc();
let mut bundle = None;
let index;
let index_path = parent.join(index_file_name);
let index = if deep_check {
bundle = crate::Bundle::at(index_path, self.object_hash)
.map_err(integrity::Error::from)
.map_err(index::traverse::Error::Processor)?
.into();
bundle.as_ref().map(|b| &b.index).expect("just set")
} else {
index = Some(
index::File::at(index_path, self.object_hash)
.map_err(|err| integrity::Error::BundleInit(crate::bundle::init::Error::Index(err)))
.map_err(index::traverse::Error::Processor)?,
);
index.as_ref().expect("just set")
};
let slice_end = pack_ids_slice.partition_point(|e| e.0 == pack_id as crate::data::Id);
let multi_index_entries_to_check = &pack_ids_slice[..slice_end];
{
let offset_start = Instant::now();
let mut offsets_progress =
progress.add_child_with_id("verify object offsets", integrity::ProgressId::ObjectOffsets.into());
offsets_progress.init(
Some(pack_ids_and_offsets.len()),
gix_features::progress::count("objects"),
);
pack_ids_slice = &pack_ids_slice[slice_end..];
for entry_id in multi_index_entries_to_check.iter().map(|e| e.1) {
let oid = self.oid_at_index(entry_id);
let (_, expected_pack_offset) = self.pack_id_and_pack_offset_at_index(entry_id);
let entry_in_bundle_index = index.lookup(oid).ok_or_else(|| {
index::traverse::Error::Processor(integrity::Error::OidNotFound { id: oid.to_owned() })
})?;
let actual_pack_offset = index.pack_offset_at_index(entry_in_bundle_index);
if actual_pack_offset != expected_pack_offset {
return Err(index::traverse::Error::Processor(
integrity::Error::PackOffsetMismatch {
id: oid.to_owned(),
expected_pack_offset,
actual_pack_offset,
},
));
}
offsets_progress.inc();
}
if should_interrupt.load(std::sync::atomic::Ordering::Relaxed) {
return Err(index::traverse::Error::Processor(integrity::Error::Interrupted));
}
offsets_progress.show_throughput(offset_start);
}
total_objects_checked += multi_index_entries_to_check.len();
if let Some(bundle) = bundle {
progress.set_name(format!("Validating {}", index_file_name.display()));
let crate::bundle::verify::integrity::Outcome {
actual_index_checksum: _,
pack_traverse_outcome,
progress: returned_progress,
} = bundle
.verify_integrity(progress, should_interrupt, options.clone())
.map_err(|err| {
use index::traverse::Error::*;
match err {
Processor(err) => Processor(integrity::Error::IndexIntegrity(err)),
VerifyChecksum(err) => VerifyChecksum(err),
Tree(err) => Tree(err),
TreeTraversal(err) => TreeTraversal(err),
PackDecode { id, offset, source } => PackDecode { id, offset, source },
PackMismatch { expected, actual } => PackMismatch { expected, actual },
PackObjectMismatch {
expected,
actual,
offset,
kind,
} => PackObjectMismatch {
expected,
actual,
offset,
kind,
},
Crc32Mismatch {
expected,
actual,
offset,
kind,
} => Crc32Mismatch {
expected,
actual,
offset,
kind,
},
Interrupted => Interrupted,
}
})?;
progress = returned_progress;
pack_traverse_statistics.push(pack_traverse_outcome);
}
}
assert_eq!(
self.num_objects as usize, total_objects_checked,
"BUG: our slicing should allow to visit all objects"
);
progress.set_name("Validating multi-pack");
progress.show_throughput(operation_start);
Ok(integrity::Outcome {
actual_index_checksum,
pack_traverse_statistics,
progress,
})
}
}