| | | |
|---|---|---|
| author | Daniel Baumann <daniel.baumann@progress-linux.org> | 2024-05-04 12:41:41 +0000 |
| committer | Daniel Baumann <daniel.baumann@progress-linux.org> | 2024-05-04 12:41:41 +0000 |
| commit | 10ee2acdd26a7f1298c6f6d6b7af9b469fe29b87 (patch) | |
| tree | bdffd5d80c26cf4a7a518281a204be1ace85b4c1 /vendor/gix-odb/src | |
| parent | Releasing progress-linux version 1.70.0+dfsg1-9~progress7.99u1. (diff) | |
Merging upstream version 1.70.0+dfsg2.
Signed-off-by: Daniel Baumann <daniel.baumann@progress-linux.org>
Diffstat (limited to 'vendor/gix-odb/src')
27 files changed, 5236 insertions, 0 deletions
diff --git a/vendor/gix-odb/src/alternate/mod.rs b/vendor/gix-odb/src/alternate/mod.rs new file mode 100644 index 000000000..c343ef5aa --- /dev/null +++ b/vendor/gix-odb/src/alternate/mod.rs @@ -0,0 +1,75 @@ +//! A file with directories of other git object databases to use when reading objects. +//! +//! This inherently makes alternates read-only. +//! +//! An alternate file in `<git-dir>/info/alternates` can look as follows: +//! +//! ```text +//! # a comment, empty lines are also allowed +//! # relative paths resolve relative to the parent git repository +//! ../path/relative/to/repo/.git +//! /absolute/path/to/repo/.git +//! +//! "/a/ansi-c-quoted/path/with/tabs\t/.git" +//! +//! # each .git directory should indeed be a directory, and not a file +//! ``` +//! +//! Based on the [canonical implementation](https://github.com/git/git/blob/master/sha1-file.c#L598:L609). +use std::{fs, io, path::PathBuf}; + +use gix_path::realpath::MAX_SYMLINKS; + +/// +pub mod parse; + +/// Returned by [`resolve()`] +#[derive(thiserror::Error, Debug)] +#[allow(missing_docs)] +pub enum Error { + #[error(transparent)] + Io(#[from] io::Error), + #[error(transparent)] + Realpath(#[from] gix_path::realpath::Error), + #[error(transparent)] + Parse(#[from] parse::Error), + #[error("Alternates form a cycle: {} -> {}", .0.iter().map(|p| format!("'{}'", p.display())).collect::<Vec<_>>().join(" -> "), .0.first().expect("more than one directories").display())] + Cycle(Vec<PathBuf>), +} + +/// Given an `objects_directory`, try to resolve alternate object directories possibly located in the +/// `./info/alternates` file into canonical paths and resolve relative paths with the help of the `current_dir`. +/// If no alternate object database was resolved, the resulting `Vec` is empty (it is not an error +/// if there are no alternates). +/// It is an error once a repository is seen again as it would lead to a cycle. 
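+// A minimal usage sketch, not part of the upstream sources: how `resolve()` might be called.
+// The repository path is hypothetical and errors are simply propagated with `?`.
+//
+//     let alternates = gix_odb::alternate::resolve("/path/to/repo/.git/objects", std::env::current_dir()?)?;
+//     for objects_dir in alternates {
+//         println!("also reading objects from {}", objects_dir.display());
+//     }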
+pub fn resolve( + objects_directory: impl Into<PathBuf>, + current_dir: impl AsRef<std::path::Path>, +) -> Result<Vec<PathBuf>, Error> { + let relative_base = objects_directory.into(); + let mut dirs = vec![(0, relative_base.clone())]; + let mut out = Vec::new(); + let cwd = current_dir.as_ref(); + let mut seen = vec![gix_path::realpath_opts(&relative_base, cwd, MAX_SYMLINKS)?]; + while let Some((depth, dir)) = dirs.pop() { + match fs::read(dir.join("info").join("alternates")) { + Ok(input) => { + for path in parse::content(&input)?.into_iter() { + let path = relative_base.join(path); + let path_canonicalized = gix_path::realpath_opts(&path, cwd, MAX_SYMLINKS)?; + if seen.contains(&path_canonicalized) { + return Err(Error::Cycle(seen)); + } + seen.push(path_canonicalized); + dirs.push((depth + 1, path)); + } + } + Err(err) if err.kind() == io::ErrorKind::NotFound => {} + Err(err) => return Err(err.into()), + }; + if depth != 0 { + out.push(dir); + } + } + Ok(out) +} diff --git a/vendor/gix-odb/src/alternate/parse.rs b/vendor/gix-odb/src/alternate/parse.rs new file mode 100644 index 000000000..1c297d153 --- /dev/null +++ b/vendor/gix-odb/src/alternate/parse.rs @@ -0,0 +1,33 @@ +use std::{borrow::Cow, path::PathBuf}; + +use gix_object::bstr::ByteSlice; + +/// Returned as part of [`crate::alternate::Error::Parse`] +#[derive(thiserror::Error, Debug)] +#[allow(missing_docs)] +pub enum Error { + #[error("Could not obtain an object path for the alternate directory '{}'", String::from_utf8_lossy(.0))] + PathConversion(Vec<u8>), + #[error("Could not unquote alternate path")] + Unquote(#[from] gix_quote::ansi_c::undo::Error), +} + +pub(crate) fn content(input: &[u8]) -> Result<Vec<PathBuf>, Error> { + let mut out = Vec::new(); + for line in input.split(|b| *b == b'\n') { + let line = line.as_bstr(); + if line.is_empty() || line.starts_with(b"#") { + continue; + } + out.push( + gix_path::try_from_bstr(if line.starts_with(b"\"") { + gix_quote::ansi_c::undo(line)?.0 + } else { + Cow::Borrowed(line) + }) + .map_err(|_| Error::PathConversion(line.to_vec()))? + .into_owned(), + ) + } + Ok(out) +} diff --git a/vendor/gix-odb/src/cache.rs b/vendor/gix-odb/src/cache.rs new file mode 100644 index 000000000..8e108646f --- /dev/null +++ b/vendor/gix-odb/src/cache.rs @@ -0,0 +1,234 @@ +use std::{ + cell::RefCell, + ops::{Deref, DerefMut}, + rc::Rc, + sync::Arc, +}; + +use crate::Cache; + +/// A type to store pack caches in boxes. +pub type PackCache = dyn gix_pack::cache::DecodeEntry + Send + 'static; +/// A constructor for boxed pack caches. +pub type NewPackCacheFn = dyn Fn() -> Box<PackCache> + Send + Sync + 'static; + +/// A type to store object caches in boxes. +pub type ObjectCache = dyn gix_pack::cache::Object + Send + 'static; +/// A constructor for boxed object caches. +pub type NewObjectCacheFn = dyn Fn() -> Box<ObjectCache> + Send + Sync + 'static; + +impl Cache<crate::store::Handle<Rc<crate::Store>>> { + /// Convert this cache's handle into one that keeps its store in an arc. This creates an entirely new store, + /// so should be done early to avoid unnecessary work (and mappings). + pub fn into_arc(self) -> std::io::Result<Cache<crate::store::Handle<Arc<crate::Store>>>> { + let inner = self.inner.into_arc()?; + Ok(Cache { + inner, + new_pack_cache: self.new_pack_cache, + new_object_cache: self.new_object_cache, + pack_cache: self.pack_cache, + object_cache: self.object_cache, + }) + } +} +impl Cache<crate::store::Handle<Arc<crate::Store>>> { + /// No op, as we are containing an arc handle already. 
+ pub fn into_arc(self) -> std::io::Result<Cache<crate::store::Handle<Arc<crate::Store>>>> { + Ok(self) + } +} + +impl<S> Cache<S> { + /// Dissolve this instance, discard all caches, and return the inner implementation. + pub fn into_inner(self) -> S { + self.inner + } + /// Use this methods directly after creating a new instance to add a constructor for pack caches. + /// + /// These are used to speed up decoding objects which are located in packs, reducing long delta chains by storing + /// their intermediate results. + pub fn with_pack_cache(mut self, create: impl Fn() -> Box<PackCache> + Send + Sync + 'static) -> Self { + self.pack_cache = Some(RefCell::new(create())); + self.new_pack_cache = Some(Arc::new(create)); + self + } + /// Use this methods directly after creating a new instance to add a constructor for object caches. + /// + /// Only use this kind of cache if the same objects are repeatedly accessed for great speedups, usually during diffing of + /// trees. + pub fn with_object_cache(mut self, create: impl Fn() -> Box<ObjectCache> + Send + Sync + 'static) -> Self { + self.object_cache = Some(RefCell::new(create())); + self.new_object_cache = Some(Arc::new(create)); + self + } + /// Set the pack cache constructor on this instance. + pub fn set_pack_cache(&mut self, create: impl Fn() -> Box<PackCache> + Send + Sync + 'static) { + self.pack_cache = Some(RefCell::new(create())); + self.new_pack_cache = Some(Arc::new(create)); + } + /// Set the object cache constructor on this instance. + pub fn set_object_cache(&mut self, create: impl Fn() -> Box<ObjectCache> + Send + Sync + 'static) { + self.object_cache = Some(RefCell::new(create())); + self.new_object_cache = Some(Arc::new(create)); + } + /// Return true if an object cache is present. + pub fn has_object_cache(&self) -> bool { + self.object_cache.is_some() + } + /// Return true if a pack cache is present. + pub fn has_pack_cache(&self) -> bool { + self.pack_cache.is_some() + } + /// Remove the current pack cache as well as its constructor from this instance. + pub fn unset_pack_cache(&mut self) { + self.pack_cache = None; + self.new_pack_cache = None; + } + /// Remove the current object cache as well as its constructor from this instance. 
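+ // A minimal sketch of the cache constructors defined in this impl block, assuming `cache` is an
+ // existing `Cache<S>`; `gix_pack::cache::Never` is used purely as a stand-in for a real
+ // decode-entry cache (it caches nothing) and is not part of the upstream examples.
+ //
+ //     let cache = cache.with_pack_cache(|| Box::new(gix_pack::cache::Never) as Box<PackCache>);
+ //     assert!(cache.has_pack_cache());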
+ pub fn unset_object_cache(&mut self) { + self.object_cache = None; + self.new_object_cache = None; + } +} + +impl<S> From<S> for Cache<S> +where + S: gix_pack::Find, +{ + fn from(store: S) -> Self { + Self { + inner: store, + pack_cache: None, + new_pack_cache: None, + object_cache: None, + new_object_cache: None, + } + } +} + +impl<S: Clone> Clone for Cache<S> { + fn clone(&self) -> Self { + Cache { + inner: self.inner.clone(), + new_pack_cache: self.new_pack_cache.clone(), + new_object_cache: self.new_object_cache.clone(), + pack_cache: self.new_pack_cache.as_ref().map(|create| RefCell::new(create())), + object_cache: self.new_object_cache.as_ref().map(|create| RefCell::new(create())), + } + } +} + +impl<S> Deref for Cache<S> { + type Target = S; + + fn deref(&self) -> &Self::Target { + &self.inner + } +} + +impl<S> DerefMut for Cache<S> { + fn deref_mut(&mut self) -> &mut Self::Target { + &mut self.inner + } +} + +mod impls { + use std::{io::Read, ops::DerefMut}; + + use gix_hash::{oid, ObjectId}; + use gix_object::{Data, Kind}; + use gix_pack::cache::Object; + + use crate::{find::Header, pack::data::entry::Location, Cache}; + + impl<S> crate::Write for Cache<S> + where + S: crate::Write, + { + type Error = S::Error; + + fn write_stream(&self, kind: Kind, size: u64, from: impl Read) -> Result<ObjectId, Self::Error> { + self.inner.write_stream(kind, size, from) + } + } + + impl<S> crate::Find for Cache<S> + where + S: gix_pack::Find, + { + type Error = S::Error; + + fn contains(&self, id: impl AsRef<oid>) -> bool { + self.inner.contains(id) + } + + fn try_find<'a>(&self, id: impl AsRef<oid>, buffer: &'a mut Vec<u8>) -> Result<Option<Data<'a>>, Self::Error> { + gix_pack::Find::try_find(self, id, buffer).map(|t| t.map(|t| t.0)) + } + } + + impl<S> crate::Header for Cache<S> + where + S: crate::Header, + { + type Error = S::Error; + + fn try_header(&self, id: impl AsRef<oid>) -> Result<Option<Header>, Self::Error> { + self.inner.try_header(id) + } + } + + impl<S> gix_pack::Find for Cache<S> + where + S: gix_pack::Find, + { + type Error = S::Error; + + fn contains(&self, id: impl AsRef<oid>) -> bool { + self.inner.contains(id) + } + + fn try_find<'a>( + &self, + id: impl AsRef<oid>, + buffer: &'a mut Vec<u8>, + ) -> Result<Option<(Data<'a>, Option<Location>)>, Self::Error> { + match self.pack_cache.as_ref().map(|rc| rc.borrow_mut()) { + Some(mut pack_cache) => self.try_find_cached(id, buffer, pack_cache.deref_mut()), + None => self.try_find_cached(id, buffer, &mut gix_pack::cache::Never), + } + } + + fn try_find_cached<'a>( + &self, + id: impl AsRef<oid>, + buffer: &'a mut Vec<u8>, + pack_cache: &mut impl gix_pack::cache::DecodeEntry, + ) -> Result<Option<(Data<'a>, Option<gix_pack::data::entry::Location>)>, Self::Error> { + if let Some(mut obj_cache) = self.object_cache.as_ref().map(|rc| rc.borrow_mut()) { + if let Some(kind) = obj_cache.get(&id.as_ref().to_owned(), buffer) { + return Ok(Some((Data::new(kind, buffer), None))); + } + } + let possibly_obj = self.inner.try_find_cached(id.as_ref(), buffer, pack_cache)?; + if let (Some(mut obj_cache), Some((obj, _location))) = + (self.object_cache.as_ref().map(|rc| rc.borrow_mut()), &possibly_obj) + { + obj_cache.put(id.as_ref().to_owned(), obj.kind, obj.data); + } + Ok(possibly_obj) + } + + fn location_by_oid(&self, id: impl AsRef<oid>, buf: &mut Vec<u8>) -> Option<gix_pack::data::entry::Location> { + self.inner.location_by_oid(id, buf) + } + + fn pack_offsets_and_oid(&self, pack_id: u32) -> Option<Vec<(u64, gix_hash::ObjectId)>> { + 
self.inner.pack_offsets_and_oid(pack_id) + } + + fn entry_by_location(&self, location: &Location) -> Option<gix_pack::find::Entry> { + self.inner.entry_by_location(location) + } + } +} diff --git a/vendor/gix-odb/src/find.rs b/vendor/gix-odb/src/find.rs new file mode 100644 index 000000000..69eccbf04 --- /dev/null +++ b/vendor/gix-odb/src/find.rs @@ -0,0 +1,113 @@ +/// +pub mod existing { + use gix_hash::ObjectId; + + /// The error returned by the [`find(…)`][crate::FindExt::find()] trait methods. + #[derive(Debug, thiserror::Error)] + #[allow(missing_docs)] + pub enum Error<T: std::error::Error + 'static> { + #[error(transparent)] + Find(T), + #[error("An object with id {} could not be found", .oid)] + NotFound { oid: ObjectId }, + } +} + +/// +pub mod existing_object { + use gix_hash::ObjectId; + + /// The error returned by the various [`find_*()`][crate::FindExt::find_commit()] trait methods. + #[derive(Debug, thiserror::Error)] + #[allow(missing_docs)] + pub enum Error<T: std::error::Error + 'static> { + #[error(transparent)] + Find(T), + #[error(transparent)] + Decode(gix_object::decode::Error), + #[error("An object with id {oid} could not be found")] + NotFound { oid: ObjectId }, + #[error("Expected object of kind {expected}")] + ObjectKind { expected: gix_object::Kind }, + } +} + +/// +pub mod existing_iter { + use gix_hash::ObjectId; + + /// The error returned by the various [`find_*_iter()`][crate::FindExt::find_commit_iter()] trait methods. + #[derive(Debug, thiserror::Error)] + #[allow(missing_docs)] + pub enum Error<T: std::error::Error + 'static> { + #[error(transparent)] + Find(T), + #[error("An object with id {oid} could not be found")] + NotFound { oid: ObjectId }, + #[error("Expected object of kind {expected}")] + ObjectKind { expected: gix_object::Kind }, + } +} + +/// An object header informing about object properties, without it being fully decoded in the process. +#[derive(Debug, Copy, Clone, Eq, PartialEq, Ord, PartialOrd, Hash)] +pub enum Header { + /// The object was not packed, but is currently located in the loose object portion of the database. + /// + /// As packs are searched first, this means that in this very moment, the object whose header we retrieved is unique + /// in the object database. + Loose { + /// The kind of the object. + kind: gix_object::Kind, + /// The size of the object's data in bytes. + size: u64, + }, + /// The object was present in a pack. + /// + /// Note that this does not imply it is unique in the database, as it might be present in more than one pack and even + /// as loose object. + Packed(gix_pack::data::decode::header::Outcome), +} + +mod header { + use super::Header; + + impl Header { + /// Return the object kind of the object we represent. + pub fn kind(&self) -> gix_object::Kind { + match self { + Header::Packed(out) => out.kind, + Header::Loose { kind, .. } => *kind, + } + } + /// Return the size of the object in bytes. + pub fn size(&self) -> u64 { + match self { + Header::Packed(out) => out.object_size, + Header::Loose { size, .. } => *size, + } + } + /// Return the amount of deltas decoded to obtain this header, if the object was packed. + pub fn num_deltas(&self) -> Option<u32> { + match self { + Header::Packed(out) => out.num_deltas.into(), + Header::Loose { .. 
} => None, + } + } + } + + impl From<gix_pack::data::decode::header::Outcome> for Header { + fn from(packed_header: gix_pack::data::decode::header::Outcome) -> Self { + Header::Packed(packed_header) + } + } + + impl From<(usize, gix_object::Kind)> for Header { + fn from((object_size, kind): (usize, gix_object::Kind)) -> Self { + Header::Loose { + kind, + size: object_size as u64, + } + } + } +} diff --git a/vendor/gix-odb/src/lib.rs b/vendor/gix-odb/src/lib.rs new file mode 100644 index 000000000..a63ea544f --- /dev/null +++ b/vendor/gix-odb/src/lib.rs @@ -0,0 +1,152 @@ +//! Git stores all of its data as _Objects_, which are data along with a hash over all data. Thus it's an +//! object store indexed by the signature of data itself with inherent deduplication: the same data will have the same hash, +//! and thus occupy the same space within the store. +//! +//! There is only one all-round object store, also known as the [`Store`], as it supports ~~everything~~ most of what git has to offer. +//! +//! * loose object reading and writing +//! * access to packed objects +//! * multiple loose objects and pack locations as gathered from `alternates` files. +//! ## Feature Flags +#![cfg_attr( + feature = "document-features", + cfg_attr(doc, doc = ::document_features::document_features!()) +)] +#![cfg_attr(docsrs, feature(doc_cfg, doc_auto_cfg))] +#![deny(missing_docs, rust_2018_idioms, unsafe_code)] + +use std::{ + cell::RefCell, + path::PathBuf, + sync::{atomic::AtomicUsize, Arc}, +}; + +use arc_swap::ArcSwap; +use gix_features::{threading::OwnShared, zlib::stream::deflate}; +pub use gix_pack as pack; + +mod store_impls; +pub use store_impls::{dynamic as store, loose}; + +pub mod alternate; + +/// A way to access objects along with pre-configured thread-local caches for packed base objects as well as objects themselves. +/// +/// By default, no cache will be used. +pub struct Cache<S> { + /// The inner provider of trait implementations we use in conjunction with our caches. + /// + /// For calling methods on `inner`, prefer to make use of auto-dereferencing, i.e. `cache.inner_method()` instead of `cache.inner.inner_method()`. + inner: S, + // TODO: have single-threaded code-paths also for pack-creation (entries from counts) so that we can use OwnShared here + // instead of Arc. However, it's probably not that important as these aren't called often. + new_pack_cache: Option<Arc<cache::NewPackCacheFn>>, + new_object_cache: Option<Arc<cache::NewObjectCacheFn>>, + pack_cache: Option<RefCell<Box<cache::PackCache>>>, + object_cache: Option<RefCell<Box<cache::ObjectCache>>>, +} + +/// +pub mod cache; + +/// +/// It can optionally compress the content, similarly to what would happen when using a [`loose::Store`][crate::loose::Store]. +/// +pub struct Sink { + compressor: Option<RefCell<deflate::Write<std::io::Sink>>>, + object_hash: gix_hash::Kind, +} + +/// Create a new [`Sink`] with compression disabled. +pub fn sink(object_hash: gix_hash::Kind) -> Sink { + Sink { + compressor: None, + object_hash, + } +} + +/// +pub mod sink; + +/// +pub mod find; + +/// An object database equivalent to `/dev/null`, dropping all objects stored into it. +mod traits; + +pub use traits::{Find, FindExt, Header, HeaderExt, Write}; + +/// A thread-local handle to access any object. +pub type Handle = Cache<store::Handle<OwnShared<Store>>>; +/// A thread-local handle to access any object, but thread-safe and independent of the actual type of `OwnShared` or feature toggles in `gix-features`. 
+pub type HandleArc = Cache<store::Handle<Arc<Store>>>; + +use store::types; + +/// The object store for use in any applications with support for auto-updates in the light of changes to the object database. +/// +/// ### Features +/// +/// - entirely lazy, creating an instance does no disk IO at all if [`Slots::Given`][store::init::Slots::Given] is used. +/// - multi-threaded lazy-loading of indices and packs +/// - per-thread pack and object caching avoiding cache trashing. +/// - most-recently-used packs are always first for speedups if objects are stored in the same pack, typical for packs organized by +/// commit graph and object age. +/// - lock-free reading for perfect scaling across all cores, and changes to it don't affect readers as long as these don't want to +/// enter the same branch. +/// - sync with the state on disk if objects aren't found to catch up with changes if an object seems to be missing. +/// - turn off the behaviour above for all handles if objects are expected to be missing due to spare checkouts. +pub struct Store { + /// The central write lock without which the slotmap index can't be changed. + write: parking_lot::Mutex<()>, + + /// The source directory from which all content is loaded, and the central write lock for use when a directory refresh is needed. + pub(crate) path: PathBuf, + + /// The current working directory at the time this store was instantiated. It becomes relevant when resolving alternate paths + /// when re-reading the store configuration on updates when an object was missed. + /// Keeping it here helps to assure consistency even while a process changes its CWD. + pub(crate) current_dir: PathBuf, + + /// A set of replacements that given a source OID return a destination OID. The vector is sorted. + pub(crate) replacements: Vec<(gix_hash::ObjectId, gix_hash::ObjectId)>, + + /// A list of indices keeping track of which slots are filled with data. These are usually, but not always, consecutive. + pub(crate) index: ArcSwap<types::SlotMapIndex>, + + /// The below state acts like a slot-map with each slot is mutable when the write lock is held, but readable independently of it. + /// This allows multiple file to be loaded concurrently if there is multiple handles requesting to load packs or additional indices. + /// The map is static and cannot typically change. + /// It's read often and changed rarely. + pub(crate) files: Vec<types::MutableIndexAndPack>, + + /// The amount of handles that would prevent us from unloading packs or indices + pub(crate) num_handles_stable: AtomicUsize, + /// The amount of handles that don't affect our ability to compact our internal data structures or unload packs or indices. + pub(crate) num_handles_unstable: AtomicUsize, + + /// The amount of times we re-read the disk state to consolidate our in-memory representation. + pub(crate) num_disk_state_consolidation: AtomicUsize, + /// If true, we are allowed to use multi-pack indices and they must have the `object_hash` or be ignored. + use_multi_pack_index: bool, + /// The hash kind to use for some operations + object_hash: gix_hash::Kind, +} + +/// Create a new cached handle to the object store with support for additional options. +/// +/// `replacements` is an iterator over pairs of old and new object ids for replacement support. +/// This means that when asking for object `X`, one will receive object `X-replaced` given an iterator like `Some((X, X-replaced))`. 
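+// A minimal sketch of opening a store through this crate's top-level helpers; the path below is
+// hypothetical and not part of the upstream sources.
+//
+//     let handle = gix_odb::at("/path/to/repo/.git/objects")?;
+//     // or, with replacement support and explicit options:
+//     let handle = gix_odb::at_opts(
+//         "/path/to/repo/.git/objects",
+//         Vec::new(), // no `(old, new)` object-id replacements
+//         gix_odb::store::init::Options::default(),
+//     )?;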
+pub fn at_opts( + objects_dir: impl Into<PathBuf>, + replacements: impl IntoIterator<Item = (gix_hash::ObjectId, gix_hash::ObjectId)>, + options: store::init::Options, +) -> std::io::Result<Handle> { + let handle = OwnShared::new(Store::at_opts(objects_dir, replacements, options)?).to_handle(); + Ok(Cache::from(handle)) +} + +/// Create a new cached handle to the object store. +pub fn at(objects_dir: impl Into<PathBuf>) -> std::io::Result<Handle> { + at_opts(objects_dir, Vec::new().into_iter(), Default::default()) +} diff --git a/vendor/gix-odb/src/sink.rs b/vendor/gix-odb/src/sink.rs new file mode 100644 index 000000000..1befd6fdf --- /dev/null +++ b/vendor/gix-odb/src/sink.rs @@ -0,0 +1,66 @@ +use std::{ + cell::RefCell, + convert::TryInto, + io::{self, Write}, +}; + +use gix_features::zlib::stream::deflate; + +use crate::Sink; + +impl Sink { + /// Enable or disable compression. Compression is disabled by default + pub fn compress(mut self, enable: bool) -> Self { + if enable { + self.compressor = Some(RefCell::new(deflate::Write::new(io::sink()))); + } else { + self.compressor = None; + } + self + } +} + +impl crate::traits::Write for Sink { + type Error = io::Error; + + fn write_stream( + &self, + kind: gix_object::Kind, + size: u64, + mut from: impl io::Read, + ) -> Result<gix_hash::ObjectId, Self::Error> { + let mut size = size.try_into().expect("object size to fit into usize"); + use gix_features::hash::Sha1; + let mut buf = [0u8; 8096]; + let header = gix_object::encode::loose_header(kind, size); + + let possibly_compress = |buf: &[u8]| -> io::Result<()> { + if let Some(compressor) = self.compressor.as_ref() { + compressor.try_borrow_mut().expect("no recursion").write_all(buf)?; + } + Ok(()) + }; + match self.object_hash { + gix_hash::Kind::Sha1 => { + let mut hasher = Sha1::default(); + hasher.update(&header); + possibly_compress(&header)?; + + while size != 0 { + let bytes = size.min(buf.len()); + from.read_exact(&mut buf[..bytes])?; + hasher.update(&buf[..bytes]); + possibly_compress(&buf[..bytes])?; + size -= bytes; + } + if let Some(compressor) = self.compressor.as_ref() { + let mut c = compressor.borrow_mut(); + c.flush()?; + c.reset(); + } + + Ok(hasher.digest().into()) + } + } + } +} diff --git a/vendor/gix-odb/src/store_impls/dynamic/access.rs b/vendor/gix-odb/src/store_impls/dynamic/access.rs new file mode 100644 index 000000000..7a07bcfef --- /dev/null +++ b/vendor/gix-odb/src/store_impls/dynamic/access.rs @@ -0,0 +1,24 @@ +use crate::Store; + +impl Store { + /// The root path at which we expect to find all objects and packs, and which is the source of the + /// alternate file traversal in case there are linked repositories. + pub fn path(&self) -> &std::path::Path { + &self.path + } + + /// The kind of object hash to assume when dealing with pack indices and pack data files. + pub fn object_hash(&self) -> gix_hash::Kind { + self.object_hash + } + + /// Whether or not we are allowed to use multi-pack indices + pub fn use_multi_pack_index(&self) -> bool { + self.use_multi_pack_index + } + + /// An iterator over replacements from object-ids `X` to `X-replaced` as `(X, X-replaced)`, sorted by the original id `X`. 
+ pub fn replacements(&self) -> impl Iterator<Item = (gix_hash::ObjectId, gix_hash::ObjectId)> + '_ { + self.replacements.iter().cloned() + } +} diff --git a/vendor/gix-odb/src/store_impls/dynamic/find.rs b/vendor/gix-odb/src/store_impls/dynamic/find.rs new file mode 100644 index 000000000..b6fa3b312 --- /dev/null +++ b/vendor/gix-odb/src/store_impls/dynamic/find.rs @@ -0,0 +1,519 @@ +use std::{convert::TryInto, ops::Deref}; + +use gix_pack::cache::DecodeEntry; + +use crate::store::{handle, load_index}; + +pub(crate) mod error { + use crate::{loose, pack}; + + /// Returned by [`Handle::try_find()`][gix_pack::Find::try_find()] + #[derive(thiserror::Error, Debug)] + #[allow(missing_docs)] + pub enum Error { + #[error("An error occurred while obtaining an object from the loose object store")] + Loose(#[from] loose::find::Error), + #[error("An error occurred while obtaining an object from the packed object store")] + Pack(#[from] pack::data::decode::Error), + #[error(transparent)] + LoadIndex(#[from] crate::store::load_index::Error), + #[error(transparent)] + LoadPack(#[from] std::io::Error), + #[error("Reached recursion limit of {} while resolving ref delta bases for {}", .max_depth, .id)] + DeltaBaseRecursionLimit { + /// the maximum recursion depth we encountered. + max_depth: usize, + /// The original object to lookup + id: gix_hash::ObjectId, + }, + #[error("The base object {} could not be found but is required to decode {}", .base_id, .id)] + DeltaBaseMissing { + /// the id of the base object which failed to lookup + base_id: gix_hash::ObjectId, + /// The original object to lookup + id: gix_hash::ObjectId, + }, + #[error("An error occurred when looking up a ref delta base object {} to decode {}", .base_id, .id)] + DeltaBaseLookup { + #[source] + err: Box<Self>, + /// the id of the base object which failed to lookup + base_id: gix_hash::ObjectId, + /// The original object to lookup + id: gix_hash::ObjectId, + }, + } + + #[derive(Copy, Clone)] + pub(crate) struct DeltaBaseRecursion<'a> { + pub depth: usize, + pub original_id: &'a gix_hash::oid, + } + + impl<'a> DeltaBaseRecursion<'a> { + pub fn new(id: &'a gix_hash::oid) -> Self { + Self { + original_id: id, + depth: 0, + } + } + pub fn inc_depth(mut self) -> Self { + self.depth += 1; + self + } + } + + #[cfg(test)] + mod tests { + use super::*; + + #[test] + fn error_size() { + let actual = std::mem::size_of::<Error>(); + assert!(actual <= 88, "{actual} <= 88: should not grow without us noticing"); + } + } +} +pub use error::Error; + +use crate::{store::types::PackId, Find}; + +impl<S> super::Handle<S> +where + S: Deref<Target = super::Store> + Clone, +{ + fn try_find_cached_inner<'a, 'b>( + &'b self, + mut id: &'b gix_hash::oid, + buffer: &'a mut Vec<u8>, + pack_cache: &mut impl DecodeEntry, + snapshot: &mut load_index::Snapshot, + recursion: Option<error::DeltaBaseRecursion<'_>>, + ) -> Result<Option<(gix_object::Data<'a>, Option<gix_pack::data::entry::Location>)>, Error> { + if let Some(r) = recursion { + if r.depth >= self.max_recursion_depth { + return Err(Error::DeltaBaseRecursionLimit { + max_depth: self.max_recursion_depth, + id: r.original_id.to_owned(), + }); + } + } else if !self.ignore_replacements { + if let Ok(pos) = self + .store + .replacements + .binary_search_by(|(map_this, _)| map_this.as_ref().cmp(id)) + { + id = self.store.replacements[pos].1.as_ref(); + } + } + + 'outer: loop { + { + let marker = snapshot.marker; + for (idx, index) in snapshot.indices.iter_mut().enumerate() { + if let 
Some(handle::index_lookup::Outcome { + object_index: handle::IndexForObjectInPack { pack_id, pack_offset }, + index_file, + pack: possibly_pack, + }) = index.lookup(id) + { + let pack = match possibly_pack { + Some(pack) => pack, + None => match self.store.load_pack(pack_id, marker)? { + Some(pack) => { + *possibly_pack = Some(pack); + possibly_pack.as_deref().expect("just put it in") + } + None => { + // The pack wasn't available anymore so we are supposed to try another round with a fresh index + match self.store.load_one_index(self.refresh, snapshot.marker)? { + Some(new_snapshot) => { + *snapshot = new_snapshot; + self.clear_cache(); + continue 'outer; + } + None => { + // nothing new in the index, kind of unexpected to not have a pack but to also + // to have no new index yet. We set the new index before removing any slots, so + // this should be observable. + return Ok(None); + } + } + } + }, + }; + let entry = pack.entry(pack_offset); + let header_size = entry.header_size(); + let res = match pack.decode_entry( + entry, + buffer, + |id, _out| { + index_file.pack_offset_by_id(id).map(|pack_offset| { + gix_pack::data::decode::entry::ResolvedBase::InPack(pack.entry(pack_offset)) + }) + }, + pack_cache, + ) { + Ok(r) => Ok(( + gix_object::Data { + kind: r.kind, + data: buffer.as_slice(), + }, + Some(gix_pack::data::entry::Location { + pack_id: pack.id, + pack_offset, + entry_size: r.compressed_size + header_size, + }), + )), + Err(gix_pack::data::decode::Error::DeltaBaseUnresolved(base_id)) => { + // Only with multi-pack indices it's allowed to jump to refer to other packs within this + // multi-pack. Otherwise this would constitute a thin pack which is only allowed in transit. + // However, if we somehow end up with that, we will resolve it safely, even though we could + // avoid handling this case and error instead. + + // Since this is a special case, we just allocate here to make it work. It's an actual delta-ref object + // which is sent by some servers that points to an object outside of the pack we are looking + // at right now. With the complexities of loading packs, we go into recursion here. Git itself + // doesn't do a cycle check, and we won't either but limit the recursive depth. + // The whole ordeal isn't as efficient as it could be due to memory allocation and + // later mem-copying when trying again. + let mut buf = Vec::new(); + let obj_kind = self + .try_find_cached_inner( + &base_id, + &mut buf, + pack_cache, + snapshot, + recursion + .map(|r| r.inc_depth()) + .or_else(|| error::DeltaBaseRecursion::new(id).into()), + ) + .map_err(|err| Error::DeltaBaseLookup { + err: Box::new(err), + base_id, + id: id.to_owned(), + })? + .ok_or_else(|| Error::DeltaBaseMissing { + base_id, + id: id.to_owned(), + })? 
+ .0 + .kind; + let handle::index_lookup::Outcome { + object_index: + handle::IndexForObjectInPack { + pack_id: _, + pack_offset, + }, + index_file, + pack: possibly_pack, + } = match snapshot.indices[idx].lookup(id) { + Some(res) => res, + None => { + let mut out = None; + for index in snapshot.indices.iter_mut() { + out = index.lookup(id); + if out.is_some() { + break; + } + } + + out.unwrap_or_else(|| { + panic!("could not find object {id} in any index after looking up one of its base objects {base_id}" ) + }) + } + }; + let pack = possibly_pack + .as_ref() + .expect("pack to still be available like just now"); + let entry = pack.entry(pack_offset); + let header_size = entry.header_size(); + pack.decode_entry( + entry, + buffer, + |id, out| { + index_file + .pack_offset_by_id(id) + .map(|pack_offset| { + gix_pack::data::decode::entry::ResolvedBase::InPack( + pack.entry(pack_offset), + ) + }) + .or_else(|| { + (id == base_id).then(|| { + out.resize(buf.len(), 0); + out.copy_from_slice(buf.as_slice()); + gix_pack::data::decode::entry::ResolvedBase::OutOfPack { + kind: obj_kind, + end: out.len(), + } + }) + }) + }, + pack_cache, + ) + .map(move |r| { + ( + gix_object::Data { + kind: r.kind, + data: buffer.as_slice(), + }, + Some(gix_pack::data::entry::Location { + pack_id: pack.id, + pack_offset, + entry_size: r.compressed_size + header_size, + }), + ) + }) + } + Err(err) => Err(err), + }?; + + if idx != 0 { + snapshot.indices.swap(0, idx); + } + return Ok(Some(res)); + } + } + } + + for lodb in snapshot.loose_dbs.iter() { + // TODO: remove this double-lookup once the borrow checker allows it. + if lodb.contains(id) { + return lodb + .try_find(id, buffer) + .map(|obj| obj.map(|obj| (obj, None))) + .map_err(Into::into); + } + } + + match self.store.load_one_index(self.refresh, snapshot.marker)? { + Some(new_snapshot) => { + *snapshot = new_snapshot; + self.clear_cache(); + } + None => return Ok(None), + } + } + } + + pub(crate) fn clear_cache(&self) { + self.packed_object_count.borrow_mut().take(); + } +} + +impl<S> gix_pack::Find for super::Handle<S> +where + S: Deref<Target = super::Store> + Clone, +{ + type Error = Error; + + // TODO: probably make this method fallible, but that would mean its own error type. + fn contains(&self, id: impl AsRef<gix_hash::oid>) -> bool { + let id = id.as_ref(); + let mut snapshot = self.snapshot.borrow_mut(); + loop { + for (idx, index) in snapshot.indices.iter().enumerate() { + if index.contains(id) { + if idx != 0 { + snapshot.indices.swap(0, idx); + } + return true; + } + } + + for lodb in snapshot.loose_dbs.iter() { + if lodb.contains(id) { + return true; + } + } + + match self.store.load_one_index(self.refresh, snapshot.marker) { + Ok(Some(new_snapshot)) => { + *snapshot = new_snapshot; + self.clear_cache(); + } + Ok(None) => return false, // nothing more to load, or our refresh mode doesn't allow disk refreshes + Err(_) => return false, // something went wrong, nothing we can communicate here with this trait. TODO: Maybe that should change? 
+ } + } + } + + fn try_find_cached<'a>( + &self, + id: impl AsRef<gix_hash::oid>, + buffer: &'a mut Vec<u8>, + pack_cache: &mut impl DecodeEntry, + ) -> Result<Option<(gix_object::Data<'a>, Option<gix_pack::data::entry::Location>)>, Self::Error> { + let id = id.as_ref(); + let mut snapshot = self.snapshot.borrow_mut(); + self.try_find_cached_inner(id, buffer, pack_cache, &mut snapshot, None) + } + + fn location_by_oid( + &self, + id: impl AsRef<gix_hash::oid>, + buf: &mut Vec<u8>, + ) -> Option<gix_pack::data::entry::Location> { + assert!( + matches!(self.token.as_ref(), Some(handle::Mode::KeepDeletedPacksAvailable)), + "BUG: handle must be configured to `prevent_pack_unload()` before using this method" + ); + + assert!(self.store_ref().replacements.is_empty() || self.ignore_replacements, "Everything related to packing must not use replacements. These are not used here, but it should be turned off for good measure."); + + let id = id.as_ref(); + let mut snapshot = self.snapshot.borrow_mut(); + 'outer: loop { + { + let marker = snapshot.marker; + for (idx, index) in snapshot.indices.iter_mut().enumerate() { + if let Some(handle::index_lookup::Outcome { + object_index: handle::IndexForObjectInPack { pack_id, pack_offset }, + index_file: _, + pack: possibly_pack, + }) = index.lookup(id) + { + let pack = match possibly_pack { + Some(pack) => pack, + None => match self.store.load_pack(pack_id, marker).ok()? { + Some(pack) => { + *possibly_pack = Some(pack); + possibly_pack.as_deref().expect("just put it in") + } + None => { + // The pack wasn't available anymore so we are supposed to try another round with a fresh index + match self.store.load_one_index(self.refresh, snapshot.marker).ok()? { + Some(new_snapshot) => { + *snapshot = new_snapshot; + self.clear_cache(); + continue 'outer; + } + None => { + // nothing new in the index, kind of unexpected to not have a pack but to also + // to have no new index yet. We set the new index before removing any slots, so + // this should be observable. + return None; + } + } + } + }, + }; + let entry = pack.entry(pack_offset); + + buf.resize(entry.decompressed_size.try_into().expect("representable size"), 0); + assert_eq!(pack.id, pack_id.to_intrinsic_pack_id(), "both ids must always match"); + + let res = pack.decompress_entry(&entry, buf).ok().map(|entry_size_past_header| { + gix_pack::data::entry::Location { + pack_id: pack.id, + pack_offset, + entry_size: entry.header_size() + entry_size_past_header, + } + }); + + if idx != 0 { + snapshot.indices.swap(0, idx); + } + return res; + } + } + } + + match self.store.load_one_index(self.refresh, snapshot.marker).ok()? { + Some(new_snapshot) => { + *snapshot = new_snapshot; + self.clear_cache(); + } + None => return None, + } + } + } + + fn pack_offsets_and_oid(&self, pack_id: u32) -> Option<Vec<(u64, gix_hash::ObjectId)>> { + assert!( + matches!(self.token.as_ref(), Some(handle::Mode::KeepDeletedPacksAvailable)), + "BUG: handle must be configured to `prevent_pack_unload()` before using this method" + ); + let pack_id = PackId::from_intrinsic_pack_id(pack_id); + loop { + let snapshot = self.snapshot.borrow(); + { + for index in snapshot.indices.iter() { + if let Some(iter) = index.iter(pack_id) { + return Some(iter.map(|e| (e.pack_offset, e.oid)).collect()); + } + } + } + + match self.store.load_one_index(self.refresh, snapshot.marker).ok()? 
{ + Some(new_snapshot) => { + drop(snapshot); + *self.snapshot.borrow_mut() = new_snapshot; + } + None => return None, + } + } + } + + fn entry_by_location(&self, location: &gix_pack::data::entry::Location) -> Option<gix_pack::find::Entry> { + assert!( + matches!(self.token.as_ref(), Some(handle::Mode::KeepDeletedPacksAvailable)), + "BUG: handle must be configured to `prevent_pack_unload()` before using this method" + ); + let pack_id = PackId::from_intrinsic_pack_id(location.pack_id); + let mut snapshot = self.snapshot.borrow_mut(); + let marker = snapshot.marker; + loop { + { + for index in snapshot.indices.iter_mut() { + if let Some(possibly_pack) = index.pack(pack_id) { + let pack = match possibly_pack { + Some(pack) => pack, + None => { + let pack = self.store.load_pack(pack_id, marker).ok()?.expect( + "BUG: pack must exist from previous call to location_by_oid() and must not be unloaded", + ); + *possibly_pack = Some(pack); + possibly_pack.as_deref().expect("just put it in") + } + }; + return pack + .entry_slice(location.entry_range(location.pack_offset)) + .map(|data| gix_pack::find::Entry { + data: data.to_owned(), + version: pack.version(), + }); + } + } + } + + snapshot.indices.insert( + 0, + self.store + .index_by_id(pack_id, marker) + .expect("BUG: index must always be present, must not be unloaded or overwritten"), + ); + } + } +} + +impl<S> Find for super::Handle<S> +where + S: Deref<Target = super::Store> + Clone, + Self: gix_pack::Find, +{ + type Error = <Self as gix_pack::Find>::Error; + + fn contains(&self, id: impl AsRef<gix_hash::oid>) -> bool { + gix_pack::Find::contains(self, id) + } + + fn try_find<'a>( + &self, + id: impl AsRef<gix_hash::oid>, + buffer: &'a mut Vec<u8>, + ) -> Result<Option<gix_object::Data<'a>>, Self::Error> { + gix_pack::Find::try_find(self, id, buffer).map(|t| t.map(|t| t.0)) + } +} diff --git a/vendor/gix-odb/src/store_impls/dynamic/handle.rs b/vendor/gix-odb/src/store_impls/dynamic/handle.rs new file mode 100644 index 000000000..78efd4451 --- /dev/null +++ b/vendor/gix-odb/src/store_impls/dynamic/handle.rs @@ -0,0 +1,399 @@ +use std::{ + cell::RefCell, + convert::{TryFrom, TryInto}, + ops::Deref, + rc::Rc, + sync::{atomic::Ordering, Arc}, +}; + +use gix_features::threading::OwnShared; +use gix_hash::oid; + +use crate::store::{handle, types, RefreshMode}; + +pub(crate) enum SingleOrMultiIndex { + Single { + index: Arc<gix_pack::index::File>, + data: Option<Arc<gix_pack::data::File>>, + }, + Multi { + index: Arc<gix_pack::multi_index::File>, + data: Vec<Option<Arc<gix_pack::data::File>>>, + }, +} + +/// A utility to allow looking up pack offsets for a particular pack +pub(crate) enum IntraPackLookup<'a> { + Single(&'a gix_pack::index::File), + /// the internal pack-id inside of a multi-index for which the lookup is supposed to be. + /// Used to prevent ref-delta OIDs to, for some reason, point to a different pack. 
+ Multi { + index: &'a gix_pack::multi_index::File, + required_pack_index: gix_pack::multi_index::PackIndex, + }, +} + +impl<'a> IntraPackLookup<'a> { + pub(crate) fn pack_offset_by_id(&self, id: &oid) -> Option<gix_pack::data::Offset> { + match self { + IntraPackLookup::Single(index) => index + .lookup(id) + .map(|entry_index| index.pack_offset_at_index(entry_index)), + IntraPackLookup::Multi { + index, + required_pack_index, + } => index.lookup(id).and_then(|entry_index| { + let (pack_index, pack_offset) = index.pack_id_and_pack_offset_at_index(entry_index); + (pack_index == *required_pack_index).then_some(pack_offset) + }), + } + } +} + +pub struct IndexLookup { + pub(crate) file: SingleOrMultiIndex, + /// The index we were found at in the slot map + pub(crate) id: types::IndexId, +} + +pub struct IndexForObjectInPack { + /// The internal identifier of the pack itself, which either is referred to by an index or a multi-pack index. + pub(crate) pack_id: types::PackId, + /// The offset at which the object's entry can be found + pub(crate) pack_offset: u64, +} + +pub(crate) mod index_lookup { + use std::{collections::HashSet, sync::Arc}; + + use gix_hash::oid; + + use crate::store::{handle, handle::IntraPackLookup, types}; + + pub(crate) struct Outcome<'a> { + pub object_index: handle::IndexForObjectInPack, + pub index_file: IntraPackLookup<'a>, + pub pack: &'a mut Option<Arc<gix_pack::data::File>>, + } + + impl handle::IndexLookup { + /// Return an iterator over the entries of the given pack. The `pack_id` is required to identify a pack uniquely within + /// a potential multi-pack index. + pub(crate) fn iter( + &self, + pack_id: types::PackId, + ) -> Option<Box<dyn Iterator<Item = gix_pack::index::Entry> + '_>> { + (self.id == pack_id.index).then(|| match &self.file { + handle::SingleOrMultiIndex::Single { index, .. } => index.iter(), + handle::SingleOrMultiIndex::Multi { index, .. } => { + let pack_index = pack_id.multipack_index.expect( + "BUG: multi-pack index must be set if this is a multi-pack, pack-indices seem unstable", + ); + Box::new(index.iter().filter_map(move |e| { + (e.pack_index == pack_index).then_some(gix_pack::index::Entry { + oid: e.oid, + pack_offset: e.pack_offset, + crc32: None, + }) + })) + } + }) + } + + pub(crate) fn pack(&mut self, pack_id: types::PackId) -> Option<&'_ mut Option<Arc<gix_pack::data::File>>> { + (self.id == pack_id.index).then(move || match &mut self.file { + handle::SingleOrMultiIndex::Single { data, .. } => data, + handle::SingleOrMultiIndex::Multi { data, .. } => { + let pack_index = pack_id.multipack_index.expect( + "BUG: multi-pack index must be set if this is a multi-pack, pack-indices seem unstable", + ); + &mut data[pack_index as usize] + } + }) + } + + /// Return true if the given object id exists in this index + pub(crate) fn contains(&self, object_id: &oid) -> bool { + match &self.file { + handle::SingleOrMultiIndex::Single { index, .. } => index.lookup(object_id).is_some(), + handle::SingleOrMultiIndex::Multi { index, .. } => index.lookup(object_id).is_some(), + } + } + + /// Return true if the given object id exists in this index + pub(crate) fn oid_at_index(&self, entry_index: u32) -> &gix_hash::oid { + match &self.file { + handle::SingleOrMultiIndex::Single { index, .. } => index.oid_at_index(entry_index), + handle::SingleOrMultiIndex::Multi { index, .. } => index.oid_at_index(entry_index), + } + } + + /// Return the amount of objects contained in the index, essentially the number of object ids. 
+ pub(crate) fn num_objects(&self) -> u32 { + match &self.file { + handle::SingleOrMultiIndex::Single { index, .. } => index.num_objects(), + handle::SingleOrMultiIndex::Multi { index, .. } => index.num_objects(), + } + } + + /// Call `lookup_prefix(…)` on either index or multi-index, and transform matches into an object id. + pub(crate) fn lookup_prefix( + &self, + prefix: gix_hash::Prefix, + candidates: Option<&mut HashSet<gix_hash::ObjectId>>, + ) -> Option<crate::store::prefix::lookup::Outcome> { + let mut candidate_entries = candidates.as_ref().map(|_| 0..0); + let res = match &self.file { + handle::SingleOrMultiIndex::Single { index, .. } => { + index.lookup_prefix(prefix, candidate_entries.as_mut()) + } + handle::SingleOrMultiIndex::Multi { index, .. } => { + index.lookup_prefix(prefix, candidate_entries.as_mut()) + } + }?; + + if let Some((candidates, entries)) = candidates.zip(candidate_entries) { + candidates.extend(entries.map(|entry| self.oid_at_index(entry).to_owned())); + } + Some(res.map(|entry_index| self.oid_at_index(entry_index).to_owned())) + } + + /// See if the oid is contained in this index, and return its full id for lookup possibly alongside its data file if already + /// loaded. + /// Also return the index itself as it's needed to resolve intra-pack ref-delta objects. They are a possibility even though + /// they won't be used in practice as it's more efficient to store their offsets. + /// If it is not loaded, ask it to be loaded and put it into the returned mutable option for safe-keeping. + pub(crate) fn lookup(&mut self, object_id: &oid) -> Option<Outcome<'_>> { + let id = self.id; + match &mut self.file { + handle::SingleOrMultiIndex::Single { index, data } => index.lookup(object_id).map(move |idx| Outcome { + object_index: handle::IndexForObjectInPack { + pack_id: types::PackId { + index: id, + multipack_index: None, + }, + pack_offset: index.pack_offset_at_index(idx), + }, + index_file: IntraPackLookup::Single(index), + pack: data, + }), + handle::SingleOrMultiIndex::Multi { index, data } => index.lookup(object_id).map(move |idx| { + let (pack_index, pack_offset) = index.pack_id_and_pack_offset_at_index(idx); + Outcome { + object_index: handle::IndexForObjectInPack { + pack_id: types::PackId { + index: id, + multipack_index: Some(pack_index), + }, + pack_offset, + }, + index_file: IntraPackLookup::Multi { + index, + required_pack_index: pack_index, + }, + pack: &mut data[pack_index as usize], + } + }), + } + } + } +} + +pub(crate) enum Mode { + DeletedPacksAreInaccessible, + /// This mode signals that we should not unload packs even after they went missing. 
+ KeepDeletedPacksAvailable, +} + +/// Handle registration +impl super::Store { + pub(crate) fn register_handle(&self) -> Mode { + self.num_handles_unstable.fetch_add(1, Ordering::Relaxed); + Mode::DeletedPacksAreInaccessible + } + pub(crate) fn remove_handle(&self, mode: Mode) { + match mode { + Mode::KeepDeletedPacksAvailable => { + let _lock = self.write.lock(); + self.num_handles_stable.fetch_sub(1, Ordering::SeqCst) + } + Mode::DeletedPacksAreInaccessible => self.num_handles_unstable.fetch_sub(1, Ordering::Relaxed), + }; + } + pub(crate) fn upgrade_handle(&self, mode: Mode) -> Mode { + if let Mode::DeletedPacksAreInaccessible = mode { + let _lock = self.write.lock(); + self.num_handles_stable.fetch_add(1, Ordering::SeqCst); + self.num_handles_unstable.fetch_sub(1, Ordering::SeqCst); + } + Mode::KeepDeletedPacksAvailable + } +} + +/// Handle creation +impl super::Store { + /// The amount of times a ref-delta base can be followed when multi-indices are involved. + pub const INITIAL_MAX_RECURSION_DEPTH: usize = 32; + + /// Create a new cache filled with a handle to this store, if this store is supporting shared ownership. + /// + /// Note that the actual type of `OwnShared` depends on the `parallel` feature toggle of the `gix-features` crate. + pub fn to_cache(self: &OwnShared<Self>) -> crate::Cache<super::Handle<OwnShared<super::Store>>> { + self.to_handle().into() + } + + /// Create a new cache filled with a handle to this store if this store is held in an `Arc`. + pub fn to_cache_arc(self: &Arc<Self>) -> crate::Cache<super::Handle<Arc<super::Store>>> { + self.to_handle_arc().into() + } + + /// Create a new database handle to this store if this store is supporting shared ownership. + /// + /// See also, [`to_cache()`][super::Store::to_cache()] which is probably more useful. + pub fn to_handle(self: &OwnShared<Self>) -> super::Handle<OwnShared<super::Store>> { + let token = self.register_handle(); + super::Handle { + store: self.clone(), + refresh: RefreshMode::default(), + ignore_replacements: false, + token: Some(token), + snapshot: RefCell::new(self.collect_snapshot()), + max_recursion_depth: Self::INITIAL_MAX_RECURSION_DEPTH, + packed_object_count: Default::default(), + } + } + + /// Create a new database handle to this store if this store is held in an `Arc`. + /// + /// This method is useful in applications that know they will use threads. + pub fn to_handle_arc(self: &Arc<Self>) -> super::Handle<Arc<super::Store>> { + let token = self.register_handle(); + super::Handle { + store: self.clone(), + refresh: Default::default(), + ignore_replacements: false, + token: Some(token), + snapshot: RefCell::new(self.collect_snapshot()), + max_recursion_depth: Self::INITIAL_MAX_RECURSION_DEPTH, + packed_object_count: Default::default(), + } + } + + /// Transform the only instance into an `Arc<Self>` or panic if this is not the only Rc handle + /// to the contained store. + /// + /// This is meant to be used when the `gix_features::threading::OwnShared` refers to an `Rc` as it was compiled without the + /// `parallel` feature toggle. 
+ pub fn into_shared_arc(self: OwnShared<Self>) -> Arc<Self> { + match OwnShared::try_unwrap(self) { + Ok(this) => Arc::new(this), + Err(_) => panic!("BUG: Must be called when there is only one owner for this RC"), + } + } +} + +impl<S> super::Handle<S> +where + S: Deref<Target = super::Store> + Clone, +{ + /// Call once if pack ids are stored and later used for lookup, meaning they should always remain mapped and not be unloaded + /// even if they disappear from disk. + /// This must be called if there is a chance that git maintenance is happening while a pack is created. + pub fn prevent_pack_unload(&mut self) { + self.token = self.token.take().map(|token| self.store.upgrade_handle(token)); + } + + /// Return a shared reference to the contained store. + pub fn store_ref(&self) -> &S::Target { + &self.store + } + + /// Return an owned store with shared ownership. + pub fn store(&self) -> S { + self.store.clone() + } + + /// Set the handle to never cause ODB refreshes if an object could not be found. + /// + /// The latter is the default, as typically all objects referenced in a git repository are contained in the local clone. + /// More recently, however, this doesn't always have to be the case due to sparse checkouts and other ways to only have a + /// limited amount of objects available locally. + pub fn refresh_never(&mut self) { + self.refresh = RefreshMode::Never; + } + + /// Return the current refresh mode. + pub fn refresh_mode(&mut self) -> RefreshMode { + self.refresh + } +} + +impl<S> Drop for super::Handle<S> +where + S: Deref<Target = super::Store> + Clone, +{ + fn drop(&mut self) { + if let Some(token) = self.token.take() { + self.store.remove_handle(token) + } + } +} + +impl TryFrom<&super::Store> for super::Store { + type Error = std::io::Error; + + fn try_from(s: &super::Store) -> Result<Self, Self::Error> { + super::Store::at_opts( + s.path(), + s.replacements(), + crate::store::init::Options { + slots: crate::store::init::Slots::Given(s.files.len().try_into().expect("BUG: too many slots")), + object_hash: Default::default(), + use_multi_pack_index: false, + current_dir: s.current_dir.clone().into(), + }, + ) + } +} + +impl super::Handle<Rc<super::Store>> { + /// Convert a ref counted store into one that is ref-counted and thread-safe, by creating a new Store. 
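+ // A hedged sketch, assuming `handle` is a `Handle<Rc<Store>>` obtained elsewhere: convert it
+ // into its `Arc`-backed counterpart before handing it to threaded code.
+ //
+ //     let handle: super::Handle<Arc<super::Store>> = handle.into_arc()?;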
+ pub fn into_arc(self) -> std::io::Result<super::Handle<Arc<super::Store>>> { + let store = Arc::new(super::Store::try_from(self.store_ref())?); + let mut cache = store.to_handle_arc(); + cache.refresh = self.refresh; + cache.max_recursion_depth = self.max_recursion_depth; + Ok(cache) + } +} + +impl super::Handle<Arc<super::Store>> { + /// Convert a ref counted store into one that is ref-counted and thread-safe, by creating a new Store + pub fn into_arc(self) -> std::io::Result<super::Handle<Arc<super::Store>>> { + Ok(self) + } +} + +impl<S> Clone for super::Handle<S> +where + S: Deref<Target = super::Store> + Clone, +{ + fn clone(&self) -> Self { + super::Handle { + store: self.store.clone(), + refresh: self.refresh, + ignore_replacements: self.ignore_replacements, + token: { + let token = self.store.register_handle(); + match self.token.as_ref().expect("token is always set here ") { + handle::Mode::DeletedPacksAreInaccessible => token, + handle::Mode::KeepDeletedPacksAvailable => self.store.upgrade_handle(token), + } + .into() + }, + snapshot: RefCell::new(self.store.collect_snapshot()), + max_recursion_depth: self.max_recursion_depth, + packed_object_count: Default::default(), + } + } +} diff --git a/vendor/gix-odb/src/store_impls/dynamic/header.rs b/vendor/gix-odb/src/store_impls/dynamic/header.rs new file mode 100644 index 000000000..a1fb770ed --- /dev/null +++ b/vendor/gix-odb/src/store_impls/dynamic/header.rs @@ -0,0 +1,189 @@ +use std::ops::Deref; + +use gix_hash::oid; + +use super::find::Error; +use crate::{ + find::Header, + store::{find::error::DeltaBaseRecursion, handle, load_index}, +}; + +impl<S> super::Handle<S> +where + S: Deref<Target = super::Store> + Clone, +{ + fn try_header_inner<'b>( + &'b self, + mut id: &'b gix_hash::oid, + snapshot: &mut load_index::Snapshot, + recursion: Option<DeltaBaseRecursion<'_>>, + ) -> Result<Option<Header>, Error> { + if let Some(r) = recursion { + if r.depth >= self.max_recursion_depth { + return Err(Error::DeltaBaseRecursionLimit { + max_depth: self.max_recursion_depth, + id: r.original_id.to_owned(), + }); + } + } else if !self.ignore_replacements { + if let Ok(pos) = self + .store + .replacements + .binary_search_by(|(map_this, _)| map_this.as_ref().cmp(id)) + { + id = self.store.replacements[pos].1.as_ref(); + } + } + + 'outer: loop { + { + let marker = snapshot.marker; + for (idx, index) in snapshot.indices.iter_mut().enumerate() { + if let Some(handle::index_lookup::Outcome { + object_index: handle::IndexForObjectInPack { pack_id, pack_offset }, + index_file, + pack: possibly_pack, + }) = index.lookup(id) + { + let pack = match possibly_pack { + Some(pack) => pack, + None => match self.store.load_pack(pack_id, marker)? { + Some(pack) => { + *possibly_pack = Some(pack); + possibly_pack.as_deref().expect("just put it in") + } + None => { + // The pack wasn't available anymore so we are supposed to try another round with a fresh index + match self.store.load_one_index(self.refresh, snapshot.marker)? { + Some(new_snapshot) => { + *snapshot = new_snapshot; + self.clear_cache(); + continue 'outer; + } + None => { + // nothing new in the index, kind of unexpected to not have a pack but to also + // to have no new index yet. We set the new index before removing any slots, so + // this should be observable. 
+ return Ok(None); + } + } + } + }, + }; + let entry = pack.entry(pack_offset); + let res = match pack.decode_header(entry, |id| { + index_file.pack_offset_by_id(id).map(|pack_offset| { + gix_pack::data::decode::header::ResolvedBase::InPack(pack.entry(pack_offset)) + }) + }) { + Ok(header) => Ok(header.into()), + Err(gix_pack::data::decode::Error::DeltaBaseUnresolved(base_id)) => { + // Only with multi-pack indices it's allowed to jump to refer to other packs within this + // multi-pack. Otherwise this would constitute a thin pack which is only allowed in transit. + // However, if we somehow end up with that, we will resolve it safely, even though we could + // avoid handling this case and error instead. + let hdr = self + .try_header_inner( + &base_id, + snapshot, + recursion + .map(|r| r.inc_depth()) + .or_else(|| DeltaBaseRecursion::new(id).into()), + ) + .map_err(|err| Error::DeltaBaseLookup { + err: Box::new(err), + base_id, + id: id.to_owned(), + })? + .ok_or_else(|| Error::DeltaBaseMissing { + base_id, + id: id.to_owned(), + })?; + let handle::index_lookup::Outcome { + object_index: + handle::IndexForObjectInPack { + pack_id: _, + pack_offset, + }, + index_file, + pack: possibly_pack, + } = match snapshot.indices[idx].lookup(id) { + Some(res) => res, + None => { + let mut out = None; + for index in snapshot.indices.iter_mut() { + out = index.lookup(id); + if out.is_some() { + break; + } + } + + out.unwrap_or_else(|| { + panic!("could not find object {id} in any index after looking up one of its base objects {base_id}") + }) + } + }; + let pack = possibly_pack + .as_ref() + .expect("pack to still be available like just now"); + let entry = pack.entry(pack_offset); + pack.decode_header(entry, |id| { + index_file + .pack_offset_by_id(id) + .map(|pack_offset| { + gix_pack::data::decode::header::ResolvedBase::InPack( + pack.entry(pack_offset), + ) + }) + .or_else(|| { + (id == base_id).then(|| { + gix_pack::data::decode::header::ResolvedBase::OutOfPack { + kind: hdr.kind(), + num_deltas: hdr.num_deltas(), + } + }) + }) + }) + .map(Into::into) + } + Err(err) => Err(err), + }?; + + if idx != 0 { + snapshot.indices.swap(0, idx); + } + return Ok(Some(res)); + } + } + } + + for lodb in snapshot.loose_dbs.iter() { + // TODO: remove this double-lookup once the borrow checker allows it. + if lodb.contains(id) { + return lodb.try_header(id).map(|opt| opt.map(Into::into)).map_err(Into::into); + } + } + + match self.store.load_one_index(self.refresh, snapshot.marker)? { + Some(new_snapshot) => { + *snapshot = new_snapshot; + self.clear_cache(); + } + None => return Ok(None), + } + } + } +} + +impl<S> crate::Header for super::Handle<S> +where + S: Deref<Target = super::Store> + Clone, +{ + type Error = Error; + + fn try_header(&self, id: impl AsRef<oid>) -> Result<Option<Header>, Self::Error> { + let id = id.as_ref(); + let mut snapshot = self.snapshot.borrow_mut(); + self.try_header_inner(id, &mut snapshot, None) + } +} diff --git a/vendor/gix-odb/src/store_impls/dynamic/init.rs b/vendor/gix-odb/src/store_impls/dynamic/init.rs new file mode 100644 index 000000000..2fb660ef1 --- /dev/null +++ b/vendor/gix-odb/src/store_impls/dynamic/init.rs @@ -0,0 +1,126 @@ +use std::{iter::FromIterator, path::PathBuf, sync::Arc}; + +use arc_swap::ArcSwap; + +use crate::{ + store::types::{MutableIndexAndPack, SlotMapIndex}, + Store, +}; + +/// Options for use in [`Store::at_opts()`]. +#[derive(Clone, Debug)] +pub struct Options { + /// How to obtain a size for the slot map. 
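+ // A hedged illustration of the two `Slots` strategies (defined further down in this file);
+ // the values shown are examples only and not part of the upstream sources.
+ //
+ //     let fixed = Slots::Given(32);
+ //     let adaptive = Slots::AsNeededByDiskState { multiplier: 1.1, minimum: 32 };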
+ pub slots: Slots, + /// The kind of hash we expect in our packs and would use for loose object iteration and object writing. + pub object_hash: gix_hash::Kind, + /// If false, no multi-pack indices will be used. If true, they will be used if their hash matches `object_hash`. + pub use_multi_pack_index: bool, + /// The current directory of the process at the time of instantiation. + /// If unset, it will be retrieved using `std::env::current_dir()`. + pub current_dir: Option<std::path::PathBuf>, +} + +impl Default for Options { + fn default() -> Self { + Options { + slots: Default::default(), + object_hash: Default::default(), + use_multi_pack_index: true, + current_dir: None, + } + } +} + +/// Configures the amount of slots in the index slotmap, which is fixed throughout the existence of the store. +#[derive(Copy, Clone, Debug)] +pub enum Slots { + /// The amount of slots to use, that is the total amount of indices we can hold at a time. + /// Using this has the advantage of avoiding an initial directory listing of the repository, and is recommended + /// on the server side where the repository setup is controlled. + /// + /// Note that this won't affect their packs, as each index can have one or more packs associated with it. + Given(u16), + /// Compute the amount of slots needed, as probably best used on the client side where a variety of repositories is encountered. + AsNeededByDiskState { + /// 1.0 means no safety, 1.1 means 10% more slots than needed + multiplier: f32, + /// The minimum amount of slots to assume + minimum: usize, + }, +} + +impl Default for Slots { + fn default() -> Self { + Slots::AsNeededByDiskState { + multiplier: 1.1, + minimum: 32, + } + } +} + +impl Store { + /// Open the store at `objects_dir` (containing loose objects and `packs/`), which must only be a directory for + /// the store to be created without any additional work being done. + /// `slots` defines how many multi-pack-indices as well as indices we can know about at a time, which includes + /// the allowance for all additional object databases coming in via `alternates` as well. + /// Note that the `slots` isn't used for packs, these are included with their multi-index or index respectively. + /// For example, In a repository with 250m objects and geometric packing one would expect 27 index/pack pairs, + /// or a single multi-pack index. + /// `replacements` is an iterator over pairs of old and new object ids for replacement support. + /// This means that when asking for object `X`, one will receive object `X-replaced` given an iterator like `Some((X, X-replaced))`. 
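A minimal usage sketch for `Store::at_opts()` as declared just below, assuming the re-export paths `gix_odb::Store` and `gix_odb::store::init::{Options, Slots}` match the module layout shown in this diff; the helper name and the slot count are illustrative only and not part of the vendored sources.

```rust
use gix_odb::store::init::{Options, Slots};

// Open the store for `<git-dir>/objects` with a fixed slot count and no replacements.
fn open_object_store(git_dir: &std::path::Path) -> std::io::Result<gix_odb::Store> {
    gix_odb::Store::at_opts(
        git_dir.join("objects"),
        std::iter::empty(), // no object replacements
        Options {
            // A fixed count skips the initial directory listing; the default
            // `Slots::AsNeededByDiskState { .. }` estimates it from disk instead.
            slots: Slots::Given(64),
            ..Options::default()
        },
    )
}
```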
+ pub fn at_opts( + objects_dir: impl Into<PathBuf>, + replacements: impl IntoIterator<Item = (gix_hash::ObjectId, gix_hash::ObjectId)>, + Options { + slots, + object_hash, + use_multi_pack_index, + current_dir, + }: Options, + ) -> std::io::Result<Self> { + let objects_dir = objects_dir.into(); + let current_dir = current_dir.map(Ok).unwrap_or_else(std::env::current_dir)?; + if !objects_dir.is_dir() { + return Err(std::io::Error::new( + std::io::ErrorKind::Other, // TODO: use NotADirectory when stabilized + format!("'{}' wasn't a directory", objects_dir.display()), + )); + } + let slot_count = match slots { + Slots::Given(n) => n as usize, + Slots::AsNeededByDiskState { multiplier, minimum } => { + let mut db_paths = crate::alternate::resolve(&objects_dir, ¤t_dir) + .map_err(|err| std::io::Error::new(std::io::ErrorKind::Other, err))?; + db_paths.insert(0, objects_dir.clone()); + let num_slots = super::Store::collect_indices_and_mtime_sorted_by_size(db_paths, None, None) + .map_err(|err| std::io::Error::new(std::io::ErrorKind::Other, err))? + .len(); + + ((num_slots as f32 * multiplier) as usize).max(minimum) + } + }; + if slot_count > crate::store::types::PackId::max_indices() { + return Err(std::io::Error::new( + std::io::ErrorKind::Other, + "Cannot use more than 1^15 slots", + )); + } + let mut replacements: Vec<_> = replacements.into_iter().collect(); + replacements.sort_by(|a, b| a.0.cmp(&b.0)); + + Ok(Store { + current_dir, + write: Default::default(), + replacements, + path: objects_dir, + files: Vec::from_iter(std::iter::repeat_with(MutableIndexAndPack::default).take(slot_count)), + index: ArcSwap::new(Arc::new(SlotMapIndex::default())), + use_multi_pack_index, + object_hash, + num_handles_stable: Default::default(), + num_handles_unstable: Default::default(), + num_disk_state_consolidation: Default::default(), + }) + } +} diff --git a/vendor/gix-odb/src/store_impls/dynamic/iter.rs b/vendor/gix-odb/src/store_impls/dynamic/iter.rs new file mode 100644 index 000000000..bbe859e7c --- /dev/null +++ b/vendor/gix-odb/src/store_impls/dynamic/iter.rs @@ -0,0 +1,236 @@ +use std::{ops::Deref, option::Option::None, sync::Arc, vec::IntoIter}; + +use gix_hash::ObjectId; + +use crate::{ + loose, + store::{handle, handle::SingleOrMultiIndex, types::PackId}, + store_impls::dynamic, +}; + +struct EntryForOrdering { + pack_offset: u64, + entry_index: u32, + pack_index: u16, +} + +enum State { + Pack { + index_iter: IntoIter<handle::IndexLookup>, + index: handle::IndexLookup, + ordered_entries: Option<Vec<EntryForOrdering>>, + entry_index: u32, + num_objects: u32, + }, + Loose { + iter: loose::Iter, + index: usize, + }, + Depleted, +} + +/// Define the order in which objects are returned. +#[derive(Debug, Copy, Clone)] +pub enum Ordering { + /// Traverse packs first as sorted by their index files in lexicographical order (sorted by object id), then traverse loose objects + /// as sorted by their names as well. + /// + /// This mode uses no memory as it's the natural ordering of objects, and is best to obtain all object ids as quickly as possible, + /// while noting that these may contain duplicates. However, it's very costly to obtain object information or decode them with this + /// scheme as cache-hits are unlikely with it and memory maps are less efficient when loading them in random order. + PackLexicographicalThenLooseLexicographical, + /// Traverse packs first yielding object ids sorted by their position in the pack, with those at the beginning of the pack file coming first. 
+ /// Then follow loose objects sorted by their names. + /// + /// This mode allocates and as to pre-sort objects by their offsets, delaying the start of the iteration once per pack while keeping + /// memory allocated once per pack. This price is usually worth paying once querying object information is planned as pack caches + /// are more efficiently used that way. + PackAscendingOffsetThenLooseLexicographical, +} + +impl Default for Ordering { + fn default() -> Self { + Ordering::PackLexicographicalThenLooseLexicographical + } +} + +/// An iterator over all, _possibly duplicate_, objects of an object store, which by default uses no extra memory but yields an +/// order that is costly to traverse when querying object information or decoding them. +/// +/// Use [`with_ordering()`][AllObjects::with_ordering()] to choose a performance trade-off. +pub struct AllObjects { + state: State, + num_objects: usize, + loose_dbs: Arc<Vec<loose::Store>>, + order: Ordering, +} + +/// Builder +impl AllObjects { + /// Set the ordering of the objects returned, trading off memory and latency for object query performance. + pub fn with_ordering(mut self, order: Ordering) -> Self { + self.order = order; + self + } +} + +impl AllObjects { + /// Create a new iterator from a dynamic store, which will be forced to load all indices eagerly and in the current thread. + pub fn new(db: &dynamic::Store) -> Result<Self, crate::store::load_index::Error> { + let snapshot = db.load_all_indices()?; + + let packed_objects = snapshot + .indices + .iter() + .fold(0usize, |dbc, index| dbc.saturating_add(index.num_objects() as usize)); + let mut index_iter = snapshot.indices.into_iter(); + let loose_dbs = snapshot.loose_dbs; + let order = Default::default(); + let state = match index_iter.next() { + Some(index) => { + let num_objects = index.num_objects(); + State::Pack { + index_iter, + ordered_entries: maybe_sort_entries(&index, order), + index, + entry_index: 0, + num_objects, + } + } + None => { + let index = 0; + State::Loose { + iter: loose_dbs.get(index).expect("at least one loose db").iter(), + index, + } + } + }; + Ok(AllObjects { + state, + loose_dbs, + num_objects: packed_objects, + order, + }) + } +} + +fn maybe_sort_entries(index: &handle::IndexLookup, order: Ordering) -> Option<Vec<EntryForOrdering>> { + let mut order: Vec<_> = match order { + Ordering::PackLexicographicalThenLooseLexicographical => return None, + Ordering::PackAscendingOffsetThenLooseLexicographical => match &index.file { + // We know that we cannot have more than u32 entry indices per pack. + SingleOrMultiIndex::Single { index, .. } => index + .iter() + .enumerate() + .map(|(idx, e)| EntryForOrdering { + pack_offset: e.pack_offset, + entry_index: idx as u32, + pack_index: 0, + }) + .collect(), + SingleOrMultiIndex::Multi { index, .. 
} => index + .iter() + .enumerate() + .map(|(idx, e)| EntryForOrdering { + pack_offset: e.pack_offset, + entry_index: idx as u32, + pack_index: { + debug_assert!( + e.pack_index < PackId::max_packs_in_multi_index(), + "this shows the relation between u16 and pack_index (u32) and why this is OK" + ); + e.pack_index as u16 + }, + }) + .collect(), + }, + }; + order.sort_by(|a, b| { + a.pack_index + .cmp(&b.pack_index) + .then_with(|| a.pack_offset.cmp(&b.pack_offset)) + }); + Some(order) +} + +impl Iterator for AllObjects { + type Item = Result<ObjectId, loose::iter::Error>; + + fn next(&mut self) -> Option<Self::Item> { + match &mut self.state { + State::Depleted => None, + State::Pack { + index_iter, + ordered_entries, + index, + entry_index, + num_objects, + } => { + if *entry_index < *num_objects { + let oid = match ordered_entries { + Some(entries) => index.oid_at_index(entries[*entry_index as usize].entry_index), + None => index.oid_at_index(*entry_index), + } + .to_owned(); + *entry_index += 1; + Some(Ok(oid)) + } else { + match index_iter.next() { + Some(new_index) => { + *ordered_entries = maybe_sort_entries(&new_index, self.order); + *index = new_index; + *entry_index = 0; + *num_objects = index.num_objects(); + } + None => { + let index = 0; + self.state = State::Loose { + iter: self.loose_dbs.get(index).expect("at least one loose odb").iter(), + index, + } + } + } + self.next() + } + } + State::Loose { iter, index } => match iter.next() { + Some(id) => Some(id), + None => { + *index += 1; + match self.loose_dbs.get(*index).map(|ldb| ldb.iter()) { + Some(new_iter) => { + *iter = new_iter; + self.next() + } + None => { + self.state = State::Depleted; + None + } + } + } + }, + } + } + + fn size_hint(&self) -> (usize, Option<usize>) { + (self.num_objects, None) + } +} + +impl<S> super::Handle<S> +where + S: Deref<Target = super::Store> + Clone, +{ + /// Return an iterator over all, _possibly duplicate_, objects, first the ones in all packs of all linked databases (via alternates), + /// followed by all loose objects. + pub fn iter(&self) -> Result<AllObjects, dynamic::load_index::Error> { + AllObjects::new(self.store_ref()) + } +} + +impl dynamic::Store { + /// Like [`Handle::iter()`][super::Handle::iter()], but accessible directly on the store. + pub fn iter(&self) -> Result<AllObjects, dynamic::load_index::Error> { + AllObjects::new(self) + } +} diff --git a/vendor/gix-odb/src/store_impls/dynamic/load_index.rs b/vendor/gix-odb/src/store_impls/dynamic/load_index.rs new file mode 100644 index 000000000..86cf6c43b --- /dev/null +++ b/vendor/gix-odb/src/store_impls/dynamic/load_index.rs @@ -0,0 +1,719 @@ +use std::{ + collections::{BTreeMap, VecDeque}, + ffi::OsStr, + ops::Deref, + path::{Path, PathBuf}, + sync::{ + atomic::{AtomicU16, AtomicUsize, Ordering}, + Arc, + }, + time::SystemTime, +}; + +use crate::store::{handle, types, RefreshMode}; + +pub(crate) struct Snapshot { + /// Indices ready for object lookup or contains checks, ordered usually by modification data, recent ones first. + pub(crate) indices: Vec<handle::IndexLookup>, + /// A set of loose objects dbs to search once packed objects weren't found. + pub(crate) loose_dbs: Arc<Vec<crate::loose::Store>>, + /// remember what this state represents and to compare to other states. 
+ pub(crate) marker: types::SlotIndexMarker,
+}
+
+mod error {
+ use std::path::PathBuf;
+
+ use gix_pack::multi_index::PackIndex;
+
+ /// Returned by [`crate::at_opts()`]
+ #[derive(thiserror::Error, Debug)]
+ #[allow(missing_docs)]
+ pub enum Error {
+ #[error("The objects directory at '{0}' is not an accessible directory")]
+ Inaccessible(PathBuf),
+ #[error(transparent)]
+ Io(#[from] std::io::Error),
+ #[error(transparent)]
+ Alternate(#[from] crate::alternate::Error),
+ #[error("The slotmap turned out to be too small with {} entries, would need {} more", .current, .needed)]
+ InsufficientSlots { current: usize, needed: usize },
+ /// The problem here is that some logic assumes that more recent generations are higher than previous ones. If we would overflow,
+ /// we would break that invariant which can lead to the wrong object being returned. It would probably be super rare, but…
+ /// let's not risk it.
+ #[error(
+ "Would have overflown amount of max possible generations of {}",
+ super::Generation::MAX
+ )]
+ GenerationOverflow,
+ #[error("Cannot numerically handle more than {limit} packs in a single multi-pack index, got {actual} in file {index_path:?}")]
+ TooManyPacksInMultiIndex {
+ actual: PackIndex,
+ limit: PackIndex,
+ index_path: PathBuf,
+ },
+ }
+}
+
+pub use error::Error;
+
+use crate::store::types::{Generation, IndexAndPacks, MutableIndexAndPack, PackId, SlotMapIndex};
+
+impl super::Store {
+ /// Load all indices, refreshing from disk only if needed.
+ pub(crate) fn load_all_indices(&self) -> Result<Snapshot, Error> {
+ let mut snapshot = self.collect_snapshot();
+ while let Some(new_snapshot) = self.load_one_index(RefreshMode::Never, snapshot.marker)? {
+ snapshot = new_snapshot
+ }
+ Ok(snapshot)
+ }
+
+ /// If `None` is returned, there are no new indices and the caller should give up. This is possible even if refreshing is allowed,
+ /// as there might be no change to pick up.
+ pub(crate) fn load_one_index(
+ &self,
+ refresh_mode: RefreshMode,
+ marker: types::SlotIndexMarker,
+ ) -> Result<Option<Snapshot>, Error> {
+ let index = self.index.load();
+ if !index.is_initialized() {
+ return self.consolidate_with_disk_state(true /* needs_init */, false /*load one new index*/);
+ }
+
+ if marker.generation != index.generation || marker.state_id != index.state_id() {
+ // We have a more recent state already, provide it.
+ Ok(Some(self.collect_snapshot()))
+ } else {
+ // always compare to the latest state
+ // Nothing changed in the meantime, try to load another index…
+ if self.load_next_index(index) {
+ Ok(Some(self.collect_snapshot()))
+ } else {
+ // …and if that didn't yield anything new consider refreshing our disk state.
+ match refresh_mode {
+ RefreshMode::Never => Ok(None),
+ RefreshMode::AfterAllIndicesLoaded => {
+ self.consolidate_with_disk_state(false /* needs init */, true /*load one new index*/)
+ }
+ }
+ }
+ }
+ }
+
+ /// Load a new index (if not yet loaded), and return `true` if one was indeed loaded, leading to a `state_id()` change of the current index.
+ /// Note that interacting with the slot-map is inherently racy and we have to deal with it, being conservative in what we even try to load
+ /// as our index might already be out-of-date as we try to use it to learn what's next.
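The slot-claiming idea described above can be illustrated with a small standalone sketch of the same `fetch_update` idiom that `load_next_index` below relies on; the function name is hypothetical and the snippet is not part of the crate.

```rust
use std::sync::atomic::{AtomicUsize, Ordering};

/// Hand out each slot index in `0..num_slots` to exactly one caller, even across threads.
/// Returns `None` once all slots have been claimed (hypothetical helper, illustration only).
fn claim_next_slot(next_index_to_load: &AtomicUsize, num_slots: usize) -> Option<usize> {
    next_index_to_load
        .fetch_update(Ordering::SeqCst, Ordering::SeqCst, |current| {
            (current != num_slots).then_some(current + 1)
        })
        .ok() // `Ok` carries the previous value, i.e. the index that was just claimed
}
```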
+ fn load_next_index(&self, mut index: arc_swap::Guard<Arc<SlotMapIndex>>) -> bool { + 'retry_with_changed_index: loop { + let previous_state_id = index.state_id(); + 'retry_with_next_slot_index: loop { + match index + .next_index_to_load + .fetch_update(Ordering::SeqCst, Ordering::SeqCst, |current| { + (current != index.slot_indices.len()).then_some(current + 1) + }) { + Ok(slot_map_index) => { + // This slot-map index is in bounds and was only given to us. + let _ongoing_operation = IncOnNewAndDecOnDrop::new(&index.num_indices_currently_being_loaded); + let slot = &self.files[index.slot_indices[slot_map_index]]; + let _lock = slot.write.lock(); + if slot.generation.load(Ordering::SeqCst) > index.generation { + // There is a disk consolidation in progress which just overwrote a slot that cold be disposed with some other + // index, one we didn't intend to load. + // Continue with the next slot index in the hope there is something else we can do… + continue 'retry_with_next_slot_index; + } + let mut bundle = slot.files.load_full(); + let bundle_mut = Arc::make_mut(&mut bundle); + if let Some(files) = bundle_mut.as_mut() { + // these are always expected to be set, unless somebody raced us. We handle this later by retrying. + let _loaded_count = IncOnDrop(&index.loaded_indices); + match files.load_index(self.object_hash) { + Ok(_) => { + slot.files.store(bundle); + break 'retry_with_next_slot_index; + } + Err(_) => { + slot.files.store(bundle); + continue 'retry_with_next_slot_index; + } + } + } + } + Err(_nothing_more_to_load) => { + // There can be contention as many threads start working at the same time and take all the + // slots to load indices for. Some threads might just be left-over and have to wait for something + // to change. + let num_load_operations = index.num_indices_currently_being_loaded.deref(); + // TODO: potentially hot loop - could this be a condition variable? + while num_load_operations.load(Ordering::Relaxed) != 0 { + std::thread::yield_now() + } + break 'retry_with_next_slot_index; + } + } + } + if previous_state_id == index.state_id() { + let potentially_new_index = self.index.load(); + if Arc::as_ptr(&potentially_new_index) == Arc::as_ptr(&index) { + // There isn't a new index with which to retry the whole ordeal, so nothing could be done here. + return false; + } else { + // the index changed, worth trying again + index = potentially_new_index; + continue 'retry_with_changed_index; + } + } else { + // something inarguably changed, probably an index was loaded. 'probably' because we consider failed loads valid attempts, + // even they don't change anything for the caller which would then do a round for nothing. + return true; + } + } + } + + /// refresh and possibly clear out our existing data structures, causing all pack ids to be invalidated. + /// `load_new_index` is an optimization to at least provide one newly loaded pack after refreshing the slot map. + pub(crate) fn consolidate_with_disk_state( + &self, + needs_init: bool, + load_new_index: bool, + ) -> Result<Option<Snapshot>, Error> { + let index = self.index.load(); + let previous_index_state = Arc::as_ptr(&index) as usize; + + // IMPORTANT: get a lock after we recorded the previous state. + let write = self.write.lock(); + let objects_directory = &self.path; + + // Now we know the index isn't going to change anymore, even though threads might still load indices in the meantime. 
+ let index = self.index.load(); + if previous_index_state != Arc::as_ptr(&index) as usize { + // Someone else took the look before and changed the index. Return it without doing any additional work. + return Ok(Some(self.collect_snapshot())); + } + + let was_uninitialized = !index.is_initialized(); + + // We might not be able to detect by pointer if the state changed, as this itself is racy. So we keep track of double-initialization + // using a flag, which means that if `needs_init` was true we saw the index uninitialized once, but now that we are here it's + // initialized meaning that somebody was faster and we couldn't detect it by comparisons to the index. + // If so, make sure we collect the snapshot instead of returning None in case nothing actually changed, which is likely with a + // race like this. + if !was_uninitialized && needs_init { + return Ok(Some(self.collect_snapshot())); + } + self.num_disk_state_consolidation.fetch_add(1, Ordering::Relaxed); + + let db_paths: Vec<_> = std::iter::once(objects_directory.to_owned()) + .chain(crate::alternate::resolve(objects_directory, &self.current_dir)?) + .collect(); + + // turn db paths into loose object databases. Reuse what's there, but only if it is in the right order. + let loose_dbs = if was_uninitialized + || db_paths.len() != index.loose_dbs.len() + || db_paths + .iter() + .zip(index.loose_dbs.iter().map(|ldb| &ldb.path)) + .any(|(lhs, rhs)| lhs != rhs) + { + Arc::new( + db_paths + .iter() + .map(|path| crate::loose::Store::at(path, self.object_hash)) + .collect::<Vec<_>>(), + ) + } else { + Arc::clone(&index.loose_dbs) + }; + + let indices_by_modification_time = Self::collect_indices_and_mtime_sorted_by_size( + db_paths, + index.slot_indices.len().into(), + self.use_multi_pack_index.then_some(self.object_hash), + )?; + let mut idx_by_index_path: BTreeMap<_, _> = index + .slot_indices + .iter() + .filter_map(|&idx| { + let f = &self.files[idx]; + Option::as_ref(&f.files.load()).map(|f| (f.index_path().to_owned(), idx)) + }) + .collect(); + + let mut new_slot_map_indices = Vec::new(); // these indices into the slot map still exist there/didn't change + let mut index_paths_to_add = was_uninitialized + .then(|| VecDeque::with_capacity(indices_by_modification_time.len())) + .unwrap_or_default(); + + // Figure out this number based on what we see while handling the existing indices + let mut num_loaded_indices = 0; + for (index_info, mtime) in indices_by_modification_time.into_iter().map(|(a, b, _)| (a, b)) { + match idx_by_index_path.remove(index_info.path()) { + Some(slot_idx) => { + let slot = &self.files[slot_idx]; + let files_guard = slot.files.load(); + let files = + Option::as_ref(&files_guard).expect("slot is set or we wouldn't know it points to this file"); + if index_info.is_multi_index() && files.mtime() != mtime { + // we have a changed multi-pack index. We can't just change the existing slot as it may alter slot indices + // that are currently available. Instead we have to move what's there into a new slot, along with the changes, + // and later free the slot or dispose of the index in the slot (like we do for removed/missing files). + index_paths_to_add.push_back((index_info, mtime, Some(slot_idx))); + // If the current slot is loaded, the soon-to-be copied multi-index path will be loaded as well. + if files.index_is_loaded() { + num_loaded_indices += 1; + } + } else { + // packs and indices are immutable, so no need to check modification times. 
Unchanged multi-pack indices also + // are handled like this just to be sure they are in the desired state. For these, the only way this could happen + // is if somebody deletes and then puts back + if Self::assure_slot_matches_index(&write, slot, index_info, mtime, index.generation) { + num_loaded_indices += 1; + } + new_slot_map_indices.push(slot_idx); + } + } + None => index_paths_to_add.push_back((index_info, mtime, None)), + } + } + let needs_stable_indices = self.maintain_stable_indices(&write); + + let mut next_possibly_free_index = index + .slot_indices + .iter() + .max() + .map(|idx| (idx + 1) % self.files.len()) + .unwrap_or(0); + let mut num_indices_checked = 0; + let mut needs_generation_change = false; + let mut slot_indices_to_remove: Vec<_> = idx_by_index_path.into_values().collect(); + while let Some((mut index_info, mtime, move_from_slot_idx)) = index_paths_to_add.pop_front() { + 'increment_slot_index: loop { + if num_indices_checked == self.files.len() { + return Err(Error::InsufficientSlots { + current: self.files.len(), + needed: index_paths_to_add.len() + 1, /*the one currently popped off*/ + }); + } + let slot_index = next_possibly_free_index; + let slot = &self.files[slot_index]; + next_possibly_free_index = (next_possibly_free_index + 1) % self.files.len(); + num_indices_checked += 1; + match move_from_slot_idx { + Some(move_from_slot_idx) => { + debug_assert!(index_info.is_multi_index(), "only set for multi-pack indices"); + if slot_index == move_from_slot_idx { + // don't try to move onto ourselves + continue 'increment_slot_index; + } + match Self::try_set_index_slot( + &write, + slot, + index_info, + mtime, + index.generation, + needs_stable_indices, + ) { + Ok(dest_was_empty) => { + slot_indices_to_remove.push(move_from_slot_idx); + new_slot_map_indices.push(slot_index); + // To avoid handling out the wrong pack (due to reassigned pack ids), declare this a new generation. + if !dest_was_empty { + needs_generation_change = true; + } + break 'increment_slot_index; + } + Err(unused_index_info) => index_info = unused_index_info, + } + } + None => { + match Self::try_set_index_slot( + &write, + slot, + index_info, + mtime, + index.generation, + needs_stable_indices, + ) { + Ok(dest_was_empty) => { + new_slot_map_indices.push(slot_index); + if !dest_was_empty { + needs_generation_change = true; + } + break 'increment_slot_index; + } + Err(unused_index_info) => index_info = unused_index_info, + } + } + } + // This isn't racy as it's only us who can change the Option::Some/None state of a slot. + } + } + assert_eq!( + index_paths_to_add.len(), + 0, + "By this time we have assigned all new files to slots" + ); + + let generation = if needs_generation_change { + index.generation.checked_add(1).ok_or(Error::GenerationOverflow)? + } else { + index.generation + }; + let index_unchanged = index.slot_indices == new_slot_map_indices; + if generation != index.generation { + assert!( + !index_unchanged, + "if the generation changed, the slot index must have changed for sure" + ); + } + if !index_unchanged || loose_dbs != index.loose_dbs { + let new_index = Arc::new(SlotMapIndex { + slot_indices: new_slot_map_indices, + loose_dbs, + generation, + // if there was a prior generation, some indices might already be loaded. But we deal with it by trying to load the next index then, + // until we find one. 
+ next_index_to_load: index_unchanged + .then(|| Arc::clone(&index.next_index_to_load)) + .unwrap_or_default(), + loaded_indices: index_unchanged + .then(|| Arc::clone(&index.loaded_indices)) + .unwrap_or_else(|| Arc::new(num_loaded_indices.into())), + num_indices_currently_being_loaded: Default::default(), + }); + self.index.store(new_index); + } + + // deleted items - remove their slots AFTER we have set the new index if we may alter indices, otherwise we only declare them garbage. + // removing slots may cause pack loading to fail, and they will then reload their indices. + for slot in slot_indices_to_remove.into_iter().map(|idx| &self.files[idx]) { + let _lock = slot.write.lock(); + let mut files = slot.files.load_full(); + let files_mut = Arc::make_mut(&mut files); + if needs_stable_indices { + if let Some(files) = files_mut.as_mut() { + files.trash(); + // generation stays the same, as it's the same value still but scheduled for eventual removal. + } + } else { + *files_mut = None; + }; + slot.files.store(files); + if !needs_stable_indices { + // Not racy due to lock, generation must be set after unsetting the slot value AND storing it. + slot.generation.store(generation, Ordering::SeqCst); + } + } + + let new_index = self.index.load(); + Ok(if index.state_id() == new_index.state_id() { + // there was no change, and nothing was loaded in the meantime, reflect that in the return value to not get into loops + None + } else { + if load_new_index { + self.load_next_index(new_index); + } + Some(self.collect_snapshot()) + }) + } + + pub(crate) fn collect_indices_and_mtime_sorted_by_size( + db_paths: Vec<PathBuf>, + initial_capacity: Option<usize>, + multi_pack_index_object_hash: Option<gix_hash::Kind>, + ) -> Result<Vec<(Either, SystemTime, u64)>, Error> { + let mut indices_by_modification_time = Vec::with_capacity(initial_capacity.unwrap_or_default()); + for db_path in db_paths { + let packs = db_path.join("pack"); + let entries = match std::fs::read_dir(packs) { + Ok(e) => e, + Err(err) if err.kind() == std::io::ErrorKind::NotFound => continue, + Err(err) => return Err(err.into()), + }; + let indices = entries + .filter_map(Result::ok) + .filter_map(|e| e.metadata().map(|md| (e.path(), md)).ok()) + .filter(|(_, md)| md.file_type().is_file()) + .filter(|(p, _)| { + let ext = p.extension(); + (ext == Some(OsStr::new("idx")) && p.with_extension("pack").is_file()) + || (multi_pack_index_object_hash.is_some() && ext.is_none() && is_multipack_index(p)) + }) + .map(|(p, md)| md.modified().map_err(Error::from).map(|mtime| (p, mtime, md.len()))) + .collect::<Result<Vec<_>, _>>()?; + + let multi_index_info = multi_pack_index_object_hash + .and_then(|hash| { + indices.iter().find_map(|(p, a, b)| { + is_multipack_index(p) + .then(|| { + // we always open the multi-pack here to be able to remove indices + gix_pack::multi_index::File::at(p) + .ok() + .filter(|midx| midx.object_hash() == hash) + .map(|midx| (midx, *a, *b)) + }) + .flatten() + .map(|t| { + if t.0.num_indices() > PackId::max_packs_in_multi_index() { + Err(Error::TooManyPacksInMultiIndex { + index_path: p.to_owned(), + actual: t.0.num_indices(), + limit: PackId::max_packs_in_multi_index(), + }) + } else { + Ok(t) + } + }) + }) + }) + .transpose()?; + if let Some((multi_index, mtime, flen)) = multi_index_info { + let index_names_in_multi_index: Vec<_> = + multi_index.index_names().iter().map(|p| p.as_path()).collect(); + let mut indices_not_in_multi_index: Vec<(Either, _, _)> = indices + .into_iter() + .filter_map(|(path, a, b)| { + (path 
!= multi_index.path() + && !index_names_in_multi_index + .contains(&Path::new(path.file_name().expect("file name present")))) + .then_some((Either::IndexPath(path), a, b)) + }) + .collect(); + indices_not_in_multi_index.insert(0, (Either::MultiIndexFile(Arc::new(multi_index)), mtime, flen)); + indices_by_modification_time.extend(indices_not_in_multi_index); + } else { + indices_by_modification_time.extend( + indices + .into_iter() + .filter_map(|(p, a, b)| (!is_multipack_index(&p)).then_some((Either::IndexPath(p), a, b))), + ) + } + } + // Unlike libgit2, do not sort by modification date, but by size and put the biggest indices first. That way + // the chance to hit an object should be higher. We leave it to the handle to sort by LRU. + // Git itself doesn't change the order which may safe time, but we want it to be stable which also helps some tests. + indices_by_modification_time.sort_by(|l, r| l.2.cmp(&r.2).reverse()); + Ok(indices_by_modification_time) + } + + /// returns Ok<dest slot was empty> if the copy could happen because dest-slot was actually free or disposable , and Some(true) if it was empty + #[allow(clippy::too_many_arguments)] + fn try_set_index_slot( + lock: &parking_lot::MutexGuard<'_, ()>, + dest_slot: &MutableIndexAndPack, + index_info: Either, + mtime: SystemTime, + current_generation: Generation, + needs_stable_indices: bool, + ) -> Result<bool, Either> { + let (dest_slot_was_empty, generation) = match &**dest_slot.files.load() { + Some(bundle) => { + if bundle.index_path() == index_info.path() || (bundle.is_disposable() && needs_stable_indices) { + // it might be possible to see ourselves in case all slots are taken, but there are still a few more destination + // slots to look for. + return Err(index_info); + } + // Since we overwrite an existing slot, we have to increment the generation to prevent anyone from trying to use it while + // before we are replacing it with a different value. + // In detail: + // We need to declare this to be the future to avoid anything in that slot to be returned to people who + // last saw the old state. They will then try to get a new index which by that time, might be happening + // in time so they get the latest one. If not, they will probably get into the same situation again until + // it finally succeeds. Alternatively, the object will be reported unobtainable, but at least it won't return + // some other object. + (false, current_generation + 1) + } + None => { + // For multi-pack indices: + // Do NOT copy the packs over, they need to be reopened to get the correct pack id matching the new slot map index. + // If we are allowed to delete the original, and nobody has the pack referenced, it is closed which is preferred. + // Thus we simply always start new with packs in multi-pack indices. + // In the worst case this could mean duplicate file handle usage though as the old and the new index can't share + // packs due to the intrinsic id. + // Note that the ID is used for cache access, too, so it must be unique. It must also be mappable from pack-id to slotmap id. 
+ (true, current_generation) + } + }; + Self::set_slot_to_index(lock, dest_slot, index_info, mtime, generation); + Ok(dest_slot_was_empty) + } + + fn set_slot_to_index( + _lock: &parking_lot::MutexGuard<'_, ()>, + slot: &MutableIndexAndPack, + index_info: Either, + mtime: SystemTime, + generation: Generation, + ) { + let _lock = slot.write.lock(); + let mut files = slot.files.load_full(); + let files_mut = Arc::make_mut(&mut files); + // set the generation before we actually change the value, otherwise readers of old generations could observe the new one. + // We rather want them to turn around here and update their index, which, by that time, might actually already be available. + // If not, they would fail unable to load a pack or index they need, but that's preferred over returning wrong objects. + // Safety: can't race as we hold the lock, have to set the generation beforehand to help avoid others to observe the value. + slot.generation.store(generation, Ordering::SeqCst); + *files_mut = Some(index_info.into_index_and_packs(mtime)); + slot.files.store(files); + } + + /// Returns true if the index was left in a loaded state. + fn assure_slot_matches_index( + _lock: &parking_lot::MutexGuard<'_, ()>, + slot: &MutableIndexAndPack, + index_info: Either, + mtime: SystemTime, + current_generation: Generation, + ) -> bool { + match Option::as_ref(&slot.files.load()) { + Some(bundle) => { + assert_eq!( + bundle.index_path(), + index_info.path(), + "Parallel writers cannot change the file the slot points to." + ); + if bundle.is_disposable() { + // put it into the correct mode, it's now available for sure so should not be missing or garbage. + // The latter can happen if files are removed and put back for some reason, but we should definitely + // have them in a decent state now that we know/think they are there. + let _lock = slot.write.lock(); + let mut files = slot.files.load_full(); + let files_mut = Arc::make_mut(&mut files) + .as_mut() + .expect("BUG: cannot change from something to nothing, would be race"); + files_mut.put_back(); + debug_assert_eq!( + files_mut.mtime(), + mtime, + "BUG: we can only put back files that didn't obviously change" + ); + // Safety: can't race as we hold the lock, must be set before replacing the data. + // NOTE that we don't change the generation as it's still the very same index we talk about, it doesn't change + // identity. + slot.generation.store(current_generation, Ordering::SeqCst); + slot.files.store(files); + } else { + // it's already in the correct state, either loaded or unloaded. + } + bundle.index_is_loaded() + } + None => { + unreachable!("BUG: a slot can never be deleted if we have it recorded in the index WHILE changing said index. There shouldn't be a race") + } + } + } + + /// Stability means that indices returned by this API will remain valid. + /// Without that constraint, we may unload unused packs and indices, and may rebuild the slotmap index. + /// + /// Note that this must be called with a lock to the relevant state held to assure these values don't change while + /// we are working on said index. + fn maintain_stable_indices(&self, _guard: &parking_lot::MutexGuard<'_, ()>) -> bool { + self.num_handles_stable.load(Ordering::SeqCst) > 0 + } + + pub(crate) fn collect_snapshot(&self) -> Snapshot { + let index = self.index.load(); + let indices = if index.is_initialized() { + index + .slot_indices + .iter() + .map(|idx| (*idx, &self.files[*idx])) + .filter_map(|(id, file)| { + let lookup = match (**file.files.load()).as_ref()? 
{ + types::IndexAndPacks::Index(bundle) => handle::SingleOrMultiIndex::Single { + index: bundle.index.loaded()?.clone(), + data: bundle.data.loaded().cloned(), + }, + types::IndexAndPacks::MultiIndex(multi) => handle::SingleOrMultiIndex::Multi { + index: multi.multi_index.loaded()?.clone(), + data: multi.data.iter().map(|f| f.loaded().cloned()).collect(), + }, + }; + handle::IndexLookup { file: lookup, id }.into() + }) + .collect() + } else { + Vec::new() + }; + + Snapshot { + indices, + loose_dbs: Arc::clone(&index.loose_dbs), + marker: index.marker(), + } + } +} + +// Outside of this method we will never assign new slot indices. +fn is_multipack_index(path: &Path) -> bool { + path.file_name() == Some(OsStr::new("multi-pack-index")) +} + +struct IncOnNewAndDecOnDrop<'a>(&'a AtomicU16); +impl<'a> IncOnNewAndDecOnDrop<'a> { + pub fn new(v: &'a AtomicU16) -> Self { + v.fetch_add(1, Ordering::SeqCst); + Self(v) + } +} +impl<'a> Drop for IncOnNewAndDecOnDrop<'a> { + fn drop(&mut self) { + self.0.fetch_sub(1, Ordering::SeqCst); + } +} + +struct IncOnDrop<'a>(&'a AtomicUsize); +impl<'a> Drop for IncOnDrop<'a> { + fn drop(&mut self) { + self.0.fetch_add(1, Ordering::SeqCst); + } +} + +pub(crate) enum Either { + IndexPath(PathBuf), + MultiIndexFile(Arc<gix_pack::multi_index::File>), +} + +impl Either { + fn path(&self) -> &Path { + match self { + Either::IndexPath(p) => p, + Either::MultiIndexFile(f) => f.path(), + } + } + + fn into_index_and_packs(self, mtime: SystemTime) -> IndexAndPacks { + match self { + Either::IndexPath(path) => IndexAndPacks::new_single(path, mtime), + Either::MultiIndexFile(file) => IndexAndPacks::new_multi_from_open_file(file, mtime), + } + } + + fn is_multi_index(&self) -> bool { + matches!(self, Either::MultiIndexFile(_)) + } +} + +impl Eq for Either {} + +impl PartialEq<Self> for Either { + fn eq(&self, other: &Self) -> bool { + self.path().eq(other.path()) + } +} + +impl PartialOrd<Self> for Either { + fn partial_cmp(&self, other: &Self) -> Option<std::cmp::Ordering> { + self.path().partial_cmp(other.path()) + } +} + +impl Ord for Either { + fn cmp(&self, other: &Self) -> std::cmp::Ordering { + self.path().cmp(other.path()) + } +} diff --git a/vendor/gix-odb/src/store_impls/dynamic/load_one.rs b/vendor/gix-odb/src/store_impls/dynamic/load_one.rs new file mode 100644 index 000000000..9961532c6 --- /dev/null +++ b/vendor/gix-odb/src/store_impls/dynamic/load_one.rs @@ -0,0 +1,154 @@ +use std::{ + path::Path, + sync::{atomic::Ordering, Arc}, +}; + +use crate::store::{handle, types}; + +impl super::Store { + /// If Ok(None) is returned, the pack-id was stale and referred to an unloaded pack or a pack which couldn't be + /// loaded as its file didn't exist on disk anymore. + /// If the oid is known, just load indices again to continue + /// (objects rarely ever removed so should be present, maybe in another pack though), + /// and redo the entire lookup for a valid pack id whose pack can probably be loaded next time. 
+ pub(crate) fn load_pack( + &self, + id: types::PackId, + marker: types::SlotIndexMarker, + ) -> std::io::Result<Option<Arc<gix_pack::data::File>>> { + let index = self.index.load(); + if index.generation != marker.generation { + return Ok(None); + } + fn load_pack( + path: &Path, + id: types::PackId, + object_hash: gix_hash::Kind, + ) -> std::io::Result<Arc<gix_pack::data::File>> { + gix_pack::data::File::at(path, object_hash) + .map(|mut pack| { + pack.id = id.to_intrinsic_pack_id(); + Arc::new(pack) + }) + .map_err(|err| match err { + gix_pack::data::header::decode::Error::Io { source, .. } => source, + other => std::io::Error::new(std::io::ErrorKind::Other, other), + }) + } + + let slot = &self.files[id.index]; + // pin the current state before loading in the generation. That way we won't risk seeing the wrong value later. + let slot_files = &**slot.files.load(); + if slot.generation.load(Ordering::SeqCst) > marker.generation { + // There is a disk consolidation in progress which just overwrote a slot that could be disposed with some other + // pack, one we didn't intend to load. + // Hope that when the caller returns/retries the new index is set so they can fetch it and retry. + return Ok(None); + } + match id.multipack_index { + None => { + match slot_files { + Some(types::IndexAndPacks::Index(bundle)) => { + match bundle.data.loaded() { + Some(pack) => Ok(Some(pack.clone())), + None => { + let _lock = slot.write.lock(); + let mut files = slot.files.load_full(); + let files_mut = Arc::make_mut(&mut files); + let pack = match files_mut { + Some(types::IndexAndPacks::Index(bundle)) => bundle + .data + .load_with_recovery(|path| load_pack(path, id, self.object_hash))?, + Some(types::IndexAndPacks::MultiIndex(_)) => { + // something changed between us getting the lock, trigger a complete index refresh. + None + } + None => { + unreachable!("BUG: must set this handle to be stable to avoid slots to be cleared/changed") + } + }; + slot.files.store(files); + Ok(pack) + } + } + } + // This can also happen if they use an old index into our new and refreshed data which might have a multi-index + // here. + Some(types::IndexAndPacks::MultiIndex(_)) => Ok(None), + None => { + unreachable!("BUG: must set this handle to be stable to avoid slots to be cleared/changed") + } + } + } + Some(pack_index) => { + match slot_files { + Some(types::IndexAndPacks::MultiIndex(bundle)) => { + match bundle.data.get(pack_index as usize) { + None => Ok(None), // somewhat unexpected, data must be stale + Some(on_disk_pack) => match on_disk_pack.loaded() { + Some(pack) => Ok(Some(pack.clone())), + None => { + let _lock = slot.write.lock(); + let mut files = slot.files.load_full(); + let files_mut = Arc::make_mut(&mut files); + let pack = match files_mut { + Some(types::IndexAndPacks::Index(_)) => { + // something changed between us getting the lock, trigger a complete index refresh. + None + } + Some(types::IndexAndPacks::MultiIndex(bundle)) => bundle + .data + .get_mut(pack_index as usize) + .expect("BUG: must set this handle to be stable") + .load_with_recovery(|path| load_pack(path, id, self.object_hash))?, + None => { + unreachable!("BUG: must set this handle to be stable to avoid slots to be cleared/changed") + } + }; + slot.files.store(files); + Ok(pack) + } + }, + } + } + // This can also happen if they use an old index into our new and refreshed data which might have a multi-index + // here. 
+ Some(types::IndexAndPacks::Index(_)) => Ok(None), + None => { + unreachable!("BUG: must set this handle to be stable to avoid slots to be cleared/changed") + } + } + } + } + } + + /// Similar to `.load_pack()`, but for entire indices, bypassing the index entirely and going solely by marker and id. + /// Returns `None` if the index wasn't available anymore or could otherwise not be loaded, which can be considered a bug + /// as we should always keep needed indices available. + pub(crate) fn index_by_id(&self, id: types::PackId, marker: types::SlotIndexMarker) -> Option<handle::IndexLookup> { + let slot = self.files.get(id.index)?; + // Pin this value before we check the generation to avoid seeing something newer later. + let slot_files = &**slot.files.load(); + if slot.generation.load(Ordering::SeqCst) > marker.generation { + // This means somebody just overwrote our trashed slot with a new (or about to be stored) index, which means the slot isn't + // what we need it to be. + // This shouldn't as we won't overwrite slots while handles need stable indices. + return None; + } + let lookup = match (slot_files).as_ref()? { + types::IndexAndPacks::Index(bundle) => handle::SingleOrMultiIndex::Single { + index: bundle.index.loaded()?.clone(), + data: bundle.data.loaded().cloned(), + }, + types::IndexAndPacks::MultiIndex(multi) => handle::SingleOrMultiIndex::Multi { + index: multi.multi_index.loaded()?.clone(), + data: multi.data.iter().map(|f| f.loaded().cloned()).collect(), + }, + }; + handle::IndexLookup { + id: id.index, + file: lookup, + } + .into() + } +} diff --git a/vendor/gix-odb/src/store_impls/dynamic/metrics.rs b/vendor/gix-odb/src/store_impls/dynamic/metrics.rs new file mode 100644 index 000000000..630674940 --- /dev/null +++ b/vendor/gix-odb/src/store_impls/dynamic/metrics.rs @@ -0,0 +1,80 @@ +use std::sync::atomic::Ordering; + +use crate::store::{types, types::IndexAndPacks}; + +impl super::Store { + /// Return metrics collected in a racy fashion, giving an idea of what's currently going on in the store. + /// + /// Use this to decide whether a new instance should be created to get a chance at dropping all open handles. 
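A hedged sketch of consuming the racy metrics snapshot described above; the field names follow the `Metrics` initializer in the function below, while their exact visibility and the printing format are assumptions for illustration.

```rust
// Print a quick health summary; `m` is a point-in-time, unsynchronized snapshot.
fn log_store_health(store: &gix_odb::Store) {
    let m = store.metrics();
    eprintln!(
        "handles: {}, refreshes: {}, indices open/known: {}/{}, unused slots: {}",
        m.num_handles, m.num_refreshes, m.open_reachable_indices, m.known_reachable_indices, m.unused_slots
    );
    if m.unreachable_indices > 0 || m.unreachable_packs > 0 {
        // Per the documentation above, a new Store instance gives such files a chance to be dropped.
        eprintln!("unreachable: {} indices, {} packs", m.unreachable_indices, m.unreachable_packs);
    }
}
```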
+ pub fn metrics(&self) -> types::Metrics { + let mut open_packs = 0; + let mut open_indices = 0; + let mut known_packs = 0; + let mut known_indices = 0; + let mut unused_slots = 0; + let mut unreachable_indices = 0; + let mut unreachable_packs = 0; + + let index = self.index.load(); + for f in index.slot_indices.iter().map(|idx| &self.files[*idx]) { + match &**f.files.load() { + Some(IndexAndPacks::Index(bundle)) => { + if bundle.index.is_loaded() { + open_indices += 1; + } + known_indices += 1; + if bundle.data.is_loaded() { + open_packs += 1; + } + known_packs += 1; + } + Some(IndexAndPacks::MultiIndex(multi)) => { + if multi.multi_index.is_loaded() { + open_indices += 1; + } + known_indices += 1; + for pack in multi.data.iter() { + if pack.is_loaded() { + open_packs += 1; + } + known_packs += 1; + } + } + None => {} + } + } + + for slot in &self.files { + match slot.files.load().as_ref() { + None => { + unused_slots += 1; + } + Some(bundle) => { + if bundle.is_disposable() { + unreachable_indices += 1; + unreachable_packs += match bundle { + IndexAndPacks::Index(single) => usize::from(single.data.is_loaded()), + IndexAndPacks::MultiIndex(multi) => { + multi.data.iter().map(|p| usize::from(p.is_loaded())).sum() + } + } + } + } + } + } + + types::Metrics { + num_handles: self.num_handles_unstable.load(Ordering::Relaxed) + + self.num_handles_stable.load(Ordering::Relaxed), + num_refreshes: self.num_disk_state_consolidation.load(Ordering::Relaxed), + open_reachable_packs: open_packs, + open_reachable_indices: open_indices, + known_reachable_indices: known_indices, + known_packs, + unused_slots, + loose_dbs: index.loose_dbs.len(), + unreachable_indices, + unreachable_packs, + } + } +} diff --git a/vendor/gix-odb/src/store_impls/dynamic/mod.rs b/vendor/gix-odb/src/store_impls/dynamic/mod.rs new file mode 100644 index 000000000..5cf2a26ee --- /dev/null +++ b/vendor/gix-odb/src/store_impls/dynamic/mod.rs @@ -0,0 +1,188 @@ +//! The standard object store which should fit all needs. +use std::{cell::RefCell, ops::Deref}; + +use crate::Store; + +/// This effectively acts like a handle but exists to be usable from the actual `crate::Handle` implementation which adds caches on top. +/// Each store is quickly cloned and contains thread-local state for shared packs. +pub struct Handle<S> +where + S: Deref<Target = Store> + Clone, +{ + pub(crate) store: S, + /// Defines what happens when there is no more indices to load. + pub refresh: RefreshMode, + /// The maximum recursion depth for resolving ref-delta base objects, that is objects referring to other objects within + /// a pack. + /// Recursive loops are possible only in purposefully crafted packs. + /// This value doesn't have to be huge as in typical scenarios, these kind of objects are rare and chains supposedly are + /// even more rare. + pub max_recursion_depth: usize, + + /// If true, replacements will not be performed even if these are available. + pub ignore_replacements: bool, + + pub(crate) token: Option<handle::Mode>, + snapshot: RefCell<load_index::Snapshot>, + packed_object_count: RefCell<Option<u64>>, +} + +/// Decide what happens when all indices are loaded. +#[derive(Clone, Copy)] +pub enum RefreshMode { + /// Check for new or changed pack indices (and pack data files) when the last known index is loaded. 
+ /// During runtime we will keep pack indices stable by never reusing them, however, there is the option for + /// clearing internal caches which is likely to change pack ids and it will trigger unloading of packs as they are missing on disk. + AfterAllIndicesLoaded, + /// Use this if you expect a lot of missing objects that shouldn't trigger refreshes even after all packs are loaded. + /// This comes at the risk of not learning that the packs have changed in the mean time. + Never, +} + +impl Default for RefreshMode { + fn default() -> Self { + RefreshMode::AfterAllIndicesLoaded + } +} + +impl RefreshMode { + /// Set this refresh mode to never refresh. + pub fn never(&mut self) { + *self = RefreshMode::Never; + } +} + +/// +pub mod find; + +/// +pub mod prefix; + +mod header; + +/// +pub mod iter; + +/// +pub mod write; + +/// +pub mod init; + +pub(crate) mod types; +pub use types::Metrics; + +pub(crate) mod handle; + +/// +pub mod load_index; + +/// +pub mod verify; + +mod load_one; + +mod metrics; + +mod access; + +/// +pub mod structure { + use std::path::PathBuf; + + use crate::{store::load_index, types::IndexAndPacks, Store}; + + /// A record of a structural element of an object database. + #[derive(Debug, Clone, PartialEq, Eq)] + #[cfg_attr(feature = "serde1", derive(serde::Serialize, serde::Deserialize))] + pub enum Record { + /// A loose object database. + LooseObjectDatabase { + /// The root of the object database. + objects_directory: PathBuf, + /// The amount of object files. + num_objects: usize, + }, + /// A pack index file + Index { + /// The location of the index file, + path: PathBuf, + /// Whether or not the index is mapped into memory. + state: IndexState, + }, + /// A multi-index file + MultiIndex { + /// The location of the multi-index file, + path: PathBuf, + /// Whether or not the index is mapped into memory. + state: IndexState, + }, + /// An empty slot was encountered, this is possibly happening as the ODB changes during query with + /// a file being removed. + Empty, + } + + #[derive(Debug, Clone, PartialEq, Eq)] + #[cfg_attr(feature = "serde1", derive(serde::Serialize, serde::Deserialize))] + /// Possible stats of pack indices. + pub enum IndexState { + /// The index is active in memory because a mapping exists. + Loaded, + /// The index couldn't be unloaded as it was still in use, but that can happen another time. + Disposable, + /// The index isn't loaded/memory mapped. + Unloaded, + } + + impl Store { + /// Return information about all files known to us as well as their loading state. + /// + /// Note that this call is expensive as it gathers additional information about loose object databases. + /// Note that it may change as we collect information due to the highly volatile nature of the + /// implementation. The likelihood of actual changes is low though as these still depend on something + /// changing on disk and somebody reading at the same time. 
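An illustrative walk over the records returned by `structure()` below, using the `Record` variants and `IndexState` states defined in this module; the crate paths are assumed from the layout shown in this diff and the printing format is arbitrary.

```rust
use gix_odb::store::structure::Record;

fn print_structure(store: &gix_odb::Store) -> Result<(), gix_odb::store::load_index::Error> {
    for record in store.structure()? {
        match record {
            Record::LooseObjectDatabase { objects_directory, num_objects } => {
                println!("loose  {} ({num_objects} objects)", objects_directory.display())
            }
            Record::Index { path, state } => println!("index  {} ({state:?})", path.display()),
            Record::MultiIndex { path, state } => println!("multi  {} ({state:?})", path.display()),
            Record::Empty => println!("empty slot"),
        }
    }
    Ok(())
}
```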
+ pub fn structure(&self) -> Result<Vec<Record>, load_index::Error> { + let index = self.index.load(); + if !index.is_initialized() { + self.consolidate_with_disk_state(true, false /*load one new index*/)?; + } + let index = self.index.load(); + let mut res: Vec<_> = index + .loose_dbs + .iter() + .map(|db| Record::LooseObjectDatabase { + objects_directory: db.path.clone(), + num_objects: db.iter().count(), + }) + .collect(); + + for slot in index.slot_indices.iter().map(|idx| &self.files[*idx]) { + let files = slot.files.load(); + let record = match &**files { + Some(index) => { + let state = if index.is_disposable() { + IndexState::Disposable + } else if index.index_is_loaded() { + IndexState::Loaded + } else { + IndexState::Unloaded + }; + match index { + IndexAndPacks::Index(b) => Record::Index { + path: b.index.path().into(), + state, + }, + IndexAndPacks::MultiIndex(b) => Record::MultiIndex { + path: b.multi_index.path().into(), + state, + }, + } + } + None => Record::Empty, + }; + res.push(record); + } + Ok(res) + } + } +} diff --git a/vendor/gix-odb/src/store_impls/dynamic/prefix.rs b/vendor/gix-odb/src/store_impls/dynamic/prefix.rs new file mode 100644 index 000000000..9097c8cf6 --- /dev/null +++ b/vendor/gix-odb/src/store_impls/dynamic/prefix.rs @@ -0,0 +1,196 @@ +use std::{collections::HashSet, ops::Deref}; + +use crate::{ + store::{load_index, Handle}, + Find, +}; + +/// +pub mod lookup { + use crate::loose; + + /// Returned by [`Handle::lookup_prefix()`][crate::store::Handle::lookup_prefix()] + #[derive(thiserror::Error, Debug)] + #[allow(missing_docs)] + pub enum Error { + #[error("An error occurred looking up a prefix which requires iteration")] + LooseWalkDir(#[from] loose::iter::Error), + #[error(transparent)] + LoadIndex(#[from] crate::store::load_index::Error), + } + + /// A way to indicate if a lookup, despite successful, was ambiguous or yielded exactly + /// one result in the particular index. + pub type Outcome = Result<gix_hash::ObjectId, ()>; +} + +/// +pub mod disambiguate { + /// A potentially ambiguous prefix for use with `Handle::disambiguate_prefix()`. + #[derive(Debug, Copy, Clone)] + pub struct Candidate { + id: gix_hash::ObjectId, + hex_len: usize, + } + + impl Candidate { + /// Create a new potentially ambiguous prefix from an `id` and the desired minimal `hex_len`. + /// + /// It is considered ambiguous until it's disambiguated by validating that there is only a single object + /// matching this prefix. + pub fn new(id: impl Into<gix_hash::ObjectId>, hex_len: usize) -> Result<Self, gix_hash::prefix::Error> { + let id = id.into(); + gix_hash::Prefix::new(id, hex_len)?; + Ok(Candidate { id, hex_len }) + } + + /// Transform ourselves into a `Prefix` with our current hex lengths. 
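A hedged example combining the `Candidate` type above with `disambiguate_prefix()` further below; the module paths and the generic bound mirror this file but are assumptions, and the boxed error is only for brevity.

```rust
use std::ops::Deref;

use gix_odb::store::prefix::disambiguate::Candidate;

fn shortest_unambiguous<S>(
    handle: &gix_odb::store::Handle<S>,
    id: gix_hash::ObjectId,
) -> Result<Option<gix_hash::Prefix>, Box<dyn std::error::Error>>
where
    S: Deref<Target = gix_odb::Store> + Clone,
{
    // Start with 7 hex characters and let the handle grow the prefix until it is unique.
    let candidate = Candidate::new(id, 7)?;
    Ok(handle.disambiguate_prefix(candidate)?)
}
```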
+ pub fn to_prefix(&self) -> gix_hash::Prefix { + gix_hash::Prefix::new(self.id, self.hex_len).expect("our hex-len to always be in bounds") + } + + pub(crate) fn inc_hex_len(&mut self) { + self.hex_len += 1; + assert!(self.hex_len <= self.id.kind().len_in_hex()); + } + + pub(crate) fn id(&self) -> &gix_hash::oid { + &self.id + } + + pub(crate) fn hex_len(&self) -> usize { + self.hex_len + } + } + + /// Returned by [`Handle::disambiguate_prefix()`][crate::store::Handle::disambiguate_prefix()] + #[derive(thiserror::Error, Debug)] + #[allow(missing_docs)] + pub enum Error { + #[error("An error occurred while trying to determine if a full hash contained in the object database")] + Contains(#[from] crate::store::find::Error), + #[error(transparent)] + Lookup(#[from] super::lookup::Error), + } +} + +impl<S> Handle<S> +where + S: Deref<Target = super::Store> + Clone, +{ + /// Return the exact number of packed objects after loading all currently available indices + /// as last seen on disk. + pub fn packed_object_count(&self) -> Result<u64, load_index::Error> { + let mut count = self.packed_object_count.borrow_mut(); + match *count { + Some(count) => Ok(count), + None => { + let mut snapshot = self.snapshot.borrow_mut(); + *snapshot = self.store.load_all_indices()?; + let mut obj_count = 0; + for index in &snapshot.indices { + obj_count += index.num_objects() as u64; + } + *count = Some(obj_count); + Ok(obj_count) + } + } + } + + /// Given a prefix `candidate` with an object id and an initial `hex_len`, check if it only matches a single + /// object within the entire object database and increment its `hex_len` by one until it is unambiguous. + /// Return `Ok(None)` if no object with that prefix exists. + pub fn disambiguate_prefix( + &self, + mut candidate: disambiguate::Candidate, + ) -> Result<Option<gix_hash::Prefix>, disambiguate::Error> { + let max_hex_len = candidate.id().kind().len_in_hex(); + if candidate.hex_len() == max_hex_len { + return Ok(self.contains(candidate.id()).then(|| candidate.to_prefix())); + } + + while candidate.hex_len() != max_hex_len { + let res = self.lookup_prefix(candidate.to_prefix(), None)?; + match res { + Some(Ok(_id)) => return Ok(Some(candidate.to_prefix())), + Some(Err(())) => { + candidate.inc_hex_len(); + continue; + } + None => return Ok(None), + } + } + Ok(Some(candidate.to_prefix())) + } + + /// Find the only object matching `prefix` and return it as `Ok(Some(Ok(<ObjectId>)))`, or return `Ok(Some(Err(()))` + /// if multiple different objects with the same prefix were found. + /// + /// Return `Ok(None)` if no object matched the `prefix`. + /// + /// Pass `candidates` to obtain the set of all object ids matching `prefix`, with the same return value as + /// one would have received if it remained `None`. + /// + /// ### Performance Note + /// + /// - Unless the handles refresh mode is set to `Never`, each lookup will trigger a refresh of the object databases files + /// on disk if the prefix doesn't lead to ambiguous results. + /// - Since all objects need to be examined to assure non-ambiguous return values, after calling this method all indices will + /// be loaded. + /// - If `candidates` is `Some(…)`, the traversal will continue to obtain all candidates, which takes more time + /// as there is no early abort. 
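An illustrative call to `lookup_prefix()` below that passes the `candidates` set to collect all matches instead of stopping at the first ambiguity; paths and bounds are assumptions mirroring the surrounding impl, not verified against the published crate.

```rust
use std::collections::HashSet;
use std::ops::Deref;

fn all_objects_with_prefix<S>(
    handle: &gix_odb::store::Handle<S>,
    prefix: gix_hash::Prefix,
) -> Result<HashSet<gix_hash::ObjectId>, gix_odb::store::prefix::lookup::Error>
where
    S: Deref<Target = gix_odb::Store> + Clone,
{
    let mut candidates = HashSet::new();
    // The primary return value signals `Some(Err(()))` on ambiguity; when collecting
    // candidates we only care about the set that was filled in.
    handle.lookup_prefix(prefix, Some(&mut candidates))?;
    Ok(candidates)
}
```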
+ pub fn lookup_prefix( + &self, + prefix: gix_hash::Prefix, + mut candidates: Option<&mut HashSet<gix_hash::ObjectId>>, + ) -> Result<Option<lookup::Outcome>, lookup::Error> { + let mut candidate: Option<gix_hash::ObjectId> = None; + loop { + let snapshot = self.snapshot.borrow(); + for index in snapshot.indices.iter() { + #[allow(clippy::needless_option_as_deref)] // needed as it's the equivalent of a reborrow. + let lookup_result = index.lookup_prefix(prefix, candidates.as_deref_mut()); + if candidates.is_none() && !check_candidate(lookup_result, &mut candidate) { + return Ok(Some(Err(()))); + } + } + + for lodb in snapshot.loose_dbs.iter() { + #[allow(clippy::needless_option_as_deref)] // needed as it's the equivalent of a reborrow. + let lookup_result = lodb.lookup_prefix(prefix, candidates.as_deref_mut())?; + if candidates.is_none() && !check_candidate(lookup_result, &mut candidate) { + return Ok(Some(Err(()))); + } + } + + match self.store.load_one_index(self.refresh, snapshot.marker)? { + Some(new_snapshot) => { + drop(snapshot); + *self.snapshot.borrow_mut() = new_snapshot; + } + None => { + return match &candidates { + Some(candidates) => match candidates.len() { + 0 => Ok(None), + 1 => Ok(candidates.iter().cloned().next().map(Ok)), + _ => Ok(Some(Err(()))), + }, + None => Ok(candidate.map(Ok)), + }; + } + } + } + + fn check_candidate(lookup_result: Option<lookup::Outcome>, candidate: &mut Option<gix_hash::ObjectId>) -> bool { + match (lookup_result, &*candidate) { + (Some(Ok(oid)), Some(candidate)) if *candidate != oid => false, + (Some(Ok(_)), Some(_)) | (None, None) | (None, Some(_)) => true, + (Some(Err(())), _) => false, + (Some(Ok(oid)), None) => { + *candidate = Some(oid); + true + } + } + } + } +} diff --git a/vendor/gix-odb/src/store_impls/dynamic/types.rs b/vendor/gix-odb/src/store_impls/dynamic/types.rs new file mode 100644 index 000000000..2bda0d7d3 --- /dev/null +++ b/vendor/gix-odb/src/store_impls/dynamic/types.rs @@ -0,0 +1,460 @@ +use std::{ + path::{Path, PathBuf}, + sync::{ + atomic::{AtomicU16, AtomicU32, AtomicUsize, Ordering}, + Arc, + }, + time::SystemTime, +}; + +use arc_swap::ArcSwap; +use gix_features::hash; + +/// An id to refer to an index file or a multipack index file +pub type IndexId = usize; +pub(crate) type StateId = u32; +pub(crate) type Generation = u32; +pub(crate) type AtomicGeneration = AtomicU32; + +/// A way to indicate which pack indices we have seen already and which of them are loaded, along with an idea +/// of whether stored `PackId`s are still usable. +#[derive(Default, Copy, Clone)] +pub struct SlotIndexMarker { + /// The generation the `loaded_until_index` belongs to. Indices of different generations are completely incompatible. + /// This value changes once the internal representation is compacted, something that may happen only if there is no handle + /// requiring stable pack indices. + pub(crate) generation: Generation, + /// A unique id identifying the index state as well as all loose databases we have last observed. + /// If it changes in any way, the value is different. + pub(crate) state_id: StateId, +} + +/// A way to load and refer to a pack uniquely, namespaced by their indexing mechanism, aka multi-pack or not. +#[derive(Debug, Copy, Clone, Eq, PartialEq)] +pub struct PackId { + /// This is the index in the slot map at which the packs index is located. + pub(crate) index: IndexId, + /// If the pack is in a multi-pack index, this additional index is the pack-index within the multi-pack index identified by `index`. 
+ pub(crate) multipack_index: Option<gix_pack::multi_index::PackIndex>, +} + +impl PackId { + /// Returns the maximum of indices we can represent. + pub(crate) const fn max_indices() -> usize { + (1 << 15) - 1 + } + /// Returns the maximum of packs we can represent if stored in a multi-index. + pub(crate) const fn max_packs_in_multi_index() -> gix_pack::multi_index::PackIndex { + (1 << 16) - 1 + } + /// Packs have a built-in identifier to make data structures simpler, and this method represents ourselves as such id + /// to be convertible back and forth. We essentially compress ourselves into a u32. + /// + /// Bit 16 is a marker to tell us if it's a multi-pack or not, the ones before are the index file itself, the ones after + /// are used to encode the pack index within the multi-pack. + pub(crate) fn to_intrinsic_pack_id(self) -> gix_pack::data::Id { + assert!(self.index < (1 << 15), "There shouldn't be more than 2^15 indices"); + match self.multipack_index { + None => self.index as gix_pack::data::Id, + Some(midx) => { + assert!( + midx <= Self::max_packs_in_multi_index(), + "There shouldn't be more than 2^16 packs per multi-index" + ); + ((self.index as gix_pack::data::Id | 1 << 15) | midx << 16) as gix_pack::data::Id + } + } + } + + pub(crate) fn from_intrinsic_pack_id(pack_id: gix_pack::data::Id) -> Self { + if pack_id & (1 << 15) == 0 { + PackId { + index: (pack_id & 0x7fff) as IndexId, + multipack_index: None, + } + } else { + PackId { + index: (pack_id & 0x7fff) as IndexId, + multipack_index: Some(pack_id >> 16), + } + } + } +} + +/// An index that changes only if the packs directory changes and its contents is re-read. +#[derive(Default)] +pub struct SlotMapIndex { + /// The index into the slot map at which we expect an index or pack file. Neither of these might be loaded yet. + pub(crate) slot_indices: Vec<usize>, + /// A list of loose object databases as resolved by their alternates file in the `object_directory`. The first entry is this objects + /// directory loose file database. All other entries are the loose stores of alternates. + /// It's in an Arc to be shared to Handles, but not to be shared across SlotMapIndices. + pub(crate) loose_dbs: Arc<Vec<crate::loose::Store>>, + + /// A static value that doesn't ever change for a particular clone of this index. + pub(crate) generation: Generation, + /// The number of indices loaded thus far when the index of the slot map was last examined, which can change as new indices are loaded + /// in parallel. + /// Shared across SlotMapIndex instances of the same generation. + pub(crate) next_index_to_load: Arc<AtomicUsize>, + /// Incremented by one up to `slot_indices.len()` once an attempt to load an index completed. + /// If a load failed, there will also be an increment. + /// Shared across SlotMapIndex instances of the same generation. + pub(crate) loaded_indices: Arc<AtomicUsize>, + /// The amount of indices that are currently being loaded. + /// Zero if no loading operation is currently happening, or more otherwise. + pub(crate) num_indices_currently_being_loaded: Arc<AtomicU16>, +} + +impl SlotMapIndex { + pub(crate) fn state_id(self: &Arc<SlotMapIndex>) -> StateId { + // We let the loaded indices take part despite not being part of our own snapshot. + // This is to account for indices being loaded in parallel without actually changing the snapshot itself. 
+ let hash = hash::crc32(&(Arc::as_ptr(self) as usize).to_be_bytes()); + hash::crc32_update(hash, &self.loaded_indices.load(Ordering::SeqCst).to_be_bytes()) + } + + pub(crate) fn marker(self: &Arc<SlotMapIndex>) -> SlotIndexMarker { + SlotIndexMarker { + generation: self.generation, + state_id: self.state_id(), + } + } + + /// Returns true if we already know at least one loose object db, a sign of being initialized + pub(crate) fn is_initialized(&self) -> bool { + !self.loose_dbs.is_empty() + } +} + +#[derive(Clone)] +pub(crate) struct OnDiskFile<T: Clone> { + /// The last known path of the file + path: Arc<PathBuf>, + /// the time the file was last modified + mtime: SystemTime, + state: OnDiskFileState<T>, +} + +#[derive(Clone)] +pub(crate) enum OnDiskFileState<T: Clone> { + /// The file is on disk and can be loaded from there. + Unloaded, + Loaded(T), + /// The file was loaded, but appeared to be missing on disk after reconciling our state with what's on disk. + /// As there were handles that required pack-id stability we had to keep the item to allow finding it on later + /// lookups. + Garbage(T), + /// File is missing on disk and could not be loaded when we tried or turned missing after reconciling our state. + Missing, +} + +impl<T: Clone> OnDiskFile<T> { + pub fn path(&self) -> &Path { + &self.path + } + /// Return true if we hold a memory map of the file already. + pub fn is_loaded(&self) -> bool { + matches!(self.state, OnDiskFileState::Loaded(_) | OnDiskFileState::Garbage(_)) + } + + /// Return true if we are to be collected as garbage + pub fn is_disposable(&self) -> bool { + matches!(self.state, OnDiskFileState::Garbage(_) | OnDiskFileState::Missing) + } + + // On error, always declare the file missing and return an error. + pub(crate) fn load_strict(&mut self, load: impl FnOnce(&Path) -> std::io::Result<T>) -> std::io::Result<()> { + use OnDiskFileState::*; + match self.state { + Unloaded | Missing => match load(&self.path) { + Ok(v) => { + self.state = Loaded(v); + Ok(()) + } + Err(err) => { + // TODO: Should be provide more information? We don't even know what exactly failed right now, degenerating information. + self.state = Missing; + Err(err) + } + }, + Loaded(_) | Garbage(_) => Ok(()), + } + } + /// If the file is missing, we don't consider this failure but instead return Ok(None) to allow recovery. + /// when we know that loading is necessary. This also works around borrow check, which is a nice coincidence. 
+ pub fn load_with_recovery(&mut self, load: impl FnOnce(&Path) -> std::io::Result<T>) -> std::io::Result<Option<T>> { + use OnDiskFileState::*; + match &mut self.state { + Loaded(v) | Garbage(v) => Ok(Some(v.clone())), + Missing => Ok(None), + Unloaded => match load(&self.path) { + Ok(v) => { + self.state = OnDiskFileState::Loaded(v.clone()); + Ok(Some(v)) + } + Err(err) if err.kind() == std::io::ErrorKind::NotFound => { + self.state = OnDiskFileState::Missing; + Ok(None) + } + Err(err) => Err(err), + }, + } + } + + pub fn loaded(&self) -> Option<&T> { + use OnDiskFileState::*; + match &self.state { + Loaded(v) | Garbage(v) => Some(v), + Unloaded | Missing => None, + } + } + + pub fn put_back(&mut self) { + match std::mem::replace(&mut self.state, OnDiskFileState::Missing) { + OnDiskFileState::Garbage(v) => self.state = OnDiskFileState::Loaded(v), + OnDiskFileState::Missing => self.state = OnDiskFileState::Unloaded, + other @ OnDiskFileState::Loaded(_) | other @ OnDiskFileState::Unloaded => self.state = other, + } + } + + pub fn trash(&mut self) { + match std::mem::replace(&mut self.state, OnDiskFileState::Missing) { + OnDiskFileState::Loaded(v) => self.state = OnDiskFileState::Garbage(v), + other @ OnDiskFileState::Garbage(_) + | other @ OnDiskFileState::Unloaded + | other @ OnDiskFileState::Missing => self.state = other, + } + } +} + +#[derive(Clone)] +pub(crate) struct IndexFileBundle { + pub index: OnDiskFile<Arc<gix_pack::index::File>>, + pub data: OnDiskFile<Arc<gix_pack::data::File>>, +} + +#[derive(Clone)] +pub(crate) struct MultiIndexFileBundle { + pub multi_index: OnDiskFile<Arc<gix_pack::multi_index::File>>, + pub data: Vec<OnDiskFile<Arc<gix_pack::data::File>>>, +} + +#[derive(Clone)] +pub(crate) enum IndexAndPacks { + Index(IndexFileBundle), + /// Note that there can only be one multi-pack file per repository, but thanks to git alternates, there can be multiple overall. + MultiIndex(MultiIndexFileBundle), +} + +impl IndexAndPacks { + pub(crate) fn index_path(&self) -> &Path { + match self { + IndexAndPacks::Index(index) => &index.index.path, + IndexAndPacks::MultiIndex(index) => &index.multi_index.path, + } + } + + pub(crate) fn mtime(&self) -> SystemTime { + match self { + IndexAndPacks::Index(index) => index.index.mtime, + IndexAndPacks::MultiIndex(index) => index.multi_index.mtime, + } + } + + /// If we are garbage, put ourselves into the loaded state. Otherwise put ourselves back to unloaded. + pub(crate) fn put_back(&mut self) { + match self { + IndexAndPacks::Index(bundle) => { + bundle.index.put_back(); + bundle.data.put_back(); + } + IndexAndPacks::MultiIndex(bundle) => { + bundle.multi_index.put_back(); + for data in &mut bundle.data { + data.put_back(); + } + } + } + } + + // The inverse of `put_back()`, by trashing the content. 
+ pub(crate) fn trash(&mut self) { + match self { + IndexAndPacks::Index(bundle) => { + bundle.index.trash(); + bundle.data.trash(); + } + IndexAndPacks::MultiIndex(bundle) => { + bundle.multi_index.trash(); + for data in &mut bundle.data { + data.trash(); + } + } + } + } + + pub(crate) fn index_is_loaded(&self) -> bool { + match self { + Self::Index(bundle) => bundle.index.is_loaded(), + Self::MultiIndex(bundle) => bundle.multi_index.is_loaded(), + } + } + + pub(crate) fn is_disposable(&self) -> bool { + match self { + Self::Index(bundle) => bundle.index.is_disposable() || bundle.data.is_disposable(), + Self::MultiIndex(bundle) => { + bundle.multi_index.is_disposable() || bundle.data.iter().any(|odf| odf.is_disposable()) + } + } + } + + pub(crate) fn load_index(&mut self, object_hash: gix_hash::Kind) -> std::io::Result<()> { + match self { + IndexAndPacks::Index(bundle) => bundle.index.load_strict(|path| { + gix_pack::index::File::at(path, object_hash) + .map(Arc::new) + .map_err(|err| match err { + gix_pack::index::init::Error::Io { source, .. } => source, + err => std::io::Error::new(std::io::ErrorKind::Other, err), + }) + }), + IndexAndPacks::MultiIndex(bundle) => { + bundle.multi_index.load_strict(|path| { + gix_pack::multi_index::File::at(path) + .map(Arc::new) + .map_err(|err| match err { + gix_pack::multi_index::init::Error::Io { source, .. } => source, + err => std::io::Error::new(std::io::ErrorKind::Other, err), + }) + })?; + if let Some(multi_index) = bundle.multi_index.loaded() { + bundle.data = Self::index_names_to_pack_paths(multi_index); + } + Ok(()) + } + } + } + + pub(crate) fn new_single(index_path: PathBuf, mtime: SystemTime) -> Self { + let data_path = index_path.with_extension("pack"); + Self::Index(IndexFileBundle { + index: OnDiskFile { + path: index_path.into(), + state: OnDiskFileState::Unloaded, + mtime, + }, + data: OnDiskFile { + path: data_path.into(), + state: OnDiskFileState::Unloaded, + mtime, + }, + }) + } + + pub(crate) fn new_multi_from_open_file(multi_index: Arc<gix_pack::multi_index::File>, mtime: SystemTime) -> Self { + let data = Self::index_names_to_pack_paths(&multi_index); + Self::MultiIndex(MultiIndexFileBundle { + multi_index: OnDiskFile { + path: Arc::new(multi_index.path().to_owned()), + state: OnDiskFileState::Loaded(multi_index), + mtime, + }, + data, + }) + } + + fn index_names_to_pack_paths( + multi_index: &gix_pack::multi_index::File, + ) -> Vec<OnDiskFile<Arc<gix_pack::data::File>>> { + let parent_dir = multi_index.path().parent().expect("parent present"); + let data = multi_index + .index_names() + .iter() + .map(|idx| OnDiskFile { + path: parent_dir.join(idx.with_extension("pack")).into(), + state: OnDiskFileState::Unloaded, + mtime: SystemTime::UNIX_EPOCH, + }) + .collect(); + data + } +} + +#[derive(Default)] +pub(crate) struct MutableIndexAndPack { + pub(crate) files: ArcSwap<Option<IndexAndPacks>>, + pub(crate) write: parking_lot::Mutex<()>, + /// The generation required at least to read this slot. If these mismatch, the caller is likely referring to a now changed slot + /// that has different content under the same id. + /// Must only be changed when the write lock is held. + pub(crate) generation: AtomicGeneration, +} + +/// A snapshot about resource usage. +#[derive(Debug, Clone, Copy, PartialEq, Eq, Default)] +#[cfg_attr(feature = "serde1", derive(serde::Serialize, serde::Deserialize))] +pub struct Metrics { + /// The total amount of handles which can be used to access object information. 
+ pub num_handles: usize, + /// The amount of refreshes performed to reconcile with the ODB state on disk. + pub num_refreshes: usize, + /// The amount of indices that are currently open and will be returned to handles. + pub open_reachable_indices: usize, + /// The amount of reachable, known indices, which aren't opened yet. + pub known_reachable_indices: usize, + /// The amount of packs which are open in memory and will be returned to handles. + pub open_reachable_packs: usize, + /// The amount of packs that are reachable and will be returned to handles. They aren't open yet. + pub known_packs: usize, + /// The amount of slots which are empty. + /// + /// Over time these will fill, but they can be emptied as files are removed from disk. + pub unused_slots: usize, + /// Unreachable indices are still using slots, but aren't returned to new handles anymore unless they still happen to + /// know their id. + /// + /// This allows to keep files available while they are still potentially required for operations like pack generation, despite + /// the file on disk being removed or changed. + pub unreachable_indices: usize, + /// Equivalent to `unreachable_indices`, but for mapped packed data files + pub unreachable_packs: usize, + /// The amount of loose object databases currently available for object retrieval. + /// + /// There may be more than one if 'alternates' are used. + pub loose_dbs: usize, +} + +#[cfg(test)] +mod tests { + use super::*; + + mod pack_id { + use super::PackId; + + #[test] + fn to_intrinsic_roundtrip() { + let single = PackId { + index: (1 << 15) - 1, + multipack_index: None, + }; + let multi = PackId { + index: (1 << 15) - 1, + multipack_index: Some((1 << 16) - 1), + }; + assert_eq!(PackId::from_intrinsic_pack_id(single.to_intrinsic_pack_id()), single); + assert_eq!(PackId::from_intrinsic_pack_id(multi.to_intrinsic_pack_id()), multi); + } + + #[test] + #[should_panic] + fn max_supported_index_count() { + PackId { + index: 1 << 15, + multipack_index: None, + } + .to_intrinsic_pack_id(); + } + } +} diff --git a/vendor/gix-odb/src/store_impls/dynamic/verify.rs b/vendor/gix-odb/src/store_impls/dynamic/verify.rs new file mode 100644 index 000000000..9a35cb5d7 --- /dev/null +++ b/vendor/gix-odb/src/store_impls/dynamic/verify.rs @@ -0,0 +1,267 @@ +use std::{ + ops::Deref, + sync::atomic::{AtomicBool, Ordering}, + time::Instant, +}; + +use gix_features::progress::{MessageLevel, Progress}; + +use crate::{ + pack, + store::verify::integrity::{IndexStatistics, SingleOrMultiStatistics}, + types::IndexAndPacks, +}; + +/// +pub mod integrity { + use std::{marker::PhantomData, path::PathBuf}; + + use crate::pack; + + /// Options for use in [`Store::verify_integrity()`][crate::Store::verify_integrity()]. + pub type Options<F> = pack::index::verify::integrity::Options<F>; + + /// Returned by [`Store::verify_integrity()`][crate::Store::verify_integrity()]. 
+ #[derive(Debug, thiserror::Error)] + #[allow(missing_docs)] + pub enum Error { + #[error(transparent)] + MultiIndexIntegrity(#[from] pack::index::traverse::Error<pack::multi_index::verify::integrity::Error>), + #[error(transparent)] + IndexIntegrity(#[from] pack::index::traverse::Error<pack::index::verify::integrity::Error>), + #[error(transparent)] + IndexOpen(#[from] pack::index::init::Error), + #[error(transparent)] + LooseObjectStoreIntegrity(#[from] crate::loose::verify::integrity::Error), + #[error(transparent)] + MultiIndexOpen(#[from] pack::multi_index::init::Error), + #[error(transparent)] + PackOpen(#[from] pack::data::init::Error), + #[error(transparent)] + InitializeODB(#[from] crate::store::load_index::Error), + #[error("The disk on state changed while performing the operation, and we observed the change.")] + NeedsRetryDueToChangeOnDisk, + } + + #[derive(Debug, PartialEq, Eq, Hash, Ord, PartialOrd, Clone)] + #[cfg_attr(feature = "serde1", derive(serde::Serialize, serde::Deserialize))] + /// Integrity information about loose object databases + pub struct LooseObjectStatistics { + /// The path to the root directory of the loose objects database + pub path: PathBuf, + /// The statistics created after verifying the loose object database. + pub statistics: crate::loose::verify::integrity::Statistics, + } + + #[derive(Debug, PartialEq, Eq, Hash, Ord, PartialOrd, Clone)] + #[cfg_attr(feature = "serde1", derive(serde::Serialize, serde::Deserialize))] + /// Traversal statistics of packs governed by single indices or multi-pack indices. + #[allow(missing_docs)] + pub enum SingleOrMultiStatistics { + Single(pack::index::traverse::Statistics), + Multi(Vec<(PathBuf, pack::index::traverse::Statistics)>), + } + + /// Statistics gathered when traversing packs of various kinds of indices. + #[derive(Debug, PartialEq, Eq, Hash, Ord, PartialOrd, Clone)] + #[cfg_attr(feature = "serde1", derive(serde::Serialize, serde::Deserialize))] + pub struct IndexStatistics { + /// The path to the index or multi-pack index for which statics were gathered. + pub path: PathBuf, + /// The actual statistics for the index at `path`. + pub statistics: SingleOrMultiStatistics, + } + + /// Returned by [`Store::verify_integrity()`][crate::Store::verify_integrity()]. + pub struct Outcome<P> { + /// Statistics for validated loose object stores. + pub loose_object_stores: Vec<LooseObjectStatistics>, + /// Pack traversal statistics for each index and their pack(s) + pub index_statistics: Vec<IndexStatistics>, + /// The provided progress instance. + pub progress: P, + } + + /// The progress ids used in [`Store::verify_integrity()`][crate::Store::verify_integrity()]. + /// + /// Use this information to selectively extract the progress of interest in case the parent application has custom visualization. + #[derive(Debug, Copy, Clone)] + pub enum ProgressId { + /// Contains the path of the currently validated loose object database. + VerifyLooseObjectDbPath, + /// The root progress for all verification of an index. It doesn't contain any useful information itself. + VerifyIndex(PhantomData<gix_pack::index::verify::integrity::ProgressId>), + /// The root progress for all verification of a multi-index. It doesn't contain any useful information itself. 
+ VerifyMultiIndex(PhantomData<gix_pack::multi_index::verify::integrity::ProgressId>), + } + + impl From<ProgressId> for gix_features::progress::Id { + fn from(v: ProgressId) -> Self { + match v { + ProgressId::VerifyLooseObjectDbPath => *b"VISP", + ProgressId::VerifyMultiIndex(_) => *b"VIMI", + ProgressId::VerifyIndex(_) => *b"VISI", + } + } + } +} + +impl super::Store { + /// Check the integrity of all objects as per the given `options`. + /// + /// Note that this will not not force loading all indices or packs permanently, as we will only use the momentarily loaded disk state. + /// This does, however, include all alternates. + pub fn verify_integrity<C, P, F>( + &self, + mut progress: P, + should_interrupt: &AtomicBool, + options: integrity::Options<F>, + ) -> Result<integrity::Outcome<P>, integrity::Error> + where + P: Progress, + C: pack::cache::DecodeEntry, + F: Fn() -> C + Send + Clone, + { + let mut index = self.index.load(); + if !index.is_initialized() { + self.consolidate_with_disk_state(true, false)?; + index = self.index.load(); + assert!( + index.is_initialized(), + "BUG: after consolidating successfully, we have an initialized index" + ) + } + + progress.init( + Some(index.slot_indices.len()), + gix_features::progress::count("pack indices"), + ); + let mut statistics = Vec::new(); + let index_check_message = |path: &std::path::Path| { + format!( + "Checking integrity: {}", + path.file_name() + .map(|f| f.to_string_lossy()) + .unwrap_or_else(std::borrow::Cow::default) + ) + }; + for slot_index in &index.slot_indices { + let slot = &self.files[*slot_index]; + if slot.generation.load(Ordering::SeqCst) != index.generation { + return Err(integrity::Error::NeedsRetryDueToChangeOnDisk); + } + let files = slot.files.load(); + let files = Option::as_ref(&files).ok_or(integrity::Error::NeedsRetryDueToChangeOnDisk)?; + + let start = Instant::now(); + let (mut child_progress, num_objects, index_path) = match files { + IndexAndPacks::Index(bundle) => { + let index; + let index = match bundle.index.loaded() { + Some(index) => index.deref(), + None => { + index = pack::index::File::at(bundle.index.path(), self.object_hash)?; + &index + } + }; + let pack; + let data = match bundle.data.loaded() { + Some(pack) => pack.deref(), + None => { + pack = pack::data::File::at(bundle.data.path(), self.object_hash)?; + &pack + } + }; + let outcome = index.verify_integrity( + Some(pack::index::verify::PackContext { + data, + options: options.clone(), + }), + progress.add_child_with_id( + "verify index", + integrity::ProgressId::VerifyIndex(Default::default()).into(), + ), + should_interrupt, + )?; + statistics.push(IndexStatistics { + path: bundle.index.path().to_owned(), + statistics: SingleOrMultiStatistics::Single( + outcome + .pack_traverse_statistics + .expect("pack provided so there are stats"), + ), + }); + (outcome.progress, index.num_objects(), index.path().to_owned()) + } + IndexAndPacks::MultiIndex(bundle) => { + let index; + let index = match bundle.multi_index.loaded() { + Some(index) => index.deref(), + None => { + index = pack::multi_index::File::at(bundle.multi_index.path())?; + &index + } + }; + let outcome = index.verify_integrity( + progress.add_child_with_id( + "verify multi-index", + integrity::ProgressId::VerifyMultiIndex(Default::default()).into(), + ), + should_interrupt, + options.clone(), + )?; + + let index_dir = bundle.multi_index.path().parent().expect("file in a directory"); + statistics.push(IndexStatistics { + path: Default::default(), + statistics: 
SingleOrMultiStatistics::Multi( + outcome + .pack_traverse_statistics + .into_iter() + .zip(index.index_names()) + .map(|(statistics, index_name)| (index_dir.join(index_name), statistics)) + .collect(), + ), + }); + (outcome.progress, index.num_objects(), index.path().to_owned()) + } + }; + + child_progress.set_name(index_check_message(&index_path)); + child_progress.show_throughput_with( + start, + num_objects as usize, + gix_features::progress::count("objects").expect("set"), + MessageLevel::Success, + ); + progress.inc(); + } + + progress.init( + Some(index.loose_dbs.len()), + gix_features::progress::count("loose object stores"), + ); + let mut loose_object_stores = Vec::new(); + for loose_db in &*index.loose_dbs { + let out = loose_db + .verify_integrity( + progress.add_child_with_id( + loose_db.path().display().to_string(), + integrity::ProgressId::VerifyLooseObjectDbPath.into(), + ), + should_interrupt, + ) + .map(|statistics| integrity::LooseObjectStatistics { + path: loose_db.path().to_owned(), + statistics, + })?; + loose_object_stores.push(out); + } + + Ok(integrity::Outcome { + loose_object_stores, + index_statistics: statistics, + progress, + }) + } +} diff --git a/vendor/gix-odb/src/store_impls/dynamic/write.rs b/vendor/gix-odb/src/store_impls/dynamic/write.rs new file mode 100644 index 000000000..a2e40eec4 --- /dev/null +++ b/vendor/gix-odb/src/store_impls/dynamic/write.rs @@ -0,0 +1,47 @@ +use std::{io::Read, ops::Deref}; + +use gix_hash::ObjectId; +use gix_object::Kind; + +use crate::store; + +mod error { + use crate::{loose, store}; + + /// The error returned by the [dynamic Store's][crate::Store] [`Write`][crate::Write] implementation. + #[derive(Debug, thiserror::Error)] + #[allow(missing_docs)] + pub enum Error { + #[error(transparent)] + LoadIndex(#[from] store::load_index::Error), + #[error(transparent)] + LooseWrite(#[from] loose::write::Error), + #[error(transparent)] + Io(#[from] std::io::Error), + } +} +pub use error::Error; + +use crate::store_impls::dynamic; + +impl<S> crate::Write for store::Handle<S> +where + S: Deref<Target = dynamic::Store> + Clone, +{ + type Error = Error; + + fn write_stream(&self, kind: Kind, size: u64, from: impl Read) -> Result<ObjectId, Self::Error> { + let mut snapshot = self.snapshot.borrow_mut(); + Ok(match snapshot.loose_dbs.first() { + Some(ldb) => ldb.write_stream(kind, size, from)?, + None => { + let new_snapshot = self + .store + .load_one_index(self.refresh, snapshot.marker)? + .expect("there is always at least one ODB, and this code runs only once for initialization"); + *snapshot = new_snapshot; + snapshot.loose_dbs[0].write_stream(kind, size, from)? 
+ } + }) + } +} diff --git a/vendor/gix-odb/src/store_impls/loose/find.rs b/vendor/gix-odb/src/store_impls/loose/find.rs new file mode 100644 index 000000000..13bd26818 --- /dev/null +++ b/vendor/gix-odb/src/store_impls/loose/find.rs @@ -0,0 +1,262 @@ +use std::{cmp::Ordering, collections::HashSet, fs, io::Read, path::PathBuf}; + +use gix_features::zlib; + +use crate::store_impls::loose::{hash_path, Store, HEADER_MAX_SIZE}; + +/// Returned by [`Store::try_find()`] +#[derive(thiserror::Error, Debug)] +#[allow(missing_docs)] +pub enum Error { + #[error("decompression of loose object at '{path}' failed")] + DecompressFile { + source: zlib::inflate::Error, + path: PathBuf, + }, + #[error("file at '{path}' showed invalid size of inflated data, expected {expected}, got {actual}")] + SizeMismatch { + actual: usize, + expected: usize, + path: PathBuf, + }, + #[error(transparent)] + Decode(#[from] gix_object::decode::LooseHeaderDecodeError), + #[error("Could not {action} data at '{path}'")] + Io { + source: std::io::Error, + action: &'static str, + path: PathBuf, + }, +} + +/// Object lookup +impl Store { + const OPEN_ACTION: &'static str = "open"; + + /// Returns true if the given id is contained in our repository. + pub fn contains(&self, id: impl AsRef<gix_hash::oid>) -> bool { + debug_assert_eq!(self.object_hash, id.as_ref().kind()); + hash_path(id.as_ref(), self.path.clone()).is_file() + } + + /// Given a `prefix`, find an object that matches it uniquely within this loose object + /// database as `Ok(Some(Ok(<oid>)))`. + /// If there is more than one object matching the object `Ok(Some(Err(()))` is returned. + /// + /// Finally, if no object matches, the return value is `Ok(None)`. + /// + /// The outer `Result` is to indicate errors during file system traversal. + /// + /// Pass `candidates` to obtain the set of all object ids matching `prefix`, with the same return value as + /// one would have received if it remained `None`. + pub fn lookup_prefix( + &self, + prefix: gix_hash::Prefix, + mut candidates: Option<&mut HashSet<gix_hash::ObjectId>>, + ) -> Result<Option<crate::store::prefix::lookup::Outcome>, crate::loose::iter::Error> { + let single_directory_iter = crate::loose::Iter { + inner: gix_features::fs::walkdir_new( + self.path.join(prefix.as_oid().to_hex_with_len(2).to_string()), + gix_features::fs::walkdir::Parallelism::Serial, + ) + .min_depth(1) + .max_depth(1) + .follow_links(false) + .into_iter(), + hash_hex_len: prefix.as_oid().kind().len_in_hex(), + }; + let mut candidate = None; + for oid in single_directory_iter { + let oid = match oid { + Ok(oid) => oid, + Err(err) => { + return match err.io_error() { + Some(err) if err.kind() == std::io::ErrorKind::NotFound => Ok(None), + None | Some(_) => Err(err), + } + } + }; + if prefix.cmp_oid(&oid) == Ordering::Equal { + match &mut candidates { + Some(candidates) => { + candidates.insert(oid); + } + None => { + if candidate.is_some() { + return Ok(Some(Err(()))); + } + candidate = Some(oid); + } + } + } + } + + match &mut candidates { + Some(candidates) => match candidates.len() { + 0 => Ok(None), + 1 => Ok(candidates.iter().next().cloned().map(Ok)), + _ => Ok(Some(Err(()))), + }, + None => Ok(candidate.map(Ok)), + } + } + + /// Return the object identified by the given [`ObjectId`][gix_hash::ObjectId] if present in this database, + /// writing its raw data into the given `out` buffer. + /// + /// Returns `Err` if there was an error locating or reading the object. Returns `Ok<None>` if + /// there was no such object. 
+ pub fn try_find<'a>( + &self, + id: impl AsRef<gix_hash::oid>, + out: &'a mut Vec<u8>, + ) -> Result<Option<gix_object::Data<'a>>, Error> { + debug_assert_eq!(self.object_hash, id.as_ref().kind()); + match self.find_inner(id.as_ref(), out) { + Ok(obj) => Ok(Some(obj)), + Err(err) => match err { + Error::Io { + source: err, + action, + path, + } => { + if action == Self::OPEN_ACTION && err.kind() == std::io::ErrorKind::NotFound { + Ok(None) + } else { + Err(Error::Io { + source: err, + action, + path, + }) + } + } + err => Err(err), + }, + } + } + + /// Return only the decompressed size of the object and its kind without fully reading it into memory as tuple of `(size, kind)`. + /// Returns `None` if `id` does not exist in the database. + pub fn try_header(&self, id: impl AsRef<gix_hash::oid>) -> Result<Option<(usize, gix_object::Kind)>, Error> { + const BUF_SIZE: usize = 256; + let mut buf = [0_u8; BUF_SIZE]; + let path = hash_path(id.as_ref(), self.path.clone()); + + let mut inflate = zlib::Inflate::default(); + let mut istream = match fs::File::open(&path) { + Ok(f) => f, + Err(err) if err.kind() == std::io::ErrorKind::NotFound => return Ok(None), + Err(err) => { + return Err(Error::Io { + source: err, + action: Self::OPEN_ACTION, + path, + }) + } + }; + + let (compressed_buf, _) = buf.split_at_mut(BUF_SIZE - HEADER_MAX_SIZE); + let bytes_read = istream.read(compressed_buf).map_err(|e| Error::Io { + source: e, + action: "read", + path: path.to_owned(), + })?; + let (compressed_buf, header_buf) = buf.split_at_mut(bytes_read); + let (status, _consumed_in, consumed_out) = + inflate + .once(compressed_buf, header_buf) + .map_err(|e| Error::DecompressFile { + source: e, + path: path.to_owned(), + })?; + + if status == zlib::Status::BufError { + return Err(Error::DecompressFile { + source: zlib::inflate::Error::Status(status), + path, + }); + } + let (kind, size, _header_size) = gix_object::decode::loose_header(&header_buf[..consumed_out])?; + Ok(Some((size, kind))) + } + + fn find_inner<'a>(&self, id: &gix_hash::oid, buf: &'a mut Vec<u8>) -> Result<gix_object::Data<'a>, Error> { + let path = hash_path(id, self.path.clone()); + + let mut inflate = zlib::Inflate::default(); + let ((status, consumed_in, consumed_out), bytes_read) = { + let mut istream = fs::File::open(&path).map_err(|e| Error::Io { + source: e, + action: Self::OPEN_ACTION, + path: path.to_owned(), + })?; + + buf.clear(); + let bytes_read = istream.read_to_end(buf).map_err(|e| Error::Io { + source: e, + action: "read", + path: path.to_owned(), + })?; + buf.resize(bytes_read + HEADER_MAX_SIZE, 0); + let (input, output) = buf.split_at_mut(bytes_read); + ( + inflate + .once(&input[..bytes_read], output) + .map_err(|e| Error::DecompressFile { + source: e, + path: path.to_owned(), + })?, + bytes_read, + ) + }; + if status == zlib::Status::BufError { + return Err(Error::DecompressFile { + source: zlib::inflate::Error::Status(status), + path, + }); + } + + let decompressed_start = bytes_read; + let (kind, size, header_size) = + gix_object::decode::loose_header(&buf[decompressed_start..decompressed_start + consumed_out])?; + + if status == zlib::Status::StreamEnd { + let decompressed_body_bytes_sans_header = + decompressed_start + header_size..decompressed_start + consumed_out; + + if consumed_out != size + header_size { + return Err(Error::SizeMismatch { + expected: size + header_size, + actual: consumed_out, + path, + }); + } + buf.copy_within(decompressed_body_bytes_sans_header, 0); + } else { + buf.resize(bytes_read + size + 
header_size, 0); + { + let (input, output) = buf.split_at_mut(bytes_read); + let num_decompressed_bytes = zlib::stream::inflate::read( + &mut &input[consumed_in..], + &mut inflate.state, + &mut output[consumed_out..], + ) + .map_err(|e| Error::Io { + source: e, + action: "deflate", + path: path.to_owned(), + })?; + if num_decompressed_bytes + consumed_out != size + header_size { + return Err(Error::SizeMismatch { + expected: size + header_size, + actual: num_decompressed_bytes + consumed_out, + path, + }); + } + }; + buf.copy_within(decompressed_start + header_size.., 0); + } + buf.resize(size, 0); + Ok(gix_object::Data { kind, data: buf }) + } +} diff --git a/vendor/gix-odb/src/store_impls/loose/iter.rs b/vendor/gix-odb/src/store_impls/loose/iter.rs new file mode 100644 index 000000000..5904aeb12 --- /dev/null +++ b/vendor/gix-odb/src/store_impls/loose/iter.rs @@ -0,0 +1,81 @@ +use gix_features::fs; + +use crate::store_impls::loose; + +/// Returned by [`loose::Store::iter()`] +pub type Error = gix_features::fs::walkdir::Error; + +impl loose::Iter { + fn path_to_id( + &self, + res: Result<fs::walkdir::DirEntry, fs::walkdir::Error>, + ) -> Option<Result<gix_hash::ObjectId, Error>> { + use std::path::Component::Normal; + + match res { + Ok(e) => { + let p = e.path(); + let mut ci = p.components(); + let (c2, c1) = (ci.next_back(), ci.next_back()); + if let (Some(Normal(c1)), Some(Normal(c2))) = (c1, c2) { + if c1.len() == 2 && c2.len() == self.hash_hex_len - 2 { + if let (Some(c1), Some(c2)) = (c1.to_str(), c2.to_str()) { + let mut buf = gix_hash::Kind::hex_buf(); + { + let (first_byte, rest) = buf[..self.hash_hex_len].split_at_mut(2); + first_byte.copy_from_slice(c1.as_bytes()); + rest.copy_from_slice(c2.as_bytes()); + } + if let Ok(b) = gix_hash::ObjectId::from_hex(&buf[..self.hash_hex_len]) { + return Some(Ok(b)); + } + } + } + } + } + Err(err) => return Some(Err(err)), + }; + None + } +} + +impl Iterator for loose::Iter { + type Item = Result<gix_hash::ObjectId, Error>; + + fn next(&mut self) -> Option<Self::Item> { + while let Some(res) = self.inner.next() { + if let Some(res) = self.path_to_id(res) { + return Some(res); + } + } + None + } +} + +/// Iteration and traversal +impl loose::Store { + /// Return an iterator over all objects contained in the database. + /// + /// The [`Id`][gix_hash::ObjectId]s returned by the iterator can typically be used in the [`locate(…)`][loose::Store::try_find()] method. + /// _Note_ that the result is not sorted or stable, thus ordering can change between runs. + /// + /// # Notes + /// + /// [`loose::Iter`] is used instead of `impl Iterator<…>` to allow using this iterator in struct fields, as is currently + /// needed if iterators need to be implemented by hand in the absence of generators. + pub fn iter(&self) -> loose::Iter { + loose::Iter { + inner: fs::walkdir_new( + &self.path, + fs::walkdir::Parallelism::ThreadPoolPerTraversal { + thread_name: "gix_odb::loose::Store::iter: fs-walk", + }, + ) + .min_depth(2) + .max_depth(3) + .follow_links(false) + .into_iter(), + hash_hex_len: self.object_hash.len_in_hex(), + } + } +} diff --git a/vendor/gix-odb/src/store_impls/loose/mod.rs b/vendor/gix-odb/src/store_impls/loose/mod.rs new file mode 100644 index 000000000..17e4a33d6 --- /dev/null +++ b/vendor/gix-odb/src/store_impls/loose/mod.rs @@ -0,0 +1,66 @@ +//! An object database storing each object in a zlib compressed file with its hash in the path +/// The maximum size that an object header can have. 
`git2` says 64, and `git` says 32 but also mentions it can be larger. +const HEADER_MAX_SIZE: usize = 64; +use std::path::{Path, PathBuf}; + +use gix_features::fs; + +/// A database for reading and writing objects to disk, one file per object. +#[derive(Clone, PartialEq, Eq)] +pub struct Store { + /// The directory in which objects are stored, containing 256 folders representing the hashes first byte. + pub(crate) path: PathBuf, + /// The kind of hash we should assume during iteration and when writing new objects. + pub(crate) object_hash: gix_hash::Kind, +} + +/// Initialization +impl Store { + /// Initialize the Db with the `objects_directory` containing the hexadecimal first byte subdirectories, which in turn + /// contain all loose objects. + /// + /// In a git repository, this would be `.git/objects`. + /// + /// The `object_hash` determines which hash to use when writing, finding or iterating objects. + pub fn at(objects_directory: impl Into<PathBuf>, object_hash: gix_hash::Kind) -> Store { + Store { + path: objects_directory.into(), + object_hash, + } + } + + /// Return the path to our `objects` directory. + pub fn path(&self) -> &Path { + &self.path + } + + /// Return the kind of hash we would iterate and write. + pub fn object_hash(&self) -> gix_hash::Kind { + self.object_hash + } +} + +fn hash_path(id: &gix_hash::oid, mut root: PathBuf) -> PathBuf { + let mut hex = gix_hash::Kind::hex_buf(); + let hex_len = id.hex_to_buf(hex.as_mut()); + let buf = std::str::from_utf8(&hex[..hex_len]).expect("ascii only in hex"); + root.push(&buf[..2]); + root.push(&buf[2..]); + root +} + +/// +pub mod find; +/// +pub mod iter; +/// +pub mod verify; + +/// The type for an iterator over `Result<gix_hash::ObjectId, Error>)` +pub struct Iter { + inner: fs::walkdir::DirEntryIter, + hash_hex_len: usize, +} + +/// +pub mod write; diff --git a/vendor/gix-odb/src/store_impls/loose/verify.rs b/vendor/gix-odb/src/store_impls/loose/verify.rs new file mode 100644 index 000000000..648e5764c --- /dev/null +++ b/vendor/gix-odb/src/store_impls/loose/verify.rs @@ -0,0 +1,103 @@ +use std::{ + sync::atomic::{AtomicBool, Ordering}, + time::Instant, +}; + +use gix_features::progress::Progress; + +use crate::{loose::Store, Write}; + +/// +pub mod integrity { + /// The error returned by [`verify_integrity()`][super::Store::verify_integrity()]. + #[derive(Debug, thiserror::Error)] + #[allow(missing_docs)] + pub enum Error { + #[error("{kind} object {id} could not be decoded")] + ObjectDecode { + source: gix_object::decode::Error, + kind: gix_object::Kind, + id: gix_hash::ObjectId, + }, + #[error("{kind} object {expected} wasn't re-encoded without change - new hash is {actual}")] + ObjectHashMismatch { + kind: gix_object::Kind, + actual: gix_hash::ObjectId, + expected: gix_hash::ObjectId, + }, + #[error("Objects were deleted during iteration - try again")] + Retry, + #[error("Interrupted")] + Interrupted, + } + + /// The outcome returned by [`verify_integrity()`][super::Store::verify_integrity()]. + #[derive(Debug, PartialEq, Eq, Hash, Ord, PartialOrd, Clone)] + #[cfg_attr(feature = "serde1", derive(serde::Serialize, serde::Deserialize))] + pub struct Statistics { + /// The amount of loose objects we checked. + pub num_objects: usize, + } + + /// The progress ids used in [`verify_integrity()`][super::Store::verify_integrity()]. + /// + /// Use this information to selectively extract the progress of interest in case the parent application has custom visualization. 
+ #[derive(Debug, Copy, Clone)] + pub enum ProgressId { + /// The amount of loose objects that have been verified. + LooseObjects, + } + + impl From<ProgressId> for gix_features::progress::Id { + fn from(v: ProgressId) -> Self { + match v { + ProgressId::LooseObjects => *b"VILO", + } + } + } +} + +impl Store { + /// Check all loose objects for their integrity checking their hash matches the actual data and by decoding them fully. + pub fn verify_integrity( + &self, + mut progress: impl Progress, + should_interrupt: &AtomicBool, + ) -> Result<integrity::Statistics, integrity::Error> { + let mut buf = Vec::new(); + let sink = crate::sink(self.object_hash); + + let mut num_objects = 0; + let start = Instant::now(); + let mut progress = progress.add_child_with_id("Validating", integrity::ProgressId::LooseObjects.into()); + progress.init(None, gix_features::progress::count("loose objects")); + for id in self.iter().filter_map(Result::ok) { + let object = self + .try_find(id, &mut buf) + .map_err(|_| integrity::Error::Retry)? + .ok_or(integrity::Error::Retry)?; + let actual_id = sink.write_buf(object.kind, object.data).expect("sink never fails"); + if actual_id != id { + return Err(integrity::Error::ObjectHashMismatch { + kind: object.kind, + actual: actual_id, + expected: id, + }); + } + object.decode().map_err(|err| integrity::Error::ObjectDecode { + source: err, + kind: object.kind, + id, + })?; + + progress.inc(); + num_objects += 1; + if should_interrupt.load(Ordering::SeqCst) { + return Err(integrity::Error::Interrupted); + } + } + progress.show_throughput(start); + + Ok(integrity::Statistics { num_objects }) + } +} diff --git a/vendor/gix-odb/src/store_impls/loose/write.rs b/vendor/gix-odb/src/store_impls/loose/write.rs new file mode 100644 index 000000000..e87462e4c --- /dev/null +++ b/vendor/gix-odb/src/store_impls/loose/write.rs @@ -0,0 +1,135 @@ +use std::{convert::TryInto, fs, io, io::Write, path::PathBuf}; + +use gix_features::{hash, zlib::stream::deflate}; +use gix_object::WriteTo; +use tempfile::NamedTempFile; + +use super::Store; +use crate::store_impls::loose; + +/// Returned by the [`crate::Write`] trait implementation of [`Store`] +#[derive(thiserror::Error, Debug)] +#[allow(missing_docs)] +pub enum Error { + #[error("Could not {message} '{path}'")] + Io { + source: io::Error, + message: &'static str, + path: PathBuf, + }, + #[error("An IO error occurred while writing an object")] + IoRaw(#[from] io::Error), + #[error("Could not turn temporary file into persisted file at '{target}'")] + Persist { + source: tempfile::PersistError, + target: PathBuf, + }, +} + +impl crate::traits::Write for Store { + type Error = Error; + + fn write(&self, object: impl WriteTo) -> Result<gix_hash::ObjectId, Self::Error> { + let mut to = self.dest()?; + to.write_all(&object.loose_header()).map_err(|err| Error::Io { + source: err, + message: "write header to tempfile in", + path: self.path.to_owned(), + })?; + object.write_to(&mut to).map_err(|err| Error::Io { + source: err, + message: "stream all data into tempfile in", + path: self.path.to_owned(), + })?; + to.flush()?; + self.finalize_object(to) + } + + /// Write the given buffer in `from` to disk in one syscall at best. + /// + /// This will cost at least 4 IO operations. 
+ fn write_buf(&self, kind: gix_object::Kind, from: &[u8]) -> Result<gix_hash::ObjectId, Self::Error> { + let mut to = self.dest()?; + to.write_all(&gix_object::encode::loose_header(kind, from.len())) + .map_err(|err| Error::Io { + source: err, + message: "write header to tempfile in", + path: self.path.to_owned(), + })?; + + to.write_all(from).map_err(|err| Error::Io { + source: err, + message: "stream all data into tempfile in", + path: self.path.to_owned(), + })?; + to.flush()?; + self.finalize_object(to) + } + + /// Write the given stream in `from` to disk with at least one syscall. + /// + /// This will cost at least 4 IO operations. + fn write_stream( + &self, + kind: gix_object::Kind, + size: u64, + mut from: impl io::Read, + ) -> Result<gix_hash::ObjectId, Self::Error> { + let mut to = self.dest()?; + to.write_all(&gix_object::encode::loose_header( + kind, + size.try_into().expect("object size to fit into usize"), + )) + .map_err(|err| Error::Io { + source: err, + message: "write header to tempfile in", + path: self.path.to_owned(), + })?; + + io::copy(&mut from, &mut to).map_err(|err| Error::Io { + source: err, + message: "stream all data into tempfile in", + path: self.path.to_owned(), + })?; + to.flush()?; + self.finalize_object(to) + } +} + +type CompressedTempfile = deflate::Write<NamedTempFile>; + +impl Store { + fn dest(&self) -> Result<hash::Write<CompressedTempfile>, Error> { + Ok(hash::Write::new( + deflate::Write::new(NamedTempFile::new_in(&self.path).map_err(|err| Error::Io { + source: err, + message: "create named temp file in", + path: self.path.to_owned(), + })?), + self.object_hash, + )) + } + + fn finalize_object( + &self, + hash::Write { hash, inner: file }: hash::Write<CompressedTempfile>, + ) -> Result<gix_hash::ObjectId, Error> { + let id = gix_hash::ObjectId::from(hash.digest()); + let object_path = loose::hash_path(&id, self.path.clone()); + let object_dir = object_path + .parent() + .expect("each object path has a 1 hex-bytes directory"); + if let Err(err) = fs::create_dir(object_dir) { + match err.kind() { + io::ErrorKind::AlreadyExists => {} + _ => return Err(err.into()), + } + } + let file = file.into_inner(); + file.persist(&object_path).map_err(|err| Error::Persist { + source: err, + target: object_path, + })?; + Ok(id) + } +} diff --git a/vendor/gix-odb/src/store_impls/mod.rs b/vendor/gix-odb/src/store_impls/mod.rs new file mode 100644 index 000000000..4e3250508 --- /dev/null +++ b/vendor/gix-odb/src/store_impls/mod.rs @@ -0,0 +1,2 @@ +pub mod dynamic; +pub mod loose; diff --git a/vendor/gix-odb/src/traits.rs b/vendor/gix-odb/src/traits.rs new file mode 100644 index 000000000..ddec78b8e --- /dev/null +++ b/vendor/gix-odb/src/traits.rs @@ -0,0 +1,310 @@ +use std::io; + +use gix_object::WriteTo; + +/// Describe the capability to write git objects into an object store. +pub trait Write { + /// The error type used for all trait methods. + /// + /// _Note_ the default implementations require the `From<io::Error>` bound. + type Error: std::error::Error + From<io::Error>; + + /// Write objects using the intrinsic kind of [`hash`][gix_hash::Kind] into the database, + /// returning id to reference it in subsequent reads. + fn write(&self, object: impl WriteTo) -> Result<gix_hash::ObjectId, Self::Error> { + let mut buf = Vec::with_capacity(2048); + object.write_to(&mut buf)?; + self.write_stream(object.kind(), buf.len() as u64, buf.as_slice()) + } + /// As [`write`][Write::write], but takes an [`object` kind][gix_object::Kind] along with its encoded bytes. 
+ fn write_buf(&self, object: gix_object::Kind, from: &[u8]) -> Result<gix_hash::ObjectId, Self::Error> { + self.write_stream(object, from.len() as u64, from) + } + /// As [`write`][Write::write], but takes an input stream. + /// This is commonly used for writing blobs directly without reading them to memory first. + fn write_stream( + &self, + kind: gix_object::Kind, + size: u64, + from: impl io::Read, + ) -> Result<gix_hash::ObjectId, Self::Error>; +} + +/// Describe how object can be located in an object store. +/// +/// ## Notes +/// +/// Find effectively needs [generic associated types][issue] to allow a trait for the returned object type. +/// Until then, we will have to make due with explicit types and give them the potentially added features we want. +/// +/// [issue]: https://github.com/rust-lang/rust/issues/44265 +pub trait Find { + /// The error returned by [`try_find()`][Find::try_find()] + type Error: std::error::Error + 'static; + + /// Returns true if the object exists in the database. + fn contains(&self, id: impl AsRef<gix_hash::oid>) -> bool; + + /// Find an object matching `id` in the database while placing its raw, possibly encoded data into `buffer`. + /// + /// Returns `Some` object if it was present in the database, or the error that occurred during lookup or object + /// retrieval. + fn try_find<'a>( + &self, + id: impl AsRef<gix_hash::oid>, + buffer: &'a mut Vec<u8>, + ) -> Result<Option<gix_object::Data<'a>>, Self::Error>; +} + +/// A way to obtain object properties without fully decoding it. +pub trait Header { + /// The error returned by [`try_header()`][Header::try_header()]. + type Error: std::error::Error + 'static; + /// Try to read the header of the object associated with `id` or return `None` if it could not be found. 
+ fn try_header(&self, id: impl AsRef<gix_hash::oid>) -> Result<Option<find::Header>, Self::Error>; +} + +mod _impls { + use std::{io::Read, ops::Deref, rc::Rc, sync::Arc}; + + use gix_hash::{oid, ObjectId}; + use gix_object::{Data, Kind, WriteTo}; + + use crate::find::Header; + + impl<T> crate::Write for &T + where + T: crate::Write, + { + type Error = T::Error; + + fn write(&self, object: impl WriteTo) -> Result<ObjectId, Self::Error> { + (*self).write(object) + } + + fn write_buf(&self, object: Kind, from: &[u8]) -> Result<ObjectId, Self::Error> { + (*self).write_buf(object, from) + } + + fn write_stream(&self, kind: Kind, size: u64, from: impl Read) -> Result<ObjectId, Self::Error> { + (*self).write_stream(kind, size, from) + } + } + + impl<T> crate::Write for Arc<T> + where + T: crate::Write, + { + type Error = T::Error; + + fn write(&self, object: impl WriteTo) -> Result<ObjectId, Self::Error> { + self.deref().write(object) + } + + fn write_buf(&self, object: Kind, from: &[u8]) -> Result<ObjectId, Self::Error> { + self.deref().write_buf(object, from) + } + + fn write_stream(&self, kind: Kind, size: u64, from: impl Read) -> Result<ObjectId, Self::Error> { + self.deref().write_stream(kind, size, from) + } + } + + impl<T> crate::Write for Rc<T> + where + T: crate::Write, + { + type Error = T::Error; + + fn write(&self, object: impl WriteTo) -> Result<ObjectId, Self::Error> { + self.deref().write(object) + } + + fn write_buf(&self, object: Kind, from: &[u8]) -> Result<ObjectId, Self::Error> { + self.deref().write_buf(object, from) + } + + fn write_stream(&self, kind: Kind, size: u64, from: impl Read) -> Result<ObjectId, Self::Error> { + self.deref().write_stream(kind, size, from) + } + } + + impl<T> crate::Find for &T + where + T: crate::Find, + { + type Error = T::Error; + + fn contains(&self, id: impl AsRef<oid>) -> bool { + (*self).contains(id) + } + + fn try_find<'a>(&self, id: impl AsRef<oid>, buffer: &'a mut Vec<u8>) -> Result<Option<Data<'a>>, Self::Error> { + (*self).try_find(id, buffer) + } + } + + impl<T> crate::Header for &T + where + T: crate::Header, + { + type Error = T::Error; + + fn try_header(&self, id: impl AsRef<oid>) -> Result<Option<Header>, Self::Error> { + (*self).try_header(id) + } + } + + impl<T> crate::Find for Rc<T> + where + T: crate::Find, + { + type Error = T::Error; + + fn contains(&self, id: impl AsRef<oid>) -> bool { + self.deref().contains(id) + } + + fn try_find<'a>(&self, id: impl AsRef<oid>, buffer: &'a mut Vec<u8>) -> Result<Option<Data<'a>>, Self::Error> { + self.deref().try_find(id, buffer) + } + } + + impl<T> crate::Header for Rc<T> + where + T: crate::Header, + { + type Error = T::Error; + + fn try_header(&self, id: impl AsRef<oid>) -> Result<Option<Header>, Self::Error> { + self.deref().try_header(id) + } + } + + impl<T> crate::Find for Arc<T> + where + T: crate::Find, + { + type Error = T::Error; + + fn contains(&self, id: impl AsRef<oid>) -> bool { + self.deref().contains(id) + } + + fn try_find<'a>(&self, id: impl AsRef<oid>, buffer: &'a mut Vec<u8>) -> Result<Option<Data<'a>>, Self::Error> { + self.deref().try_find(id, buffer) + } + } + + impl<T> crate::Header for Arc<T> + where + T: crate::Header, + { + type Error = T::Error; + + fn try_header(&self, id: impl AsRef<oid>) -> Result<Option<Header>, Self::Error> { + self.deref().try_header(id) + } + } +} + +mod ext { + use gix_object::{BlobRef, CommitRef, CommitRefIter, Kind, ObjectRef, TagRef, TagRefIter, TreeRef, TreeRefIter}; + + use crate::find; + + macro_rules! 
make_obj_lookup { + ($method:ident, $object_variant:path, $object_kind:path, $object_type:ty) => { + /// Like [`find(…)`][Self::find()], but flattens the `Result<Option<_>>` into a single `Result` making a non-existing object an error + /// while returning the desired object type. + fn $method<'a>( + &self, + id: impl AsRef<gix_hash::oid>, + buffer: &'a mut Vec<u8>, + ) -> Result<$object_type, find::existing_object::Error<Self::Error>> { + let id = id.as_ref(); + self.try_find(id, buffer) + .map_err(find::existing_object::Error::Find)? + .ok_or_else(|| find::existing_object::Error::NotFound { + oid: id.as_ref().to_owned(), + }) + .and_then(|o| o.decode().map_err(find::existing_object::Error::Decode)) + .and_then(|o| match o { + $object_variant(o) => return Ok(o), + _other => Err(find::existing_object::Error::ObjectKind { + expected: $object_kind, + }), + }) + } + }; + } + + macro_rules! make_iter_lookup { + ($method:ident, $object_kind:path, $object_type:ty, $into_iter:tt) => { + /// Like [`find(…)`][Self::find()], but flattens the `Result<Option<_>>` into a single `Result` making a non-existing object an error + /// while returning the desired iterator type. + fn $method<'a>( + &self, + id: impl AsRef<gix_hash::oid>, + buffer: &'a mut Vec<u8>, + ) -> Result<$object_type, find::existing_iter::Error<Self::Error>> { + let id = id.as_ref(); + self.try_find(id, buffer) + .map_err(find::existing_iter::Error::Find)? + .ok_or_else(|| find::existing_iter::Error::NotFound { + oid: id.as_ref().to_owned(), + }) + .and_then(|o| { + o.$into_iter() + .ok_or_else(|| find::existing_iter::Error::ObjectKind { + expected: $object_kind, + }) + }) + } + }; + } + + /// An extension trait with convenience functions. + pub trait HeaderExt: super::Header { + /// Like [`try_header(…)`][super::Header::try_header()], but flattens the `Result<Option<_>>` into a single `Result` making a non-existing object an error. + fn header( + &self, + id: impl AsRef<gix_hash::oid>, + ) -> Result<crate::find::Header, find::existing::Error<Self::Error>> { + let id = id.as_ref(); + self.try_header(id) + .map_err(find::existing::Error::Find)? + .ok_or_else(|| find::existing::Error::NotFound { oid: id.to_owned() }) + } + } + + impl<T: super::Header> HeaderExt for T {} + + /// An extension trait with convenience functions. + pub trait FindExt: super::Find { + /// Like [`try_find(…)`][super::Find::try_find()], but flattens the `Result<Option<_>>` into a single `Result` making a non-existing object an error. + fn find<'a>( + &self, + id: impl AsRef<gix_hash::oid>, + buffer: &'a mut Vec<u8>, + ) -> Result<gix_object::Data<'a>, find::existing::Error<Self::Error>> { + let id = id.as_ref(); + self.try_find(id, buffer) + .map_err(find::existing::Error::Find)? + .ok_or_else(|| find::existing::Error::NotFound { oid: id.to_owned() }) + } + + make_obj_lookup!(find_commit, ObjectRef::Commit, Kind::Commit, CommitRef<'a>); + make_obj_lookup!(find_tree, ObjectRef::Tree, Kind::Tree, TreeRef<'a>); + make_obj_lookup!(find_tag, ObjectRef::Tag, Kind::Tag, TagRef<'a>); + make_obj_lookup!(find_blob, ObjectRef::Blob, Kind::Blob, BlobRef<'a>); + make_iter_lookup!(find_commit_iter, Kind::Commit, CommitRefIter<'a>, try_into_commit_iter); + make_iter_lookup!(find_tree_iter, Kind::Tree, TreeRefIter<'a>, try_into_tree_iter); + make_iter_lookup!(find_tag_iter, Kind::Tag, TagRefIter<'a>, try_into_tag_iter); + } + + impl<T: super::Find> FindExt for T {} +} +pub use ext::{FindExt, HeaderExt}; + +use crate::find; |