summaryrefslogtreecommitdiffstats
path: root/vendor/gix-odb/src/store_impls/dynamic/types.rs
blob: 473c587bbeaae15a1117ddb1392af7cce1d91ac6 (plain)
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
323
324
325
326
327
328
329
330
331
332
333
334
335
336
337
338
339
340
341
342
343
344
345
346
347
348
349
350
351
352
353
354
355
356
357
358
359
360
361
362
363
364
365
366
367
368
369
370
371
372
373
374
375
376
377
378
379
380
381
382
383
384
385
386
387
388
389
390
391
392
393
394
395
396
397
398
399
400
401
402
403
404
405
406
407
408
409
410
411
412
413
414
415
416
417
418
419
420
421
422
423
424
425
426
427
428
429
430
431
432
433
434
435
436
437
438
439
440
441
442
443
444
445
446
447
448
449
450
451
452
453
454
455
456
457
458
459
460
use std::{
    path::{Path, PathBuf},
    sync::{
        atomic::{AtomicU16, AtomicU32, AtomicUsize, Ordering},
        Arc,
    },
    time::SystemTime,
};

use arc_swap::ArcSwap;
use gix_features::hash;

/// An id to refer to an index file or a multipack index file
pub type IndexId = usize;
pub(crate) type StateId = u32;
pub(crate) type Generation = u32;
pub(crate) type AtomicGeneration = AtomicU32;

/// A way to indicate which pack indices we have seen already and which of them are loaded, along with an idea
/// of whether stored `PackId`s are still usable.
#[derive(Default, Copy, Clone)]
pub struct SlotIndexMarker {
    /// The generation the `loaded_until_index` belongs to. Indices of different generations are completely incompatible.
    /// This value changes once the internal representation is compacted, something that may happen only if there is no handle
    /// requiring stable pack indices.
    pub(crate) generation: Generation,
    /// A unique id identifying the index state as well as all loose databases we have last observed.
    /// If it changes in any way, the value is different.
    pub(crate) state_id: StateId,
}

/// A way to load and refer to a pack uniquely, namespaced by their indexing mechanism, aka multi-pack or not.
#[derive(Debug, Copy, Clone, Eq, PartialEq)]
pub struct PackId {
    /// This is the index in the slot map at which the packs index is located.
    pub(crate) index: IndexId,
    /// If the pack is in a multi-pack index, this additional index is the pack-index within the multi-pack index identified by `index`.
    pub(crate) multipack_index: Option<gix_pack::multi_index::PackIndex>,
}

impl PackId {
    /// Returns the maximum of indices we can represent.
    pub(crate) const fn max_indices() -> usize {
        (1 << 15) - 1
    }
    /// Returns the maximum of packs we can represent if stored in a multi-index.
    pub(crate) const fn max_packs_in_multi_index() -> gix_pack::multi_index::PackIndex {
        (1 << 16) - 1
    }
    /// Packs have a built-in identifier to make data structures simpler, and this method represents ourselves as such id
    /// to be convertible back and forth. We essentially compress ourselves into a u32.
    ///
    /// Bit 16 is a marker to tell us if it's a multi-pack or not, the ones before are the index file itself, the ones after
    /// are used to encode the pack index within the multi-pack.
    pub(crate) fn to_intrinsic_pack_id(self) -> gix_pack::data::Id {
        assert!(self.index < (1 << 15), "There shouldn't be more than 2^15 indices");
        match self.multipack_index {
            None => self.index as gix_pack::data::Id,
            Some(midx) => {
                assert!(
                    midx <= Self::max_packs_in_multi_index(),
                    "There shouldn't be more than 2^16 packs per multi-index"
                );
                ((self.index as gix_pack::data::Id | 1 << 15) | midx << 16) as gix_pack::data::Id
            }
        }
    }

    pub(crate) fn from_intrinsic_pack_id(pack_id: gix_pack::data::Id) -> Self {
        if pack_id & (1 << 15) == 0 {
            PackId {
                index: (pack_id & 0x7fff) as IndexId,
                multipack_index: None,
            }
        } else {
            PackId {
                index: (pack_id & 0x7fff) as IndexId,
                multipack_index: Some(pack_id >> 16),
            }
        }
    }
}

/// An index that changes only if the packs directory changes and its contents is re-read.
#[derive(Default)]
pub struct SlotMapIndex {
    /// The index into the slot map at which we expect an index or pack file. Neither of these might be loaded yet.
    pub(crate) slot_indices: Vec<usize>,
    /// A list of loose object databases as resolved by their alternates file in the `object_directory`. The first entry is this objects
    /// directory loose file database. All other entries are the loose stores of alternates.
    /// It's in an Arc to be shared to Handles, but not to be shared across SlotMapIndices.
    pub(crate) loose_dbs: Arc<Vec<crate::loose::Store>>,

    /// A static value that doesn't ever change for a particular clone of this index.
    pub(crate) generation: Generation,
    /// The number of indices loaded thus far when the index of the slot map was last examined, which can change as new indices are loaded
    /// in parallel.
    /// Shared across SlotMapIndex instances of the same generation.
    pub(crate) next_index_to_load: Arc<AtomicUsize>,
    /// Incremented by one up to `slot_indices.len()` once an attempt to load an index completed.
    /// If a load failed, there will also be an increment.
    /// Shared across SlotMapIndex instances of the same generation.
    pub(crate) loaded_indices: Arc<AtomicUsize>,
    /// The amount of indices that are currently being loaded.
    /// Zero if no loading operation is currently happening, or more otherwise.
    pub(crate) num_indices_currently_being_loaded: Arc<AtomicU16>,
}

impl SlotMapIndex {
    pub(crate) fn state_id(self: &Arc<SlotMapIndex>) -> StateId {
        // We let the loaded indices take part despite not being part of our own snapshot.
        // This is to account for indices being loaded in parallel without actually changing the snapshot itself.
        let hash = hash::crc32(&(Arc::as_ptr(self) as usize).to_be_bytes());
        hash::crc32_update(hash, &self.loaded_indices.load(Ordering::SeqCst).to_be_bytes())
    }

    pub(crate) fn marker(self: &Arc<SlotMapIndex>) -> SlotIndexMarker {
        SlotIndexMarker {
            generation: self.generation,
            state_id: self.state_id(),
        }
    }

    /// Returns true if we already know at least one loose object db, a sign of being initialized
    pub(crate) fn is_initialized(&self) -> bool {
        !self.loose_dbs.is_empty()
    }
}

#[derive(Clone)]
pub(crate) struct OnDiskFile<T: Clone> {
    /// The last known path of the file
    path: Arc<PathBuf>,
    /// the time the file was last modified
    mtime: SystemTime,
    state: OnDiskFileState<T>,
}

#[derive(Clone)]
pub(crate) enum OnDiskFileState<T: Clone> {
    /// The file is on disk and can be loaded from there.
    Unloaded,
    Loaded(T),
    /// The file was loaded, but appeared to be missing on disk after reconciling our state with what's on disk.
    /// As there were handles that required pack-id stability we had to keep the item to allow finding it on later
    /// lookups.
    Garbage(T),
    /// File is missing on disk and could not be loaded when we tried or turned missing after reconciling our state.
    Missing,
}

impl<T: Clone> OnDiskFile<T> {
    pub fn path(&self) -> &Path {
        &self.path
    }
    /// Return true if we hold a memory map of the file already.
    pub fn is_loaded(&self) -> bool {
        matches!(self.state, OnDiskFileState::Loaded(_) | OnDiskFileState::Garbage(_))
    }

    /// Return true if we are to be collected as garbage
    pub fn is_disposable(&self) -> bool {
        matches!(self.state, OnDiskFileState::Garbage(_) | OnDiskFileState::Missing)
    }

    // On error, always declare the file missing and return an error.
    pub(crate) fn load_strict(&mut self, load: impl FnOnce(&Path) -> std::io::Result<T>) -> std::io::Result<()> {
        use OnDiskFileState::*;
        match self.state {
            Unloaded | Missing => match load(&self.path) {
                Ok(v) => {
                    self.state = Loaded(v);
                    Ok(())
                }
                Err(err) => {
                    // TODO: Should be provide more information? We don't even know what exactly failed right now, degenerating information.
                    self.state = Missing;
                    Err(err)
                }
            },
            Loaded(_) | Garbage(_) => Ok(()),
        }
    }
    /// If the file is missing, we don't consider this failure but instead return Ok(None) to allow recovery.
    /// when we know that loading is necessary. This also works around borrow check, which is a nice coincidence.
    pub fn load_with_recovery(&mut self, load: impl FnOnce(&Path) -> std::io::Result<T>) -> std::io::Result<Option<T>> {
        use OnDiskFileState::*;
        match &mut self.state {
            Loaded(v) | Garbage(v) => Ok(Some(v.clone())),
            Missing => Ok(None),
            Unloaded => match load(&self.path) {
                Ok(v) => {
                    self.state = OnDiskFileState::Loaded(v.clone());
                    Ok(Some(v))
                }
                Err(err) if err.kind() == std::io::ErrorKind::NotFound => {
                    self.state = OnDiskFileState::Missing;
                    Ok(None)
                }
                Err(err) => Err(err),
            },
        }
    }

    pub fn loaded(&self) -> Option<&T> {
        use OnDiskFileState::*;
        match &self.state {
            Loaded(v) | Garbage(v) => Some(v),
            Unloaded | Missing => None,
        }
    }

    pub fn put_back(&mut self) {
        match std::mem::replace(&mut self.state, OnDiskFileState::Missing) {
            OnDiskFileState::Garbage(v) => self.state = OnDiskFileState::Loaded(v),
            OnDiskFileState::Missing => self.state = OnDiskFileState::Unloaded,
            other @ (OnDiskFileState::Loaded(_) | OnDiskFileState::Unloaded) => self.state = other,
        }
    }

    pub fn trash(&mut self) {
        match std::mem::replace(&mut self.state, OnDiskFileState::Missing) {
            OnDiskFileState::Loaded(v) => self.state = OnDiskFileState::Garbage(v),
            other @ (OnDiskFileState::Garbage(_) | OnDiskFileState::Unloaded | OnDiskFileState::Missing) => {
                self.state = other
            }
        }
    }
}

#[derive(Clone)]
pub(crate) struct IndexFileBundle {
    pub index: OnDiskFile<Arc<gix_pack::index::File>>,
    pub data: OnDiskFile<Arc<gix_pack::data::File>>,
}

#[derive(Clone)]
pub(crate) struct MultiIndexFileBundle {
    pub multi_index: OnDiskFile<Arc<gix_pack::multi_index::File>>,
    pub data: Vec<OnDiskFile<Arc<gix_pack::data::File>>>,
}

#[derive(Clone)]
pub(crate) enum IndexAndPacks {
    Index(IndexFileBundle),
    /// Note that there can only be one multi-pack file per repository, but thanks to git alternates, there can be multiple overall.
    MultiIndex(MultiIndexFileBundle),
}

impl IndexAndPacks {
    pub(crate) fn index_path(&self) -> &Path {
        match self {
            IndexAndPacks::Index(index) => &index.index.path,
            IndexAndPacks::MultiIndex(index) => &index.multi_index.path,
        }
    }

    pub(crate) fn mtime(&self) -> SystemTime {
        match self {
            IndexAndPacks::Index(index) => index.index.mtime,
            IndexAndPacks::MultiIndex(index) => index.multi_index.mtime,
        }
    }

    /// If we are garbage, put ourselves into the loaded state. Otherwise put ourselves back to unloaded.
    pub(crate) fn put_back(&mut self) {
        match self {
            IndexAndPacks::Index(bundle) => {
                bundle.index.put_back();
                bundle.data.put_back();
            }
            IndexAndPacks::MultiIndex(bundle) => {
                bundle.multi_index.put_back();
                for data in &mut bundle.data {
                    data.put_back();
                }
            }
        }
    }

    // The inverse of `put_back()`, by trashing the content.
    pub(crate) fn trash(&mut self) {
        match self {
            IndexAndPacks::Index(bundle) => {
                bundle.index.trash();
                bundle.data.trash();
            }
            IndexAndPacks::MultiIndex(bundle) => {
                bundle.multi_index.trash();
                for data in &mut bundle.data {
                    data.trash();
                }
            }
        }
    }

    pub(crate) fn index_is_loaded(&self) -> bool {
        match self {
            Self::Index(bundle) => bundle.index.is_loaded(),
            Self::MultiIndex(bundle) => bundle.multi_index.is_loaded(),
        }
    }

    pub(crate) fn is_disposable(&self) -> bool {
        match self {
            Self::Index(bundle) => bundle.index.is_disposable() || bundle.data.is_disposable(),
            Self::MultiIndex(bundle) => {
                bundle.multi_index.is_disposable() || bundle.data.iter().any(OnDiskFile::is_disposable)
            }
        }
    }

    pub(crate) fn load_index(&mut self, object_hash: gix_hash::Kind) -> std::io::Result<()> {
        match self {
            IndexAndPacks::Index(bundle) => bundle.index.load_strict(|path| {
                gix_pack::index::File::at(path, object_hash)
                    .map(Arc::new)
                    .map_err(|err| match err {
                        gix_pack::index::init::Error::Io { source, .. } => source,
                        err => std::io::Error::new(std::io::ErrorKind::Other, err),
                    })
            }),
            IndexAndPacks::MultiIndex(bundle) => {
                bundle.multi_index.load_strict(|path| {
                    gix_pack::multi_index::File::at(path)
                        .map(Arc::new)
                        .map_err(|err| match err {
                            gix_pack::multi_index::init::Error::Io { source, .. } => source,
                            err => std::io::Error::new(std::io::ErrorKind::Other, err),
                        })
                })?;
                if let Some(multi_index) = bundle.multi_index.loaded() {
                    bundle.data = Self::index_names_to_pack_paths(multi_index);
                }
                Ok(())
            }
        }
    }

    pub(crate) fn new_single(index_path: PathBuf, mtime: SystemTime) -> Self {
        let data_path = index_path.with_extension("pack");
        Self::Index(IndexFileBundle {
            index: OnDiskFile {
                path: index_path.into(),
                state: OnDiskFileState::Unloaded,
                mtime,
            },
            data: OnDiskFile {
                path: data_path.into(),
                state: OnDiskFileState::Unloaded,
                mtime,
            },
        })
    }

    pub(crate) fn new_multi_from_open_file(multi_index: Arc<gix_pack::multi_index::File>, mtime: SystemTime) -> Self {
        let data = Self::index_names_to_pack_paths(&multi_index);
        Self::MultiIndex(MultiIndexFileBundle {
            multi_index: OnDiskFile {
                path: Arc::new(multi_index.path().to_owned()),
                state: OnDiskFileState::Loaded(multi_index),
                mtime,
            },
            data,
        })
    }

    fn index_names_to_pack_paths(
        multi_index: &gix_pack::multi_index::File,
    ) -> Vec<OnDiskFile<Arc<gix_pack::data::File>>> {
        let parent_dir = multi_index.path().parent().expect("parent present");
        let data = multi_index
            .index_names()
            .iter()
            .map(|idx| OnDiskFile {
                path: parent_dir.join(idx.with_extension("pack")).into(),
                state: OnDiskFileState::Unloaded,
                mtime: SystemTime::UNIX_EPOCH,
            })
            .collect();
        data
    }
}

#[derive(Default)]
pub(crate) struct MutableIndexAndPack {
    pub(crate) files: ArcSwap<Option<IndexAndPacks>>,
    pub(crate) write: parking_lot::Mutex<()>,
    /// The generation required at least to read this slot. If these mismatch, the caller is likely referring to a now changed slot
    /// that has different content under the same id.
    /// Must only be changed when the write lock is held.
    pub(crate) generation: AtomicGeneration,
}

/// A snapshot about resource usage.
#[derive(Debug, Clone, Copy, PartialEq, Eq, Default)]
#[cfg_attr(feature = "serde", derive(serde::Serialize, serde::Deserialize))]
pub struct Metrics {
    /// The total amount of handles which can be used to access object information.
    pub num_handles: usize,
    /// The amount of refreshes performed to reconcile with the ODB state on disk.
    pub num_refreshes: usize,
    /// The amount of indices that are currently open and will be returned to handles.
    pub open_reachable_indices: usize,
    /// The amount of reachable, known indices, which aren't opened yet.
    pub known_reachable_indices: usize,
    /// The amount of packs which are open in memory and will be returned to handles.
    pub open_reachable_packs: usize,
    /// The amount of packs that are reachable and will be returned to handles. They aren't open yet.
    pub known_packs: usize,
    /// The amount of slots which are empty.
    ///
    /// Over time these will fill, but they can be emptied as files are removed from disk.
    pub unused_slots: usize,
    /// Unreachable indices are still using slots, but aren't returned to new handles anymore unless they still happen to
    /// know their id.
    ///
    /// This allows to keep files available while they are still potentially required for operations like pack generation, despite
    /// the file on disk being removed or changed.
    pub unreachable_indices: usize,
    /// Equivalent to `unreachable_indices`, but for mapped packed data files
    pub unreachable_packs: usize,
    /// The amount of loose object databases currently available for object retrieval.
    ///
    /// There may be more than one if 'alternates' are used.
    pub loose_dbs: usize,
}

#[cfg(test)]
mod tests {
    use super::*;

    mod pack_id {
        use super::PackId;

        #[test]
        fn to_intrinsic_roundtrip() {
            let single = PackId {
                index: (1 << 15) - 1,
                multipack_index: None,
            };
            let multi = PackId {
                index: (1 << 15) - 1,
                multipack_index: Some((1 << 16) - 1),
            };
            assert_eq!(PackId::from_intrinsic_pack_id(single.to_intrinsic_pack_id()), single);
            assert_eq!(PackId::from_intrinsic_pack_id(multi.to_intrinsic_pack_id()), multi);
        }

        #[test]
        #[should_panic]
        fn max_supported_index_count() {
            PackId {
                index: 1 << 15,
                multipack_index: None,
            }
            .to_intrinsic_pack_id();
        }
    }
}