summaryrefslogtreecommitdiffstats
path: root/vendor/gix-features/src/fs.rs
blob: f65779b926aa0548df685bbe6a1dc2bd6eba9279 (plain)
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
//! Filesystem utilities
//!
//! These are will be parallel if the `parallel` feature is enabled, at the expense of compiling additional dependencies
//! along with runtime costs for maintaining a global [`rayon`](https://docs.rs/rayon) thread pool.
//!
//! For information on how to use the [`WalkDir`] type, have a look at
//! * [`jwalk::WalkDir`](https://docs.rs/jwalk/0.5.1/jwalk/type.WalkDir.html) if `parallel` feature is enabled
//! * [walkdir::WalkDir](https://docs.rs/walkdir/2.3.1/walkdir/struct.WalkDir.html) otherwise

#[cfg(any(feature = "walkdir", feature = "fs-walkdir-parallel"))]
mod shared {
    /// The desired level of parallelism.
    pub enum Parallelism {
        /// Do not parallelize at all by making a serial traversal on the current thread.
        Serial,
        /// Create a new thread pool for each traversal with up to 16 threads or the amount of logical cores of the machine.
        ThreadPoolPerTraversal {
            /// The base name of the threads we create as part of the thread-pool.
            thread_name: &'static str,
        },
    }
}

///
#[cfg(feature = "fs-walkdir-parallel")]
pub mod walkdir {
    use std::path::Path;

    pub use jwalk::{DirEntry as DirEntryGeneric, DirEntryIter as DirEntryIterGeneric, Error, WalkDir};

    pub use super::shared::Parallelism;

    /// An alias for an uncustomized directory entry to match the one of the non-parallel version offered by `walkdir`.
    pub type DirEntry = DirEntryGeneric<((), ())>;

    impl From<Parallelism> for jwalk::Parallelism {
        fn from(v: Parallelism) -> Self {
            match v {
                Parallelism::Serial => jwalk::Parallelism::Serial,
                Parallelism::ThreadPoolPerTraversal { thread_name } => std::thread::available_parallelism()
                    .map(|threads| {
                        let pool = jwalk::rayon::ThreadPoolBuilder::new()
                            .num_threads(threads.get().min(16))
                            .stack_size(128 * 1024)
                            .thread_name(move |idx| format!("{thread_name} {idx}"))
                            .build()
                            .expect("we only set options that can't cause a build failure");
                        jwalk::Parallelism::RayonExistingPool {
                            pool: pool.into(),
                            busy_timeout: None,
                        }
                    })
                    .unwrap_or_else(|_| Parallelism::Serial.into()),
            }
        }
    }

    /// Instantiate a new directory iterator which will not skip hidden files, with the given level of `parallelism`.
    pub fn walkdir_new(root: impl AsRef<Path>, parallelism: Parallelism) -> WalkDir {
        WalkDir::new(root).skip_hidden(false).parallelism(parallelism.into())
    }

    /// Instantiate a new directory iterator which will not skip hidden files and is sorted
    pub fn walkdir_sorted_new(root: impl AsRef<Path>, parallelism: Parallelism) -> WalkDir {
        WalkDir::new(root)
            .skip_hidden(false)
            .sort(true)
            .parallelism(parallelism.into())
    }

    /// The Iterator yielding directory items
    pub type DirEntryIter = DirEntryIterGeneric<((), ())>;
}

#[cfg(all(feature = "walkdir", not(feature = "fs-walkdir-parallel")))]
///
pub mod walkdir {
    use std::path::Path;

    pub use walkdir::{DirEntry, Error, WalkDir};

    pub use super::shared::Parallelism;

    /// Instantiate a new directory iterator which will not skip hidden files, with the given level of `parallelism`.
    pub fn walkdir_new(root: impl AsRef<Path>, _: Parallelism) -> WalkDir {
        WalkDir::new(root)
    }

    /// Instantiate a new directory iterator which will not skip hidden files and is sorted, with the given level of `parallelism`.
    pub fn walkdir_sorted_new(root: impl AsRef<Path>, _: Parallelism) -> WalkDir {
        WalkDir::new(root).sort_by_file_name()
    }

    /// The Iterator yielding directory items
    pub type DirEntryIter = walkdir::IntoIter;
}

#[cfg(any(feature = "walkdir", feature = "fs-walkdir-parallel"))]
pub use self::walkdir::{walkdir_new, walkdir_sorted_new, WalkDir};

/// Prepare open options which won't follow symlinks when the file is opened.
///
/// Note: only effective on unix currently.
pub fn open_options_no_follow() -> std::fs::OpenOptions {
    #[cfg_attr(not(unix), allow(unused_mut))]
    let mut options = std::fs::OpenOptions::new();
    #[cfg(unix)]
    {
        /// Make sure that it's impossible to follow through to the target of symlinks.
        /// Note that this will still follow symlinks in the path, which is what we assume
        /// has been checked separately.
        use std::os::unix::fs::OpenOptionsExt;
        options.custom_flags(libc::O_NOFOLLOW);
    }
    options
}

mod snapshot {
    use std::ops::Deref;

    use crate::threading::{get_mut, get_ref, MutableOnDemand, OwnShared};

    /// A structure holding enough information to reload a value if its on-disk representation changes as determined by its modified time.
    #[derive(Debug)]
    pub struct Snapshot<T: std::fmt::Debug> {
        value: T,
        modified: std::time::SystemTime,
    }

    impl<T: Clone + std::fmt::Debug> Clone for Snapshot<T> {
        fn clone(&self) -> Self {
            Self {
                value: self.value.clone(),
                modified: self.modified,
            }
        }
    }

    /// A snapshot of a resource which is up-to-date in the moment it is retrieved.
    pub type SharedSnapshot<T> = OwnShared<Snapshot<T>>;

    /// Use this type for fields in structs that are to store the [`Snapshot`], typically behind an [`OwnShared`].
    ///
    /// Note that the resource itself is behind another [`OwnShared`] to allow it to be used without holding any kind of lock, hence
    /// without blocking updates while it is used.
    #[derive(Debug, Default)]
    pub struct MutableSnapshot<T: std::fmt::Debug>(pub MutableOnDemand<Option<SharedSnapshot<T>>>);

    impl<T: std::fmt::Debug> Deref for Snapshot<T> {
        type Target = T;

        fn deref(&self) -> &Self::Target {
            &self.value
        }
    }

    impl<T: std::fmt::Debug> Deref for MutableSnapshot<T> {
        type Target = MutableOnDemand<Option<SharedSnapshot<T>>>;

        fn deref(&self) -> &Self::Target {
            &self.0
        }
    }

    impl<T: std::fmt::Debug> MutableSnapshot<T> {
        /// Create a new instance of this type.
        ///
        /// Useful in case `Default::default()` isn't working for some reason.
        pub fn new() -> Self {
            MutableSnapshot(MutableOnDemand::new(None))
        }

        /// Refresh `state` forcefully by re-`open`ing the resource. Note that `open()` returns `None` if the resource isn't
        /// present on disk, and that it's critical that the modified time is obtained _before_ opening the resource.
        pub fn force_refresh<E>(
            &self,
            open: impl FnOnce() -> Result<Option<(std::time::SystemTime, T)>, E>,
        ) -> Result<(), E> {
            let mut state = get_mut(&self.0);
            *state = open()?.map(|(modified, value)| OwnShared::new(Snapshot { value, modified }));
            Ok(())
        }

        /// Assure that the resource in `state` is up-to-date by comparing the `current_modification_time` with the one we know in `state`
        /// and by acting accordingly.
        /// Returns the potentially updated/reloaded resource if it is still present on disk, which then represents a snapshot that is up-to-date
        /// in that very moment, or `None` if the underlying file doesn't exist.
        ///
        /// Note that even though this is racy, each time a request is made there is a chance to see the actual state.
        pub fn recent_snapshot<E>(
            &self,
            mut current_modification_time: impl FnMut() -> Option<std::time::SystemTime>,
            open: impl FnOnce() -> Result<Option<T>, E>,
        ) -> Result<Option<SharedSnapshot<T>>, E> {
            let state = get_ref(self);
            let recent_modification = current_modification_time();
            let buffer = match (&*state, recent_modification) {
                (None, None) => (*state).clone(),
                (Some(_), None) => {
                    drop(state);
                    let mut state = get_mut(self);
                    *state = None;
                    (*state).clone()
                }
                (Some(snapshot), Some(modified_time)) => {
                    if snapshot.modified < modified_time {
                        drop(state);
                        let mut state = get_mut(self);

                        if let (Some(_snapshot), Some(modified_time)) = (&*state, current_modification_time()) {
                            *state = open()?.map(|value| {
                                OwnShared::new(Snapshot {
                                    value,
                                    modified: modified_time,
                                })
                            });
                        }

                        (*state).clone()
                    } else {
                        // Note that this relies on sub-section precision or else is a race when the packed file was just changed.
                        // It's nothing we can know though, so… up to the caller unfortunately.
                        Some(snapshot.clone())
                    }
                }
                (None, Some(_modified_time)) => {
                    drop(state);
                    let mut state = get_mut(self);
                    // Still in the same situation? If so, load the buffer. This compensates for the trampling herd
                    // during lazy-loading at the expense of another mtime check.
                    if let (None, Some(modified_time)) = (&*state, current_modification_time()) {
                        *state = open()?.map(|value| {
                            OwnShared::new(Snapshot {
                                value,
                                modified: modified_time,
                            })
                        });
                    }
                    (*state).clone()
                }
            };
            Ok(buffer)
        }
    }
}
pub use snapshot::{MutableSnapshot, SharedSnapshot, Snapshot};