Diffstat (limited to 'third_party/rust/regex-automata/src/util/lazy.rs')
-rw-r--r--  third_party/rust/regex-automata/src/util/lazy.rs  465
1 file changed, 465 insertions, 0 deletions
diff --git a/third_party/rust/regex-automata/src/util/lazy.rs b/third_party/rust/regex-automata/src/util/lazy.rs
new file mode 100644
index 0000000000..de27a2a6e6
--- /dev/null
+++ b/third_party/rust/regex-automata/src/util/lazy.rs
@@ -0,0 +1,465 @@
+/*!
+A lazily initialized value for safe sharing between threads.
+
+The principal type in this module is `Lazy`, which makes it easy to construct
+values that are shared safely across multiple threads simultaneously.
+*/
+
+use core::fmt;
+
+/// A lazily initialized value that implements `Deref` for `T`.
+///
+/// A `Lazy` takes an initialization function and permits callers from any
+/// thread to access the result of that initialization function in a safe
+/// manner. In effect, this permits one-time initialization of global resources
+/// in a (possibly) multi-threaded program.
+///
+/// This type and its functionality are available even when neither the `alloc`
+/// nor the `std` features are enabled. In exchange, a `Lazy` does **not**
+/// guarantee that the given `create` function is called at most once. It
+/// might be called multiple times. Moreover, a call to `Lazy::get` (either
+/// explicitly or implicitly via `Lazy`'s `Deref` impl) may block until a `T`
+/// is available.
+///
+/// This is very similar to `lazy_static` or `once_cell`, except it doesn't
+/// guarantee that the initialization function will be run once and it works
+/// in no-alloc no-std environments. With that said, if you need stronger
+/// guarantees or a more flexible API, then it is recommended to use either
+/// `lazy_static` or `once_cell`.
+///
+/// # Warning: may use a spin lock
+///
+/// When this crate is compiled _without_ the `alloc` feature, then this type
+/// may use a spin lock internally. This can have subtle effects that may
+/// be undesirable. See [Spinlocks Considered Harmful][spinharm] for a more
+/// thorough treatment of this topic.
+///
+/// [spinharm]: https://matklad.github.io/2020/01/02/spinlocks-considered-harmful.html
+///
+/// # Example
+///
+/// This type is useful for creating regexes once, and then using them from
+/// multiple threads simultaneously without worrying about synchronization.
+///
+/// ```
+/// use regex_automata::{dfa::regex::Regex, util::lazy::Lazy, Match};
+///
+/// static RE: Lazy<Regex> = Lazy::new(|| Regex::new("foo[0-9]+bar").unwrap());
+///
+/// let expected = Some(Match::must(0, 3..14));
+/// assert_eq!(expected, RE.find(b"zzzfoo12345barzzz"));
+/// ```
+pub struct Lazy<T, F = fn() -> T>(lazy::Lazy<T, F>);
+
+impl<T, F> Lazy<T, F> {
+ /// Create a new `Lazy` value that is initialized via the given function.
+ ///
+ /// The `T` type is automatically inferred from the return type of the
+ /// `create` function given.
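+ ///
+ /// # Example
+ ///
+ /// A minimal sketch (the `COUNT` static is illustrative only):
+ ///
+ /// ```
+ /// use regex_automata::util::lazy::Lazy;
+ ///
+ /// // A non-capturing closure coerces to the default `F = fn() -> T`.
+ /// static COUNT: Lazy<u64> = Lazy::new(|| 5 * 5);
+ /// assert_eq!(25, *COUNT);
+ /// ```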
+ pub const fn new(create: F) -> Lazy<T, F> {
+ Lazy(lazy::Lazy::new(create))
+ }
+}
+
+impl<T, F: Fn() -> T> Lazy<T, F> {
+ /// Return a reference to the lazily initialized value.
+ ///
+ /// This routine may block if another thread is initializing a `T`.
+ ///
+ /// Note that given an `x` which has type `Lazy`, this must be called via
+ /// `Lazy::get(x)` and not `x.get()`. This routine is defined this way
+ /// because `Lazy` impls `Deref` with a target of `T`.
+ ///
+ /// # Panics
+ ///
+ /// This panics if the `create` function inside this lazy value panics.
+ /// If the panic occurred in another thread, then this routine _may_ also
+ /// panic (but is not guaranteed to do so).
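+ ///
+ /// # Example
+ ///
+ /// A minimal sketch of the calling convention (the `ANSWER` static is
+ /// illustrative only):
+ ///
+ /// ```
+ /// use regex_automata::util::lazy::Lazy;
+ ///
+ /// static ANSWER: Lazy<u64> = Lazy::new(|| 40 + 2);
+ /// // Called as `Lazy::get(&ANSWER)`, never `ANSWER.get()`.
+ /// assert_eq!(42, *Lazy::get(&ANSWER));
+ /// ```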
+ pub fn get(this: &Lazy<T, F>) -> &T {
+ this.0.get()
+ }
+}
+
+impl<T, F: Fn() -> T> core::ops::Deref for Lazy<T, F> {
+ type Target = T;
+
+ fn deref(&self) -> &T {
+ Lazy::get(self)
+ }
+}
+
+impl<T: fmt::Debug, F: Fn() -> T> fmt::Debug for Lazy<T, F> {
+ fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result {
+ self.0.fmt(f)
+ }
+}
+
+#[cfg(feature = "alloc")]
+mod lazy {
+ use core::{
+ fmt,
+ marker::PhantomData,
+ sync::atomic::{AtomicPtr, Ordering},
+ };
+
+ use alloc::boxed::Box;
+
+ /// A non-std lazy initialized value.
+ ///
+ /// This might run the initialization function more than once, but will
+ /// never block.
+ ///
+ /// I wish I could get these semantics into the non-alloc non-std Lazy
+ /// type below, but I'm not sure how to do it. If you can do an alloc,
+ /// then the implementation becomes very simple if you don't care about
+ /// redundant work precisely because a pointer can be atomically swapped.
+ ///
+ /// Perhaps making this approach work in the non-alloc non-std case
+ /// requires asking the caller for a pointer? It would make the API less
+ /// convenient I think.
+ pub(super) struct Lazy<T, F> {
+ data: AtomicPtr<T>,
+ create: F,
+ // This indicates to the compiler that this type can drop T. It's not
+ // totally clear how the absence of this marker could lead to trouble,
+ // but putting it here doesn't have any downsides, so we hedge until
+ // someone from the Unsafe Working Group can tell us definitively that
+ // we don't need it.
+ //
+ // See: https://github.com/BurntSushi/regex-automata/issues/30
+ owned: PhantomData<Box<T>>,
+ }
+
+ // SAFETY: So long as T and &T (and F and &F) can themselves be safely
+ // shared among threads, so too can a Lazy<T, _>. Namely, the Lazy API only
+ // permits accessing a &T, and initialization is free of data races. So if T
+ // is thread safe, then so too is Lazy<T, _>.
+ //
+ // We specifically require that T: Send in order for Lazy<T> to be Sync.
+ // Without that requirement, it's possible to send a T from one thread to
+ // another via Lazy's destructor.
+ //
+ // It's not clear whether we need F: Send+Sync for Lazy to be Sync. But
+ // we're conservative for now and keep both.
+ unsafe impl<T: Send + Sync, F: Send + Sync> Sync for Lazy<T, F> {}
+
+ impl<T, F> Lazy<T, F> {
+ /// Create a new alloc but non-std lazy value that is racily
+ /// initialized. That is, the 'create' function may be called more than
+ /// once.
+ pub(super) const fn new(create: F) -> Lazy<T, F> {
+ Lazy {
+ data: AtomicPtr::new(core::ptr::null_mut()),
+ create,
+ owned: PhantomData,
+ }
+ }
+ }
+
+ impl<T, F: Fn() -> T> Lazy<T, F> {
+ /// Get the underlying lazy value. If it hasn't been initialized
+ /// yet, then always attempt to initialize it (even if some other
+ /// thread is initializing it) and atomically attach it to this lazy
+ /// value before returning it.
+ pub(super) fn get(&self) -> &T {
+ if let Some(data) = self.poll() {
+ return data;
+ }
+ let data = (self.create)();
+ let mut ptr = Box::into_raw(Box::new(data));
+ // We attempt to stuff our initialized value into our atomic
+ // pointer. Upon success, we don't need to do anything. But if
+ // someone else beat us to the punch, then we need to make sure
+ // our newly created value is dropped.
+ let result = self.data.compare_exchange(
+ core::ptr::null_mut(),
+ ptr,
+ Ordering::AcqRel,
+ Ordering::Acquire,
+ );
+ if let Err(old) = result {
+ // SAFETY: We created 'ptr' via Box::into_raw above, so turning
+ // it back into a Box via from_raw is safe.
+ drop(unsafe { Box::from_raw(ptr) });
+ ptr = old;
+ }
+ // SAFETY: By this point, 'ptr' is non-null (in the error case, it was
+ // swapped for the pointer installed by another thread) and points to a
+ // fully initialized value returned by 'create'.
+ unsafe { &*ptr }
+ }
+
+ /// If this lazy value has been initialized successfully, then return
+ /// that value. Otherwise return None immediately. This never attempts
+ /// to run initialization itself.
+ fn poll(&self) -> Option<&T> {
+ let ptr = self.data.load(Ordering::Acquire);
+ if ptr.is_null() {
+ return None;
+ }
+ // SAFETY: We just checked that the pointer is not null. Since it's
+ // not null, it must have been fully initialized by 'get' at some
+ // point.
+ Some(unsafe { &*ptr })
+ }
+ }
+
+ impl<T: fmt::Debug, F: Fn() -> T> fmt::Debug for Lazy<T, F> {
+ fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result {
+ f.debug_struct("Lazy").field("data", &self.poll()).finish()
+ }
+ }
+
+ impl<T, F> Drop for Lazy<T, F> {
+ fn drop(&mut self) {
+ let ptr = *self.data.get_mut();
+ if !ptr.is_null() {
+ // SAFETY: We just checked that 'ptr' is not null. And since
+ // we have exclusive access, there are no races to worry about.
+ drop(unsafe { Box::from_raw(ptr) });
+ }
+ }
+ }
+}
+
+#[cfg(not(feature = "alloc"))]
+mod lazy {
+ use core::{
+ cell::Cell,
+ fmt,
+ mem::MaybeUninit,
+ panic::{RefUnwindSafe, UnwindSafe},
+ sync::atomic::{AtomicU8, Ordering},
+ };
+
+ /// Our 'Lazy' value can be in one of three states:
+ ///
+ /// * INIT is where it starts, and also ends up back here if the
+ /// 'create' routine panics.
+ /// * BUSY is where it sits while initialization is running in exactly
+ /// one thread.
+ /// * DONE is where it sits after 'create' has completed and 'data' has
+ /// been fully initialized.
+ const LAZY_STATE_INIT: u8 = 0;
+ const LAZY_STATE_BUSY: u8 = 1;
+ const LAZY_STATE_DONE: u8 = 2;
+
+ /// A non-alloc non-std lazy initialized value.
+ ///
+ /// This guarantees initialization only happens once, but uses a spinlock
+ /// to block in the case of simultaneous access. Blocking occurs so that
+ /// one thread waits while another thread initializes the value.
+ ///
+ /// I would much rather have the semantics of the 'alloc' Lazy type above.
+ /// Namely, that we might run the initialization function more than once,
+ /// but we never otherwise block. However, I don't know how to do that in
+ /// a non-alloc non-std context.
+ pub(super) struct Lazy<T, F> {
+ state: AtomicU8,
+ create: Cell<Option<F>>,
+ data: Cell<MaybeUninit<T>>,
+ }
+
+ // SAFETY: So long as T and &T (and F and &F) can themselves be safely
+ // shared among threads, so too can a Lazy<T, _>. Namely, the Lazy API only
+ // permits accessing a &T, and initialization is free of data races. So if T
+ // is thread safe, then so too is Lazy<T, _>.
+ unsafe impl<T: Send + Sync, F: Send + Sync> Sync for Lazy<T, F> {}
+ // A reference to a Lazy is unwind safe because we specifically take
+ // precautions to poison all accesses to a Lazy if the caller-provided
+ // 'create' function panics.
+ impl<T: UnwindSafe, F: UnwindSafe + RefUnwindSafe> RefUnwindSafe
+ for Lazy<T, F>
+ {
+ }
+
+ impl<T, F> Lazy<T, F> {
+ /// Create a new non-alloc non-std lazy value that is initialized
+ /// exactly once on first use using the given function.
+ pub(super) const fn new(create: F) -> Lazy<T, F> {
+ Lazy {
+ state: AtomicU8::new(LAZY_STATE_INIT),
+ create: Cell::new(Some(create)),
+ data: Cell::new(MaybeUninit::uninit()),
+ }
+ }
+ }
+
+ impl<T, F: FnOnce() -> T> Lazy<T, F> {
+ /// Get the underlying lazy value. If it hasn't been initialized
+ /// yet, then either initialize it or block until some other thread
+ /// initializes it. If the 'create' function given to Lazy::new panics
+ /// (even in another thread), then this panics too.
+ pub(super) fn get(&self) -> &T {
+ // This is effectively a spinlock. We loop until we enter a DONE
+ // state, and if possible, initialize it ourselves. The only way
+ // we exit the loop is if 'create' panics, we initialize 'data' or
+ // some other thread initializes 'data'.
+ //
+ // Yes, I have read spinlocks considered harmful[1]. And that
+ // article is why this spinlock is only active when 'alloc' isn't
+ // enabled. I did this because I don't think there is really
+ // another choice without 'alloc', other than not providing this at
+ // all. But I think that's a big bummer.
+ //
+ // [1]: https://matklad.github.io/2020/01/02/spinlocks-considered-harmful.html
+ while self.state.load(Ordering::Acquire) != LAZY_STATE_DONE {
+ // Check if we're the first ones to get here. If so, we'll be
+ // the ones who initialize.
+ let result = self.state.compare_exchange(
+ LAZY_STATE_INIT,
+ LAZY_STATE_BUSY,
+ Ordering::AcqRel,
+ Ordering::Acquire,
+ );
+ // This means we won the race: we saw the INIT state first, so we
+ // must take responsibility for initializing. And by virtue of
+ // observing INIT, we have also told anyone else trying to
+ // get here that we are BUSY. If someone else sees BUSY, then
+ // they will spin until we finish initialization.
+ if let Ok(_) = result {
+ // Since we are guaranteed to be the only ones here, we
+ // know that 'create' is there... Unless someone else got
+ // here before us and 'create' panicked. In which case,
+ // 'self.create' is now 'None' and we forward the panic
+ // to the caller. (i.e., We implement poisoning.)
+ //
+ // SAFETY: Our use of 'self.state' guarantees that we are
+ // the only thread executing this line, and thus there are
+ // no races.
+ let create = unsafe {
+ (*self.create.as_ptr()).take().expect(
+ "Lazy's create function panicked, \
+ preventing initialization, \
+ poisoning current thread",
+ )
+ };
+ let guard = Guard { state: &self.state };
+ // SAFETY: Our use of 'self.state' guarantees that we are
+ // the only thread executing this line, and thus there are
+ // no races.
+ unsafe {
+ (*self.data.as_ptr()).as_mut_ptr().write(create());
+ }
+ // All is well. 'self.create' ran successfully, so we
+ // forget the guard.
+ core::mem::forget(guard);
+ // Everything is initialized, so we can declare success.
+ self.state.store(LAZY_STATE_DONE, Ordering::Release);
+ break;
+ }
+ core::hint::spin_loop();
+ }
+ // We only get here if data is fully initialized, and thus poll
+ // will always return something.
+ self.poll().unwrap()
+ }
+
+ /// If this lazy value has been initialized successfully, then return
+ /// that value. Otherwise return None immediately. This never blocks.
+ fn poll(&self) -> Option<&T> {
+ if self.state.load(Ordering::Acquire) == LAZY_STATE_DONE {
+ // SAFETY: The DONE state only occurs when data has been fully
+ // initialized.
+ Some(unsafe { &*(*self.data.as_ptr()).as_ptr() })
+ } else {
+ None
+ }
+ }
+ }
+
+ impl<T: fmt::Debug, F: FnMut() -> T> fmt::Debug for Lazy<T, F> {
+ fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result {
+ f.debug_struct("Lazy")
+ .field("state", &self.state.load(Ordering::Acquire))
+ .field("create", &"<closure>")
+ .field("data", &self.poll())
+ .finish()
+ }
+ }
+
+ impl<T, F> Drop for Lazy<T, F> {
+ fn drop(&mut self) {
+ if *self.state.get_mut() == LAZY_STATE_DONE {
+ // SAFETY: state is DONE if and only if data has been fully
+ // initialized. At which point, it is safe to drop.
+ unsafe {
+ // MSRV(1.60): Use assume_init_drop. The below is how
+ // assume_init_drop is implemented.
+ core::ptr::drop_in_place(
+ (*self.data.as_ptr()).as_mut_ptr(),
+ )
+ }
+ }
+ }
+ }
+
+ /// A guard that will reset a Lazy's state back to INIT when dropped. The
+ /// idea here is to 'forget' this guard on success. On failure (when a
+ /// panic occurs), the Drop impl runs and causes all in-progress and future
+ /// 'get' calls to panic. Without this guard, all in-progress and future
+ /// 'get' calls would spin forever. Crashing is much better than getting
+ /// stuck in an infinite loop.
+ struct Guard<'a> {
+ state: &'a AtomicU8,
+ }
+
+ impl<'a> Drop for Guard<'a> {
+ fn drop(&mut self) {
+ // We force ourselves back into an INIT state. This will in turn
+ // cause any future 'get' calls to attempt calling 'self.create'
+ // again which will in turn panic because 'self.create' will now
+ // be 'None'.
+ self.state.store(LAZY_STATE_INIT, Ordering::Release);
+ }
+ }
+}
+
+#[cfg(test)]
+mod tests {
+ use super::*;
+
+ fn assert_send<T: Send>() {}
+ fn assert_sync<T: Sync>() {}
+ fn assert_unwind<T: core::panic::UnwindSafe>() {}
+ fn assert_refunwind<T: core::panic::RefUnwindSafe>() {}
+
+ #[test]
+ fn oibits() {
+ assert_send::<Lazy<u64>>();
+ assert_sync::<Lazy<u64>>();
+ assert_unwind::<Lazy<u64>>();
+ assert_refunwind::<Lazy<u64>>();
+ }
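+
+ // A minimal smoke-test sketch: it checks that initialization runs, that
+ // the Deref impl agrees with `Lazy::get`, and that repeated accesses
+ // observe the same underlying value. The `SMOKE` static is illustrative
+ // only.
+ #[test]
+ fn get_and_deref() {
+ static SMOKE: Lazy<u64> = Lazy::new(|| 40 + 2);
+ let first = Lazy::get(&SMOKE) as *const u64;
+ let second = Lazy::get(&SMOKE) as *const u64;
+ // Both accesses observe the same initialized value.
+ assert_eq!(first, second);
+ assert_eq!(42, *SMOKE);
+ }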
+
+ // This is a regression test because we used to rely on the inferred Sync
+ // impl for the Lazy type defined above (for 'alloc' mode). In the
+ // inferred impl, it only requires that T: Sync for Lazy<T>: Sync. But
+ // if we have that, we can actually make use of the fact that Lazy<T> drops
+ // T to create a value on one thread and drop it on another. This *should*
+ // require T: Send, but the missing bound previously let it sneak by.
+ //
+ // Basically, this test should not compile, so we... comment it out. We
+ // don't have a great way of testing compile-fail tests right now.
+ //
+ // See: https://github.com/BurntSushi/regex-automata/issues/30
+ /*
+ #[test]
+ fn sync_not_send() {
+ #[allow(dead_code)]
+ fn inner<T: Sync + Default>() {
+ let lazy = Lazy::new(move || T::default());
+ std::thread::scope(|scope| {
+ scope.spawn(|| {
+ Lazy::get(&lazy); // We create T in this thread
+ });
+ });
+ // And drop in this thread.
+ drop(lazy);
+ // So we have sent a !Send type across threads. (With some more
+ // legwork, it's possible to even sneak the value out of drop
+ // through a thread local.)
+ }
+ }
+ */
+}