summaryrefslogtreecommitdiffstats
path: root/vendor/regex/src/pool.rs
diff options
context:
space:
mode:
Diffstat (limited to 'vendor/regex/src/pool.rs')
-rw-r--r--vendor/regex/src/pool.rs333
1 files changed, 0 insertions, 333 deletions
diff --git a/vendor/regex/src/pool.rs b/vendor/regex/src/pool.rs
deleted file mode 100644
index 6a6f15b19..000000000
--- a/vendor/regex/src/pool.rs
+++ /dev/null
@@ -1,333 +0,0 @@
-// This module provides a relatively simple thread-safe pool of reusable
-// objects. For the most part, it's implemented by a stack represented by a
-// Mutex<Vec<T>>. It has one small trick: because unlocking a mutex is somewhat
-// costly, in the case where a pool is accessed by the first thread that tried
-// to get a value, we bypass the mutex. Here are some benchmarks showing the
-// difference.
-//
-// 1) misc::anchored_literal_long_non_match 21 (18571 MB/s)
-// 2) misc::anchored_literal_long_non_match 107 (3644 MB/s)
-// 3) misc::anchored_literal_long_non_match 45 (8666 MB/s)
-// 4) misc::anchored_literal_long_non_match 19 (20526 MB/s)
-//
-// (1) represents our baseline: the master branch at the time of writing when
-// using the 'thread_local' crate to implement the pool below.
-//
-// (2) represents a naive pool implemented completely via Mutex<Vec<T>>. There
-// is no special trick for bypassing the mutex.
-//
-// (3) is the same as (2), except it uses Mutex<Vec<Box<T>>>. It is twice as
-// fast because a Box<T> is much smaller than the T we use with a Pool in this
-// crate. So pushing and popping a Box<T> from a Vec is quite a bit faster
-// than for T.
-//
-// (4) is the same as (3), but with the trick for bypassing the mutex in the
-// case of the first-to-get thread.
-//
-// Why move off of thread_local? Even though (4) is a hair faster than (1)
-// above, this was not the main goal. The main goal was to move off of
-// thread_local and find a way to *simply* re-capture some of its speed for
-// regex's specific case. So again, why move off of it? The *primary* reason is
-// because of memory leaks. See https://github.com/rust-lang/regex/issues/362
-// for example. (Why do I want it to be simple? Well, I suppose what I mean is,
-// "use as much safe code as possible to minimize risk and be as sure as I can
-// be that it is correct.")
-//
-// My guess is that the thread_local design is probably not appropriate for
-// regex since its memory usage scales to the number of active threads that
-// have used a regex, where as the pool below scales to the number of threads
-// that simultaneously use a regex. While neither case permits contraction,
-// since we own the pool data structure below, we can add contraction if a
-// clear use case pops up in the wild. More pressingly though, it seems that
-// there are at least some use case patterns where one might have many threads
-// sitting around that might have used a regex at one point. While thread_local
-// does try to reuse space previously used by a thread that has since stopped,
-// its maximal memory usage still scales with the total number of active
-// threads. In contrast, the pool below scales with the total number of threads
-// *simultaneously* using the pool. The hope is that this uses less memory
-// overall. And if it doesn't, we can hopefully tune it somehow.
-//
-// It seems that these sort of conditions happen frequently
-// in FFI inside of other more "managed" languages. This was
-// mentioned in the issue linked above, and also mentioned here:
-// https://github.com/BurntSushi/rure-go/issues/3. And in particular, users
-// confirm that disabling the use of thread_local resolves the leak.
-//
-// There were other weaker reasons for moving off of thread_local as well.
-// Namely, at the time, I was looking to reduce dependencies. And for something
-// like regex, maintenance can be simpler when we own the full dependency tree.
-
-use std::panic::{RefUnwindSafe, UnwindSafe};
-use std::sync::atomic::{AtomicUsize, Ordering};
-use std::sync::Mutex;
-
-/// An atomic counter used to allocate thread IDs.
-static COUNTER: AtomicUsize = AtomicUsize::new(1);
-
-thread_local!(
- /// A thread local used to assign an ID to a thread.
- static THREAD_ID: usize = {
- let next = COUNTER.fetch_add(1, Ordering::Relaxed);
- // SAFETY: We cannot permit the reuse of thread IDs since reusing a
- // thread ID might result in more than one thread "owning" a pool,
- // and thus, permit accessing a mutable value from multiple threads
- // simultaneously without synchronization. The intent of this panic is
- // to be a sanity check. It is not expected that the thread ID space
- // will actually be exhausted in practice.
- //
- // This checks that the counter never wraps around, since atomic
- // addition wraps around on overflow.
- if next == 0 {
- panic!("regex: thread ID allocation space exhausted");
- }
- next
- };
-);
-
-/// The type of the function used to create values in a pool when the pool is
-/// empty and the caller requests one.
-type CreateFn<T> =
- Box<dyn Fn() -> T + Send + Sync + UnwindSafe + RefUnwindSafe + 'static>;
-
-/// A simple thread safe pool for reusing values.
-///
-/// Getting a value out comes with a guard. When that guard is dropped, the
-/// value is automatically put back in the pool.
-///
-/// A Pool<T> impls Sync when T is Send (even if it's not Sync). This means
-/// that T can use interior mutability. This is possible because a pool is
-/// guaranteed to provide a value to exactly one thread at any time.
-///
-/// Currently, a pool never contracts in size. Its size is proportional to the
-/// number of simultaneous uses.
-pub struct Pool<T> {
- /// A stack of T values to hand out. These are used when a Pool is
- /// accessed by a thread that didn't create it.
- stack: Mutex<Vec<Box<T>>>,
- /// A function to create more T values when stack is empty and a caller
- /// has requested a T.
- create: CreateFn<T>,
- /// The ID of the thread that owns this pool. The owner is the thread
- /// that makes the first call to 'get'. When the owner calls 'get', it
- /// gets 'owner_val' directly instead of returning a T from 'stack'.
- /// See comments elsewhere for details, but this is intended to be an
- /// optimization for the common case that makes getting a T faster.
- ///
- /// It is initialized to a value of zero (an impossible thread ID) as a
- /// sentinel to indicate that it is unowned.
- owner: AtomicUsize,
- /// A value to return when the caller is in the same thread that created
- /// the Pool.
- owner_val: T,
-}
-
-// SAFETY: Since we want to use a Pool from multiple threads simultaneously
-// behind an Arc, we need for it to be Sync. In cases where T is sync, Pool<T>
-// would be Sync. However, since we use a Pool to store mutable scratch space,
-// we wind up using a T that has interior mutability and is thus itself not
-// Sync. So what we *really* want is for our Pool<T> to by Sync even when T is
-// not Sync (but is at least Send).
-//
-// The only non-sync aspect of a Pool is its 'owner_val' field, which is used
-// to implement faster access to a pool value in the common case of a pool
-// being accessed in the same thread in which it was created. The 'stack' field
-// is also shared, but a Mutex<T> where T: Send is already Sync. So we only
-// need to worry about 'owner_val'.
-//
-// The key is to guarantee that 'owner_val' can only ever be accessed from one
-// thread. In our implementation below, we guarantee this by only returning the
-// 'owner_val' when the ID of the current thread matches the ID of the thread
-// that created the Pool. Since this can only ever be one thread, it follows
-// that only one thread can access 'owner_val' at any point in time. Thus, it
-// is safe to declare that Pool<T> is Sync when T is Send.
-//
-// NOTE: It would also be possible to make the owning thread be the *first*
-// thread that tries to get a value out of a Pool. However, the current
-// implementation is a little simpler and it's not clear if making the first
-// thread (rather than the creating thread) is meaningfully better.
-//
-// If there is a way to achieve our performance goals using safe code, then
-// I would very much welcome a patch. As it stands, the implementation below
-// tries to balance safety with performance. The case where a Regex is used
-// from multiple threads simultaneously will suffer a bit since getting a cache
-// will require unlocking a mutex.
-unsafe impl<T: Send> Sync for Pool<T> {}
-
-impl<T: ::std::fmt::Debug> ::std::fmt::Debug for Pool<T> {
- fn fmt(&self, f: &mut ::std::fmt::Formatter<'_>) -> ::std::fmt::Result {
- f.debug_struct("Pool")
- .field("stack", &self.stack)
- .field("owner", &self.owner)
- .field("owner_val", &self.owner_val)
- .finish()
- }
-}
-
-/// A guard that is returned when a caller requests a value from the pool.
-///
-/// The purpose of the guard is to use RAII to automatically put the value back
-/// in the pool once it's dropped.
-#[derive(Debug)]
-pub struct PoolGuard<'a, T: Send> {
- /// The pool that this guard is attached to.
- pool: &'a Pool<T>,
- /// This is None when the guard represents the special "owned" value. In
- /// which case, the value is retrieved from 'pool.owner_val'.
- value: Option<Box<T>>,
-}
-
-impl<T: Send> Pool<T> {
- /// Create a new pool. The given closure is used to create values in the
- /// pool when necessary.
- pub fn new(create: CreateFn<T>) -> Pool<T> {
- let owner = AtomicUsize::new(0);
- let owner_val = create();
- Pool { stack: Mutex::new(vec![]), create, owner, owner_val }
- }
-
- /// Get a value from the pool. The caller is guaranteed to have exclusive
- /// access to the given value.
- ///
- /// Note that there is no guarantee provided about which value in the
- /// pool is returned. That is, calling get, dropping the guard (causing
- /// the value to go back into the pool) and then calling get again is NOT
- /// guaranteed to return the same value received in the first get call.
- #[cfg_attr(feature = "perf-inline", inline(always))]
- pub fn get(&self) -> PoolGuard<'_, T> {
- // Our fast path checks if the caller is the thread that "owns" this
- // pool. Or stated differently, whether it is the first thread that
- // tried to extract a value from the pool. If it is, then we can return
- // a T to the caller without going through a mutex.
- //
- // SAFETY: We must guarantee that only one thread gets access to this
- // value. Since a thread is uniquely identified by the THREAD_ID thread
- // local, it follows that is the caller's thread ID is equal to the
- // owner, then only one thread may receive this value.
- let caller = THREAD_ID.with(|id| *id);
- let owner = self.owner.load(Ordering::Relaxed);
- if caller == owner {
- return self.guard_owned();
- }
- self.get_slow(caller, owner)
- }
-
- /// This is the "slow" version that goes through a mutex to pop an
- /// allocated value off a stack to return to the caller. (Or, if the stack
- /// is empty, a new value is created.)
- ///
- /// If the pool has no owner, then this will set the owner.
- #[cold]
- fn get_slow(&self, caller: usize, owner: usize) -> PoolGuard<'_, T> {
- use std::sync::atomic::Ordering::Relaxed;
-
- if owner == 0 {
- // The sentinel 0 value means this pool is not yet owned. We
- // try to atomically set the owner. If we do, then this thread
- // becomes the owner and we can return a guard that represents
- // the special T for the owner.
- let res = self.owner.compare_exchange(0, caller, Relaxed, Relaxed);
- if res.is_ok() {
- return self.guard_owned();
- }
- }
- let mut stack = self.stack.lock().unwrap();
- let value = match stack.pop() {
- None => Box::new((self.create)()),
- Some(value) => value,
- };
- self.guard_stack(value)
- }
-
- /// Puts a value back into the pool. Callers don't need to call this. Once
- /// the guard that's returned by 'get' is dropped, it is put back into the
- /// pool automatically.
- fn put(&self, value: Box<T>) {
- let mut stack = self.stack.lock().unwrap();
- stack.push(value);
- }
-
- /// Create a guard that represents the special owned T.
- fn guard_owned(&self) -> PoolGuard<'_, T> {
- PoolGuard { pool: self, value: None }
- }
-
- /// Create a guard that contains a value from the pool's stack.
- fn guard_stack(&self, value: Box<T>) -> PoolGuard<'_, T> {
- PoolGuard { pool: self, value: Some(value) }
- }
-}
-
-impl<'a, T: Send> PoolGuard<'a, T> {
- /// Return the underlying value.
- pub fn value(&self) -> &T {
- match self.value {
- None => &self.pool.owner_val,
- Some(ref v) => &**v,
- }
- }
-}
-
-impl<'a, T: Send> Drop for PoolGuard<'a, T> {
- #[cfg_attr(feature = "perf-inline", inline(always))]
- fn drop(&mut self) {
- if let Some(value) = self.value.take() {
- self.pool.put(value);
- }
- }
-}
-
-#[cfg(test)]
-mod tests {
- use std::panic::{RefUnwindSafe, UnwindSafe};
-
- use super::*;
-
- #[test]
- fn oibits() {
- use crate::exec::ProgramCache;
-
- fn has_oibits<T: Send + Sync + UnwindSafe + RefUnwindSafe>() {}
- has_oibits::<Pool<ProgramCache>>();
- }
-
- // Tests that Pool implements the "single owner" optimization. That is, the
- // thread that first accesses the pool gets its own copy, while all other
- // threads get distinct copies.
- #[test]
- fn thread_owner_optimization() {
- use std::cell::RefCell;
- use std::sync::Arc;
-
- let pool: Arc<Pool<RefCell<Vec<char>>>> =
- Arc::new(Pool::new(Box::new(|| RefCell::new(vec!['a']))));
- pool.get().value().borrow_mut().push('x');
-
- let pool1 = pool.clone();
- let t1 = std::thread::spawn(move || {
- let guard = pool1.get();
- let v = guard.value();
- v.borrow_mut().push('y');
- });
-
- let pool2 = pool.clone();
- let t2 = std::thread::spawn(move || {
- let guard = pool2.get();
- let v = guard.value();
- v.borrow_mut().push('z');
- });
-
- t1.join().unwrap();
- t2.join().unwrap();
-
- // If we didn't implement the single owner optimization, then one of
- // the threads above is likely to have mutated the [a, x] vec that
- // we stuffed in the pool before spawning the threads. But since
- // neither thread was first to access the pool, and because of the
- // optimization, we should be guaranteed that neither thread mutates
- // the special owned pool value.
- //
- // (Technically this is an implementation detail and not a contract of
- // Pool's API.)
- assert_eq!(vec!['a', 'x'], *pool.get().value().borrow());
- }
-}