| field | value | date |
|---|---|---|
| author | Daniel Baumann <daniel.baumann@progress-linux.org> | 2024-04-19 00:47:55 +0000 |
| committer | Daniel Baumann <daniel.baumann@progress-linux.org> | 2024-04-19 00:47:55 +0000 |
| commit | 26a029d407be480d791972afb5975cf62c9360a6 | |
| tree | f435a8308119effd964b339f76abb83a57c29483 /third_party/rust/tokio/src/runtime | |
| parent | Initial commit. | |
Adding upstream version 124.0.1.
Signed-off-by: Daniel Baumann <daniel.baumann@progress-linux.org>
Diffstat (limited to 'third_party/rust/tokio/src/runtime')
96 files changed, 21122 insertions, 0 deletions
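The diffs below vendor the upstream tokio runtime sources verbatim (blocking pool, builder, config, and context modules). For orientation before the raw diff, here is a minimal usage sketch of the builder and blocking-pool APIs these files implement, assuming a consumer crate that depends on tokio 1.x with the `rt-multi-thread` feature; the example is illustrative and not part of the commit itself:

```rust
use std::time::Duration;

fn main() -> std::io::Result<()> {
    // Builder (runtime/builder.rs): configure worker and blocking threads.
    let rt = tokio::runtime::Builder::new_multi_thread()
        .worker_threads(4)
        .max_blocking_threads(64)
        .thread_name("example-worker")
        .thread_keep_alive(Duration::from_secs(10))
        .enable_all()
        .build()?;

    // spawn_blocking (runtime/blocking/pool.rs): run a synchronous job on the
    // blocking pool without stalling the async worker threads.
    rt.block_on(async {
        let sum = tokio::task::spawn_blocking(|| {
            // CPU-bound or blocking work goes here.
            (0u64..1_000_000).sum::<u64>()
        })
        .await
        .expect("blocking task panicked");
        println!("sum = {sum}");
    });

    Ok(())
}
```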
diff --git a/third_party/rust/tokio/src/runtime/blocking/mod.rs b/third_party/rust/tokio/src/runtime/blocking/mod.rs new file mode 100644 index 0000000000..c42924be77 --- /dev/null +++ b/third_party/rust/tokio/src/runtime/blocking/mod.rs @@ -0,0 +1,26 @@ +//! Abstracts out the APIs necessary to `Runtime` for integrating the blocking +//! pool. When the `blocking` feature flag is **not** enabled, these APIs are +//! shells. This isolates the complexity of dealing with conditional +//! compilation. + +mod pool; +pub(crate) use pool::{spawn_blocking, BlockingPool, Spawner}; + +cfg_fs! { + pub(crate) use pool::spawn_mandatory_blocking; +} + +cfg_trace! { + pub(crate) use pool::Mandatory; +} + +mod schedule; +mod shutdown; +mod task; +pub(crate) use task::BlockingTask; + +use crate::runtime::Builder; + +pub(crate) fn create_blocking_pool(builder: &Builder, thread_cap: usize) -> BlockingPool { + BlockingPool::new(builder, thread_cap) +} diff --git a/third_party/rust/tokio/src/runtime/blocking/pool.rs b/third_party/rust/tokio/src/runtime/blocking/pool.rs new file mode 100644 index 0000000000..a23b0a0d2d --- /dev/null +++ b/third_party/rust/tokio/src/runtime/blocking/pool.rs @@ -0,0 +1,602 @@ +//! Thread pool for blocking operations + +use crate::loom::sync::{Arc, Condvar, Mutex}; +use crate::loom::thread; +use crate::runtime::blocking::schedule::BlockingSchedule; +use crate::runtime::blocking::{shutdown, BlockingTask}; +use crate::runtime::builder::ThreadNameFn; +use crate::runtime::task::{self, JoinHandle}; +use crate::runtime::{Builder, Callback, Handle}; + +use std::collections::{HashMap, VecDeque}; +use std::fmt; +use std::io; +use std::sync::atomic::{AtomicUsize, Ordering}; +use std::time::Duration; + +pub(crate) struct BlockingPool { + spawner: Spawner, + shutdown_rx: shutdown::Receiver, +} + +#[derive(Clone)] +pub(crate) struct Spawner { + inner: Arc<Inner>, +} + +#[derive(Default)] +pub(crate) struct SpawnerMetrics { + num_threads: AtomicUsize, + num_idle_threads: AtomicUsize, + queue_depth: AtomicUsize, +} + +impl SpawnerMetrics { + fn num_threads(&self) -> usize { + self.num_threads.load(Ordering::Relaxed) + } + + fn num_idle_threads(&self) -> usize { + self.num_idle_threads.load(Ordering::Relaxed) + } + + cfg_metrics! { + fn queue_depth(&self) -> usize { + self.queue_depth.load(Ordering::Relaxed) + } + } + + fn inc_num_threads(&self) { + self.num_threads.fetch_add(1, Ordering::Relaxed); + } + + fn dec_num_threads(&self) { + self.num_threads.fetch_sub(1, Ordering::Relaxed); + } + + fn inc_num_idle_threads(&self) { + self.num_idle_threads.fetch_add(1, Ordering::Relaxed); + } + + fn dec_num_idle_threads(&self) -> usize { + self.num_idle_threads.fetch_sub(1, Ordering::Relaxed) + } + + fn inc_queue_depth(&self) { + self.queue_depth.fetch_add(1, Ordering::Relaxed); + } + + fn dec_queue_depth(&self) { + self.queue_depth.fetch_sub(1, Ordering::Relaxed); + } +} + +struct Inner { + /// State shared between worker threads. + shared: Mutex<Shared>, + + /// Pool threads wait on this. + condvar: Condvar, + + /// Spawned threads use this name. + thread_name: ThreadNameFn, + + /// Spawned thread stack size. + stack_size: Option<usize>, + + /// Call after a thread starts. + after_start: Option<Callback>, + + /// Call before a thread stops. + before_stop: Option<Callback>, + + // Maximum number of threads. + thread_cap: usize, + + // Customizable wait timeout. + keep_alive: Duration, + + // Metrics about the pool. 
+ metrics: SpawnerMetrics, +} + +struct Shared { + queue: VecDeque<Task>, + num_notify: u32, + shutdown: bool, + shutdown_tx: Option<shutdown::Sender>, + /// Prior to shutdown, we clean up JoinHandles by having each timed-out + /// thread join on the previous timed-out thread. This is not strictly + /// necessary but helps avoid Valgrind false positives, see + /// <https://github.com/tokio-rs/tokio/commit/646fbae76535e397ef79dbcaacb945d4c829f666> + /// for more information. + last_exiting_thread: Option<thread::JoinHandle<()>>, + /// This holds the JoinHandles for all running threads; on shutdown, the thread + /// calling shutdown handles joining on these. + worker_threads: HashMap<usize, thread::JoinHandle<()>>, + /// This is a counter used to iterate worker_threads in a consistent order (for loom's + /// benefit). + worker_thread_index: usize, +} + +pub(crate) struct Task { + task: task::UnownedTask<BlockingSchedule>, + mandatory: Mandatory, +} + +#[derive(PartialEq, Eq)] +pub(crate) enum Mandatory { + #[cfg_attr(not(fs), allow(dead_code))] + Mandatory, + NonMandatory, +} + +pub(crate) enum SpawnError { + /// Pool is shutting down and the task was not scheduled + ShuttingDown, + /// There are no worker threads available to take the task + /// and the OS failed to spawn a new one + NoThreads(io::Error), +} + +impl From<SpawnError> for io::Error { + fn from(e: SpawnError) -> Self { + match e { + SpawnError::ShuttingDown => { + io::Error::new(io::ErrorKind::Other, "blocking pool shutting down") + } + SpawnError::NoThreads(e) => e, + } + } +} + +impl Task { + pub(crate) fn new(task: task::UnownedTask<BlockingSchedule>, mandatory: Mandatory) -> Task { + Task { task, mandatory } + } + + fn run(self) { + self.task.run(); + } + + fn shutdown_or_run_if_mandatory(self) { + match self.mandatory { + Mandatory::NonMandatory => self.task.shutdown(), + Mandatory::Mandatory => self.task.run(), + } + } +} + +const KEEP_ALIVE: Duration = Duration::from_secs(10); + +/// Runs the provided function on an executor dedicated to blocking operations. +/// Tasks will be scheduled as non-mandatory, meaning they may not get executed +/// in case of runtime shutdown. +#[track_caller] +#[cfg_attr(tokio_wasi, allow(dead_code))] +pub(crate) fn spawn_blocking<F, R>(func: F) -> JoinHandle<R> +where + F: FnOnce() -> R + Send + 'static, + R: Send + 'static, +{ + let rt = Handle::current(); + rt.spawn_blocking(func) +} + +cfg_fs! { + #[cfg_attr(any( + all(loom, not(test)), // the function is covered by loom tests + test + ), allow(dead_code))] + /// Runs the provided function on an executor dedicated to blocking + /// operations. Tasks will be scheduled as mandatory, meaning they are + /// guaranteed to run unless a shutdown is already taking place. In case a + /// shutdown is already taking place, `None` will be returned. 
+ pub(crate) fn spawn_mandatory_blocking<F, R>(func: F) -> Option<JoinHandle<R>> + where + F: FnOnce() -> R + Send + 'static, + R: Send + 'static, + { + let rt = Handle::current(); + rt.inner.blocking_spawner().spawn_mandatory_blocking(&rt, func) + } +} + +// ===== impl BlockingPool ===== + +impl BlockingPool { + pub(crate) fn new(builder: &Builder, thread_cap: usize) -> BlockingPool { + let (shutdown_tx, shutdown_rx) = shutdown::channel(); + let keep_alive = builder.keep_alive.unwrap_or(KEEP_ALIVE); + + BlockingPool { + spawner: Spawner { + inner: Arc::new(Inner { + shared: Mutex::new(Shared { + queue: VecDeque::new(), + num_notify: 0, + shutdown: false, + shutdown_tx: Some(shutdown_tx), + last_exiting_thread: None, + worker_threads: HashMap::new(), + worker_thread_index: 0, + }), + condvar: Condvar::new(), + thread_name: builder.thread_name.clone(), + stack_size: builder.thread_stack_size, + after_start: builder.after_start.clone(), + before_stop: builder.before_stop.clone(), + thread_cap, + keep_alive, + metrics: Default::default(), + }), + }, + shutdown_rx, + } + } + + pub(crate) fn spawner(&self) -> &Spawner { + &self.spawner + } + + pub(crate) fn shutdown(&mut self, timeout: Option<Duration>) { + let mut shared = self.spawner.inner.shared.lock(); + + // The function can be called multiple times. First, by explicitly + // calling `shutdown` then by the drop handler calling `shutdown`. This + // prevents shutting down twice. + if shared.shutdown { + return; + } + + shared.shutdown = true; + shared.shutdown_tx = None; + self.spawner.inner.condvar.notify_all(); + + let last_exited_thread = std::mem::take(&mut shared.last_exiting_thread); + let workers = std::mem::take(&mut shared.worker_threads); + + drop(shared); + + if self.shutdown_rx.wait(timeout) { + let _ = last_exited_thread.map(|th| th.join()); + + // Loom requires that execution be deterministic, so sort by thread ID before joining. + // (HashMaps use a randomly-seeded hash function, so the order is nondeterministic) + let mut workers: Vec<(usize, thread::JoinHandle<()>)> = workers.into_iter().collect(); + workers.sort_by_key(|(id, _)| *id); + + for (_id, handle) in workers.into_iter() { + let _ = handle.join(); + } + } + } +} + +impl Drop for BlockingPool { + fn drop(&mut self) { + self.shutdown(None); + } +} + +impl fmt::Debug for BlockingPool { + fn fmt(&self, fmt: &mut fmt::Formatter<'_>) -> fmt::Result { + fmt.debug_struct("BlockingPool").finish() + } +} + +// ===== impl Spawner ===== + +impl Spawner { + #[track_caller] + pub(crate) fn spawn_blocking<F, R>(&self, rt: &Handle, func: F) -> JoinHandle<R> + where + F: FnOnce() -> R + Send + 'static, + R: Send + 'static, + { + let (join_handle, spawn_result) = + if cfg!(debug_assertions) && std::mem::size_of::<F>() > 2048 { + self.spawn_blocking_inner(Box::new(func), Mandatory::NonMandatory, None, rt) + } else { + self.spawn_blocking_inner(func, Mandatory::NonMandatory, None, rt) + }; + + match spawn_result { + Ok(()) => join_handle, + // Compat: do not panic here, return the join_handle even though it will never resolve + Err(SpawnError::ShuttingDown) => join_handle, + Err(SpawnError::NoThreads(e)) => { + panic!("OS can't spawn worker thread: {}", e) + } + } + } + + cfg_fs! 
{ + #[track_caller] + #[cfg_attr(any( + all(loom, not(test)), // the function is covered by loom tests + test + ), allow(dead_code))] + pub(crate) fn spawn_mandatory_blocking<F, R>(&self, rt: &Handle, func: F) -> Option<JoinHandle<R>> + where + F: FnOnce() -> R + Send + 'static, + R: Send + 'static, + { + let (join_handle, spawn_result) = if cfg!(debug_assertions) && std::mem::size_of::<F>() > 2048 { + self.spawn_blocking_inner( + Box::new(func), + Mandatory::Mandatory, + None, + rt, + ) + } else { + self.spawn_blocking_inner( + func, + Mandatory::Mandatory, + None, + rt, + ) + }; + + if spawn_result.is_ok() { + Some(join_handle) + } else { + None + } + } + } + + #[track_caller] + pub(crate) fn spawn_blocking_inner<F, R>( + &self, + func: F, + is_mandatory: Mandatory, + name: Option<&str>, + rt: &Handle, + ) -> (JoinHandle<R>, Result<(), SpawnError>) + where + F: FnOnce() -> R + Send + 'static, + R: Send + 'static, + { + let fut = BlockingTask::new(func); + let id = task::Id::next(); + #[cfg(all(tokio_unstable, feature = "tracing"))] + let fut = { + use tracing::Instrument; + let location = std::panic::Location::caller(); + let span = tracing::trace_span!( + target: "tokio::task::blocking", + "runtime.spawn", + kind = %"blocking", + task.name = %name.unwrap_or_default(), + task.id = id.as_u64(), + "fn" = %std::any::type_name::<F>(), + loc.file = location.file(), + loc.line = location.line(), + loc.col = location.column(), + ); + fut.instrument(span) + }; + + #[cfg(not(all(tokio_unstable, feature = "tracing")))] + let _ = name; + + let (task, handle) = task::unowned(fut, BlockingSchedule::new(rt), id); + + let spawned = self.spawn_task(Task::new(task, is_mandatory), rt); + (handle, spawned) + } + + fn spawn_task(&self, task: Task, rt: &Handle) -> Result<(), SpawnError> { + let mut shared = self.inner.shared.lock(); + + if shared.shutdown { + // Shutdown the task: it's fine to shutdown this task (even if + // mandatory) because it was scheduled after the shutdown of the + // runtime began. + task.task.shutdown(); + + // no need to even push this task; it would never get picked up + return Err(SpawnError::ShuttingDown); + } + + shared.queue.push_back(task); + self.inner.metrics.inc_queue_depth(); + + if self.inner.metrics.num_idle_threads() == 0 { + // No threads are able to process the task. + + if self.inner.metrics.num_threads() == self.inner.thread_cap { + // At max number of threads + } else { + assert!(shared.shutdown_tx.is_some()); + let shutdown_tx = shared.shutdown_tx.clone(); + + if let Some(shutdown_tx) = shutdown_tx { + let id = shared.worker_thread_index; + + match self.spawn_thread(shutdown_tx, rt, id) { + Ok(handle) => { + self.inner.metrics.inc_num_threads(); + shared.worker_thread_index += 1; + shared.worker_threads.insert(id, handle); + } + Err(ref e) + if is_temporary_os_thread_error(e) + && self.inner.metrics.num_threads() > 0 => + { + // OS temporarily failed to spawn a new thread. + // The task will be picked up eventually by a currently + // busy thread. + } + Err(e) => { + // The OS refused to spawn the thread and there is no thread + // to pick up the task that has just been pushed to the queue. + return Err(SpawnError::NoThreads(e)); + } + } + } + } + } else { + // Notify an idle worker thread. The notification counter + // is used to count the needed amount of notifications + // exactly. Thread libraries may generate spurious + // wakeups, this counter is used to keep us in a + // consistent state. 
+ self.inner.metrics.dec_num_idle_threads(); + shared.num_notify += 1; + self.inner.condvar.notify_one(); + } + + Ok(()) + } + + fn spawn_thread( + &self, + shutdown_tx: shutdown::Sender, + rt: &Handle, + id: usize, + ) -> std::io::Result<thread::JoinHandle<()>> { + let mut builder = thread::Builder::new().name((self.inner.thread_name)()); + + if let Some(stack_size) = self.inner.stack_size { + builder = builder.stack_size(stack_size); + } + + let rt = rt.clone(); + + builder.spawn(move || { + // Only the reference should be moved into the closure + let _enter = rt.enter(); + rt.inner.blocking_spawner().inner.run(id); + drop(shutdown_tx); + }) + } +} + +cfg_metrics! { + impl Spawner { + pub(crate) fn num_threads(&self) -> usize { + self.inner.metrics.num_threads() + } + + pub(crate) fn num_idle_threads(&self) -> usize { + self.inner.metrics.num_idle_threads() + } + + pub(crate) fn queue_depth(&self) -> usize { + self.inner.metrics.queue_depth() + } + } +} + +// Tells whether the error when spawning a thread is temporary. +#[inline] +fn is_temporary_os_thread_error(error: &std::io::Error) -> bool { + matches!(error.kind(), std::io::ErrorKind::WouldBlock) +} + +impl Inner { + fn run(&self, worker_thread_id: usize) { + if let Some(f) = &self.after_start { + f() + } + + let mut shared = self.shared.lock(); + let mut join_on_thread = None; + + 'main: loop { + // BUSY + while let Some(task) = shared.queue.pop_front() { + self.metrics.dec_queue_depth(); + drop(shared); + task.run(); + + shared = self.shared.lock(); + } + + // IDLE + self.metrics.inc_num_idle_threads(); + + while !shared.shutdown { + let lock_result = self.condvar.wait_timeout(shared, self.keep_alive).unwrap(); + + shared = lock_result.0; + let timeout_result = lock_result.1; + + if shared.num_notify != 0 { + // We have received a legitimate wakeup, + // acknowledge it by decrementing the counter + // and transition to the BUSY state. + shared.num_notify -= 1; + break; + } + + // Even if the condvar "timed out", if the pool is entering the + // shutdown phase, we want to perform the cleanup logic. + if !shared.shutdown && timeout_result.timed_out() { + // We'll join the prior timed-out thread's JoinHandle after dropping the lock. + // This isn't done when shutting down, because the thread calling shutdown will + // handle joining everything. + let my_handle = shared.worker_threads.remove(&worker_thread_id); + join_on_thread = std::mem::replace(&mut shared.last_exiting_thread, my_handle); + + break 'main; + } + + // Spurious wakeup detected, go back to sleep. + } + + if shared.shutdown { + // Drain the queue + while let Some(task) = shared.queue.pop_front() { + self.metrics.dec_queue_depth(); + drop(shared); + + task.shutdown_or_run_if_mandatory(); + + shared = self.shared.lock(); + } + + // Work was produced, and we "took" it (by decrementing num_notify). + // This means that num_idle was decremented once for our wakeup. + // But, since we are exiting, we need to "undo" that, as we'll stay idle. + self.metrics.inc_num_idle_threads(); + // NOTE: Technically we should also do num_notify++ and notify again, + // but since we're shutting down anyway, that won't be necessary. + break; + } + } + + // Thread exit + self.metrics.dec_num_threads(); + + // num_idle should now be tracked exactly, panic + // with a descriptive message if it is not the + // case. 
+ let prev_idle = self.metrics.dec_num_idle_threads(); + if prev_idle < self.metrics.num_idle_threads() { + panic!("num_idle_threads underflowed on thread exit") + } + + if shared.shutdown && self.metrics.num_threads() == 0 { + self.condvar.notify_one(); + } + + drop(shared); + + if let Some(f) = &self.before_stop { + f() + } + + if let Some(handle) = join_on_thread { + let _ = handle.join(); + } + } +} + +impl fmt::Debug for Spawner { + fn fmt(&self, fmt: &mut fmt::Formatter<'_>) -> fmt::Result { + fmt.debug_struct("blocking::Spawner").finish() + } +} diff --git a/third_party/rust/tokio/src/runtime/blocking/schedule.rs b/third_party/rust/tokio/src/runtime/blocking/schedule.rs new file mode 100644 index 0000000000..edf775be8b --- /dev/null +++ b/third_party/rust/tokio/src/runtime/blocking/schedule.rs @@ -0,0 +1,56 @@ +#[cfg(feature = "test-util")] +use crate::runtime::scheduler; +use crate::runtime::task::{self, Task}; +use crate::runtime::Handle; + +/// `task::Schedule` implementation that does nothing (except some bookkeeping +/// in test-util builds). This is unique to the blocking scheduler as tasks +/// scheduled are not really futures but blocking operations. +/// +/// We avoid storing the task by forgetting it in `bind` and re-materializing it +/// in `release`. +pub(crate) struct BlockingSchedule { + #[cfg(feature = "test-util")] + handle: Handle, +} + +impl BlockingSchedule { + #[cfg_attr(not(feature = "test-util"), allow(unused_variables))] + pub(crate) fn new(handle: &Handle) -> Self { + #[cfg(feature = "test-util")] + { + match &handle.inner { + scheduler::Handle::CurrentThread(handle) => { + handle.driver.clock.inhibit_auto_advance(); + } + #[cfg(all(feature = "rt-multi-thread", not(tokio_wasi)))] + scheduler::Handle::MultiThread(_) => {} + } + } + BlockingSchedule { + #[cfg(feature = "test-util")] + handle: handle.clone(), + } + } +} + +impl task::Schedule for BlockingSchedule { + fn release(&self, _task: &Task<Self>) -> Option<Task<Self>> { + #[cfg(feature = "test-util")] + { + match &self.handle.inner { + scheduler::Handle::CurrentThread(handle) => { + handle.driver.clock.allow_auto_advance(); + handle.driver.unpark(); + } + #[cfg(all(feature = "rt-multi-thread", not(tokio_wasi)))] + scheduler::Handle::MultiThread(_) => {} + } + } + None + } + + fn schedule(&self, _task: task::Notified<Self>) { + unreachable!(); + } +} diff --git a/third_party/rust/tokio/src/runtime/blocking/shutdown.rs b/third_party/rust/tokio/src/runtime/blocking/shutdown.rs new file mode 100644 index 0000000000..fe5abae076 --- /dev/null +++ b/third_party/rust/tokio/src/runtime/blocking/shutdown.rs @@ -0,0 +1,71 @@ +//! A shutdown channel. +//! +//! Each worker holds the `Sender` half. When all the `Sender` halves are +//! dropped, the `Receiver` receives a notification. + +use crate::loom::sync::Arc; +use crate::sync::oneshot; + +use std::time::Duration; + +#[derive(Debug, Clone)] +pub(super) struct Sender { + _tx: Arc<oneshot::Sender<()>>, +} + +#[derive(Debug)] +pub(super) struct Receiver { + rx: oneshot::Receiver<()>, +} + +pub(super) fn channel() -> (Sender, Receiver) { + let (tx, rx) = oneshot::channel(); + let tx = Sender { _tx: Arc::new(tx) }; + let rx = Receiver { rx }; + + (tx, rx) +} + +impl Receiver { + /// Blocks the current thread until all `Sender` handles drop. + /// + /// If `timeout` is `Some`, the thread is blocked for **at most** `timeout` + /// duration. If `timeout` is `None`, then the thread is blocked until the + /// shutdown signal is received. 
+ /// + /// If the timeout has elapsed, it returns `false`, otherwise it returns `true`. + pub(crate) fn wait(&mut self, timeout: Option<Duration>) -> bool { + use crate::runtime::context::try_enter_blocking_region; + + if timeout == Some(Duration::from_nanos(0)) { + return false; + } + + let mut e = match try_enter_blocking_region() { + Some(enter) => enter, + _ => { + if std::thread::panicking() { + // Don't panic in a panic + return false; + } else { + panic!( + "Cannot drop a runtime in a context where blocking is not allowed. \ + This happens when a runtime is dropped from within an asynchronous context." + ); + } + } + }; + + // The oneshot completes with an Err + // + // If blocking fails to wait, this indicates a problem parking the + // current thread (usually, shutting down a runtime stored in a + // thread-local). + if let Some(timeout) = timeout { + e.block_on_timeout(&mut self.rx, timeout).is_ok() + } else { + let _ = e.block_on(&mut self.rx); + true + } + } +} diff --git a/third_party/rust/tokio/src/runtime/blocking/task.rs b/third_party/rust/tokio/src/runtime/blocking/task.rs new file mode 100644 index 0000000000..c446175400 --- /dev/null +++ b/third_party/rust/tokio/src/runtime/blocking/task.rs @@ -0,0 +1,44 @@ +use std::future::Future; +use std::pin::Pin; +use std::task::{Context, Poll}; + +/// Converts a function to a future that completes on poll. +pub(crate) struct BlockingTask<T> { + func: Option<T>, +} + +impl<T> BlockingTask<T> { + /// Initializes a new blocking task from the given function. + pub(crate) fn new(func: T) -> BlockingTask<T> { + BlockingTask { func: Some(func) } + } +} + +// The closure `F` is never pinned +impl<T> Unpin for BlockingTask<T> {} + +impl<T, R> Future for BlockingTask<T> +where + T: FnOnce() -> R + Send + 'static, + R: Send + 'static, +{ + type Output = R; + + fn poll(mut self: Pin<&mut Self>, _cx: &mut Context<'_>) -> Poll<R> { + let me = &mut *self; + let func = me + .func + .take() + .expect("[internal exception] blocking task ran twice."); + + // This is a little subtle: + // For convenience, we'd like _every_ call tokio ever makes to Task::poll() to be budgeted + // using coop. However, the way things are currently modeled, even running a blocking task + // currently goes through Task::poll(), and so is subject to budgeting. That isn't really + // what we want; a blocking task may itself want to run tasks (it might be a Worker!), so + // we want it to start without any budgeting. + crate::runtime::coop::stop(); + + Poll::Ready(func()) + } +} diff --git a/third_party/rust/tokio/src/runtime/builder.rs b/third_party/rust/tokio/src/runtime/builder.rs new file mode 100644 index 0000000000..af9e0e172f --- /dev/null +++ b/third_party/rust/tokio/src/runtime/builder.rs @@ -0,0 +1,1236 @@ +use crate::runtime::handle::Handle; +use crate::runtime::{blocking, driver, Callback, HistogramBuilder, Runtime}; +use crate::util::rand::{RngSeed, RngSeedGenerator}; + +use std::fmt; +use std::io; +use std::time::Duration; + +/// Builds Tokio Runtime with custom configuration values. +/// +/// Methods can be chained in order to set the configuration values. The +/// Runtime is constructed by calling [`build`]. +/// +/// New instances of `Builder` are obtained via [`Builder::new_multi_thread`] +/// or [`Builder::new_current_thread`]. +/// +/// See function level documentation for details on the various configuration +/// settings. 
+/// +/// [`build`]: method@Self::build +/// [`Builder::new_multi_thread`]: method@Self::new_multi_thread +/// [`Builder::new_current_thread`]: method@Self::new_current_thread +/// +/// # Examples +/// +/// ``` +/// use tokio::runtime::Builder; +/// +/// fn main() { +/// // build runtime +/// let runtime = Builder::new_multi_thread() +/// .worker_threads(4) +/// .thread_name("my-custom-name") +/// .thread_stack_size(3 * 1024 * 1024) +/// .build() +/// .unwrap(); +/// +/// // use runtime ... +/// } +/// ``` +pub struct Builder { + /// Runtime type + kind: Kind, + + /// Whether or not to enable the I/O driver + enable_io: bool, + nevents: usize, + + /// Whether or not to enable the time driver + enable_time: bool, + + /// Whether or not the clock should start paused. + start_paused: bool, + + /// The number of worker threads, used by Runtime. + /// + /// Only used when not using the current-thread executor. + worker_threads: Option<usize>, + + /// Cap on thread usage. + max_blocking_threads: usize, + + /// Name fn used for threads spawned by the runtime. + pub(super) thread_name: ThreadNameFn, + + /// Stack size used for threads spawned by the runtime. + pub(super) thread_stack_size: Option<usize>, + + /// Callback to run after each thread starts. + pub(super) after_start: Option<Callback>, + + /// To run before each worker thread stops + pub(super) before_stop: Option<Callback>, + + /// To run before each worker thread is parked. + pub(super) before_park: Option<Callback>, + + /// To run after each thread is unparked. + pub(super) after_unpark: Option<Callback>, + + /// Customizable keep alive timeout for BlockingPool + pub(super) keep_alive: Option<Duration>, + + /// How many ticks before pulling a task from the global/remote queue? + /// + /// When `None`, the value is unspecified and behavior details are left to + /// the scheduler. Each scheduler flavor could choose to either pick its own + /// default value or use some other strategy to decide when to poll from the + /// global queue. For example, the multi-threaded scheduler uses a + /// self-tuning strategy based on mean task poll times. + pub(super) global_queue_interval: Option<u32>, + + /// How many ticks before yielding to the driver for timer and I/O events? + pub(super) event_interval: u32, + + /// When true, the multi-threade scheduler LIFO slot should not be used. + /// + /// This option should only be exposed as unstable. + pub(super) disable_lifo_slot: bool, + + /// Specify a random number generator seed to provide deterministic results + pub(super) seed_generator: RngSeedGenerator, + + /// When true, enables task poll count histogram instrumentation. + pub(super) metrics_poll_count_histogram_enable: bool, + + /// Configures the task poll count histogram + pub(super) metrics_poll_count_histogram: HistogramBuilder, + + #[cfg(tokio_unstable)] + pub(super) unhandled_panic: UnhandledPanic, +} + +cfg_unstable! { + /// How the runtime should respond to unhandled panics. + /// + /// Instances of `UnhandledPanic` are passed to `Builder::unhandled_panic` + /// to configure the runtime behavior when a spawned task panics. + /// + /// See [`Builder::unhandled_panic`] for more details. + #[derive(Debug, Clone)] + #[non_exhaustive] + pub enum UnhandledPanic { + /// The runtime should ignore panics on spawned tasks. + /// + /// The panic is forwarded to the task's [`JoinHandle`] and all spawned + /// tasks continue running normally. + /// + /// This is the default behavior. 
+ /// + /// # Examples + /// + /// ``` + /// use tokio::runtime::{self, UnhandledPanic}; + /// + /// # pub fn main() { + /// let rt = runtime::Builder::new_current_thread() + /// .unhandled_panic(UnhandledPanic::Ignore) + /// .build() + /// .unwrap(); + /// + /// let task1 = rt.spawn(async { panic!("boom"); }); + /// let task2 = rt.spawn(async { + /// // This task completes normally + /// "done" + /// }); + /// + /// rt.block_on(async { + /// // The panic on the first task is forwarded to the `JoinHandle` + /// assert!(task1.await.is_err()); + /// + /// // The second task completes normally + /// assert!(task2.await.is_ok()); + /// }) + /// # } + /// ``` + /// + /// [`JoinHandle`]: struct@crate::task::JoinHandle + Ignore, + + /// The runtime should immediately shutdown if a spawned task panics. + /// + /// The runtime will immediately shutdown even if the panicked task's + /// [`JoinHandle`] is still available. All further spawned tasks will be + /// immediately dropped and call to [`Runtime::block_on`] will panic. + /// + /// # Examples + /// + /// ```should_panic + /// use tokio::runtime::{self, UnhandledPanic}; + /// + /// # pub fn main() { + /// let rt = runtime::Builder::new_current_thread() + /// .unhandled_panic(UnhandledPanic::ShutdownRuntime) + /// .build() + /// .unwrap(); + /// + /// rt.spawn(async { panic!("boom"); }); + /// rt.spawn(async { + /// // This task never completes. + /// }); + /// + /// rt.block_on(async { + /// // Do some work + /// # loop { tokio::task::yield_now().await; } + /// }) + /// # } + /// ``` + /// + /// [`JoinHandle`]: struct@crate::task::JoinHandle + ShutdownRuntime, + } +} + +pub(crate) type ThreadNameFn = std::sync::Arc<dyn Fn() -> String + Send + Sync + 'static>; + +#[derive(Clone, Copy)] +pub(crate) enum Kind { + CurrentThread, + #[cfg(all(feature = "rt-multi-thread", not(tokio_wasi)))] + MultiThread, +} + +impl Builder { + /// Returns a new builder with the current thread scheduler selected. + /// + /// Configuration methods can be chained on the return value. + /// + /// To spawn non-`Send` tasks on the resulting runtime, combine it with a + /// [`LocalSet`]. + /// + /// [`LocalSet`]: crate::task::LocalSet + pub fn new_current_thread() -> Builder { + #[cfg(loom)] + const EVENT_INTERVAL: u32 = 4; + // The number `61` is fairly arbitrary. I believe this value was copied from golang. + #[cfg(not(loom))] + const EVENT_INTERVAL: u32 = 61; + + Builder::new(Kind::CurrentThread, EVENT_INTERVAL) + } + + cfg_not_wasi! { + /// Returns a new builder with the multi thread scheduler selected. + /// + /// Configuration methods can be chained on the return value. + #[cfg(feature = "rt-multi-thread")] + #[cfg_attr(docsrs, doc(cfg(feature = "rt-multi-thread")))] + pub fn new_multi_thread() -> Builder { + // The number `61` is fairly arbitrary. I believe this value was copied from golang. + Builder::new(Kind::MultiThread, 61) + } + } + + /// Returns a new runtime builder initialized with default configuration + /// values. + /// + /// Configuration methods can be chained on the return value. + pub(crate) fn new(kind: Kind, event_interval: u32) -> Builder { + Builder { + kind, + + // I/O defaults to "off" + enable_io: false, + nevents: 1024, + + // Time defaults to "off" + enable_time: false, + + // The clock starts not-paused + start_paused: false, + + // Read from environment variable first in multi-threaded mode. 
+ // Default to lazy auto-detection (one thread per CPU core) + worker_threads: None, + + max_blocking_threads: 512, + + // Default thread name + thread_name: std::sync::Arc::new(|| "tokio-runtime-worker".into()), + + // Do not set a stack size by default + thread_stack_size: None, + + // No worker thread callbacks + after_start: None, + before_stop: None, + before_park: None, + after_unpark: None, + + keep_alive: None, + + // Defaults for these values depend on the scheduler kind, so we get them + // as parameters. + global_queue_interval: None, + event_interval, + + seed_generator: RngSeedGenerator::new(RngSeed::new()), + + #[cfg(tokio_unstable)] + unhandled_panic: UnhandledPanic::Ignore, + + metrics_poll_count_histogram_enable: false, + + metrics_poll_count_histogram: Default::default(), + + disable_lifo_slot: false, + } + } + + /// Enables both I/O and time drivers. + /// + /// Doing this is a shorthand for calling `enable_io` and `enable_time` + /// individually. If additional components are added to Tokio in the future, + /// `enable_all` will include these future components. + /// + /// # Examples + /// + /// ``` + /// use tokio::runtime; + /// + /// let rt = runtime::Builder::new_multi_thread() + /// .enable_all() + /// .build() + /// .unwrap(); + /// ``` + pub fn enable_all(&mut self) -> &mut Self { + #[cfg(any( + feature = "net", + all(unix, feature = "process"), + all(unix, feature = "signal") + ))] + self.enable_io(); + #[cfg(feature = "time")] + self.enable_time(); + + self + } + + /// Sets the number of worker threads the `Runtime` will use. + /// + /// This can be any number above 0 though it is advised to keep this value + /// on the smaller side. + /// + /// This will override the value read from environment variable `TOKIO_WORKER_THREADS`. + /// + /// # Default + /// + /// The default value is the number of cores available to the system. + /// + /// When using the `current_thread` runtime this method has no effect. + /// + /// # Examples + /// + /// ## Multi threaded runtime with 4 threads + /// + /// ``` + /// use tokio::runtime; + /// + /// // This will spawn a work-stealing runtime with 4 worker threads. + /// let rt = runtime::Builder::new_multi_thread() + /// .worker_threads(4) + /// .build() + /// .unwrap(); + /// + /// rt.spawn(async move {}); + /// ``` + /// + /// ## Current thread runtime (will only run on the current thread via `Runtime::block_on`) + /// + /// ``` + /// use tokio::runtime; + /// + /// // Create a runtime that _must_ be driven from a call + /// // to `Runtime::block_on`. + /// let rt = runtime::Builder::new_current_thread() + /// .build() + /// .unwrap(); + /// + /// // This will run the runtime and future on the current thread + /// rt.block_on(async move {}); + /// ``` + /// + /// # Panics + /// + /// This will panic if `val` is not larger than `0`. + #[track_caller] + pub fn worker_threads(&mut self, val: usize) -> &mut Self { + assert!(val > 0, "Worker threads cannot be set to 0"); + self.worker_threads = Some(val); + self + } + + /// Specifies the limit for additional threads spawned by the Runtime. + /// + /// These threads are used for blocking operations like tasks spawned + /// through [`spawn_blocking`]. Unlike the [`worker_threads`], they are not + /// always active and will exit if left idle for too long. You can change + /// this timeout duration with [`thread_keep_alive`]. + /// + /// The default value is 512. + /// + /// # Panics + /// + /// This will panic if `val` is not larger than `0`. 
+ /// + /// # Upgrading from 0.x + /// + /// In old versions `max_threads` limited both blocking and worker threads, but the + /// current `max_blocking_threads` does not include async worker threads in the count. + /// + /// [`spawn_blocking`]: fn@crate::task::spawn_blocking + /// [`worker_threads`]: Self::worker_threads + /// [`thread_keep_alive`]: Self::thread_keep_alive + #[track_caller] + #[cfg_attr(docsrs, doc(alias = "max_threads"))] + pub fn max_blocking_threads(&mut self, val: usize) -> &mut Self { + assert!(val > 0, "Max blocking threads cannot be set to 0"); + self.max_blocking_threads = val; + self + } + + /// Sets name of threads spawned by the `Runtime`'s thread pool. + /// + /// The default name is "tokio-runtime-worker". + /// + /// # Examples + /// + /// ``` + /// # use tokio::runtime; + /// + /// # pub fn main() { + /// let rt = runtime::Builder::new_multi_thread() + /// .thread_name("my-pool") + /// .build(); + /// # } + /// ``` + pub fn thread_name(&mut self, val: impl Into<String>) -> &mut Self { + let val = val.into(); + self.thread_name = std::sync::Arc::new(move || val.clone()); + self + } + + /// Sets a function used to generate the name of threads spawned by the `Runtime`'s thread pool. + /// + /// The default name fn is `|| "tokio-runtime-worker".into()`. + /// + /// # Examples + /// + /// ``` + /// # use tokio::runtime; + /// # use std::sync::atomic::{AtomicUsize, Ordering}; + /// # pub fn main() { + /// let rt = runtime::Builder::new_multi_thread() + /// .thread_name_fn(|| { + /// static ATOMIC_ID: AtomicUsize = AtomicUsize::new(0); + /// let id = ATOMIC_ID.fetch_add(1, Ordering::SeqCst); + /// format!("my-pool-{}", id) + /// }) + /// .build(); + /// # } + /// ``` + pub fn thread_name_fn<F>(&mut self, f: F) -> &mut Self + where + F: Fn() -> String + Send + Sync + 'static, + { + self.thread_name = std::sync::Arc::new(f); + self + } + + /// Sets the stack size (in bytes) for worker threads. + /// + /// The actual stack size may be greater than this value if the platform + /// specifies minimal stack size. + /// + /// The default stack size for spawned threads is 2 MiB, though this + /// particular stack size is subject to change in the future. + /// + /// # Examples + /// + /// ``` + /// # use tokio::runtime; + /// + /// # pub fn main() { + /// let rt = runtime::Builder::new_multi_thread() + /// .thread_stack_size(32 * 1024) + /// .build(); + /// # } + /// ``` + pub fn thread_stack_size(&mut self, val: usize) -> &mut Self { + self.thread_stack_size = Some(val); + self + } + + /// Executes function `f` after each thread is started but before it starts + /// doing work. + /// + /// This is intended for bookkeeping and monitoring use cases. + /// + /// # Examples + /// + /// ``` + /// # use tokio::runtime; + /// # pub fn main() { + /// let runtime = runtime::Builder::new_multi_thread() + /// .on_thread_start(|| { + /// println!("thread started"); + /// }) + /// .build(); + /// # } + /// ``` + #[cfg(not(loom))] + pub fn on_thread_start<F>(&mut self, f: F) -> &mut Self + where + F: Fn() + Send + Sync + 'static, + { + self.after_start = Some(std::sync::Arc::new(f)); + self + } + + /// Executes function `f` before each thread stops. + /// + /// This is intended for bookkeeping and monitoring use cases. 
+ /// + /// # Examples + /// + /// ``` + /// # use tokio::runtime; + /// # pub fn main() { + /// let runtime = runtime::Builder::new_multi_thread() + /// .on_thread_stop(|| { + /// println!("thread stopping"); + /// }) + /// .build(); + /// # } + /// ``` + #[cfg(not(loom))] + pub fn on_thread_stop<F>(&mut self, f: F) -> &mut Self + where + F: Fn() + Send + Sync + 'static, + { + self.before_stop = Some(std::sync::Arc::new(f)); + self + } + + /// Executes function `f` just before a thread is parked (goes idle). + /// `f` is called within the Tokio context, so functions like [`tokio::spawn`](crate::spawn) + /// can be called, and may result in this thread being unparked immediately. + /// + /// This can be used to start work only when the executor is idle, or for bookkeeping + /// and monitoring purposes. + /// + /// Note: There can only be one park callback for a runtime; calling this function + /// more than once replaces the last callback defined, rather than adding to it. + /// + /// # Examples + /// + /// ## Multithreaded executor + /// ``` + /// # use std::sync::Arc; + /// # use std::sync::atomic::{AtomicBool, Ordering}; + /// # use tokio::runtime; + /// # use tokio::sync::Barrier; + /// # pub fn main() { + /// let once = AtomicBool::new(true); + /// let barrier = Arc::new(Barrier::new(2)); + /// + /// let runtime = runtime::Builder::new_multi_thread() + /// .worker_threads(1) + /// .on_thread_park({ + /// let barrier = barrier.clone(); + /// move || { + /// let barrier = barrier.clone(); + /// if once.swap(false, Ordering::Relaxed) { + /// tokio::spawn(async move { barrier.wait().await; }); + /// } + /// } + /// }) + /// .build() + /// .unwrap(); + /// + /// runtime.block_on(async { + /// barrier.wait().await; + /// }) + /// # } + /// ``` + /// ## Current thread executor + /// ``` + /// # use std::sync::Arc; + /// # use std::sync::atomic::{AtomicBool, Ordering}; + /// # use tokio::runtime; + /// # use tokio::sync::Barrier; + /// # pub fn main() { + /// let once = AtomicBool::new(true); + /// let barrier = Arc::new(Barrier::new(2)); + /// + /// let runtime = runtime::Builder::new_current_thread() + /// .on_thread_park({ + /// let barrier = barrier.clone(); + /// move || { + /// let barrier = barrier.clone(); + /// if once.swap(false, Ordering::Relaxed) { + /// tokio::spawn(async move { barrier.wait().await; }); + /// } + /// } + /// }) + /// .build() + /// .unwrap(); + /// + /// runtime.block_on(async { + /// barrier.wait().await; + /// }) + /// # } + /// ``` + #[cfg(not(loom))] + pub fn on_thread_park<F>(&mut self, f: F) -> &mut Self + where + F: Fn() + Send + Sync + 'static, + { + self.before_park = Some(std::sync::Arc::new(f)); + self + } + + /// Executes function `f` just after a thread unparks (starts executing tasks). + /// + /// This is intended for bookkeeping and monitoring use cases; note that work + /// in this callback will increase latencies when the application has allowed one or + /// more runtime threads to go idle. + /// + /// Note: There can only be one unpark callback for a runtime; calling this function + /// more than once replaces the last callback defined, rather than adding to it. 
+ /// + /// # Examples + /// + /// ``` + /// # use tokio::runtime; + /// # pub fn main() { + /// let runtime = runtime::Builder::new_multi_thread() + /// .on_thread_unpark(|| { + /// println!("thread unparking"); + /// }) + /// .build(); + /// + /// runtime.unwrap().block_on(async { + /// tokio::task::yield_now().await; + /// println!("Hello from Tokio!"); + /// }) + /// # } + /// ``` + #[cfg(not(loom))] + pub fn on_thread_unpark<F>(&mut self, f: F) -> &mut Self + where + F: Fn() + Send + Sync + 'static, + { + self.after_unpark = Some(std::sync::Arc::new(f)); + self + } + + /// Creates the configured `Runtime`. + /// + /// The returned `Runtime` instance is ready to spawn tasks. + /// + /// # Examples + /// + /// ``` + /// use tokio::runtime::Builder; + /// + /// let rt = Builder::new_multi_thread().build().unwrap(); + /// + /// rt.block_on(async { + /// println!("Hello from the Tokio runtime"); + /// }); + /// ``` + pub fn build(&mut self) -> io::Result<Runtime> { + match &self.kind { + Kind::CurrentThread => self.build_current_thread_runtime(), + #[cfg(all(feature = "rt-multi-thread", not(tokio_wasi)))] + Kind::MultiThread => self.build_threaded_runtime(), + } + } + + fn get_cfg(&self) -> driver::Cfg { + driver::Cfg { + enable_pause_time: match self.kind { + Kind::CurrentThread => true, + #[cfg(all(feature = "rt-multi-thread", not(tokio_wasi)))] + Kind::MultiThread => false, + }, + enable_io: self.enable_io, + enable_time: self.enable_time, + start_paused: self.start_paused, + nevents: self.nevents, + } + } + + /// Sets a custom timeout for a thread in the blocking pool. + /// + /// By default, the timeout for a thread is set to 10 seconds. This can + /// be overridden using .thread_keep_alive(). + /// + /// # Example + /// + /// ``` + /// # use tokio::runtime; + /// # use std::time::Duration; + /// # pub fn main() { + /// let rt = runtime::Builder::new_multi_thread() + /// .thread_keep_alive(Duration::from_millis(100)) + /// .build(); + /// # } + /// ``` + pub fn thread_keep_alive(&mut self, duration: Duration) -> &mut Self { + self.keep_alive = Some(duration); + self + } + + /// Sets the number of scheduler ticks after which the scheduler will poll the global + /// task queue. + /// + /// A scheduler "tick" roughly corresponds to one `poll` invocation on a task. + /// + /// By default the global queue interval is: + /// + /// * `31` for the current-thread scheduler. + /// * `61` for the multithreaded scheduler. + /// + /// Schedulers have a local queue of already-claimed tasks, and a global queue of incoming + /// tasks. Setting the interval to a smaller value increases the fairness of the scheduler, + /// at the cost of more synchronization overhead. That can be beneficial for prioritizing + /// getting started on new work, especially if tasks frequently yield rather than complete + /// or await on further I/O. Conversely, a higher value prioritizes existing work, and + /// is a good choice when most tasks quickly complete polling. + /// + /// # Examples + /// + /// ``` + /// # use tokio::runtime; + /// # pub fn main() { + /// let rt = runtime::Builder::new_multi_thread() + /// .global_queue_interval(31) + /// .build(); + /// # } + /// ``` + pub fn global_queue_interval(&mut self, val: u32) -> &mut Self { + self.global_queue_interval = Some(val); + self + } + + /// Sets the number of scheduler ticks after which the scheduler will poll for + /// external events (timers, I/O, and so on). + /// + /// A scheduler "tick" roughly corresponds to one `poll` invocation on a task. 
+ /// + /// By default, the event interval is `61` for all scheduler types. + /// + /// Setting the event interval determines the effective "priority" of delivering + /// these external events (which may wake up additional tasks), compared to + /// executing tasks that are currently ready to run. A smaller value is useful + /// when tasks frequently spend a long time in polling, or frequently yield, + /// which can result in overly long delays picking up I/O events. Conversely, + /// picking up new events requires extra synchronization and syscall overhead, + /// so if tasks generally complete their polling quickly, a higher event interval + /// will minimize that overhead while still keeping the scheduler responsive to + /// events. + /// + /// # Examples + /// + /// ``` + /// # use tokio::runtime; + /// # pub fn main() { + /// let rt = runtime::Builder::new_multi_thread() + /// .event_interval(31) + /// .build(); + /// # } + /// ``` + pub fn event_interval(&mut self, val: u32) -> &mut Self { + self.event_interval = val; + self + } + + cfg_unstable! { + /// Configure how the runtime responds to an unhandled panic on a + /// spawned task. + /// + /// By default, an unhandled panic (i.e. a panic not caught by + /// [`std::panic::catch_unwind`]) has no impact on the runtime's + /// execution. The panic is error value is forwarded to the task's + /// [`JoinHandle`] and all other spawned tasks continue running. + /// + /// The `unhandled_panic` option enables configuring this behavior. + /// + /// * `UnhandledPanic::Ignore` is the default behavior. Panics on + /// spawned tasks have no impact on the runtime's execution. + /// * `UnhandledPanic::ShutdownRuntime` will force the runtime to + /// shutdown immediately when a spawned task panics even if that + /// task's `JoinHandle` has not been dropped. All other spawned tasks + /// will immediately terminate and further calls to + /// [`Runtime::block_on`] will panic. + /// + /// # Unstable + /// + /// This option is currently unstable and its implementation is + /// incomplete. The API may change or be removed in the future. See + /// tokio-rs/tokio#4516 for more details. + /// + /// # Examples + /// + /// The following demonstrates a runtime configured to shutdown on + /// panic. The first spawned task panics and results in the runtime + /// shutting down. The second spawned task never has a chance to + /// execute. The call to `block_on` will panic due to the runtime being + /// forcibly shutdown. + /// + /// ```should_panic + /// use tokio::runtime::{self, UnhandledPanic}; + /// + /// # pub fn main() { + /// let rt = runtime::Builder::new_current_thread() + /// .unhandled_panic(UnhandledPanic::ShutdownRuntime) + /// .build() + /// .unwrap(); + /// + /// rt.spawn(async { panic!("boom"); }); + /// rt.spawn(async { + /// // This task never completes. + /// }); + /// + /// rt.block_on(async { + /// // Do some work + /// # loop { tokio::task::yield_now().await; } + /// }) + /// # } + /// ``` + /// + /// [`JoinHandle`]: struct@crate::task::JoinHandle + pub fn unhandled_panic(&mut self, behavior: UnhandledPanic) -> &mut Self { + self.unhandled_panic = behavior; + self + } + + /// Disables the LIFO task scheduler heuristic. + /// + /// The multi-threaded scheduler includes a heuristic for optimizing + /// message-passing patterns. This heuristic results in the **last** + /// scheduled task being polled first. + /// + /// To implement this heuristic, each worker thread has a slot which + /// holds the task that should be polled next. 
However, this slot cannot + /// be stolen by other worker threads, which can result in lower total + /// throughput when tasks tend to have longer poll times. + /// + /// This configuration option will disable this heuristic resulting in + /// all scheduled tasks being pushed into the worker-local queue, which + /// is stealable. + /// + /// Consider trying this option when the task "scheduled" time is high + /// but the runtime is underutilized. Use tokio-rs/tokio-metrics to + /// collect this data. + /// + /// # Unstable + /// + /// This configuration option is considered a workaround for the LIFO + /// slot not being stealable. When the slot becomes stealable, we will + /// revisit whether or not this option is necessary. See + /// tokio-rs/tokio#4941. + /// + /// # Examples + /// + /// ``` + /// use tokio::runtime; + /// + /// let rt = runtime::Builder::new_multi_thread() + /// .disable_lifo_slot() + /// .build() + /// .unwrap(); + /// ``` + pub fn disable_lifo_slot(&mut self) -> &mut Self { + self.disable_lifo_slot = true; + self + } + + /// Specifies the random number generation seed to use within all + /// threads associated with the runtime being built. + /// + /// This option is intended to make certain parts of the runtime + /// deterministic (e.g. the [`tokio::select!`] macro). In the case of + /// [`tokio::select!`] it will ensure that the order that branches are + /// polled is deterministic. + /// + /// In addition to the code specifying `rng_seed` and interacting with + /// the runtime, the internals of Tokio and the Rust compiler may affect + /// the sequences of random numbers. In order to ensure repeatable + /// results, the version of Tokio, the versions of all other + /// dependencies that interact with Tokio, and the Rust compiler version + /// should also all remain constant. + /// + /// # Examples + /// + /// ``` + /// # use tokio::runtime::{self, RngSeed}; + /// # pub fn main() { + /// let seed = RngSeed::from_bytes(b"place your seed here"); + /// let rt = runtime::Builder::new_current_thread() + /// .rng_seed(seed) + /// .build(); + /// # } + /// ``` + /// + /// [`tokio::select!`]: crate::select + pub fn rng_seed(&mut self, seed: RngSeed) -> &mut Self { + self.seed_generator = RngSeedGenerator::new(seed); + self + } + } + + cfg_metrics! { + /// Enables tracking the distribution of task poll times. + /// + /// Task poll times are not instrumented by default as doing so requires + /// calling [`Instant::now()`] twice per task poll, which could add + /// measurable overhead. Use the [`Handle::metrics()`] to access the + /// metrics data. + /// + /// The histogram uses fixed bucket sizes. In other words, the histogram + /// buckets are not dynamic based on input values. Use the + /// `metrics_poll_count_histogram_` builder methods to configure the + /// histogram details. 
+ /// + /// # Examples + /// + /// ``` + /// use tokio::runtime; + /// + /// let rt = runtime::Builder::new_multi_thread() + /// .enable_metrics_poll_count_histogram() + /// .build() + /// .unwrap(); + /// # // Test default values here + /// # fn us(n: u64) -> std::time::Duration { std::time::Duration::from_micros(n) } + /// # let m = rt.handle().metrics(); + /// # assert_eq!(m.poll_count_histogram_num_buckets(), 10); + /// # assert_eq!(m.poll_count_histogram_bucket_range(0), us(0)..us(100)); + /// # assert_eq!(m.poll_count_histogram_bucket_range(1), us(100)..us(200)); + /// ``` + /// + /// [`Handle::metrics()`]: crate::runtime::Handle::metrics + /// [`Instant::now()`]: std::time::Instant::now + pub fn enable_metrics_poll_count_histogram(&mut self) -> &mut Self { + self.metrics_poll_count_histogram_enable = true; + self + } + + /// Sets the histogram scale for tracking the distribution of task poll + /// times. + /// + /// Tracking the distribution of task poll times can be done using a + /// linear or log scale. When using linear scale, each histogram bucket + /// will represent the same range of poll times. When using log scale, + /// each histogram bucket will cover a range twice as big as the + /// previous bucket. + /// + /// **Default:** linear scale. + /// + /// # Examples + /// + /// ``` + /// use tokio::runtime::{self, HistogramScale}; + /// + /// let rt = runtime::Builder::new_multi_thread() + /// .enable_metrics_poll_count_histogram() + /// .metrics_poll_count_histogram_scale(HistogramScale::Log) + /// .build() + /// .unwrap(); + /// ``` + pub fn metrics_poll_count_histogram_scale(&mut self, histogram_scale: crate::runtime::HistogramScale) -> &mut Self { + self.metrics_poll_count_histogram.scale = histogram_scale; + self + } + + /// Sets the histogram resolution for tracking the distribution of task + /// poll times. + /// + /// The resolution is the histogram's first bucket's range. When using a + /// linear histogram scale, each bucket will cover the same range. When + /// using a log scale, each bucket will cover a range twice as big as + /// the previous bucket. In the log case, the resolution represents the + /// smallest bucket range. + /// + /// Note that, when using log scale, the resolution is rounded up to the + /// nearest power of 2 in nanoseconds. + /// + /// **Default:** 100 microseconds. + /// + /// # Examples + /// + /// ``` + /// use tokio::runtime; + /// use std::time::Duration; + /// + /// let rt = runtime::Builder::new_multi_thread() + /// .enable_metrics_poll_count_histogram() + /// .metrics_poll_count_histogram_resolution(Duration::from_micros(100)) + /// .build() + /// .unwrap(); + /// ``` + pub fn metrics_poll_count_histogram_resolution(&mut self, resolution: Duration) -> &mut Self { + assert!(resolution > Duration::from_secs(0)); + // Sanity check the argument and also make the cast below safe. + assert!(resolution <= Duration::from_secs(1)); + + let resolution = resolution.as_nanos() as u64; + self.metrics_poll_count_histogram.resolution = resolution; + self + } + + /// Sets the number of buckets for the histogram tracking the + /// distribution of task poll times. + /// + /// The last bucket tracks all greater values that fall out of other + /// ranges. So, configuring the histogram using a linear scale, + /// resolution of 50ms, and 10 buckets, the 10th bucket will track task + /// polls that take more than 450ms to complete. 
+ /// + /// **Default:** 10 + /// + /// # Examples + /// + /// ``` + /// use tokio::runtime; + /// + /// let rt = runtime::Builder::new_multi_thread() + /// .enable_metrics_poll_count_histogram() + /// .metrics_poll_count_histogram_buckets(15) + /// .build() + /// .unwrap(); + /// ``` + pub fn metrics_poll_count_histogram_buckets(&mut self, buckets: usize) -> &mut Self { + self.metrics_poll_count_histogram.num_buckets = buckets; + self + } + } + + fn build_current_thread_runtime(&mut self) -> io::Result<Runtime> { + use crate::runtime::scheduler::{self, CurrentThread}; + use crate::runtime::{runtime::Scheduler, Config}; + + let (driver, driver_handle) = driver::Driver::new(self.get_cfg())?; + + // Blocking pool + let blocking_pool = blocking::create_blocking_pool(self, self.max_blocking_threads); + let blocking_spawner = blocking_pool.spawner().clone(); + + // Generate a rng seed for this runtime. + let seed_generator_1 = self.seed_generator.next_generator(); + let seed_generator_2 = self.seed_generator.next_generator(); + + // And now put a single-threaded scheduler on top of the timer. When + // there are no futures ready to do something, it'll let the timer or + // the reactor to generate some new stimuli for the futures to continue + // in their life. + let (scheduler, handle) = CurrentThread::new( + driver, + driver_handle, + blocking_spawner, + seed_generator_2, + Config { + before_park: self.before_park.clone(), + after_unpark: self.after_unpark.clone(), + global_queue_interval: self.global_queue_interval, + event_interval: self.event_interval, + #[cfg(tokio_unstable)] + unhandled_panic: self.unhandled_panic.clone(), + disable_lifo_slot: self.disable_lifo_slot, + seed_generator: seed_generator_1, + metrics_poll_count_histogram: self.metrics_poll_count_histogram_builder(), + }, + ); + + let handle = Handle { + inner: scheduler::Handle::CurrentThread(handle), + }; + + Ok(Runtime::from_parts( + Scheduler::CurrentThread(scheduler), + handle, + blocking_pool, + )) + } + + fn metrics_poll_count_histogram_builder(&self) -> Option<HistogramBuilder> { + if self.metrics_poll_count_histogram_enable { + Some(self.metrics_poll_count_histogram.clone()) + } else { + None + } + } +} + +cfg_io_driver! { + impl Builder { + /// Enables the I/O driver. + /// + /// Doing this enables using net, process, signal, and some I/O types on + /// the runtime. + /// + /// # Examples + /// + /// ``` + /// use tokio::runtime; + /// + /// let rt = runtime::Builder::new_multi_thread() + /// .enable_io() + /// .build() + /// .unwrap(); + /// ``` + pub fn enable_io(&mut self) -> &mut Self { + self.enable_io = true; + self + } + + /// Enables the I/O driver and configures the max number of events to be + /// processed per tick. + /// + /// # Examples + /// + /// ``` + /// use tokio::runtime; + /// + /// let rt = runtime::Builder::new_current_thread() + /// .enable_io() + /// .max_io_events_per_tick(1024) + /// .build() + /// .unwrap(); + /// ``` + pub fn max_io_events_per_tick(&mut self, capacity: usize) -> &mut Self { + self.nevents = capacity; + self + } + } +} + +cfg_time! { + impl Builder { + /// Enables the time driver. + /// + /// Doing this enables using `tokio::time` on the runtime. + /// + /// # Examples + /// + /// ``` + /// use tokio::runtime; + /// + /// let rt = runtime::Builder::new_multi_thread() + /// .enable_time() + /// .build() + /// .unwrap(); + /// ``` + pub fn enable_time(&mut self) -> &mut Self { + self.enable_time = true; + self + } + } +} + +cfg_test_util! 
{ + impl Builder { + /// Controls if the runtime's clock starts paused or advancing. + /// + /// Pausing time requires the current-thread runtime; construction of + /// the runtime will panic otherwise. + /// + /// # Examples + /// + /// ``` + /// use tokio::runtime; + /// + /// let rt = runtime::Builder::new_current_thread() + /// .enable_time() + /// .start_paused(true) + /// .build() + /// .unwrap(); + /// ``` + pub fn start_paused(&mut self, start_paused: bool) -> &mut Self { + self.start_paused = start_paused; + self + } + } +} + +cfg_rt_multi_thread! { + impl Builder { + fn build_threaded_runtime(&mut self) -> io::Result<Runtime> { + use crate::loom::sys::num_cpus; + use crate::runtime::{Config, runtime::Scheduler}; + use crate::runtime::scheduler::{self, MultiThread}; + + let core_threads = self.worker_threads.unwrap_or_else(num_cpus); + + let (driver, driver_handle) = driver::Driver::new(self.get_cfg())?; + + // Create the blocking pool + let blocking_pool = + blocking::create_blocking_pool(self, self.max_blocking_threads + core_threads); + let blocking_spawner = blocking_pool.spawner().clone(); + + // Generate a rng seed for this runtime. + let seed_generator_1 = self.seed_generator.next_generator(); + let seed_generator_2 = self.seed_generator.next_generator(); + + let (scheduler, handle, launch) = MultiThread::new( + core_threads, + driver, + driver_handle, + blocking_spawner, + seed_generator_2, + Config { + before_park: self.before_park.clone(), + after_unpark: self.after_unpark.clone(), + global_queue_interval: self.global_queue_interval, + event_interval: self.event_interval, + #[cfg(tokio_unstable)] + unhandled_panic: self.unhandled_panic.clone(), + disable_lifo_slot: self.disable_lifo_slot, + seed_generator: seed_generator_1, + metrics_poll_count_histogram: self.metrics_poll_count_histogram_builder(), + }, + ); + + let handle = Handle { inner: scheduler::Handle::MultiThread(handle) }; + + // Spawn the thread pool workers + let _enter = handle.enter(); + launch.launch(); + + Ok(Runtime::from_parts(Scheduler::MultiThread(scheduler), handle, blocking_pool)) + } + } +} + +impl fmt::Debug for Builder { + fn fmt(&self, fmt: &mut fmt::Formatter<'_>) -> fmt::Result { + fmt.debug_struct("Builder") + .field("worker_threads", &self.worker_threads) + .field("max_blocking_threads", &self.max_blocking_threads) + .field( + "thread_name", + &"<dyn Fn() -> String + Send + Sync + 'static>", + ) + .field("thread_stack_size", &self.thread_stack_size) + .field("after_start", &self.after_start.as_ref().map(|_| "...")) + .field("before_stop", &self.before_stop.as_ref().map(|_| "...")) + .field("before_park", &self.before_park.as_ref().map(|_| "...")) + .field("after_unpark", &self.after_unpark.as_ref().map(|_| "...")) + .finish() + } +} diff --git a/third_party/rust/tokio/src/runtime/config.rs b/third_party/rust/tokio/src/runtime/config.rs new file mode 100644 index 0000000000..c42e4fe5a8 --- /dev/null +++ b/third_party/rust/tokio/src/runtime/config.rs @@ -0,0 +1,37 @@ +#![cfg_attr(any(not(feature = "full"), tokio_wasm), allow(dead_code))] +use crate::runtime::Callback; +use crate::util::RngSeedGenerator; + +pub(crate) struct Config { + /// How many ticks before pulling a task from the global/remote queue? + pub(crate) global_queue_interval: Option<u32>, + + /// How many ticks before yielding to the driver for timer and I/O events? 
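As a small sketch of the pause-time behaviour described above (assuming the `time` and `test-util` features): when the clock starts paused on a current-thread runtime, the runtime auto-advances time whenever it would otherwise block on a timer, so even a very long sleep completes almost immediately in wall-clock terms.

```rust
use std::time::{Duration, Instant};
use tokio::runtime;

fn main() {
    let rt = runtime::Builder::new_current_thread()
        .enable_time()
        .start_paused(true)
        .build()
        .unwrap();

    let wall_clock_start = Instant::now();
    rt.block_on(async {
        // An hour of "tokio time" passes without an hour of real time.
        tokio::time::sleep(Duration::from_secs(3600)).await;
    });
    // std::time::Instant is unaffected by the paused tokio clock.
    assert!(wall_clock_start.elapsed() < Duration::from_secs(1));
}
```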
+ pub(crate) event_interval: u32, + + /// Callback for a worker parking itself + pub(crate) before_park: Option<Callback>, + + /// Callback for a worker unparking itself + pub(crate) after_unpark: Option<Callback>, + + /// The multi-threaded scheduler includes a per-worker LIFO slot used to + /// store the last scheduled task. This can improve certain usage patterns, + /// especially message passing between tasks. However, this LIFO slot is not + /// currently stealable. + /// + /// Eventually, the LIFO slot **will** become stealable, however as a + /// stop-gap, this unstable option lets users disable the LIFO task. + pub(crate) disable_lifo_slot: bool, + + /// Random number generator seed to configure runtimes to act in a + /// deterministic way. + pub(crate) seed_generator: RngSeedGenerator, + + /// How to build poll time histograms + pub(crate) metrics_poll_count_histogram: Option<crate::runtime::HistogramBuilder>, + + #[cfg(tokio_unstable)] + /// How to respond to unhandled task panics. + pub(crate) unhandled_panic: crate::runtime::UnhandledPanic, +} diff --git a/third_party/rust/tokio/src/runtime/context.rs b/third_party/rust/tokio/src/runtime/context.rs new file mode 100644 index 0000000000..5943e9aa97 --- /dev/null +++ b/third_party/rust/tokio/src/runtime/context.rs @@ -0,0 +1,191 @@ +use crate::loom::thread::AccessError; +use crate::runtime::coop; + +use std::cell::Cell; + +#[cfg(any(feature = "rt", feature = "macros"))] +use crate::util::rand::FastRand; + +cfg_rt! { + mod blocking; + pub(crate) use blocking::{disallow_block_in_place, try_enter_blocking_region, BlockingRegionGuard}; + + mod current; + pub(crate) use current::{with_current, try_set_current, SetCurrentGuard}; + + mod runtime; + pub(crate) use runtime::{EnterRuntime, enter_runtime}; + + mod scoped; + use scoped::Scoped; + + use crate::runtime::{scheduler, task::Id}; + + use std::task::Waker; + + cfg_taskdump! { + use crate::runtime::task::trace; + } +} + +cfg_rt_multi_thread! { + mod runtime_mt; + pub(crate) use runtime_mt::{current_enter_context, exit_runtime}; +} + +struct Context { + /// Uniquely identifies the current thread + #[cfg(feature = "rt")] + thread_id: Cell<Option<ThreadId>>, + + /// Handle to the runtime scheduler running on the current thread. + #[cfg(feature = "rt")] + current: current::HandleCell, + + /// Handle to the scheduler's internal "context" + #[cfg(feature = "rt")] + scheduler: Scoped<scheduler::Context>, + + #[cfg(feature = "rt")] + current_task_id: Cell<Option<Id>>, + + /// Tracks if the current thread is currently driving a runtime. + /// Note, that if this is set to "entered", the current scheduler + /// handle may not reference the runtime currently executing. This + /// is because other runtime handles may be set to current from + /// within a runtime. + #[cfg(feature = "rt")] + runtime: Cell<EnterRuntime>, + + #[cfg(any(feature = "rt", feature = "macros"))] + rng: Cell<Option<FastRand>>, + + /// Tracks the amount of "work" a task may still do before yielding back to + /// the sheduler + budget: Cell<coop::Budget>, + + #[cfg(all( + tokio_unstable, + tokio_taskdump, + feature = "rt", + target_os = "linux", + any(target_arch = "aarch64", target_arch = "x86", target_arch = "x86_64") + ))] + trace: trace::Context, +} + +tokio_thread_local! { + static CONTEXT: Context = const { + Context { + #[cfg(feature = "rt")] + thread_id: Cell::new(None), + + /// Tracks the current runtime handle to use when spawning, + /// accessing drivers, etc... 
+ #[cfg(feature = "rt")] + current: current::HandleCell::new(), + + /// Tracks the current scheduler internal context + #[cfg(feature = "rt")] + scheduler: Scoped::new(), + + #[cfg(feature = "rt")] + current_task_id: Cell::new(None), + + /// Tracks if the current thread is currently driving a runtime. + /// Note, that if this is set to "entered", the current scheduler + /// handle may not reference the runtime currently executing. This + /// is because other runtime handles may be set to current from + /// within a runtime. + #[cfg(feature = "rt")] + runtime: Cell::new(EnterRuntime::NotEntered), + + #[cfg(any(feature = "rt", feature = "macros"))] + rng: Cell::new(None), + + budget: Cell::new(coop::Budget::unconstrained()), + + #[cfg(all( + tokio_unstable, + tokio_taskdump, + feature = "rt", + target_os = "linux", + any( + target_arch = "aarch64", + target_arch = "x86", + target_arch = "x86_64" + ) + ))] + trace: trace::Context::new(), + } + } +} + +#[cfg(any(feature = "macros", all(feature = "sync", feature = "rt")))] +pub(crate) fn thread_rng_n(n: u32) -> u32 { + CONTEXT.with(|ctx| { + let mut rng = ctx.rng.get().unwrap_or_else(FastRand::new); + let ret = rng.fastrand_n(n); + ctx.rng.set(Some(rng)); + ret + }) +} + +pub(super) fn budget<R>(f: impl FnOnce(&Cell<coop::Budget>) -> R) -> Result<R, AccessError> { + CONTEXT.try_with(|ctx| f(&ctx.budget)) +} + +cfg_rt! { + use crate::runtime::ThreadId; + + pub(crate) fn thread_id() -> Result<ThreadId, AccessError> { + CONTEXT.try_with(|ctx| { + match ctx.thread_id.get() { + Some(id) => id, + None => { + let id = ThreadId::next(); + ctx.thread_id.set(Some(id)); + id + } + } + }) + } + + pub(crate) fn set_current_task_id(id: Option<Id>) -> Option<Id> { + CONTEXT.try_with(|ctx| ctx.current_task_id.replace(id)).unwrap_or(None) + } + + pub(crate) fn current_task_id() -> Option<Id> { + CONTEXT.try_with(|ctx| ctx.current_task_id.get()).unwrap_or(None) + } + + #[track_caller] + pub(crate) fn defer(waker: &Waker) { + with_scheduler(|maybe_scheduler| { + if let Some(scheduler) = maybe_scheduler { + scheduler.defer(waker); + } else { + // Called from outside of the runtime, immediately wake the + // task. + waker.wake_by_ref(); + } + }); + } + + pub(super) fn set_scheduler<R>(v: &scheduler::Context, f: impl FnOnce() -> R) -> R { + CONTEXT.with(|c| c.scheduler.set(v, f)) + } + + #[track_caller] + pub(super) fn with_scheduler<R>(f: impl FnOnce(Option<&scheduler::Context>) -> R) -> R { + CONTEXT.with(|c| c.scheduler.with(f)) + } + + cfg_taskdump! { + /// SAFETY: Callers of this function must ensure that trace frames always + /// form a valid linked list. + pub(crate) unsafe fn with_trace<R>(f: impl FnOnce(&trace::Context) -> R) -> Option<R> { + CONTEXT.try_with(|c| f(&c.trace)).ok() + } + } +} diff --git a/third_party/rust/tokio/src/runtime/context/blocking.rs b/third_party/rust/tokio/src/runtime/context/blocking.rs new file mode 100644 index 0000000000..8ae4f570e8 --- /dev/null +++ b/third_party/rust/tokio/src/runtime/context/blocking.rs @@ -0,0 +1,121 @@ +use super::{EnterRuntime, CONTEXT}; + +use crate::loom::thread::AccessError; +use crate::util::markers::NotSendOrSync; + +use std::marker::PhantomData; +use std::time::Duration; + +/// Guard tracking that a caller has entered a blocking region. 
+#[must_use] +pub(crate) struct BlockingRegionGuard { + _p: PhantomData<NotSendOrSync>, +} + +pub(crate) struct DisallowBlockInPlaceGuard(bool); + +pub(crate) fn try_enter_blocking_region() -> Option<BlockingRegionGuard> { + CONTEXT + .try_with(|c| { + if c.runtime.get().is_entered() { + None + } else { + Some(BlockingRegionGuard::new()) + } + // If accessing the thread-local fails, the thread is terminating + // and thread-locals are being destroyed. Because we don't know if + // we are currently in a runtime or not, we default to being + // permissive. + }) + .unwrap_or_else(|_| Some(BlockingRegionGuard::new())) +} + +/// Disallows blocking in the current runtime context until the guard is dropped. +pub(crate) fn disallow_block_in_place() -> DisallowBlockInPlaceGuard { + let reset = CONTEXT.with(|c| { + if let EnterRuntime::Entered { + allow_block_in_place: true, + } = c.runtime.get() + { + c.runtime.set(EnterRuntime::Entered { + allow_block_in_place: false, + }); + true + } else { + false + } + }); + + DisallowBlockInPlaceGuard(reset) +} + +impl BlockingRegionGuard { + pub(super) fn new() -> BlockingRegionGuard { + BlockingRegionGuard { _p: PhantomData } + } + + /// Blocks the thread on the specified future, returning the value with + /// which that future completes. + pub(crate) fn block_on<F>(&mut self, f: F) -> Result<F::Output, AccessError> + where + F: std::future::Future, + { + use crate::runtime::park::CachedParkThread; + + let mut park = CachedParkThread::new(); + park.block_on(f) + } + + /// Blocks the thread on the specified future for **at most** `timeout` + /// + /// If the future completes before `timeout`, the result is returned. If + /// `timeout` elapses, then `Err` is returned. + pub(crate) fn block_on_timeout<F>(&mut self, f: F, timeout: Duration) -> Result<F::Output, ()> + where + F: std::future::Future, + { + use crate::runtime::park::CachedParkThread; + use std::task::Context; + use std::task::Poll::Ready; + use std::time::Instant; + + let mut park = CachedParkThread::new(); + let waker = park.waker().map_err(|_| ())?; + let mut cx = Context::from_waker(&waker); + + pin!(f); + let when = Instant::now() + timeout; + + loop { + if let Ready(v) = crate::runtime::coop::budget(|| f.as_mut().poll(&mut cx)) { + return Ok(v); + } + + let now = Instant::now(); + + if now >= when { + return Err(()); + } + + park.park_timeout(when - now); + } + } +} + +impl Drop for DisallowBlockInPlaceGuard { + fn drop(&mut self) { + if self.0 { + // XXX: Do we want some kind of assertion here, or is "best effort" okay? + CONTEXT.with(|c| { + if let EnterRuntime::Entered { + allow_block_in_place: false, + } = c.runtime.get() + { + c.runtime.set(EnterRuntime::Entered { + allow_block_in_place: true, + }); + } + }) + } + } +} diff --git a/third_party/rust/tokio/src/runtime/context/current.rs b/third_party/rust/tokio/src/runtime/context/current.rs new file mode 100644 index 0000000000..c3dc5c8994 --- /dev/null +++ b/third_party/rust/tokio/src/runtime/context/current.rs @@ -0,0 +1,99 @@ +use super::{Context, CONTEXT}; + +use crate::runtime::{scheduler, TryCurrentError}; +use crate::util::markers::SyncNotSend; + +use std::cell::{Cell, RefCell}; +use std::marker::PhantomData; + +#[derive(Debug)] +#[must_use] +pub(crate) struct SetCurrentGuard { + // The previous handle + prev: Option<scheduler::Handle>, + + // The depth for this guard + depth: usize, + + // Don't let the type move across threads. 
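For context, the blocking-region guards above back user-facing calls such as `Handle::block_on` from outside a runtime and `tokio::task::block_in_place`. A minimal sketch of the latter, assuming a multi-thread runtime (with the `macros` and `rt-multi-thread` features), since `block_in_place` panics on a current-thread runtime:

```rust
#[tokio::main(flavor = "multi_thread", worker_threads = 2)]
async fn main() {
    let checksum = tokio::task::block_in_place(|| {
        // Synchronous, CPU-heavy or blocking work runs here; the other
        // worker threads keep driving asynchronous tasks meanwhile.
        (0u64..1_000_000).sum::<u64>()
    });
    println!("checksum = {checksum}");
}
```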
+ _p: PhantomData<SyncNotSend>, +} + +pub(super) struct HandleCell { + /// Current handle + handle: RefCell<Option<scheduler::Handle>>, + + /// Tracks the number of nested calls to `try_set_current`. + depth: Cell<usize>, +} + +/// Sets this [`Handle`] as the current active [`Handle`]. +/// +/// [`Handle`]: crate::runtime::scheduler::Handle +pub(crate) fn try_set_current(handle: &scheduler::Handle) -> Option<SetCurrentGuard> { + CONTEXT.try_with(|ctx| ctx.set_current(handle)).ok() +} + +pub(crate) fn with_current<F, R>(f: F) -> Result<R, TryCurrentError> +where + F: FnOnce(&scheduler::Handle) -> R, +{ + match CONTEXT.try_with(|ctx| ctx.current.handle.borrow().as_ref().map(f)) { + Ok(Some(ret)) => Ok(ret), + Ok(None) => Err(TryCurrentError::new_no_context()), + Err(_access_error) => Err(TryCurrentError::new_thread_local_destroyed()), + } +} + +impl Context { + pub(super) fn set_current(&self, handle: &scheduler::Handle) -> SetCurrentGuard { + let old_handle = self.current.handle.borrow_mut().replace(handle.clone()); + let depth = self.current.depth.get(); + + if depth == usize::MAX { + panic!("reached max `enter` depth"); + } + + let depth = depth + 1; + self.current.depth.set(depth); + + SetCurrentGuard { + prev: old_handle, + depth, + _p: PhantomData, + } + } +} + +impl HandleCell { + pub(super) const fn new() -> HandleCell { + HandleCell { + handle: RefCell::new(None), + depth: Cell::new(0), + } + } +} + +impl Drop for SetCurrentGuard { + fn drop(&mut self) { + CONTEXT.with(|ctx| { + let depth = ctx.current.depth.get(); + + if depth != self.depth { + if !std::thread::panicking() { + panic!( + "`EnterGuard` values dropped out of order. Guards returned by \ + `tokio::runtime::Handle::enter()` must be dropped in the reverse \ + order as they were acquired." + ); + } else { + // Just return... this will leave handles in a wonky state though... + return; + } + } + + *ctx.current.handle.borrow_mut() = self.prev.take(); + ctx.current.depth.set(depth - 1); + }); + } +} diff --git a/third_party/rust/tokio/src/runtime/context/runtime.rs b/third_party/rust/tokio/src/runtime/context/runtime.rs new file mode 100644 index 0000000000..f2e29899a4 --- /dev/null +++ b/third_party/rust/tokio/src/runtime/context/runtime.rs @@ -0,0 +1,99 @@ +use super::{BlockingRegionGuard, SetCurrentGuard, CONTEXT}; + +use crate::runtime::scheduler; +use crate::util::rand::{FastRand, RngSeed}; + +use std::fmt; + +#[derive(Debug, Clone, Copy)] +#[must_use] +pub(crate) enum EnterRuntime { + /// Currently in a runtime context. + #[cfg_attr(not(feature = "rt"), allow(dead_code))] + Entered { allow_block_in_place: bool }, + + /// Not in a runtime context **or** a blocking region. + NotEntered, +} + +/// Guard tracking that a caller has entered a runtime context. +#[must_use] +pub(crate) struct EnterRuntimeGuard { + /// Tracks that the current thread has entered a blocking function call. + pub(crate) blocking: BlockingRegionGuard, + + #[allow(dead_code)] // Only tracking the guard. + pub(crate) handle: SetCurrentGuard, + + // Tracks the previous random number generator seed + old_seed: RngSeed, +} + +/// Marks the current thread as being within the dynamic extent of an +/// executor. 
+#[track_caller] +pub(crate) fn enter_runtime<F, R>(handle: &scheduler::Handle, allow_block_in_place: bool, f: F) -> R +where + F: FnOnce(&mut BlockingRegionGuard) -> R, +{ + let maybe_guard = CONTEXT.with(|c| { + if c.runtime.get().is_entered() { + None + } else { + // Set the entered flag + c.runtime.set(EnterRuntime::Entered { + allow_block_in_place, + }); + + // Generate a new seed + let rng_seed = handle.seed_generator().next_seed(); + + // Swap the RNG seed + let mut rng = c.rng.get().unwrap_or_else(FastRand::new); + let old_seed = rng.replace_seed(rng_seed); + c.rng.set(Some(rng)); + + Some(EnterRuntimeGuard { + blocking: BlockingRegionGuard::new(), + handle: c.set_current(handle), + old_seed, + }) + } + }); + + if let Some(mut guard) = maybe_guard { + return f(&mut guard.blocking); + } + + panic!( + "Cannot start a runtime from within a runtime. This happens \ + because a function (like `block_on`) attempted to block the \ + current thread while the thread is being used to drive \ + asynchronous tasks." + ); +} + +impl fmt::Debug for EnterRuntimeGuard { + fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result { + f.debug_struct("Enter").finish() + } +} + +impl Drop for EnterRuntimeGuard { + fn drop(&mut self) { + CONTEXT.with(|c| { + assert!(c.runtime.get().is_entered()); + c.runtime.set(EnterRuntime::NotEntered); + // Replace the previous RNG seed + let mut rng = c.rng.get().unwrap_or_else(FastRand::new); + rng.replace_seed(self.old_seed.clone()); + c.rng.set(Some(rng)); + }); + } +} + +impl EnterRuntime { + pub(crate) fn is_entered(self) -> bool { + matches!(self, EnterRuntime::Entered { .. }) + } +} diff --git a/third_party/rust/tokio/src/runtime/context/runtime_mt.rs b/third_party/rust/tokio/src/runtime/context/runtime_mt.rs new file mode 100644 index 0000000000..728caeae99 --- /dev/null +++ b/third_party/rust/tokio/src/runtime/context/runtime_mt.rs @@ -0,0 +1,36 @@ +use super::{EnterRuntime, CONTEXT}; + +/// Returns true if in a runtime context. +pub(crate) fn current_enter_context() -> EnterRuntime { + CONTEXT.with(|c| c.runtime.get()) +} + +/// Forces the current "entered" state to be cleared while the closure +/// is executed. 
+pub(crate) fn exit_runtime<F: FnOnce() -> R, R>(f: F) -> R { + // Reset in case the closure panics + struct Reset(EnterRuntime); + + impl Drop for Reset { + fn drop(&mut self) { + CONTEXT.with(|c| { + assert!( + !c.runtime.get().is_entered(), + "closure claimed permanent executor" + ); + c.runtime.set(self.0); + }); + } + } + + let was = CONTEXT.with(|c| { + let e = c.runtime.get(); + assert!(e.is_entered(), "asked to exit when not entered"); + c.runtime.set(EnterRuntime::NotEntered); + e + }); + + let _reset = Reset(was); + // dropping _reset after f() will reset ENTERED + f() +} diff --git a/third_party/rust/tokio/src/runtime/context/scoped.rs b/third_party/rust/tokio/src/runtime/context/scoped.rs new file mode 100644 index 0000000000..7b202a16c0 --- /dev/null +++ b/third_party/rust/tokio/src/runtime/context/scoped.rs @@ -0,0 +1,56 @@ +use std::cell::Cell; +use std::ptr; + +/// Scoped thread-local storage +pub(super) struct Scoped<T> { + pub(super) inner: Cell<*const T>, +} + +impl<T> Scoped<T> { + pub(super) const fn new() -> Scoped<T> { + Scoped { + inner: Cell::new(ptr::null()), + } + } + + /// Inserts a value into the scoped cell for the duration of the closure + pub(super) fn set<F, R>(&self, t: &T, f: F) -> R + where + F: FnOnce() -> R, + { + struct Reset<'a, T> { + cell: &'a Cell<*const T>, + prev: *const T, + } + + impl<T> Drop for Reset<'_, T> { + fn drop(&mut self) { + self.cell.set(self.prev); + } + } + + let prev = self.inner.get(); + self.inner.set(t as *const _); + + let _reset = Reset { + cell: &self.inner, + prev, + }; + + f() + } + + /// Gets the value out of the scoped cell; + pub(super) fn with<F, R>(&self, f: F) -> R + where + F: FnOnce(Option<&T>) -> R, + { + let val = self.inner.get(); + + if val.is_null() { + f(None) + } else { + unsafe { f(Some(&*val)) } + } + } +} diff --git a/third_party/rust/tokio/src/runtime/coop.rs b/third_party/rust/tokio/src/runtime/coop.rs new file mode 100644 index 0000000000..2dba246159 --- /dev/null +++ b/third_party/rust/tokio/src/runtime/coop.rs @@ -0,0 +1,323 @@ +#![cfg_attr(not(feature = "full"), allow(dead_code))] + +//! Yield points for improved cooperative scheduling. +//! +//! Documentation for this can be found in the [`tokio::task`] module. +//! +//! [`tokio::task`]: crate::task. + +// ```ignore +// # use tokio_stream::{Stream, StreamExt}; +// async fn drop_all<I: Stream + Unpin>(mut input: I) { +// while let Some(_) = input.next().await { +// tokio::coop::proceed().await; +// } +// } +// ``` +// +// The `proceed` future will coordinate with the executor to make sure that +// every so often control is yielded back to the executor so it can run other +// tasks. +// +// # Placing yield points +// +// Voluntary yield points should be placed _after_ at least some work has been +// done. If they are not, a future sufficiently deep in the task hierarchy may +// end up _never_ getting to run because of the number of yield points that +// inevitably appear before it is reached. In general, you will want yield +// points to only appear in "leaf" futures -- those that do not themselves poll +// other futures. By doing this, you avoid double-counting each iteration of +// the outer future against the cooperating budget. + +use crate::runtime::context; + +/// Opaque type tracking the amount of "work" a task may still do before +/// yielding back to the scheduler. 
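For a user-level analogue of the yield-point guidance above, the public `tokio::task::yield_now` can be awaited inside long loops. Unlike the internal budget-based `proceed`, it yields unconditionally each time it is awaited; a minimal sketch:

```rust
/// Sums a slice in chunks, yielding periodically so that other tasks on the
/// same worker thread are not starved by a long, never-pending loop.
async fn sum_chunks(data: &[u64]) -> u64 {
    let mut total = 0;
    for (i, chunk) in data.chunks(4096).enumerate() {
        total += chunk.iter().sum::<u64>();
        // Yield every few chunks so the scheduler can run other tasks.
        if i % 16 == 0 {
            tokio::task::yield_now().await;
        }
    }
    total
}
```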
+#[derive(Debug, Copy, Clone)] +pub(crate) struct Budget(Option<u8>); + +pub(crate) struct BudgetDecrement { + success: bool, + hit_zero: bool, +} + +impl Budget { + /// Budget assigned to a task on each poll. + /// + /// The value itself is chosen somewhat arbitrarily. It needs to be high + /// enough to amortize wakeup and scheduling costs, but low enough that we + /// do not starve other tasks for too long. The value also needs to be high + /// enough that particularly deep tasks are able to do at least some useful + /// work at all. + /// + /// Note that as more yield points are added in the ecosystem, this value + /// will probably also have to be raised. + const fn initial() -> Budget { + Budget(Some(128)) + } + + /// Returns an unconstrained budget. Operations will not be limited. + pub(super) const fn unconstrained() -> Budget { + Budget(None) + } + + fn has_remaining(self) -> bool { + self.0.map(|budget| budget > 0).unwrap_or(true) + } +} + +/// Runs the given closure with a cooperative task budget. When the function +/// returns, the budget is reset to the value prior to calling the function. +#[inline(always)] +pub(crate) fn budget<R>(f: impl FnOnce() -> R) -> R { + with_budget(Budget::initial(), f) +} + +/// Runs the given closure with an unconstrained task budget. When the function returns, the budget +/// is reset to the value prior to calling the function. +#[inline(always)] +pub(crate) fn with_unconstrained<R>(f: impl FnOnce() -> R) -> R { + with_budget(Budget::unconstrained(), f) +} + +#[inline(always)] +fn with_budget<R>(budget: Budget, f: impl FnOnce() -> R) -> R { + struct ResetGuard { + prev: Budget, + } + + impl Drop for ResetGuard { + fn drop(&mut self) { + let _ = context::budget(|cell| { + cell.set(self.prev); + }); + } + } + + #[allow(unused_variables)] + let maybe_guard = context::budget(|cell| { + let prev = cell.get(); + cell.set(budget); + + ResetGuard { prev } + }); + + // The function is called regardless even if the budget is not successfully + // set due to the thread-local being destroyed. + f() +} + +#[inline(always)] +pub(crate) fn has_budget_remaining() -> bool { + // If the current budget cannot be accessed due to the thread-local being + // shutdown, then we assume there is budget remaining. + context::budget(|cell| cell.get().has_remaining()).unwrap_or(true) +} + +cfg_rt_multi_thread! { + /// Sets the current task's budget. + pub(crate) fn set(budget: Budget) { + let _ = context::budget(|cell| cell.set(budget)); + } +} + +cfg_rt! { + /// Forcibly removes the budgeting constraints early. + /// + /// Returns the remaining budget + pub(crate) fn stop() -> Budget { + context::budget(|cell| { + let prev = cell.get(); + cell.set(Budget::unconstrained()); + prev + }).unwrap_or(Budget::unconstrained()) + } +} + +cfg_coop! { + use std::cell::Cell; + use std::task::{Context, Poll}; + + #[must_use] + pub(crate) struct RestoreOnPending(Cell<Budget>); + + impl RestoreOnPending { + pub(crate) fn made_progress(&self) { + self.0.set(Budget::unconstrained()); + } + } + + impl Drop for RestoreOnPending { + fn drop(&mut self) { + // Don't reset if budget was unconstrained or if we made progress. + // They are both represented as the remembered budget being unconstrained. + let budget = self.0.get(); + if !budget.is_unconstrained() { + let _ = context::budget(|cell| { + cell.set(budget); + }); + } + } + } + + /// Returns `Poll::Pending` if the current task has exceeded its budget and should yield. 
+ /// + /// When you call this method, the current budget is decremented. However, to ensure that + /// progress is made every time a task is polled, the budget is automatically restored to its + /// former value if the returned `RestoreOnPending` is dropped. It is the caller's + /// responsibility to call `RestoreOnPending::made_progress` if it made progress, to ensure + /// that the budget empties appropriately. + /// + /// Note that `RestoreOnPending` restores the budget **as it was before `poll_proceed`**. + /// Therefore, if the budget is _further_ adjusted between when `poll_proceed` returns and + /// `RestRestoreOnPending` is dropped, those adjustments are erased unless the caller indicates + /// that progress was made. + #[inline] + pub(crate) fn poll_proceed(cx: &mut Context<'_>) -> Poll<RestoreOnPending> { + context::budget(|cell| { + let mut budget = cell.get(); + + let decrement = budget.decrement(); + + if decrement.success { + let restore = RestoreOnPending(Cell::new(cell.get())); + cell.set(budget); + + // avoid double counting + if decrement.hit_zero { + inc_budget_forced_yield_count(); + } + + Poll::Ready(restore) + } else { + cx.waker().wake_by_ref(); + Poll::Pending + } + }).unwrap_or(Poll::Ready(RestoreOnPending(Cell::new(Budget::unconstrained())))) + } + + cfg_rt! { + cfg_metrics! { + #[inline(always)] + fn inc_budget_forced_yield_count() { + let _ = context::with_current(|handle| { + handle.scheduler_metrics().inc_budget_forced_yield_count(); + }); + } + } + + cfg_not_metrics! { + #[inline(always)] + fn inc_budget_forced_yield_count() {} + } + } + + cfg_not_rt! { + #[inline(always)] + fn inc_budget_forced_yield_count() {} + } + + impl Budget { + /// Decrements the budget. Returns `true` if successful. Decrementing fails + /// when there is not enough remaining budget. 
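The public counterpart to running without a budget is `tokio::task::unconstrained`, which wraps a future so that this budgeting never forces it to yield. A sketch under the assumption that the `sync`, `rt`, and `macros` features are enabled; use it sparingly, since an unconstrained future can starve other tasks on the same worker:

```rust
use tokio::sync::mpsc;

#[tokio::main(flavor = "current_thread")]
async fn main() {
    let (tx, mut rx) = mpsc::unbounded_channel();
    for i in 0..10_000u32 {
        tx.send(i).unwrap();
    }
    drop(tx);

    let drained = tokio::task::unconstrained(async {
        let mut count = 0;
        // Without `unconstrained`, draining a long ready queue like this
        // would have the `recv` future yield back to the scheduler once the
        // poll budget is exhausted.
        while rx.recv().await.is_some() {
            count += 1;
        }
        count
    })
    .await;

    assert_eq!(drained, 10_000);
}
```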
+ fn decrement(&mut self) -> BudgetDecrement { + if let Some(num) = &mut self.0 { + if *num > 0 { + *num -= 1; + + let hit_zero = *num == 0; + + BudgetDecrement { success: true, hit_zero } + } else { + BudgetDecrement { success: false, hit_zero: false } + } + } else { + BudgetDecrement { success: true, hit_zero: false } + } + } + + fn is_unconstrained(self) -> bool { + self.0.is_none() + } + } +} + +#[cfg(all(test, not(loom)))] +mod test { + use super::*; + + #[cfg(tokio_wasm_not_wasi)] + use wasm_bindgen_test::wasm_bindgen_test as test; + + fn get() -> Budget { + context::budget(|cell| cell.get()).unwrap_or(Budget::unconstrained()) + } + + #[test] + fn budgeting() { + use futures::future::poll_fn; + use tokio_test::*; + + assert!(get().0.is_none()); + + let coop = assert_ready!(task::spawn(()).enter(|cx, _| poll_proceed(cx))); + + assert!(get().0.is_none()); + drop(coop); + assert!(get().0.is_none()); + + budget(|| { + assert_eq!(get().0, Budget::initial().0); + + let coop = assert_ready!(task::spawn(()).enter(|cx, _| poll_proceed(cx))); + assert_eq!(get().0.unwrap(), Budget::initial().0.unwrap() - 1); + drop(coop); + // we didn't make progress + assert_eq!(get().0, Budget::initial().0); + + let coop = assert_ready!(task::spawn(()).enter(|cx, _| poll_proceed(cx))); + assert_eq!(get().0.unwrap(), Budget::initial().0.unwrap() - 1); + coop.made_progress(); + drop(coop); + // we _did_ make progress + assert_eq!(get().0.unwrap(), Budget::initial().0.unwrap() - 1); + + let coop = assert_ready!(task::spawn(()).enter(|cx, _| poll_proceed(cx))); + assert_eq!(get().0.unwrap(), Budget::initial().0.unwrap() - 2); + coop.made_progress(); + drop(coop); + assert_eq!(get().0.unwrap(), Budget::initial().0.unwrap() - 2); + + budget(|| { + assert_eq!(get().0, Budget::initial().0); + + let coop = assert_ready!(task::spawn(()).enter(|cx, _| poll_proceed(cx))); + assert_eq!(get().0.unwrap(), Budget::initial().0.unwrap() - 1); + coop.made_progress(); + drop(coop); + assert_eq!(get().0.unwrap(), Budget::initial().0.unwrap() - 1); + }); + + assert_eq!(get().0.unwrap(), Budget::initial().0.unwrap() - 2); + }); + + assert!(get().0.is_none()); + + budget(|| { + let n = get().0.unwrap(); + + for _ in 0..n { + let coop = assert_ready!(task::spawn(()).enter(|cx, _| poll_proceed(cx))); + coop.made_progress(); + } + + let mut task = task::spawn(poll_fn(|cx| { + let coop = ready!(poll_proceed(cx)); + coop.made_progress(); + Poll::Ready(()) + })); + + assert_pending!(task.poll()); + }); + } +} diff --git a/third_party/rust/tokio/src/runtime/driver.rs b/third_party/rust/tokio/src/runtime/driver.rs new file mode 100644 index 0000000000..572fdefb0d --- /dev/null +++ b/third_party/rust/tokio/src/runtime/driver.rs @@ -0,0 +1,341 @@ +//! Abstracts out the entire chain of runtime sub-drivers into common types. + +// Eventually, this file will see significant refactoring / cleanup. For now, we +// don't need to worry much about dead code with certain feature permutations. 
+#![cfg_attr(not(feature = "full"), allow(dead_code))] + +use crate::runtime::park::{ParkThread, UnparkThread}; + +use std::io; +use std::time::Duration; + +#[derive(Debug)] +pub(crate) struct Driver { + inner: TimeDriver, +} + +#[derive(Debug)] +pub(crate) struct Handle { + /// IO driver handle + pub(crate) io: IoHandle, + + /// Signal driver handle + #[cfg_attr(any(not(unix), loom), allow(dead_code))] + pub(crate) signal: SignalHandle, + + /// Time driver handle + pub(crate) time: TimeHandle, + + /// Source of `Instant::now()` + #[cfg_attr(not(all(feature = "time", feature = "test-util")), allow(dead_code))] + pub(crate) clock: Clock, +} + +pub(crate) struct Cfg { + pub(crate) enable_io: bool, + pub(crate) enable_time: bool, + pub(crate) enable_pause_time: bool, + pub(crate) start_paused: bool, + pub(crate) nevents: usize, +} + +impl Driver { + pub(crate) fn new(cfg: Cfg) -> io::Result<(Self, Handle)> { + let (io_stack, io_handle, signal_handle) = create_io_stack(cfg.enable_io, cfg.nevents)?; + + let clock = create_clock(cfg.enable_pause_time, cfg.start_paused); + + let (time_driver, time_handle) = create_time_driver(cfg.enable_time, io_stack, &clock); + + Ok(( + Self { inner: time_driver }, + Handle { + io: io_handle, + signal: signal_handle, + time: time_handle, + clock, + }, + )) + } + + pub(crate) fn park(&mut self, handle: &Handle) { + self.inner.park(handle) + } + + pub(crate) fn park_timeout(&mut self, handle: &Handle, duration: Duration) { + self.inner.park_timeout(handle, duration) + } + + pub(crate) fn shutdown(&mut self, handle: &Handle) { + self.inner.shutdown(handle) + } +} + +impl Handle { + pub(crate) fn unpark(&self) { + #[cfg(feature = "time")] + if let Some(handle) = &self.time { + handle.unpark(); + } + + self.io.unpark(); + } + + cfg_io_driver! { + #[track_caller] + pub(crate) fn io(&self) -> &crate::runtime::io::Handle { + self.io + .as_ref() + .expect("A Tokio 1.x context was found, but IO is disabled. Call `enable_io` on the runtime builder to enable IO.") + } + } + + cfg_signal_internal_and_unix! { + #[track_caller] + pub(crate) fn signal(&self) -> &crate::runtime::signal::Handle { + self.signal + .as_ref() + .expect("there is no signal driver running, must be called from the context of Tokio runtime") + } + } + + cfg_time! { + /// Returns a reference to the time driver handle. + /// + /// Panics if no time driver is present. + #[track_caller] + pub(crate) fn time(&self) -> &crate::runtime::time::Handle { + self.time + .as_ref() + .expect("A Tokio 1.x context was found, but timers are disabled. Call `enable_time` on the runtime builder to enable timers.") + } + + pub(crate) fn clock(&self) -> &Clock { + &self.clock + } + } +} + +// ===== io driver ===== + +cfg_io_driver! 
{ + pub(crate) type IoDriver = crate::runtime::io::Driver; + + #[derive(Debug)] + pub(crate) enum IoStack { + Enabled(ProcessDriver), + Disabled(ParkThread), + } + + #[derive(Debug)] + pub(crate) enum IoHandle { + Enabled(crate::runtime::io::Handle), + Disabled(UnparkThread), + } + + fn create_io_stack(enabled: bool, nevents: usize) -> io::Result<(IoStack, IoHandle, SignalHandle)> { + #[cfg(loom)] + assert!(!enabled); + + let ret = if enabled { + let (io_driver, io_handle) = crate::runtime::io::Driver::new(nevents)?; + + let (signal_driver, signal_handle) = create_signal_driver(io_driver, &io_handle)?; + let process_driver = create_process_driver(signal_driver); + + (IoStack::Enabled(process_driver), IoHandle::Enabled(io_handle), signal_handle) + } else { + let park_thread = ParkThread::new(); + let unpark_thread = park_thread.unpark(); + (IoStack::Disabled(park_thread), IoHandle::Disabled(unpark_thread), Default::default()) + }; + + Ok(ret) + } + + impl IoStack { + pub(crate) fn park(&mut self, handle: &Handle) { + match self { + IoStack::Enabled(v) => v.park(handle), + IoStack::Disabled(v) => v.park(), + } + } + + pub(crate) fn park_timeout(&mut self, handle: &Handle, duration: Duration) { + match self { + IoStack::Enabled(v) => v.park_timeout(handle, duration), + IoStack::Disabled(v) => v.park_timeout(duration), + } + } + + pub(crate) fn shutdown(&mut self, handle: &Handle) { + match self { + IoStack::Enabled(v) => v.shutdown(handle), + IoStack::Disabled(v) => v.shutdown(), + } + } + } + + impl IoHandle { + pub(crate) fn unpark(&self) { + match self { + IoHandle::Enabled(handle) => handle.unpark(), + IoHandle::Disabled(handle) => handle.unpark(), + } + } + + pub(crate) fn as_ref(&self) -> Option<&crate::runtime::io::Handle> { + match self { + IoHandle::Enabled(v) => Some(v), + IoHandle::Disabled(..) => None, + } + } + } +} + +cfg_not_io_driver! { + pub(crate) type IoHandle = UnparkThread; + + #[derive(Debug)] + pub(crate) struct IoStack(ParkThread); + + fn create_io_stack(_enabled: bool, _nevents: usize) -> io::Result<(IoStack, IoHandle, SignalHandle)> { + let park_thread = ParkThread::new(); + let unpark_thread = park_thread.unpark(); + Ok((IoStack(park_thread), unpark_thread, Default::default())) + } + + impl IoStack { + pub(crate) fn park(&mut self, _handle: &Handle) { + self.0.park(); + } + + pub(crate) fn park_timeout(&mut self, _handle: &Handle, duration: Duration) { + self.0.park_timeout(duration); + } + + pub(crate) fn shutdown(&mut self, _handle: &Handle) { + self.0.shutdown(); + } + } +} + +// ===== signal driver ===== + +cfg_signal_internal_and_unix! { + type SignalDriver = crate::runtime::signal::Driver; + pub(crate) type SignalHandle = Option<crate::runtime::signal::Handle>; + + fn create_signal_driver(io_driver: IoDriver, io_handle: &crate::runtime::io::Handle) -> io::Result<(SignalDriver, SignalHandle)> { + let driver = crate::runtime::signal::Driver::new(io_driver, io_handle)?; + let handle = driver.handle(); + Ok((driver, Some(handle))) + } +} + +cfg_not_signal_internal! { + pub(crate) type SignalHandle = (); + + cfg_io_driver! { + type SignalDriver = IoDriver; + + fn create_signal_driver(io_driver: IoDriver, _io_handle: &crate::runtime::io::Handle) -> io::Result<(SignalDriver, SignalHandle)> { + Ok((io_driver, ())) + } + } +} + +// ===== process driver ===== + +cfg_process_driver! 
{ + type ProcessDriver = crate::runtime::process::Driver; + + fn create_process_driver(signal_driver: SignalDriver) -> ProcessDriver { + ProcessDriver::new(signal_driver) + } +} + +cfg_not_process_driver! { + cfg_io_driver! { + type ProcessDriver = SignalDriver; + + fn create_process_driver(signal_driver: SignalDriver) -> ProcessDriver { + signal_driver + } + } +} + +// ===== time driver ===== + +cfg_time! { + #[derive(Debug)] + pub(crate) enum TimeDriver { + Enabled { + driver: crate::runtime::time::Driver, + }, + Disabled(IoStack), + } + + pub(crate) type Clock = crate::time::Clock; + pub(crate) type TimeHandle = Option<crate::runtime::time::Handle>; + + fn create_clock(enable_pausing: bool, start_paused: bool) -> Clock { + crate::time::Clock::new(enable_pausing, start_paused) + } + + fn create_time_driver( + enable: bool, + io_stack: IoStack, + clock: &Clock, + ) -> (TimeDriver, TimeHandle) { + if enable { + let (driver, handle) = crate::runtime::time::Driver::new(io_stack, clock); + + (TimeDriver::Enabled { driver }, Some(handle)) + } else { + (TimeDriver::Disabled(io_stack), None) + } + } + + impl TimeDriver { + pub(crate) fn park(&mut self, handle: &Handle) { + match self { + TimeDriver::Enabled { driver, .. } => driver.park(handle), + TimeDriver::Disabled(v) => v.park(handle), + } + } + + pub(crate) fn park_timeout(&mut self, handle: &Handle, duration: Duration) { + match self { + TimeDriver::Enabled { driver } => driver.park_timeout(handle, duration), + TimeDriver::Disabled(v) => v.park_timeout(handle, duration), + } + } + + pub(crate) fn shutdown(&mut self, handle: &Handle) { + match self { + TimeDriver::Enabled { driver } => driver.shutdown(handle), + TimeDriver::Disabled(v) => v.shutdown(handle), + } + } + } +} + +cfg_not_time! { + type TimeDriver = IoStack; + + pub(crate) type Clock = (); + pub(crate) type TimeHandle = (); + + fn create_clock(_enable_pausing: bool, _start_paused: bool) -> Clock { + () + } + + fn create_time_driver( + _enable: bool, + io_stack: IoStack, + _clock: &Clock, + ) -> (TimeDriver, TimeHandle) { + (io_stack, ()) + } +} diff --git a/third_party/rust/tokio/src/runtime/dump.rs b/third_party/rust/tokio/src/runtime/dump.rs new file mode 100644 index 0000000000..994b7f9c01 --- /dev/null +++ b/third_party/rust/tokio/src/runtime/dump.rs @@ -0,0 +1,76 @@ +//! Snapshots of runtime state. +//! +//! See [Handle::dump][crate::runtime::Handle::dump]. + +use std::fmt; + +/// A snapshot of a runtime's state. +/// +/// See [Handle::dump][crate::runtime::Handle::dump]. +#[derive(Debug)] +pub struct Dump { + tasks: Tasks, +} + +/// Snapshots of tasks. +/// +/// See [Handle::dump][crate::runtime::Handle::dump]. +#[derive(Debug)] +pub struct Tasks { + tasks: Vec<Task>, +} + +/// A snapshot of a task. +/// +/// See [Handle::dump][crate::runtime::Handle::dump]. +#[derive(Debug)] +pub struct Task { + trace: Trace, +} + +/// An execution trace of a task's last poll. +/// +/// See [Handle::dump][crate::runtime::Handle::dump]. +#[derive(Debug)] +pub struct Trace { + inner: super::task::trace::Trace, +} + +impl Dump { + pub(crate) fn new(tasks: Vec<Task>) -> Self { + Self { + tasks: Tasks { tasks }, + } + } + + /// Tasks in this snapshot. + pub fn tasks(&self) -> &Tasks { + &self.tasks + } +} + +impl Tasks { + /// Iterate over tasks. + pub fn iter(&self) -> impl Iterator<Item = &Task> { + self.tasks.iter() + } +} + +impl Task { + pub(crate) fn new(trace: super::task::trace::Trace) -> Self { + Self { + trace: Trace { inner: trace }, + } + } + + /// A trace of this task's state. 
+ pub fn trace(&self) -> &Trace { + &self.trace + } +} + +impl fmt::Display for Trace { + fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result { + self.inner.fmt(f) + } +} diff --git a/third_party/rust/tokio/src/runtime/handle.rs b/third_party/rust/tokio/src/runtime/handle.rs new file mode 100644 index 0000000000..be4743d477 --- /dev/null +++ b/third_party/rust/tokio/src/runtime/handle.rs @@ -0,0 +1,587 @@ +use crate::runtime::{context, scheduler, RuntimeFlavor}; + +/// Handle to the runtime. +/// +/// The handle is internally reference-counted and can be freely cloned. A handle can be +/// obtained using the [`Runtime::handle`] method. +/// +/// [`Runtime::handle`]: crate::runtime::Runtime::handle() +#[derive(Debug, Clone)] +// When the `rt` feature is *not* enabled, this type is still defined, but not +// included in the public API. +pub struct Handle { + pub(crate) inner: scheduler::Handle, +} + +use crate::runtime::task::JoinHandle; +use crate::util::error::{CONTEXT_MISSING_ERROR, THREAD_LOCAL_DESTROYED_ERROR}; + +use std::future::Future; +use std::marker::PhantomData; +use std::{error, fmt}; + +/// Runtime context guard. +/// +/// Returned by [`Runtime::enter`] and [`Handle::enter`], the context guard exits +/// the runtime context on drop. +/// +/// [`Runtime::enter`]: fn@crate::runtime::Runtime::enter +#[derive(Debug)] +#[must_use = "Creating and dropping a guard does nothing"] +pub struct EnterGuard<'a> { + _guard: context::SetCurrentGuard, + _handle_lifetime: PhantomData<&'a Handle>, +} + +impl Handle { + /// Enters the runtime context. This allows you to construct types that must + /// have an executor available on creation such as [`Sleep`] or + /// [`TcpStream`]. It will also allow you to call methods such as + /// [`tokio::spawn`] and [`Handle::current`] without panicking. + /// + /// # Panics + /// + /// When calling `Handle::enter` multiple times, the returned guards + /// **must** be dropped in the reverse order that they were acquired. + /// Failure to do so will result in a panic and possible memory leaks. + /// + /// # Examples + /// + /// ``` + /// use tokio::runtime::Runtime; + /// + /// let rt = Runtime::new().unwrap(); + /// + /// let _guard = rt.enter(); + /// tokio::spawn(async { + /// println!("Hello world!"); + /// }); + /// ``` + /// + /// Do **not** do the following, this shows a scenario that will result in a + /// panic and possible memory leak. + /// + /// ```should_panic + /// use tokio::runtime::Runtime; + /// + /// let rt1 = Runtime::new().unwrap(); + /// let rt2 = Runtime::new().unwrap(); + /// + /// let enter1 = rt1.enter(); + /// let enter2 = rt2.enter(); + /// + /// drop(enter1); + /// drop(enter2); + /// ``` + /// + /// [`Sleep`]: struct@crate::time::Sleep + /// [`TcpStream`]: struct@crate::net::TcpStream + /// [`tokio::spawn`]: fn@crate::spawn + pub fn enter(&self) -> EnterGuard<'_> { + EnterGuard { + _guard: match context::try_set_current(&self.inner) { + Some(guard) => guard, + None => panic!("{}", crate::util::error::THREAD_LOCAL_DESTROYED_ERROR), + }, + _handle_lifetime: PhantomData, + } + } + + /// Returns a `Handle` view over the currently running `Runtime`. + /// + /// # Panics + /// + /// This will panic if called outside the context of a Tokio runtime. That means that you must + /// call this on one of the threads **being run by the runtime**, or from a thread with an active + /// `EnterGuard`. 
Calling this from within a thread created by `std::thread::spawn` (for example) + /// will cause a panic unless that thread has an active `EnterGuard`. + /// + /// # Examples + /// + /// This can be used to obtain the handle of the surrounding runtime from an async + /// block or function running on that runtime. + /// + /// ``` + /// # use std::thread; + /// # use tokio::runtime::Runtime; + /// # fn dox() { + /// # let rt = Runtime::new().unwrap(); + /// # rt.spawn(async { + /// use tokio::runtime::Handle; + /// + /// // Inside an async block or function. + /// let handle = Handle::current(); + /// handle.spawn(async { + /// println!("now running in the existing Runtime"); + /// }); + /// + /// # let handle = + /// thread::spawn(move || { + /// // Notice that the handle is created outside of this thread and then moved in + /// handle.spawn(async { /* ... */ }); + /// // This next line would cause a panic because we haven't entered the runtime + /// // and created an EnterGuard + /// // let handle2 = Handle::current(); // panic + /// // So we create a guard here with Handle::enter(); + /// let _guard = handle.enter(); + /// // Now we can call Handle::current(); + /// let handle2 = Handle::current(); + /// }); + /// # handle.join().unwrap(); + /// # }); + /// # } + /// ``` + #[track_caller] + pub fn current() -> Self { + Handle { + inner: scheduler::Handle::current(), + } + } + + /// Returns a Handle view over the currently running Runtime + /// + /// Returns an error if no Runtime has been started + /// + /// Contrary to `current`, this never panics + pub fn try_current() -> Result<Self, TryCurrentError> { + context::with_current(|inner| Handle { + inner: inner.clone(), + }) + } + + /// Spawns a future onto the Tokio runtime. + /// + /// This spawns the given future onto the runtime's executor, usually a + /// thread pool. The thread pool is then responsible for polling the future + /// until it completes. + /// + /// The provided future will start running in the background immediately + /// when `spawn` is called, even if you don't await the returned + /// `JoinHandle`. + /// + /// See [module level][mod] documentation for more details. + /// + /// [mod]: index.html + /// + /// # Examples + /// + /// ``` + /// use tokio::runtime::Runtime; + /// + /// # fn dox() { + /// // Create the runtime + /// let rt = Runtime::new().unwrap(); + /// // Get a handle from this runtime + /// let handle = rt.handle(); + /// + /// // Spawn a future onto the runtime using the handle + /// handle.spawn(async { + /// println!("now running on a worker thread"); + /// }); + /// # } + /// ``` + #[track_caller] + pub fn spawn<F>(&self, future: F) -> JoinHandle<F::Output> + where + F: Future + Send + 'static, + F::Output: Send + 'static, + { + self.spawn_named(future, None) + } + + /// Runs the provided function on an executor dedicated to blocking + /// operations. 
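Because the returned `JoinHandle` is a future, the blocking closure's result can also be awaited once the blocking-pool thread finishes. A minimal sketch (the file path is only illustrative):

```rust
use tokio::runtime::Runtime;

fn main() {
    let rt = Runtime::new().unwrap();
    let handle = rt.handle().clone();

    rt.block_on(async move {
        let contents = handle
            .spawn_blocking(|| std::fs::read_to_string("/etc/hostname"))
            .await
            .expect("blocking task panicked")
            .expect("read failed");
        println!("hostname: {}", contents.trim());
    });
}
```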
+ /// + /// # Examples + /// + /// ``` + /// use tokio::runtime::Runtime; + /// + /// # fn dox() { + /// // Create the runtime + /// let rt = Runtime::new().unwrap(); + /// // Get a handle from this runtime + /// let handle = rt.handle(); + /// + /// // Spawn a blocking function onto the runtime using the handle + /// handle.spawn_blocking(|| { + /// println!("now running on a worker thread"); + /// }); + /// # } + #[track_caller] + pub fn spawn_blocking<F, R>(&self, func: F) -> JoinHandle<R> + where + F: FnOnce() -> R + Send + 'static, + R: Send + 'static, + { + self.inner.blocking_spawner().spawn_blocking(self, func) + } + + /// Runs a future to completion on this `Handle`'s associated `Runtime`. + /// + /// This runs the given future on the current thread, blocking until it is + /// complete, and yielding its resolved result. Any tasks or timers which + /// the future spawns internally will be executed on the runtime. + /// + /// When this is used on a `current_thread` runtime, only the + /// [`Runtime::block_on`] method can drive the IO and timer drivers, but the + /// `Handle::block_on` method cannot drive them. This means that, when using + /// this method on a current_thread runtime, anything that relies on IO or + /// timers will not work unless there is another thread currently calling + /// [`Runtime::block_on`] on the same runtime. + /// + /// # If the runtime has been shut down + /// + /// If the `Handle`'s associated `Runtime` has been shut down (through + /// [`Runtime::shutdown_background`], [`Runtime::shutdown_timeout`], or by + /// dropping it) and `Handle::block_on` is used it might return an error or + /// panic. Specifically IO resources will return an error and timers will + /// panic. Runtime independent futures will run as normal. + /// + /// # Panics + /// + /// This function panics if the provided future panics, if called within an + /// asynchronous execution context, or if a timer future is executed on a + /// runtime that has been shut down. + /// + /// # Examples + /// + /// ``` + /// use tokio::runtime::Runtime; + /// + /// // Create the runtime + /// let rt = Runtime::new().unwrap(); + /// + /// // Get a handle from this runtime + /// let handle = rt.handle(); + /// + /// // Execute the future, blocking the current thread until completion + /// handle.block_on(async { + /// println!("hello"); + /// }); + /// ``` + /// + /// Or using `Handle::current`: + /// + /// ``` + /// use tokio::runtime::Handle; + /// + /// #[tokio::main] + /// async fn main () { + /// let handle = Handle::current(); + /// std::thread::spawn(move || { + /// // Using Handle::block_on to run async code in the new thread. 
+ /// handle.block_on(async { + /// println!("hello"); + /// }); + /// }); + /// } + /// ``` + /// + /// [`JoinError`]: struct@crate::task::JoinError + /// [`JoinHandle`]: struct@crate::task::JoinHandle + /// [`Runtime::block_on`]: fn@crate::runtime::Runtime::block_on + /// [`Runtime::shutdown_background`]: fn@crate::runtime::Runtime::shutdown_background + /// [`Runtime::shutdown_timeout`]: fn@crate::runtime::Runtime::shutdown_timeout + /// [`spawn_blocking`]: crate::task::spawn_blocking + /// [`tokio::fs`]: crate::fs + /// [`tokio::net`]: crate::net + /// [`tokio::time`]: crate::time + #[track_caller] + pub fn block_on<F: Future>(&self, future: F) -> F::Output { + #[cfg(all( + tokio_unstable, + tokio_taskdump, + feature = "rt", + target_os = "linux", + any(target_arch = "aarch64", target_arch = "x86", target_arch = "x86_64") + ))] + let future = super::task::trace::Trace::root(future); + + #[cfg(all(tokio_unstable, feature = "tracing"))] + let future = + crate::util::trace::task(future, "block_on", None, super::task::Id::next().as_u64()); + + // Enter the runtime context. This sets the current driver handles and + // prevents blocking an existing runtime. + context::enter_runtime(&self.inner, true, |blocking| { + blocking.block_on(future).expect("failed to park thread") + }) + } + + #[track_caller] + pub(crate) fn spawn_named<F>(&self, future: F, _name: Option<&str>) -> JoinHandle<F::Output> + where + F: Future + Send + 'static, + F::Output: Send + 'static, + { + let id = crate::runtime::task::Id::next(); + #[cfg(all( + tokio_unstable, + tokio_taskdump, + feature = "rt", + target_os = "linux", + any(target_arch = "aarch64", target_arch = "x86", target_arch = "x86_64") + ))] + let future = super::task::trace::Trace::root(future); + #[cfg(all(tokio_unstable, feature = "tracing"))] + let future = crate::util::trace::task(future, "task", _name, id.as_u64()); + self.inner.spawn(future, id) + } + + /// Returns the flavor of the current `Runtime`. + /// + /// # Examples + /// + /// ``` + /// use tokio::runtime::{Handle, RuntimeFlavor}; + /// + /// #[tokio::main(flavor = "current_thread")] + /// async fn main() { + /// assert_eq!(RuntimeFlavor::CurrentThread, Handle::current().runtime_flavor()); + /// } + /// ``` + /// + /// ``` + /// use tokio::runtime::{Handle, RuntimeFlavor}; + /// + /// #[tokio::main(flavor = "multi_thread", worker_threads = 4)] + /// async fn main() { + /// assert_eq!(RuntimeFlavor::MultiThread, Handle::current().runtime_flavor()); + /// } + /// ``` + pub fn runtime_flavor(&self) -> RuntimeFlavor { + match self.inner { + scheduler::Handle::CurrentThread(_) => RuntimeFlavor::CurrentThread, + #[cfg(all(feature = "rt-multi-thread", not(tokio_wasi)))] + scheduler::Handle::MultiThread(_) => RuntimeFlavor::MultiThread, + } + } +} + +cfg_metrics! { + use crate::runtime::RuntimeMetrics; + + impl Handle { + /// Returns a view that lets you get information about how the runtime + /// is performing. + pub fn metrics(&self) -> RuntimeMetrics { + RuntimeMetrics::new(self.clone()) + } + } +} + +cfg_taskdump! { + impl Handle { + /// Captures a snapshot of the runtime's state. + /// + /// This functionality is experimental, and comes with a number of + /// requirements and limitations. + /// + /// # Examples + /// + /// This can be used to get call traces of each task in the runtime. + /// Calls to `Handle::dump` should usually be enclosed in a + /// [timeout][crate::time::timeout], so that dumping does not escalate a + /// single blocked runtime thread into an entirely blocked runtime. 
+ /// + /// ``` + /// # use tokio::runtime::Runtime; + /// # fn dox() { + /// # let rt = Runtime::new().unwrap(); + /// # rt.spawn(async { + /// use tokio::runtime::Handle; + /// use tokio::time::{timeout, Duration}; + /// + /// // Inside an async block or function. + /// let handle = Handle::current(); + /// if let Ok(dump) = timeout(Duration::from_secs(2), handle.dump()).await { + /// for (i, task) in dump.tasks().iter().enumerate() { + /// let trace = task.trace(); + /// println!("TASK {i}:"); + /// println!("{trace}\n"); + /// } + /// } + /// # }); + /// # } + /// ``` + /// + /// This produces highly detailed traces of tasks; e.g.: + /// + /// ```plain + /// TASK 0: + /// ╼ dump::main::{{closure}}::a::{{closure}} at /tokio/examples/dump.rs:18:20 + /// └╼ dump::main::{{closure}}::b::{{closure}} at /tokio/examples/dump.rs:23:20 + /// └╼ dump::main::{{closure}}::c::{{closure}} at /tokio/examples/dump.rs:28:24 + /// └╼ tokio::sync::barrier::Barrier::wait::{{closure}} at /tokio/tokio/src/sync/barrier.rs:129:10 + /// └╼ <tokio::util::trace::InstrumentedAsyncOp<F> as core::future::future::Future>::poll at /tokio/tokio/src/util/trace.rs:77:46 + /// └╼ tokio::sync::barrier::Barrier::wait_internal::{{closure}} at /tokio/tokio/src/sync/barrier.rs:183:36 + /// └╼ tokio::sync::watch::Receiver<T>::changed::{{closure}} at /tokio/tokio/src/sync/watch.rs:604:55 + /// └╼ tokio::sync::watch::changed_impl::{{closure}} at /tokio/tokio/src/sync/watch.rs:755:18 + /// └╼ <tokio::sync::notify::Notified as core::future::future::Future>::poll at /tokio/tokio/src/sync/notify.rs:1103:9 + /// └╼ tokio::sync::notify::Notified::poll_notified at /tokio/tokio/src/sync/notify.rs:996:32 + /// ``` + /// + /// # Requirements + /// + /// ## Debug Info Must Be Available + /// + /// To produce task traces, the application must **not** be compiled + /// with split debuginfo. On Linux, including debuginfo within the + /// application binary is the (correct) default. You can further ensure + /// this behavior with the following directive in your `Cargo.toml`: + /// + /// ```toml + /// [profile.*] + /// split-debuginfo = "off" + /// ``` + /// + /// ## Unstable Features + /// + /// This functionality is **unstable**, and requires both the + /// `tokio_unstable` and `tokio_taskdump` cfg flags to be set. + /// + /// You can do this by setting the `RUSTFLAGS` environment variable + /// before invoking `cargo`; e.g.: + /// ```bash + /// RUSTFLAGS="--cfg tokio_unstable --cfg tokio_taskdump" cargo run --example dump + /// ``` + /// + /// Or by [configuring][cargo-config] `rustflags` in + /// `.cargo/config.toml`: + /// ```text + /// [build] + /// rustflags = ["--cfg tokio_unstable", "--cfg tokio_taskdump"] + /// ``` + /// + /// [cargo-config]: + /// https://doc.rust-lang.org/cargo/reference/config.html + /// + /// ## Platform Requirements + /// + /// Task dumps are supported on Linux atop aarch64, x86 and x86_64. + /// + /// ## Current Thread Runtime Requirements + /// + /// On the `current_thread` runtime, task dumps may only be requested + /// from *within* the context of the runtime being dumped. Do not, for + /// example, await `Handle::dump()` on a different runtime. + /// + /// # Limitations + /// + /// ## Performance + /// + /// Although enabling the `tokio_taskdump` feature imposes virtually no + /// additional runtime overhead, actually calling `Handle::dump` is + /// expensive. The runtime must synchronize and pause its workers, then + /// re-poll every task in a special tracing mode. Avoid requesting dumps + /// often. 
+ /// + /// ## Local Executors + /// + /// Tasks managed by local executors (e.g., `FuturesUnordered` and + /// [`LocalSet`][crate::task::LocalSet]) may not appear in task dumps. + /// + /// ## Non-Termination When Workers Are Blocked + /// + /// The future produced by `Handle::dump` may never produce `Ready` if + /// another runtime worker is blocked for more than 250ms. This may + /// occur if a dump is requested during shutdown, or if another runtime + /// worker is infinite looping or synchronously deadlocked. For these + /// reasons, task dumping should usually be paired with an explicit + /// [timeout][crate::time::timeout]. + pub async fn dump(&self) -> crate::runtime::Dump { + match &self.inner { + scheduler::Handle::CurrentThread(handle) => handle.dump(), + #[cfg(all(feature = "rt-multi-thread", not(tokio_wasi)))] + scheduler::Handle::MultiThread(handle) => { + // perform the trace in a separate thread so that the + // trace itself does not appear in the taskdump. + let handle = handle.clone(); + spawn_thread(async { + let handle = handle; + handle.dump().await + }).await + }, + } + } + } + + cfg_rt_multi_thread! { + /// Spawn a new thread and asynchronously await on its result. + async fn spawn_thread<F>(f: F) -> <F as Future>::Output + where + F: Future + Send + 'static, + <F as Future>::Output: Send + 'static + { + let (tx, rx) = crate::sync::oneshot::channel(); + crate::loom::thread::spawn(|| { + let rt = crate::runtime::Builder::new_current_thread().build().unwrap(); + rt.block_on(async { + let _ = tx.send(f.await); + }); + }); + rx.await.unwrap() + } + } +} + +/// Error returned by `try_current` when no Runtime has been started +#[derive(Debug)] +pub struct TryCurrentError { + kind: TryCurrentErrorKind, +} + +impl TryCurrentError { + pub(crate) fn new_no_context() -> Self { + Self { + kind: TryCurrentErrorKind::NoContext, + } + } + + pub(crate) fn new_thread_local_destroyed() -> Self { + Self { + kind: TryCurrentErrorKind::ThreadLocalDestroyed, + } + } + + /// Returns true if the call failed because there is currently no runtime in + /// the Tokio context. + pub fn is_missing_context(&self) -> bool { + matches!(self.kind, TryCurrentErrorKind::NoContext) + } + + /// Returns true if the call failed because the Tokio context thread-local + /// had been destroyed. This can usually only happen if in the destructor of + /// other thread-locals. + pub fn is_thread_local_destroyed(&self) -> bool { + matches!(self.kind, TryCurrentErrorKind::ThreadLocalDestroyed) + } +} + +enum TryCurrentErrorKind { + NoContext, + ThreadLocalDestroyed, +} + +impl fmt::Debug for TryCurrentErrorKind { + fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result { + use TryCurrentErrorKind::*; + match self { + NoContext => f.write_str("NoContext"), + ThreadLocalDestroyed => f.write_str("ThreadLocalDestroyed"), + } + } +} + +impl fmt::Display for TryCurrentError { + fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result { + use TryCurrentErrorKind::*; + match self.kind { + NoContext => f.write_str(CONTEXT_MISSING_ERROR), + ThreadLocalDestroyed => f.write_str(THREAD_LOCAL_DESTROYED_ERROR), + } + } +} + +impl error::Error for TryCurrentError {} diff --git a/third_party/rust/tokio/src/runtime/io/metrics.rs b/third_party/rust/tokio/src/runtime/io/metrics.rs new file mode 100644 index 0000000000..ec341efe68 --- /dev/null +++ b/third_party/rust/tokio/src/runtime/io/metrics.rs @@ -0,0 +1,24 @@ +//! This file contains mocks of the metrics types used in the I/O driver. +//! +//! 
The reason these mocks don't live in `src/runtime/mock.rs` is because +//! these need to be available in the case when `net` is enabled but +//! `rt` is not. + +cfg_not_rt_and_metrics_and_net! { + #[derive(Default)] + pub(crate) struct IoDriverMetrics {} + + impl IoDriverMetrics { + pub(crate) fn incr_fd_count(&self) {} + pub(crate) fn dec_fd_count(&self) {} + pub(crate) fn incr_ready_count_by(&self, _amt: u64) {} + } +} + +cfg_net! { + cfg_rt! { + cfg_metrics! { + pub(crate) use crate::runtime::IoDriverMetrics; + } + } +} diff --git a/third_party/rust/tokio/src/runtime/io/mod.rs b/third_party/rust/tokio/src/runtime/io/mod.rs new file mode 100644 index 0000000000..2dd426f111 --- /dev/null +++ b/third_party/rust/tokio/src/runtime/io/mod.rs @@ -0,0 +1,356 @@ +#![cfg_attr(not(all(feature = "rt", feature = "net")), allow(dead_code))] + +mod registration; +pub(crate) use registration::Registration; + +mod scheduled_io; +use scheduled_io::ScheduledIo; + +mod metrics; + +use crate::io::interest::Interest; +use crate::io::ready::Ready; +use crate::runtime::driver; +use crate::util::slab::{self, Slab}; +use crate::{loom::sync::RwLock, util::bit}; + +use metrics::IoDriverMetrics; + +use std::fmt; +use std::io; +use std::time::Duration; + +/// I/O driver, backed by Mio. +pub(crate) struct Driver { + /// Tracks the number of times `turn` is called. It is safe for this to wrap + /// as it is mostly used to determine when to call `compact()`. + tick: u8, + + /// True when an event with the signal token is received + signal_ready: bool, + + /// Reuse the `mio::Events` value across calls to poll. + events: mio::Events, + + /// Primary slab handle containing the state for each resource registered + /// with this driver. + resources: Slab<ScheduledIo>, + + /// The system event queue. + poll: mio::Poll, +} + +/// A reference to an I/O driver. +pub(crate) struct Handle { + /// Registers I/O resources. + registry: mio::Registry, + + /// Allocates `ScheduledIo` handles when creating new resources. + io_dispatch: RwLock<IoDispatcher>, + + /// Used to wake up the reactor from a call to `turn`. + /// Not supported on Wasi due to lack of threading support. + #[cfg(not(tokio_wasi))] + waker: mio::Waker, + + pub(crate) metrics: IoDriverMetrics, +} + +#[derive(Debug)] +pub(crate) struct ReadyEvent { + tick: u8, + pub(crate) ready: Ready, + is_shutdown: bool, +} + +cfg_net_unix!( + impl ReadyEvent { + pub(crate) fn with_ready(&self, ready: Ready) -> Self { + Self { + ready, + tick: self.tick, + is_shutdown: self.is_shutdown, + } + } + } +); + +struct IoDispatcher { + allocator: slab::Allocator<ScheduledIo>, + is_shutdown: bool, +} + +#[derive(Debug, Eq, PartialEq, Clone, Copy)] +enum Direction { + Read, + Write, +} + +enum Tick { + Set(u8), + Clear(u8), +} + +// TODO: Don't use a fake token. Instead, reserve a slot entry for the wakeup +// token. +const TOKEN_WAKEUP: mio::Token = mio::Token(1 << 31); +const TOKEN_SIGNAL: mio::Token = mio::Token(1 + (1 << 31)); + +const ADDRESS: bit::Pack = bit::Pack::least_significant(24); + +// Packs the generation value in the `readiness` field. +// +// The generation prevents a race condition where a slab slot is reused for a +// new socket while the I/O driver is about to apply a readiness event. The +// generation value is checked when setting new readiness. If the generation do +// not match, then the readiness event is discarded. 
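+//
+// For illustration, the token handed to mio is assembled and later checked
+// roughly like this (a sketch of the calls made in `add_source` and
+// `ScheduledIo::set_readiness` below):
+//
+//   // registration: pack the slot address together with its generation
+//   let token = GENERATION.pack(shared.generation(), ADDRESS.pack(address.as_usize(), 0));
+//
+//   // dispatch: an event carrying a stale generation is rejected
+//   if GENERATION.unpack(token) != current_generation {
+//       return Err(());
+//   }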
+const GENERATION: bit::Pack = ADDRESS.then(7); + +fn _assert_kinds() { + fn _assert<T: Send + Sync>() {} + + _assert::<Handle>(); +} + +// ===== impl Driver ===== + +impl Driver { + /// Creates a new event loop, returning any error that happened during the + /// creation. + pub(crate) fn new(nevents: usize) -> io::Result<(Driver, Handle)> { + let poll = mio::Poll::new()?; + #[cfg(not(tokio_wasi))] + let waker = mio::Waker::new(poll.registry(), TOKEN_WAKEUP)?; + let registry = poll.registry().try_clone()?; + + let slab = Slab::new(); + let allocator = slab.allocator(); + + let driver = Driver { + tick: 0, + signal_ready: false, + events: mio::Events::with_capacity(nevents), + poll, + resources: slab, + }; + + let handle = Handle { + registry, + io_dispatch: RwLock::new(IoDispatcher::new(allocator)), + #[cfg(not(tokio_wasi))] + waker, + metrics: IoDriverMetrics::default(), + }; + + Ok((driver, handle)) + } + + pub(crate) fn park(&mut self, rt_handle: &driver::Handle) { + let handle = rt_handle.io(); + self.turn(handle, None); + } + + pub(crate) fn park_timeout(&mut self, rt_handle: &driver::Handle, duration: Duration) { + let handle = rt_handle.io(); + self.turn(handle, Some(duration)); + } + + pub(crate) fn shutdown(&mut self, rt_handle: &driver::Handle) { + let handle = rt_handle.io(); + + if handle.shutdown() { + self.resources.for_each(|io| { + // If a task is waiting on the I/O resource, notify it that the + // runtime is being shutdown. And shutdown will clear all wakers. + io.shutdown(); + }); + } + } + + fn turn(&mut self, handle: &Handle, max_wait: Option<Duration>) { + // How often to call `compact()` on the resource slab + const COMPACT_INTERVAL: u8 = 255; + + self.tick = self.tick.wrapping_add(1); + + if self.tick == COMPACT_INTERVAL { + self.resources.compact() + } + + let events = &mut self.events; + + // Block waiting for an event to happen, peeling out how many events + // happened. + match self.poll.poll(events, max_wait) { + Ok(_) => {} + Err(ref e) if e.kind() == io::ErrorKind::Interrupted => {} + #[cfg(tokio_wasi)] + Err(e) if e.kind() == io::ErrorKind::InvalidInput => { + // In case of wasm32_wasi this error happens, when trying to poll without subscriptions + // just return from the park, as there would be nothing, which wakes us up. + } + Err(e) => panic!("unexpected error when polling the I/O driver: {:?}", e), + } + + // Process all the events that came in, dispatching appropriately + let mut ready_count = 0; + for event in events.iter() { + let token = event.token(); + + if token == TOKEN_WAKEUP { + // Nothing to do, the event is used to unblock the I/O driver + } else if token == TOKEN_SIGNAL { + self.signal_ready = true; + } else { + Self::dispatch( + &mut self.resources, + self.tick, + token, + Ready::from_mio(event), + ); + ready_count += 1; + } + } + + handle.metrics.incr_ready_count_by(ready_count); + } + + fn dispatch(resources: &mut Slab<ScheduledIo>, tick: u8, token: mio::Token, ready: Ready) { + let addr = slab::Address::from_usize(ADDRESS.unpack(token.0)); + + let io = match resources.get(addr) { + Some(io) => io, + None => return, + }; + + let res = io.set_readiness(Some(token.0), Tick::Set(tick), |curr| curr | ready); + + if res.is_err() { + // token no longer valid! 
+ return; + } + + io.wake(ready); + } +} + +impl fmt::Debug for Driver { + fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result { + write!(f, "Driver") + } +} + +impl Handle { + /// Forces a reactor blocked in a call to `turn` to wakeup, or otherwise + /// makes the next call to `turn` return immediately. + /// + /// This method is intended to be used in situations where a notification + /// needs to otherwise be sent to the main reactor. If the reactor is + /// currently blocked inside of `turn` then it will wake up and soon return + /// after this method has been called. If the reactor is not currently + /// blocked in `turn`, then the next call to `turn` will not block and + /// return immediately. + pub(crate) fn unpark(&self) { + #[cfg(not(tokio_wasi))] + self.waker.wake().expect("failed to wake I/O driver"); + } + + /// Registers an I/O resource with the reactor for a given `mio::Ready` state. + /// + /// The registration token is returned. + pub(super) fn add_source( + &self, + source: &mut impl mio::event::Source, + interest: Interest, + ) -> io::Result<slab::Ref<ScheduledIo>> { + let (address, shared) = self.allocate()?; + + let token = GENERATION.pack(shared.generation(), ADDRESS.pack(address.as_usize(), 0)); + + self.registry + .register(source, mio::Token(token), interest.to_mio())?; + + self.metrics.incr_fd_count(); + + Ok(shared) + } + + /// Deregisters an I/O resource from the reactor. + pub(super) fn deregister_source(&self, source: &mut impl mio::event::Source) -> io::Result<()> { + self.registry.deregister(source)?; + + self.metrics.dec_fd_count(); + + Ok(()) + } + + /// shutdown the dispatcher. + fn shutdown(&self) -> bool { + let mut io = self.io_dispatch.write().unwrap(); + if io.is_shutdown { + return false; + } + io.is_shutdown = true; + true + } + + fn allocate(&self) -> io::Result<(slab::Address, slab::Ref<ScheduledIo>)> { + let io = self.io_dispatch.read().unwrap(); + if io.is_shutdown { + return Err(io::Error::new( + io::ErrorKind::Other, + crate::util::error::RUNTIME_SHUTTING_DOWN_ERROR, + )); + } + io.allocator.allocate().ok_or_else(|| { + io::Error::new( + io::ErrorKind::Other, + "reactor at max registered I/O resources", + ) + }) + } +} + +impl fmt::Debug for Handle { + fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result { + write!(f, "Handle") + } +} + +// ===== impl IoDispatcher ===== + +impl IoDispatcher { + fn new(allocator: slab::Allocator<ScheduledIo>) -> Self { + Self { + allocator, + is_shutdown: false, + } + } +} + +impl Direction { + pub(super) fn mask(self) -> Ready { + match self { + Direction::Read => Ready::READABLE | Ready::READ_CLOSED, + Direction::Write => Ready::WRITABLE | Ready::WRITE_CLOSED, + } + } +} + +// Signal handling +cfg_signal_internal_and_unix! 
{ + impl Handle { + pub(crate) fn register_signal_receiver(&self, receiver: &mut mio::net::UnixStream) -> io::Result<()> { + self.registry.register(receiver, TOKEN_SIGNAL, mio::Interest::READABLE)?; + Ok(()) + } + } + + impl Driver { + pub(crate) fn consume_signal_ready(&mut self) -> bool { + let ret = self.signal_ready; + self.signal_ready = false; + ret + } + } +} diff --git a/third_party/rust/tokio/src/runtime/io/registration.rs b/third_party/rust/tokio/src/runtime/io/registration.rs new file mode 100644 index 0000000000..341fa0539a --- /dev/null +++ b/third_party/rust/tokio/src/runtime/io/registration.rs @@ -0,0 +1,252 @@ +#![cfg_attr(not(feature = "net"), allow(dead_code))] + +use crate::io::interest::Interest; +use crate::runtime::io::{Direction, Handle, ReadyEvent, ScheduledIo}; +use crate::runtime::scheduler; +use crate::util::slab; + +use mio::event::Source; +use std::io; +use std::task::{Context, Poll}; + +cfg_io_driver! { + /// Associates an I/O resource with the reactor instance that drives it. + /// + /// A registration represents an I/O resource registered with a Reactor such + /// that it will receive task notifications on readiness. This is the lowest + /// level API for integrating with a reactor. + /// + /// The association between an I/O resource is made by calling + /// [`new_with_interest_and_handle`]. + /// Once the association is established, it remains established until the + /// registration instance is dropped. + /// + /// A registration instance represents two separate readiness streams. One + /// for the read readiness and one for write readiness. These streams are + /// independent and can be consumed from separate tasks. + /// + /// **Note**: while `Registration` is `Sync`, the caller must ensure that + /// there are at most two tasks that use a registration instance + /// concurrently. One task for [`poll_read_ready`] and one task for + /// [`poll_write_ready`]. While violating this requirement is "safe" from a + /// Rust memory safety point of view, it will result in unexpected behavior + /// in the form of lost notifications and tasks hanging. + /// + /// ## Platform-specific events + /// + /// `Registration` also allows receiving platform-specific `mio::Ready` + /// events. These events are included as part of the read readiness event + /// stream. The write readiness event stream is only for `Ready::writable()` + /// events. + /// + /// [`new_with_interest_and_handle`]: method@Self::new_with_interest_and_handle + /// [`poll_read_ready`]: method@Self::poll_read_ready` + /// [`poll_write_ready`]: method@Self::poll_write_ready` + #[derive(Debug)] + pub(crate) struct Registration { + /// Handle to the associated runtime. + handle: scheduler::Handle, + + /// Reference to state stored by the driver. + shared: slab::Ref<ScheduledIo>, + } +} + +unsafe impl Send for Registration {} +unsafe impl Sync for Registration {} + +// ===== impl Registration ===== + +impl Registration { + /// Registers the I/O resource with the reactor for the provided handle, for + /// a specific `Interest`. This does not add `hup` or `error` so if you are + /// interested in those states, you will need to add them to the readiness + /// state passed to this function. 
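+ ///
+ /// A wrapper I/O type typically establishes the association once, at
+ /// construction time. A minimal sketch of such a call site (not a runnable
+ /// doc test, since these are crate-internal APIs; `register` is a
+ /// hypothetical helper):
+ ///
+ /// ```ignore
+ /// fn register(
+ ///     stream: &mut mio::net::TcpStream,
+ ///     handle: scheduler::Handle,
+ /// ) -> io::Result<Registration> {
+ ///     // Read/write interest only; add error/hup readiness here if needed.
+ ///     Registration::new_with_interest_and_handle(
+ ///         stream,
+ ///         Interest::READABLE | Interest::WRITABLE,
+ ///         handle,
+ ///     )
+ /// }
+ /// ```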
+ /// + /// # Return + /// + /// - `Ok` if the registration happened successfully + /// - `Err` if an error was encountered during registration + #[track_caller] + pub(crate) fn new_with_interest_and_handle( + io: &mut impl Source, + interest: Interest, + handle: scheduler::Handle, + ) -> io::Result<Registration> { + let shared = handle.driver().io().add_source(io, interest)?; + + Ok(Registration { handle, shared }) + } + + /// Deregisters the I/O resource from the reactor it is associated with. + /// + /// This function must be called before the I/O resource associated with the + /// registration is dropped. + /// + /// Note that deregistering does not guarantee that the I/O resource can be + /// registered with a different reactor. Some I/O resource types can only be + /// associated with a single reactor instance for their lifetime. + /// + /// # Return + /// + /// If the deregistration was successful, `Ok` is returned. Any calls to + /// `Reactor::turn` that happen after a successful call to `deregister` will + /// no longer result in notifications getting sent for this registration. + /// + /// `Err` is returned if an error is encountered. + pub(crate) fn deregister(&mut self, io: &mut impl Source) -> io::Result<()> { + self.handle().deregister_source(io) + } + + pub(crate) fn clear_readiness(&self, event: ReadyEvent) { + self.shared.clear_readiness(event); + } + + // Uses the poll path, requiring the caller to ensure mutual exclusion for + // correctness. Only the last task to call this function is notified. + pub(crate) fn poll_read_ready(&self, cx: &mut Context<'_>) -> Poll<io::Result<ReadyEvent>> { + self.poll_ready(cx, Direction::Read) + } + + // Uses the poll path, requiring the caller to ensure mutual exclusion for + // correctness. Only the last task to call this function is notified. + pub(crate) fn poll_write_ready(&self, cx: &mut Context<'_>) -> Poll<io::Result<ReadyEvent>> { + self.poll_ready(cx, Direction::Write) + } + + // Uses the poll path, requiring the caller to ensure mutual exclusion for + // correctness. Only the last task to call this function is notified. + #[cfg(not(tokio_wasi))] + pub(crate) fn poll_read_io<R>( + &self, + cx: &mut Context<'_>, + f: impl FnMut() -> io::Result<R>, + ) -> Poll<io::Result<R>> { + self.poll_io(cx, Direction::Read, f) + } + + // Uses the poll path, requiring the caller to ensure mutual exclusion for + // correctness. Only the last task to call this function is notified. + pub(crate) fn poll_write_io<R>( + &self, + cx: &mut Context<'_>, + f: impl FnMut() -> io::Result<R>, + ) -> Poll<io::Result<R>> { + self.poll_io(cx, Direction::Write, f) + } + + /// Polls for events on the I/O resource's `direction` readiness stream. + /// + /// If called with a task context, notify the task when a new event is + /// received. 
+ fn poll_ready( + &self, + cx: &mut Context<'_>, + direction: Direction, + ) -> Poll<io::Result<ReadyEvent>> { + ready!(crate::trace::trace_leaf(cx)); + // Keep track of task budget + let coop = ready!(crate::runtime::coop::poll_proceed(cx)); + let ev = ready!(self.shared.poll_readiness(cx, direction)); + + if ev.is_shutdown { + return Poll::Ready(Err(gone())); + } + + coop.made_progress(); + Poll::Ready(Ok(ev)) + } + + fn poll_io<R>( + &self, + cx: &mut Context<'_>, + direction: Direction, + mut f: impl FnMut() -> io::Result<R>, + ) -> Poll<io::Result<R>> { + loop { + let ev = ready!(self.poll_ready(cx, direction))?; + + match f() { + Ok(ret) => { + return Poll::Ready(Ok(ret)); + } + Err(ref e) if e.kind() == io::ErrorKind::WouldBlock => { + self.clear_readiness(ev); + } + Err(e) => return Poll::Ready(Err(e)), + } + } + } + + pub(crate) fn try_io<R>( + &self, + interest: Interest, + f: impl FnOnce() -> io::Result<R>, + ) -> io::Result<R> { + let ev = self.shared.ready_event(interest); + + // Don't attempt the operation if the resource is not ready. + if ev.ready.is_empty() { + return Err(io::ErrorKind::WouldBlock.into()); + } + + match f() { + Err(ref e) if e.kind() == io::ErrorKind::WouldBlock => { + self.clear_readiness(ev); + Err(io::ErrorKind::WouldBlock.into()) + } + res => res, + } + } + + fn handle(&self) -> &Handle { + self.handle.driver().io() + } +} + +impl Drop for Registration { + fn drop(&mut self) { + // It is possible for a cycle to be created between wakers stored in + // `ScheduledIo` instances and `Arc<driver::Inner>`. To break this + // cycle, wakers are cleared. This is an imperfect solution as it is + // possible to store a `Registration` in a waker. In this case, the + // cycle would remain. + // + // See tokio-rs/tokio#3481 for more details. + self.shared.clear_wakers(); + } +} + +fn gone() -> io::Error { + io::Error::new( + io::ErrorKind::Other, + crate::util::error::RUNTIME_SHUTTING_DOWN_ERROR, + ) +} + +cfg_io_readiness! { + impl Registration { + pub(crate) async fn readiness(&self, interest: Interest) -> io::Result<ReadyEvent> { + let ev = self.shared.readiness(interest).await; + + if ev.is_shutdown { + return Err(gone()) + } + + Ok(ev) + } + + pub(crate) async fn async_io<R>(&self, interest: Interest, mut f: impl FnMut() -> io::Result<R>) -> io::Result<R> { + loop { + let event = self.readiness(interest).await?; + + match f() { + Err(ref e) if e.kind() == io::ErrorKind::WouldBlock => { + self.clear_readiness(event); + } + x => return x, + } + } + } + } +} diff --git a/third_party/rust/tokio/src/runtime/io/scheduled_io.rs b/third_party/rust/tokio/src/runtime/io/scheduled_io.rs new file mode 100644 index 0000000000..197a4e0e21 --- /dev/null +++ b/third_party/rust/tokio/src/runtime/io/scheduled_io.rs @@ -0,0 +1,558 @@ +use super::{ReadyEvent, Tick}; +use crate::io::interest::Interest; +use crate::io::ready::Ready; +use crate::loom::sync::atomic::AtomicUsize; +use crate::loom::sync::Mutex; +use crate::util::bit; +use crate::util::slab::Entry; +use crate::util::WakeList; + +use std::sync::atomic::Ordering::{AcqRel, Acquire, Release}; +use std::task::{Context, Poll, Waker}; + +use super::Direction; + +cfg_io_readiness! { + use crate::util::linked_list::{self, LinkedList}; + + use std::cell::UnsafeCell; + use std::future::Future; + use std::marker::PhantomPinned; + use std::pin::Pin; + use std::ptr::NonNull; +} + +/// Stored in the I/O driver resource slab. 
+#[derive(Debug)] +pub(crate) struct ScheduledIo { + /// Packs the resource's readiness with the resource's generation. + readiness: AtomicUsize, + + waiters: Mutex<Waiters>, +} + +cfg_io_readiness! { + type WaitList = LinkedList<Waiter, <Waiter as linked_list::Link>::Target>; +} + +#[derive(Debug, Default)] +struct Waiters { + #[cfg(feature = "net")] + /// List of all current waiters. + list: WaitList, + + /// Waker used for AsyncRead. + reader: Option<Waker>, + + /// Waker used for AsyncWrite. + writer: Option<Waker>, +} + +cfg_io_readiness! { + #[derive(Debug)] + struct Waiter { + pointers: linked_list::Pointers<Waiter>, + + /// The waker for this task. + waker: Option<Waker>, + + /// The interest this waiter is waiting on. + interest: Interest, + + is_ready: bool, + + /// Should never be `!Unpin`. + _p: PhantomPinned, + } + + generate_addr_of_methods! { + impl<> Waiter { + unsafe fn addr_of_pointers(self: NonNull<Self>) -> NonNull<linked_list::Pointers<Waiter>> { + &self.pointers + } + } + } + + /// Future returned by `readiness()`. + struct Readiness<'a> { + scheduled_io: &'a ScheduledIo, + + state: State, + + /// Entry in the waiter `LinkedList`. + waiter: UnsafeCell<Waiter>, + } + + enum State { + Init, + Waiting, + Done, + } +} + +// The `ScheduledIo::readiness` (`AtomicUsize`) is packed full of goodness. +// +// | shutdown | generation | driver tick | readiness | +// |----------+------------+--------------+-----------| +// | 1 bit | 7 bits + 8 bits + 16 bits | + +const READINESS: bit::Pack = bit::Pack::least_significant(16); + +const TICK: bit::Pack = READINESS.then(8); + +const GENERATION: bit::Pack = TICK.then(7); + +const SHUTDOWN: bit::Pack = GENERATION.then(1); + +#[test] +fn test_generations_assert_same() { + assert_eq!(super::GENERATION, GENERATION); +} + +// ===== impl ScheduledIo ===== + +impl Entry for ScheduledIo { + fn reset(&self) { + let state = self.readiness.load(Acquire); + + let generation = GENERATION.unpack(state); + let next = GENERATION.pack_lossy(generation + 1, 0); + + self.readiness.store(next, Release); + } +} + +impl Default for ScheduledIo { + fn default() -> ScheduledIo { + ScheduledIo { + readiness: AtomicUsize::new(0), + waiters: Mutex::new(Default::default()), + } + } +} + +impl ScheduledIo { + pub(crate) fn generation(&self) -> usize { + GENERATION.unpack(self.readiness.load(Acquire)) + } + + /// Invoked when the IO driver is shut down; forces this ScheduledIo into a + /// permanently shutdown state. + pub(super) fn shutdown(&self) { + let mask = SHUTDOWN.pack(1, 0); + self.readiness.fetch_or(mask, AcqRel); + self.wake(Ready::ALL); + } + + /// Sets the readiness on this `ScheduledIo` by invoking the given closure on + /// the current value, returning the previous readiness value. + /// + /// # Arguments + /// - `token`: the token for this `ScheduledIo`. + /// - `tick`: whether setting the tick or trying to clear readiness for a + /// specific tick. + /// - `f`: a closure returning a new readiness value given the previous + /// readiness. + /// + /// # Returns + /// + /// If the given token's generation no longer matches the `ScheduledIo`'s + /// generation, then the corresponding IO resource has been removed and + /// replaced with a new resource. In that case, this method returns `Err`. + /// Otherwise, this returns the previous readiness. 
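+ ///
+ /// For example, the driver merges newly delivered readiness into the stored
+ /// bits, while `clear_readiness` subtracts consumed bits under `Tick::Clear`
+ /// (simplified from the call sites elsewhere in this module):
+ ///
+ /// ```ignore
+ /// // dispatch path: OR the event's readiness into the current value
+ /// let _ = io.set_readiness(Some(token.0), Tick::Set(tick), |curr| curr | ready);
+ ///
+ /// // clear path: drop the consumed bits, keeping the closed states
+ /// let _ = io.set_readiness(None, Tick::Clear(event.tick), |curr| curr - mask_no_closed);
+ /// ```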
+ pub(super) fn set_readiness( + &self, + token: Option<usize>, + tick: Tick, + f: impl Fn(Ready) -> Ready, + ) -> Result<(), ()> { + let mut current = self.readiness.load(Acquire); + + loop { + let current_generation = GENERATION.unpack(current); + + if let Some(token) = token { + // Check that the generation for this access is still the + // current one. + if GENERATION.unpack(token) != current_generation { + return Err(()); + } + } + + // Mask out the tick/generation bits so that the modifying + // function doesn't see them. + let current_readiness = Ready::from_usize(current); + let new = f(current_readiness); + + let packed = match tick { + Tick::Set(t) => TICK.pack(t as usize, new.as_usize()), + Tick::Clear(t) => { + if TICK.unpack(current) as u8 != t { + // Trying to clear readiness with an old event! + return Err(()); + } + + TICK.pack(t as usize, new.as_usize()) + } + }; + + let next = GENERATION.pack(current_generation, packed); + + match self + .readiness + .compare_exchange(current, next, AcqRel, Acquire) + { + Ok(_) => return Ok(()), + // we lost the race, retry! + Err(actual) => current = actual, + } + } + } + + /// Notifies all pending waiters that have registered interest in `ready`. + /// + /// There may be many waiters to notify. Waking the pending task **must** be + /// done from outside of the lock otherwise there is a potential for a + /// deadlock. + /// + /// A stack array of wakers is created and filled with wakers to notify, the + /// lock is released, and the wakers are notified. Because there may be more + /// than 32 wakers to notify, if the stack array fills up, the lock is + /// released, the array is cleared, and the iteration continues. + pub(super) fn wake(&self, ready: Ready) { + let mut wakers = WakeList::new(); + + let mut waiters = self.waiters.lock(); + + // check for AsyncRead slot + if ready.is_readable() { + if let Some(waker) = waiters.reader.take() { + wakers.push(waker); + } + } + + // check for AsyncWrite slot + if ready.is_writable() { + if let Some(waker) = waiters.writer.take() { + wakers.push(waker); + } + } + + #[cfg(feature = "net")] + 'outer: loop { + let mut iter = waiters.list.drain_filter(|w| ready.satisfies(w.interest)); + + while wakers.can_push() { + match iter.next() { + Some(waiter) => { + let waiter = unsafe { &mut *waiter.as_ptr() }; + + if let Some(waker) = waiter.waker.take() { + waiter.is_ready = true; + wakers.push(waker); + } + } + None => { + break 'outer; + } + } + } + + drop(waiters); + + wakers.wake_all(); + + // Acquire the lock again. + waiters = self.waiters.lock(); + } + + // Release the lock before notifying + drop(waiters); + + wakers.wake_all(); + } + + pub(super) fn ready_event(&self, interest: Interest) -> ReadyEvent { + let curr = self.readiness.load(Acquire); + + ReadyEvent { + tick: TICK.unpack(curr) as u8, + ready: interest.mask() & Ready::from_usize(READINESS.unpack(curr)), + is_shutdown: SHUTDOWN.unpack(curr) != 0, + } + } + + /// Polls for readiness events in a given direction. + /// + /// These are to support `AsyncRead` and `AsyncWrite` polling methods, + /// which cannot use the `async fn` version. This uses reserved reader + /// and writer slots. 
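+ ///
+ /// A caller typically loops, clearing the returned readiness whenever the
+ /// underlying operation reports `WouldBlock` (simplified from
+ /// `Registration::poll_io`):
+ ///
+ /// ```ignore
+ /// loop {
+ ///     let ev = ready!(self.poll_ready(cx, direction))?;
+ ///
+ ///     match f() {
+ ///         Ok(ret) => return Poll::Ready(Ok(ret)),
+ ///         Err(ref e) if e.kind() == io::ErrorKind::WouldBlock => {
+ ///             self.clear_readiness(ev);
+ ///         }
+ ///         Err(e) => return Poll::Ready(Err(e)),
+ ///     }
+ /// }
+ /// ```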
+ pub(super) fn poll_readiness( + &self, + cx: &mut Context<'_>, + direction: Direction, + ) -> Poll<ReadyEvent> { + let curr = self.readiness.load(Acquire); + + let ready = direction.mask() & Ready::from_usize(READINESS.unpack(curr)); + let is_shutdown = SHUTDOWN.unpack(curr) != 0; + + if ready.is_empty() && !is_shutdown { + // Update the task info + let mut waiters = self.waiters.lock(); + let slot = match direction { + Direction::Read => &mut waiters.reader, + Direction::Write => &mut waiters.writer, + }; + + // Avoid cloning the waker if one is already stored that matches the + // current task. + match slot { + Some(existing) => { + if !existing.will_wake(cx.waker()) { + *existing = cx.waker().clone(); + } + } + None => { + *slot = Some(cx.waker().clone()); + } + } + + // Try again, in case the readiness was changed while we were + // taking the waiters lock + let curr = self.readiness.load(Acquire); + let ready = direction.mask() & Ready::from_usize(READINESS.unpack(curr)); + let is_shutdown = SHUTDOWN.unpack(curr) != 0; + if is_shutdown { + Poll::Ready(ReadyEvent { + tick: TICK.unpack(curr) as u8, + ready: direction.mask(), + is_shutdown, + }) + } else if ready.is_empty() { + Poll::Pending + } else { + Poll::Ready(ReadyEvent { + tick: TICK.unpack(curr) as u8, + ready, + is_shutdown, + }) + } + } else { + Poll::Ready(ReadyEvent { + tick: TICK.unpack(curr) as u8, + ready, + is_shutdown, + }) + } + } + + pub(crate) fn clear_readiness(&self, event: ReadyEvent) { + // This consumes the current readiness state **except** for closed + // states. Closed states are excluded because they are final states. + let mask_no_closed = event.ready - Ready::READ_CLOSED - Ready::WRITE_CLOSED; + + // result isn't important + let _ = self.set_readiness(None, Tick::Clear(event.tick), |curr| curr - mask_no_closed); + } + + pub(crate) fn clear_wakers(&self) { + let mut waiters = self.waiters.lock(); + waiters.reader.take(); + waiters.writer.take(); + } +} + +impl Drop for ScheduledIo { + fn drop(&mut self) { + self.wake(Ready::ALL); + } +} + +unsafe impl Send for ScheduledIo {} +unsafe impl Sync for ScheduledIo {} + +cfg_io_readiness! { + impl ScheduledIo { + /// An async version of `poll_readiness` which uses a linked list of wakers. + pub(crate) async fn readiness(&self, interest: Interest) -> ReadyEvent { + self.readiness_fut(interest).await + } + + // This is in a separate function so that the borrow checker doesn't think + // we are borrowing the `UnsafeCell` possibly over await boundaries. + // + // Go figure. 
+ fn readiness_fut(&self, interest: Interest) -> Readiness<'_> { + Readiness { + scheduled_io: self, + state: State::Init, + waiter: UnsafeCell::new(Waiter { + pointers: linked_list::Pointers::new(), + waker: None, + is_ready: false, + interest, + _p: PhantomPinned, + }), + } + } + } + + unsafe impl linked_list::Link for Waiter { + type Handle = NonNull<Waiter>; + type Target = Waiter; + + fn as_raw(handle: &NonNull<Waiter>) -> NonNull<Waiter> { + *handle + } + + unsafe fn from_raw(ptr: NonNull<Waiter>) -> NonNull<Waiter> { + ptr + } + + unsafe fn pointers(target: NonNull<Waiter>) -> NonNull<linked_list::Pointers<Waiter>> { + Waiter::addr_of_pointers(target) + } + } + + // ===== impl Readiness ===== + + impl Future for Readiness<'_> { + type Output = ReadyEvent; + + fn poll(self: Pin<&mut Self>, cx: &mut Context<'_>) -> Poll<Self::Output> { + use std::sync::atomic::Ordering::SeqCst; + + let (scheduled_io, state, waiter) = unsafe { + let me = self.get_unchecked_mut(); + (&me.scheduled_io, &mut me.state, &me.waiter) + }; + + loop { + match *state { + State::Init => { + // Optimistically check existing readiness + let curr = scheduled_io.readiness.load(SeqCst); + let ready = Ready::from_usize(READINESS.unpack(curr)); + let is_shutdown = SHUTDOWN.unpack(curr) != 0; + + // Safety: `waiter.interest` never changes + let interest = unsafe { (*waiter.get()).interest }; + let ready = ready.intersection(interest); + + if !ready.is_empty() || is_shutdown { + // Currently ready! + let tick = TICK.unpack(curr) as u8; + *state = State::Done; + return Poll::Ready(ReadyEvent { tick, ready, is_shutdown }); + } + + // Wasn't ready, take the lock (and check again while locked). + let mut waiters = scheduled_io.waiters.lock(); + + let curr = scheduled_io.readiness.load(SeqCst); + let mut ready = Ready::from_usize(READINESS.unpack(curr)); + let is_shutdown = SHUTDOWN.unpack(curr) != 0; + + if is_shutdown { + ready = Ready::ALL; + } + + let ready = ready.intersection(interest); + + if !ready.is_empty() || is_shutdown { + // Currently ready! + let tick = TICK.unpack(curr) as u8; + *state = State::Done; + return Poll::Ready(ReadyEvent { tick, ready, is_shutdown }); + } + + // Not ready even after locked, insert into list... + + // Safety: called while locked + unsafe { + (*waiter.get()).waker = Some(cx.waker().clone()); + } + + // Insert the waiter into the linked list + // + // safety: pointers from `UnsafeCell` are never null. + waiters + .list + .push_front(unsafe { NonNull::new_unchecked(waiter.get()) }); + *state = State::Waiting; + } + State::Waiting => { + // Currently in the "Waiting" state, implying the caller has + // a waiter stored in the waiter list (guarded by + // `notify.waiters`). In order to access the waker fields, + // we must hold the lock. + + let waiters = scheduled_io.waiters.lock(); + + // Safety: called while locked + let w = unsafe { &mut *waiter.get() }; + + if w.is_ready { + // Our waker has been notified. + *state = State::Done; + } else { + // Update the waker, if necessary. + if !w.waker.as_ref().unwrap().will_wake(cx.waker()) { + w.waker = Some(cx.waker().clone()); + } + + return Poll::Pending; + } + + // Explicit drop of the lock to indicate the scope that the + // lock is held. Because holding the lock is required to + // ensure safe access to fields not held within the lock, it + // is helpful to visualize the scope of the critical + // section. 
+ drop(waiters); + } + State::Done => { + // Safety: State::Done means it is no longer shared + let w = unsafe { &mut *waiter.get() }; + + let curr = scheduled_io.readiness.load(Acquire); + let is_shutdown = SHUTDOWN.unpack(curr) != 0; + + // The returned tick might be newer than the event + // which notified our waker. This is ok because the future + // still didn't return `Poll::Ready`. + let tick = TICK.unpack(curr) as u8; + + // The readiness state could have been cleared in the meantime, + // but we allow the returned ready set to be empty. + let curr_ready = Ready::from_usize(READINESS.unpack(curr)); + let ready = curr_ready.intersection(w.interest); + + return Poll::Ready(ReadyEvent { + tick, + ready, + is_shutdown, + }); + } + } + } + } + } + + impl Drop for Readiness<'_> { + fn drop(&mut self) { + let mut waiters = self.scheduled_io.waiters.lock(); + + // Safety: `waiter` is only ever stored in `waiters` + unsafe { + waiters + .list + .remove(NonNull::new_unchecked(self.waiter.get())) + }; + } + } + + unsafe impl Send for Readiness<'_> {} + unsafe impl Sync for Readiness<'_> {} +} diff --git a/third_party/rust/tokio/src/runtime/metrics/batch.rs b/third_party/rust/tokio/src/runtime/metrics/batch.rs new file mode 100644 index 0000000000..1bb4e261f7 --- /dev/null +++ b/third_party/rust/tokio/src/runtime/metrics/batch.rs @@ -0,0 +1,162 @@ +use crate::runtime::metrics::{HistogramBatch, WorkerMetrics}; + +use std::sync::atomic::Ordering::Relaxed; +use std::time::{Duration, Instant}; + +pub(crate) struct MetricsBatch { + /// Number of times the worker parked. + park_count: u64, + + /// Number of times the worker woke w/o doing work. + noop_count: u64, + + /// Number of tasks stolen. + steal_count: u64, + + /// Number of times tasks where stolen. + steal_operations: u64, + + /// Number of tasks that were polled by the worker. + poll_count: u64, + + /// Number of tasks polled when the worker entered park. This is used to + /// track the noop count. + poll_count_on_last_park: u64, + + /// Number of tasks that were scheduled locally on this worker. + local_schedule_count: u64, + + /// Number of tasks moved to the global queue to make space in the local + /// queue + overflow_count: u64, + + /// The total busy duration in nanoseconds. + busy_duration_total: u64, + + /// Instant at which work last resumed (continued after park). + processing_scheduled_tasks_started_at: Instant, + + /// If `Some`, tracks poll times in nanoseconds + poll_timer: Option<PollTimer>, +} + +struct PollTimer { + /// Histogram of poll counts within each band. + poll_counts: HistogramBatch, + + /// Instant when the most recent task started polling. 
+ poll_started_at: Instant, +} + +impl MetricsBatch { + pub(crate) fn new(worker_metrics: &WorkerMetrics) -> MetricsBatch { + let now = Instant::now(); + + MetricsBatch { + park_count: 0, + noop_count: 0, + steal_count: 0, + steal_operations: 0, + poll_count: 0, + poll_count_on_last_park: 0, + local_schedule_count: 0, + overflow_count: 0, + busy_duration_total: 0, + processing_scheduled_tasks_started_at: now, + poll_timer: worker_metrics + .poll_count_histogram + .as_ref() + .map(|worker_poll_counts| PollTimer { + poll_counts: HistogramBatch::from_histogram(worker_poll_counts), + poll_started_at: now, + }), + } + } + + pub(crate) fn submit(&mut self, worker: &WorkerMetrics) { + worker.park_count.store(self.park_count, Relaxed); + worker.noop_count.store(self.noop_count, Relaxed); + worker.steal_count.store(self.steal_count, Relaxed); + worker + .steal_operations + .store(self.steal_operations, Relaxed); + worker.poll_count.store(self.poll_count, Relaxed); + + worker + .busy_duration_total + .store(self.busy_duration_total, Relaxed); + + worker + .local_schedule_count + .store(self.local_schedule_count, Relaxed); + worker.overflow_count.store(self.overflow_count, Relaxed); + + if let Some(poll_timer) = &self.poll_timer { + let dst = worker.poll_count_histogram.as_ref().unwrap(); + poll_timer.poll_counts.submit(dst); + } + } + + /// The worker is about to park. + pub(crate) fn about_to_park(&mut self) { + self.park_count += 1; + + if self.poll_count_on_last_park == self.poll_count { + self.noop_count += 1; + } else { + self.poll_count_on_last_park = self.poll_count; + } + } + + /// Start processing a batch of tasks + pub(crate) fn start_processing_scheduled_tasks(&mut self) { + self.processing_scheduled_tasks_started_at = Instant::now(); + } + + /// Stop processing a batch of tasks + pub(crate) fn end_processing_scheduled_tasks(&mut self) { + let busy_duration = self.processing_scheduled_tasks_started_at.elapsed(); + self.busy_duration_total += duration_as_u64(busy_duration); + } + + /// Start polling an individual task + pub(crate) fn start_poll(&mut self) { + self.poll_count += 1; + + if let Some(poll_timer) = &mut self.poll_timer { + poll_timer.poll_started_at = Instant::now(); + } + } + + /// Stop polling an individual task + pub(crate) fn end_poll(&mut self) { + if let Some(poll_timer) = &mut self.poll_timer { + let elapsed = duration_as_u64(poll_timer.poll_started_at.elapsed()); + poll_timer.poll_counts.measure(elapsed, 1); + } + } + + pub(crate) fn inc_local_schedule_count(&mut self) { + self.local_schedule_count += 1; + } +} + +cfg_rt_multi_thread! 
{ + impl MetricsBatch { + pub(crate) fn incr_steal_count(&mut self, by: u16) { + self.steal_count += by as u64; + } + + pub(crate) fn incr_steal_operations(&mut self) { + self.steal_operations += 1; + } + + pub(crate) fn incr_overflow_count(&mut self) { + self.overflow_count += 1; + } + } +} + +fn duration_as_u64(dur: Duration) -> u64 { + u64::try_from(dur.as_nanos()).unwrap_or(u64::MAX) +} diff --git a/third_party/rust/tokio/src/runtime/metrics/histogram.rs b/third_party/rust/tokio/src/runtime/metrics/histogram.rs new file mode 100644 index 0000000000..976f54fe85 --- /dev/null +++ b/third_party/rust/tokio/src/runtime/metrics/histogram.rs @@ -0,0 +1,502 @@ +use crate::loom::sync::atomic::{AtomicU64, Ordering::Relaxed}; + +use std::cmp; +use std::ops::Range; + +#[derive(Debug)] +pub(crate) struct Histogram { + /// The histogram buckets + buckets: Box<[AtomicU64]>, + + /// Bucket scale, linear or log + scale: HistogramScale, + + /// Minimum resolution + resolution: u64, +} + +#[derive(Debug, Clone)] +pub(crate) struct HistogramBuilder { + /// Histogram scale + pub(crate) scale: HistogramScale, + + /// Must be a power of 2 + pub(crate) resolution: u64, + + /// Number of buckets + pub(crate) num_buckets: usize, +} + +#[derive(Debug)] +pub(crate) struct HistogramBatch { + buckets: Box<[u64]>, + scale: HistogramScale, + resolution: u64, +} + +cfg_unstable! { + /// Whether the histogram used to aggregate a metric uses a linear or + /// logarithmic scale. + #[derive(Debug, Copy, Clone, Eq, PartialEq)] + #[non_exhaustive] + pub enum HistogramScale { + /// Linear bucket scale + Linear, + + /// Logarithmic bucket scale + Log, + } +} + +impl Histogram { + pub(crate) fn num_buckets(&self) -> usize { + self.buckets.len() + } + + pub(crate) fn get(&self, bucket: usize) -> u64 { + self.buckets[bucket].load(Relaxed) + } + + pub(crate) fn bucket_range(&self, bucket: usize) -> Range<u64> { + match self.scale { + HistogramScale::Log => Range { + start: if bucket == 0 { + 0 + } else { + self.resolution << (bucket - 1) + }, + end: if bucket == self.buckets.len() - 1 { + u64::MAX + } else { + self.resolution << bucket + }, + }, + HistogramScale::Linear => Range { + start: self.resolution * bucket as u64, + end: if bucket == self.buckets.len() - 1 { + u64::MAX + } else { + self.resolution * (bucket as u64 + 1) + }, + }, + } + } +} + +impl HistogramBatch { + pub(crate) fn from_histogram(histogram: &Histogram) -> HistogramBatch { + let buckets = vec![0; histogram.buckets.len()].into_boxed_slice(); + + HistogramBatch { + buckets, + scale: histogram.scale, + resolution: histogram.resolution, + } + } + + pub(crate) fn measure(&mut self, value: u64, count: u64) { + self.buckets[self.value_to_bucket(value)] += count; + } + + pub(crate) fn submit(&self, histogram: &Histogram) { + debug_assert_eq!(self.scale, histogram.scale); + debug_assert_eq!(self.resolution, histogram.resolution); + debug_assert_eq!(self.buckets.len(), histogram.buckets.len()); + + for i in 0..self.buckets.len() { + histogram.buckets[i].store(self.buckets[i], Relaxed); + } + } + + fn value_to_bucket(&self, value: u64) -> usize { + match self.scale { + HistogramScale::Linear => { + let max = self.buckets.len() - 1; + cmp::min(value / self.resolution, max as u64) as usize + } + HistogramScale::Log => { + let max = self.buckets.len() - 1; + + if value < self.resolution { + 0 + } else { + let significant_digits = 64 - value.leading_zeros(); + let bucket_digits = 64 - (self.resolution - 1).leading_zeros(); + cmp::min(significant_digits as usize - 
bucket_digits as usize, max) + } + } + } + } +} + +impl HistogramBuilder { + pub(crate) fn new() -> HistogramBuilder { + HistogramBuilder { + scale: HistogramScale::Linear, + // Resolution is in nanoseconds. + resolution: 100_000, + num_buckets: 10, + } + } + + pub(crate) fn build(&self) -> Histogram { + let mut resolution = self.resolution; + + assert!(resolution > 0); + + if matches!(self.scale, HistogramScale::Log) { + resolution = resolution.next_power_of_two(); + } + + Histogram { + buckets: (0..self.num_buckets) + .map(|_| AtomicU64::new(0)) + .collect::<Vec<_>>() + .into_boxed_slice(), + resolution, + scale: self.scale, + } + } +} + +impl Default for HistogramBuilder { + fn default() -> HistogramBuilder { + HistogramBuilder::new() + } +} + +#[cfg(test)] +mod test { + use super::*; + + macro_rules! assert_bucket_eq { + ($h:expr, $bucket:expr, $val:expr) => {{ + assert_eq!($h.buckets[$bucket], $val); + }}; + } + + #[test] + fn log_scale_resolution_1() { + let h = HistogramBuilder { + scale: HistogramScale::Log, + resolution: 1, + num_buckets: 10, + } + .build(); + + assert_eq!(h.bucket_range(0), 0..1); + assert_eq!(h.bucket_range(1), 1..2); + assert_eq!(h.bucket_range(2), 2..4); + assert_eq!(h.bucket_range(3), 4..8); + assert_eq!(h.bucket_range(9), 256..u64::MAX); + + let mut b = HistogramBatch::from_histogram(&h); + + b.measure(0, 1); + assert_bucket_eq!(b, 0, 1); + assert_bucket_eq!(b, 1, 0); + + b.measure(1, 1); + assert_bucket_eq!(b, 0, 1); + assert_bucket_eq!(b, 1, 1); + assert_bucket_eq!(b, 2, 0); + + b.measure(2, 1); + assert_bucket_eq!(b, 0, 1); + assert_bucket_eq!(b, 1, 1); + assert_bucket_eq!(b, 2, 1); + + b.measure(3, 1); + assert_bucket_eq!(b, 0, 1); + assert_bucket_eq!(b, 1, 1); + assert_bucket_eq!(b, 2, 2); + + b.measure(4, 1); + assert_bucket_eq!(b, 0, 1); + assert_bucket_eq!(b, 1, 1); + assert_bucket_eq!(b, 2, 2); + assert_bucket_eq!(b, 3, 1); + + b.measure(100, 1); + assert_bucket_eq!(b, 7, 1); + + b.measure(128, 1); + assert_bucket_eq!(b, 8, 1); + + b.measure(4096, 1); + assert_bucket_eq!(b, 9, 1); + } + + #[test] + fn log_scale_resolution_2() { + let h = HistogramBuilder { + scale: HistogramScale::Log, + resolution: 2, + num_buckets: 10, + } + .build(); + + assert_eq!(h.bucket_range(0), 0..2); + assert_eq!(h.bucket_range(1), 2..4); + assert_eq!(h.bucket_range(2), 4..8); + assert_eq!(h.bucket_range(3), 8..16); + assert_eq!(h.bucket_range(9), 512..u64::MAX); + + let mut b = HistogramBatch::from_histogram(&h); + + b.measure(0, 1); + assert_bucket_eq!(b, 0, 1); + assert_bucket_eq!(b, 1, 0); + + b.measure(1, 1); + assert_bucket_eq!(b, 0, 2); + assert_bucket_eq!(b, 1, 0); + + b.measure(2, 1); + assert_bucket_eq!(b, 0, 2); + assert_bucket_eq!(b, 1, 1); + assert_bucket_eq!(b, 2, 0); + + b.measure(3, 1); + assert_bucket_eq!(b, 0, 2); + assert_bucket_eq!(b, 1, 2); + assert_bucket_eq!(b, 2, 0); + + b.measure(4, 1); + assert_bucket_eq!(b, 0, 2); + assert_bucket_eq!(b, 1, 2); + assert_bucket_eq!(b, 2, 1); + + b.measure(5, 1); + assert_bucket_eq!(b, 0, 2); + assert_bucket_eq!(b, 1, 2); + assert_bucket_eq!(b, 2, 2); + + b.measure(6, 1); + assert_bucket_eq!(b, 0, 2); + assert_bucket_eq!(b, 1, 2); + assert_bucket_eq!(b, 2, 3); + + b.measure(7, 1); + assert_bucket_eq!(b, 0, 2); + assert_bucket_eq!(b, 1, 2); + assert_bucket_eq!(b, 2, 4); + + b.measure(8, 1); + assert_bucket_eq!(b, 0, 2); + assert_bucket_eq!(b, 1, 2); + assert_bucket_eq!(b, 2, 4); + assert_bucket_eq!(b, 3, 1); + + b.measure(100, 1); + assert_bucket_eq!(b, 6, 1); + + b.measure(128, 1); + assert_bucket_eq!(b, 7, 1); + + 
b.measure(4096, 1); + assert_bucket_eq!(b, 9, 1); + + for bucket in h.buckets.iter() { + assert_eq!(bucket.load(Relaxed), 0); + } + + b.submit(&h); + + for i in 0..h.buckets.len() { + assert_eq!(h.buckets[i].load(Relaxed), b.buckets[i]); + } + + b.submit(&h); + + for i in 0..h.buckets.len() { + assert_eq!(h.buckets[i].load(Relaxed), b.buckets[i]); + } + } + + #[test] + fn linear_scale_resolution_1() { + let h = HistogramBuilder { + scale: HistogramScale::Linear, + resolution: 1, + num_buckets: 10, + } + .build(); + + assert_eq!(h.bucket_range(0), 0..1); + assert_eq!(h.bucket_range(1), 1..2); + assert_eq!(h.bucket_range(2), 2..3); + assert_eq!(h.bucket_range(3), 3..4); + assert_eq!(h.bucket_range(9), 9..u64::MAX); + + let mut b = HistogramBatch::from_histogram(&h); + + b.measure(0, 1); + assert_bucket_eq!(b, 0, 1); + assert_bucket_eq!(b, 1, 0); + + b.measure(1, 1); + assert_bucket_eq!(b, 0, 1); + assert_bucket_eq!(b, 1, 1); + assert_bucket_eq!(b, 2, 0); + + b.measure(2, 1); + assert_bucket_eq!(b, 0, 1); + assert_bucket_eq!(b, 1, 1); + assert_bucket_eq!(b, 2, 1); + assert_bucket_eq!(b, 3, 0); + + b.measure(3, 1); + assert_bucket_eq!(b, 0, 1); + assert_bucket_eq!(b, 1, 1); + assert_bucket_eq!(b, 2, 1); + assert_bucket_eq!(b, 3, 1); + + b.measure(5, 1); + assert_bucket_eq!(b, 5, 1); + + b.measure(4096, 1); + assert_bucket_eq!(b, 9, 1); + + for bucket in h.buckets.iter() { + assert_eq!(bucket.load(Relaxed), 0); + } + + b.submit(&h); + + for i in 0..h.buckets.len() { + assert_eq!(h.buckets[i].load(Relaxed), b.buckets[i]); + } + + b.submit(&h); + + for i in 0..h.buckets.len() { + assert_eq!(h.buckets[i].load(Relaxed), b.buckets[i]); + } + } + + #[test] + fn linear_scale_resolution_100() { + let h = HistogramBuilder { + scale: HistogramScale::Linear, + resolution: 100, + num_buckets: 10, + } + .build(); + + assert_eq!(h.bucket_range(0), 0..100); + assert_eq!(h.bucket_range(1), 100..200); + assert_eq!(h.bucket_range(2), 200..300); + assert_eq!(h.bucket_range(3), 300..400); + assert_eq!(h.bucket_range(9), 900..u64::MAX); + + let mut b = HistogramBatch::from_histogram(&h); + + b.measure(0, 1); + assert_bucket_eq!(b, 0, 1); + assert_bucket_eq!(b, 1, 0); + + b.measure(50, 1); + assert_bucket_eq!(b, 0, 2); + assert_bucket_eq!(b, 1, 0); + + b.measure(100, 1); + assert_bucket_eq!(b, 0, 2); + assert_bucket_eq!(b, 1, 1); + assert_bucket_eq!(b, 2, 0); + + b.measure(101, 1); + assert_bucket_eq!(b, 0, 2); + assert_bucket_eq!(b, 1, 2); + assert_bucket_eq!(b, 2, 0); + + b.measure(200, 1); + assert_bucket_eq!(b, 0, 2); + assert_bucket_eq!(b, 1, 2); + assert_bucket_eq!(b, 2, 1); + + b.measure(299, 1); + assert_bucket_eq!(b, 0, 2); + assert_bucket_eq!(b, 1, 2); + assert_bucket_eq!(b, 2, 2); + + b.measure(222, 1); + assert_bucket_eq!(b, 0, 2); + assert_bucket_eq!(b, 1, 2); + assert_bucket_eq!(b, 2, 3); + + b.measure(300, 1); + assert_bucket_eq!(b, 0, 2); + assert_bucket_eq!(b, 1, 2); + assert_bucket_eq!(b, 2, 3); + assert_bucket_eq!(b, 3, 1); + + b.measure(888, 1); + assert_bucket_eq!(b, 8, 1); + + b.measure(4096, 1); + assert_bucket_eq!(b, 9, 1); + + for bucket in h.buckets.iter() { + assert_eq!(bucket.load(Relaxed), 0); + } + + b.submit(&h); + + for i in 0..h.buckets.len() { + assert_eq!(h.buckets[i].load(Relaxed), b.buckets[i]); + } + + b.submit(&h); + + for i in 0..h.buckets.len() { + assert_eq!(h.buckets[i].load(Relaxed), b.buckets[i]); + } + } + + #[test] + fn inc_by_more_than_one() { + let h = HistogramBuilder { + scale: HistogramScale::Linear, + resolution: 100, + num_buckets: 10, + } + .build(); + + let mut 
b = HistogramBatch::from_histogram(&h); + + b.measure(0, 3); + assert_bucket_eq!(b, 0, 3); + assert_bucket_eq!(b, 1, 0); + + b.measure(50, 5); + assert_bucket_eq!(b, 0, 8); + assert_bucket_eq!(b, 1, 0); + + b.measure(100, 2); + assert_bucket_eq!(b, 0, 8); + assert_bucket_eq!(b, 1, 2); + assert_bucket_eq!(b, 2, 0); + + b.measure(101, 19); + assert_bucket_eq!(b, 0, 8); + assert_bucket_eq!(b, 1, 21); + assert_bucket_eq!(b, 2, 0); + + for bucket in h.buckets.iter() { + assert_eq!(bucket.load(Relaxed), 0); + } + + b.submit(&h); + + for i in 0..h.buckets.len() { + assert_eq!(h.buckets[i].load(Relaxed), b.buckets[i]); + } + + b.submit(&h); + + for i in 0..h.buckets.len() { + assert_eq!(h.buckets[i].load(Relaxed), b.buckets[i]); + } + } +} diff --git a/third_party/rust/tokio/src/runtime/metrics/io.rs b/third_party/rust/tokio/src/runtime/metrics/io.rs new file mode 100644 index 0000000000..06efdd42d7 --- /dev/null +++ b/third_party/rust/tokio/src/runtime/metrics/io.rs @@ -0,0 +1,24 @@ +#![cfg_attr(not(feature = "net"), allow(dead_code))] + +use crate::loom::sync::atomic::{AtomicU64, Ordering::Relaxed}; + +#[derive(Default)] +pub(crate) struct IoDriverMetrics { + pub(super) fd_registered_count: AtomicU64, + pub(super) fd_deregistered_count: AtomicU64, + pub(super) ready_count: AtomicU64, +} + +impl IoDriverMetrics { + pub(crate) fn incr_fd_count(&self) { + self.fd_registered_count.fetch_add(1, Relaxed); + } + + pub(crate) fn dec_fd_count(&self) { + self.fd_deregistered_count.fetch_add(1, Relaxed); + } + + pub(crate) fn incr_ready_count_by(&self, amt: u64) { + self.ready_count.fetch_add(amt, Relaxed); + } +} diff --git a/third_party/rust/tokio/src/runtime/metrics/mock.rs b/third_party/rust/tokio/src/runtime/metrics/mock.rs new file mode 100644 index 0000000000..8f8345c08b --- /dev/null +++ b/third_party/rust/tokio/src/runtime/metrics/mock.rs @@ -0,0 +1,55 @@ +//! This file contains mocks of the types in src/runtime/metrics + +pub(crate) struct SchedulerMetrics {} + +pub(crate) struct WorkerMetrics {} + +pub(crate) struct MetricsBatch {} + +#[derive(Clone, Default)] +pub(crate) struct HistogramBuilder {} + +impl SchedulerMetrics { + pub(crate) fn new() -> Self { + Self {} + } + + /// Increment the number of tasks scheduled externally + pub(crate) fn inc_remote_schedule_count(&self) {} +} + +impl WorkerMetrics { + pub(crate) fn new() -> Self { + Self {} + } + + pub(crate) fn from_config(config: &crate::runtime::Config) -> Self { + // Prevent the dead-code warning from being triggered + let _ = &config.metrics_poll_count_histogram; + Self::new() + } + + pub(crate) fn set_queue_depth(&self, _len: usize) {} +} + +impl MetricsBatch { + pub(crate) fn new(_: &WorkerMetrics) -> Self { + Self {} + } + + pub(crate) fn submit(&mut self, _to: &WorkerMetrics) {} + pub(crate) fn about_to_park(&mut self) {} + pub(crate) fn inc_local_schedule_count(&mut self) {} + pub(crate) fn start_processing_scheduled_tasks(&mut self) {} + pub(crate) fn end_processing_scheduled_tasks(&mut self) {} + pub(crate) fn start_poll(&mut self) {} + pub(crate) fn end_poll(&mut self) {} +} + +cfg_rt_multi_thread! 
{ + impl MetricsBatch { + pub(crate) fn incr_steal_count(&mut self, _by: u16) {} + pub(crate) fn incr_steal_operations(&mut self) {} + pub(crate) fn incr_overflow_count(&mut self) {} + } +} diff --git a/third_party/rust/tokio/src/runtime/metrics/mod.rs b/third_party/rust/tokio/src/runtime/metrics/mod.rs new file mode 100644 index 0000000000..88be4a5211 --- /dev/null +++ b/third_party/rust/tokio/src/runtime/metrics/mod.rs @@ -0,0 +1,40 @@ +//! This module contains information need to view information about how the +//! runtime is performing. +//! +//! **Note**: This is an [unstable API][unstable]. The public API of types in +//! this module may break in 1.x releases. See [the documentation on unstable +//! features][unstable] for details. +//! +//! [unstable]: crate#unstable-features +#![allow(clippy::module_inception)] + +cfg_metrics! { + mod batch; + pub(crate) use batch::MetricsBatch; + + mod histogram; + pub(crate) use histogram::{Histogram, HistogramBatch, HistogramBuilder}; + #[allow(unreachable_pub)] // rust-lang/rust#57411 + pub use histogram::HistogramScale; + + mod runtime; + #[allow(unreachable_pub)] // rust-lang/rust#57411 + pub use runtime::RuntimeMetrics; + + mod scheduler; + pub(crate) use scheduler::SchedulerMetrics; + + mod worker; + pub(crate) use worker::WorkerMetrics; + + cfg_net! { + mod io; + pub(crate) use io::IoDriverMetrics; + } +} + +cfg_not_metrics! { + mod mock; + + pub(crate) use mock::{SchedulerMetrics, WorkerMetrics, MetricsBatch, HistogramBuilder}; +} diff --git a/third_party/rust/tokio/src/runtime/metrics/runtime.rs b/third_party/rust/tokio/src/runtime/metrics/runtime.rs new file mode 100644 index 0000000000..1f990a1f85 --- /dev/null +++ b/third_party/rust/tokio/src/runtime/metrics/runtime.rs @@ -0,0 +1,883 @@ +use crate::runtime::Handle; + +use std::ops::Range; +use std::sync::atomic::Ordering::Relaxed; +use std::time::Duration; + +/// Handle to the runtime's metrics. +/// +/// This handle is internally reference-counted and can be freely cloned. A +/// `RuntimeMetrics` handle is obtained using the [`Runtime::metrics`] method. +/// +/// [`Runtime::metrics`]: crate::runtime::Runtime::metrics() +#[derive(Clone, Debug)] +pub struct RuntimeMetrics { + handle: Handle, +} + +impl RuntimeMetrics { + pub(crate) fn new(handle: Handle) -> RuntimeMetrics { + RuntimeMetrics { handle } + } + + /// Returns the number of worker threads used by the runtime. + /// + /// The number of workers is set by configuring `worker_threads` on + /// `runtime::Builder`. When using the `current_thread` runtime, the return + /// value is always `1`. + /// + /// # Examples + /// + /// ``` + /// use tokio::runtime::Handle; + /// + /// #[tokio::main] + /// async fn main() { + /// let metrics = Handle::current().metrics(); + /// + /// let n = metrics.num_workers(); + /// println!("Runtime is using {} workers", n); + /// } + /// ``` + pub fn num_workers(&self) -> usize { + self.handle.inner.num_workers() + } + + /// Returns the number of additional threads spawned by the runtime. + /// + /// The number of workers is set by configuring `max_blocking_threads` on + /// `runtime::Builder`. 
+ /// + /// # Examples + /// + /// ``` + /// use tokio::runtime::Handle; + /// + /// #[tokio::main] + /// async fn main() { + /// let _ = tokio::task::spawn_blocking(move || { + /// // Stand-in for compute-heavy work or using synchronous APIs + /// 1 + 1 + /// }).await; + /// let metrics = Handle::current().metrics(); + /// + /// let n = metrics.num_blocking_threads(); + /// println!("Runtime has created {} threads", n); + /// } + /// ``` + pub fn num_blocking_threads(&self) -> usize { + self.handle.inner.num_blocking_threads() + } + + /// Returns the number of active tasks in the runtime. + /// + /// # Examples + /// + /// ``` + /// use tokio::runtime::Handle; + /// + /// #[tokio::main] + /// async fn main() { + /// let metrics = Handle::current().metrics(); + /// + /// let n = metrics.active_tasks_count(); + /// println!("Runtime has {} active tasks", n); + /// } + /// ``` + pub fn active_tasks_count(&self) -> usize { + self.handle.inner.active_tasks_count() + } + + /// Returns the number of idle threads, which have spawned by the runtime + /// for `spawn_blocking` calls. + /// + /// # Examples + /// + /// ``` + /// use tokio::runtime::Handle; + /// + /// #[tokio::main] + /// async fn main() { + /// let _ = tokio::task::spawn_blocking(move || { + /// // Stand-in for compute-heavy work or using synchronous APIs + /// 1 + 1 + /// }).await; + /// let metrics = Handle::current().metrics(); + /// + /// let n = metrics.num_idle_blocking_threads(); + /// println!("Runtime has {} idle blocking thread pool threads", n); + /// } + /// ``` + pub fn num_idle_blocking_threads(&self) -> usize { + self.handle.inner.num_idle_blocking_threads() + } + + /// Returns the number of tasks scheduled from **outside** of the runtime. + /// + /// The remote schedule count starts at zero when the runtime is created and + /// increases by one each time a task is woken from **outside** of the + /// runtime. This usually means that a task is spawned or notified from a + /// non-runtime thread and must be queued using the Runtime's injection + /// queue, which tends to be slower. + /// + /// The counter is monotonically increasing. It is never decremented or + /// reset to zero. + /// + /// # Examples + /// + /// ``` + /// use tokio::runtime::Handle; + /// + /// #[tokio::main] + /// async fn main() { + /// let metrics = Handle::current().metrics(); + /// + /// let n = metrics.remote_schedule_count(); + /// println!("{} tasks were scheduled from outside the runtime", n); + /// } + /// ``` + pub fn remote_schedule_count(&self) -> u64 { + self.handle + .inner + .scheduler_metrics() + .remote_schedule_count + .load(Relaxed) + } + + /// Returns the number of times that tasks have been forced to yield back to the scheduler + /// after exhausting their task budgets. + /// + /// This count starts at zero when the runtime is created and increases by one each time a task yields due to exhausting its budget. + /// + /// The counter is monotonically increasing. It is never decremented or + /// reset to zero. + pub fn budget_forced_yield_count(&self) -> u64 { + self.handle + .inner + .scheduler_metrics() + .budget_forced_yield_count + .load(Relaxed) + } + + /// Returns the total number of times the given worker thread has parked. + /// + /// The worker park count starts at zero when the runtime is created and + /// increases by one each time the worker parks the thread waiting for new + /// inbound events to process. This usually means the worker has processed + /// all pending work and is currently idle. 
+ /// + /// The counter is monotonically increasing. It is never decremented or + /// reset to zero. + /// + /// # Arguments + /// + /// `worker` is the index of the worker being queried. The given value must + /// be between 0 and `num_workers()`. The index uniquely identifies a single + /// worker and will continue to identify the worker throughout the lifetime + /// of the runtime instance. + /// + /// # Panics + /// + /// The method panics when `worker` represents an invalid worker, i.e. is + /// greater than or equal to `num_workers()`. + /// + /// # Examples + /// + /// ``` + /// use tokio::runtime::Handle; + /// + /// #[tokio::main] + /// async fn main() { + /// let metrics = Handle::current().metrics(); + /// + /// let n = metrics.worker_park_count(0); + /// println!("worker 0 parked {} times", n); + /// } + /// ``` + pub fn worker_park_count(&self, worker: usize) -> u64 { + self.handle + .inner + .worker_metrics(worker) + .park_count + .load(Relaxed) + } + + /// Returns the number of times the given worker thread unparked but + /// performed no work before parking again. + /// + /// The worker no-op count starts at zero when the runtime is created and + /// increases by one each time the worker unparks the thread but finds no + /// new work and goes back to sleep. This indicates a false-positive wake up. + /// + /// The counter is monotonically increasing. It is never decremented or + /// reset to zero. + /// + /// # Arguments + /// + /// `worker` is the index of the worker being queried. The given value must + /// be between 0 and `num_workers()`. The index uniquely identifies a single + /// worker and will continue to identify the worker throughout the lifetime + /// of the runtime instance. + /// + /// # Panics + /// + /// The method panics when `worker` represents an invalid worker, i.e. is + /// greater than or equal to `num_workers()`. + /// + /// # Examples + /// + /// ``` + /// use tokio::runtime::Handle; + /// + /// #[tokio::main] + /// async fn main() { + /// let metrics = Handle::current().metrics(); + /// + /// let n = metrics.worker_noop_count(0); + /// println!("worker 0 had {} no-op unparks", n); + /// } + /// ``` + pub fn worker_noop_count(&self, worker: usize) -> u64 { + self.handle + .inner + .worker_metrics(worker) + .noop_count + .load(Relaxed) + } + + /// Returns the number of tasks the given worker thread stole from + /// another worker thread. + /// + /// This metric only applies to the **multi-threaded** runtime and will + /// always return `0` when using the current thread runtime. + /// + /// The worker steal count starts at zero when the runtime is created and + /// increases by `N` each time the worker has processed its scheduled queue + /// and successfully steals `N` more pending tasks from another worker. + /// + /// The counter is monotonically increasing. It is never decremented or + /// reset to zero. + /// + /// # Arguments + /// + /// `worker` is the index of the worker being queried. The given value must + /// be between 0 and `num_workers()`. The index uniquely identifies a single + /// worker and will continue to identify the worker throughout the lifetime + /// of the runtime instance. + /// + /// # Panics + /// + /// The method panics when `worker` represents an invalid worker, i.e. is + /// greater than or equal to `num_workers()`. 
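`worker_park_count` and `worker_noop_count` are most useful side by side: a high no-op share means the worker is being woken without finding work. A sketch of a per-worker report, again assuming the unstable metrics API and a call site inside the runtime:

```rust
use tokio::runtime::Handle;

/// Print park statistics for every worker.
fn report_parks() {
    let metrics = Handle::current().metrics();
    for worker in 0..metrics.num_workers() {
        let parks = metrics.worker_park_count(worker);
        let noops = metrics.worker_noop_count(worker);
        // Many no-op unparks relative to parks suggests spurious wake-ups.
        println!("worker {worker}: parks={parks} noop_unparks={noops}");
    }
}
```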
+ /// + /// # Examples + /// + /// ``` + /// use tokio::runtime::Handle; + /// + /// #[tokio::main] + /// async fn main() { + /// let metrics = Handle::current().metrics(); + /// + /// let n = metrics.worker_steal_count(0); + /// println!("worker 0 has stolen {} tasks", n); + /// } + /// ``` + pub fn worker_steal_count(&self, worker: usize) -> u64 { + self.handle + .inner + .worker_metrics(worker) + .steal_count + .load(Relaxed) + } + + /// Returns the number of times the given worker thread stole tasks from + /// another worker thread. + /// + /// This metric only applies to the **multi-threaded** runtime and will + /// always return `0` when using the current thread runtime. + /// + /// The worker steal count starts at zero when the runtime is created and + /// increases by one each time the worker has processed its scheduled queue + /// and successfully steals more pending tasks from another worker. + /// + /// The counter is monotonically increasing. It is never decremented or + /// reset to zero. + /// + /// # Arguments + /// + /// `worker` is the index of the worker being queried. The given value must + /// be between 0 and `num_workers()`. The index uniquely identifies a single + /// worker and will continue to identify the worker throughout the lifetime + /// of the runtime instance. + /// + /// # Panics + /// + /// The method panics when `worker` represents an invalid worker, i.e. is + /// greater than or equal to `num_workers()`. + /// + /// # Examples + /// + /// ``` + /// use tokio::runtime::Handle; + /// + /// #[tokio::main] + /// async fn main() { + /// let metrics = Handle::current().metrics(); + /// + /// let n = metrics.worker_steal_operations(0); + /// println!("worker 0 has stolen tasks {} times", n); + /// } + /// ``` + pub fn worker_steal_operations(&self, worker: usize) -> u64 { + self.handle + .inner + .worker_metrics(worker) + .steal_operations + .load(Relaxed) + } + + /// Returns the number of tasks the given worker thread has polled. + /// + /// The worker poll count starts at zero when the runtime is created and + /// increases by one each time the worker polls a scheduled task. + /// + /// The counter is monotonically increasing. It is never decremented or + /// reset to zero. + /// + /// # Arguments + /// + /// `worker` is the index of the worker being queried. The given value must + /// be between 0 and `num_workers()`. The index uniquely identifies a single + /// worker and will continue to identify the worker throughout the lifetime + /// of the runtime instance. + /// + /// # Panics + /// + /// The method panics when `worker` represents an invalid worker, i.e. is + /// greater than or equal to `num_workers()`. + /// + /// # Examples + /// + /// ``` + /// use tokio::runtime::Handle; + /// + /// #[tokio::main] + /// async fn main() { + /// let metrics = Handle::current().metrics(); + /// + /// let n = metrics.worker_poll_count(0); + /// println!("worker 0 has polled {} tasks", n); + /// } + /// ``` + pub fn worker_poll_count(&self, worker: usize) -> u64 { + self.handle + .inner + .worker_metrics(worker) + .poll_count + .load(Relaxed) + } + + /// Returns the amount of time the given worker thread has been busy. + /// + /// The worker busy duration starts at zero when the runtime is created and + /// increases whenever the worker is spending time processing work. Using + /// this value can indicate the load of the given worker. If a lot of time + /// is spent busy, then the worker is under load and will check for inbound + /// events less often. 
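Because these are cumulative counters, dividing them yields rough per-worker averages, for example busy time per poll and tasks gained per steal operation. A sketch with the same unstable-API caveat; busy time also covers scheduler bookkeeping, so the per-poll figure is an upper bound:

```rust
use tokio::runtime::Handle;

/// Print rough per-worker averages derived from the cumulative counters.
fn report_worker_averages() {
    let metrics = Handle::current().metrics();
    for worker in 0..metrics.num_workers() {
        let polls = metrics.worker_poll_count(worker);
        let busy = metrics.worker_total_busy_duration(worker);
        let steals = metrics.worker_steal_count(worker);
        let steal_ops = metrics.worker_steal_operations(worker);

        // Guard against division by zero before the first poll or steal.
        let avg_poll_us = if polls == 0 {
            0.0
        } else {
            busy.as_secs_f64() * 1e6 / polls as f64
        };
        let avg_batch = if steal_ops == 0 {
            0.0
        } else {
            steals as f64 / steal_ops as f64
        };

        println!("worker {worker}: avg_poll~{avg_poll_us:.1}us avg_steal_batch~{avg_batch:.1}");
    }
}
```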
+ /// + /// The timer is monotonically increasing. It is never decremented or reset + /// to zero. + /// + /// # Arguments + /// + /// `worker` is the index of the worker being queried. The given value must + /// be between 0 and `num_workers()`. The index uniquely identifies a single + /// worker and will continue to identify the worker throughout the lifetime + /// of the runtime instance. + /// + /// # Panics + /// + /// The method panics when `worker` represents an invalid worker, i.e. is + /// greater than or equal to `num_workers()`. + /// + /// # Examples + /// + /// ``` + /// use tokio::runtime::Handle; + /// + /// #[tokio::main] + /// async fn main() { + /// let metrics = Handle::current().metrics(); + /// + /// let n = metrics.worker_total_busy_duration(0); + /// println!("worker 0 was busy for a total of {:?}", n); + /// } + /// ``` + pub fn worker_total_busy_duration(&self, worker: usize) -> Duration { + let nanos = self + .handle + .inner + .worker_metrics(worker) + .busy_duration_total + .load(Relaxed); + Duration::from_nanos(nanos) + } + + /// Returns the number of tasks scheduled from **within** the runtime on the + /// given worker's local queue. + /// + /// The local schedule count starts at zero when the runtime is created and + /// increases by one each time a task is woken from **inside** of the + /// runtime on the given worker. This usually means that a task is spawned + /// or notified from within a runtime thread and will be queued on the + /// worker-local queue. + /// + /// The counter is monotonically increasing. It is never decremented or + /// reset to zero. + /// + /// # Arguments + /// + /// `worker` is the index of the worker being queried. The given value must + /// be between 0 and `num_workers()`. The index uniquely identifies a single + /// worker and will continue to identify the worker throughout the lifetime + /// of the runtime instance. + /// + /// # Panics + /// + /// The method panics when `worker` represents an invalid worker, i.e. is + /// greater than or equal to `num_workers()`. + /// + /// # Examples + /// + /// ``` + /// use tokio::runtime::Handle; + /// + /// #[tokio::main] + /// async fn main() { + /// let metrics = Handle::current().metrics(); + /// + /// let n = metrics.worker_local_schedule_count(0); + /// println!("{} tasks were scheduled on the worker's local queue", n); + /// } + /// ``` + pub fn worker_local_schedule_count(&self, worker: usize) -> u64 { + self.handle + .inner + .worker_metrics(worker) + .local_schedule_count + .load(Relaxed) + } + + /// Returns the number of times the given worker thread saturated its local + /// queue. + /// + /// This metric only applies to the **multi-threaded** scheduler. + /// + /// The worker steal count starts at zero when the runtime is created and + /// increases by one each time the worker attempts to schedule a task + /// locally, but its local queue is full. When this happens, half of the + /// local queue is moved to the injection queue. + /// + /// The counter is monotonically increasing. It is never decremented or + /// reset to zero. + /// + /// # Arguments + /// + /// `worker` is the index of the worker being queried. The given value must + /// be between 0 and `num_workers()`. The index uniquely identifies a single + /// worker and will continue to identify the worker throughout the lifetime + /// of the runtime instance. + /// + /// # Panics + /// + /// The method panics when `worker` represents an invalid worker, i.e. is + /// greater than or equal to `num_workers()`. 
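Comparing the workers' combined `worker_local_schedule_count` with the runtime-wide `remote_schedule_count` shows how much wake-up traffic takes the slower injection-queue path. A sketch, with the same unstable-API caveat:

```rust
use tokio::runtime::Handle;

/// Report the split between local and remote task scheduling.
fn report_schedule_locality() {
    let metrics = Handle::current().metrics();
    let local: u64 = (0..metrics.num_workers())
        .map(|w| metrics.worker_local_schedule_count(w))
        .sum();
    let remote = metrics.remote_schedule_count();
    let total = local + remote;
    if total > 0 {
        println!(
            "local={} remote={} ({:.1}% remote)",
            local,
            remote,
            100.0 * remote as f64 / total as f64
        );
    }
}
```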
+ /// + /// # Examples + /// + /// ``` + /// use tokio::runtime::Handle; + /// + /// #[tokio::main] + /// async fn main() { + /// let metrics = Handle::current().metrics(); + /// + /// let n = metrics.worker_overflow_count(0); + /// println!("worker 0 has overflowed its queue {} times", n); + /// } + /// ``` + pub fn worker_overflow_count(&self, worker: usize) -> u64 { + self.handle + .inner + .worker_metrics(worker) + .overflow_count + .load(Relaxed) + } + + /// Returns the number of tasks currently scheduled in the runtime's + /// injection queue. + /// + /// Tasks that are spawned or notified from a non-runtime thread are + /// scheduled using the runtime's injection queue. This metric returns the + /// **current** number of tasks pending in the injection queue. As such, the + /// returned value may increase or decrease as new tasks are scheduled and + /// processed. + /// + /// # Examples + /// + /// ``` + /// use tokio::runtime::Handle; + /// + /// #[tokio::main] + /// async fn main() { + /// let metrics = Handle::current().metrics(); + /// + /// let n = metrics.injection_queue_depth(); + /// println!("{} tasks currently pending in the runtime's injection queue", n); + /// } + /// ``` + pub fn injection_queue_depth(&self) -> usize { + self.handle.inner.injection_queue_depth() + } + + /// Returns the number of tasks currently scheduled in the given worker's + /// local queue. + /// + /// Tasks that are spawned or notified from within a runtime thread are + /// scheduled using that worker's local queue. This metric returns the + /// **current** number of tasks pending in the worker's local queue. As + /// such, the returned value may increase or decrease as new tasks are + /// scheduled and processed. + /// + /// # Arguments + /// + /// `worker` is the index of the worker being queried. The given value must + /// be between 0 and `num_workers()`. The index uniquely identifies a single + /// worker and will continue to identify the worker throughout the lifetime + /// of the runtime instance. + /// + /// # Panics + /// + /// The method panics when `worker` represents an invalid worker, i.e. is + /// greater than or equal to `num_workers()`. + /// + /// # Examples + /// + /// ``` + /// use tokio::runtime::Handle; + /// + /// #[tokio::main] + /// async fn main() { + /// let metrics = Handle::current().metrics(); + /// + /// let n = metrics.worker_local_queue_depth(0); + /// println!("{} tasks currently pending in worker 0's local queue", n); + /// } + /// ``` + pub fn worker_local_queue_depth(&self, worker: usize) -> usize { + self.handle.inner.worker_local_queue_depth(worker) + } + + /// Returns `true` if the runtime is tracking the distribution of task poll + /// times. + /// + /// Task poll times are not instrumented by default as doing so requires + /// calling [`Instant::now()`] twice per task poll. The feature is enabled + /// by calling [`enable_metrics_poll_count_histogram()`] when building the + /// runtime. 
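Unlike the counters above, the two queue-depth metrics are gauges that move up and down, so they are read as a point-in-time snapshot. A sketch that sums the backlog across the injection queue and every local queue:

```rust
use tokio::runtime::Handle;

/// Number of tasks currently queued but not yet being polled.
fn queued_tasks() -> usize {
    let metrics = Handle::current().metrics();
    let local: usize = (0..metrics.num_workers())
        .map(|w| metrics.worker_local_queue_depth(w))
        .sum();
    metrics.injection_queue_depth() + local
}
```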
+ /// + /// # Examples + /// + /// ``` + /// use tokio::runtime::{self, Handle}; + /// + /// fn main() { + /// runtime::Builder::new_current_thread() + /// .enable_metrics_poll_count_histogram() + /// .build() + /// .unwrap() + /// .block_on(async { + /// let metrics = Handle::current().metrics(); + /// let enabled = metrics.poll_count_histogram_enabled(); + /// + /// println!("Tracking task poll time distribution: {:?}", enabled); + /// }); + /// } + /// ``` + /// + /// [`enable_metrics_poll_count_histogram()`]: crate::runtime::Builder::enable_metrics_poll_count_histogram + /// [`Instant::now()`]: std::time::Instant::now + pub fn poll_count_histogram_enabled(&self) -> bool { + self.handle + .inner + .worker_metrics(0) + .poll_count_histogram + .is_some() + } + + /// Returns the number of histogram buckets tracking the distribution of + /// task poll times. + /// + /// This value is configured by calling + /// [`metrics_poll_count_histogram_buckets()`] when building the runtime. + /// + /// # Examples + /// + /// ``` + /// use tokio::runtime::{self, Handle}; + /// + /// fn main() { + /// runtime::Builder::new_current_thread() + /// .enable_metrics_poll_count_histogram() + /// .build() + /// .unwrap() + /// .block_on(async { + /// let metrics = Handle::current().metrics(); + /// let buckets = metrics.poll_count_histogram_num_buckets(); + /// + /// println!("Histogram buckets: {:?}", buckets); + /// }); + /// } + /// ``` + /// + /// [`metrics_poll_count_histogram_buckets()`]: + /// crate::runtime::Builder::metrics_poll_count_histogram_buckets + pub fn poll_count_histogram_num_buckets(&self) -> usize { + self.handle + .inner + .worker_metrics(0) + .poll_count_histogram + .as_ref() + .map(|histogram| histogram.num_buckets()) + .unwrap_or_default() + } + + /// Returns the range of task poll times tracked by the given bucket. + /// + /// This value is configured by calling + /// [`metrics_poll_count_histogram_resolution()`] when building the runtime. + /// + /// # Panics + /// + /// The method panics if `bucket` represents an invalid bucket index, i.e. + /// is greater than or equal to `poll_count_histogram_num_buckets()`. + /// + /// # Examples + /// + /// ``` + /// use tokio::runtime::{self, Handle}; + /// + /// fn main() { + /// runtime::Builder::new_current_thread() + /// .enable_metrics_poll_count_histogram() + /// .build() + /// .unwrap() + /// .block_on(async { + /// let metrics = Handle::current().metrics(); + /// let buckets = metrics.poll_count_histogram_num_buckets(); + /// + /// for i in 0..buckets { + /// let range = metrics.poll_count_histogram_bucket_range(i); + /// println!("Histogram bucket {} range: {:?}", i, range); + /// } + /// }); + /// } + /// ``` + /// + /// [`metrics_poll_count_histogram_resolution()`]: + /// crate::runtime::Builder::metrics_poll_count_histogram_resolution + #[track_caller] + pub fn poll_count_histogram_bucket_range(&self, bucket: usize) -> Range<Duration> { + self.handle + .inner + .worker_metrics(0) + .poll_count_histogram + .as_ref() + .map(|histogram| { + let range = histogram.bucket_range(bucket); + std::ops::Range { + start: Duration::from_nanos(range.start), + end: Duration::from_nanos(range.end), + } + }) + .unwrap_or_default() + } + + /// Returns the number of times the given worker polled tasks with a poll + /// duration within the given bucket's range. + /// + /// Each worker maintains its own histogram and the counts for each bucket + /// starts at zero when the runtime is created. 
Each time the worker polls a + /// task, it tracks the duration the task poll time took and increments the + /// associated bucket by 1. + /// + /// Each bucket is a monotonically increasing counter. It is never + /// decremented or reset to zero. + /// + /// # Arguments + /// + /// `worker` is the index of the worker being queried. The given value must + /// be between 0 and `num_workers()`. The index uniquely identifies a single + /// worker and will continue to identify the worker throughout the lifetime + /// of the runtime instance. + /// + /// `bucket` is the index of the bucket being queried. The bucket is scoped + /// to the worker. The range represented by the bucket can be queried by + /// calling [`poll_count_histogram_bucket_range()`]. Each worker maintains + /// identical bucket ranges. + /// + /// # Panics + /// + /// The method panics when `worker` represents an invalid worker, i.e. is + /// greater than or equal to `num_workers()` or if `bucket` represents an + /// invalid bucket. + /// + /// # Examples + /// + /// ``` + /// use tokio::runtime::{self, Handle}; + /// + /// fn main() { + /// runtime::Builder::new_current_thread() + /// .enable_metrics_poll_count_histogram() + /// .build() + /// .unwrap() + /// .block_on(async { + /// let metrics = Handle::current().metrics(); + /// let buckets = metrics.poll_count_histogram_num_buckets(); + /// + /// for worker in 0..metrics.num_workers() { + /// for i in 0..buckets { + /// let count = metrics.poll_count_histogram_bucket_count(worker, i); + /// println!("Poll count {}", count); + /// } + /// } + /// }); + /// } + /// ``` + /// + /// [`poll_count_histogram_bucket_range()`]: crate::runtime::RuntimeMetrics::poll_count_histogram_bucket_range + #[track_caller] + pub fn poll_count_histogram_bucket_count(&self, worker: usize, bucket: usize) -> u64 { + self.handle + .inner + .worker_metrics(worker) + .poll_count_histogram + .as_ref() + .map(|histogram| histogram.get(bucket)) + .unwrap_or_default() + } + + /// Returns the number of tasks currently scheduled in the blocking + /// thread pool, spawned using `spawn_blocking`. + /// + /// This metric returns the **current** number of tasks pending in + /// blocking thread pool. As such, the returned value may increase + /// or decrease as new tasks are scheduled and processed. + /// + /// # Examples + /// + /// ``` + /// use tokio::runtime::Handle; + /// + /// #[tokio::main] + /// async fn main() { + /// let metrics = Handle::current().metrics(); + /// + /// let n = metrics.blocking_queue_depth(); + /// println!("{} tasks currently pending in the blocking thread pool", n); + /// } + /// ``` + pub fn blocking_queue_depth(&self) -> usize { + self.handle.inner.blocking_queue_depth() + } +} + +cfg_net! { + impl RuntimeMetrics { + /// Returns the number of file descriptors that have been registered with the + /// runtime's I/O driver. 
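Putting the histogram pieces together: enable poll-time tracking on the builder, then fold the per-worker bucket counts into an approximate percentile. This is a sketch assuming `--cfg tokio_unstable`; the 100 µs resolution, 15 buckets, and p99 target are arbitrary, and because the last bucket is open-ended the estimate is only an upper bound:

```rust
use std::time::Duration;
use tokio::runtime;

fn main() {
    let rt = runtime::Builder::new_multi_thread()
        .enable_all()
        .enable_metrics_poll_count_histogram()
        .metrics_poll_count_histogram_resolution(Duration::from_micros(100))
        .metrics_poll_count_histogram_buckets(15)
        .build()
        .unwrap();

    rt.block_on(async {
        // ... run the workload of interest here ...

        let metrics = runtime::Handle::current().metrics();
        let buckets = metrics.poll_count_histogram_num_buckets();

        // Fold the per-worker histograms into one set of bucket counts.
        let counts: Vec<u64> = (0..buckets)
            .map(|b| {
                (0..metrics.num_workers())
                    .map(|w| metrics.poll_count_histogram_bucket_count(w, b))
                    .sum::<u64>()
            })
            .collect();

        // Walk the buckets until 99% of polls are covered; the bucket's
        // upper bound is then a rough p99 for task poll time.
        let total: u64 = counts.iter().sum();
        let mut seen = 0u64;
        for (b, count) in counts.iter().enumerate() {
            seen += count;
            if total > 0 && seen * 100 >= total * 99 {
                let range = metrics.poll_count_histogram_bucket_range(b);
                println!("~p99 poll time <= {:?}", range.end);
                break;
            }
        }
    });
}
```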
+ /// + /// # Examples + /// + /// ``` + /// use tokio::runtime::Handle; + /// + /// #[tokio::main] + /// async fn main() { + /// let metrics = Handle::current().metrics(); + /// + /// let registered_fds = metrics.io_driver_fd_registered_count(); + /// println!("{} fds have been registered with the runtime's I/O driver.", registered_fds); + /// + /// let deregistered_fds = metrics.io_driver_fd_deregistered_count(); + /// + /// let current_fd_count = registered_fds - deregistered_fds; + /// println!("{} fds are currently registered by the runtime's I/O driver.", current_fd_count); + /// } + /// ``` + pub fn io_driver_fd_registered_count(&self) -> u64 { + self.with_io_driver_metrics(|m| { + m.fd_registered_count.load(Relaxed) + }) + } + + /// Returns the number of file descriptors that have been deregistered by the + /// runtime's I/O driver. + /// + /// # Examples + /// + /// ``` + /// use tokio::runtime::Handle; + /// + /// #[tokio::main] + /// async fn main() { + /// let metrics = Handle::current().metrics(); + /// + /// let n = metrics.io_driver_fd_deregistered_count(); + /// println!("{} fds have been deregistered by the runtime's I/O driver.", n); + /// } + /// ``` + pub fn io_driver_fd_deregistered_count(&self) -> u64 { + self.with_io_driver_metrics(|m| { + m.fd_deregistered_count.load(Relaxed) + }) + } + + /// Returns the number of ready events processed by the runtime's + /// I/O driver. + /// + /// # Examples + /// + /// ``` + /// use tokio::runtime::Handle; + /// + /// #[tokio::main] + /// async fn main() { + /// let metrics = Handle::current().metrics(); + /// + /// let n = metrics.io_driver_ready_count(); + /// println!("{} ready events processed by the runtime's I/O driver.", n); + /// } + /// ``` + pub fn io_driver_ready_count(&self) -> u64 { + self.with_io_driver_metrics(|m| m.ready_count.load(Relaxed)) + } + + fn with_io_driver_metrics<F>(&self, f: F) -> u64 + where + F: Fn(&super::IoDriverMetrics) -> u64, + { + // TODO: Investigate if this should return 0, most of our metrics always increase + // thus this breaks that guarantee. + self.handle + .inner + .driver() + .io + .as_ref() + .map(|h| f(&h.metrics)) + .unwrap_or(0) + } + } +} diff --git a/third_party/rust/tokio/src/runtime/metrics/scheduler.rs b/third_party/rust/tokio/src/runtime/metrics/scheduler.rs new file mode 100644 index 0000000000..d9f8edfaab --- /dev/null +++ b/third_party/rust/tokio/src/runtime/metrics/scheduler.rs @@ -0,0 +1,34 @@ +use crate::loom::sync::atomic::{AtomicU64, Ordering::Relaxed}; + +/// Retrieves metrics from the Tokio runtime. +/// +/// **Note**: This is an [unstable API][unstable]. The public API of this type +/// may break in 1.x releases. See [the documentation on unstable +/// features][unstable] for details. +/// +/// [unstable]: crate#unstable-features +#[derive(Debug)] +pub(crate) struct SchedulerMetrics { + /// Number of tasks that are scheduled from outside the runtime. 
+ pub(super) remote_schedule_count: AtomicU64, + pub(super) budget_forced_yield_count: AtomicU64, +} + +impl SchedulerMetrics { + pub(crate) fn new() -> SchedulerMetrics { + SchedulerMetrics { + remote_schedule_count: AtomicU64::new(0), + budget_forced_yield_count: AtomicU64::new(0), + } + } + + /// Increment the number of tasks scheduled externally + pub(crate) fn inc_remote_schedule_count(&self) { + self.remote_schedule_count.fetch_add(1, Relaxed); + } + + /// Increment the number of tasks forced to yield due to budget exhaustion + pub(crate) fn inc_budget_forced_yield_count(&self) { + self.budget_forced_yield_count.fetch_add(1, Relaxed); + } +} diff --git a/third_party/rust/tokio/src/runtime/metrics/worker.rs b/third_party/rust/tokio/src/runtime/metrics/worker.rs new file mode 100644 index 0000000000..e0f23e6a08 --- /dev/null +++ b/third_party/rust/tokio/src/runtime/metrics/worker.rs @@ -0,0 +1,80 @@ +use crate::loom::sync::atomic::Ordering::Relaxed; +use crate::loom::sync::atomic::{AtomicU64, AtomicUsize}; +use crate::runtime::metrics::Histogram; +use crate::runtime::Config; + +/// Retrieve runtime worker metrics. +/// +/// **Note**: This is an [unstable API][unstable]. The public API of this type +/// may break in 1.x releases. See [the documentation on unstable +/// features][unstable] for details. +/// +/// [unstable]: crate#unstable-features +#[derive(Debug)] +#[repr(align(128))] +pub(crate) struct WorkerMetrics { + /// Number of times the worker parked. + pub(crate) park_count: AtomicU64, + + /// Number of times the worker woke then parked again without doing work. + pub(crate) noop_count: AtomicU64, + + /// Number of tasks the worker stole. + pub(crate) steal_count: AtomicU64, + + /// Number of times the worker stole + pub(crate) steal_operations: AtomicU64, + + /// Number of tasks the worker polled. + pub(crate) poll_count: AtomicU64, + + /// Amount of time the worker spent doing work vs. parking. + pub(crate) busy_duration_total: AtomicU64, + + /// Number of tasks scheduled for execution on the worker's local queue. + pub(crate) local_schedule_count: AtomicU64, + + /// Number of tasks moved from the local queue to the global queue to free space. + pub(crate) overflow_count: AtomicU64, + + /// Number of tasks currently in the local queue. Used only by the + /// current-thread scheduler. + pub(crate) queue_depth: AtomicUsize, + + /// If `Some`, tracks the the number of polls by duration range. 
+ pub(super) poll_count_histogram: Option<Histogram>, +} + +impl WorkerMetrics { + pub(crate) fn from_config(config: &Config) -> WorkerMetrics { + let mut worker_metrics = WorkerMetrics::new(); + worker_metrics.poll_count_histogram = config + .metrics_poll_count_histogram + .as_ref() + .map(|histogram_builder| histogram_builder.build()); + worker_metrics + } + + pub(crate) fn new() -> WorkerMetrics { + WorkerMetrics { + park_count: AtomicU64::new(0), + noop_count: AtomicU64::new(0), + steal_count: AtomicU64::new(0), + steal_operations: AtomicU64::new(0), + poll_count: AtomicU64::new(0), + overflow_count: AtomicU64::new(0), + busy_duration_total: AtomicU64::new(0), + local_schedule_count: AtomicU64::new(0), + queue_depth: AtomicUsize::new(0), + poll_count_histogram: None, + } + } + + pub(crate) fn queue_depth(&self) -> usize { + self.queue_depth.load(Relaxed) + } + + pub(crate) fn set_queue_depth(&self, len: usize) { + self.queue_depth.store(len, Relaxed); + } +} diff --git a/third_party/rust/tokio/src/runtime/mod.rs b/third_party/rust/tokio/src/runtime/mod.rs new file mode 100644 index 0000000000..cb198f51f0 --- /dev/null +++ b/third_party/rust/tokio/src/runtime/mod.rs @@ -0,0 +1,265 @@ +//! The Tokio runtime. +//! +//! Unlike other Rust programs, asynchronous applications require runtime +//! support. In particular, the following runtime services are necessary: +//! +//! * An **I/O event loop**, called the driver, which drives I/O resources and +//! dispatches I/O events to tasks that depend on them. +//! * A **scheduler** to execute [tasks] that use these I/O resources. +//! * A **timer** for scheduling work to run after a set period of time. +//! +//! Tokio's [`Runtime`] bundles all of these services as a single type, allowing +//! them to be started, shut down, and configured together. However, often it is +//! not required to configure a [`Runtime`] manually, and a user may just use the +//! [`tokio::main`] attribute macro, which creates a [`Runtime`] under the hood. +//! +//! # Usage +//! +//! When no fine tuning is required, the [`tokio::main`] attribute macro can be +//! used. +//! +//! ```no_run +//! use tokio::net::TcpListener; +//! use tokio::io::{AsyncReadExt, AsyncWriteExt}; +//! +//! #[tokio::main] +//! async fn main() -> Result<(), Box<dyn std::error::Error>> { +//! let listener = TcpListener::bind("127.0.0.1:8080").await?; +//! +//! loop { +//! let (mut socket, _) = listener.accept().await?; +//! +//! tokio::spawn(async move { +//! let mut buf = [0; 1024]; +//! +//! // In a loop, read data from the socket and write the data back. +//! loop { +//! let n = match socket.read(&mut buf).await { +//! // socket closed +//! Ok(n) if n == 0 => return, +//! Ok(n) => n, +//! Err(e) => { +//! println!("failed to read from socket; err = {:?}", e); +//! return; +//! } +//! }; +//! +//! // Write the data back +//! if let Err(e) = socket.write_all(&buf[0..n]).await { +//! println!("failed to write to socket; err = {:?}", e); +//! return; +//! } +//! } +//! }); +//! } +//! } +//! ``` +//! +//! From within the context of the runtime, additional tasks are spawned using +//! the [`tokio::spawn`] function. Futures spawned using this function will be +//! executed on the same thread pool used by the [`Runtime`]. +//! +//! A [`Runtime`] instance can also be used directly. +//! +//! ```no_run +//! use tokio::net::TcpListener; +//! use tokio::io::{AsyncReadExt, AsyncWriteExt}; +//! use tokio::runtime::Runtime; +//! +//! fn main() -> Result<(), Box<dyn std::error::Error>> { +//! 
// Create the runtime +//! let rt = Runtime::new()?; +//! +//! // Spawn the root task +//! rt.block_on(async { +//! let listener = TcpListener::bind("127.0.0.1:8080").await?; +//! +//! loop { +//! let (mut socket, _) = listener.accept().await?; +//! +//! tokio::spawn(async move { +//! let mut buf = [0; 1024]; +//! +//! // In a loop, read data from the socket and write the data back. +//! loop { +//! let n = match socket.read(&mut buf).await { +//! // socket closed +//! Ok(n) if n == 0 => return, +//! Ok(n) => n, +//! Err(e) => { +//! println!("failed to read from socket; err = {:?}", e); +//! return; +//! } +//! }; +//! +//! // Write the data back +//! if let Err(e) = socket.write_all(&buf[0..n]).await { +//! println!("failed to write to socket; err = {:?}", e); +//! return; +//! } +//! } +//! }); +//! } +//! }) +//! } +//! ``` +//! +//! ## Runtime Configurations +//! +//! Tokio provides multiple task scheduling strategies, suitable for different +//! applications. The [runtime builder] or `#[tokio::main]` attribute may be +//! used to select which scheduler to use. +//! +//! #### Multi-Thread Scheduler +//! +//! The multi-thread scheduler executes futures on a _thread pool_, using a +//! work-stealing strategy. By default, it will start a worker thread for each +//! CPU core available on the system. This tends to be the ideal configuration +//! for most applications. The multi-thread scheduler requires the `rt-multi-thread` +//! feature flag, and is selected by default: +//! ``` +//! use tokio::runtime; +//! +//! # fn main() -> Result<(), Box<dyn std::error::Error>> { +//! let threaded_rt = runtime::Runtime::new()?; +//! # Ok(()) } +//! ``` +//! +//! Most applications should use the multi-thread scheduler, except in some +//! niche use-cases, such as when running only a single thread is required. +//! +//! #### Current-Thread Scheduler +//! +//! The current-thread scheduler provides a _single-threaded_ future executor. +//! All tasks will be created and executed on the current thread. This requires +//! the `rt` feature flag. +//! ``` +//! use tokio::runtime; +//! +//! # fn main() -> Result<(), Box<dyn std::error::Error>> { +//! let rt = runtime::Builder::new_current_thread() +//! .build()?; +//! # Ok(()) } +//! ``` +//! +//! #### Resource drivers +//! +//! When configuring a runtime by hand, no resource drivers are enabled by +//! default. In this case, attempting to use networking types or time types will +//! fail. In order to enable these types, the resource drivers must be enabled. +//! This is done with [`Builder::enable_io`] and [`Builder::enable_time`]. As a +//! shorthand, [`Builder::enable_all`] enables both resource drivers. +//! +//! ## Lifetime of spawned threads +//! +//! The runtime may spawn threads depending on its configuration and usage. The +//! multi-thread scheduler spawns threads to schedule tasks and for `spawn_blocking` +//! calls. +//! +//! While the `Runtime` is active, threads may shut down after periods of being +//! idle. Once `Runtime` is dropped, all runtime threads have usually been +//! terminated, but in the presence of unstoppable spawned work are not +//! guaranteed to have been terminated. See the +//! [struct level documentation](Runtime#shutdown) for more details. +//! +//! [tasks]: crate::task +//! [`Runtime`]: Runtime +//! [`tokio::spawn`]: crate::spawn +//! [`tokio::main`]: ../attr.main.html +//! [runtime builder]: crate::runtime::Builder +//! [`Runtime::new`]: crate::runtime::Runtime::new +//! 
[`Builder::threaded_scheduler`]: crate::runtime::Builder::threaded_scheduler +//! [`Builder::enable_io`]: crate::runtime::Builder::enable_io +//! [`Builder::enable_time`]: crate::runtime::Builder::enable_time +//! [`Builder::enable_all`]: crate::runtime::Builder::enable_all + +// At the top due to macros +#[cfg(test)] +#[cfg(not(tokio_wasm))] +#[macro_use] +mod tests; + +pub(crate) mod context; + +pub(crate) mod coop; + +pub(crate) mod park; + +mod driver; + +pub(crate) mod scheduler; + +cfg_io_driver_impl! { + pub(crate) mod io; +} + +cfg_process_driver! { + mod process; +} + +cfg_time! { + pub(crate) mod time; +} + +cfg_signal_internal_and_unix! { + pub(crate) mod signal; +} + +cfg_rt! { + pub(crate) mod task; + + mod config; + use config::Config; + + mod blocking; + #[cfg_attr(tokio_wasi, allow(unused_imports))] + pub(crate) use blocking::spawn_blocking; + + cfg_trace! { + pub(crate) use blocking::Mandatory; + } + + cfg_fs! { + pub(crate) use blocking::spawn_mandatory_blocking; + } + + mod builder; + pub use self::builder::Builder; + cfg_unstable! { + pub use self::builder::UnhandledPanic; + pub use crate::util::rand::RngSeed; + } + + cfg_taskdump! { + pub mod dump; + pub use dump::Dump; + } + + mod handle; + pub use handle::{EnterGuard, Handle, TryCurrentError}; + + mod runtime; + pub use runtime::{Runtime, RuntimeFlavor}; + + mod thread_id; + pub(crate) use thread_id::ThreadId; + + cfg_metrics! { + mod metrics; + pub use metrics::{RuntimeMetrics, HistogramScale}; + + pub(crate) use metrics::{MetricsBatch, SchedulerMetrics, WorkerMetrics, HistogramBuilder}; + + cfg_net! { + pub(crate) use metrics::IoDriverMetrics; + } + } + + cfg_not_metrics! { + pub(crate) mod metrics; + pub(crate) use metrics::{SchedulerMetrics, WorkerMetrics, MetricsBatch, HistogramBuilder}; + } + + /// After thread starts / before thread stops + type Callback = std::sync::Arc<dyn Fn() + Send + Sync>; +} diff --git a/third_party/rust/tokio/src/runtime/park.rs b/third_party/rust/tokio/src/runtime/park.rs new file mode 100644 index 0000000000..2392846abe --- /dev/null +++ b/third_party/rust/tokio/src/runtime/park.rs @@ -0,0 +1,348 @@ +#![cfg_attr(not(feature = "full"), allow(dead_code))] + +use crate::loom::sync::atomic::AtomicUsize; +use crate::loom::sync::{Arc, Condvar, Mutex}; + +use std::sync::atomic::Ordering::SeqCst; +use std::time::Duration; + +#[derive(Debug)] +pub(crate) struct ParkThread { + inner: Arc<Inner>, +} + +/// Unblocks a thread that was blocked by `ParkThread`. +#[derive(Clone, Debug)] +pub(crate) struct UnparkThread { + inner: Arc<Inner>, +} + +#[derive(Debug)] +struct Inner { + state: AtomicUsize, + mutex: Mutex<()>, + condvar: Condvar, +} + +const EMPTY: usize = 0; +const PARKED: usize = 1; +const NOTIFIED: usize = 2; + +tokio_thread_local! { + static CURRENT_PARKER: ParkThread = ParkThread::new(); +} + +// Bit of a hack, but it is only for loom +#[cfg(loom)] +tokio_thread_local! 
{ + static CURRENT_THREAD_PARK_COUNT: AtomicUsize = AtomicUsize::new(0); +} + +// ==== impl ParkThread ==== + +impl ParkThread { + pub(crate) fn new() -> Self { + Self { + inner: Arc::new(Inner { + state: AtomicUsize::new(EMPTY), + mutex: Mutex::new(()), + condvar: Condvar::new(), + }), + } + } + + pub(crate) fn unpark(&self) -> UnparkThread { + let inner = self.inner.clone(); + UnparkThread { inner } + } + + pub(crate) fn park(&mut self) { + #[cfg(loom)] + CURRENT_THREAD_PARK_COUNT.with(|count| count.fetch_add(1, SeqCst)); + self.inner.park(); + } + + pub(crate) fn park_timeout(&mut self, duration: Duration) { + #[cfg(loom)] + CURRENT_THREAD_PARK_COUNT.with(|count| count.fetch_add(1, SeqCst)); + + // Wasm doesn't have threads, so just sleep. + #[cfg(not(tokio_wasm))] + self.inner.park_timeout(duration); + #[cfg(tokio_wasm)] + std::thread::sleep(duration); + } + + pub(crate) fn shutdown(&mut self) { + self.inner.shutdown(); + } +} + +// ==== impl Inner ==== + +impl Inner { + /// Parks the current thread for at most `dur`. + fn park(&self) { + // If we were previously notified then we consume this notification and + // return quickly. + if self + .state + .compare_exchange(NOTIFIED, EMPTY, SeqCst, SeqCst) + .is_ok() + { + return; + } + + // Otherwise we need to coordinate going to sleep + let mut m = self.mutex.lock(); + + match self.state.compare_exchange(EMPTY, PARKED, SeqCst, SeqCst) { + Ok(_) => {} + Err(NOTIFIED) => { + // We must read here, even though we know it will be `NOTIFIED`. + // This is because `unpark` may have been called again since we read + // `NOTIFIED` in the `compare_exchange` above. We must perform an + // acquire operation that synchronizes with that `unpark` to observe + // any writes it made before the call to unpark. To do that we must + // read from the write it made to `state`. + let old = self.state.swap(EMPTY, SeqCst); + debug_assert_eq!(old, NOTIFIED, "park state changed unexpectedly"); + + return; + } + Err(actual) => panic!("inconsistent park state; actual = {}", actual), + } + + loop { + m = self.condvar.wait(m).unwrap(); + + if self + .state + .compare_exchange(NOTIFIED, EMPTY, SeqCst, SeqCst) + .is_ok() + { + // got a notification + return; + } + + // spurious wakeup, go back to sleep + } + } + + fn park_timeout(&self, dur: Duration) { + // Like `park` above we have a fast path for an already-notified thread, + // and afterwards we start coordinating for a sleep. Return quickly. + if self + .state + .compare_exchange(NOTIFIED, EMPTY, SeqCst, SeqCst) + .is_ok() + { + return; + } + + if dur == Duration::from_millis(0) { + return; + } + + let m = self.mutex.lock(); + + match self.state.compare_exchange(EMPTY, PARKED, SeqCst, SeqCst) { + Ok(_) => {} + Err(NOTIFIED) => { + // We must read again here, see `park`. + let old = self.state.swap(EMPTY, SeqCst); + debug_assert_eq!(old, NOTIFIED, "park state changed unexpectedly"); + + return; + } + Err(actual) => panic!("inconsistent park_timeout state; actual = {}", actual), + } + + // Wait with a timeout, and if we spuriously wake up or otherwise wake up + // from a notification, we just want to unconditionally set the state back to + // empty, either consuming a notification or un-flagging ourselves as + // parked. + let (_m, _result) = self.condvar.wait_timeout(m, dur).unwrap(); + + match self.state.swap(EMPTY, SeqCst) { + NOTIFIED => {} // got a notification, hurray! 
+ PARKED => {} // no notification, alas + n => panic!("inconsistent park_timeout state: {}", n), + } + } + + fn unpark(&self) { + // To ensure the unparked thread will observe any writes we made before + // this call, we must perform a release operation that `park` can + // synchronize with. To do that we must write `NOTIFIED` even if `state` + // is already `NOTIFIED`. That is why this must be a swap rather than a + // compare-and-swap that returns if it reads `NOTIFIED` on failure. + match self.state.swap(NOTIFIED, SeqCst) { + EMPTY => return, // no one was waiting + NOTIFIED => return, // already unparked + PARKED => {} // gotta go wake someone up + _ => panic!("inconsistent state in unpark"), + } + + // There is a period between when the parked thread sets `state` to + // `PARKED` (or last checked `state` in the case of a spurious wake + // up) and when it actually waits on `cvar`. If we were to notify + // during this period it would be ignored and then when the parked + // thread went to sleep it would never wake up. Fortunately, it has + // `lock` locked at this stage so we can acquire `lock` to wait until + // it is ready to receive the notification. + // + // Releasing `lock` before the call to `notify_one` means that when the + // parked thread wakes it doesn't get woken only to have to wait for us + // to release `lock`. + drop(self.mutex.lock()); + + self.condvar.notify_one() + } + + fn shutdown(&self) { + self.condvar.notify_all(); + } +} + +impl Default for ParkThread { + fn default() -> Self { + Self::new() + } +} + +// ===== impl UnparkThread ===== + +impl UnparkThread { + pub(crate) fn unpark(&self) { + self.inner.unpark(); + } +} + +use crate::loom::thread::AccessError; +use std::future::Future; +use std::marker::PhantomData; +use std::mem; +use std::rc::Rc; +use std::task::{RawWaker, RawWakerVTable, Waker}; + +/// Blocks the current thread using a condition variable. +#[derive(Debug)] +pub(crate) struct CachedParkThread { + _anchor: PhantomData<Rc<()>>, +} + +impl CachedParkThread { + /// Creates a new `ParkThread` handle for the current thread. + /// + /// This type cannot be moved to other threads, so it should be created on + /// the thread that the caller intends to park. + pub(crate) fn new() -> CachedParkThread { + CachedParkThread { + _anchor: PhantomData, + } + } + + pub(crate) fn waker(&self) -> Result<Waker, AccessError> { + self.unpark().map(|unpark| unpark.into_waker()) + } + + fn unpark(&self) -> Result<UnparkThread, AccessError> { + self.with_current(|park_thread| park_thread.unpark()) + } + + pub(crate) fn park(&mut self) { + self.with_current(|park_thread| park_thread.inner.park()) + .unwrap(); + } + + pub(crate) fn park_timeout(&mut self, duration: Duration) { + self.with_current(|park_thread| park_thread.inner.park_timeout(duration)) + .unwrap(); + } + + /// Gets a reference to the `ParkThread` handle for this thread. 
+ fn with_current<F, R>(&self, f: F) -> Result<R, AccessError> + where + F: FnOnce(&ParkThread) -> R, + { + CURRENT_PARKER.try_with(|inner| f(inner)) + } + + pub(crate) fn block_on<F: Future>(&mut self, f: F) -> Result<F::Output, AccessError> { + use std::task::Context; + use std::task::Poll::Ready; + + // `get_unpark()` should not return a Result + let waker = self.waker()?; + let mut cx = Context::from_waker(&waker); + + pin!(f); + + loop { + if let Ready(v) = crate::runtime::coop::budget(|| f.as_mut().poll(&mut cx)) { + return Ok(v); + } + + self.park(); + } + } +} + +impl UnparkThread { + pub(crate) fn into_waker(self) -> Waker { + unsafe { + let raw = unparker_to_raw_waker(self.inner); + Waker::from_raw(raw) + } + } +} + +impl Inner { + #[allow(clippy::wrong_self_convention)] + fn into_raw(this: Arc<Inner>) -> *const () { + Arc::into_raw(this) as *const () + } + + unsafe fn from_raw(ptr: *const ()) -> Arc<Inner> { + Arc::from_raw(ptr as *const Inner) + } +} + +unsafe fn unparker_to_raw_waker(unparker: Arc<Inner>) -> RawWaker { + RawWaker::new( + Inner::into_raw(unparker), + &RawWakerVTable::new(clone, wake, wake_by_ref, drop_waker), + ) +} + +unsafe fn clone(raw: *const ()) -> RawWaker { + let unparker = Inner::from_raw(raw); + + // Increment the ref count + mem::forget(unparker.clone()); + + unparker_to_raw_waker(unparker) +} + +unsafe fn drop_waker(raw: *const ()) { + let _ = Inner::from_raw(raw); +} + +unsafe fn wake(raw: *const ()) { + let unparker = Inner::from_raw(raw); + unparker.unpark(); +} + +unsafe fn wake_by_ref(raw: *const ()) { + let unparker = Inner::from_raw(raw); + unparker.unpark(); + + // We don't actually own a reference to the unparker + mem::forget(unparker); +} + +#[cfg(loom)] +pub(crate) fn current_thread_park_count() -> usize { + CURRENT_THREAD_PARK_COUNT.with(|count| count.load(SeqCst)) +} diff --git a/third_party/rust/tokio/src/runtime/process.rs b/third_party/rust/tokio/src/runtime/process.rs new file mode 100644 index 0000000000..df339b0e72 --- /dev/null +++ b/third_party/rust/tokio/src/runtime/process.rs @@ -0,0 +1,44 @@ +#![cfg_attr(not(feature = "rt"), allow(dead_code))] + +//! Process driver. + +use crate::process::unix::GlobalOrphanQueue; +use crate::runtime::driver; +use crate::runtime::signal::{Driver as SignalDriver, Handle as SignalHandle}; + +use std::time::Duration; + +/// Responsible for cleaning up orphaned child processes on Unix platforms. +#[derive(Debug)] +pub(crate) struct Driver { + park: SignalDriver, + signal_handle: SignalHandle, +} + +// ===== impl Driver ===== + +impl Driver { + /// Creates a new signal `Driver` instance that delegates wakeups to `park`. 
+ pub(crate) fn new(park: SignalDriver) -> Self { + let signal_handle = park.handle(); + + Self { + park, + signal_handle, + } + } + + pub(crate) fn park(&mut self, handle: &driver::Handle) { + self.park.park(handle); + GlobalOrphanQueue::reap_orphans(&self.signal_handle); + } + + pub(crate) fn park_timeout(&mut self, handle: &driver::Handle, duration: Duration) { + self.park.park_timeout(handle, duration); + GlobalOrphanQueue::reap_orphans(&self.signal_handle); + } + + pub(crate) fn shutdown(&mut self, handle: &driver::Handle) { + self.park.shutdown(handle) + } +} diff --git a/third_party/rust/tokio/src/runtime/runtime.rs b/third_party/rust/tokio/src/runtime/runtime.rs new file mode 100644 index 0000000000..3f34999758 --- /dev/null +++ b/third_party/rust/tokio/src/runtime/runtime.rs @@ -0,0 +1,445 @@ +use crate::runtime::blocking::BlockingPool; +use crate::runtime::scheduler::CurrentThread; +use crate::runtime::{context, EnterGuard, Handle}; +use crate::task::JoinHandle; + +use std::future::Future; +use std::time::Duration; + +cfg_rt_multi_thread! { + use crate::runtime::Builder; + use crate::runtime::scheduler::MultiThread; +} + +/// The Tokio runtime. +/// +/// The runtime provides an I/O driver, task scheduler, [timer], and +/// blocking pool, necessary for running asynchronous tasks. +/// +/// Instances of `Runtime` can be created using [`new`], or [`Builder`]. +/// However, most users will use the `#[tokio::main]` annotation on their +/// entry point instead. +/// +/// See [module level][mod] documentation for more details. +/// +/// # Shutdown +/// +/// Shutting down the runtime is done by dropping the value, or calling +/// [`Runtime::shutdown_background`] or [`Runtime::shutdown_timeout`]. +/// +/// Tasks spawned through [`Runtime::spawn`] keep running until they yield. +/// Then they are dropped. They are not *guaranteed* to run to completion, but +/// *might* do so if they do not yield until completion. +/// +/// Blocking functions spawned through [`Runtime::spawn_blocking`] keep running +/// until they return. +/// +/// The thread initiating the shutdown blocks until all spawned work has been +/// stopped. This can take an indefinite amount of time. The `Drop` +/// implementation waits forever for this. +/// +/// `shutdown_background` and `shutdown_timeout` can be used if waiting forever +/// is undesired. When the timeout is reached, spawned work that did not stop +/// in time and threads running it are leaked. The work continues to run until +/// one of the stopping conditions is fulfilled, but the thread initiating the +/// shutdown is unblocked. +/// +/// Once the runtime has been dropped, any outstanding I/O resources bound to +/// it will no longer function. Calling any method on them will result in an +/// error. +/// +/// # Sharing +/// +/// The Tokio runtime implements `Sync` and `Send` to allow you to wrap it +/// in a `Arc`. Most fn take `&self` to allow you to call them concurrently +/// across multiple threads. +/// +/// Calls to `shutdown` and `shutdown_timeout` require exclusive ownership of +/// the runtime type and this can be achieved via `Arc::try_unwrap` when only +/// one strong count reference is left over. 
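Since the runtime is `Send + Sync` and `spawn` takes `&self`, an `Arc<Runtime>` can be handed to plain threads, and `Arc::try_unwrap` recovers the exclusive ownership that `shutdown_timeout` requires. A sketch:

```rust
use std::sync::Arc;
use std::time::Duration;
use tokio::runtime::Runtime;

fn main() {
    let rt = Arc::new(Runtime::new().unwrap());

    // Clones of the Arc let ordinary threads submit work to one shared runtime.
    let workers: Vec<_> = (0..4)
        .map(|i| {
            let rt = Arc::clone(&rt);
            std::thread::spawn(move || {
                rt.spawn(async move {
                    println!("task {i} running on the shared runtime");
                });
            })
        })
        .collect();
    for w in workers {
        w.join().unwrap();
    }

    // Every other clone has been dropped, so the runtime can be reclaimed
    // from the Arc and shut down with a bounded wait.
    match Arc::try_unwrap(rt) {
        Ok(rt) => rt.shutdown_timeout(Duration::from_secs(1)),
        Err(_) => panic!("runtime still shared"),
    }
}
```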
+/// +/// [timer]: crate::time +/// [mod]: index.html +/// [`new`]: method@Self::new +/// [`Builder`]: struct@Builder +#[derive(Debug)] +pub struct Runtime { + /// Task scheduler + scheduler: Scheduler, + + /// Handle to runtime, also contains driver handles + handle: Handle, + + /// Blocking pool handle, used to signal shutdown + blocking_pool: BlockingPool, +} + +/// The flavor of a `Runtime`. +/// +/// This is the return type for [`Handle::runtime_flavor`](crate::runtime::Handle::runtime_flavor()). +#[derive(Debug, PartialEq, Eq)] +#[non_exhaustive] +pub enum RuntimeFlavor { + /// The flavor that executes all tasks on the current thread. + CurrentThread, + /// The flavor that executes tasks across multiple threads. + MultiThread, +} + +/// The runtime scheduler is either a multi-thread or a current-thread executor. +#[derive(Debug)] +pub(super) enum Scheduler { + /// Execute all tasks on the current-thread. + CurrentThread(CurrentThread), + + /// Execute tasks across multiple threads. + #[cfg(all(feature = "rt-multi-thread", not(tokio_wasi)))] + MultiThread(MultiThread), +} + +impl Runtime { + pub(super) fn from_parts( + scheduler: Scheduler, + handle: Handle, + blocking_pool: BlockingPool, + ) -> Runtime { + Runtime { + scheduler, + handle, + blocking_pool, + } + } + + cfg_not_wasi! { + /// Creates a new runtime instance with default configuration values. + /// + /// This results in the multi threaded scheduler, I/O driver, and time driver being + /// initialized. + /// + /// Most applications will not need to call this function directly. Instead, + /// they will use the [`#[tokio::main]` attribute][main]. When a more complex + /// configuration is necessary, the [runtime builder] may be used. + /// + /// See [module level][mod] documentation for more details. + /// + /// # Examples + /// + /// Creating a new `Runtime` with default configuration values. + /// + /// ``` + /// use tokio::runtime::Runtime; + /// + /// let rt = Runtime::new() + /// .unwrap(); + /// + /// // Use the runtime... + /// ``` + /// + /// [mod]: index.html + /// [main]: ../attr.main.html + /// [threaded scheduler]: index.html#threaded-scheduler + /// [runtime builder]: crate::runtime::Builder + #[cfg(feature = "rt-multi-thread")] + #[cfg_attr(docsrs, doc(cfg(feature = "rt-multi-thread")))] + pub fn new() -> std::io::Result<Runtime> { + Builder::new_multi_thread().enable_all().build() + } + } + + /// Returns a handle to the runtime's spawner. + /// + /// The returned handle can be used to spawn tasks that run on this runtime, and can + /// be cloned to allow moving the `Handle` to other threads. + /// + /// Calling [`Handle::block_on`] on a handle to a `current_thread` runtime is error-prone. + /// Refer to the documentation of [`Handle::block_on`] for more. + /// + /// # Examples + /// + /// ``` + /// use tokio::runtime::Runtime; + /// + /// let rt = Runtime::new() + /// .unwrap(); + /// + /// let handle = rt.handle(); + /// + /// // Use the handle... + /// ``` + pub fn handle(&self) -> &Handle { + &self.handle + } + + /// Spawns a future onto the Tokio runtime. + /// + /// This spawns the given future onto the runtime's executor, usually a + /// thread pool. The thread pool is then responsible for polling the future + /// until it completes. + /// + /// The provided future will start running in the background immediately + /// when `spawn` is called, even if you don't await the returned + /// `JoinHandle`. + /// + /// See [module level][mod] documentation for more details. 
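Because the handle is `Clone` and `Send`, a thread that is not managed by the runtime can hold one and push work onto it. A sketch:

```rust
use tokio::runtime::Runtime;

fn main() {
    let rt = Runtime::new().unwrap();

    // Move a cloned handle into a plain OS thread.
    let handle = rt.handle().clone();
    let producer = std::thread::spawn(move || {
        handle.spawn(async {
            println!("spawned from a non-runtime thread");
        });
    });
    producer.join().unwrap();

    // Give the spawned task a chance to run before the runtime is dropped.
    rt.block_on(async {
        tokio::task::yield_now().await;
    });
}
```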
+ /// + /// [mod]: index.html + /// + /// # Examples + /// + /// ``` + /// use tokio::runtime::Runtime; + /// + /// # fn dox() { + /// // Create the runtime + /// let rt = Runtime::new().unwrap(); + /// + /// // Spawn a future onto the runtime + /// rt.spawn(async { + /// println!("now running on a worker thread"); + /// }); + /// # } + /// ``` + #[track_caller] + pub fn spawn<F>(&self, future: F) -> JoinHandle<F::Output> + where + F: Future + Send + 'static, + F::Output: Send + 'static, + { + self.handle.spawn(future) + } + + /// Runs the provided function on an executor dedicated to blocking operations. + /// + /// # Examples + /// + /// ``` + /// use tokio::runtime::Runtime; + /// + /// # fn dox() { + /// // Create the runtime + /// let rt = Runtime::new().unwrap(); + /// + /// // Spawn a blocking function onto the runtime + /// rt.spawn_blocking(|| { + /// println!("now running on a worker thread"); + /// }); + /// # } + #[track_caller] + pub fn spawn_blocking<F, R>(&self, func: F) -> JoinHandle<R> + where + F: FnOnce() -> R + Send + 'static, + R: Send + 'static, + { + self.handle.spawn_blocking(func) + } + + /// Runs a future to completion on the Tokio runtime. This is the + /// runtime's entry point. + /// + /// This runs the given future on the current thread, blocking until it is + /// complete, and yielding its resolved result. Any tasks or timers + /// which the future spawns internally will be executed on the runtime. + /// + /// # Non-worker future + /// + /// Note that the future required by this function does not run as a + /// worker. The expectation is that other tasks are spawned by the future here. + /// Awaiting on other futures from the future provided here will not + /// perform as fast as those spawned as workers. + /// + /// # Multi thread scheduler + /// + /// When the multi thread scheduler is used this will allow futures + /// to run within the io driver and timer context of the overall runtime. + /// + /// Any spawned tasks will continue running after `block_on` returns. + /// + /// # Current thread scheduler + /// + /// When the current thread scheduler is enabled `block_on` + /// can be called concurrently from multiple threads. The first call + /// will take ownership of the io and timer drivers. This means + /// other threads which do not own the drivers will hook into that one. + /// When the first `block_on` completes, other threads will be able to + /// "steal" the driver to allow continued execution of their futures. + /// + /// Any spawned tasks will be suspended after `block_on` returns. Calling + /// `block_on` again will resume previously spawned tasks. + /// + /// # Panics + /// + /// This function panics if the provided future panics, or if called within an + /// asynchronous execution context. 
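A sketch of `block_on` as the program's entry point, driving a `spawn_blocking` job and a spawned task and awaiting both join handles so the root future outlives the work it started:

```rust
use tokio::runtime::Runtime;

fn main() {
    let rt = Runtime::new().unwrap();

    let (factorial, msg) = rt.block_on(async {
        // Runs on the blocking pool.
        let cpu = tokio::task::spawn_blocking(|| (1..=10u64).product::<u64>());
        // Runs on the worker threads.
        let bg = tokio::spawn(async { "background task done" });

        (cpu.await.unwrap(), bg.await.unwrap())
    });

    assert_eq!(factorial, 3_628_800);
    println!("{msg}");
}
```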
+ /// + /// # Examples + /// + /// ```no_run + /// use tokio::runtime::Runtime; + /// + /// // Create the runtime + /// let rt = Runtime::new().unwrap(); + /// + /// // Execute the future, blocking the current thread until completion + /// rt.block_on(async { + /// println!("hello"); + /// }); + /// ``` + /// + /// [handle]: fn@Handle::block_on + #[track_caller] + pub fn block_on<F: Future>(&self, future: F) -> F::Output { + #[cfg(all( + tokio_unstable, + tokio_taskdump, + feature = "rt", + target_os = "linux", + any(target_arch = "aarch64", target_arch = "x86", target_arch = "x86_64") + ))] + let future = super::task::trace::Trace::root(future); + + #[cfg(all(tokio_unstable, feature = "tracing"))] + let future = crate::util::trace::task( + future, + "block_on", + None, + crate::runtime::task::Id::next().as_u64(), + ); + + let _enter = self.enter(); + + match &self.scheduler { + Scheduler::CurrentThread(exec) => exec.block_on(&self.handle.inner, future), + #[cfg(all(feature = "rt-multi-thread", not(tokio_wasi)))] + Scheduler::MultiThread(exec) => exec.block_on(&self.handle.inner, future), + } + } + + /// Enters the runtime context. + /// + /// This allows you to construct types that must have an executor + /// available on creation such as [`Sleep`] or [`TcpStream`]. It will + /// also allow you to call methods such as [`tokio::spawn`]. + /// + /// [`Sleep`]: struct@crate::time::Sleep + /// [`TcpStream`]: struct@crate::net::TcpStream + /// [`tokio::spawn`]: fn@crate::spawn + /// + /// # Example + /// + /// ``` + /// use tokio::runtime::Runtime; + /// + /// fn function_that_spawns(msg: String) { + /// // Had we not used `rt.enter` below, this would panic. + /// tokio::spawn(async move { + /// println!("{}", msg); + /// }); + /// } + /// + /// fn main() { + /// let rt = Runtime::new().unwrap(); + /// + /// let s = "Hello World!".to_string(); + /// + /// // By entering the context, we tie `tokio::spawn` to this executor. + /// let _guard = rt.enter(); + /// function_that_spawns(s); + /// } + /// ``` + pub fn enter(&self) -> EnterGuard<'_> { + self.handle.enter() + } + + /// Shuts down the runtime, waiting for at most `duration` for all spawned + /// work to stop. + /// + /// See the [struct level documentation](Runtime#shutdown) for more details. + /// + /// # Examples + /// + /// ``` + /// use tokio::runtime::Runtime; + /// use tokio::task; + /// + /// use std::thread; + /// use std::time::Duration; + /// + /// fn main() { + /// let runtime = Runtime::new().unwrap(); + /// + /// runtime.block_on(async move { + /// task::spawn_blocking(move || { + /// thread::sleep(Duration::from_secs(10_000)); + /// }); + /// }); + /// + /// runtime.shutdown_timeout(Duration::from_millis(100)); + /// } + /// ``` + pub fn shutdown_timeout(mut self, duration: Duration) { + // Wakeup and shutdown all the worker threads + self.handle.inner.shutdown(); + self.blocking_pool.shutdown(Some(duration)); + } + + /// Shuts down the runtime, without waiting for any spawned work to stop. + /// + /// This can be useful if you want to drop a runtime from within another runtime. + /// Normally, dropping a runtime will block indefinitely for spawned blocking tasks + /// to complete, which would normally not be permitted within an asynchronous context. + /// By calling `shutdown_background()`, you can drop the runtime from such a context. 
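As a concrete use of the `enter()` guard described above: converting a std listener into a tokio one registers it with the runtime's I/O driver, so it must happen inside the runtime context. A sketch, assuming the `net` feature is enabled:

```rust
use tokio::net::TcpListener;
use tokio::runtime::Runtime;

fn main() -> std::io::Result<()> {
    let rt = Runtime::new()?;

    // Prepare a std listener; tokio requires it to be non-blocking.
    let std_listener = std::net::TcpListener::bind("127.0.0.1:0")?;
    std_listener.set_nonblocking(true)?;

    // `from_std` needs the runtime context, which the guard provides.
    let _guard = rt.enter();
    let listener = TcpListener::from_std(std_listener)?;

    rt.block_on(async {
        println!("listening on {}", listener.local_addr().unwrap());
        // listener.accept().await ...
    });
    Ok(())
}
```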
+    ///
+    /// Note, however, that because we do not wait for any blocking tasks to
+    /// complete, this may result in a resource leak (in that any blocking tasks
+    /// are still running until they return).
+    ///
+    /// See the [struct level documentation](Runtime#shutdown) for more details.
+    ///
+    /// This function is equivalent to calling `shutdown_timeout(Duration::from_nanos(0))`.
+    ///
+    /// ```
+    /// use tokio::runtime::Runtime;
+    ///
+    /// fn main() {
+    ///     let runtime = Runtime::new().unwrap();
+    ///
+    ///     runtime.block_on(async move {
+    ///         let inner_runtime = Runtime::new().unwrap();
+    ///         // ...
+    ///         inner_runtime.shutdown_background();
+    ///     });
+    /// }
+    /// ```
+    pub fn shutdown_background(self) {
+        self.shutdown_timeout(Duration::from_nanos(0))
+    }
+}
+
+#[allow(clippy::single_match)] // there are comments in the error branch, so we don't want if-let
+impl Drop for Runtime {
+    fn drop(&mut self) {
+        match &mut self.scheduler {
+            Scheduler::CurrentThread(current_thread) => {
+                // This ensures that tasks spawned on the current-thread
+                // runtime are dropped inside the runtime's context.
+                let _guard = context::try_set_current(&self.handle.inner);
+                current_thread.shutdown(&self.handle.inner);
+            }
+            #[cfg(all(feature = "rt-multi-thread", not(tokio_wasi)))]
+            Scheduler::MultiThread(multi_thread) => {
+                // The threaded scheduler drops its tasks on its worker threads, which is
+                // already in the runtime's context.
+                multi_thread.shutdown(&self.handle.inner);
+            }
+        }
+    }
+}
+
+cfg_metrics! {
+    impl Runtime {
+        /// Returns a handle that provides access to the runtime's metrics.
+        pub fn metrics(&self) -> crate::runtime::RuntimeMetrics {
+            self.handle.metrics()
+        }
+    }
+}
diff --git a/third_party/rust/tokio/src/runtime/scheduler/current_thread.rs b/third_party/rust/tokio/src/runtime/scheduler/current_thread.rs
new file mode 100644
index 0000000000..ac4a8d6fac
--- /dev/null
+++ b/third_party/rust/tokio/src/runtime/scheduler/current_thread.rs
@@ -0,0 +1,750 @@
+use crate::future::poll_fn;
+use crate::loom::sync::atomic::AtomicBool;
+use crate::loom::sync::Arc;
+use crate::runtime::driver::{self, Driver};
+use crate::runtime::scheduler::{self, Defer, Inject};
+use crate::runtime::task::{self, JoinHandle, OwnedTasks, Schedule, Task};
+use crate::runtime::{blocking, context, Config, MetricsBatch, SchedulerMetrics, WorkerMetrics};
+use crate::sync::notify::Notify;
+use crate::util::atomic_cell::AtomicCell;
+use crate::util::{waker_ref, RngSeedGenerator, Wake, WakerRef};
+
+use std::cell::RefCell;
+use std::collections::VecDeque;
+use std::fmt;
+use std::future::Future;
+use std::sync::atomic::Ordering::{AcqRel, Release};
+use std::task::Poll::{Pending, Ready};
+use std::task::Waker;
+use std::time::Duration;
+
+/// Executes tasks on the current thread
+pub(crate) struct CurrentThread {
+    /// Core scheduler data is acquired by a thread entering `block_on`.
+    core: AtomicCell<Core>,
+
+    /// Notifier for waking up other threads to steal the
+    /// driver.
+    notify: Notify,
+}
+
+/// Handle to the current thread scheduler
+pub(crate) struct Handle {
+    /// Scheduler state shared across threads
+    shared: Shared,
+
+    /// Resource driver handles
+    pub(crate) driver: driver::Handle,
+
+    /// Blocking pool spawner
+    pub(crate) blocking_spawner: blocking::Spawner,
+
+    /// Current random number generator seed
+    pub(crate) seed_generator: RngSeedGenerator,
+}
+
+/// Data required for executing the scheduler. The struct is passed around to
+/// a function that will perform the scheduling work and acts as a capability token.
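+// An illustrative sketch (hypothetical helper, not part of this file) of the
+// "capability token" pattern described above: the boxed `Core` is moved into
+// a closure and handed back, so only one owner can drive the scheduler at a
+// time and no shared mutable state needs locking.
+//
+//     fn with_core<R>(mut core: Box<Core>, f: impl FnOnce(&mut Core) -> R) -> (Box<Core>, R) {
+//         let ret = f(&mut core);
+//         (core, ret)
+//     }
+//
+// `Context::run_task` and `Context::enter` below follow this same shape.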
+struct Core { + /// Scheduler run queue + tasks: VecDeque<Notified>, + + /// Current tick + tick: u32, + + /// Runtime driver + /// + /// The driver is removed before starting to park the thread + driver: Option<Driver>, + + /// Metrics batch + metrics: MetricsBatch, + + /// How often to check the global queue + global_queue_interval: u32, + + /// True if a task panicked without being handled and the runtime is + /// configured to shutdown on unhandled panic. + unhandled_panic: bool, +} + +/// Scheduler state shared between threads. +struct Shared { + /// Remote run queue + inject: Inject<Arc<Handle>>, + + /// Collection of all active tasks spawned onto this executor. + owned: OwnedTasks<Arc<Handle>>, + + /// Indicates whether the blocked on thread was woken. + woken: AtomicBool, + + /// Scheduler configuration options + config: Config, + + /// Keeps track of various runtime metrics. + scheduler_metrics: SchedulerMetrics, + + /// This scheduler only has one worker. + worker_metrics: WorkerMetrics, +} + +/// Thread-local context. +/// +/// pub(crate) to store in `runtime::context`. +pub(crate) struct Context { + /// Scheduler handle + handle: Arc<Handle>, + + /// Scheduler core, enabling the holder of `Context` to execute the + /// scheduler. + core: RefCell<Option<Box<Core>>>, + + /// Deferred tasks, usually ones that called `task::yield_now()`. + pub(crate) defer: Defer, +} + +type Notified = task::Notified<Arc<Handle>>; + +/// Initial queue capacity. +const INITIAL_CAPACITY: usize = 64; + +/// Used if none is specified. This is a temporary constant and will be removed +/// as we unify tuning logic between the multi-thread and current-thread +/// schedulers. +const DEFAULT_GLOBAL_QUEUE_INTERVAL: u32 = 31; + +impl CurrentThread { + pub(crate) fn new( + driver: Driver, + driver_handle: driver::Handle, + blocking_spawner: blocking::Spawner, + seed_generator: RngSeedGenerator, + config: Config, + ) -> (CurrentThread, Arc<Handle>) { + let worker_metrics = WorkerMetrics::from_config(&config); + + // Get the configured global queue interval, or use the default. + let global_queue_interval = config + .global_queue_interval + .unwrap_or(DEFAULT_GLOBAL_QUEUE_INTERVAL); + + let handle = Arc::new(Handle { + shared: Shared { + inject: Inject::new(), + owned: OwnedTasks::new(), + woken: AtomicBool::new(false), + config, + scheduler_metrics: SchedulerMetrics::new(), + worker_metrics, + }, + driver: driver_handle, + blocking_spawner, + seed_generator, + }); + + let core = AtomicCell::new(Some(Box::new(Core { + tasks: VecDeque::with_capacity(INITIAL_CAPACITY), + tick: 0, + driver: Some(driver), + metrics: MetricsBatch::new(&handle.shared.worker_metrics), + global_queue_interval, + unhandled_panic: false, + }))); + + let scheduler = CurrentThread { + core, + notify: Notify::new(), + }; + + (scheduler, handle) + } + + #[track_caller] + pub(crate) fn block_on<F: Future>(&self, handle: &scheduler::Handle, future: F) -> F::Output { + pin!(future); + + crate::runtime::context::enter_runtime(handle, false, |blocking| { + let handle = handle.as_current_thread(); + + // Attempt to steal the scheduler core and block_on the future if we can + // there, otherwise, lets select on a notification that the core is + // available or the future is complete. 
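+            //
+            // Concretely: if `take_core` succeeds, this thread drives the
+            // scheduler itself via `CoreGuard::block_on`. Otherwise another
+            // thread currently owns the core, so we poll two things at once:
+            // the `notified` future (the core was handed back and can be
+            // stolen) and the caller's `future` (which may complete anyway,
+            // since its waker unparks this blocked thread).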
+ loop { + if let Some(core) = self.take_core(handle) { + return core.block_on(future); + } else { + let notified = self.notify.notified(); + pin!(notified); + + if let Some(out) = blocking + .block_on(poll_fn(|cx| { + if notified.as_mut().poll(cx).is_ready() { + return Ready(None); + } + + if let Ready(out) = future.as_mut().poll(cx) { + return Ready(Some(out)); + } + + Pending + })) + .expect("Failed to `Enter::block_on`") + { + return out; + } + } + } + }) + } + + fn take_core(&self, handle: &Arc<Handle>) -> Option<CoreGuard<'_>> { + let core = self.core.take()?; + + Some(CoreGuard { + context: scheduler::Context::CurrentThread(Context { + handle: handle.clone(), + core: RefCell::new(Some(core)), + defer: Defer::new(), + }), + scheduler: self, + }) + } + + pub(crate) fn shutdown(&mut self, handle: &scheduler::Handle) { + let handle = handle.as_current_thread(); + + // Avoid a double panic if we are currently panicking and + // the lock may be poisoned. + + let core = match self.take_core(handle) { + Some(core) => core, + None if std::thread::panicking() => return, + None => panic!("Oh no! We never placed the Core back, this is a bug!"), + }; + + // Check that the thread-local is not being destroyed + let tls_available = context::with_current(|_| ()).is_ok(); + + if tls_available { + core.enter(|core, _context| { + let core = shutdown2(core, handle); + (core, ()) + }); + } else { + // Shutdown without setting the context. `tokio::spawn` calls will + // fail, but those will fail either way because the thread-local is + // not available anymore. + let context = core.context.expect_current_thread(); + let core = context.core.borrow_mut().take().unwrap(); + + let core = shutdown2(core, handle); + *context.core.borrow_mut() = Some(core); + } + } +} + +fn shutdown2(mut core: Box<Core>, handle: &Handle) -> Box<Core> { + // Drain the OwnedTasks collection. This call also closes the + // collection, ensuring that no tasks are ever pushed after this + // call returns. + handle.shared.owned.close_and_shutdown_all(); + + // Drain local queue + // We already shut down every task, so we just need to drop the task. 
+ while let Some(task) = core.next_local_task(handle) { + drop(task); + } + + // Close the injection queue + handle.shared.inject.close(); + + // Drain remote queue + while let Some(task) = handle.shared.inject.pop() { + drop(task); + } + + assert!(handle.shared.owned.is_empty()); + + // Submit metrics + core.submit_metrics(handle); + + // Shutdown the resource drivers + if let Some(driver) = core.driver.as_mut() { + driver.shutdown(&handle.driver); + } + + core +} + +impl fmt::Debug for CurrentThread { + fn fmt(&self, fmt: &mut fmt::Formatter<'_>) -> fmt::Result { + fmt.debug_struct("CurrentThread").finish() + } +} + +// ===== impl Core ===== + +impl Core { + /// Get and increment the current tick + fn tick(&mut self) { + self.tick = self.tick.wrapping_add(1); + } + + fn next_task(&mut self, handle: &Handle) -> Option<Notified> { + if self.tick % self.global_queue_interval == 0 { + handle + .next_remote_task() + .or_else(|| self.next_local_task(handle)) + } else { + self.next_local_task(handle) + .or_else(|| handle.next_remote_task()) + } + } + + fn next_local_task(&mut self, handle: &Handle) -> Option<Notified> { + let ret = self.tasks.pop_front(); + handle + .shared + .worker_metrics + .set_queue_depth(self.tasks.len()); + ret + } + + fn push_task(&mut self, handle: &Handle, task: Notified) { + self.tasks.push_back(task); + self.metrics.inc_local_schedule_count(); + handle + .shared + .worker_metrics + .set_queue_depth(self.tasks.len()); + } + + fn submit_metrics(&mut self, handle: &Handle) { + self.metrics.submit(&handle.shared.worker_metrics); + } +} + +#[cfg(tokio_taskdump)] +fn wake_deferred_tasks_and_free(context: &Context) { + let wakers = context.defer.take_deferred(); + for waker in wakers { + waker.wake(); + } +} + +// ===== impl Context ===== + +impl Context { + /// Execute the closure with the given scheduler core stored in the + /// thread-local context. + fn run_task<R>(&self, mut core: Box<Core>, f: impl FnOnce() -> R) -> (Box<Core>, R) { + core.metrics.start_poll(); + let mut ret = self.enter(core, || crate::runtime::coop::budget(f)); + ret.0.metrics.end_poll(); + ret + } + + /// Blocks the current thread until an event is received by the driver, + /// including I/O events, timer events, ... + fn park(&self, mut core: Box<Core>, handle: &Handle) -> Box<Core> { + let mut driver = core.driver.take().expect("driver missing"); + + if let Some(f) = &handle.shared.config.before_park { + // Incorrect lint, the closures are actually different types so `f` + // cannot be passed as an argument to `enter`. + #[allow(clippy::redundant_closure)] + let (c, _) = self.enter(core, || f()); + core = c; + } + + // This check will fail if `before_park` spawns a task for us to run + // instead of parking the thread + if core.tasks.is_empty() { + // Park until the thread is signaled + core.metrics.about_to_park(); + core.submit_metrics(handle); + + let (c, _) = self.enter(core, || { + driver.park(&handle.driver); + self.defer.wake(); + }); + + core = c; + } + + if let Some(f) = &handle.shared.config.after_unpark { + // Incorrect lint, the closures are actually different types so `f` + // cannot be passed as an argument to `enter`. + #[allow(clippy::redundant_closure)] + let (c, _) = self.enter(core, || f()); + core = c; + } + + core.driver = Some(driver); + core + } + + /// Checks the driver for new events without blocking the thread. 
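+    //
+    // This is the non-blocking counterpart of `park` above: the driver is
+    // polled with a zero-duration timeout, so ready I/O and timer events get
+    // processed between task batches without putting the thread to sleep.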
+ fn park_yield(&self, mut core: Box<Core>, handle: &Handle) -> Box<Core> { + let mut driver = core.driver.take().expect("driver missing"); + + core.submit_metrics(handle); + + let (mut core, _) = self.enter(core, || { + driver.park_timeout(&handle.driver, Duration::from_millis(0)); + self.defer.wake(); + }); + + core.driver = Some(driver); + core + } + + fn enter<R>(&self, core: Box<Core>, f: impl FnOnce() -> R) -> (Box<Core>, R) { + // Store the scheduler core in the thread-local context + // + // A drop-guard is employed at a higher level. + *self.core.borrow_mut() = Some(core); + + // Execute the closure while tracking the execution budget + let ret = f(); + + // Take the scheduler core back + let core = self.core.borrow_mut().take().expect("core missing"); + (core, ret) + } + + pub(crate) fn defer(&self, waker: &Waker) { + self.defer.defer(waker); + } +} + +// ===== impl Handle ===== + +impl Handle { + /// Spawns a future onto the `CurrentThread` scheduler + pub(crate) fn spawn<F>( + me: &Arc<Self>, + future: F, + id: crate::runtime::task::Id, + ) -> JoinHandle<F::Output> + where + F: crate::future::Future + Send + 'static, + F::Output: Send + 'static, + { + let (handle, notified) = me.shared.owned.bind(future, me.clone(), id); + + if let Some(notified) = notified { + me.schedule(notified); + } + + handle + } + + /// Capture a snapshot of this runtime's state. + #[cfg(all( + tokio_unstable, + tokio_taskdump, + target_os = "linux", + any(target_arch = "aarch64", target_arch = "x86", target_arch = "x86_64") + ))] + pub(crate) fn dump(&self) -> crate::runtime::Dump { + use crate::runtime::dump; + use task::trace::trace_current_thread; + + let mut traces = vec![]; + + // todo: how to make this work outside of a runtime context? + context::with_scheduler(|maybe_context| { + // drain the local queue + let context = if let Some(context) = maybe_context { + context.expect_current_thread() + } else { + return; + }; + let mut maybe_core = context.core.borrow_mut(); + let core = if let Some(core) = maybe_core.as_mut() { + core + } else { + return; + }; + let local = &mut core.tasks; + + if self.shared.inject.is_closed() { + return; + } + + traces = trace_current_thread(&self.shared.owned, local, &self.shared.inject) + .into_iter() + .map(dump::Task::new) + .collect(); + + // Avoid double borrow panic + drop(maybe_core); + + // Taking a taskdump could wakes every task, but we probably don't want + // the `yield_now` vector to be that large under normal circumstances. + // Therefore, we free its allocation. + wake_deferred_tasks_and_free(context); + }); + + dump::Dump::new(traces) + } + + fn next_remote_task(&self) -> Option<Notified> { + self.shared.inject.pop() + } + + fn waker_ref(me: &Arc<Self>) -> WakerRef<'_> { + // Set woken to true when enter block_on, ensure outer future + // be polled for the first time when enter loop + me.shared.woken.store(true, Release); + waker_ref(me) + } + + // reset woken to false and return original value + pub(crate) fn reset_woken(&self) -> bool { + self.shared.woken.swap(false, AcqRel) + } +} + +cfg_metrics! 
{ + impl Handle { + pub(crate) fn scheduler_metrics(&self) -> &SchedulerMetrics { + &self.shared.scheduler_metrics + } + + pub(crate) fn injection_queue_depth(&self) -> usize { + self.shared.inject.len() + } + + pub(crate) fn worker_metrics(&self, worker: usize) -> &WorkerMetrics { + assert_eq!(0, worker); + &self.shared.worker_metrics + } + + pub(crate) fn num_blocking_threads(&self) -> usize { + self.blocking_spawner.num_threads() + } + + pub(crate) fn num_idle_blocking_threads(&self) -> usize { + self.blocking_spawner.num_idle_threads() + } + + pub(crate) fn blocking_queue_depth(&self) -> usize { + self.blocking_spawner.queue_depth() + } + + pub(crate) fn active_tasks_count(&self) -> usize { + self.shared.owned.active_tasks_count() + } + } +} + +impl fmt::Debug for Handle { + fn fmt(&self, fmt: &mut fmt::Formatter<'_>) -> fmt::Result { + fmt.debug_struct("current_thread::Handle { ... }").finish() + } +} + +// ===== impl Shared ===== + +impl Schedule for Arc<Handle> { + fn release(&self, task: &Task<Self>) -> Option<Task<Self>> { + self.shared.owned.remove(task) + } + + fn schedule(&self, task: task::Notified<Self>) { + use scheduler::Context::CurrentThread; + + context::with_scheduler(|maybe_cx| match maybe_cx { + Some(CurrentThread(cx)) if Arc::ptr_eq(self, &cx.handle) => { + let mut core = cx.core.borrow_mut(); + + // If `None`, the runtime is shutting down, so there is no need + // to schedule the task. + if let Some(core) = core.as_mut() { + core.push_task(self, task); + } + } + _ => { + // Track that a task was scheduled from **outside** of the runtime. + self.shared.scheduler_metrics.inc_remote_schedule_count(); + + // Schedule the task + self.shared.inject.push(task); + self.driver.unpark(); + } + }); + } + + cfg_unstable! { + fn unhandled_panic(&self) { + use crate::runtime::UnhandledPanic; + + match self.shared.config.unhandled_panic { + UnhandledPanic::Ignore => { + // Do nothing + } + UnhandledPanic::ShutdownRuntime => { + use scheduler::Context::CurrentThread; + + // This hook is only called from within the runtime, so + // `context::with_scheduler` should match with `&self`, i.e. + // there is no opportunity for a nested scheduler to be + // called. + context::with_scheduler(|maybe_cx| match maybe_cx { + Some(CurrentThread(cx)) if Arc::ptr_eq(self, &cx.handle) => { + let mut core = cx.core.borrow_mut(); + + // If `None`, the runtime is shutting down, so there is no need to signal shutdown + if let Some(core) = core.as_mut() { + core.unhandled_panic = true; + self.shared.owned.close_and_shutdown_all(); + } + } + _ => unreachable!("runtime core not set in CURRENT thread-local"), + }) + } + } + } + } +} + +impl Wake for Handle { + fn wake(arc_self: Arc<Self>) { + Wake::wake_by_ref(&arc_self) + } + + /// Wake by reference + fn wake_by_ref(arc_self: &Arc<Self>) { + arc_self.shared.woken.store(true, Release); + arc_self.driver.unpark(); + } +} + +// ===== CoreGuard ===== + +/// Used to ensure we always place the `Core` value back into its slot in +/// `CurrentThread`, even if the future panics. 
+struct CoreGuard<'a> { + context: scheduler::Context, + scheduler: &'a CurrentThread, +} + +impl CoreGuard<'_> { + #[track_caller] + fn block_on<F: Future>(self, future: F) -> F::Output { + let ret = self.enter(|mut core, context| { + let waker = Handle::waker_ref(&context.handle); + let mut cx = std::task::Context::from_waker(&waker); + + pin!(future); + + core.metrics.start_processing_scheduled_tasks(); + + 'outer: loop { + let handle = &context.handle; + + if handle.reset_woken() { + let (c, res) = context.enter(core, || { + crate::runtime::coop::budget(|| future.as_mut().poll(&mut cx)) + }); + + core = c; + + if let Ready(v) = res { + return (core, Some(v)); + } + } + + for _ in 0..handle.shared.config.event_interval { + // Make sure we didn't hit an unhandled_panic + if core.unhandled_panic { + return (core, None); + } + + core.tick(); + + let entry = core.next_task(handle); + + let task = match entry { + Some(entry) => entry, + None => { + core.metrics.end_processing_scheduled_tasks(); + + core = if !context.defer.is_empty() { + context.park_yield(core, handle) + } else { + context.park(core, handle) + }; + + core.metrics.start_processing_scheduled_tasks(); + + // Try polling the `block_on` future next + continue 'outer; + } + }; + + let task = context.handle.shared.owned.assert_owner(task); + + let (c, _) = context.run_task(core, || { + task.run(); + }); + + core = c; + } + + core.metrics.end_processing_scheduled_tasks(); + + // Yield to the driver, this drives the timer and pulls any + // pending I/O events. + core = context.park_yield(core, handle); + + core.metrics.start_processing_scheduled_tasks(); + } + }); + + match ret { + Some(ret) => ret, + None => { + // `block_on` panicked. + panic!("a spawned task panicked and the runtime is configured to shut down on unhandled panic"); + } + } + } + + /// Enters the scheduler context. This sets the queue and other necessary + /// scheduler state in the thread-local. + fn enter<F, R>(self, f: F) -> R + where + F: FnOnce(Box<Core>, &Context) -> (Box<Core>, R), + { + let context = self.context.expect_current_thread(); + + // Remove `core` from `context` to pass into the closure. + let core = context.core.borrow_mut().take().expect("core missing"); + + // Call the closure and place `core` back + let (core, ret) = context::set_scheduler(&self.context, || f(core, context)); + + *context.core.borrow_mut() = Some(core); + + ret + } +} + +impl Drop for CoreGuard<'_> { + fn drop(&mut self) { + let context = self.context.expect_current_thread(); + + if let Some(core) = context.core.borrow_mut().take() { + // Replace old scheduler back into the state to allow + // other threads to pick it up and drive it. + self.scheduler.core.set(core); + + // Wake up other possible threads that could steal the driver. + self.scheduler.notify.notify_one() + } + } +} diff --git a/third_party/rust/tokio/src/runtime/scheduler/defer.rs b/third_party/rust/tokio/src/runtime/scheduler/defer.rs new file mode 100644 index 0000000000..a4be8ef2e5 --- /dev/null +++ b/third_party/rust/tokio/src/runtime/scheduler/defer.rs @@ -0,0 +1,43 @@ +use std::cell::RefCell; +use std::task::Waker; + +pub(crate) struct Defer { + deferred: RefCell<Vec<Waker>>, +} + +impl Defer { + pub(crate) fn new() -> Defer { + Defer { + deferred: Default::default(), + } + } + + pub(crate) fn defer(&self, waker: &Waker) { + let mut deferred = self.deferred.borrow_mut(); + + // If the same task adds itself a bunch of times, then only add it once. 
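+        //
+        // `Waker::will_wake` is a cheap, best-effort identity check: if the
+        // most recently deferred waker already wakes the same task, cloning
+        // and storing it again would only produce a redundant wakeup.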
+ if let Some(last) = deferred.last() { + if last.will_wake(waker) { + return; + } + } + + deferred.push(waker.clone()); + } + + pub(crate) fn is_empty(&self) -> bool { + self.deferred.borrow().is_empty() + } + + pub(crate) fn wake(&self) { + while let Some(waker) = self.deferred.borrow_mut().pop() { + waker.wake(); + } + } + + #[cfg(tokio_taskdump)] + pub(crate) fn take_deferred(&self) -> Vec<Waker> { + let mut deferred = self.deferred.borrow_mut(); + std::mem::take(&mut *deferred) + } +} diff --git a/third_party/rust/tokio/src/runtime/scheduler/inject.rs b/third_party/rust/tokio/src/runtime/scheduler/inject.rs new file mode 100644 index 0000000000..39976fcd7a --- /dev/null +++ b/third_party/rust/tokio/src/runtime/scheduler/inject.rs @@ -0,0 +1,72 @@ +//! Inject queue used to send wakeups to a work-stealing scheduler + +use crate::loom::sync::Mutex; +use crate::runtime::task; + +mod pop; +pub(crate) use pop::Pop; + +mod shared; +pub(crate) use shared::Shared; + +mod synced; +pub(crate) use synced::Synced; + +cfg_rt_multi_thread! { + mod rt_multi_thread; +} + +cfg_metrics! { + mod metrics; +} + +/// Growable, MPMC queue used to inject new tasks into the scheduler and as an +/// overflow queue when the local, fixed-size, array queue overflows. +pub(crate) struct Inject<T: 'static> { + shared: Shared<T>, + synced: Mutex<Synced>, +} + +impl<T: 'static> Inject<T> { + pub(crate) fn new() -> Inject<T> { + let (shared, synced) = Shared::new(); + + Inject { + shared, + synced: Mutex::new(synced), + } + } + + // Kind of annoying to have to include the cfg here + #[cfg(tokio_taskdump)] + pub(crate) fn is_closed(&self) -> bool { + let synced = self.synced.lock(); + self.shared.is_closed(&synced) + } + + /// Closes the injection queue, returns `true` if the queue is open when the + /// transition is made. + pub(crate) fn close(&self) -> bool { + let mut synced = self.synced.lock(); + self.shared.close(&mut synced) + } + + /// Pushes a value into the queue. + /// + /// This does nothing if the queue is closed. 
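+    //
+    // Note the split used by `push` / `pop` below: `Shared` carries an atomic
+    // length that can be checked without locking, while `Synced` (the
+    // intrusive list head/tail plus the closed flag) is only touched while
+    // holding `self.synced`. That is why `pop` can bail out early on an empty
+    // queue without taking the mutex.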
+ pub(crate) fn push(&self, task: task::Notified<T>) { + let mut synced = self.synced.lock(); + // safety: passing correct `Synced` + unsafe { self.shared.push(&mut synced, task) } + } + + pub(crate) fn pop(&self) -> Option<task::Notified<T>> { + if self.shared.is_empty() { + return None; + } + + let mut synced = self.synced.lock(); + // safety: passing correct `Synced` + unsafe { self.shared.pop(&mut synced) } + } +} diff --git a/third_party/rust/tokio/src/runtime/scheduler/inject/metrics.rs b/third_party/rust/tokio/src/runtime/scheduler/inject/metrics.rs new file mode 100644 index 0000000000..76f045fdbd --- /dev/null +++ b/third_party/rust/tokio/src/runtime/scheduler/inject/metrics.rs @@ -0,0 +1,7 @@ +use super::Inject; + +impl<T: 'static> Inject<T> { + pub(crate) fn len(&self) -> usize { + self.shared.len() + } +} diff --git a/third_party/rust/tokio/src/runtime/scheduler/inject/pop.rs b/third_party/rust/tokio/src/runtime/scheduler/inject/pop.rs new file mode 100644 index 0000000000..4e6d5d3be3 --- /dev/null +++ b/third_party/rust/tokio/src/runtime/scheduler/inject/pop.rs @@ -0,0 +1,55 @@ +use super::Synced; + +use crate::runtime::task; + +use std::marker::PhantomData; + +pub(crate) struct Pop<'a, T: 'static> { + len: usize, + synced: &'a mut Synced, + _p: PhantomData<T>, +} + +impl<'a, T: 'static> Pop<'a, T> { + pub(super) fn new(len: usize, synced: &'a mut Synced) -> Pop<'a, T> { + Pop { + len, + synced, + _p: PhantomData, + } + } +} + +impl<'a, T: 'static> Iterator for Pop<'a, T> { + type Item = task::Notified<T>; + + fn next(&mut self) -> Option<Self::Item> { + if self.len == 0 { + return None; + } + + let ret = self.synced.pop(); + + // Should be `Some` when `len > 0` + debug_assert!(ret.is_some()); + + self.len -= 1; + ret + } + + fn size_hint(&self) -> (usize, Option<usize>) { + (self.len, Some(self.len)) + } +} + +impl<'a, T: 'static> ExactSizeIterator for Pop<'a, T> { + fn len(&self) -> usize { + self.len + } +} + +impl<'a, T: 'static> Drop for Pop<'a, T> { + fn drop(&mut self) { + for _ in self.by_ref() {} + } +} diff --git a/third_party/rust/tokio/src/runtime/scheduler/inject/rt_multi_thread.rs b/third_party/rust/tokio/src/runtime/scheduler/inject/rt_multi_thread.rs new file mode 100644 index 0000000000..07d1063c5d --- /dev/null +++ b/third_party/rust/tokio/src/runtime/scheduler/inject/rt_multi_thread.rs @@ -0,0 +1,98 @@ +use super::{Shared, Synced}; + +use crate::runtime::scheduler::Lock; +use crate::runtime::task; + +use std::sync::atomic::Ordering::Release; + +impl<'a> Lock<Synced> for &'a mut Synced { + type Handle = &'a mut Synced; + + fn lock(self) -> Self::Handle { + self + } +} + +impl AsMut<Synced> for Synced { + fn as_mut(&mut self) -> &mut Synced { + self + } +} + +impl<T: 'static> Shared<T> { + /// Pushes several values into the queue. + /// + /// # Safety + /// + /// Must be called with the same `Synced` instance returned by `Inject::new` + #[inline] + pub(crate) unsafe fn push_batch<L, I>(&self, shared: L, mut iter: I) + where + L: Lock<Synced>, + I: Iterator<Item = task::Notified<T>>, + { + let first = match iter.next() { + Some(first) => first.into_raw(), + None => return, + }; + + // Link up all the tasks. + let mut prev = first; + let mut counter = 1; + + // We are going to be called with an `std::iter::Chain`, and that + // iterator overrides `for_each` to something that is easier for the + // compiler to optimize than a loop. 
+ iter.for_each(|next| { + let next = next.into_raw(); + + // safety: Holding the Notified for a task guarantees exclusive + // access to the `queue_next` field. + unsafe { prev.set_queue_next(Some(next)) }; + prev = next; + counter += 1; + }); + + // Now that the tasks are linked together, insert them into the + // linked list. + self.push_batch_inner(shared, first, prev, counter); + } + + /// Inserts several tasks that have been linked together into the queue. + /// + /// The provided head and tail may be be the same task. In this case, a + /// single task is inserted. + #[inline] + unsafe fn push_batch_inner<L>( + &self, + shared: L, + batch_head: task::RawTask, + batch_tail: task::RawTask, + num: usize, + ) where + L: Lock<Synced>, + { + debug_assert!(unsafe { batch_tail.get_queue_next().is_none() }); + + let mut synced = shared.lock(); + let synced = synced.as_mut(); + + if let Some(tail) = synced.tail { + unsafe { + tail.set_queue_next(Some(batch_head)); + } + } else { + synced.head = Some(batch_head); + } + + synced.tail = Some(batch_tail); + + // Increment the count. + // + // safety: All updates to the len atomic are guarded by the mutex. As + // such, a non-atomic load followed by a store is safe. + let len = self.len.unsync_load(); + + self.len.store(len + num, Release); + } +} diff --git a/third_party/rust/tokio/src/runtime/scheduler/inject/shared.rs b/third_party/rust/tokio/src/runtime/scheduler/inject/shared.rs new file mode 100644 index 0000000000..7fdd2839dd --- /dev/null +++ b/third_party/rust/tokio/src/runtime/scheduler/inject/shared.rs @@ -0,0 +1,119 @@ +use super::{Pop, Synced}; + +use crate::loom::sync::atomic::AtomicUsize; +use crate::runtime::task; + +use std::marker::PhantomData; +use std::sync::atomic::Ordering::{Acquire, Release}; + +pub(crate) struct Shared<T: 'static> { + /// Number of pending tasks in the queue. This helps prevent unnecessary + /// locking in the hot path. + pub(super) len: AtomicUsize, + + _p: PhantomData<T>, +} + +unsafe impl<T> Send for Shared<T> {} +unsafe impl<T> Sync for Shared<T> {} + +impl<T: 'static> Shared<T> { + pub(crate) fn new() -> (Shared<T>, Synced) { + let inject = Shared { + len: AtomicUsize::new(0), + _p: PhantomData, + }; + + let synced = Synced { + is_closed: false, + head: None, + tail: None, + }; + + (inject, synced) + } + + pub(crate) fn is_empty(&self) -> bool { + self.len() == 0 + } + + // Kind of annoying to have to include the cfg here + #[cfg(any(tokio_taskdump, all(feature = "rt-multi-thread", not(tokio_wasi))))] + pub(crate) fn is_closed(&self, synced: &Synced) -> bool { + synced.is_closed + } + + /// Closes the injection queue, returns `true` if the queue is open when the + /// transition is made. + pub(crate) fn close(&self, synced: &mut Synced) -> bool { + if synced.is_closed { + return false; + } + + synced.is_closed = true; + true + } + + pub(crate) fn len(&self) -> usize { + self.len.load(Acquire) + } + + /// Pushes a value into the queue. + /// + /// This does nothing if the queue is closed. 
+ /// + /// # Safety + /// + /// Must be called with the same `Synced` instance returned by `Inject::new` + pub(crate) unsafe fn push(&self, synced: &mut Synced, task: task::Notified<T>) { + if synced.is_closed { + return; + } + + // safety: only mutated with the lock held + let len = self.len.unsync_load(); + let task = task.into_raw(); + + // The next pointer should already be null + debug_assert!(unsafe { task.get_queue_next().is_none() }); + + if let Some(tail) = synced.tail { + // safety: Holding the Notified for a task guarantees exclusive + // access to the `queue_next` field. + unsafe { tail.set_queue_next(Some(task)) }; + } else { + synced.head = Some(task); + } + + synced.tail = Some(task); + self.len.store(len + 1, Release); + } + + /// Pop a value from the queue. + /// + /// # Safety + /// + /// Must be called with the same `Synced` instance returned by `Inject::new` + pub(crate) unsafe fn pop(&self, synced: &mut Synced) -> Option<task::Notified<T>> { + self.pop_n(synced, 1).next() + } + + /// Pop `n` values from the queue + /// + /// # Safety + /// + /// Must be called with the same `Synced` instance returned by `Inject::new` + pub(crate) unsafe fn pop_n<'a>(&'a self, synced: &'a mut Synced, n: usize) -> Pop<'a, T> { + use std::cmp; + + // safety: All updates to the len atomic are guarded by the mutex. As + // such, a non-atomic load followed by a store is safe. + let len = self.len.unsync_load(); + let n = cmp::min(n, len); + + // Decrement the count. + self.len.store(len - n, Release); + + Pop::new(n, synced) + } +} diff --git a/third_party/rust/tokio/src/runtime/scheduler/inject/synced.rs b/third_party/rust/tokio/src/runtime/scheduler/inject/synced.rs new file mode 100644 index 0000000000..6847f68e5d --- /dev/null +++ b/third_party/rust/tokio/src/runtime/scheduler/inject/synced.rs @@ -0,0 +1,32 @@ +use crate::runtime::task; + +pub(crate) struct Synced { + /// True if the queue is closed. + pub(super) is_closed: bool, + + /// Linked-list head. + pub(super) head: Option<task::RawTask>, + + /// Linked-list tail. + pub(super) tail: Option<task::RawTask>, +} + +unsafe impl Send for Synced {} +unsafe impl Sync for Synced {} + +impl Synced { + pub(super) fn pop<T: 'static>(&mut self) -> Option<task::Notified<T>> { + let task = self.head?; + + self.head = unsafe { task.get_queue_next() }; + + if self.head.is_none() { + self.tail = None; + } + + unsafe { task.set_queue_next(None) }; + + // safety: a `Notified` is pushed into the queue and now it is popped! + Some(unsafe { task::Notified::from_raw(task) }) + } +} diff --git a/third_party/rust/tokio/src/runtime/scheduler/lock.rs b/third_party/rust/tokio/src/runtime/scheduler/lock.rs new file mode 100644 index 0000000000..0901c2b37c --- /dev/null +++ b/third_party/rust/tokio/src/runtime/scheduler/lock.rs @@ -0,0 +1,6 @@ +/// A lock (mutex) yielding generic data. +pub(crate) trait Lock<T> { + type Handle: AsMut<T>; + + fn lock(self) -> Self::Handle; +} diff --git a/third_party/rust/tokio/src/runtime/scheduler/mod.rs b/third_party/rust/tokio/src/runtime/scheduler/mod.rs new file mode 100644 index 0000000000..3e3151711f --- /dev/null +++ b/third_party/rust/tokio/src/runtime/scheduler/mod.rs @@ -0,0 +1,249 @@ +cfg_rt! { + pub(crate) mod current_thread; + pub(crate) use current_thread::CurrentThread; + + mod defer; + use defer::Defer; + + pub(crate) mod inject; + pub(crate) use inject::Inject; +} + +cfg_rt_multi_thread! 
{ + mod lock; + use lock::Lock; + + pub(crate) mod multi_thread; + pub(crate) use multi_thread::MultiThread; +} + +use crate::runtime::driver; + +#[derive(Debug, Clone)] +pub(crate) enum Handle { + #[cfg(feature = "rt")] + CurrentThread(Arc<current_thread::Handle>), + + #[cfg(all(feature = "rt-multi-thread", not(tokio_wasi)))] + MultiThread(Arc<multi_thread::Handle>), + + // TODO: This is to avoid triggering "dead code" warnings many other places + // in the codebase. Remove this during a later cleanup + #[cfg(not(feature = "rt"))] + #[allow(dead_code)] + Disabled, +} + +#[cfg(feature = "rt")] +pub(super) enum Context { + CurrentThread(current_thread::Context), + + #[cfg(all(feature = "rt-multi-thread", not(tokio_wasi)))] + MultiThread(multi_thread::Context), +} + +impl Handle { + #[cfg_attr(not(feature = "full"), allow(dead_code))] + pub(crate) fn driver(&self) -> &driver::Handle { + match *self { + #[cfg(feature = "rt")] + Handle::CurrentThread(ref h) => &h.driver, + + #[cfg(all(feature = "rt-multi-thread", not(tokio_wasi)))] + Handle::MultiThread(ref h) => &h.driver, + + #[cfg(not(feature = "rt"))] + Handle::Disabled => unreachable!(), + } + } +} + +cfg_rt! { + use crate::future::Future; + use crate::loom::sync::Arc; + use crate::runtime::{blocking, task::Id}; + use crate::runtime::context; + use crate::task::JoinHandle; + use crate::util::RngSeedGenerator; + use std::task::Waker; + + impl Handle { + #[track_caller] + pub(crate) fn current() -> Handle { + match context::with_current(Clone::clone) { + Ok(handle) => handle, + Err(e) => panic!("{}", e), + } + } + + pub(crate) fn blocking_spawner(&self) -> &blocking::Spawner { + match self { + Handle::CurrentThread(h) => &h.blocking_spawner, + + #[cfg(all(feature = "rt-multi-thread", not(tokio_wasi)))] + Handle::MultiThread(h) => &h.blocking_spawner, + } + } + + pub(crate) fn spawn<F>(&self, future: F, id: Id) -> JoinHandle<F::Output> + where + F: Future + Send + 'static, + F::Output: Send + 'static, + { + match self { + Handle::CurrentThread(h) => current_thread::Handle::spawn(h, future, id), + + #[cfg(all(feature = "rt-multi-thread", not(tokio_wasi)))] + Handle::MultiThread(h) => multi_thread::Handle::spawn(h, future, id), + } + } + + pub(crate) fn shutdown(&self) { + match *self { + Handle::CurrentThread(_) => {}, + + #[cfg(all(feature = "rt-multi-thread", not(tokio_wasi)))] + Handle::MultiThread(ref h) => h.shutdown(), + } + } + + pub(crate) fn seed_generator(&self) -> &RngSeedGenerator { + match self { + Handle::CurrentThread(h) => &h.seed_generator, + + #[cfg(all(feature = "rt-multi-thread", not(tokio_wasi)))] + Handle::MultiThread(h) => &h.seed_generator, + } + } + + pub(crate) fn as_current_thread(&self) -> &Arc<current_thread::Handle> { + match self { + Handle::CurrentThread(handle) => handle, + #[cfg(all(feature = "rt-multi-thread", not(tokio_wasi)))] + _ => panic!("not a CurrentThread handle"), + } + } + } + + cfg_metrics! 
{ + use crate::runtime::{SchedulerMetrics, WorkerMetrics}; + + impl Handle { + pub(crate) fn num_workers(&self) -> usize { + match self { + Handle::CurrentThread(_) => 1, + #[cfg(all(feature = "rt-multi-thread", not(tokio_wasi)))] + Handle::MultiThread(handle) => handle.num_workers(), + } + } + + pub(crate) fn num_blocking_threads(&self) -> usize { + match self { + Handle::CurrentThread(handle) => handle.num_blocking_threads(), + #[cfg(all(feature = "rt-multi-thread", not(tokio_wasi)))] + Handle::MultiThread(handle) => handle.num_blocking_threads(), + } + } + + pub(crate) fn num_idle_blocking_threads(&self) -> usize { + match self { + Handle::CurrentThread(handle) => handle.num_idle_blocking_threads(), + #[cfg(all(feature = "rt-multi-thread", not(tokio_wasi)))] + Handle::MultiThread(handle) => handle.num_idle_blocking_threads(), + } + } + + pub(crate) fn active_tasks_count(&self) -> usize { + match self { + Handle::CurrentThread(handle) => handle.active_tasks_count(), + #[cfg(all(feature = "rt-multi-thread", not(tokio_wasi)))] + Handle::MultiThread(handle) => handle.active_tasks_count(), + } + } + + pub(crate) fn scheduler_metrics(&self) -> &SchedulerMetrics { + match self { + Handle::CurrentThread(handle) => handle.scheduler_metrics(), + #[cfg(all(feature = "rt-multi-thread", not(tokio_wasi)))] + Handle::MultiThread(handle) => handle.scheduler_metrics(), + } + } + + pub(crate) fn worker_metrics(&self, worker: usize) -> &WorkerMetrics { + match self { + Handle::CurrentThread(handle) => handle.worker_metrics(worker), + #[cfg(all(feature = "rt-multi-thread", not(tokio_wasi)))] + Handle::MultiThread(handle) => handle.worker_metrics(worker), + } + } + + pub(crate) fn injection_queue_depth(&self) -> usize { + match self { + Handle::CurrentThread(handle) => handle.injection_queue_depth(), + #[cfg(all(feature = "rt-multi-thread", not(tokio_wasi)))] + Handle::MultiThread(handle) => handle.injection_queue_depth(), + } + } + + pub(crate) fn worker_local_queue_depth(&self, worker: usize) -> usize { + match self { + Handle::CurrentThread(handle) => handle.worker_metrics(worker).queue_depth(), + #[cfg(all(feature = "rt-multi-thread", not(tokio_wasi)))] + Handle::MultiThread(handle) => handle.worker_local_queue_depth(worker), + } + } + + pub(crate) fn blocking_queue_depth(&self) -> usize { + match self { + Handle::CurrentThread(handle) => handle.blocking_queue_depth(), + #[cfg(all(feature = "rt-multi-thread", not(tokio_wasi)))] + Handle::MultiThread(handle) => handle.blocking_queue_depth(), + } + } + } + } + + impl Context { + #[track_caller] + pub(crate) fn expect_current_thread(&self) -> ¤t_thread::Context { + match self { + Context::CurrentThread(context) => context, + #[cfg(all(feature = "rt-multi-thread", not(tokio_wasi)))] + _ => panic!("expected `CurrentThread::Context`") + } + } + + pub(crate) fn defer(&self, waker: &Waker) { + match self { + Context::CurrentThread(context) => context.defer(waker), + #[cfg(all(feature = "rt-multi-thread", not(tokio_wasi)))] + Context::MultiThread(context) => context.defer(waker), + } + } + + cfg_rt_multi_thread! { + #[track_caller] + pub(crate) fn expect_multi_thread(&self) -> &multi_thread::Context { + match self { + Context::MultiThread(context) => context, + _ => panic!("expected `MultiThread::Context`") + } + } + } + } +} + +cfg_not_rt! 
{ + #[cfg(any( + feature = "net", + all(unix, feature = "process"), + all(unix, feature = "signal"), + feature = "time", + ))] + impl Handle { + #[track_caller] + pub(crate) fn current() -> Handle { + panic!("{}", crate::util::error::CONTEXT_MISSING_ERROR) + } + } +} diff --git a/third_party/rust/tokio/src/runtime/scheduler/multi_thread/counters.rs b/third_party/rust/tokio/src/runtime/scheduler/multi_thread/counters.rs new file mode 100644 index 0000000000..50bcc11985 --- /dev/null +++ b/third_party/rust/tokio/src/runtime/scheduler/multi_thread/counters.rs @@ -0,0 +1,62 @@ +#[cfg(tokio_internal_mt_counters)] +mod imp { + use std::sync::atomic::AtomicUsize; + use std::sync::atomic::Ordering::Relaxed; + + static NUM_MAINTENANCE: AtomicUsize = AtomicUsize::new(0); + static NUM_NOTIFY_LOCAL: AtomicUsize = AtomicUsize::new(0); + static NUM_UNPARKS_LOCAL: AtomicUsize = AtomicUsize::new(0); + static NUM_LIFO_SCHEDULES: AtomicUsize = AtomicUsize::new(0); + static NUM_LIFO_CAPPED: AtomicUsize = AtomicUsize::new(0); + + impl Drop for super::Counters { + fn drop(&mut self) { + let notifies_local = NUM_NOTIFY_LOCAL.load(Relaxed); + let unparks_local = NUM_UNPARKS_LOCAL.load(Relaxed); + let maintenance = NUM_MAINTENANCE.load(Relaxed); + let lifo_scheds = NUM_LIFO_SCHEDULES.load(Relaxed); + let lifo_capped = NUM_LIFO_CAPPED.load(Relaxed); + + println!("---"); + println!("notifies (local): {}", notifies_local); + println!(" unparks (local): {}", unparks_local); + println!(" maintenance: {}", maintenance); + println!(" LIFO schedules: {}", lifo_scheds); + println!(" LIFO capped: {}", lifo_capped); + } + } + + pub(crate) fn inc_num_inc_notify_local() { + NUM_NOTIFY_LOCAL.fetch_add(1, Relaxed); + } + + pub(crate) fn inc_num_unparks_local() { + NUM_UNPARKS_LOCAL.fetch_add(1, Relaxed); + } + + pub(crate) fn inc_num_maintenance() { + NUM_MAINTENANCE.fetch_add(1, Relaxed); + } + + pub(crate) fn inc_lifo_schedules() { + NUM_LIFO_SCHEDULES.fetch_add(1, Relaxed); + } + + pub(crate) fn inc_lifo_capped() { + NUM_LIFO_CAPPED.fetch_add(1, Relaxed); + } +} + +#[cfg(not(tokio_internal_mt_counters))] +mod imp { + pub(crate) fn inc_num_inc_notify_local() {} + pub(crate) fn inc_num_unparks_local() {} + pub(crate) fn inc_num_maintenance() {} + pub(crate) fn inc_lifo_schedules() {} + pub(crate) fn inc_lifo_capped() {} +} + +#[derive(Debug)] +pub(crate) struct Counters; + +pub(super) use imp::*; diff --git a/third_party/rust/tokio/src/runtime/scheduler/multi_thread/handle.rs b/third_party/rust/tokio/src/runtime/scheduler/multi_thread/handle.rs new file mode 100644 index 0000000000..98e4765856 --- /dev/null +++ b/third_party/rust/tokio/src/runtime/scheduler/multi_thread/handle.rs @@ -0,0 +1,68 @@ +use crate::future::Future; +use crate::loom::sync::Arc; +use crate::runtime::scheduler::multi_thread::worker; +use crate::runtime::{ + blocking, driver, + task::{self, JoinHandle}, +}; +use crate::util::RngSeedGenerator; + +use std::fmt; + +cfg_metrics! { + mod metrics; +} + +cfg_taskdump! 
{ + mod taskdump; +} + +/// Handle to the multi thread scheduler +pub(crate) struct Handle { + /// Task spawner + pub(super) shared: worker::Shared, + + /// Resource driver handles + pub(crate) driver: driver::Handle, + + /// Blocking pool spawner + pub(crate) blocking_spawner: blocking::Spawner, + + /// Current random number generator seed + pub(crate) seed_generator: RngSeedGenerator, +} + +impl Handle { + /// Spawns a future onto the thread pool + pub(crate) fn spawn<F>(me: &Arc<Self>, future: F, id: task::Id) -> JoinHandle<F::Output> + where + F: crate::future::Future + Send + 'static, + F::Output: Send + 'static, + { + Self::bind_new_task(me, future, id) + } + + pub(crate) fn shutdown(&self) { + self.close(); + } + + pub(super) fn bind_new_task<T>(me: &Arc<Self>, future: T, id: task::Id) -> JoinHandle<T::Output> + where + T: Future + Send + 'static, + T::Output: Send + 'static, + { + let (handle, notified) = me.shared.owned.bind(future, me.clone(), id); + + if let Some(notified) = notified { + me.schedule_task(notified, false); + } + + handle + } +} + +impl fmt::Debug for Handle { + fn fmt(&self, fmt: &mut fmt::Formatter<'_>) -> fmt::Result { + fmt.debug_struct("multi_thread::Handle { ... }").finish() + } +} diff --git a/third_party/rust/tokio/src/runtime/scheduler/multi_thread/handle/metrics.rs b/third_party/rust/tokio/src/runtime/scheduler/multi_thread/handle/metrics.rs new file mode 100644 index 0000000000..838694fc89 --- /dev/null +++ b/third_party/rust/tokio/src/runtime/scheduler/multi_thread/handle/metrics.rs @@ -0,0 +1,41 @@ +use super::Handle; + +use crate::runtime::{SchedulerMetrics, WorkerMetrics}; + +impl Handle { + pub(crate) fn num_workers(&self) -> usize { + self.shared.worker_metrics.len() + } + + pub(crate) fn num_blocking_threads(&self) -> usize { + self.blocking_spawner.num_threads() + } + + pub(crate) fn num_idle_blocking_threads(&self) -> usize { + self.blocking_spawner.num_idle_threads() + } + + pub(crate) fn active_tasks_count(&self) -> usize { + self.shared.owned.active_tasks_count() + } + + pub(crate) fn scheduler_metrics(&self) -> &SchedulerMetrics { + &self.shared.scheduler_metrics + } + + pub(crate) fn worker_metrics(&self, worker: usize) -> &WorkerMetrics { + &self.shared.worker_metrics[worker] + } + + pub(crate) fn injection_queue_depth(&self) -> usize { + self.shared.injection_queue_depth() + } + + pub(crate) fn worker_local_queue_depth(&self, worker: usize) -> usize { + self.shared.worker_local_queue_depth(worker) + } + + pub(crate) fn blocking_queue_depth(&self) -> usize { + self.blocking_spawner.queue_depth() + } +} diff --git a/third_party/rust/tokio/src/runtime/scheduler/multi_thread/handle/taskdump.rs b/third_party/rust/tokio/src/runtime/scheduler/multi_thread/handle/taskdump.rs new file mode 100644 index 0000000000..477d857d88 --- /dev/null +++ b/third_party/rust/tokio/src/runtime/scheduler/multi_thread/handle/taskdump.rs @@ -0,0 +1,26 @@ +use super::Handle; + +use crate::runtime::Dump; + +impl Handle { + pub(crate) async fn dump(&self) -> Dump { + let trace_status = &self.shared.trace_status; + + // If a dump is in progress, block. + trace_status.start_trace_request(&self).await; + + let result = loop { + if let Some(result) = trace_status.take_result() { + break result; + } else { + self.notify_all(); + trace_status.result_ready.notified().await; + } + }; + + // Allow other queued dumps to proceed. 
+ trace_status.end_trace_request(&self).await; + + result + } +} diff --git a/third_party/rust/tokio/src/runtime/scheduler/multi_thread/idle.rs b/third_party/rust/tokio/src/runtime/scheduler/multi_thread/idle.rs new file mode 100644 index 0000000000..834bc2b66f --- /dev/null +++ b/third_party/rust/tokio/src/runtime/scheduler/multi_thread/idle.rs @@ -0,0 +1,240 @@ +//! Coordinates idling workers + +use crate::loom::sync::atomic::AtomicUsize; +use crate::runtime::scheduler::multi_thread::Shared; + +use std::fmt; +use std::sync::atomic::Ordering::{self, SeqCst}; + +pub(super) struct Idle { + /// Tracks both the number of searching workers and the number of unparked + /// workers. + /// + /// Used as a fast-path to avoid acquiring the lock when needed. + state: AtomicUsize, + + /// Total number of workers. + num_workers: usize, +} + +/// Data synchronized by the scheduler mutex +pub(super) struct Synced { + /// Sleeping workers + sleepers: Vec<usize>, +} + +const UNPARK_SHIFT: usize = 16; +const UNPARK_MASK: usize = !SEARCH_MASK; +const SEARCH_MASK: usize = (1 << UNPARK_SHIFT) - 1; + +#[derive(Copy, Clone)] +struct State(usize); + +impl Idle { + pub(super) fn new(num_workers: usize) -> (Idle, Synced) { + let init = State::new(num_workers); + + let idle = Idle { + state: AtomicUsize::new(init.into()), + num_workers, + }; + + let synced = Synced { + sleepers: Vec::with_capacity(num_workers), + }; + + (idle, synced) + } + + /// If there are no workers actively searching, returns the index of a + /// worker currently sleeping. + pub(super) fn worker_to_notify(&self, shared: &Shared) -> Option<usize> { + // If at least one worker is spinning, work being notified will + // eventually be found. A searching thread will find **some** work and + // notify another worker, eventually leading to our work being found. + // + // For this to happen, this load must happen before the thread + // transitioning `num_searching` to zero. Acquire / Release does not + // provide sufficient guarantees, so this load is done with `SeqCst` and + // will pair with the `fetch_sub(1)` when transitioning out of + // searching. + if !self.notify_should_wakeup() { + return None; + } + + // Acquire the lock + let mut lock = shared.synced.lock(); + + // Check again, now that the lock is acquired + if !self.notify_should_wakeup() { + return None; + } + + // A worker should be woken up, atomically increment the number of + // searching workers as well as the number of unparked workers. + State::unpark_one(&self.state, 1); + + // Get the worker to unpark + let ret = lock.idle.sleepers.pop(); + debug_assert!(ret.is_some()); + + ret + } + + /// Returns `true` if the worker needs to do a final check for submitted + /// work. + pub(super) fn transition_worker_to_parked( + &self, + shared: &Shared, + worker: usize, + is_searching: bool, + ) -> bool { + // Acquire the lock + let mut lock = shared.synced.lock(); + + // Decrement the number of unparked threads + let ret = State::dec_num_unparked(&self.state, is_searching); + + // Track the sleeping worker + lock.idle.sleepers.push(worker); + + ret + } + + pub(super) fn transition_worker_to_searching(&self) -> bool { + let state = State::load(&self.state, SeqCst); + if 2 * state.num_searching() >= self.num_workers { + return false; + } + + // It is possible for this routine to allow more than 50% of the workers + // to search. That is OK. Limiting searchers is only an optimization to + // prevent too much contention. 
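+        //
+        // Worked example: with 4 workers and 2 already searching, the check
+        // above sees `2 * 2 >= 4` and refuses a third searcher, so at most
+        // about half of the workers spin looking for work at any one time.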
+ State::inc_num_searching(&self.state, SeqCst); + true + } + + /// A lightweight transition from searching -> running. + /// + /// Returns `true` if this is the final searching worker. The caller + /// **must** notify a new worker. + pub(super) fn transition_worker_from_searching(&self) -> bool { + State::dec_num_searching(&self.state) + } + + /// Unpark a specific worker. This happens if tasks are submitted from + /// within the worker's park routine. + /// + /// Returns `true` if the worker was parked before calling the method. + pub(super) fn unpark_worker_by_id(&self, shared: &Shared, worker_id: usize) -> bool { + let mut lock = shared.synced.lock(); + let sleepers = &mut lock.idle.sleepers; + + for index in 0..sleepers.len() { + if sleepers[index] == worker_id { + sleepers.swap_remove(index); + + // Update the state accordingly while the lock is held. + State::unpark_one(&self.state, 0); + + return true; + } + } + + false + } + + /// Returns `true` if `worker_id` is contained in the sleep set. + pub(super) fn is_parked(&self, shared: &Shared, worker_id: usize) -> bool { + let lock = shared.synced.lock(); + lock.idle.sleepers.contains(&worker_id) + } + + fn notify_should_wakeup(&self) -> bool { + let state = State(self.state.fetch_add(0, SeqCst)); + state.num_searching() == 0 && state.num_unparked() < self.num_workers + } +} + +impl State { + fn new(num_workers: usize) -> State { + // All workers start in the unparked state + let ret = State(num_workers << UNPARK_SHIFT); + debug_assert_eq!(num_workers, ret.num_unparked()); + debug_assert_eq!(0, ret.num_searching()); + ret + } + + fn load(cell: &AtomicUsize, ordering: Ordering) -> State { + State(cell.load(ordering)) + } + + fn unpark_one(cell: &AtomicUsize, num_searching: usize) { + cell.fetch_add(num_searching | (1 << UNPARK_SHIFT), SeqCst); + } + + fn inc_num_searching(cell: &AtomicUsize, ordering: Ordering) { + cell.fetch_add(1, ordering); + } + + /// Returns `true` if this is the final searching worker + fn dec_num_searching(cell: &AtomicUsize) -> bool { + let state = State(cell.fetch_sub(1, SeqCst)); + state.num_searching() == 1 + } + + /// Track a sleeping worker + /// + /// Returns `true` if this is the final searching worker. 
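+    //
+    // Layout reminder: `state = (num_unparked << UNPARK_SHIFT) | num_searching`,
+    // with `UNPARK_SHIFT == 16`. Parking a worker that was also searching
+    // therefore subtracts `(1 << 16) + 1` in one `fetch_sub`, decrementing
+    // both counters in a single atomic operation.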
+ fn dec_num_unparked(cell: &AtomicUsize, is_searching: bool) -> bool { + let mut dec = 1 << UNPARK_SHIFT; + + if is_searching { + dec += 1; + } + + let prev = State(cell.fetch_sub(dec, SeqCst)); + is_searching && prev.num_searching() == 1 + } + + /// Number of workers currently searching + fn num_searching(self) -> usize { + self.0 & SEARCH_MASK + } + + /// Number of workers currently unparked + fn num_unparked(self) -> usize { + (self.0 & UNPARK_MASK) >> UNPARK_SHIFT + } +} + +impl From<usize> for State { + fn from(src: usize) -> State { + State(src) + } +} + +impl From<State> for usize { + fn from(src: State) -> usize { + src.0 + } +} + +impl fmt::Debug for State { + fn fmt(&self, fmt: &mut fmt::Formatter<'_>) -> fmt::Result { + fmt.debug_struct("worker::State") + .field("num_unparked", &self.num_unparked()) + .field("num_searching", &self.num_searching()) + .finish() + } +} + +#[test] +fn test_state() { + assert_eq!(0, UNPARK_MASK & SEARCH_MASK); + assert_eq!(0, !(UNPARK_MASK | SEARCH_MASK)); + + let state = State::new(10); + assert_eq!(10, state.num_unparked()); + assert_eq!(0, state.num_searching()); +} diff --git a/third_party/rust/tokio/src/runtime/scheduler/multi_thread/mod.rs b/third_party/rust/tokio/src/runtime/scheduler/multi_thread/mod.rs new file mode 100644 index 0000000000..d85a0ae0a2 --- /dev/null +++ b/third_party/rust/tokio/src/runtime/scheduler/multi_thread/mod.rs @@ -0,0 +1,103 @@ +//! Multi-threaded runtime + +mod counters; +use counters::Counters; + +mod handle; +pub(crate) use handle::Handle; + +mod overflow; +pub(crate) use overflow::Overflow; + +mod idle; +use self::idle::Idle; + +mod stats; +pub(crate) use stats::Stats; + +mod park; +pub(crate) use park::{Parker, Unparker}; + +pub(crate) mod queue; + +mod worker; +pub(crate) use worker::{Context, Launch, Shared}; + +cfg_taskdump! { + mod trace; + use trace::TraceStatus; + + pub(crate) use worker::Synced; +} + +cfg_not_taskdump! { + mod trace_mock; + use trace_mock::TraceStatus; +} + +pub(crate) use worker::block_in_place; + +use crate::loom::sync::Arc; +use crate::runtime::{ + blocking, + driver::{self, Driver}, + scheduler, Config, +}; +use crate::util::RngSeedGenerator; + +use std::fmt; +use std::future::Future; + +/// Work-stealing based thread pool for executing futures. +pub(crate) struct MultiThread; + +// ===== impl MultiThread ===== + +impl MultiThread { + pub(crate) fn new( + size: usize, + driver: Driver, + driver_handle: driver::Handle, + blocking_spawner: blocking::Spawner, + seed_generator: RngSeedGenerator, + config: Config, + ) -> (MultiThread, Arc<Handle>, Launch) { + let parker = Parker::new(driver); + let (handle, launch) = worker::create( + size, + parker, + driver_handle, + blocking_spawner, + seed_generator, + config, + ); + + (MultiThread, handle, launch) + } + + /// Blocks the current thread waiting for the future to complete. + /// + /// The future will execute on the current thread, but all spawned tasks + /// will be executed on the thread pool. 
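+    //
+    // From the public API this is the path taken by, for example
+    // (illustrative only):
+    //
+    //     let rt = tokio::runtime::Builder::new_multi_thread()
+    //         .enable_all()
+    //         .build()
+    //         .unwrap();
+    //     rt.block_on(async { /* polled here; spawned tasks run on the workers */ });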
+ pub(crate) fn block_on<F>(&self, handle: &scheduler::Handle, future: F) -> F::Output + where + F: Future, + { + crate::runtime::context::enter_runtime(handle, true, |blocking| { + blocking.block_on(future).expect("failed to park thread") + }) + } + + pub(crate) fn shutdown(&mut self, handle: &scheduler::Handle) { + match handle { + scheduler::Handle::MultiThread(handle) => handle.shutdown(), + _ => panic!("expected MultiThread scheduler"), + } + } +} + +impl fmt::Debug for MultiThread { + fn fmt(&self, fmt: &mut fmt::Formatter<'_>) -> fmt::Result { + fmt.debug_struct("MultiThread").finish() + } +} diff --git a/third_party/rust/tokio/src/runtime/scheduler/multi_thread/overflow.rs b/third_party/rust/tokio/src/runtime/scheduler/multi_thread/overflow.rs new file mode 100644 index 0000000000..ab664811cf --- /dev/null +++ b/third_party/rust/tokio/src/runtime/scheduler/multi_thread/overflow.rs @@ -0,0 +1,26 @@ +use crate::runtime::task; + +#[cfg(test)] +use std::cell::RefCell; + +pub(crate) trait Overflow<T: 'static> { + fn push(&self, task: task::Notified<T>); + + fn push_batch<I>(&self, iter: I) + where + I: Iterator<Item = task::Notified<T>>; +} + +#[cfg(test)] +impl<T: 'static> Overflow<T> for RefCell<Vec<task::Notified<T>>> { + fn push(&self, task: task::Notified<T>) { + self.borrow_mut().push(task); + } + + fn push_batch<I>(&self, iter: I) + where + I: Iterator<Item = task::Notified<T>>, + { + self.borrow_mut().extend(iter); + } +} diff --git a/third_party/rust/tokio/src/runtime/scheduler/multi_thread/park.rs b/third_party/rust/tokio/src/runtime/scheduler/multi_thread/park.rs new file mode 100644 index 0000000000..0a00ea004e --- /dev/null +++ b/third_party/rust/tokio/src/runtime/scheduler/multi_thread/park.rs @@ -0,0 +1,232 @@ +//! Parks the runtime. +//! +//! A combination of the various resource driver park handles. + +use crate::loom::sync::atomic::AtomicUsize; +use crate::loom::sync::{Arc, Condvar, Mutex}; +use crate::runtime::driver::{self, Driver}; +use crate::util::TryLock; + +use std::sync::atomic::Ordering::SeqCst; +use std::time::Duration; + +pub(crate) struct Parker { + inner: Arc<Inner>, +} + +pub(crate) struct Unparker { + inner: Arc<Inner>, +} + +struct Inner { + /// Avoids entering the park if possible + state: AtomicUsize, + + /// Used to coordinate access to the driver / condvar + mutex: Mutex<()>, + + /// Condvar to block on if the driver is unavailable. + condvar: Condvar, + + /// Resource (I/O, time, ...) driver + shared: Arc<Shared>, +} + +const EMPTY: usize = 0; +const PARKED_CONDVAR: usize = 1; +const PARKED_DRIVER: usize = 2; +const NOTIFIED: usize = 3; + +/// Shared across multiple Parker handles +struct Shared { + /// Shared driver. Only one thread at a time can use this + driver: TryLock<Driver>, +} + +impl Parker { + pub(crate) fn new(driver: Driver) -> Parker { + Parker { + inner: Arc::new(Inner { + state: AtomicUsize::new(EMPTY), + mutex: Mutex::new(()), + condvar: Condvar::new(), + shared: Arc::new(Shared { + driver: TryLock::new(driver), + }), + }), + } + } + + pub(crate) fn unpark(&self) -> Unparker { + Unparker { + inner: self.inner.clone(), + } + } + + pub(crate) fn park(&mut self, handle: &driver::Handle) { + self.inner.park(handle); + } + + pub(crate) fn park_timeout(&mut self, handle: &driver::Handle, duration: Duration) { + // Only parking with zero is supported... 
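+        //
+        // A zero duration means "poll the driver for ready events but do not
+        // sleep"; callers that actually want to block use `park` instead, so
+        // any other value here would indicate a scheduler bug.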
+ assert_eq!(duration, Duration::from_millis(0)); + + if let Some(mut driver) = self.inner.shared.driver.try_lock() { + driver.park_timeout(handle, duration) + } + } + + pub(crate) fn shutdown(&mut self, handle: &driver::Handle) { + self.inner.shutdown(handle); + } +} + +impl Clone for Parker { + fn clone(&self) -> Parker { + Parker { + inner: Arc::new(Inner { + state: AtomicUsize::new(EMPTY), + mutex: Mutex::new(()), + condvar: Condvar::new(), + shared: self.inner.shared.clone(), + }), + } + } +} + +impl Unparker { + pub(crate) fn unpark(&self, driver: &driver::Handle) { + self.inner.unpark(driver); + } +} + +impl Inner { + /// Parks the current thread for at most `dur`. + fn park(&self, handle: &driver::Handle) { + // If we were previously notified then we consume this notification and + // return quickly. + if self + .state + .compare_exchange(NOTIFIED, EMPTY, SeqCst, SeqCst) + .is_ok() + { + return; + } + + if let Some(mut driver) = self.shared.driver.try_lock() { + self.park_driver(&mut driver, handle); + } else { + self.park_condvar(); + } + } + + fn park_condvar(&self) { + // Otherwise we need to coordinate going to sleep + let mut m = self.mutex.lock(); + + match self + .state + .compare_exchange(EMPTY, PARKED_CONDVAR, SeqCst, SeqCst) + { + Ok(_) => {} + Err(NOTIFIED) => { + // We must read here, even though we know it will be `NOTIFIED`. + // This is because `unpark` may have been called again since we read + // `NOTIFIED` in the `compare_exchange` above. We must perform an + // acquire operation that synchronizes with that `unpark` to observe + // any writes it made before the call to unpark. To do that we must + // read from the write it made to `state`. + let old = self.state.swap(EMPTY, SeqCst); + debug_assert_eq!(old, NOTIFIED, "park state changed unexpectedly"); + + return; + } + Err(actual) => panic!("inconsistent park state; actual = {}", actual), + } + + loop { + m = self.condvar.wait(m).unwrap(); + + if self + .state + .compare_exchange(NOTIFIED, EMPTY, SeqCst, SeqCst) + .is_ok() + { + // got a notification + return; + } + + // spurious wakeup, go back to sleep + } + } + + fn park_driver(&self, driver: &mut Driver, handle: &driver::Handle) { + match self + .state + .compare_exchange(EMPTY, PARKED_DRIVER, SeqCst, SeqCst) + { + Ok(_) => {} + Err(NOTIFIED) => { + // We must read here, even though we know it will be `NOTIFIED`. + // This is because `unpark` may have been called again since we read + // `NOTIFIED` in the `compare_exchange` above. We must perform an + // acquire operation that synchronizes with that `unpark` to observe + // any writes it made before the call to unpark. To do that we must + // read from the write it made to `state`. + let old = self.state.swap(EMPTY, SeqCst); + debug_assert_eq!(old, NOTIFIED, "park state changed unexpectedly"); + + return; + } + Err(actual) => panic!("inconsistent park state; actual = {}", actual), + } + + driver.park(handle); + + match self.state.swap(EMPTY, SeqCst) { + NOTIFIED => {} // got a notification, hurray! + PARKED_DRIVER => {} // no notification, alas + n => panic!("inconsistent park_timeout state: {}", n), + } + } + + fn unpark(&self, driver: &driver::Handle) { + // To ensure the unparked thread will observe any writes we made before + // this call, we must perform a release operation that `park` can + // synchronize with. To do that we must write `NOTIFIED` even if `state` + // is already `NOTIFIED`. That is why this must be a swap rather than a + // compare-and-swap that returns if it reads `NOTIFIED` on failure. 
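// Hedged aside, not upstream code: the release/acquire pairing that the comment
// above relies on can be shown in miniature with plain std atomics. The
// "unparker" publishes data and then swaps the state to NOTIFIED; the "parked"
// side consumes NOTIFIED and is then guaranteed to observe the published data.
// All names and values below are illustrative only.
fn notify_handshake_sketch() {
    use std::sync::atomic::{AtomicUsize, Ordering::SeqCst};
    use std::sync::Arc;
    use std::thread;

    const EMPTY: usize = 0;
    const NOTIFIED: usize = 3;

    let state = Arc::new(AtomicUsize::new(EMPTY));
    let data = Arc::new(AtomicUsize::new(0));

    let t = {
        let (state, data) = (state.clone(), data.clone());
        thread::spawn(move || {
            data.store(42, SeqCst);       // write made before the "unpark"
            state.swap(NOTIFIED, SeqCst); // publish the write to the parked side
        })
    };

    // The "park" side: consume the notification (a real parker falls back to a
    // condvar instead of spinning).
    while state
        .compare_exchange(NOTIFIED, EMPTY, SeqCst, SeqCst)
        .is_err()
    {
        std::hint::spin_loop();
    }
    assert_eq!(data.load(SeqCst), 42); // the consuming side sees the write
    t.join().unwrap();
}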
+ match self.state.swap(NOTIFIED, SeqCst) { + EMPTY => {} // no one was waiting + NOTIFIED => {} // already unparked + PARKED_CONDVAR => self.unpark_condvar(), + PARKED_DRIVER => driver.unpark(), + actual => panic!("inconsistent state in unpark; actual = {}", actual), + } + } + + fn unpark_condvar(&self) { + // There is a period between when the parked thread sets `state` to + // `PARKED` (or last checked `state` in the case of a spurious wake + // up) and when it actually waits on `cvar`. If we were to notify + // during this period it would be ignored and then when the parked + // thread went to sleep it would never wake up. Fortunately, it has + // `lock` locked at this stage so we can acquire `lock` to wait until + // it is ready to receive the notification. + // + // Releasing `lock` before the call to `notify_one` means that when the + // parked thread wakes it doesn't get woken only to have to wait for us + // to release `lock`. + drop(self.mutex.lock()); + + self.condvar.notify_one() + } + + fn shutdown(&self, handle: &driver::Handle) { + if let Some(mut driver) = self.shared.driver.try_lock() { + driver.shutdown(handle); + } + + self.condvar.notify_all(); + } +} diff --git a/third_party/rust/tokio/src/runtime/scheduler/multi_thread/queue.rs b/third_party/rust/tokio/src/runtime/scheduler/multi_thread/queue.rs new file mode 100644 index 0000000000..dd66fa2dde --- /dev/null +++ b/third_party/rust/tokio/src/runtime/scheduler/multi_thread/queue.rs @@ -0,0 +1,608 @@ +//! Run-queue structures to support a work-stealing scheduler + +use crate::loom::cell::UnsafeCell; +use crate::loom::sync::Arc; +use crate::runtime::scheduler::multi_thread::{Overflow, Stats}; +use crate::runtime::task; + +use std::mem::{self, MaybeUninit}; +use std::ptr; +use std::sync::atomic::Ordering::{AcqRel, Acquire, Relaxed, Release}; + +// Use wider integers when possible to increase ABA resilience. +// +// See issue #5041: <https://github.com/tokio-rs/tokio/issues/5041>. +cfg_has_atomic_u64! { + type UnsignedShort = u32; + type UnsignedLong = u64; + type AtomicUnsignedShort = crate::loom::sync::atomic::AtomicU32; + type AtomicUnsignedLong = crate::loom::sync::atomic::AtomicU64; +} +cfg_not_has_atomic_u64! { + type UnsignedShort = u16; + type UnsignedLong = u32; + type AtomicUnsignedShort = crate::loom::sync::atomic::AtomicU16; + type AtomicUnsignedLong = crate::loom::sync::atomic::AtomicU32; +} + +/// Producer handle. May only be used from a single thread. +pub(crate) struct Local<T: 'static> { + inner: Arc<Inner<T>>, +} + +/// Consumer handle. May be used from many threads. +pub(crate) struct Steal<T: 'static>(Arc<Inner<T>>); + +pub(crate) struct Inner<T: 'static> { + /// Concurrently updated by many threads. + /// + /// Contains two `UnsignedShort` values. The LSB byte is the "real" head of + /// the queue. The `UnsignedShort` in the MSB is set by a stealer in process + /// of stealing values. It represents the first value being stolen in the + /// batch. The `UnsignedShort` indices are intentionally wider than strictly + /// required for buffer indexing in order to provide ABA mitigation and make + /// it possible to distinguish between full and empty buffers. + /// + /// When both `UnsignedShort` values are the same, there is no active + /// stealer. + /// + /// Tracking an in-progress stealer prevents a wrapping scenario. + head: AtomicUnsignedLong, + + /// Only updated by producer thread but read by many threads. 
+ tail: AtomicUnsignedShort, + + /// Elements + buffer: Box<[UnsafeCell<MaybeUninit<task::Notified<T>>>; LOCAL_QUEUE_CAPACITY]>, +} + +unsafe impl<T> Send for Inner<T> {} +unsafe impl<T> Sync for Inner<T> {} + +#[cfg(not(loom))] +const LOCAL_QUEUE_CAPACITY: usize = 256; + +// Shrink the size of the local queue when using loom. This shouldn't impact +// logic, but allows loom to test more edge cases in a reasonable a mount of +// time. +#[cfg(loom)] +const LOCAL_QUEUE_CAPACITY: usize = 4; + +const MASK: usize = LOCAL_QUEUE_CAPACITY - 1; + +// Constructing the fixed size array directly is very awkward. The only way to +// do it is to repeat `UnsafeCell::new(MaybeUninit::uninit())` 256 times, as +// the contents are not Copy. The trick with defining a const doesn't work for +// generic types. +fn make_fixed_size<T>(buffer: Box<[T]>) -> Box<[T; LOCAL_QUEUE_CAPACITY]> { + assert_eq!(buffer.len(), LOCAL_QUEUE_CAPACITY); + + // safety: We check that the length is correct. + unsafe { Box::from_raw(Box::into_raw(buffer).cast()) } +} + +/// Create a new local run-queue +pub(crate) fn local<T: 'static>() -> (Steal<T>, Local<T>) { + let mut buffer = Vec::with_capacity(LOCAL_QUEUE_CAPACITY); + + for _ in 0..LOCAL_QUEUE_CAPACITY { + buffer.push(UnsafeCell::new(MaybeUninit::uninit())); + } + + let inner = Arc::new(Inner { + head: AtomicUnsignedLong::new(0), + tail: AtomicUnsignedShort::new(0), + buffer: make_fixed_size(buffer.into_boxed_slice()), + }); + + let local = Local { + inner: inner.clone(), + }; + + let remote = Steal(inner); + + (remote, local) +} + +impl<T> Local<T> { + /// Returns the number of entries in the queue + pub(crate) fn len(&self) -> usize { + self.inner.len() as usize + } + + /// How many tasks can be pushed into the queue + pub(crate) fn remaining_slots(&self) -> usize { + self.inner.remaining_slots() + } + + pub(crate) fn max_capacity(&self) -> usize { + LOCAL_QUEUE_CAPACITY + } + + /// Returns false if there are any entries in the queue + /// + /// Separate to is_stealable so that refactors of is_stealable to "protect" + /// some tasks from stealing won't affect this + pub(crate) fn has_tasks(&self) -> bool { + !self.inner.is_empty() + } + + /// Pushes a batch of tasks to the back of the queue. All tasks must fit in + /// the local queue. + /// + /// # Panics + /// + /// The method panics if there is not enough capacity to fit in the queue. + pub(crate) fn push_back(&mut self, tasks: impl ExactSizeIterator<Item = task::Notified<T>>) { + let len = tasks.len(); + assert!(len <= LOCAL_QUEUE_CAPACITY); + + if len == 0 { + // Nothing to do + return; + } + + let head = self.inner.head.load(Acquire); + let (steal, _) = unpack(head); + + // safety: this is the **only** thread that updates this cell. + let mut tail = unsafe { self.inner.tail.unsync_load() }; + + if tail.wrapping_sub(steal) <= (LOCAL_QUEUE_CAPACITY - len) as UnsignedShort { + // Yes, this if condition is structured a bit weird (first block + // does nothing, second returns an error). It is this way to match + // `push_back_or_overflow`. + } else { + panic!() + } + + for task in tasks { + let idx = tail as usize & MASK; + + self.inner.buffer[idx].with_mut(|ptr| { + // Write the task to the slot + // + // Safety: There is only one producer and the above `if` + // condition ensures we don't touch a cell if there is a + // value, thus no consumer. 
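// Illustrative sketch of the indexing scheme above (assumed: u8 indices and a
// capacity of 64; the real queue uses wider indices and capacity 256): because
// the capacity is a power of two and the head/tail indices are allowed to
// wrap, `idx & MASK` selects the slot and `tail.wrapping_sub(head)` yields the
// occupied length even across wraparound.
fn ring_index_sketch() {
    const CAPACITY: u8 = 64;
    const MASK: usize = (CAPACITY as usize) - 1;

    // A head/tail pair that has wrapped the u8 index space.
    let head: u8 = 250;
    let tail: u8 = head.wrapping_add(10); // wraps to 4

    // The length is still correct despite the wrap.
    assert_eq!(tail.wrapping_sub(head), 10);

    // Successive positions map to successive slots, modulo the capacity.
    let slots: Vec<usize> = (0..10u8)
        .map(|i| head.wrapping_add(i) as usize & MASK)
        .collect();
    assert_eq!(slots[0], 250 & MASK);
    assert_eq!(slots[9], (250usize + 9) & MASK);
}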
+ unsafe { + ptr::write((*ptr).as_mut_ptr(), task); + } + }); + + tail = tail.wrapping_add(1); + } + + self.inner.tail.store(tail, Release); + } + + /// Pushes a task to the back of the local queue, if there is not enough + /// capacity in the queue, this triggers the overflow operation. + /// + /// When the queue overflows, half of the curent contents of the queue is + /// moved to the given Injection queue. This frees up capacity for more + /// tasks to be pushed into the local queue. + pub(crate) fn push_back_or_overflow<O: Overflow<T>>( + &mut self, + mut task: task::Notified<T>, + overflow: &O, + stats: &mut Stats, + ) { + let tail = loop { + let head = self.inner.head.load(Acquire); + let (steal, real) = unpack(head); + + // safety: this is the **only** thread that updates this cell. + let tail = unsafe { self.inner.tail.unsync_load() }; + + if tail.wrapping_sub(steal) < LOCAL_QUEUE_CAPACITY as UnsignedShort { + // There is capacity for the task + break tail; + } else if steal != real { + // Concurrently stealing, this will free up capacity, so only + // push the task onto the inject queue + overflow.push(task); + return; + } else { + // Push the current task and half of the queue into the + // inject queue. + match self.push_overflow(task, real, tail, overflow, stats) { + Ok(_) => return, + // Lost the race, try again + Err(v) => { + task = v; + } + } + } + }; + + self.push_back_finish(task, tail); + } + + // Second half of `push_back` + fn push_back_finish(&self, task: task::Notified<T>, tail: UnsignedShort) { + // Map the position to a slot index. + let idx = tail as usize & MASK; + + self.inner.buffer[idx].with_mut(|ptr| { + // Write the task to the slot + // + // Safety: There is only one producer and the above `if` + // condition ensures we don't touch a cell if there is a + // value, thus no consumer. + unsafe { + ptr::write((*ptr).as_mut_ptr(), task); + } + }); + + // Make the task available. Synchronizes with a load in + // `steal_into2`. + self.inner.tail.store(tail.wrapping_add(1), Release); + } + + /// Moves a batch of tasks into the inject queue. + /// + /// This will temporarily make some of the tasks unavailable to stealers. + /// Once `push_overflow` is done, a notification is sent out, so if other + /// workers "missed" some of the tasks during a steal, they will get + /// another opportunity. + #[inline(never)] + fn push_overflow<O: Overflow<T>>( + &mut self, + task: task::Notified<T>, + head: UnsignedShort, + tail: UnsignedShort, + overflow: &O, + stats: &mut Stats, + ) -> Result<(), task::Notified<T>> { + /// How many elements are we taking from the local queue. + /// + /// This is one less than the number of tasks pushed to the inject + /// queue as we are also inserting the `task` argument. + const NUM_TASKS_TAKEN: UnsignedShort = (LOCAL_QUEUE_CAPACITY / 2) as UnsignedShort; + + assert_eq!( + tail.wrapping_sub(head) as usize, + LOCAL_QUEUE_CAPACITY, + "queue is not full; tail = {}; head = {}", + tail, + head + ); + + let prev = pack(head, head); + + // Claim a bunch of tasks + // + // We are claiming the tasks **before** reading them out of the buffer. + // This is safe because only the **current** thread is able to push new + // tasks. + // + // There isn't really any need for memory ordering... Relaxed would + // work. This is because all tasks are pushed into the queue from the + // current thread (or memory has been acquired if the local queue handle + // moved). 
+ if self + .inner + .head + .compare_exchange( + prev, + pack( + head.wrapping_add(NUM_TASKS_TAKEN), + head.wrapping_add(NUM_TASKS_TAKEN), + ), + Release, + Relaxed, + ) + .is_err() + { + // We failed to claim the tasks, losing the race. Return out of + // this function and try the full `push` routine again. The queue + // may not be full anymore. + return Err(task); + } + + /// An iterator that takes elements out of the run queue. + struct BatchTaskIter<'a, T: 'static> { + buffer: &'a [UnsafeCell<MaybeUninit<task::Notified<T>>>; LOCAL_QUEUE_CAPACITY], + head: UnsignedLong, + i: UnsignedLong, + } + impl<'a, T: 'static> Iterator for BatchTaskIter<'a, T> { + type Item = task::Notified<T>; + + #[inline] + fn next(&mut self) -> Option<task::Notified<T>> { + if self.i == UnsignedLong::from(NUM_TASKS_TAKEN) { + None + } else { + let i_idx = self.i.wrapping_add(self.head) as usize & MASK; + let slot = &self.buffer[i_idx]; + + // safety: Our CAS from before has assumed exclusive ownership + // of the task pointers in this range. + let task = slot.with(|ptr| unsafe { ptr::read((*ptr).as_ptr()) }); + + self.i += 1; + Some(task) + } + } + } + + // safety: The CAS above ensures that no consumer will look at these + // values again, and we are the only producer. + let batch_iter = BatchTaskIter { + buffer: &self.inner.buffer, + head: head as UnsignedLong, + i: 0, + }; + overflow.push_batch(batch_iter.chain(std::iter::once(task))); + + // Add 1 to factor in the task currently being scheduled. + stats.incr_overflow_count(); + + Ok(()) + } + + /// Pops a task from the local queue. + pub(crate) fn pop(&mut self) -> Option<task::Notified<T>> { + let mut head = self.inner.head.load(Acquire); + + let idx = loop { + let (steal, real) = unpack(head); + + // safety: this is the **only** thread that updates this cell. + let tail = unsafe { self.inner.tail.unsync_load() }; + + if real == tail { + // queue is empty + return None; + } + + let next_real = real.wrapping_add(1); + + // If `steal == real` there are no concurrent stealers. Both `steal` + // and `real` are updated. + let next = if steal == real { + pack(next_real, next_real) + } else { + assert_ne!(steal, next_real); + pack(steal, next_real) + }; + + // Attempt to claim a task. + let res = self + .inner + .head + .compare_exchange(head, next, AcqRel, Acquire); + + match res { + Ok(_) => break real as usize & MASK, + Err(actual) => head = actual, + } + }; + + Some(self.inner.buffer[idx].with(|ptr| unsafe { ptr::read(ptr).assume_init() })) + } +} + +impl<T> Steal<T> { + pub(crate) fn is_empty(&self) -> bool { + self.0.is_empty() + } + + /// Steals half the tasks from self and place them into `dst`. + pub(crate) fn steal_into( + &self, + dst: &mut Local<T>, + dst_stats: &mut Stats, + ) -> Option<task::Notified<T>> { + // Safety: the caller is the only thread that mutates `dst.tail` and + // holds a mutable reference. + let dst_tail = unsafe { dst.inner.tail.unsync_load() }; + + // To the caller, `dst` may **look** empty but still have values + // contained in the buffer. If another thread is concurrently stealing + // from `dst` there may not be enough capacity to steal. + let (steal, _) = unpack(dst.inner.head.load(Acquire)); + + if dst_tail.wrapping_sub(steal) > LOCAL_QUEUE_CAPACITY as UnsignedShort / 2 { + // we *could* try to steal less here, but for simplicity, we're just + // going to abort. + return None; + } + + // Steal the tasks into `dst`'s buffer. This does not yet expose the + // tasks in `dst`. 
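// Illustrative sketch of the overflow batch above, using plain integers as
// stand-ins for tasks (not the real task types): half of the local buffer plus
// the task that triggered the overflow are forwarded as one batch, via
// `chain(iter::once(task))`, so the injection queue receives a single insert.
fn overflow_batch_sketch() {
    let capacity = 8usize;
    let local: Vec<u32> = (0..capacity as u32).collect(); // a "full" local queue
    let new_task = 100u32;

    let num_taken = capacity / 2; // mirrors NUM_TASKS_TAKEN
    let batch: Vec<u32> = local[..num_taken]
        .iter()
        .copied()
        .chain(std::iter::once(new_task))
        .collect();

    // Half of the queue, plus the task being pushed.
    assert_eq!(batch.len(), num_taken + 1);
    assert_eq!(batch.last(), Some(&new_task));
}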
+ let mut n = self.steal_into2(dst, dst_tail); + + if n == 0 { + // No tasks were stolen + return None; + } + + dst_stats.incr_steal_count(n as u16); + dst_stats.incr_steal_operations(); + + // We are returning a task here + n -= 1; + + let ret_pos = dst_tail.wrapping_add(n); + let ret_idx = ret_pos as usize & MASK; + + // safety: the value was written as part of `steal_into2` and not + // exposed to stealers, so no other thread can access it. + let ret = dst.inner.buffer[ret_idx].with(|ptr| unsafe { ptr::read((*ptr).as_ptr()) }); + + if n == 0 { + // The `dst` queue is empty, but a single task was stolen + return Some(ret); + } + + // Make the stolen items available to consumers + dst.inner.tail.store(dst_tail.wrapping_add(n), Release); + + Some(ret) + } + + // Steal tasks from `self`, placing them into `dst`. Returns the number of + // tasks that were stolen. + fn steal_into2(&self, dst: &mut Local<T>, dst_tail: UnsignedShort) -> UnsignedShort { + let mut prev_packed = self.0.head.load(Acquire); + let mut next_packed; + + let n = loop { + let (src_head_steal, src_head_real) = unpack(prev_packed); + let src_tail = self.0.tail.load(Acquire); + + // If these two do not match, another thread is concurrently + // stealing from the queue. + if src_head_steal != src_head_real { + return 0; + } + + // Number of available tasks to steal + let n = src_tail.wrapping_sub(src_head_real); + let n = n - n / 2; + + if n == 0 { + // No tasks available to steal + return 0; + } + + // Update the real head index to acquire the tasks. + let steal_to = src_head_real.wrapping_add(n); + assert_ne!(src_head_steal, steal_to); + next_packed = pack(src_head_steal, steal_to); + + // Claim all those tasks. This is done by incrementing the "real" + // head but not the steal. By doing this, no other thread is able to + // steal from this queue until the current thread completes. + let res = self + .0 + .head + .compare_exchange(prev_packed, next_packed, AcqRel, Acquire); + + match res { + Ok(_) => break n, + Err(actual) => prev_packed = actual, + } + }; + + assert!( + n <= LOCAL_QUEUE_CAPACITY as UnsignedShort / 2, + "actual = {}", + n + ); + + let (first, _) = unpack(next_packed); + + // Take all the tasks + for i in 0..n { + // Compute the positions + let src_pos = first.wrapping_add(i); + let dst_pos = dst_tail.wrapping_add(i); + + // Map to slots + let src_idx = src_pos as usize & MASK; + let dst_idx = dst_pos as usize & MASK; + + // Read the task + // + // safety: We acquired the task with the atomic exchange above. + let task = self.0.buffer[src_idx].with(|ptr| unsafe { ptr::read((*ptr).as_ptr()) }); + + // Write the task to the new slot + // + // safety: `dst` queue is empty and we are the only producer to + // this queue. + dst.inner.buffer[dst_idx] + .with_mut(|ptr| unsafe { ptr::write((*ptr).as_mut_ptr(), task) }); + } + + let mut prev_packed = next_packed; + + // Update `src_head_steal` to match `src_head_real` signalling that the + // stealing routine is complete. + loop { + let head = unpack(prev_packed).1; + next_packed = pack(head, head); + + let res = self + .0 + .head + .compare_exchange(prev_packed, next_packed, AcqRel, Acquire); + + match res { + Ok(_) => return n, + Err(actual) => { + let (actual_steal, actual_real) = unpack(actual); + + assert_ne!(actual_steal, actual_real); + + prev_packed = actual; + } + } + } + } +} + +cfg_metrics! 
{ + impl<T> Steal<T> { + pub(crate) fn len(&self) -> usize { + self.0.len() as _ + } + } +} + +impl<T> Clone for Steal<T> { + fn clone(&self) -> Steal<T> { + Steal(self.0.clone()) + } +} + +impl<T> Drop for Local<T> { + fn drop(&mut self) { + if !std::thread::panicking() { + assert!(self.pop().is_none(), "queue not empty"); + } + } +} + +impl<T> Inner<T> { + fn remaining_slots(&self) -> usize { + let (steal, _) = unpack(self.head.load(Acquire)); + let tail = self.tail.load(Acquire); + + LOCAL_QUEUE_CAPACITY - (tail.wrapping_sub(steal) as usize) + } + + fn len(&self) -> UnsignedShort { + let (_, head) = unpack(self.head.load(Acquire)); + let tail = self.tail.load(Acquire); + + tail.wrapping_sub(head) + } + + fn is_empty(&self) -> bool { + self.len() == 0 + } +} + +/// Split the head value into the real head and the index a stealer is working +/// on. +fn unpack(n: UnsignedLong) -> (UnsignedShort, UnsignedShort) { + let real = n & UnsignedShort::MAX as UnsignedLong; + let steal = n >> (mem::size_of::<UnsignedShort>() * 8); + + (steal as UnsignedShort, real as UnsignedShort) +} + +/// Join the two head values +fn pack(steal: UnsignedShort, real: UnsignedShort) -> UnsignedLong { + (real as UnsignedLong) | ((steal as UnsignedLong) << (mem::size_of::<UnsignedShort>() * 8)) +} + +#[test] +fn test_local_queue_capacity() { + assert!(LOCAL_QUEUE_CAPACITY - 1 <= u8::MAX as usize); +} diff --git a/third_party/rust/tokio/src/runtime/scheduler/multi_thread/stats.rs b/third_party/rust/tokio/src/runtime/scheduler/multi_thread/stats.rs new file mode 100644 index 0000000000..f01daaa1bf --- /dev/null +++ b/third_party/rust/tokio/src/runtime/scheduler/multi_thread/stats.rs @@ -0,0 +1,140 @@ +use crate::runtime::{Config, MetricsBatch, WorkerMetrics}; + +use std::cmp; +use std::time::{Duration, Instant}; + +/// Per-worker statistics. This is used for both tuning the scheduler and +/// reporting runtime-level metrics/stats. +pub(crate) struct Stats { + /// The metrics batch used to report runtime-level metrics/stats to the + /// user. + batch: MetricsBatch, + + /// Instant at which work last resumed (continued after park). + /// + /// This duplicates the value stored in `MetricsBatch`. We will unify + /// `Stats` and `MetricsBatch` when we stabilize metrics. + processing_scheduled_tasks_started_at: Instant, + + /// Number of tasks polled in the batch of scheduled tasks + tasks_polled_in_batch: usize, + + /// Exponentially-weighted moving average of time spent polling scheduled a + /// task. + /// + /// Tracked in nanoseconds, stored as a f64 since that is what we use with + /// the EWMA calculations + task_poll_time_ewma: f64, +} + +/// How to weigh each individual poll time, value is plucked from thin air. +const TASK_POLL_TIME_EWMA_ALPHA: f64 = 0.1; + +/// Ideally, we wouldn't go above this, value is plucked from thin air. +const TARGET_GLOBAL_QUEUE_INTERVAL: f64 = Duration::from_micros(200).as_nanos() as f64; + +/// Max value for the global queue interval. This is 2x the previous default +const MAX_TASKS_POLLED_PER_GLOBAL_QUEUE_INTERVAL: u32 = 127; + +/// This is the previous default +const TARGET_TASKS_POLLED_PER_GLOBAL_QUEUE_INTERVAL: u32 = 61; + +impl Stats { + pub(crate) fn new(worker_metrics: &WorkerMetrics) -> Stats { + // Seed the value with what we hope to see. 
+ let task_poll_time_ewma = + TARGET_GLOBAL_QUEUE_INTERVAL / TARGET_TASKS_POLLED_PER_GLOBAL_QUEUE_INTERVAL as f64; + + Stats { + batch: MetricsBatch::new(worker_metrics), + processing_scheduled_tasks_started_at: Instant::now(), + tasks_polled_in_batch: 0, + task_poll_time_ewma, + } + } + + pub(crate) fn tuned_global_queue_interval(&self, config: &Config) -> u32 { + // If an interval is explicitly set, don't tune. + if let Some(configured) = config.global_queue_interval { + return configured; + } + + // As of Rust 1.45, casts from f64 -> u32 are saturating, which is fine here. + let tasks_per_interval = (TARGET_GLOBAL_QUEUE_INTERVAL / self.task_poll_time_ewma) as u32; + + cmp::max( + // We don't want to return less than 2 as that would result in the + // global queue always getting checked first. + 2, + cmp::min( + MAX_TASKS_POLLED_PER_GLOBAL_QUEUE_INTERVAL, + tasks_per_interval, + ), + ) + } + + pub(crate) fn submit(&mut self, to: &WorkerMetrics) { + self.batch.submit(to); + } + + pub(crate) fn about_to_park(&mut self) { + self.batch.about_to_park(); + } + + pub(crate) fn inc_local_schedule_count(&mut self) { + self.batch.inc_local_schedule_count(); + } + + pub(crate) fn start_processing_scheduled_tasks(&mut self) { + self.batch.start_processing_scheduled_tasks(); + + self.processing_scheduled_tasks_started_at = Instant::now(); + self.tasks_polled_in_batch = 0; + } + + pub(crate) fn end_processing_scheduled_tasks(&mut self) { + self.batch.end_processing_scheduled_tasks(); + + // Update the EWMA task poll time + if self.tasks_polled_in_batch > 0 { + let now = Instant::now(); + + // If we "overflow" this conversion, we have bigger problems than + // slightly off stats. + let elapsed = (now - self.processing_scheduled_tasks_started_at).as_nanos() as f64; + let num_polls = self.tasks_polled_in_batch as f64; + + // Calculate the mean poll duration for a single task in the batch + let mean_poll_duration = elapsed / num_polls; + + // Compute the alpha weighted by the number of tasks polled this batch. + let weighted_alpha = 1.0 - (1.0 - TASK_POLL_TIME_EWMA_ALPHA).powf(num_polls); + + // Now compute the new weighted average task poll time. + self.task_poll_time_ewma = weighted_alpha * mean_poll_duration + + (1.0 - weighted_alpha) * self.task_poll_time_ewma; + } + } + + pub(crate) fn start_poll(&mut self) { + self.batch.start_poll(); + + self.tasks_polled_in_batch += 1; + } + + pub(crate) fn end_poll(&mut self) { + self.batch.end_poll(); + } + + pub(crate) fn incr_steal_count(&mut self, by: u16) { + self.batch.incr_steal_count(by); + } + + pub(crate) fn incr_steal_operations(&mut self) { + self.batch.incr_steal_operations(); + } + + pub(crate) fn incr_overflow_count(&mut self) { + self.batch.incr_overflow_count(); + } +} diff --git a/third_party/rust/tokio/src/runtime/scheduler/multi_thread/trace.rs b/third_party/rust/tokio/src/runtime/scheduler/multi_thread/trace.rs new file mode 100644 index 0000000000..7b4aeb5c1d --- /dev/null +++ b/third_party/rust/tokio/src/runtime/scheduler/multi_thread/trace.rs @@ -0,0 +1,61 @@ +use crate::loom::sync::atomic::{AtomicBool, Ordering}; +use crate::loom::sync::{Barrier, Mutex}; +use crate::runtime::dump::Dump; +use crate::runtime::scheduler::multi_thread::Handle; +use crate::sync::notify::Notify; + +/// Tracing status of the worker. 
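// Worked sketch of the tuning math above (constants mirror this file; the
// batch numbers fed in are made up): the poll-time EWMA is updated with an
// alpha weighted by how many tasks the batch polled, and the global queue
// interval is the target interval divided by that EWMA, clamped to [2, 127].
fn tuning_math_sketch() {
    const TASK_POLL_TIME_EWMA_ALPHA: f64 = 0.1;
    const TARGET_GLOBAL_QUEUE_INTERVAL: f64 = 200_000.0; // 200us in nanoseconds
    const MAX_TASKS_POLLED_PER_GLOBAL_QUEUE_INTERVAL: u32 = 127;

    // Assume the last batch polled 10 tasks in 50us.
    let mut task_poll_time_ewma = TARGET_GLOBAL_QUEUE_INTERVAL / 61.0; // seeded value
    let elapsed_ns = 50_000.0;
    let num_polls = 10.0;

    let mean_poll_duration = elapsed_ns / num_polls;
    let weighted_alpha = 1.0 - (1.0 - TASK_POLL_TIME_EWMA_ALPHA).powf(num_polls);
    task_poll_time_ewma =
        weighted_alpha * mean_poll_duration + (1.0 - weighted_alpha) * task_poll_time_ewma;

    let tasks_per_interval = (TARGET_GLOBAL_QUEUE_INTERVAL / task_poll_time_ewma) as u32;
    let tuned = tasks_per_interval
        .min(MAX_TASKS_POLLED_PER_GLOBAL_QUEUE_INTERVAL)
        .max(2);
    assert!((2..=127).contains(&tuned));
}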
+pub(super) struct TraceStatus { + pub(super) trace_requested: AtomicBool, + pub(super) trace_start: Barrier, + pub(super) trace_end: Barrier, + pub(super) result_ready: Notify, + pub(super) trace_result: Mutex<Option<Dump>>, +} + +impl TraceStatus { + pub(super) fn new(remotes_len: usize) -> Self { + Self { + trace_requested: AtomicBool::new(false), + trace_start: Barrier::new(remotes_len), + trace_end: Barrier::new(remotes_len), + result_ready: Notify::new(), + trace_result: Mutex::new(None), + } + } + + pub(super) fn trace_requested(&self) -> bool { + self.trace_requested.load(Ordering::Relaxed) + } + + pub(super) async fn start_trace_request(&self, handle: &Handle) { + while self + .trace_requested + .compare_exchange(false, true, Ordering::Acquire, Ordering::Relaxed) + .is_err() + { + handle.notify_all(); + crate::task::yield_now().await; + } + } + + pub(super) fn stash_result(&self, dump: Dump) { + let _ = self.trace_result.lock().insert(dump); + self.result_ready.notify_one(); + } + + pub(super) fn take_result(&self) -> Option<Dump> { + self.trace_result.lock().take() + } + + pub(super) async fn end_trace_request(&self, handle: &Handle) { + while self + .trace_requested + .compare_exchange(true, false, Ordering::Acquire, Ordering::Relaxed) + .is_err() + { + handle.notify_all(); + crate::task::yield_now().await; + } + } +} diff --git a/third_party/rust/tokio/src/runtime/scheduler/multi_thread/trace_mock.rs b/third_party/rust/tokio/src/runtime/scheduler/multi_thread/trace_mock.rs new file mode 100644 index 0000000000..2c17a4e38b --- /dev/null +++ b/third_party/rust/tokio/src/runtime/scheduler/multi_thread/trace_mock.rs @@ -0,0 +1,11 @@ +pub(super) struct TraceStatus {} + +impl TraceStatus { + pub(super) fn new(_: usize) -> Self { + Self {} + } + + pub(super) fn trace_requested(&self) -> bool { + false + } +} diff --git a/third_party/rust/tokio/src/runtime/scheduler/multi_thread/worker.rs b/third_party/rust/tokio/src/runtime/scheduler/multi_thread/worker.rs new file mode 100644 index 0000000000..6ae1146337 --- /dev/null +++ b/third_party/rust/tokio/src/runtime/scheduler/multi_thread/worker.rs @@ -0,0 +1,1216 @@ +//! A scheduler is initialized with a fixed number of workers. Each worker is +//! driven by a thread. Each worker has a "core" which contains data such as the +//! run queue and other state. When `block_in_place` is called, the worker's +//! "core" is handed off to a new thread allowing the scheduler to continue to +//! make progress while the originating thread blocks. +//! +//! # Shutdown +//! +//! Shutting down the runtime involves the following steps: +//! +//! 1. The Shared::close method is called. This closes the inject queue and +//! OwnedTasks instance and wakes up all worker threads. +//! +//! 2. Each worker thread observes the close signal next time it runs +//! Core::maintenance by checking whether the inject queue is closed. +//! The Core::is_shutdown flag is set to true. +//! +//! 3. The worker thread calls `pre_shutdown` in parallel. Here, the worker +//! will keep removing tasks from OwnedTasks until it is empty. No new +//! tasks can be pushed to the OwnedTasks during or after this step as it +//! was closed in step 1. +//! +//! 5. The workers call Shared::shutdown to enter the single-threaded phase of +//! shutdown. These calls will push their core to Shared::shutdown_cores, +//! and the last thread to push its core will finish the shutdown procedure. +//! +//! 6. The local run queue of each core is emptied, then the inject queue is +//! emptied. +//! +//! 
At this point, shutdown has completed. It is not possible for any of the +//! collections to contain any tasks at this point, as each collection was +//! closed first, then emptied afterwards. +//! +//! ## Spawns during shutdown +//! +//! When spawning tasks during shutdown, there are two cases: +//! +//! * The spawner observes the OwnedTasks being open, and the inject queue is +//! closed. +//! * The spawner observes the OwnedTasks being closed and doesn't check the +//! inject queue. +//! +//! The first case can only happen if the OwnedTasks::bind call happens before +//! or during step 1 of shutdown. In this case, the runtime will clean up the +//! task in step 3 of shutdown. +//! +//! In the latter case, the task was not spawned and the task is immediately +//! cancelled by the spawner. +//! +//! The correctness of shutdown requires both the inject queue and OwnedTasks +//! collection to have a closed bit. With a close bit on only the inject queue, +//! spawning could run in to a situation where a task is successfully bound long +//! after the runtime has shut down. With a close bit on only the OwnedTasks, +//! the first spawning situation could result in the notification being pushed +//! to the inject queue after step 6 of shutdown, which would leave a task in +//! the inject queue indefinitely. This would be a ref-count cycle and a memory +//! leak. + +use crate::loom::sync::{Arc, Mutex}; +use crate::runtime; +use crate::runtime::context; +use crate::runtime::scheduler::multi_thread::{ + idle, queue, Counters, Handle, Idle, Overflow, Parker, Stats, TraceStatus, Unparker, +}; +use crate::runtime::scheduler::{inject, Defer, Lock}; +use crate::runtime::task::OwnedTasks; +use crate::runtime::{ + blocking, coop, driver, scheduler, task, Config, SchedulerMetrics, WorkerMetrics, +}; +use crate::util::atomic_cell::AtomicCell; +use crate::util::rand::{FastRand, RngSeedGenerator}; + +use std::cell::RefCell; +use std::task::Waker; +use std::time::Duration; + +cfg_metrics! { + mod metrics; +} + +cfg_taskdump! { + mod taskdump; +} + +cfg_not_taskdump! { + mod taskdump_mock; +} + +/// A scheduler worker +pub(super) struct Worker { + /// Reference to scheduler's handle + handle: Arc<Handle>, + + /// Index holding this worker's remote state + index: usize, + + /// Used to hand-off a worker's core to another thread. + core: AtomicCell<Core>, +} + +/// Core data +struct Core { + /// Used to schedule bookkeeping tasks every so often. + tick: u32, + + /// When a task is scheduled from a worker, it is stored in this slot. The + /// worker will check this slot for a task **before** checking the run + /// queue. This effectively results in the **last** scheduled task to be run + /// next (LIFO). This is an optimization for improving locality which + /// benefits message passing patterns and helps to reduce latency. + lifo_slot: Option<Notified>, + + /// When `true`, locally scheduled tasks go to the LIFO slot. When `false`, + /// they go to the back of the `run_queue`. + lifo_enabled: bool, + + /// The worker-local run queue. + run_queue: queue::Local<Arc<Handle>>, + + /// True if the worker is currently searching for more work. Searching + /// involves attempting to steal from other workers. + is_searching: bool, + + /// True if the scheduler is being shutdown + is_shutdown: bool, + + /// True if the scheduler is being traced + is_traced: bool, + + /// Parker + /// + /// Stored in an `Option` as the parker is added / removed to make the + /// borrow checker happy. 
+ park: Option<Parker>, + + /// Per-worker runtime stats + stats: Stats, + + /// How often to check the global queue + global_queue_interval: u32, + + /// Fast random number generator. + rand: FastRand, +} + +/// State shared across all workers +pub(crate) struct Shared { + /// Per-worker remote state. All other workers have access to this and is + /// how they communicate between each other. + remotes: Box<[Remote]>, + + /// Global task queue used for: + /// 1. Submit work to the scheduler while **not** currently on a worker thread. + /// 2. Submit work to the scheduler when a worker run queue is saturated + pub(super) inject: inject::Shared<Arc<Handle>>, + + /// Coordinates idle workers + idle: Idle, + + /// Collection of all active tasks spawned onto this executor. + pub(super) owned: OwnedTasks<Arc<Handle>>, + + /// Data synchronized by the scheduler mutex + pub(super) synced: Mutex<Synced>, + + /// Cores that have observed the shutdown signal + /// + /// The core is **not** placed back in the worker to avoid it from being + /// stolen by a thread that was spawned as part of `block_in_place`. + #[allow(clippy::vec_box)] // we're moving an already-boxed value + shutdown_cores: Mutex<Vec<Box<Core>>>, + + /// The number of cores that have observed the trace signal. + pub(super) trace_status: TraceStatus, + + /// Scheduler configuration options + config: Config, + + /// Collects metrics from the runtime. + pub(super) scheduler_metrics: SchedulerMetrics, + + pub(super) worker_metrics: Box<[WorkerMetrics]>, + + /// Only held to trigger some code on drop. This is used to get internal + /// runtime metrics that can be useful when doing performance + /// investigations. This does nothing (empty struct, no drop impl) unless + /// the `tokio_internal_mt_counters` cfg flag is set. + _counters: Counters, +} + +/// Data synchronized by the scheduler mutex +pub(crate) struct Synced { + /// Synchronized state for `Idle`. + pub(super) idle: idle::Synced, + + /// Synchronized state for `Inject`. + pub(crate) inject: inject::Synced, +} + +/// Used to communicate with a worker from other threads. +struct Remote { + /// Steals tasks from this worker. + pub(super) steal: queue::Steal<Arc<Handle>>, + + /// Unparks the associated worker thread + unpark: Unparker, +} + +/// Thread-local context +pub(crate) struct Context { + /// Worker + worker: Arc<Worker>, + + /// Core data + core: RefCell<Option<Box<Core>>>, + + /// Tasks to wake after resource drivers are polled. This is mostly to + /// handle yielded tasks. + pub(crate) defer: Defer, +} + +/// Starts the workers +pub(crate) struct Launch(Vec<Arc<Worker>>); + +/// Running a task may consume the core. If the core is still available when +/// running the task completes, it is returned. Otherwise, the worker will need +/// to stop processing. +type RunResult = Result<Box<Core>, ()>; + +/// A task handle +type Task = task::Task<Arc<Handle>>; + +/// A notified task handle +type Notified = task::Notified<Arc<Handle>>; + +/// Value picked out of thin-air. Running the LIFO slot a handful of times +/// seemms sufficient to benefit from locality. More than 3 times probably is +/// overweighing. The value can be tuned in the future with data that shows +/// improvements. 
+const MAX_LIFO_POLLS_PER_TICK: usize = 3; + +pub(super) fn create( + size: usize, + park: Parker, + driver_handle: driver::Handle, + blocking_spawner: blocking::Spawner, + seed_generator: RngSeedGenerator, + config: Config, +) -> (Arc<Handle>, Launch) { + let mut cores = Vec::with_capacity(size); + let mut remotes = Vec::with_capacity(size); + let mut worker_metrics = Vec::with_capacity(size); + + // Create the local queues + for _ in 0..size { + let (steal, run_queue) = queue::local(); + + let park = park.clone(); + let unpark = park.unpark(); + let metrics = WorkerMetrics::from_config(&config); + let stats = Stats::new(&metrics); + + cores.push(Box::new(Core { + tick: 0, + lifo_slot: None, + lifo_enabled: !config.disable_lifo_slot, + run_queue, + is_searching: false, + is_shutdown: false, + is_traced: false, + park: Some(park), + global_queue_interval: stats.tuned_global_queue_interval(&config), + stats, + rand: FastRand::from_seed(config.seed_generator.next_seed()), + })); + + remotes.push(Remote { steal, unpark }); + worker_metrics.push(metrics); + } + + let (idle, idle_synced) = Idle::new(size); + let (inject, inject_synced) = inject::Shared::new(); + + let remotes_len = remotes.len(); + let handle = Arc::new(Handle { + shared: Shared { + remotes: remotes.into_boxed_slice(), + inject, + idle, + owned: OwnedTasks::new(), + synced: Mutex::new(Synced { + idle: idle_synced, + inject: inject_synced, + }), + shutdown_cores: Mutex::new(vec![]), + trace_status: TraceStatus::new(remotes_len), + config, + scheduler_metrics: SchedulerMetrics::new(), + worker_metrics: worker_metrics.into_boxed_slice(), + _counters: Counters, + }, + driver: driver_handle, + blocking_spawner, + seed_generator, + }); + + let mut launch = Launch(vec![]); + + for (index, core) in cores.drain(..).enumerate() { + launch.0.push(Arc::new(Worker { + handle: handle.clone(), + index, + core: AtomicCell::new(Some(core)), + })); + } + + (handle, launch) +} + +#[track_caller] +pub(crate) fn block_in_place<F, R>(f: F) -> R +where + F: FnOnce() -> R, +{ + // Try to steal the worker core back + struct Reset { + take_core: bool, + budget: coop::Budget, + } + + impl Drop for Reset { + fn drop(&mut self) { + with_current(|maybe_cx| { + if let Some(cx) = maybe_cx { + if self.take_core { + let core = cx.worker.core.take(); + let mut cx_core = cx.core.borrow_mut(); + assert!(cx_core.is_none()); + *cx_core = core; + } + + // Reset the task budget as we are re-entering the + // runtime. + coop::set(self.budget); + } + }); + } + } + + let mut had_entered = false; + let mut take_core = false; + + let setup_result = with_current(|maybe_cx| { + match ( + crate::runtime::context::current_enter_context(), + maybe_cx.is_some(), + ) { + (context::EnterRuntime::Entered { .. }, true) => { + // We are on a thread pool runtime thread, so we just need to + // set up blocking. + had_entered = true; + } + ( + context::EnterRuntime::Entered { + allow_block_in_place, + }, + false, + ) => { + // We are on an executor, but _not_ on the thread pool. That is + // _only_ okay if we are in a thread pool runtime's block_on + // method: + if allow_block_in_place { + had_entered = true; + return Ok(()); + } else { + // This probably means we are on the current_thread runtime or in a + // LocalSet, where it is _not_ okay to block. + return Err( + "can call blocking only when running on the multi-threaded runtime", + ); + } + } + (context::EnterRuntime::NotEntered, true) => { + // This is a nested call to block_in_place (we already exited). 
+ // All the necessary setup has already been done. + return Ok(()); + } + (context::EnterRuntime::NotEntered, false) => { + // We are outside of the tokio runtime, so blocking is fine. + // We can also skip all of the thread pool blocking setup steps. + return Ok(()); + } + } + + let cx = maybe_cx.expect("no .is_some() == false cases above should lead here"); + + // Get the worker core. If none is set, then blocking is fine! + let core = match cx.core.borrow_mut().take() { + Some(core) => core, + None => return Ok(()), + }; + + // We are taking the core from the context and sending it to another + // thread. + take_core = true; + + // The parker should be set here + assert!(core.park.is_some()); + + // In order to block, the core must be sent to another thread for + // execution. + // + // First, move the core back into the worker's shared core slot. + cx.worker.core.set(core); + + // Next, clone the worker handle and send it to a new thread for + // processing. + // + // Once the blocking task is done executing, we will attempt to + // steal the core back. + let worker = cx.worker.clone(); + runtime::spawn_blocking(move || run(worker)); + Ok(()) + }); + + if let Err(panic_message) = setup_result { + panic!("{}", panic_message); + } + + if had_entered { + // Unset the current task's budget. Blocking sections are not + // constrained by task budgets. + let _reset = Reset { + take_core, + budget: coop::stop(), + }; + + crate::runtime::context::exit_runtime(f) + } else { + f() + } +} + +impl Launch { + pub(crate) fn launch(mut self) { + for worker in self.0.drain(..) { + runtime::spawn_blocking(move || run(worker)); + } + } +} + +fn run(worker: Arc<Worker>) { + struct AbortOnPanic; + + impl Drop for AbortOnPanic { + fn drop(&mut self) { + if std::thread::panicking() { + eprintln!("worker thread panicking; aborting process"); + std::process::abort(); + } + } + } + + // Catching panics on worker threads in tests is quite tricky. Instead, when + // debug assertions are enabled, we just abort the process. + #[cfg(debug_assertions)] + let _abort_on_panic = AbortOnPanic; + + // Acquire a core. If this fails, then another thread is running this + // worker and there is nothing further to do. + let core = match worker.core.take() { + Some(core) => core, + None => return, + }; + + let handle = scheduler::Handle::MultiThread(worker.handle.clone()); + + crate::runtime::context::enter_runtime(&handle, true, |_| { + // Set the worker context. + let cx = scheduler::Context::MultiThread(Context { + worker, + core: RefCell::new(None), + defer: Defer::new(), + }); + + context::set_scheduler(&cx, || { + let cx = cx.expect_multi_thread(); + + // This should always be an error. It only returns a `Result` to support + // using `?` to short circuit. + assert!(cx.run(core).is_err()); + + // Check if there are any deferred tasks to notify. This can happen when + // the worker core is lost due to `block_in_place()` being called from + // within the task. + cx.defer.wake(); + }); + }); +} + +impl Context { + fn run(&self, mut core: Box<Core>) -> RunResult { + // Reset `lifo_enabled` here in case the core was previously stolen from + // a task that had the LIFO slot disabled. + self.reset_lifo_enabled(&mut core); + + // Start as "processing" tasks as polling tasks from the local queue + // will be one of the first things we do. 
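// Hedged usage sketch of the hand-off described above, written from the point
// of view of a hypothetical downstream crate depending on tokio with the
// rt-multi-thread feature (not part of this file): inside a task running on
// the multi-threaded runtime, `tokio::task::block_in_place` lets the closure
// block the OS thread while the worker's core moves to another thread, so the
// scheduler keeps making progress.
fn block_in_place_example() {
    let rt = tokio::runtime::Builder::new_multi_thread()
        .worker_threads(2)
        .build()
        .unwrap();

    rt.block_on(async {
        let answer = tokio::task::block_in_place(|| {
            // Blocking work runs here; other tasks continue on other workers.
            std::thread::sleep(std::time::Duration::from_millis(10));
            42
        });
        assert_eq!(answer, 42);
    });
}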
+ core.stats.start_processing_scheduled_tasks(); + + while !core.is_shutdown { + self.assert_lifo_enabled_is_correct(&core); + + if core.is_traced { + core = self.worker.handle.trace_core(core); + } + + // Increment the tick + core.tick(); + + // Run maintenance, if needed + core = self.maintenance(core); + + // First, check work available to the current worker. + if let Some(task) = core.next_task(&self.worker) { + core = self.run_task(task, core)?; + continue; + } + + // We consumed all work in the queues and will start searching for work. + core.stats.end_processing_scheduled_tasks(); + + // There is no more **local** work to process, try to steal work + // from other workers. + if let Some(task) = core.steal_work(&self.worker) { + // Found work, switch back to processing + core.stats.start_processing_scheduled_tasks(); + core = self.run_task(task, core)?; + } else { + // Wait for work + core = if !self.defer.is_empty() { + self.park_timeout(core, Some(Duration::from_millis(0))) + } else { + self.park(core) + }; + } + } + + core.pre_shutdown(&self.worker); + + // Signal shutdown + self.worker.handle.shutdown_core(core); + Err(()) + } + + fn run_task(&self, task: Notified, mut core: Box<Core>) -> RunResult { + let task = self.worker.handle.shared.owned.assert_owner(task); + + // Make sure the worker is not in the **searching** state. This enables + // another idle worker to try to steal work. + core.transition_from_searching(&self.worker); + + self.assert_lifo_enabled_is_correct(&core); + + // Measure the poll start time. Note that we may end up polling other + // tasks under this measurement. In this case, the tasks came from the + // LIFO slot and are considered part of the current task for scheduling + // purposes. These tasks inherent the "parent"'s limits. + core.stats.start_poll(); + + // Make the core available to the runtime context + *self.core.borrow_mut() = Some(core); + + // Run the task + coop::budget(|| { + task.run(); + let mut lifo_polls = 0; + + // As long as there is budget remaining and a task exists in the + // `lifo_slot`, then keep running. + loop { + // Check if we still have the core. If not, the core was stolen + // by another worker. + let mut core = match self.core.borrow_mut().take() { + Some(core) => core, + None => { + // In this case, we cannot call `reset_lifo_enabled()` + // because the core was stolen. The stealer will handle + // that at the top of `Context::run` + return Err(()); + } + }; + + // Check for a task in the LIFO slot + let task = match core.lifo_slot.take() { + Some(task) => task, + None => { + self.reset_lifo_enabled(&mut core); + core.stats.end_poll(); + return Ok(core); + } + }; + + if !coop::has_budget_remaining() { + core.stats.end_poll(); + + // Not enough budget left to run the LIFO task, push it to + // the back of the queue and return. + core.run_queue.push_back_or_overflow( + task, + &*self.worker.handle, + &mut core.stats, + ); + // If we hit this point, the LIFO slot should be enabled. + // There is no need to reset it. + debug_assert!(core.lifo_enabled); + return Ok(core); + } + + // Track that we are about to run a task from the LIFO slot. + lifo_polls += 1; + super::counters::inc_lifo_schedules(); + + // Disable the LIFO slot if we reach our limit + // + // In ping-ping style workloads where task A notifies task B, + // which notifies task A again, continuously prioritizing the + // LIFO slot can cause starvation as these two tasks will + // repeatedly schedule the other. 
To mitigate this, we limit the + // number of times the LIFO slot is prioritized. + if lifo_polls >= MAX_LIFO_POLLS_PER_TICK { + core.lifo_enabled = false; + super::counters::inc_lifo_capped(); + } + + // Run the LIFO task, then loop + *self.core.borrow_mut() = Some(core); + let task = self.worker.handle.shared.owned.assert_owner(task); + task.run(); + } + }) + } + + fn reset_lifo_enabled(&self, core: &mut Core) { + core.lifo_enabled = !self.worker.handle.shared.config.disable_lifo_slot; + } + + fn assert_lifo_enabled_is_correct(&self, core: &Core) { + debug_assert_eq!( + core.lifo_enabled, + !self.worker.handle.shared.config.disable_lifo_slot + ); + } + + fn maintenance(&self, mut core: Box<Core>) -> Box<Core> { + if core.tick % self.worker.handle.shared.config.event_interval == 0 { + super::counters::inc_num_maintenance(); + + core.stats.end_processing_scheduled_tasks(); + + // Call `park` with a 0 timeout. This enables the I/O driver, timer, ... + // to run without actually putting the thread to sleep. + core = self.park_timeout(core, Some(Duration::from_millis(0))); + + // Run regularly scheduled maintenance + core.maintenance(&self.worker); + + core.stats.start_processing_scheduled_tasks(); + } + + core + } + + /// Parks the worker thread while waiting for tasks to execute. + /// + /// This function checks if indeed there's no more work left to be done before parking. + /// Also important to notice that, before parking, the worker thread will try to take + /// ownership of the Driver (IO/Time) and dispatch any events that might have fired. + /// Whenever a worker thread executes the Driver loop, all waken tasks are scheduled + /// in its own local queue until the queue saturates (ntasks > LOCAL_QUEUE_CAPACITY). + /// When the local queue is saturated, the overflow tasks are added to the injection queue + /// from where other workers can pick them up. + /// Also, we rely on the workstealing algorithm to spread the tasks amongst workers + /// after all the IOs get dispatched + fn park(&self, mut core: Box<Core>) -> Box<Core> { + if let Some(f) = &self.worker.handle.shared.config.before_park { + f(); + } + + if core.transition_to_parked(&self.worker) { + while !core.is_shutdown && !core.is_traced { + core.stats.about_to_park(); + core = self.park_timeout(core, None); + + // Run regularly scheduled maintenance + core.maintenance(&self.worker); + + if core.transition_from_parked(&self.worker) { + break; + } + } + } + + if let Some(f) = &self.worker.handle.shared.config.after_unpark { + f(); + } + core + } + + fn park_timeout(&self, mut core: Box<Core>, duration: Option<Duration>) -> Box<Core> { + self.assert_lifo_enabled_is_correct(&core); + + // Take the parker out of core + let mut park = core.park.take().expect("park missing"); + + // Store `core` in context + *self.core.borrow_mut() = Some(core); + + // Park thread + if let Some(timeout) = duration { + park.park_timeout(&self.worker.handle.driver, timeout); + } else { + park.park(&self.worker.handle.driver); + } + + self.defer.wake(); + + // Remove `core` from context + core = self.core.borrow_mut().take().expect("core missing"); + + // Place `park` back in `core` + core.park = Some(park); + + if core.should_notify_others() { + self.worker.handle.notify_parked_local(); + } + + core + } + + pub(crate) fn defer(&self, waker: &Waker) { + self.defer.defer(waker); + } +} + +impl Core { + /// Increment the tick + fn tick(&mut self) { + self.tick = self.tick.wrapping_add(1); + } + + /// Return the next notified task available to this worker. 
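// Illustrative sketch of the LIFO capping policy described above, with plain
// integers standing in for tasks (not the real scheduler state): after
// MAX_LIFO_POLLS_PER_TICK back-to-back polls from the LIFO slot, the slot is
// disabled so the next wake is pushed to the back of the run queue and other
// tasks get a turn.
fn lifo_cap_sketch() {
    const MAX_LIFO_POLLS_PER_TICK: usize = 3;

    let mut lifo_enabled = true;
    let mut lifo_slot: Option<u32> = Some(0);
    let mut run_queue: std::collections::VecDeque<u32> = (1..=5).collect();
    let mut lifo_polls = 0;
    let mut polled = Vec::new();

    // Each polled "task" immediately re-wakes a peer: the ping-pong pattern.
    while let Some(task) = lifo_slot.take() {
        polled.push(task);
        lifo_polls += 1;
        if lifo_polls >= MAX_LIFO_POLLS_PER_TICK {
            lifo_enabled = false;
        }
        let next = task + 100; // the re-woken peer
        if lifo_enabled {
            lifo_slot = Some(next); // keeps priority
        } else {
            run_queue.push_back(next); // demoted; the LIFO loop ends
        }
    }

    assert_eq!(polled.len(), MAX_LIFO_POLLS_PER_TICK);
    assert_eq!(run_queue.len(), 6); // the original 5 plus the demoted wake
}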
+ fn next_task(&mut self, worker: &Worker) -> Option<Notified> { + if self.tick % self.global_queue_interval == 0 { + // Update the global queue interval, if needed + self.tune_global_queue_interval(worker); + + worker + .handle + .next_remote_task() + .or_else(|| self.next_local_task()) + } else { + let maybe_task = self.next_local_task(); + + if maybe_task.is_some() { + return maybe_task; + } + + if worker.inject().is_empty() { + return None; + } + + // Other threads can only **remove** tasks from the current worker's + // `run_queue`. So, we can be confident that by the time we call + // `run_queue.push_back` below, there will be *at least* `cap` + // available slots in the queue. + let cap = usize::min( + self.run_queue.remaining_slots(), + self.run_queue.max_capacity() / 2, + ); + + // The worker is currently idle, pull a batch of work from the + // injection queue. We don't want to pull *all* the work so other + // workers can also get some. + let n = usize::min( + worker.inject().len() / worker.handle.shared.remotes.len() + 1, + cap, + ); + + let mut synced = worker.handle.shared.synced.lock(); + // safety: passing in the correct `inject::Synced`. + let mut tasks = unsafe { worker.inject().pop_n(&mut synced.inject, n) }; + + // Pop the first task to return immedietly + let ret = tasks.next(); + + // Push the rest of the on the run queue + self.run_queue.push_back(tasks); + + ret + } + } + + fn next_local_task(&mut self) -> Option<Notified> { + self.lifo_slot.take().or_else(|| self.run_queue.pop()) + } + + /// Function responsible for stealing tasks from another worker + /// + /// Note: Only if less than half the workers are searching for tasks to steal + /// a new worker will actually try to steal. The idea is to make sure not all + /// workers will be trying to steal at the same time. + fn steal_work(&mut self, worker: &Worker) -> Option<Notified> { + if !self.transition_to_searching(worker) { + return None; + } + + let num = worker.handle.shared.remotes.len(); + // Start from a random worker + let start = self.rand.fastrand_n(num as u32) as usize; + + for i in 0..num { + let i = (start + i) % num; + + // Don't steal from ourself! We know we don't have work. + if i == worker.index { + continue; + } + + let target = &worker.handle.shared.remotes[i]; + if let Some(task) = target + .steal + .steal_into(&mut self.run_queue, &mut self.stats) + { + return Some(task); + } + } + + // Fallback on checking the global queue + worker.handle.next_remote_task() + } + + fn transition_to_searching(&mut self, worker: &Worker) -> bool { + if !self.is_searching { + self.is_searching = worker.handle.shared.idle.transition_worker_to_searching(); + } + + self.is_searching + } + + fn transition_from_searching(&mut self, worker: &Worker) { + if !self.is_searching { + return; + } + + self.is_searching = false; + worker.handle.transition_worker_from_searching(); + } + + fn has_tasks(&self) -> bool { + self.lifo_slot.is_some() || self.run_queue.has_tasks() + } + + fn should_notify_others(&self) -> bool { + // If there are tasks available to steal, but this worker is not + // looking for tasks to steal, notify another worker. + if self.is_searching { + return false; + } + self.lifo_slot.is_some() as usize + self.run_queue.len() > 1 + } + + /// Prepares the worker state for parking. + /// + /// Returns true if the transition happened, false if there is work to do first. 
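// Worked sketch of the batch size computed in `next_task` above (the numbers
// are made up): a worker pulls roughly its fair share of the injection queue,
// plus one, but never more than its free local capacity nor more than half the
// local queue's maximum capacity.
fn inject_pull_size_sketch() {
    let local_remaining_slots = 200usize; // free slots in this worker's queue
    let local_max_capacity = 256usize;
    let inject_len = 90usize; // tasks waiting in the global injection queue
    let num_workers = 4usize;

    let cap = usize::min(local_remaining_slots, local_max_capacity / 2);
    let n = usize::min(inject_len / num_workers + 1, cap);

    assert_eq!(cap, 128);
    assert_eq!(n, 23); // 90 / 4 + 1
}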
+ fn transition_to_parked(&mut self, worker: &Worker) -> bool { + // Workers should not park if they have work to do + if self.has_tasks() || self.is_traced { + return false; + } + + // When the final worker transitions **out** of searching to parked, it + // must check all the queues one last time in case work materialized + // between the last work scan and transitioning out of searching. + let is_last_searcher = worker.handle.shared.idle.transition_worker_to_parked( + &worker.handle.shared, + worker.index, + self.is_searching, + ); + + // The worker is no longer searching. Setting this is the local cache + // only. + self.is_searching = false; + + if is_last_searcher { + worker.handle.notify_if_work_pending(); + } + + true + } + + /// Returns `true` if the transition happened. + fn transition_from_parked(&mut self, worker: &Worker) -> bool { + // If a task is in the lifo slot/run queue, then we must unpark regardless of + // being notified + if self.has_tasks() { + // When a worker wakes, it should only transition to the "searching" + // state when the wake originates from another worker *or* a new task + // is pushed. We do *not* want the worker to transition to "searching" + // when it wakes when the I/O driver receives new events. + self.is_searching = !worker + .handle + .shared + .idle + .unpark_worker_by_id(&worker.handle.shared, worker.index); + return true; + } + + if worker + .handle + .shared + .idle + .is_parked(&worker.handle.shared, worker.index) + { + return false; + } + + // When unparked, the worker is in the searching state. + self.is_searching = true; + true + } + + /// Runs maintenance work such as checking the pool's state. + fn maintenance(&mut self, worker: &Worker) { + self.stats + .submit(&worker.handle.shared.worker_metrics[worker.index]); + + if !self.is_shutdown { + // Check if the scheduler has been shutdown + let synced = worker.handle.shared.synced.lock(); + self.is_shutdown = worker.inject().is_closed(&synced.inject); + } + + if !self.is_traced { + // Check if the worker should be tracing. + self.is_traced = worker.handle.shared.trace_status.trace_requested(); + } + } + + /// Signals all tasks to shut down, and waits for them to complete. Must run + /// before we enter the single-threaded phase of shutdown processing. + fn pre_shutdown(&mut self, worker: &Worker) { + // Signal to all tasks to shut down. + worker.handle.shared.owned.close_and_shutdown_all(); + + self.stats + .submit(&worker.handle.shared.worker_metrics[worker.index]); + } + + /// Shuts down the core. + fn shutdown(&mut self, handle: &Handle) { + // Take the core + let mut park = self.park.take().expect("park missing"); + + // Drain the queue + while self.next_local_task().is_some() {} + + park.shutdown(&handle.driver); + } + + fn tune_global_queue_interval(&mut self, worker: &Worker) { + let next = self + .stats + .tuned_global_queue_interval(&worker.handle.shared.config); + + debug_assert!(next > 1); + + // Smooth out jitter + if abs_diff(self.global_queue_interval, next) > 2 { + self.global_queue_interval = next; + } + } +} + +impl Worker { + /// Returns a reference to the scheduler's injection queue. 
+ fn inject(&self) -> &inject::Shared<Arc<Handle>> { + &self.handle.shared.inject + } +} + +// TODO: Move `Handle` impls into handle.rs +impl task::Schedule for Arc<Handle> { + fn release(&self, task: &Task) -> Option<Task> { + self.shared.owned.remove(task) + } + + fn schedule(&self, task: Notified) { + self.schedule_task(task, false); + } + + fn yield_now(&self, task: Notified) { + self.schedule_task(task, true); + } +} + +impl Handle { + pub(super) fn schedule_task(&self, task: Notified, is_yield: bool) { + with_current(|maybe_cx| { + if let Some(cx) = maybe_cx { + // Make sure the task is part of the **current** scheduler. + if self.ptr_eq(&cx.worker.handle) { + // And the current thread still holds a core + if let Some(core) = cx.core.borrow_mut().as_mut() { + self.schedule_local(core, task, is_yield); + return; + } + } + } + + // Otherwise, use the inject queue. + self.push_remote_task(task); + self.notify_parked_remote(); + }) + } + + fn schedule_local(&self, core: &mut Core, task: Notified, is_yield: bool) { + core.stats.inc_local_schedule_count(); + + // Spawning from the worker thread. If scheduling a "yield" then the + // task must always be pushed to the back of the queue, enabling other + // tasks to be executed. If **not** a yield, then there is more + // flexibility and the task may go to the front of the queue. + let should_notify = if is_yield || !core.lifo_enabled { + core.run_queue + .push_back_or_overflow(task, self, &mut core.stats); + true + } else { + // Push to the LIFO slot + let prev = core.lifo_slot.take(); + let ret = prev.is_some(); + + if let Some(prev) = prev { + core.run_queue + .push_back_or_overflow(prev, self, &mut core.stats); + } + + core.lifo_slot = Some(task); + + ret + }; + + // Only notify if not currently parked. If `park` is `None`, then the + // scheduling is from a resource driver. As notifications often come in + // batches, the notification is delayed until the park is complete. + if should_notify && core.park.is_some() { + self.notify_parked_local(); + } + } + + fn next_remote_task(&self) -> Option<Notified> { + if self.shared.inject.is_empty() { + return None; + } + + let mut synced = self.shared.synced.lock(); + // safety: passing in correct `idle::Synced` + unsafe { self.shared.inject.pop(&mut synced.inject) } + } + + fn push_remote_task(&self, task: Notified) { + self.shared.scheduler_metrics.inc_remote_schedule_count(); + + let mut synced = self.shared.synced.lock(); + // safety: passing in correct `idle::Synced` + unsafe { + self.shared.inject.push(&mut synced.inject, task); + } + } + + pub(super) fn close(&self) { + if self + .shared + .inject + .close(&mut self.shared.synced.lock().inject) + { + self.notify_all(); + } + } + + fn notify_parked_local(&self) { + super::counters::inc_num_inc_notify_local(); + + if let Some(index) = self.shared.idle.worker_to_notify(&self.shared) { + super::counters::inc_num_unparks_local(); + self.shared.remotes[index].unpark.unpark(&self.driver); + } + } + + fn notify_parked_remote(&self) { + if let Some(index) = self.shared.idle.worker_to_notify(&self.shared) { + self.shared.remotes[index].unpark.unpark(&self.driver); + } + } + + pub(super) fn notify_all(&self) { + for remote in &self.shared.remotes[..] { + remote.unpark.unpark(&self.driver); + } + } + + fn notify_if_work_pending(&self) { + for remote in &self.shared.remotes[..] 
{ + if !remote.steal.is_empty() { + self.notify_parked_local(); + return; + } + } + + if !self.shared.inject.is_empty() { + self.notify_parked_local(); + } + } + + fn transition_worker_from_searching(&self) { + if self.shared.idle.transition_worker_from_searching() { + // We are the final searching worker. Because work was found, we + // need to notify another worker. + self.notify_parked_local(); + } + } + + /// Signals that a worker has observed the shutdown signal and has replaced + /// its core back into its handle. + /// + /// If all workers have reached this point, the final cleanup is performed. + fn shutdown_core(&self, core: Box<Core>) { + let mut cores = self.shared.shutdown_cores.lock(); + cores.push(core); + + if cores.len() != self.shared.remotes.len() { + return; + } + + debug_assert!(self.shared.owned.is_empty()); + + for mut core in cores.drain(..) { + core.shutdown(self); + } + + // Drain the injection queue + // + // We already shut down every task, so we can simply drop the tasks. + while let Some(task) = self.next_remote_task() { + drop(task); + } + } + + fn ptr_eq(&self, other: &Handle) -> bool { + std::ptr::eq(self, other) + } +} + +impl Overflow<Arc<Handle>> for Handle { + fn push(&self, task: task::Notified<Arc<Handle>>) { + self.push_remote_task(task); + } + + fn push_batch<I>(&self, iter: I) + where + I: Iterator<Item = task::Notified<Arc<Handle>>>, + { + unsafe { + self.shared.inject.push_batch(self, iter); + } + } +} + +pub(crate) struct InjectGuard<'a> { + lock: crate::loom::sync::MutexGuard<'a, Synced>, +} + +impl<'a> AsMut<inject::Synced> for InjectGuard<'a> { + fn as_mut(&mut self) -> &mut inject::Synced { + &mut self.lock.inject + } +} + +impl<'a> Lock<inject::Synced> for &'a Handle { + type Handle = InjectGuard<'a>; + + fn lock(self) -> Self::Handle { + InjectGuard { + lock: self.shared.synced.lock(), + } + } +} + +#[track_caller] +fn with_current<R>(f: impl FnOnce(Option<&Context>) -> R) -> R { + use scheduler::Context::MultiThread; + + context::with_scheduler(|ctx| match ctx { + Some(MultiThread(ctx)) => f(Some(ctx)), + _ => f(None), + }) +} + +// `u32::abs_diff` is not available on Tokio's MSRV. 
+fn abs_diff(a: u32, b: u32) -> u32 { + if a > b { + a - b + } else { + b - a + } +} diff --git a/third_party/rust/tokio/src/runtime/scheduler/multi_thread/worker/metrics.rs b/third_party/rust/tokio/src/runtime/scheduler/multi_thread/worker/metrics.rs new file mode 100644 index 0000000000..a9a5ab3ed6 --- /dev/null +++ b/third_party/rust/tokio/src/runtime/scheduler/multi_thread/worker/metrics.rs @@ -0,0 +1,11 @@ +use super::Shared; + +impl Shared { + pub(crate) fn injection_queue_depth(&self) -> usize { + self.inject.len() + } + + pub(crate) fn worker_local_queue_depth(&self, worker: usize) -> usize { + self.remotes[worker].steal.len() + } +} diff --git a/third_party/rust/tokio/src/runtime/scheduler/multi_thread/worker/taskdump.rs b/third_party/rust/tokio/src/runtime/scheduler/multi_thread/worker/taskdump.rs new file mode 100644 index 0000000000..d310d9f6d3 --- /dev/null +++ b/third_party/rust/tokio/src/runtime/scheduler/multi_thread/worker/taskdump.rs @@ -0,0 +1,79 @@ +use super::{Core, Handle, Shared}; + +use crate::loom::sync::Arc; +use crate::runtime::scheduler::multi_thread::Stats; +use crate::runtime::task::trace::trace_multi_thread; +use crate::runtime::{dump, WorkerMetrics}; + +use std::time::Duration; + +impl Handle { + pub(super) fn trace_core(&self, mut core: Box<Core>) -> Box<Core> { + core.is_traced = false; + + if core.is_shutdown { + return core; + } + + // wait for other workers, or timeout without tracing + let timeout = Duration::from_millis(250); // a _very_ generous timeout + let barrier = + if let Some(barrier) = self.shared.trace_status.trace_start.wait_timeout(timeout) { + barrier + } else { + // don't attempt to trace + return core; + }; + + if !barrier.is_leader() { + // wait for leader to finish tracing + self.shared.trace_status.trace_end.wait(); + return core; + } + + // trace + + let owned = &self.shared.owned; + let mut local = self.shared.steal_all(); + let synced = &self.shared.synced; + let injection = &self.shared.inject; + + // safety: `trace_multi_thread` is invoked with the same `synced` that `injection` + // was created with. + let traces = unsafe { trace_multi_thread(owned, &mut local, synced, injection) } + .into_iter() + .map(dump::Task::new) + .collect(); + + let result = dump::Dump::new(traces); + + // stash the result + self.shared.trace_status.stash_result(result); + + // allow other workers to proceed + self.shared.trace_status.trace_end.wait(); + + core + } +} + +impl Shared { + /// Steal all tasks from remotes into a single local queue. 
+ pub(super) fn steal_all(&self) -> super::queue::Local<Arc<Handle>> { + let (_steal, mut local) = super::queue::local(); + + let worker_metrics = WorkerMetrics::new(); + let mut stats = Stats::new(&worker_metrics); + + for remote in self.remotes.iter() { + let steal = &remote.steal; + while !steal.is_empty() { + if let Some(task) = steal.steal_into(&mut local, &mut stats) { + local.push_back([task].into_iter()); + } + } + } + + local + } +} diff --git a/third_party/rust/tokio/src/runtime/scheduler/multi_thread/worker/taskdump_mock.rs b/third_party/rust/tokio/src/runtime/scheduler/multi_thread/worker/taskdump_mock.rs new file mode 100644 index 0000000000..24c5600ce2 --- /dev/null +++ b/third_party/rust/tokio/src/runtime/scheduler/multi_thread/worker/taskdump_mock.rs @@ -0,0 +1,7 @@ +use super::{Core, Handle}; + +impl Handle { + pub(super) fn trace_core(&self, core: Box<Core>) -> Box<Core> { + core + } +} diff --git a/third_party/rust/tokio/src/runtime/signal/mod.rs b/third_party/rust/tokio/src/runtime/signal/mod.rs new file mode 100644 index 0000000000..24f2f4c6cb --- /dev/null +++ b/third_party/rust/tokio/src/runtime/signal/mod.rs @@ -0,0 +1,142 @@ +#![cfg_attr(not(feature = "rt"), allow(dead_code))] + +//! Signal driver + +use crate::runtime::{driver, io}; +use crate::signal::registry::globals; + +use mio::net::UnixStream; +use std::io::{self as std_io, Read}; +use std::sync::{Arc, Weak}; +use std::time::Duration; + +/// Responsible for registering wakeups when an OS signal is received, and +/// subsequently dispatching notifications to any signal listeners as appropriate. +/// +/// Note: this driver relies on having an enabled IO driver in order to listen to +/// pipe write wakeups. +#[derive(Debug)] +pub(crate) struct Driver { + /// Thread parker. The `Driver` park implementation delegates to this. + io: io::Driver, + + /// A pipe for receiving wake events from the signal handler + receiver: UnixStream, + + /// Shared state. The driver keeps a strong ref and the handle keeps a weak + /// ref. The weak ref is used to check if the driver is still active before + /// trying to register a signal handler. + inner: Arc<()>, +} + +#[derive(Debug, Default)] +pub(crate) struct Handle { + /// Paired w/ the `Arc` above and is used to check if the driver is still + /// around before attempting to register a signal handler. + inner: Weak<()>, +} + +// ===== impl Driver ===== + +impl Driver { + /// Creates a new signal `Driver` instance that delegates wakeups to `park`. + pub(crate) fn new(io: io::Driver, io_handle: &io::Handle) -> std_io::Result<Self> { + use std::mem::ManuallyDrop; + use std::os::unix::io::{AsRawFd, FromRawFd}; + + // NB: We give each driver a "fresh" receiver file descriptor to avoid + // the issues described in alexcrichton/tokio-process#42. + // + // In the past we would reuse the actual receiver file descriptor and + // swallow any errors around double registration of the same descriptor. + // I'm not sure if the second (failed) registration simply doesn't end + // up receiving wake up notifications, or there could be some race + // condition when consuming readiness events, but having distinct + // descriptors appears to mitigate this. + // + // Unfortunately we cannot just use a single global UnixStream instance + // either, since we can't assume they will always be registered with the + // exact same reactor. + // + // Mio 0.7 removed `try_clone()` as an API due to unexpected behavior + // with registering dups with the same reactor. 
In this case, duping is + // safe as each dup is registered with separate reactors **and** we + // only expect at least one dup to receive the notification. + + // Manually drop as we don't actually own this instance of UnixStream. + let receiver_fd = globals().receiver.as_raw_fd(); + + // safety: there is nothing unsafe about this, but the `from_raw_fd` fn is marked as unsafe. + let original = + ManuallyDrop::new(unsafe { std::os::unix::net::UnixStream::from_raw_fd(receiver_fd) }); + let mut receiver = UnixStream::from_std(original.try_clone()?); + + io_handle.register_signal_receiver(&mut receiver)?; + + Ok(Self { + io, + receiver, + inner: Arc::new(()), + }) + } + + /// Returns a handle to this event loop which can be sent across threads + /// and can be used as a proxy to the event loop itself. + pub(crate) fn handle(&self) -> Handle { + Handle { + inner: Arc::downgrade(&self.inner), + } + } + + pub(crate) fn park(&mut self, handle: &driver::Handle) { + self.io.park(handle); + self.process(); + } + + pub(crate) fn park_timeout(&mut self, handle: &driver::Handle, duration: Duration) { + self.io.park_timeout(handle, duration); + self.process(); + } + + pub(crate) fn shutdown(&mut self, handle: &driver::Handle) { + self.io.shutdown(handle) + } + + fn process(&mut self) { + // If the signal pipe has not received a readiness event, then there is + // nothing else to do. + if !self.io.consume_signal_ready() { + return; + } + + // Drain the pipe completely so we can receive a new readiness event + // if another signal has come in. + let mut buf = [0; 128]; + loop { + match self.receiver.read(&mut buf) { + Ok(0) => panic!("EOF on self-pipe"), + Ok(_) => continue, // Keep reading + Err(e) if e.kind() == std_io::ErrorKind::WouldBlock => break, + Err(e) => panic!("Bad read on self-pipe: {}", e), + } + } + + // Broadcast any signals which were received + globals().broadcast(); + } +} + +// ===== impl Handle ===== + +impl Handle { + pub(crate) fn check_inner(&self) -> std_io::Result<()> { + if self.inner.strong_count() > 0 { + Ok(()) + } else { + Err(std_io::Error::new( + std_io::ErrorKind::Other, + "signal driver gone", + )) + } + } +} diff --git a/third_party/rust/tokio/src/runtime/task/abort.rs b/third_party/rust/tokio/src/runtime/task/abort.rs new file mode 100644 index 0000000000..6edca10040 --- /dev/null +++ b/third_party/rust/tokio/src/runtime/task/abort.rs @@ -0,0 +1,87 @@ +use crate::runtime::task::{Header, RawTask}; +use std::fmt; +use std::panic::{RefUnwindSafe, UnwindSafe}; + +/// An owned permission to abort a spawned task, without awaiting its completion. +/// +/// Unlike a [`JoinHandle`], an `AbortHandle` does *not* represent the +/// permission to await the task's completion, only to terminate it. +/// +/// The task may be aborted by calling the [`AbortHandle::abort`] method. +/// Dropping an `AbortHandle` releases the permission to terminate the task +/// --- it does *not* abort the task. +/// +/// [`JoinHandle`]: crate::task::JoinHandle +#[cfg_attr(docsrs, doc(cfg(feature = "rt")))] +pub struct AbortHandle { + raw: RawTask, +} + +impl AbortHandle { + pub(super) fn new(raw: RawTask) -> Self { + Self { raw } + } + + /// Abort the task associated with the handle. + /// + /// Awaiting a cancelled task might complete as usual if the task was + /// already completed at the time it was cancelled, but most likely it + /// will fail with a [cancelled] `JoinError`. + /// + /// If the task was already cancelled, such as by [`JoinHandle::abort`], + /// this method will do nothing. 
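For context, the abort behaviour documented above is reachable through the public API via `JoinHandle::abort_handle`. A minimal sketch, assuming the `rt-multi-thread`, `macros`, and `time` features:

```rust
// Sketch only: user-facing usage of AbortHandle, not part of the vendored diff.
use tokio::time::{sleep, Duration};

#[tokio::main]
async fn main() {
    let handle = tokio::spawn(async {
        sleep(Duration::from_secs(10)).await;
        "finished"
    });

    // The AbortHandle terminates the task without consuming the JoinHandle,
    // so the outcome can still be observed below.
    let abort = handle.abort_handle();
    abort.abort();

    // The task was still sleeping when it was aborted, so joining it reports
    // a cancelled JoinError rather than the value "finished".
    assert!(handle.await.unwrap_err().is_cancelled());
}
```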
+ /// + /// [cancelled]: method@super::error::JoinError::is_cancelled + /// [`JoinHandle::abort`]: method@super::JoinHandle::abort + pub fn abort(&self) { + self.raw.remote_abort(); + } + + /// Checks if the task associated with this `AbortHandle` has finished. + /// + /// Please note that this method can return `false` even if `abort` has been + /// called on the task. This is because the cancellation process may take + /// some time, and this method does not return `true` until it has + /// completed. + pub fn is_finished(&self) -> bool { + let state = self.raw.state().load(); + state.is_complete() + } + + /// Returns a [task ID] that uniquely identifies this task relative to other + /// currently spawned tasks. + /// + /// **Note**: This is an [unstable API][unstable]. The public API of this type + /// may break in 1.x releases. See [the documentation on unstable + /// features][unstable] for details. + /// + /// [task ID]: crate::task::Id + /// [unstable]: crate#unstable-features + #[cfg(tokio_unstable)] + #[cfg_attr(docsrs, doc(cfg(tokio_unstable)))] + pub fn id(&self) -> super::Id { + // Safety: The header pointer is valid. + unsafe { Header::get_id(self.raw.header_ptr()) } + } +} + +unsafe impl Send for AbortHandle {} +unsafe impl Sync for AbortHandle {} + +impl UnwindSafe for AbortHandle {} +impl RefUnwindSafe for AbortHandle {} + +impl fmt::Debug for AbortHandle { + fn fmt(&self, fmt: &mut fmt::Formatter<'_>) -> fmt::Result { + // Safety: The header pointer is valid. + let id_ptr = unsafe { Header::get_id_ptr(self.raw.header_ptr()) }; + let id = unsafe { id_ptr.as_ref() }; + fmt.debug_struct("AbortHandle").field("id", id).finish() + } +} + +impl Drop for AbortHandle { + fn drop(&mut self) { + self.raw.drop_abort_handle(); + } +} diff --git a/third_party/rust/tokio/src/runtime/task/core.rs b/third_party/rust/tokio/src/runtime/task/core.rs new file mode 100644 index 0000000000..110933e58f --- /dev/null +++ b/third_party/rust/tokio/src/runtime/task/core.rs @@ -0,0 +1,470 @@ +//! Core task module. +//! +//! # Safety +//! +//! The functions in this module are private to the `task` module. All of them +//! should be considered `unsafe` to use, but are not marked as such since it +//! would be too noisy. +//! +//! Make sure to consult the relevant safety section of each function before +//! use. + +use crate::future::Future; +use crate::loom::cell::UnsafeCell; +use crate::runtime::context; +use crate::runtime::task::raw::{self, Vtable}; +use crate::runtime::task::state::State; +use crate::runtime::task::{Id, Schedule}; +use crate::util::linked_list; + +use std::pin::Pin; +use std::ptr::NonNull; +use std::task::{Context, Poll, Waker}; + +/// The task cell. Contains the components of the task. +/// +/// It is critical for `Header` to be the first field as the task structure will +/// be referenced by both *mut Cell and *mut Header. +/// +/// Any changes to the layout of this struct _must_ also be reflected in the +/// const fns in raw.rs. +/// +// # This struct should be cache padded to avoid false sharing. The cache padding rules are copied +// from crossbeam-utils/src/cache_padded.rs +// +// Starting from Intel's Sandy Bridge, spatial prefetcher is now pulling pairs of 64-byte cache +// lines at a time, so we have to align to 128 bytes rather than 64. 
+// +// Sources: +// - https://www.intel.com/content/dam/www/public/us/en/documents/manuals/64-ia-32-architectures-optimization-manual.pdf +// - https://github.com/facebook/folly/blob/1b5288e6eea6df074758f877c849b6e73bbb9fbb/folly/lang/Align.h#L107 +// +// ARM's big.LITTLE architecture has asymmetric cores and "big" cores have 128-byte cache line size. +// +// Sources: +// - https://www.mono-project.com/news/2016/09/12/arm64-icache/ +// +// powerpc64 has 128-byte cache line size. +// +// Sources: +// - https://github.com/golang/go/blob/3dd58676054223962cd915bb0934d1f9f489d4d2/src/internal/cpu/cpu_ppc64x.go#L9 +#[cfg_attr( + any( + target_arch = "x86_64", + target_arch = "aarch64", + target_arch = "powerpc64", + ), + repr(align(128)) +)] +// arm, mips, mips64, riscv64, sparc, and hexagon have 32-byte cache line size. +// +// Sources: +// - https://github.com/golang/go/blob/3dd58676054223962cd915bb0934d1f9f489d4d2/src/internal/cpu/cpu_arm.go#L7 +// - https://github.com/golang/go/blob/3dd58676054223962cd915bb0934d1f9f489d4d2/src/internal/cpu/cpu_mips.go#L7 +// - https://github.com/golang/go/blob/3dd58676054223962cd915bb0934d1f9f489d4d2/src/internal/cpu/cpu_mipsle.go#L7 +// - https://github.com/golang/go/blob/3dd58676054223962cd915bb0934d1f9f489d4d2/src/internal/cpu/cpu_mips64x.go#L9 +// - https://github.com/golang/go/blob/3dd58676054223962cd915bb0934d1f9f489d4d2/src/internal/cpu/cpu_riscv64.go#L7 +// - https://github.com/torvalds/linux/blob/3516bd729358a2a9b090c1905bd2a3fa926e24c6/arch/sparc/include/asm/cache.h#L17 +// - https://github.com/torvalds/linux/blob/3516bd729358a2a9b090c1905bd2a3fa926e24c6/arch/hexagon/include/asm/cache.h#L12 +// +// riscv32 is assumed not to exceed the cache line size of riscv64. +#[cfg_attr( + any( + target_arch = "arm", + target_arch = "mips", + target_arch = "mips64", + target_arch = "riscv32", + target_arch = "riscv64", + target_arch = "sparc", + target_arch = "hexagon", + ), + repr(align(32)) +)] +// m68k has 16-byte cache line size. +// +// Sources: +// - https://github.com/torvalds/linux/blob/3516bd729358a2a9b090c1905bd2a3fa926e24c6/arch/m68k/include/asm/cache.h#L9 +#[cfg_attr(target_arch = "m68k", repr(align(16)))] +// s390x has 256-byte cache line size. +// +// Sources: +// - https://github.com/golang/go/blob/3dd58676054223962cd915bb0934d1f9f489d4d2/src/internal/cpu/cpu_s390x.go#L7 +// - https://github.com/torvalds/linux/blob/3516bd729358a2a9b090c1905bd2a3fa926e24c6/arch/s390/include/asm/cache.h#L13 +#[cfg_attr(target_arch = "s390x", repr(align(256)))] +// x86, wasm, and sparc64 have 64-byte cache line size. +// +// Sources: +// - https://github.com/golang/go/blob/dda2991c2ea0c5914714469c4defc2562a907230/src/internal/cpu/cpu_x86.go#L9 +// - https://github.com/golang/go/blob/3dd58676054223962cd915bb0934d1f9f489d4d2/src/internal/cpu/cpu_wasm.go#L7 +// - https://github.com/torvalds/linux/blob/3516bd729358a2a9b090c1905bd2a3fa926e24c6/arch/sparc/include/asm/cache.h#L19 +// +// All others are assumed to have 64-byte cache line size. +#[cfg_attr( + not(any( + target_arch = "x86_64", + target_arch = "aarch64", + target_arch = "powerpc64", + target_arch = "arm", + target_arch = "mips", + target_arch = "mips64", + target_arch = "riscv32", + target_arch = "riscv64", + target_arch = "sparc", + target_arch = "hexagon", + target_arch = "m68k", + target_arch = "s390x", + )), + repr(align(64)) +)] +#[repr(C)] +pub(super) struct Cell<T: Future, S> { + /// Hot task state data + pub(super) header: Header, + + /// Either the future or output, depending on the execution stage. 
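For context, this `cfg_attr`/`repr(align(..))` table is the cache-padding idea the comment above attributes to crossbeam. A stripped-down sketch with a hypothetical `CachePadded` wrapper and only two of the architecture cases, for illustration only (it is not Tokio's type):

```rust
// Sketch only: illustrates the alignment trick used by `Cell` above.
use std::sync::atomic::AtomicUsize;

#[cfg_attr(target_arch = "x86_64", repr(align(128)))] // prefetcher pulls line pairs
#[cfg_attr(not(target_arch = "x86_64"), repr(align(64)))] // common 64-byte line size
struct CachePadded<T>(T);

fn main() {
    let hits = CachePadded(AtomicUsize::new(0));
    hits.0.fetch_add(1, std::sync::atomic::Ordering::Relaxed);
    // The wrapper starts on an alignment boundary of at least 64 bytes, so an
    // adjacent counter cannot sit on (and falsely invalidate) the same line.
    assert_eq!(std::mem::align_of_val(&hits) % 64, 0);
}
```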
+ pub(super) core: Core<T, S>, + + /// Cold data + pub(super) trailer: Trailer, +} + +pub(super) struct CoreStage<T: Future> { + stage: UnsafeCell<Stage<T>>, +} + +/// The core of the task. +/// +/// Holds the future or output, depending on the stage of execution. +/// +/// Any changes to the layout of this struct _must_ also be reflected in the +/// const fns in raw.rs. +#[repr(C)] +pub(super) struct Core<T: Future, S> { + /// Scheduler used to drive this future. + pub(super) scheduler: S, + + /// The task's ID, used for populating `JoinError`s. + pub(super) task_id: Id, + + /// Either the future or the output. + pub(super) stage: CoreStage<T>, +} + +/// Crate public as this is also needed by the pool. +#[repr(C)] +pub(crate) struct Header { + /// Task state. + pub(super) state: State, + + /// Pointer to next task, used with the injection queue. + pub(super) queue_next: UnsafeCell<Option<NonNull<Header>>>, + + /// Table of function pointers for executing actions on the task. + pub(super) vtable: &'static Vtable, + + /// This integer contains the id of the OwnedTasks or LocalOwnedTasks that + /// this task is stored in. If the task is not in any list, should be the + /// id of the list that it was previously in, or zero if it has never been + /// in any list. + /// + /// Once a task has been bound to a list, it can never be bound to another + /// list, even if removed from the first list. + /// + /// The id is not unset when removed from a list because we want to be able + /// to read the id without synchronization, even if it is concurrently being + /// removed from the list. + pub(super) owner_id: UnsafeCell<u64>, + + /// The tracing ID for this instrumented task. + #[cfg(all(tokio_unstable, feature = "tracing"))] + pub(super) tracing_id: Option<tracing::Id>, +} + +unsafe impl Send for Header {} +unsafe impl Sync for Header {} + +/// Cold data is stored after the future. Data is considered cold if it is only +/// used during creation or shutdown of the task. +pub(super) struct Trailer { + /// Pointers for the linked list in the `OwnedTasks` that owns this task. + pub(super) owned: linked_list::Pointers<Header>, + /// Consumer task waiting on completion of this task. + pub(super) waker: UnsafeCell<Option<Waker>>, +} + +generate_addr_of_methods! { + impl<> Trailer { + pub(super) unsafe fn addr_of_owned(self: NonNull<Self>) -> NonNull<linked_list::Pointers<Header>> { + &self.owned + } + } +} + +/// Either the future or the output. +pub(super) enum Stage<T: Future> { + Running(T), + Finished(super::Result<T::Output>), + Consumed, +} + +impl<T: Future, S: Schedule> Cell<T, S> { + /// Allocates a new task cell, containing the header, trailer, and core + /// structures. 
+ pub(super) fn new(future: T, scheduler: S, state: State, task_id: Id) -> Box<Cell<T, S>> { + #[cfg(all(tokio_unstable, feature = "tracing"))] + let tracing_id = future.id(); + let result = Box::new(Cell { + header: Header { + state, + queue_next: UnsafeCell::new(None), + vtable: raw::vtable::<T, S>(), + owner_id: UnsafeCell::new(0), + #[cfg(all(tokio_unstable, feature = "tracing"))] + tracing_id, + }, + core: Core { + scheduler, + stage: CoreStage { + stage: UnsafeCell::new(Stage::Running(future)), + }, + task_id, + }, + trailer: Trailer { + waker: UnsafeCell::new(None), + owned: linked_list::Pointers::new(), + }, + }); + + #[cfg(debug_assertions)] + { + let trailer_addr = (&result.trailer) as *const Trailer as usize; + let trailer_ptr = unsafe { Header::get_trailer(NonNull::from(&result.header)) }; + assert_eq!(trailer_addr, trailer_ptr.as_ptr() as usize); + + let scheduler_addr = (&result.core.scheduler) as *const S as usize; + let scheduler_ptr = + unsafe { Header::get_scheduler::<S>(NonNull::from(&result.header)) }; + assert_eq!(scheduler_addr, scheduler_ptr.as_ptr() as usize); + + let id_addr = (&result.core.task_id) as *const Id as usize; + let id_ptr = unsafe { Header::get_id_ptr(NonNull::from(&result.header)) }; + assert_eq!(id_addr, id_ptr.as_ptr() as usize); + } + + result + } +} + +impl<T: Future> CoreStage<T> { + pub(super) fn with_mut<R>(&self, f: impl FnOnce(*mut Stage<T>) -> R) -> R { + self.stage.with_mut(f) + } +} + +/// Set and clear the task id in the context when the future is executed or +/// dropped, or when the output produced by the future is dropped. +pub(crate) struct TaskIdGuard { + parent_task_id: Option<Id>, +} + +impl TaskIdGuard { + fn enter(id: Id) -> Self { + TaskIdGuard { + parent_task_id: context::set_current_task_id(Some(id)), + } + } +} + +impl Drop for TaskIdGuard { + fn drop(&mut self) { + context::set_current_task_id(self.parent_task_id); + } +} + +impl<T: Future, S: Schedule> Core<T, S> { + /// Polls the future. + /// + /// # Safety + /// + /// The caller must ensure it is safe to mutate the `state` field. This + /// requires ensuring mutual exclusion between any concurrent thread that + /// might modify the future or output field. + /// + /// The mutual exclusion is implemented by `Harness` and the `Lifecycle` + /// component of the task state. + /// + /// `self` must also be pinned. This is handled by storing the task on the + /// heap. + pub(super) fn poll(&self, mut cx: Context<'_>) -> Poll<T::Output> { + let res = { + self.stage.stage.with_mut(|ptr| { + // Safety: The caller ensures mutual exclusion to the field. + let future = match unsafe { &mut *ptr } { + Stage::Running(future) => future, + _ => unreachable!("unexpected stage"), + }; + + // Safety: The caller ensures the future is pinned. + let future = unsafe { Pin::new_unchecked(future) }; + + let _guard = TaskIdGuard::enter(self.task_id); + future.poll(&mut cx) + }) + }; + + if res.is_ready() { + self.drop_future_or_output(); + } + + res + } + + /// Drops the future. + /// + /// # Safety + /// + /// The caller must ensure it is safe to mutate the `stage` field. + pub(super) fn drop_future_or_output(&self) { + // Safety: the caller ensures mutual exclusion to the field. + unsafe { + self.set_stage(Stage::Consumed); + } + } + + /// Stores the task output. + /// + /// # Safety + /// + /// The caller must ensure it is safe to mutate the `stage` field. + pub(super) fn store_output(&self, output: super::Result<T::Output>) { + // Safety: the caller ensures mutual exclusion to the field. 
+ unsafe { + self.set_stage(Stage::Finished(output)); + } + } + + /// Takes the task output. + /// + /// # Safety + /// + /// The caller must ensure it is safe to mutate the `stage` field. + pub(super) fn take_output(&self) -> super::Result<T::Output> { + use std::mem; + + self.stage.stage.with_mut(|ptr| { + // Safety:: the caller ensures mutual exclusion to the field. + match mem::replace(unsafe { &mut *ptr }, Stage::Consumed) { + Stage::Finished(output) => output, + _ => panic!("JoinHandle polled after completion"), + } + }) + } + + unsafe fn set_stage(&self, stage: Stage<T>) { + let _guard = TaskIdGuard::enter(self.task_id); + self.stage.stage.with_mut(|ptr| *ptr = stage) + } +} + +impl Header { + pub(super) unsafe fn set_next(&self, next: Option<NonNull<Header>>) { + self.queue_next.with_mut(|ptr| *ptr = next); + } + + // safety: The caller must guarantee exclusive access to this field, and + // must ensure that the id is either 0 or the id of the OwnedTasks + // containing this task. + pub(super) unsafe fn set_owner_id(&self, owner: u64) { + self.owner_id.with_mut(|ptr| *ptr = owner); + } + + pub(super) fn get_owner_id(&self) -> u64 { + // safety: If there are concurrent writes, then that write has violated + // the safety requirements on `set_owner_id`. + unsafe { self.owner_id.with(|ptr| *ptr) } + } + + /// Gets a pointer to the `Trailer` of the task containing this `Header`. + /// + /// # Safety + /// + /// The provided raw pointer must point at the header of a task. + pub(super) unsafe fn get_trailer(me: NonNull<Header>) -> NonNull<Trailer> { + let offset = me.as_ref().vtable.trailer_offset; + let trailer = me.as_ptr().cast::<u8>().add(offset).cast::<Trailer>(); + NonNull::new_unchecked(trailer) + } + + /// Gets a pointer to the scheduler of the task containing this `Header`. + /// + /// # Safety + /// + /// The provided raw pointer must point at the header of a task. + /// + /// The generic type S must be set to the correct scheduler type for this + /// task. + pub(super) unsafe fn get_scheduler<S>(me: NonNull<Header>) -> NonNull<S> { + let offset = me.as_ref().vtable.scheduler_offset; + let scheduler = me.as_ptr().cast::<u8>().add(offset).cast::<S>(); + NonNull::new_unchecked(scheduler) + } + + /// Gets a pointer to the id of the task containing this `Header`. + /// + /// # Safety + /// + /// The provided raw pointer must point at the header of a task. + pub(super) unsafe fn get_id_ptr(me: NonNull<Header>) -> NonNull<Id> { + let offset = me.as_ref().vtable.id_offset; + let id = me.as_ptr().cast::<u8>().add(offset).cast::<Id>(); + NonNull::new_unchecked(id) + } + + /// Gets the id of the task containing this `Header`. + /// + /// # Safety + /// + /// The provided raw pointer must point at the header of a task. + pub(super) unsafe fn get_id(me: NonNull<Header>) -> Id { + let ptr = Header::get_id_ptr(me).as_ptr(); + *ptr + } + + /// Gets the tracing id of the task containing this `Header`. + /// + /// # Safety + /// + /// The provided raw pointer must point at the header of a task. 
+ #[cfg(all(tokio_unstable, feature = "tracing"))] + pub(super) unsafe fn get_tracing_id(me: &NonNull<Header>) -> Option<&tracing::Id> { + me.as_ref().tracing_id.as_ref() + } +} + +impl Trailer { + pub(super) unsafe fn set_waker(&self, waker: Option<Waker>) { + self.waker.with_mut(|ptr| { + *ptr = waker; + }); + } + + pub(super) unsafe fn will_wake(&self, waker: &Waker) -> bool { + self.waker + .with(|ptr| (*ptr).as_ref().unwrap().will_wake(waker)) + } + + pub(super) fn wake_join(&self) { + self.waker.with(|ptr| match unsafe { &*ptr } { + Some(waker) => waker.wake_by_ref(), + None => panic!("waker missing"), + }); + } +} + +#[test] +#[cfg(not(loom))] +fn header_lte_cache_line() { + use std::mem::size_of; + + assert!(size_of::<Header>() <= 8 * size_of::<*const ()>()); +} diff --git a/third_party/rust/tokio/src/runtime/task/error.rs b/third_party/rust/tokio/src/runtime/task/error.rs new file mode 100644 index 0000000000..f7ead77b7c --- /dev/null +++ b/third_party/rust/tokio/src/runtime/task/error.rs @@ -0,0 +1,165 @@ +use std::any::Any; +use std::fmt; +use std::io; + +use super::Id; +use crate::util::SyncWrapper; +cfg_rt! { + /// Task failed to execute to completion. + pub struct JoinError { + repr: Repr, + id: Id, + } +} + +enum Repr { + Cancelled, + Panic(SyncWrapper<Box<dyn Any + Send + 'static>>), +} + +impl JoinError { + pub(crate) fn cancelled(id: Id) -> JoinError { + JoinError { + repr: Repr::Cancelled, + id, + } + } + + pub(crate) fn panic(id: Id, err: Box<dyn Any + Send + 'static>) -> JoinError { + JoinError { + repr: Repr::Panic(SyncWrapper::new(err)), + id, + } + } + + /// Returns true if the error was caused by the task being cancelled. + pub fn is_cancelled(&self) -> bool { + matches!(&self.repr, Repr::Cancelled) + } + + /// Returns true if the error was caused by the task panicking. + /// + /// # Examples + /// + /// ``` + /// use std::panic; + /// + /// #[tokio::main] + /// async fn main() { + /// let err = tokio::spawn(async { + /// panic!("boom"); + /// }).await.unwrap_err(); + /// + /// assert!(err.is_panic()); + /// } + /// ``` + pub fn is_panic(&self) -> bool { + matches!(&self.repr, Repr::Panic(_)) + } + + /// Consumes the join error, returning the object with which the task panicked. + /// + /// # Panics + /// + /// `into_panic()` panics if the `Error` does not represent the underlying + /// task terminating with a panic. Use `is_panic` to check the error reason + /// or `try_into_panic` for a variant that does not panic. + /// + /// # Examples + /// + /// ```should_panic + /// use std::panic; + /// + /// #[tokio::main] + /// async fn main() { + /// let err = tokio::spawn(async { + /// panic!("boom"); + /// }).await.unwrap_err(); + /// + /// if err.is_panic() { + /// // Resume the panic on the main task + /// panic::resume_unwind(err.into_panic()); + /// } + /// } + /// ``` + #[track_caller] + pub fn into_panic(self) -> Box<dyn Any + Send + 'static> { + self.try_into_panic() + .expect("`JoinError` reason is not a panic.") + } + + /// Consumes the join error, returning the object with which the task + /// panicked if the task terminated due to a panic. Otherwise, `self` is + /// returned. 
+ /// + /// # Examples + /// + /// ```should_panic + /// use std::panic; + /// + /// #[tokio::main] + /// async fn main() { + /// let err = tokio::spawn(async { + /// panic!("boom"); + /// }).await.unwrap_err(); + /// + /// if let Ok(reason) = err.try_into_panic() { + /// // Resume the panic on the main task + /// panic::resume_unwind(reason); + /// } + /// } + /// ``` + pub fn try_into_panic(self) -> Result<Box<dyn Any + Send + 'static>, JoinError> { + match self.repr { + Repr::Panic(p) => Ok(p.into_inner()), + _ => Err(self), + } + } + + /// Returns a [task ID] that identifies the task which errored relative to + /// other currently spawned tasks. + /// + /// **Note**: This is an [unstable API][unstable]. The public API of this type + /// may break in 1.x releases. See [the documentation on unstable + /// features][unstable] for details. + /// + /// [task ID]: crate::task::Id + /// [unstable]: crate#unstable-features + #[cfg(tokio_unstable)] + #[cfg_attr(docsrs, doc(cfg(tokio_unstable)))] + pub fn id(&self) -> Id { + self.id + } +} + +impl fmt::Display for JoinError { + fn fmt(&self, fmt: &mut fmt::Formatter<'_>) -> fmt::Result { + match &self.repr { + Repr::Cancelled => write!(fmt, "task {} was cancelled", self.id), + Repr::Panic(_) => write!(fmt, "task {} panicked", self.id), + } + } +} + +impl fmt::Debug for JoinError { + fn fmt(&self, fmt: &mut fmt::Formatter<'_>) -> fmt::Result { + match &self.repr { + Repr::Cancelled => write!(fmt, "JoinError::Cancelled({:?})", self.id), + Repr::Panic(_) => write!(fmt, "JoinError::Panic({:?}, ...)", self.id), + } + } +} + +impl std::error::Error for JoinError {} + +impl From<JoinError> for io::Error { + fn from(src: JoinError) -> io::Error { + io::Error::new( + io::ErrorKind::Other, + match src.repr { + Repr::Cancelled => "task was cancelled", + Repr::Panic(_) => "task panicked", + }, + ) + } +} diff --git a/third_party/rust/tokio/src/runtime/task/harness.rs b/third_party/rust/tokio/src/runtime/task/harness.rs new file mode 100644 index 0000000000..8e3c3d14fa --- /dev/null +++ b/third_party/rust/tokio/src/runtime/task/harness.rs @@ -0,0 +1,501 @@ +use crate::future::Future; +use crate::runtime::task::core::{Cell, Core, Header, Trailer}; +use crate::runtime::task::state::{Snapshot, State}; +use crate::runtime::task::waker::waker_ref; +use crate::runtime::task::{JoinError, Notified, RawTask, Schedule, Task}; + +use std::mem; +use std::mem::ManuallyDrop; +use std::panic; +use std::ptr::NonNull; +use std::task::{Context, Poll, Waker}; + +/// Typed raw task handle. +pub(super) struct Harness<T: Future, S: 'static> { + cell: NonNull<Cell<T, S>>, +} + +impl<T, S> Harness<T, S> +where + T: Future, + S: 'static, +{ + pub(super) unsafe fn from_raw(ptr: NonNull<Header>) -> Harness<T, S> { + Harness { + cell: ptr.cast::<Cell<T, S>>(), + } + } + + fn header_ptr(&self) -> NonNull<Header> { + self.cell.cast() + } + + fn header(&self) -> &Header { + unsafe { &*self.header_ptr().as_ptr() } + } + + fn state(&self) -> &State { + &self.header().state + } + + fn trailer(&self) -> &Trailer { + unsafe { &self.cell.as_ref().trailer } + } + + fn core(&self) -> &Core<T, S> { + unsafe { &self.cell.as_ref().core } + } +} + +/// Task operations that can be implemented without being generic over the +/// scheduler or task. Only one version of these methods should exist in the +/// final binary. +impl RawTask { + pub(super) fn drop_reference(self) { + if self.state().ref_dec() { + self.dealloc(); + } + } + + /// This call consumes a ref-count and notifies the task. 
This will create a + /// new Notified and submit it if necessary. + /// + /// The caller does not need to hold a ref-count besides the one that was + /// passed to this call. + pub(super) fn wake_by_val(&self) { + use super::state::TransitionToNotifiedByVal; + + match self.state().transition_to_notified_by_val() { + TransitionToNotifiedByVal::Submit => { + // The caller has given us a ref-count, and the transition has + // created a new ref-count, so we now hold two. We turn the new + // ref-count Notified and pass it to the call to `schedule`. + // + // The old ref-count is retained for now to ensure that the task + // is not dropped during the call to `schedule` if the call + // drops the task it was given. + self.schedule(); + + // Now that we have completed the call to schedule, we can + // release our ref-count. + self.drop_reference(); + } + TransitionToNotifiedByVal::Dealloc => { + self.dealloc(); + } + TransitionToNotifiedByVal::DoNothing => {} + } + } + + /// This call notifies the task. It will not consume any ref-counts, but the + /// caller should hold a ref-count. This will create a new Notified and + /// submit it if necessary. + pub(super) fn wake_by_ref(&self) { + use super::state::TransitionToNotifiedByRef; + + match self.state().transition_to_notified_by_ref() { + TransitionToNotifiedByRef::Submit => { + // The transition above incremented the ref-count for a new task + // and the caller also holds a ref-count. The caller's ref-count + // ensures that the task is not destroyed even if the new task + // is dropped before `schedule` returns. + self.schedule(); + } + TransitionToNotifiedByRef::DoNothing => {} + } + } + + /// Remotely aborts the task. + /// + /// The caller should hold a ref-count, but we do not consume it. + /// + /// This is similar to `shutdown` except that it asks the runtime to perform + /// the shutdown. This is necessary to avoid the shutdown happening in the + /// wrong thread for non-Send tasks. + pub(super) fn remote_abort(&self) { + if self.state().transition_to_notified_and_cancel() { + // The transition has created a new ref-count, which we turn into + // a Notified and pass to the task. + // + // Since the caller holds a ref-count, the task cannot be destroyed + // before the call to `schedule` returns even if the call drops the + // `Notified` internally. + self.schedule(); + } + } + + /// Try to set the waker notified when the task is complete. Returns true if + /// the task has already completed. If this call returns false, then the + /// waker will not be notified. + pub(super) fn try_set_join_waker(&self, waker: &Waker) -> bool { + can_read_output(self.header(), self.trailer(), waker) + } +} + +impl<T, S> Harness<T, S> +where + T: Future, + S: Schedule, +{ + pub(super) fn drop_reference(self) { + if self.state().ref_dec() { + self.dealloc(); + } + } + + /// Polls the inner future. A ref-count is consumed. + /// + /// All necessary state checks and transitions are performed. + /// Panics raised while polling the future are handled. + pub(super) fn poll(self) { + // We pass our ref-count to `poll_inner`. + match self.poll_inner() { + PollFuture::Notified => { + // The `poll_inner` call has given us two ref-counts back. + // We give one of them to a new task and call `yield_now`. + self.core() + .scheduler + .yield_now(Notified(self.get_new_task())); + + // The remaining ref-count is now dropped. 
We kept the extra + // ref-count until now to ensure that even if the `yield_now` + // call drops the provided task, the task isn't deallocated + // before after `yield_now` returns. + self.drop_reference(); + } + PollFuture::Complete => { + self.complete(); + } + PollFuture::Dealloc => { + self.dealloc(); + } + PollFuture::Done => (), + } + } + + /// Polls the task and cancel it if necessary. This takes ownership of a + /// ref-count. + /// + /// If the return value is Notified, the caller is given ownership of two + /// ref-counts. + /// + /// If the return value is Complete, the caller is given ownership of a + /// single ref-count, which should be passed on to `complete`. + /// + /// If the return value is Dealloc, then this call consumed the last + /// ref-count and the caller should call `dealloc`. + /// + /// Otherwise the ref-count is consumed and the caller should not access + /// `self` again. + fn poll_inner(&self) -> PollFuture { + use super::state::{TransitionToIdle, TransitionToRunning}; + + match self.state().transition_to_running() { + TransitionToRunning::Success => { + let header_ptr = self.header_ptr(); + let waker_ref = waker_ref::<T, S>(&header_ptr); + let cx = Context::from_waker(&waker_ref); + let res = poll_future(self.core(), cx); + + if res == Poll::Ready(()) { + // The future completed. Move on to complete the task. + return PollFuture::Complete; + } + + match self.state().transition_to_idle() { + TransitionToIdle::Ok => PollFuture::Done, + TransitionToIdle::OkNotified => PollFuture::Notified, + TransitionToIdle::OkDealloc => PollFuture::Dealloc, + TransitionToIdle::Cancelled => { + // The transition to idle failed because the task was + // cancelled during the poll. + cancel_task(self.core()); + PollFuture::Complete + } + } + } + TransitionToRunning::Cancelled => { + cancel_task(self.core()); + PollFuture::Complete + } + TransitionToRunning::Failed => PollFuture::Done, + TransitionToRunning::Dealloc => PollFuture::Dealloc, + } + } + + /// Forcibly shuts down the task. + /// + /// Attempt to transition to `Running` in order to forcibly shutdown the + /// task. If the task is currently running or in a state of completion, then + /// there is nothing further to do. When the task completes running, it will + /// notice the `CANCELLED` bit and finalize the task. + pub(super) fn shutdown(self) { + if !self.state().transition_to_shutdown() { + // The task is concurrently running. No further work needed. + self.drop_reference(); + return; + } + + // By transitioning the lifecycle to `Running`, we have permission to + // drop the future. + cancel_task(self.core()); + self.complete(); + } + + pub(super) fn dealloc(self) { + // Release the join waker, if there is one. + self.trailer().waker.with_mut(drop); + + // Check causality + self.core().stage.with_mut(drop); + + // Safety: The caller of this method just transitioned our ref-count to + // zero, so it is our responsibility to release the allocation. + // + // We don't hold any references into the allocation at this point, but + // it is possible for another thread to still hold a `&State` into the + // allocation if that other thread has decremented its last ref-count, + // but has not yet returned from the relevant method on `State`. + // + // However, the `State` type consists of just an `AtomicUsize`, and an + // `AtomicUsize` wraps the entirety of its contents in an `UnsafeCell`. 
+ // As explained in the documentation for `UnsafeCell`, such references + // are allowed to be dangling after their last use, even if the + // reference has not yet gone out of scope. + unsafe { + drop(Box::from_raw(self.cell.as_ptr())); + } + } + + // ===== join handle ===== + + /// Read the task output into `dst`. + pub(super) fn try_read_output(self, dst: &mut Poll<super::Result<T::Output>>, waker: &Waker) { + if can_read_output(self.header(), self.trailer(), waker) { + *dst = Poll::Ready(self.core().take_output()); + } + } + + pub(super) fn drop_join_handle_slow(self) { + // Try to unset `JOIN_INTEREST`. This must be done as a first step in + // case the task concurrently completed. + if self.state().unset_join_interested().is_err() { + // It is our responsibility to drop the output. This is critical as + // the task output may not be `Send` and as such must remain with + // the scheduler or `JoinHandle`. i.e. if the output remains in the + // task structure until the task is deallocated, it may be dropped + // by a Waker on any arbitrary thread. + // + // Panics are delivered to the user via the `JoinHandle`. Given that + // they are dropping the `JoinHandle`, we assume they are not + // interested in the panic and swallow it. + let _ = panic::catch_unwind(panic::AssertUnwindSafe(|| { + self.core().drop_future_or_output(); + })); + } + + // Drop the `JoinHandle` reference, possibly deallocating the task + self.drop_reference(); + } + + // ====== internal ====== + + /// Completes the task. This method assumes that the state is RUNNING. + fn complete(self) { + // The future has completed and its output has been written to the task + // stage. We transition from running to complete. + + let snapshot = self.state().transition_to_complete(); + + // We catch panics here in case dropping the future or waking the + // JoinHandle panics. + let _ = panic::catch_unwind(panic::AssertUnwindSafe(|| { + if !snapshot.is_join_interested() { + // The `JoinHandle` is not interested in the output of + // this task. It is our responsibility to drop the + // output. + self.core().drop_future_or_output(); + } else if snapshot.is_join_waker_set() { + // Notify the waker. Reading the waker field is safe per rule 4 + // in task/mod.rs, since the JOIN_WAKER bit is set and the call + // to transition_to_complete() above set the COMPLETE bit. + self.trailer().wake_join(); + } + })); + + // The task has completed execution and will no longer be scheduled. + let num_release = self.release(); + + if self.state().transition_to_terminal(num_release) { + self.dealloc(); + } + } + + /// Releases the task from the scheduler. Returns the number of ref-counts + /// that should be decremented. + fn release(&self) -> usize { + // We don't actually increment the ref-count here, but the new task is + // never destroyed, so that's ok. + let me = ManuallyDrop::new(self.get_new_task()); + + if let Some(task) = self.core().scheduler.release(&me) { + mem::forget(task); + 2 + } else { + 1 + } + } + + /// Creates a new task that holds its own ref-count. + /// + /// # Safety + /// + /// Any use of `self` after this call must ensure that a ref-count to the + /// task holds the task alive until after the use of `self`. Passing the + /// returned Task to any method on `self` is unsound if dropping the Task + /// could drop `self` before the call on `self` returned. + fn get_new_task(&self) -> Task<S> { + // safety: The header is at the beginning of the cell, so this cast is + // safe. 
+ unsafe { Task::from_raw(self.cell.cast()) } + } +} + +fn can_read_output(header: &Header, trailer: &Trailer, waker: &Waker) -> bool { + // Load a snapshot of the current task state + let snapshot = header.state.load(); + + debug_assert!(snapshot.is_join_interested()); + + if !snapshot.is_complete() { + // If the task is not complete, try storing the provided waker in the + // task's waker field. + + let res = if snapshot.is_join_waker_set() { + // If JOIN_WAKER is set, then JoinHandle has previously stored a + // waker in the waker field per step (iii) of rule 5 in task/mod.rs. + + // Optimization: if the stored waker and the provided waker wake the + // same task, then return without touching the waker field. (Reading + // the waker field below is safe per rule 3 in task/mod.rs.) + if unsafe { trailer.will_wake(waker) } { + return false; + } + + // Otherwise swap the stored waker with the provided waker by + // following the rule 5 in task/mod.rs. + header + .state + .unset_waker() + .and_then(|snapshot| set_join_waker(header, trailer, waker.clone(), snapshot)) + } else { + // If JOIN_WAKER is unset, then JoinHandle has mutable access to the + // waker field per rule 2 in task/mod.rs; therefore, skip step (i) + // of rule 5 and try to store the provided waker in the waker field. + set_join_waker(header, trailer, waker.clone(), snapshot) + }; + + match res { + Ok(_) => return false, + Err(snapshot) => { + assert!(snapshot.is_complete()); + } + } + } + true +} + +fn set_join_waker( + header: &Header, + trailer: &Trailer, + waker: Waker, + snapshot: Snapshot, +) -> Result<Snapshot, Snapshot> { + assert!(snapshot.is_join_interested()); + assert!(!snapshot.is_join_waker_set()); + + // Safety: Only the `JoinHandle` may set the `waker` field. When + // `JOIN_INTEREST` is **not** set, nothing else will touch the field. + unsafe { + trailer.set_waker(Some(waker)); + } + + // Update the `JoinWaker` state accordingly + let res = header.state.set_join_waker(); + + // If the state could not be updated, then clear the join waker + if res.is_err() { + unsafe { + trailer.set_waker(None); + } + } + + res +} + +enum PollFuture { + Complete, + Notified, + Done, + Dealloc, +} + +/// Cancels the task and store the appropriate error in the stage field. +fn cancel_task<T: Future, S: Schedule>(core: &Core<T, S>) { + // Drop the future from a panic guard. + let res = panic::catch_unwind(panic::AssertUnwindSafe(|| { + core.drop_future_or_output(); + })); + + match res { + Ok(()) => { + core.store_output(Err(JoinError::cancelled(core.task_id))); + } + Err(panic) => { + core.store_output(Err(JoinError::panic(core.task_id, panic))); + } + } +} + +/// Polls the future. If the future completes, the output is written to the +/// stage field. +fn poll_future<T: Future, S: Schedule>(core: &Core<T, S>, cx: Context<'_>) -> Poll<()> { + // Poll the future. + let output = panic::catch_unwind(panic::AssertUnwindSafe(|| { + struct Guard<'a, T: Future, S: Schedule> { + core: &'a Core<T, S>, + } + impl<'a, T: Future, S: Schedule> Drop for Guard<'a, T, S> { + fn drop(&mut self) { + // If the future panics on poll, we drop it inside the panic + // guard. + self.core.drop_future_or_output(); + } + } + let guard = Guard { core }; + let res = guard.core.poll(cx); + mem::forget(guard); + res + })); + + // Prepare output for being placed in the core stage. 
+ let output = match output { + Ok(Poll::Pending) => return Poll::Pending, + Ok(Poll::Ready(output)) => Ok(output), + Err(panic) => { + core.scheduler.unhandled_panic(); + Err(JoinError::panic(core.task_id, panic)) + } + }; + + // Catch and ignore panics if the future panics on drop. + let res = panic::catch_unwind(panic::AssertUnwindSafe(|| { + core.store_output(output); + })); + + if res.is_err() { + core.scheduler.unhandled_panic(); + } + + Poll::Ready(()) +} diff --git a/third_party/rust/tokio/src/runtime/task/id.rs b/third_party/rust/tokio/src/runtime/task/id.rs new file mode 100644 index 0000000000..2b0d95c024 --- /dev/null +++ b/third_party/rust/tokio/src/runtime/task/id.rs @@ -0,0 +1,87 @@ +use crate::runtime::context; + +use std::fmt; + +/// An opaque ID that uniquely identifies a task relative to all other currently +/// running tasks. +/// +/// # Notes +/// +/// - Task IDs are unique relative to other *currently running* tasks. When a +/// task completes, the same ID may be used for another task. +/// - Task IDs are *not* sequential, and do not indicate the order in which +/// tasks are spawned, what runtime a task is spawned on, or any other data. +/// - The task ID of the currently running task can be obtained from inside the +/// task via the [`task::try_id()`](crate::task::try_id()) and +/// [`task::id()`](crate::task::id()) functions and from outside the task via +/// the [`JoinHandle::id()`](crate::task::JoinHandle::id()) function. +/// +/// **Note**: This is an [unstable API][unstable]. The public API of this type +/// may break in 1.x releases. See [the documentation on unstable +/// features][unstable] for details. +/// +/// [unstable]: crate#unstable-features +#[cfg_attr(docsrs, doc(cfg(all(feature = "rt", tokio_unstable))))] +#[cfg_attr(not(tokio_unstable), allow(unreachable_pub))] +#[derive(Clone, Copy, Debug, Hash, Eq, PartialEq)] +pub struct Id(u64); + +/// Returns the [`Id`] of the currently running task. +/// +/// # Panics +/// +/// This function panics if called from outside a task. Please note that calls +/// to `block_on` do not have task IDs, so the method will panic if called from +/// within a call to `block_on`. For a version of this function that doesn't +/// panic, see [`task::try_id()`](crate::runtime::task::try_id()). +/// +/// **Note**: This is an [unstable API][unstable]. The public API of this type +/// may break in 1.x releases. See [the documentation on unstable +/// features][unstable] for details. +/// +/// [task ID]: crate::task::Id +/// [unstable]: crate#unstable-features +#[cfg_attr(not(tokio_unstable), allow(unreachable_pub))] +#[track_caller] +pub fn id() -> Id { + context::current_task_id().expect("Can't get a task id when not inside a task") +} + +/// Returns the [`Id`] of the currently running task, or `None` if called outside +/// of a task. +/// +/// This function is similar to [`task::id()`](crate::runtime::task::id()), except +/// that it returns `None` rather than panicking if called outside of a task +/// context. +/// +/// **Note**: This is an [unstable API][unstable]. The public API of this type +/// may break in 1.x releases. See [the documentation on unstable +/// features][unstable] for details. 
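For context, both accessors here are gated behind `tokio_unstable`, so a program exercising them must be built with `RUSTFLAGS="--cfg tokio_unstable"`. A minimal sketch of the difference between `id()` and `try_id()`, under that assumption:

```rust
// Sketch only (requires --cfg tokio_unstable); not part of the vendored diff.
use tokio::task;

#[tokio::main]
async fn main() {
    // `block_on` (which #[tokio::main] uses) is not a task, so there is no
    // current task ID here; `try_id()` returns None instead of panicking.
    assert!(task::try_id().is_none());

    let handle = tokio::spawn(async {
        // Inside a spawned task, `id()` is safe to call.
        task::id()
    });

    let id_from_inside = handle.await.unwrap();
    println!("spawned task had id {}", id_from_inside);
}
```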
+/// +/// [task ID]: crate::task::Id +/// [unstable]: crate#unstable-features +#[cfg_attr(not(tokio_unstable), allow(unreachable_pub))] +#[track_caller] +pub fn try_id() -> Option<Id> { + context::current_task_id() +} + +impl fmt::Display for Id { + fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result { + self.0.fmt(f) + } +} + +impl Id { + pub(crate) fn next() -> Self { + use crate::loom::sync::atomic::{Ordering::Relaxed, StaticAtomicU64}; + + static NEXT_ID: StaticAtomicU64 = StaticAtomicU64::new(1); + + Self(NEXT_ID.fetch_add(1, Relaxed)) + } + + pub(crate) fn as_u64(&self) -> u64 { + self.0 + } +} diff --git a/third_party/rust/tokio/src/runtime/task/join.rs b/third_party/rust/tokio/src/runtime/task/join.rs new file mode 100644 index 0000000000..ee39258846 --- /dev/null +++ b/third_party/rust/tokio/src/runtime/task/join.rs @@ -0,0 +1,366 @@ +use crate::runtime::task::{Header, RawTask}; + +use std::fmt; +use std::future::Future; +use std::marker::PhantomData; +use std::panic::{RefUnwindSafe, UnwindSafe}; +use std::pin::Pin; +use std::task::{Context, Poll, Waker}; + +cfg_rt! { + /// An owned permission to join on a task (await its termination). + /// + /// This can be thought of as the equivalent of [`std::thread::JoinHandle`] + /// for a Tokio task rather than a thread. Note that the background task + /// associated with this `JoinHandle` started running immediately when you + /// called spawn, even if you have not yet awaited the `JoinHandle`. + /// + /// A `JoinHandle` *detaches* the associated task when it is dropped, which + /// means that there is no longer any handle to the task, and no way to `join` + /// on it. + /// + /// This `struct` is created by the [`task::spawn`] and [`task::spawn_blocking`] + /// functions. + /// + /// # Cancel safety + /// + /// The `&mut JoinHandle<T>` type is cancel safe. If it is used as the event + /// in a `tokio::select!` statement and some other branch completes first, + /// then it is guaranteed that the output of the task is not lost. + /// + /// If a `JoinHandle` is dropped, then the task continues running in the + /// background and its return value is lost. + /// + /// # Examples + /// + /// Creation from [`task::spawn`]: + /// + /// ``` + /// use tokio::task; + /// + /// # async fn doc() { + /// let join_handle: task::JoinHandle<_> = task::spawn(async { + /// // some work here + /// }); + /// # } + /// ``` + /// + /// Creation from [`task::spawn_blocking`]: + /// + /// ``` + /// use tokio::task; + /// + /// # async fn doc() { + /// let join_handle: task::JoinHandle<_> = task::spawn_blocking(|| { + /// // some blocking work here + /// }); + /// # } + /// ``` + /// + /// The generic parameter `T` in `JoinHandle<T>` is the return type of the spawned task. + /// If the return value is an i32, the join handle has type `JoinHandle<i32>`: + /// + /// ``` + /// use tokio::task; + /// + /// # async fn doc() { + /// let join_handle: task::JoinHandle<i32> = task::spawn(async { + /// 5 + 3 + /// }); + /// # } + /// + /// ``` + /// + /// If the task does not have a return value, the join handle has type `JoinHandle<()>`: + /// + /// ``` + /// use tokio::task; + /// + /// # async fn doc() { + /// let join_handle: task::JoinHandle<()> = task::spawn(async { + /// println!("I return nothing."); + /// }); + /// # } + /// ``` + /// + /// Note that `handle.await` doesn't give you the return type directly. It is wrapped in a + /// `Result` because panics in the spawned task are caught by Tokio. 
The `?` operator has + /// to be double chained to extract the returned value: + /// + /// ``` + /// use tokio::task; + /// use std::io; + /// + /// #[tokio::main] + /// async fn main() -> io::Result<()> { + /// let join_handle: task::JoinHandle<Result<i32, io::Error>> = tokio::spawn(async { + /// Ok(5 + 3) + /// }); + /// + /// let result = join_handle.await??; + /// assert_eq!(result, 8); + /// Ok(()) + /// } + /// ``` + /// + /// If the task panics, the error is a [`JoinError`] that contains the panic: + /// + /// ``` + /// use tokio::task; + /// use std::io; + /// use std::panic; + /// + /// #[tokio::main] + /// async fn main() -> io::Result<()> { + /// let join_handle: task::JoinHandle<Result<i32, io::Error>> = tokio::spawn(async { + /// panic!("boom"); + /// }); + /// + /// let err = join_handle.await.unwrap_err(); + /// assert!(err.is_panic()); + /// Ok(()) + /// } + /// + /// ``` + /// Child being detached and outliving its parent: + /// + /// ```no_run + /// use tokio::task; + /// use tokio::time; + /// use std::time::Duration; + /// + /// # #[tokio::main] async fn main() { + /// let original_task = task::spawn(async { + /// let _detached_task = task::spawn(async { + /// // Here we sleep to make sure that the first task returns before. + /// time::sleep(Duration::from_millis(10)).await; + /// // This will be called, even though the JoinHandle is dropped. + /// println!("♫ Still alive ♫"); + /// }); + /// }); + /// + /// original_task.await.expect("The task being joined has panicked"); + /// println!("Original task is joined."); + /// + /// // We make sure that the new task has time to run, before the main + /// // task returns. + /// + /// time::sleep(Duration::from_millis(1000)).await; + /// # } + /// ``` + /// + /// [`task::spawn`]: crate::task::spawn() + /// [`task::spawn_blocking`]: crate::task::spawn_blocking + /// [`std::thread::JoinHandle`]: std::thread::JoinHandle + /// [`JoinError`]: crate::task::JoinError + pub struct JoinHandle<T> { + raw: RawTask, + _p: PhantomData<T>, + } +} + +unsafe impl<T: Send> Send for JoinHandle<T> {} +unsafe impl<T: Send> Sync for JoinHandle<T> {} + +impl<T> UnwindSafe for JoinHandle<T> {} +impl<T> RefUnwindSafe for JoinHandle<T> {} + +impl<T> JoinHandle<T> { + pub(super) fn new(raw: RawTask) -> JoinHandle<T> { + JoinHandle { + raw, + _p: PhantomData, + } + } + + /// Abort the task associated with the handle. + /// + /// Awaiting a cancelled task might complete as usual if the task was + /// already completed at the time it was cancelled, but most likely it + /// will fail with a [cancelled] `JoinError`. + /// + /// ```rust + /// use tokio::time; + /// + /// # #[tokio::main(flavor = "current_thread", start_paused = true)] + /// # async fn main() { + /// let mut handles = Vec::new(); + /// + /// handles.push(tokio::spawn(async { + /// time::sleep(time::Duration::from_secs(10)).await; + /// true + /// })); + /// + /// handles.push(tokio::spawn(async { + /// time::sleep(time::Duration::from_secs(10)).await; + /// false + /// })); + /// + /// for handle in &handles { + /// handle.abort(); + /// } + /// + /// for handle in handles { + /// assert!(handle.await.unwrap_err().is_cancelled()); + /// } + /// # } + /// ``` + /// [cancelled]: method@super::error::JoinError::is_cancelled + pub fn abort(&self) { + self.raw.remote_abort(); + } + + /// Checks if the task associated with this `JoinHandle` has finished. + /// + /// Please note that this method can return `false` even if [`abort`] has been + /// called on the task. 
This is because the cancellation process may take + /// some time, and this method does not return `true` until it has + /// completed. + /// + /// ```rust + /// use tokio::time; + /// + /// # #[tokio::main(flavor = "current_thread", start_paused = true)] + /// # async fn main() { + /// let handle1 = tokio::spawn(async { + /// // do some stuff here + /// }); + /// let handle2 = tokio::spawn(async { + /// // do some other stuff here + /// time::sleep(time::Duration::from_secs(10)).await; + /// }); + /// // Wait for the task to finish + /// handle2.abort(); + /// time::sleep(time::Duration::from_secs(1)).await; + /// assert!(handle1.is_finished()); + /// assert!(handle2.is_finished()); + /// # } + /// ``` + /// [`abort`]: method@JoinHandle::abort + pub fn is_finished(&self) -> bool { + let state = self.raw.header().state.load(); + state.is_complete() + } + + /// Set the waker that is notified when the task completes. + pub(crate) fn set_join_waker(&mut self, waker: &Waker) { + if self.raw.try_set_join_waker(waker) { + // In this case the task has already completed. We wake the waker immediately. + waker.wake_by_ref(); + } + } + + /// Returns a new `AbortHandle` that can be used to remotely abort this task. + /// + /// Awaiting a task cancelled by the `AbortHandle` might complete as usual if the task was + /// already completed at the time it was cancelled, but most likely it + /// will fail with a [cancelled] `JoinError`. + /// + /// ```rust + /// use tokio::{time, task}; + /// + /// # #[tokio::main(flavor = "current_thread", start_paused = true)] + /// # async fn main() { + /// let mut handles = Vec::new(); + /// + /// handles.push(tokio::spawn(async { + /// time::sleep(time::Duration::from_secs(10)).await; + /// true + /// })); + /// + /// handles.push(tokio::spawn(async { + /// time::sleep(time::Duration::from_secs(10)).await; + /// false + /// })); + /// + /// let abort_handles: Vec<task::AbortHandle> = handles.iter().map(|h| h.abort_handle()).collect(); + /// + /// for handle in abort_handles { + /// handle.abort(); + /// } + /// + /// for handle in handles { + /// assert!(handle.await.unwrap_err().is_cancelled()); + /// } + /// # } + /// ``` + /// [cancelled]: method@super::error::JoinError::is_cancelled + pub fn abort_handle(&self) -> super::AbortHandle { + self.raw.ref_inc(); + super::AbortHandle::new(self.raw) + } + + /// Returns a [task ID] that uniquely identifies this task relative to other + /// currently spawned tasks. + /// + /// **Note**: This is an [unstable API][unstable]. The public API of this type + /// may break in 1.x releases. See [the documentation on unstable + /// features][unstable] for details. + /// + /// [task ID]: crate::task::Id + /// [unstable]: crate#unstable-features + #[cfg(tokio_unstable)] + #[cfg_attr(docsrs, doc(cfg(tokio_unstable)))] + pub fn id(&self) -> super::Id { + // Safety: The header pointer is valid. + unsafe { Header::get_id(self.raw.header_ptr()) } + } +} + +impl<T> Unpin for JoinHandle<T> {} + +impl<T> Future for JoinHandle<T> { + type Output = super::Result<T>; + + fn poll(self: Pin<&mut Self>, cx: &mut Context<'_>) -> Poll<Self::Output> { + ready!(crate::trace::trace_leaf(cx)); + let mut ret = Poll::Pending; + + // Keep track of task budget + let coop = ready!(crate::runtime::coop::poll_proceed(cx)); + + // Try to read the task output. If the task is not yet complete, the + // waker is stored and is notified once the task does complete. + // + // The function must go via the vtable, which requires erasing generic + // types. 
To do this, the function "return" is placed on the stack + // **before** calling the function and is passed into the function using + // `*mut ()`. + // + // Safety: + // + // The type of `T` must match the task's output type. + unsafe { + self.raw + .try_read_output(&mut ret as *mut _ as *mut (), cx.waker()); + } + + if ret.is_ready() { + coop.made_progress(); + } + + ret + } +} + +impl<T> Drop for JoinHandle<T> { + fn drop(&mut self) { + if self.raw.state().drop_join_handle_fast().is_ok() { + return; + } + + self.raw.drop_join_handle_slow(); + } +} + +impl<T> fmt::Debug for JoinHandle<T> +where + T: fmt::Debug, +{ + fn fmt(&self, fmt: &mut fmt::Formatter<'_>) -> fmt::Result { + // Safety: The header pointer is valid. + let id_ptr = unsafe { Header::get_id_ptr(self.raw.header_ptr()) }; + let id = unsafe { id_ptr.as_ref() }; + fmt.debug_struct("JoinHandle").field("id", id).finish() + } +} diff --git a/third_party/rust/tokio/src/runtime/task/list.rs b/third_party/rust/tokio/src/runtime/task/list.rs new file mode 100644 index 0000000000..fb7dbdc1d9 --- /dev/null +++ b/third_party/rust/tokio/src/runtime/task/list.rs @@ -0,0 +1,319 @@ +//! This module has containers for storing the tasks spawned on a scheduler. The +//! `OwnedTasks` container is thread-safe but can only store tasks that +//! implement Send. The `LocalOwnedTasks` container is not thread safe, but can +//! store non-Send tasks. +//! +//! The collections can be closed to prevent adding new tasks during shutdown of +//! the scheduler with the collection. + +use crate::future::Future; +use crate::loom::cell::UnsafeCell; +use crate::loom::sync::Mutex; +use crate::runtime::task::{JoinHandle, LocalNotified, Notified, Schedule, Task}; +use crate::util::linked_list::{CountedLinkedList, Link, LinkedList}; + +use std::marker::PhantomData; + +// The id from the module below is used to verify whether a given task is stored +// in this OwnedTasks, or some other task. The counter starts at one so we can +// use zero for tasks not owned by any list. +// +// The safety checks in this file can technically be violated if the counter is +// overflown, but the checks are not supposed to ever fail unless there is a +// bug in Tokio, so we accept that certain bugs would not be caught if the two +// mixed up runtimes happen to have the same id. + +cfg_has_atomic_u64! { + use std::sync::atomic::{AtomicU64, Ordering}; + + static NEXT_OWNED_TASKS_ID: AtomicU64 = AtomicU64::new(1); + + fn get_next_id() -> u64 { + loop { + let id = NEXT_OWNED_TASKS_ID.fetch_add(1, Ordering::Relaxed); + if id != 0 { + return id; + } + } + } +} + +cfg_not_has_atomic_u64! 
{ + use std::sync::atomic::{AtomicU32, Ordering}; + + static NEXT_OWNED_TASKS_ID: AtomicU32 = AtomicU32::new(1); + + fn get_next_id() -> u64 { + loop { + let id = NEXT_OWNED_TASKS_ID.fetch_add(1, Ordering::Relaxed); + if id != 0 { + return u64::from(id); + } + } + } +} + +pub(crate) struct OwnedTasks<S: 'static> { + inner: Mutex<CountedOwnedTasksInner<S>>, + id: u64, +} +struct CountedOwnedTasksInner<S: 'static> { + list: CountedLinkedList<Task<S>, <Task<S> as Link>::Target>, + closed: bool, +} +pub(crate) struct LocalOwnedTasks<S: 'static> { + inner: UnsafeCell<OwnedTasksInner<S>>, + id: u64, + _not_send_or_sync: PhantomData<*const ()>, +} +struct OwnedTasksInner<S: 'static> { + list: LinkedList<Task<S>, <Task<S> as Link>::Target>, + closed: bool, +} + +impl<S: 'static> OwnedTasks<S> { + pub(crate) fn new() -> Self { + Self { + inner: Mutex::new(CountedOwnedTasksInner { + list: CountedLinkedList::new(), + closed: false, + }), + id: get_next_id(), + } + } + + /// Binds the provided task to this OwnedTasks instance. This fails if the + /// OwnedTasks has been closed. + pub(crate) fn bind<T>( + &self, + task: T, + scheduler: S, + id: super::Id, + ) -> (JoinHandle<T::Output>, Option<Notified<S>>) + where + S: Schedule, + T: Future + Send + 'static, + T::Output: Send + 'static, + { + let (task, notified, join) = super::new_task(task, scheduler, id); + + unsafe { + // safety: We just created the task, so we have exclusive access + // to the field. + task.header().set_owner_id(self.id); + } + + let mut lock = self.inner.lock(); + if lock.closed { + drop(lock); + drop(notified); + task.shutdown(); + (join, None) + } else { + lock.list.push_front(task); + (join, Some(notified)) + } + } + + /// Asserts that the given task is owned by this OwnedTasks and convert it to + /// a LocalNotified, giving the thread permission to poll this task. + #[inline] + pub(crate) fn assert_owner(&self, task: Notified<S>) -> LocalNotified<S> { + assert_eq!(task.header().get_owner_id(), self.id); + + // safety: All tasks bound to this OwnedTasks are Send, so it is safe + // to poll it on this thread no matter what thread we are on. + LocalNotified { + task: task.0, + _not_send: PhantomData, + } + } + + /// Shuts down all tasks in the collection. This call also closes the + /// collection, preventing new items from being added. + pub(crate) fn close_and_shutdown_all(&self) + where + S: Schedule, + { + // The first iteration of the loop was unrolled so it can set the + // closed bool. + let first_task = { + let mut lock = self.inner.lock(); + lock.closed = true; + lock.list.pop_back() + }; + match first_task { + Some(task) => task.shutdown(), + None => return, + } + + loop { + let task = match self.inner.lock().list.pop_back() { + Some(task) => task, + None => return, + }; + + task.shutdown(); + } + } + + pub(crate) fn active_tasks_count(&self) -> usize { + self.inner.lock().list.count() + } + + pub(crate) fn remove(&self, task: &Task<S>) -> Option<Task<S>> { + let task_id = task.header().get_owner_id(); + if task_id == 0 { + // The task is unowned. + return None; + } + + assert_eq!(task_id, self.id); + + // safety: We just checked that the provided task is not in some other + // linked list. + unsafe { self.inner.lock().list.remove(task.header_ptr()) } + } + + pub(crate) fn is_empty(&self) -> bool { + self.inner.lock().list.is_empty() + } +} + +cfg_taskdump! { + impl<S: 'static> OwnedTasks<S> { + /// Locks the tasks, and calls `f` on an iterator over them. 
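+        /// (Editor's note: more precisely, the closure is invoked once per
+        /// owned task while the internal mutex is held, so `f` should not
+        /// block or attempt to lock this list again.)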
+ pub(crate) fn for_each<F>(&self, f: F) + where + F: FnMut(&Task<S>) + { + self.inner.lock().list.for_each(f) + } + } +} + +impl<S: 'static> LocalOwnedTasks<S> { + pub(crate) fn new() -> Self { + Self { + inner: UnsafeCell::new(OwnedTasksInner { + list: LinkedList::new(), + closed: false, + }), + id: get_next_id(), + _not_send_or_sync: PhantomData, + } + } + + pub(crate) fn bind<T>( + &self, + task: T, + scheduler: S, + id: super::Id, + ) -> (JoinHandle<T::Output>, Option<Notified<S>>) + where + S: Schedule, + T: Future + 'static, + T::Output: 'static, + { + let (task, notified, join) = super::new_task(task, scheduler, id); + + unsafe { + // safety: We just created the task, so we have exclusive access + // to the field. + task.header().set_owner_id(self.id); + } + + if self.is_closed() { + drop(notified); + task.shutdown(); + (join, None) + } else { + self.with_inner(|inner| { + inner.list.push_front(task); + }); + (join, Some(notified)) + } + } + + /// Shuts down all tasks in the collection. This call also closes the + /// collection, preventing new items from being added. + pub(crate) fn close_and_shutdown_all(&self) + where + S: Schedule, + { + self.with_inner(|inner| inner.closed = true); + + while let Some(task) = self.with_inner(|inner| inner.list.pop_back()) { + task.shutdown(); + } + } + + pub(crate) fn remove(&self, task: &Task<S>) -> Option<Task<S>> { + let task_id = task.header().get_owner_id(); + if task_id == 0 { + // The task is unowned. + return None; + } + + assert_eq!(task_id, self.id); + + self.with_inner(|inner| + // safety: We just checked that the provided task is not in some + // other linked list. + unsafe { inner.list.remove(task.header_ptr()) }) + } + + /// Asserts that the given task is owned by this LocalOwnedTasks and convert + /// it to a LocalNotified, giving the thread permission to poll this task. + #[inline] + pub(crate) fn assert_owner(&self, task: Notified<S>) -> LocalNotified<S> { + assert_eq!(task.header().get_owner_id(), self.id); + + // safety: The task was bound to this LocalOwnedTasks, and the + // LocalOwnedTasks is not Send or Sync, so we are on the right thread + // for polling this task. + LocalNotified { + task: task.0, + _not_send: PhantomData, + } + } + + #[inline] + fn with_inner<F, T>(&self, f: F) -> T + where + F: FnOnce(&mut OwnedTasksInner<S>) -> T, + { + // safety: This type is not Sync, so concurrent calls of this method + // can't happen. Furthermore, all uses of this method in this file make + // sure that they don't call `with_inner` recursively. + self.inner.with_mut(|ptr| unsafe { f(&mut *ptr) }) + } + + pub(crate) fn is_closed(&self) -> bool { + self.with_inner(|inner| inner.closed) + } + + pub(crate) fn is_empty(&self) -> bool { + self.with_inner(|inner| inner.list.is_empty()) + } +} + +#[cfg(all(test))] +mod tests { + use super::*; + + // This test may run in parallel with other tests, so we only test that ids + // come in increasing order. + #[test] + fn test_id_not_broken() { + let mut last_id = get_next_id(); + assert_ne!(last_id, 0); + + for _ in 0..1000 { + let next_id = get_next_id(); + assert_ne!(next_id, 0); + assert!(last_id < next_id); + last_id = next_id; + } + } +} diff --git a/third_party/rust/tokio/src/runtime/task/mod.rs b/third_party/rust/tokio/src/runtime/task/mod.rs new file mode 100644 index 0000000000..932552fb91 --- /dev/null +++ b/third_party/rust/tokio/src/runtime/task/mod.rs @@ -0,0 +1,497 @@ +//! The task module. +//! +//! 
The task module contains the code that manages spawned tasks and provides a +//! safe API for the rest of the runtime to use. Each task in a runtime is +//! stored in an OwnedTasks or LocalOwnedTasks object. +//! +//! # Task reference types +//! +//! A task is usually referenced by multiple handles, and there are several +//! types of handles. +//! +//! * OwnedTask - tasks stored in an OwnedTasks or LocalOwnedTasks are of this +//! reference type. +//! +//! * JoinHandle - each task has a JoinHandle that allows access to the output +//! of the task. +//! +//! * Waker - every waker for a task has this reference type. There can be any +//! number of waker references. +//! +//! * Notified - tracks whether the task is notified. +//! +//! * Unowned - this task reference type is used for tasks not stored in any +//! runtime. Mainly used for blocking tasks, but also in tests. +//! +//! The task uses a reference count to keep track of how many active references +//! exist. The Unowned reference type takes up two ref-counts. All other +//! reference types take up a single ref-count. +//! +//! Besides the waker type, each task has at most one of each reference type. +//! +//! # State +//! +//! The task stores its state in an atomic usize with various bitfields for the +//! necessary information. The state has the following bitfields: +//! +//! * RUNNING - Tracks whether the task is currently being polled or cancelled. +//! This bit functions as a lock around the task. +//! +//! * COMPLETE - Is one once the future has fully completed and has been +//! dropped. Never unset once set. Never set together with RUNNING. +//! +//! * NOTIFIED - Tracks whether a Notified object currently exists. +//! +//! * CANCELLED - Is set to one for tasks that should be cancelled as soon as +//! possible. May take any value for completed tasks. +//! +//! * JOIN_INTEREST - Is set to one if there exists a JoinHandle. +//! +//! * JOIN_WAKER - Acts as an access control bit for the join handle waker. The +//! protocol for its usage is described below. +//! +//! The rest of the bits are used for the ref-count. +//! +//! # Fields in the task +//! +//! The task has various fields. This section describes how and when it is safe +//! to access a field. +//! +//! * The state field is accessed with atomic instructions. +//! +//! * The OwnedTask reference has exclusive access to the `owned` field. +//! +//! * The Notified reference has exclusive access to the `queue_next` field. +//! +//! * The `owner_id` field can be set as part of construction of the task, but +//! is otherwise immutable and anyone can access the field immutably without +//! synchronization. +//! +//! * If COMPLETE is one, then the JoinHandle has exclusive access to the +//! stage field. If COMPLETE is zero, then the RUNNING bitfield functions as +//! a lock for the stage field, and it can be accessed only by the thread +//! that set RUNNING to one. +//! +//! * The waker field may be concurrently accessed by different threads: in one +//! thread the runtime may complete a task and *read* the waker field to +//! invoke the waker, and in another thread the task's JoinHandle may be +//! polled, and if the task hasn't yet completed, the JoinHandle may *write* +//! a waker to the waker field. The JOIN_WAKER bit ensures safe access by +//! multiple threads to the waker field using the following rules: +//! +//! 1. JOIN_WAKER is initialized to zero. +//! +//! 2. If JOIN_WAKER is zero, then the JoinHandle has exclusive (mutable) +//! access to the waker field. +//! +//! 3. 
If JOIN_WAKER is one, then the JoinHandle has shared (read-only) +//! access to the waker field. +//! +//! 4. If JOIN_WAKER is one and COMPLETE is one, then the runtime has shared +//! (read-only) access to the waker field. +//! +//! 5. If the JoinHandle needs to write to the waker field, then the +//! JoinHandle needs to (i) successfully set JOIN_WAKER to zero if it is +//! not already zero to gain exclusive access to the waker field per rule +//! 2, (ii) write a waker, and (iii) successfully set JOIN_WAKER to one. +//! +//! 6. The JoinHandle can change JOIN_WAKER only if COMPLETE is zero (i.e. +//! the task hasn't yet completed). +//! +//! Rule 6 implies that the steps (i) or (iii) of rule 5 may fail due to a +//! race. If step (i) fails, then the attempt to write a waker is aborted. If +//! step (iii) fails because COMPLETE is set to one by another thread after +//! step (i), then the waker field is cleared. Once COMPLETE is one (i.e. +//! task has completed), the JoinHandle will not modify JOIN_WAKER. After the +//! runtime sets COMPLETE to one, it invokes the waker if there is one. +//! +//! All other fields are immutable and can be accessed immutably without +//! synchronization by anyone. +//! +//! # Safety +//! +//! This section goes through various situations and explains why the API is +//! safe in that situation. +//! +//! ## Polling or dropping the future +//! +//! Any mutable access to the future happens after obtaining a lock by modifying +//! the RUNNING field, so exclusive access is ensured. +//! +//! When the task completes, exclusive access to the output is transferred to +//! the JoinHandle. If the JoinHandle is already dropped when the transition to +//! complete happens, the thread performing that transition retains exclusive +//! access to the output and should immediately drop it. +//! +//! ## Non-Send futures +//! +//! If a future is not Send, then it is bound to a LocalOwnedTasks. The future +//! will only ever be polled or dropped given a LocalNotified or inside a call +//! to LocalOwnedTasks::shutdown_all. In either case, it is guaranteed that the +//! future is on the right thread. +//! +//! If the task is never removed from the LocalOwnedTasks, then it is leaked, so +//! there is no risk that the task is dropped on some other thread when the last +//! ref-count drops. +//! +//! ## Non-Send output +//! +//! When a task completes, the output is placed in the stage of the task. Then, +//! a transition that sets COMPLETE to true is performed, and the value of +//! JOIN_INTEREST when this transition happens is read. +//! +//! If JOIN_INTEREST is zero when the transition to COMPLETE happens, then the +//! output is immediately dropped. +//! +//! If JOIN_INTEREST is one when the transition to COMPLETE happens, then the +//! JoinHandle is responsible for cleaning up the output. If the output is not +//! Send, then this happens: +//! +//! 1. The output is created on the thread that the future was polled on. Since +//! only non-Send futures can have non-Send output, the future was polled on +//! the thread that the future was spawned from. +//! 2. Since `JoinHandle<Output>` is not Send if Output is not Send, the +//! JoinHandle is also on the thread that the future was spawned from. +//! 3. Thus, the JoinHandle will not move the output across threads when it +//! takes or drops the output. +//! +//! ## Recursive poll/shutdown +//! +//! Calling poll from inside a shutdown call or vice-versa is not prevented by +//! 
the API exposed by the task module, so this has to be safe. In either case, +//! the lock in the RUNNING bitfield makes the inner call return immediately. If +//! the inner call is a `shutdown` call, then the CANCELLED bit is set, and the +//! poll call will notice it when the poll finishes, and the task is cancelled +//! at that point. + +// Some task infrastructure is here to support `JoinSet`, which is currently +// unstable. This should be removed once `JoinSet` is stabilized. +#![cfg_attr(not(tokio_unstable), allow(dead_code))] + +mod core; +use self::core::Cell; +use self::core::Header; + +mod error; +pub use self::error::JoinError; + +mod harness; +use self::harness::Harness; + +mod id; +#[cfg_attr(not(tokio_unstable), allow(unreachable_pub))] +pub use id::{id, try_id, Id}; + +#[cfg(feature = "rt")] +mod abort; +mod join; + +#[cfg(feature = "rt")] +pub use self::abort::AbortHandle; + +pub use self::join::JoinHandle; + +mod list; +pub(crate) use self::list::{LocalOwnedTasks, OwnedTasks}; + +mod raw; +pub(crate) use self::raw::RawTask; + +mod state; +use self::state::State; + +mod waker; + +cfg_taskdump! { + pub(crate) mod trace; +} + +use crate::future::Future; +use crate::util::linked_list; + +use std::marker::PhantomData; +use std::ptr::NonNull; +use std::{fmt, mem}; + +/// An owned handle to the task, tracked by ref count. +#[repr(transparent)] +pub(crate) struct Task<S: 'static> { + raw: RawTask, + _p: PhantomData<S>, +} + +unsafe impl<S> Send for Task<S> {} +unsafe impl<S> Sync for Task<S> {} + +/// A task was notified. +#[repr(transparent)] +pub(crate) struct Notified<S: 'static>(Task<S>); + +// safety: This type cannot be used to touch the task without first verifying +// that the value is on a thread where it is safe to poll the task. +unsafe impl<S: Schedule> Send for Notified<S> {} +unsafe impl<S: Schedule> Sync for Notified<S> {} + +/// A non-Send variant of Notified with the invariant that it is on a thread +/// where it is safe to poll it. +#[repr(transparent)] +pub(crate) struct LocalNotified<S: 'static> { + task: Task<S>, + _not_send: PhantomData<*const ()>, +} + +/// A task that is not owned by any OwnedTasks. Used for blocking tasks. +/// This type holds two ref-counts. +pub(crate) struct UnownedTask<S: 'static> { + raw: RawTask, + _p: PhantomData<S>, +} + +// safety: This type can only be created given a Send task. +unsafe impl<S> Send for UnownedTask<S> {} +unsafe impl<S> Sync for UnownedTask<S> {} + +/// Task result sent back. +pub(crate) type Result<T> = std::result::Result<T, JoinError>; + +pub(crate) trait Schedule: Sync + Sized + 'static { + /// The task has completed work and is ready to be released. The scheduler + /// should release it immediately and return it. The task module will batch + /// the ref-dec with setting other options. + /// + /// If the scheduler has already released the task, then None is returned. + fn release(&self, task: &Task<Self>) -> Option<Task<Self>>; + + /// Schedule the task + fn schedule(&self, task: Notified<Self>); + + /// Schedule the task to run in the near future, yielding the thread to + /// other tasks. + fn yield_now(&self, task: Notified<Self>) { + self.schedule(task); + } + + /// Polling the task resulted in a panic. Should the runtime shutdown? + fn unhandled_panic(&self) { + // By default, do nothing. This maintains the 1.0 behavior. + } +} + +cfg_rt! { + /// This is the constructor for a new task. Three references to the task are + /// created. 
The first task reference is usually put into an OwnedTasks + /// immediately. The Notified is sent to the scheduler as an ordinary + /// notification. + fn new_task<T, S>( + task: T, + scheduler: S, + id: Id, + ) -> (Task<S>, Notified<S>, JoinHandle<T::Output>) + where + S: Schedule, + T: Future + 'static, + T::Output: 'static, + { + let raw = RawTask::new::<T, S>(task, scheduler, id); + let task = Task { + raw, + _p: PhantomData, + }; + let notified = Notified(Task { + raw, + _p: PhantomData, + }); + let join = JoinHandle::new(raw); + + (task, notified, join) + } + + /// Creates a new task with an associated join handle. This method is used + /// only when the task is not going to be stored in an `OwnedTasks` list. + /// + /// Currently only blocking tasks use this method. + pub(crate) fn unowned<T, S>(task: T, scheduler: S, id: Id) -> (UnownedTask<S>, JoinHandle<T::Output>) + where + S: Schedule, + T: Send + Future + 'static, + T::Output: Send + 'static, + { + let (task, notified, join) = new_task(task, scheduler, id); + + // This transfers the ref-count of task and notified into an UnownedTask. + // This is valid because an UnownedTask holds two ref-counts. + let unowned = UnownedTask { + raw: task.raw, + _p: PhantomData, + }; + std::mem::forget(task); + std::mem::forget(notified); + + (unowned, join) + } +} + +impl<S: 'static> Task<S> { + unsafe fn new(raw: RawTask) -> Task<S> { + Task { + raw, + _p: PhantomData, + } + } + + unsafe fn from_raw(ptr: NonNull<Header>) -> Task<S> { + Task::new(RawTask::from_raw(ptr)) + } + + #[cfg(all( + tokio_unstable, + tokio_taskdump, + feature = "rt", + target_os = "linux", + any(target_arch = "aarch64", target_arch = "x86", target_arch = "x86_64") + ))] + pub(super) fn as_raw(&self) -> RawTask { + self.raw + } + + fn header(&self) -> &Header { + self.raw.header() + } + + fn header_ptr(&self) -> NonNull<Header> { + self.raw.header_ptr() + } +} + +impl<S: 'static> Notified<S> { + fn header(&self) -> &Header { + self.0.header() + } +} + +impl<S: 'static> Notified<S> { + pub(crate) unsafe fn from_raw(ptr: RawTask) -> Notified<S> { + Notified(Task::new(ptr)) + } +} + +impl<S: 'static> Notified<S> { + pub(crate) fn into_raw(self) -> RawTask { + let raw = self.0.raw; + mem::forget(self); + raw + } +} + +impl<S: Schedule> Task<S> { + /// Preemptively cancels the task as part of the shutdown process. + pub(crate) fn shutdown(self) { + let raw = self.raw; + mem::forget(self); + raw.shutdown(); + } +} + +impl<S: Schedule> LocalNotified<S> { + /// Runs the task. + pub(crate) fn run(self) { + let raw = self.task.raw; + mem::forget(self); + raw.poll(); + } +} + +impl<S: Schedule> UnownedTask<S> { + // Used in test of the inject queue. + #[cfg(test)] + #[cfg_attr(tokio_wasm, allow(dead_code))] + pub(super) fn into_notified(self) -> Notified<S> { + Notified(self.into_task()) + } + + fn into_task(self) -> Task<S> { + // Convert into a task. + let task = Task { + raw: self.raw, + _p: PhantomData, + }; + mem::forget(self); + + // Drop a ref-count since an UnownedTask holds two. + task.header().state.ref_dec(); + + task + } + + pub(crate) fn run(self) { + let raw = self.raw; + mem::forget(self); + + // Transfer one ref-count to a Task object. + let task = Task::<S> { + raw, + _p: PhantomData, + }; + + // Use the other ref-count to poll the task. 
+ raw.poll(); + // Decrement our extra ref-count + drop(task); + } + + pub(crate) fn shutdown(self) { + self.into_task().shutdown() + } +} + +impl<S: 'static> Drop for Task<S> { + fn drop(&mut self) { + // Decrement the ref count + if self.header().state.ref_dec() { + // Deallocate if this is the final ref count + self.raw.dealloc(); + } + } +} + +impl<S: 'static> Drop for UnownedTask<S> { + fn drop(&mut self) { + // Decrement the ref count + if self.raw.header().state.ref_dec_twice() { + // Deallocate if this is the final ref count + self.raw.dealloc(); + } + } +} + +impl<S> fmt::Debug for Task<S> { + fn fmt(&self, fmt: &mut fmt::Formatter<'_>) -> fmt::Result { + write!(fmt, "Task({:p})", self.header()) + } +} + +impl<S> fmt::Debug for Notified<S> { + fn fmt(&self, fmt: &mut fmt::Formatter<'_>) -> fmt::Result { + write!(fmt, "task::Notified({:p})", self.0.header()) + } +} + +/// # Safety +/// +/// Tasks are pinned. +unsafe impl<S> linked_list::Link for Task<S> { + type Handle = Task<S>; + type Target = Header; + + fn as_raw(handle: &Task<S>) -> NonNull<Header> { + handle.raw.header_ptr() + } + + unsafe fn from_raw(ptr: NonNull<Header>) -> Task<S> { + Task::from_raw(ptr) + } + + unsafe fn pointers(target: NonNull<Header>) -> NonNull<linked_list::Pointers<Header>> { + self::core::Trailer::addr_of_owned(Header::get_trailer(target)) + } +} diff --git a/third_party/rust/tokio/src/runtime/task/raw.rs b/third_party/rust/tokio/src/runtime/task/raw.rs new file mode 100644 index 0000000000..8078859285 --- /dev/null +++ b/third_party/rust/tokio/src/runtime/task/raw.rs @@ -0,0 +1,317 @@ +use crate::future::Future; +use crate::runtime::task::core::{Core, Trailer}; +use crate::runtime::task::{Cell, Harness, Header, Id, Schedule, State}; + +use std::ptr::NonNull; +use std::task::{Poll, Waker}; + +/// Raw task handle +pub(crate) struct RawTask { + ptr: NonNull<Header>, +} + +pub(super) struct Vtable { + /// Polls the future. + pub(super) poll: unsafe fn(NonNull<Header>), + + /// Schedules the task for execution on the runtime. + pub(super) schedule: unsafe fn(NonNull<Header>), + + /// Deallocates the memory. + pub(super) dealloc: unsafe fn(NonNull<Header>), + + /// Reads the task output, if complete. + pub(super) try_read_output: unsafe fn(NonNull<Header>, *mut (), &Waker), + + /// The join handle has been dropped. + pub(super) drop_join_handle_slow: unsafe fn(NonNull<Header>), + + /// An abort handle has been dropped. + pub(super) drop_abort_handle: unsafe fn(NonNull<Header>), + + /// Scheduler is being shutdown. + pub(super) shutdown: unsafe fn(NonNull<Header>), + + /// The number of bytes that the `trailer` field is offset from the header. + pub(super) trailer_offset: usize, + + /// The number of bytes that the `scheduler` field is offset from the header. + pub(super) scheduler_offset: usize, + + /// The number of bytes that the `id` field is offset from the header. + pub(super) id_offset: usize, +} + +/// Get the vtable for the requested `T` and `S` generics. 
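+///
+/// Editor's note (illustrative, not upstream text): returning `&Vtable { .. }`
+/// from a plain `fn` works because every field is a constant expression, so
+/// the borrow is promoted to a `'static` allocation (rvalue static promotion);
+/// the `OffsetHelper` consts below exist to keep that promotion possible. A
+/// minimal sketch of the same pattern:
+///
+/// ```
+/// fn answer() -> &'static i32 {
+///     // The literal is a constant expression, so `&42` is promoted to a
+///     // `'static` value rather than a reference to a stack temporary.
+///     &42
+/// }
+/// assert_eq!(*answer(), 42);
+/// ```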
+pub(super) fn vtable<T: Future, S: Schedule>() -> &'static Vtable { + &Vtable { + poll: poll::<T, S>, + schedule: schedule::<S>, + dealloc: dealloc::<T, S>, + try_read_output: try_read_output::<T, S>, + drop_join_handle_slow: drop_join_handle_slow::<T, S>, + drop_abort_handle: drop_abort_handle::<T, S>, + shutdown: shutdown::<T, S>, + trailer_offset: OffsetHelper::<T, S>::TRAILER_OFFSET, + scheduler_offset: OffsetHelper::<T, S>::SCHEDULER_OFFSET, + id_offset: OffsetHelper::<T, S>::ID_OFFSET, + } +} + +/// Calling `get_trailer_offset` directly in vtable doesn't work because it +/// prevents the vtable from being promoted to a static reference. +/// +/// See this thread for more info: +/// <https://users.rust-lang.org/t/custom-vtables-with-integers/78508> +struct OffsetHelper<T, S>(T, S); +impl<T: Future, S: Schedule> OffsetHelper<T, S> { + // Pass `size_of`/`align_of` as arguments rather than calling them directly + // inside `get_trailer_offset` because trait bounds on generic parameters + // of const fn are unstable on our MSRV. + const TRAILER_OFFSET: usize = get_trailer_offset( + std::mem::size_of::<Header>(), + std::mem::size_of::<Core<T, S>>(), + std::mem::align_of::<Core<T, S>>(), + std::mem::align_of::<Trailer>(), + ); + + // The `scheduler` is the first field of `Core`, so it has the same + // offset as `Core`. + const SCHEDULER_OFFSET: usize = get_core_offset( + std::mem::size_of::<Header>(), + std::mem::align_of::<Core<T, S>>(), + ); + + const ID_OFFSET: usize = get_id_offset( + std::mem::size_of::<Header>(), + std::mem::align_of::<Core<T, S>>(), + std::mem::size_of::<S>(), + std::mem::align_of::<Id>(), + ); +} + +/// Compute the offset of the `Trailer` field in `Cell<T, S>` using the +/// `#[repr(C)]` algorithm. +/// +/// Pseudo-code for the `#[repr(C)]` algorithm can be found here: +/// <https://doc.rust-lang.org/reference/type-layout.html#reprc-structs> +const fn get_trailer_offset( + header_size: usize, + core_size: usize, + core_align: usize, + trailer_align: usize, +) -> usize { + let mut offset = header_size; + + let core_misalign = offset % core_align; + if core_misalign > 0 { + offset += core_align - core_misalign; + } + offset += core_size; + + let trailer_misalign = offset % trailer_align; + if trailer_misalign > 0 { + offset += trailer_align - trailer_misalign; + } + + offset +} + +/// Compute the offset of the `Core<T, S>` field in `Cell<T, S>` using the +/// `#[repr(C)]` algorithm. +/// +/// Pseudo-code for the `#[repr(C)]` algorithm can be found here: +/// <https://doc.rust-lang.org/reference/type-layout.html#reprc-structs> +const fn get_core_offset(header_size: usize, core_align: usize) -> usize { + let mut offset = header_size; + + let core_misalign = offset % core_align; + if core_misalign > 0 { + offset += core_align - core_misalign; + } + + offset +} + +/// Compute the offset of the `Id` field in `Cell<T, S>` using the +/// `#[repr(C)]` algorithm. 
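+///
+/// Editor's worked example with hypothetical sizes (not measured values): for
+/// a 16-byte `Header`, a `Core` aligned to 8, a 4-byte scheduler `S`, and an
+/// 8-byte-aligned `Id`, the core offset is 16, adding the scheduler gives 20,
+/// and rounding up to the `Id` alignment yields an id offset of 24.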
+/// +/// Pseudo-code for the `#[repr(C)]` algorithm can be found here: +/// <https://doc.rust-lang.org/reference/type-layout.html#reprc-structs> +const fn get_id_offset( + header_size: usize, + core_align: usize, + scheduler_size: usize, + id_align: usize, +) -> usize { + let mut offset = get_core_offset(header_size, core_align); + offset += scheduler_size; + + let id_misalign = offset % id_align; + if id_misalign > 0 { + offset += id_align - id_misalign; + } + + offset +} + +impl RawTask { + pub(super) fn new<T, S>(task: T, scheduler: S, id: Id) -> RawTask + where + T: Future, + S: Schedule, + { + let ptr = Box::into_raw(Cell::<_, S>::new(task, scheduler, State::new(), id)); + let ptr = unsafe { NonNull::new_unchecked(ptr as *mut Header) }; + + RawTask { ptr } + } + + pub(super) unsafe fn from_raw(ptr: NonNull<Header>) -> RawTask { + RawTask { ptr } + } + + pub(super) fn header_ptr(&self) -> NonNull<Header> { + self.ptr + } + + pub(super) fn trailer_ptr(&self) -> NonNull<Trailer> { + unsafe { Header::get_trailer(self.ptr) } + } + + /// Returns a reference to the task's header. + pub(super) fn header(&self) -> &Header { + unsafe { self.ptr.as_ref() } + } + + /// Returns a reference to the task's trailer. + pub(super) fn trailer(&self) -> &Trailer { + unsafe { &*self.trailer_ptr().as_ptr() } + } + + /// Returns a reference to the task's state. + pub(super) fn state(&self) -> &State { + &self.header().state + } + + /// Safety: mutual exclusion is required to call this function. + pub(crate) fn poll(self) { + let vtable = self.header().vtable; + unsafe { (vtable.poll)(self.ptr) } + } + + pub(super) fn schedule(self) { + let vtable = self.header().vtable; + unsafe { (vtable.schedule)(self.ptr) } + } + + pub(super) fn dealloc(self) { + let vtable = self.header().vtable; + unsafe { + (vtable.dealloc)(self.ptr); + } + } + + /// Safety: `dst` must be a `*mut Poll<super::Result<T::Output>>` where `T` + /// is the future stored by the task. + pub(super) unsafe fn try_read_output(self, dst: *mut (), waker: &Waker) { + let vtable = self.header().vtable; + (vtable.try_read_output)(self.ptr, dst, waker); + } + + pub(super) fn drop_join_handle_slow(self) { + let vtable = self.header().vtable; + unsafe { (vtable.drop_join_handle_slow)(self.ptr) } + } + + pub(super) fn drop_abort_handle(self) { + let vtable = self.header().vtable; + unsafe { (vtable.drop_abort_handle)(self.ptr) } + } + + pub(super) fn shutdown(self) { + let vtable = self.header().vtable; + unsafe { (vtable.shutdown)(self.ptr) } + } + + /// Increment the task's reference count. + /// + /// Currently, this is used only when creating an `AbortHandle`. + pub(super) fn ref_inc(self) { + self.header().state.ref_inc(); + } + + /// Get the queue-next pointer + /// + /// This is for usage by the injection queue + /// + /// Safety: make sure only one queue uses this and access is synchronized. + pub(crate) unsafe fn get_queue_next(self) -> Option<RawTask> { + self.header() + .queue_next + .with(|ptr| *ptr) + .map(|p| RawTask::from_raw(p)) + } + + /// Sets the queue-next pointer + /// + /// This is for usage by the injection queue + /// + /// Safety: make sure only one queue uses this and access is synchronized. 
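+    ///
+    /// (Editor's note: together with `get_queue_next` above, this threads
+    /// tasks into an intrusive singly-linked list; the next pointer lives in
+    /// the task `Header` itself, so the inject queue needs no per-task
+    /// allocation.)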
+ pub(crate) unsafe fn set_queue_next(self, val: Option<RawTask>) { + self.header().set_next(val.map(|task| task.ptr)); + } +} + +impl Clone for RawTask { + fn clone(&self) -> Self { + RawTask { ptr: self.ptr } + } +} + +impl Copy for RawTask {} + +unsafe fn poll<T: Future, S: Schedule>(ptr: NonNull<Header>) { + let harness = Harness::<T, S>::from_raw(ptr); + harness.poll(); +} + +unsafe fn schedule<S: Schedule>(ptr: NonNull<Header>) { + use crate::runtime::task::{Notified, Task}; + + let scheduler = Header::get_scheduler::<S>(ptr); + scheduler + .as_ref() + .schedule(Notified(Task::from_raw(ptr.cast()))); +} + +unsafe fn dealloc<T: Future, S: Schedule>(ptr: NonNull<Header>) { + let harness = Harness::<T, S>::from_raw(ptr); + harness.dealloc(); +} + +unsafe fn try_read_output<T: Future, S: Schedule>( + ptr: NonNull<Header>, + dst: *mut (), + waker: &Waker, +) { + let out = &mut *(dst as *mut Poll<super::Result<T::Output>>); + + let harness = Harness::<T, S>::from_raw(ptr); + harness.try_read_output(out, waker); +} + +unsafe fn drop_join_handle_slow<T: Future, S: Schedule>(ptr: NonNull<Header>) { + let harness = Harness::<T, S>::from_raw(ptr); + harness.drop_join_handle_slow() +} + +unsafe fn drop_abort_handle<T: Future, S: Schedule>(ptr: NonNull<Header>) { + let harness = Harness::<T, S>::from_raw(ptr); + harness.drop_reference(); +} + +unsafe fn shutdown<T: Future, S: Schedule>(ptr: NonNull<Header>) { + let harness = Harness::<T, S>::from_raw(ptr); + harness.shutdown() +} diff --git a/third_party/rust/tokio/src/runtime/task/state.rs b/third_party/rust/tokio/src/runtime/task/state.rs new file mode 100644 index 0000000000..12f5449181 --- /dev/null +++ b/third_party/rust/tokio/src/runtime/task/state.rs @@ -0,0 +1,611 @@ +use crate::loom::sync::atomic::AtomicUsize; + +use std::fmt; +use std::sync::atomic::Ordering::{AcqRel, Acquire, Release}; +use std::usize; + +pub(super) struct State { + val: AtomicUsize, +} + +/// Current state value. +#[derive(Copy, Clone)] +pub(super) struct Snapshot(usize); + +type UpdateResult = Result<Snapshot, Snapshot>; + +/// The task is currently being run. +const RUNNING: usize = 0b0001; + +/// The task is complete. +/// +/// Once this bit is set, it is never unset. +const COMPLETE: usize = 0b0010; + +/// Extracts the task's lifecycle value from the state. +const LIFECYCLE_MASK: usize = 0b11; + +/// Flag tracking if the task has been pushed into a run queue. +const NOTIFIED: usize = 0b100; + +/// The join handle is still around. +#[allow(clippy::unusual_byte_groupings)] // https://github.com/rust-lang/rust-clippy/issues/6556 +const JOIN_INTEREST: usize = 0b1_000; + +/// A join handle waker has been set. +#[allow(clippy::unusual_byte_groupings)] // https://github.com/rust-lang/rust-clippy/issues/6556 +const JOIN_WAKER: usize = 0b10_000; + +/// The task has been forcibly cancelled. +#[allow(clippy::unusual_byte_groupings)] // https://github.com/rust-lang/rust-clippy/issues/6556 +const CANCELLED: usize = 0b100_000; + +/// All bits. +const STATE_MASK: usize = LIFECYCLE_MASK | NOTIFIED | JOIN_INTEREST | JOIN_WAKER | CANCELLED; + +/// Bits used by the ref count portion of the state. +const REF_COUNT_MASK: usize = !STATE_MASK; + +/// Number of positions to shift the ref count. +const REF_COUNT_SHIFT: usize = REF_COUNT_MASK.count_zeros() as usize; + +/// One ref count. +const REF_ONE: usize = 1 << REF_COUNT_SHIFT; + +/// State a task is initialized with. 
+/// +/// A task is initialized with three references: +/// +/// * A reference that will be stored in an OwnedTasks or LocalOwnedTasks. +/// * A reference that will be sent to the scheduler as an ordinary notification. +/// * A reference for the JoinHandle. +/// +/// As the task starts with a `JoinHandle`, `JOIN_INTEREST` is set. +/// As the task starts with a `Notified`, `NOTIFIED` is set. +const INITIAL_STATE: usize = (REF_ONE * 3) | JOIN_INTEREST | NOTIFIED; + +#[must_use] +pub(super) enum TransitionToRunning { + Success, + Cancelled, + Failed, + Dealloc, +} + +#[must_use] +pub(super) enum TransitionToIdle { + Ok, + OkNotified, + OkDealloc, + Cancelled, +} + +#[must_use] +pub(super) enum TransitionToNotifiedByVal { + DoNothing, + Submit, + Dealloc, +} + +#[must_use] +pub(crate) enum TransitionToNotifiedByRef { + DoNothing, + Submit, +} + +/// All transitions are performed via RMW operations. This establishes an +/// unambiguous modification order. +impl State { + /// Returns a task's initial state. + pub(super) fn new() -> State { + // The raw task returned by this method has a ref-count of three. See + // the comment on INITIAL_STATE for more. + State { + val: AtomicUsize::new(INITIAL_STATE), + } + } + + /// Loads the current state, establishes `Acquire` ordering. + pub(super) fn load(&self) -> Snapshot { + Snapshot(self.val.load(Acquire)) + } + + /// Attempts to transition the lifecycle to `Running`. This sets the + /// notified bit to false so notifications during the poll can be detected. + pub(super) fn transition_to_running(&self) -> TransitionToRunning { + self.fetch_update_action(|mut next| { + let action; + assert!(next.is_notified()); + + if !next.is_idle() { + // This happens if the task is either currently running or if it + // has already completed, e.g. if it was cancelled during + // shutdown. Consume the ref-count and return. + next.ref_dec(); + if next.ref_count() == 0 { + action = TransitionToRunning::Dealloc; + } else { + action = TransitionToRunning::Failed; + } + } else { + // We are able to lock the RUNNING bit. + next.set_running(); + next.unset_notified(); + + if next.is_cancelled() { + action = TransitionToRunning::Cancelled; + } else { + action = TransitionToRunning::Success; + } + } + (action, Some(next)) + }) + } + + /// Transitions the task from `Running` -> `Idle`. + /// + /// Returns `true` if the transition to `Idle` is successful, `false` otherwise. + /// The transition to `Idle` fails if the task has been flagged to be + /// cancelled. + pub(super) fn transition_to_idle(&self) -> TransitionToIdle { + self.fetch_update_action(|curr| { + assert!(curr.is_running()); + + if curr.is_cancelled() { + return (TransitionToIdle::Cancelled, None); + } + + let mut next = curr; + let action; + next.unset_running(); + + if !next.is_notified() { + // Polling the future consumes the ref-count of the Notified. + next.ref_dec(); + if next.ref_count() == 0 { + action = TransitionToIdle::OkDealloc; + } else { + action = TransitionToIdle::Ok; + } + } else { + // The caller will schedule a new notification, so we create a + // new ref-count for the notification. Our own ref-count is kept + // for now, and the caller will drop it shortly. + next.ref_inc(); + action = TransitionToIdle::OkNotified; + } + + (action, Some(next)) + }) + } + + /// Transitions the task from `Running` -> `Complete`. 
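+    ///
+    /// (Editor's note: because `RUNNING` is known to be set and `COMPLETE`
+    /// known to be clear here, a single `fetch_xor` with `RUNNING | COMPLETE`
+    /// clears the former and sets the latter atomically; e.g.
+    /// `0b0001 ^ 0b0011 == 0b0010`.)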
+ pub(super) fn transition_to_complete(&self) -> Snapshot { + const DELTA: usize = RUNNING | COMPLETE; + + let prev = Snapshot(self.val.fetch_xor(DELTA, AcqRel)); + assert!(prev.is_running()); + assert!(!prev.is_complete()); + + Snapshot(prev.0 ^ DELTA) + } + + /// Transitions from `Complete` -> `Terminal`, decrementing the reference + /// count the specified number of times. + /// + /// Returns true if the task should be deallocated. + pub(super) fn transition_to_terminal(&self, count: usize) -> bool { + let prev = Snapshot(self.val.fetch_sub(count * REF_ONE, AcqRel)); + assert!( + prev.ref_count() >= count, + "current: {}, sub: {}", + prev.ref_count(), + count + ); + prev.ref_count() == count + } + + /// Transitions the state to `NOTIFIED`. + /// + /// If no task needs to be submitted, a ref-count is consumed. + /// + /// If a task needs to be submitted, the ref-count is incremented for the + /// new Notified. + pub(super) fn transition_to_notified_by_val(&self) -> TransitionToNotifiedByVal { + self.fetch_update_action(|mut snapshot| { + let action; + + if snapshot.is_running() { + // If the task is running, we mark it as notified, but we should + // not submit anything as the thread currently running the + // future is responsible for that. + snapshot.set_notified(); + snapshot.ref_dec(); + + // The thread that set the running bit also holds a ref-count. + assert!(snapshot.ref_count() > 0); + + action = TransitionToNotifiedByVal::DoNothing; + } else if snapshot.is_complete() || snapshot.is_notified() { + // We do not need to submit any notifications, but we have to + // decrement the ref-count. + snapshot.ref_dec(); + + if snapshot.ref_count() == 0 { + action = TransitionToNotifiedByVal::Dealloc; + } else { + action = TransitionToNotifiedByVal::DoNothing; + } + } else { + // We create a new notified that we can submit. The caller + // retains ownership of the ref-count they passed in. + snapshot.set_notified(); + snapshot.ref_inc(); + action = TransitionToNotifiedByVal::Submit; + } + + (action, Some(snapshot)) + }) + } + + /// Transitions the state to `NOTIFIED`. + pub(super) fn transition_to_notified_by_ref(&self) -> TransitionToNotifiedByRef { + self.fetch_update_action(|mut snapshot| { + if snapshot.is_complete() || snapshot.is_notified() { + // There is nothing to do in this case. + (TransitionToNotifiedByRef::DoNothing, None) + } else if snapshot.is_running() { + // If the task is running, we mark it as notified, but we should + // not submit as the thread currently running the future is + // responsible for that. + snapshot.set_notified(); + (TransitionToNotifiedByRef::DoNothing, Some(snapshot)) + } else { + // The task is idle and not notified. We should submit a + // notification. + snapshot.set_notified(); + snapshot.ref_inc(); + (TransitionToNotifiedByRef::Submit, Some(snapshot)) + } + }) + } + + /// Transitions the state to `NOTIFIED`, unconditionally increasing the ref count. + #[cfg(all( + tokio_unstable, + tokio_taskdump, + feature = "rt", + target_os = "linux", + any(target_arch = "aarch64", target_arch = "x86", target_arch = "x86_64") + ))] + pub(super) fn transition_to_notified_for_tracing(&self) { + self.fetch_update_action(|mut snapshot| { + snapshot.set_notified(); + snapshot.ref_inc(); + ((), Some(snapshot)) + }); + } + + /// Sets the cancelled bit and transitions the state to `NOTIFIED` if idle. + /// + /// Returns `true` if the task needs to be submitted to the pool for + /// execution. 
+ pub(super) fn transition_to_notified_and_cancel(&self) -> bool { + self.fetch_update_action(|mut snapshot| { + if snapshot.is_cancelled() || snapshot.is_complete() { + // Aborts to completed or cancelled tasks are no-ops. + (false, None) + } else if snapshot.is_running() { + // If the task is running, we mark it as cancelled. The thread + // running the task will notice the cancelled bit when it + // stops polling and it will kill the task. + // + // The set_notified() call is not strictly necessary but it will + // in some cases let a wake_by_ref call return without having + // to perform a compare_exchange. + snapshot.set_notified(); + snapshot.set_cancelled(); + (false, Some(snapshot)) + } else { + // The task is idle. We set the cancelled and notified bits and + // submit a notification if the notified bit was not already + // set. + snapshot.set_cancelled(); + if !snapshot.is_notified() { + snapshot.set_notified(); + snapshot.ref_inc(); + (true, Some(snapshot)) + } else { + (false, Some(snapshot)) + } + } + }) + } + + /// Sets the `CANCELLED` bit and attempts to transition to `Running`. + /// + /// Returns `true` if the transition to `Running` succeeded. + pub(super) fn transition_to_shutdown(&self) -> bool { + let mut prev = Snapshot(0); + + let _ = self.fetch_update(|mut snapshot| { + prev = snapshot; + + if snapshot.is_idle() { + snapshot.set_running(); + } + + // If the task was not idle, the thread currently running the task + // will notice the cancelled bit and cancel it once the poll + // completes. + snapshot.set_cancelled(); + Some(snapshot) + }); + + prev.is_idle() + } + + /// Optimistically tries to swap the state assuming the join handle is + /// __immediately__ dropped on spawn. + pub(super) fn drop_join_handle_fast(&self) -> Result<(), ()> { + use std::sync::atomic::Ordering::Relaxed; + + // Relaxed is acceptable as if this function is called and succeeds, + // then nothing has been done w/ the join handle. + // + // The moment the join handle is used (polled), the `JOIN_WAKER` flag is + // set, at which point the CAS will fail. + // + // Given this, there is no risk if this operation is reordered. + self.val + .compare_exchange_weak( + INITIAL_STATE, + (INITIAL_STATE - REF_ONE) & !JOIN_INTEREST, + Release, + Relaxed, + ) + .map(|_| ()) + .map_err(|_| ()) + } + + /// Tries to unset the JOIN_INTEREST flag. + /// + /// Returns `Ok` if the operation happens before the task transitions to a + /// completed state, `Err` otherwise. + pub(super) fn unset_join_interested(&self) -> UpdateResult { + self.fetch_update(|curr| { + assert!(curr.is_join_interested()); + + if curr.is_complete() { + return None; + } + + let mut next = curr; + next.unset_join_interested(); + + Some(next) + }) + } + + /// Sets the `JOIN_WAKER` bit. + /// + /// Returns `Ok` if the bit is set, `Err` otherwise. This operation fails if + /// the task has completed. + pub(super) fn set_join_waker(&self) -> UpdateResult { + self.fetch_update(|curr| { + assert!(curr.is_join_interested()); + assert!(!curr.is_join_waker_set()); + + if curr.is_complete() { + return None; + } + + let mut next = curr; + next.set_join_waker(); + + Some(next) + }) + } + + /// Unsets the `JOIN_WAKER` bit. + /// + /// Returns `Ok` has been unset, `Err` otherwise. This operation fails if + /// the task has completed. 
+ pub(super) fn unset_waker(&self) -> UpdateResult { + self.fetch_update(|curr| { + assert!(curr.is_join_interested()); + assert!(curr.is_join_waker_set()); + + if curr.is_complete() { + return None; + } + + let mut next = curr; + next.unset_join_waker(); + + Some(next) + }) + } + + pub(super) fn ref_inc(&self) { + use std::process; + use std::sync::atomic::Ordering::Relaxed; + + // Using a relaxed ordering is alright here, as knowledge of the + // original reference prevents other threads from erroneously deleting + // the object. + // + // As explained in the [Boost documentation][1], Increasing the + // reference counter can always be done with memory_order_relaxed: New + // references to an object can only be formed from an existing + // reference, and passing an existing reference from one thread to + // another must already provide any required synchronization. + // + // [1]: (www.boost.org/doc/libs/1_55_0/doc/html/atomic/usage_examples.html) + let prev = self.val.fetch_add(REF_ONE, Relaxed); + + // If the reference count overflowed, abort. + if prev > isize::MAX as usize { + process::abort(); + } + } + + /// Returns `true` if the task should be released. + pub(super) fn ref_dec(&self) -> bool { + let prev = Snapshot(self.val.fetch_sub(REF_ONE, AcqRel)); + assert!(prev.ref_count() >= 1); + prev.ref_count() == 1 + } + + /// Returns `true` if the task should be released. + pub(super) fn ref_dec_twice(&self) -> bool { + let prev = Snapshot(self.val.fetch_sub(2 * REF_ONE, AcqRel)); + assert!(prev.ref_count() >= 2); + prev.ref_count() == 2 + } + + fn fetch_update_action<F, T>(&self, mut f: F) -> T + where + F: FnMut(Snapshot) -> (T, Option<Snapshot>), + { + let mut curr = self.load(); + + loop { + let (output, next) = f(curr); + let next = match next { + Some(next) => next, + None => return output, + }; + + let res = self.val.compare_exchange(curr.0, next.0, AcqRel, Acquire); + + match res { + Ok(_) => return output, + Err(actual) => curr = Snapshot(actual), + } + } + } + + fn fetch_update<F>(&self, mut f: F) -> Result<Snapshot, Snapshot> + where + F: FnMut(Snapshot) -> Option<Snapshot>, + { + let mut curr = self.load(); + + loop { + let next = match f(curr) { + Some(next) => next, + None => return Err(curr), + }; + + let res = self.val.compare_exchange(curr.0, next.0, AcqRel, Acquire); + + match res { + Ok(_) => return Ok(next), + Err(actual) => curr = Snapshot(actual), + } + } + } +} + +// ===== impl Snapshot ===== + +impl Snapshot { + /// Returns `true` if the task is in an idle state. + pub(super) fn is_idle(self) -> bool { + self.0 & (RUNNING | COMPLETE) == 0 + } + + /// Returns `true` if the task has been flagged as notified. + pub(super) fn is_notified(self) -> bool { + self.0 & NOTIFIED == NOTIFIED + } + + fn unset_notified(&mut self) { + self.0 &= !NOTIFIED + } + + fn set_notified(&mut self) { + self.0 |= NOTIFIED + } + + pub(super) fn is_running(self) -> bool { + self.0 & RUNNING == RUNNING + } + + fn set_running(&mut self) { + self.0 |= RUNNING; + } + + fn unset_running(&mut self) { + self.0 &= !RUNNING; + } + + pub(super) fn is_cancelled(self) -> bool { + self.0 & CANCELLED == CANCELLED + } + + fn set_cancelled(&mut self) { + self.0 |= CANCELLED; + } + + /// Returns `true` if the task's future has completed execution. 
+ pub(super) fn is_complete(self) -> bool { + self.0 & COMPLETE == COMPLETE + } + + pub(super) fn is_join_interested(self) -> bool { + self.0 & JOIN_INTEREST == JOIN_INTEREST + } + + fn unset_join_interested(&mut self) { + self.0 &= !JOIN_INTEREST + } + + pub(super) fn is_join_waker_set(self) -> bool { + self.0 & JOIN_WAKER == JOIN_WAKER + } + + fn set_join_waker(&mut self) { + self.0 |= JOIN_WAKER; + } + + fn unset_join_waker(&mut self) { + self.0 &= !JOIN_WAKER + } + + pub(super) fn ref_count(self) -> usize { + (self.0 & REF_COUNT_MASK) >> REF_COUNT_SHIFT + } + + fn ref_inc(&mut self) { + assert!(self.0 <= isize::MAX as usize); + self.0 += REF_ONE; + } + + pub(super) fn ref_dec(&mut self) { + assert!(self.ref_count() > 0); + self.0 -= REF_ONE + } +} + +impl fmt::Debug for State { + fn fmt(&self, fmt: &mut fmt::Formatter<'_>) -> fmt::Result { + let snapshot = self.load(); + snapshot.fmt(fmt) + } +} + +impl fmt::Debug for Snapshot { + fn fmt(&self, fmt: &mut fmt::Formatter<'_>) -> fmt::Result { + fmt.debug_struct("Snapshot") + .field("is_running", &self.is_running()) + .field("is_complete", &self.is_complete()) + .field("is_notified", &self.is_notified()) + .field("is_cancelled", &self.is_cancelled()) + .field("is_join_interested", &self.is_join_interested()) + .field("is_join_waker_set", &self.is_join_waker_set()) + .field("ref_count", &self.ref_count()) + .finish() + } +} diff --git a/third_party/rust/tokio/src/runtime/task/trace/mod.rs b/third_party/rust/tokio/src/runtime/task/trace/mod.rs new file mode 100644 index 0000000000..543b7eee98 --- /dev/null +++ b/third_party/rust/tokio/src/runtime/task/trace/mod.rs @@ -0,0 +1,330 @@ +use crate::loom::sync::Arc; +use crate::runtime::context; +use crate::runtime::scheduler::{self, current_thread, Inject}; + +use backtrace::BacktraceFrame; +use std::cell::Cell; +use std::collections::VecDeque; +use std::ffi::c_void; +use std::fmt; +use std::future::Future; +use std::pin::Pin; +use std::ptr::{self, NonNull}; +use std::task::{self, Poll}; + +mod symbol; +mod tree; + +use symbol::Symbol; +use tree::Tree; + +use super::{Notified, OwnedTasks}; + +type Backtrace = Vec<BacktraceFrame>; +type SymbolTrace = Vec<Symbol>; + +/// The ambiant backtracing context. +pub(crate) struct Context { + /// The address of [`Trace::root`] establishes an upper unwinding bound on + /// the backtraces in `Trace`. + active_frame: Cell<Option<NonNull<Frame>>>, + /// The place to stash backtraces. + collector: Cell<Option<Trace>>, +} + +/// A [`Frame`] in an intrusive, doubly-linked tree of [`Frame`]s. +struct Frame { + /// The location associated with this frame. + inner_addr: *const c_void, + + /// The parent frame, if any. + parent: Option<NonNull<Frame>>, +} + +/// An tree execution trace. +/// +/// Traces are captured with [`Trace::capture`], rooted with [`Trace::root`] +/// and leaved with [`trace_leaf`]. +#[derive(Clone, Debug)] +pub(crate) struct Trace { + // The linear backtraces that comprise this trace. These linear traces can + // be re-knitted into a tree. + backtraces: Vec<Backtrace>, +} + +pin_project_lite::pin_project! 
{ + #[derive(Debug, Clone)] + #[must_use = "futures do nothing unless you `.await` or poll them"] + pub(crate) struct Root<T> { + #[pin] + future: T, + } +} + +const FAIL_NO_THREAD_LOCAL: &str = "The Tokio thread-local has been destroyed \ + as part of shutting down the current \ + thread, so collecting a taskdump is not \ + possible."; + +impl Context { + pub(crate) const fn new() -> Self { + Context { + active_frame: Cell::new(None), + collector: Cell::new(None), + } + } + + /// SAFETY: Callers of this function must ensure that trace frames always + /// form a valid linked list. + unsafe fn try_with_current<F, R>(f: F) -> Option<R> + where + F: FnOnce(&Self) -> R, + { + crate::runtime::context::with_trace(f) + } + + unsafe fn with_current_frame<F, R>(f: F) -> R + where + F: FnOnce(&Cell<Option<NonNull<Frame>>>) -> R, + { + Self::try_with_current(|context| f(&context.active_frame)).expect(FAIL_NO_THREAD_LOCAL) + } + + fn with_current_collector<F, R>(f: F) -> R + where + F: FnOnce(&Cell<Option<Trace>>) -> R, + { + // SAFETY: This call can only access the collector field, so it cannot + // break the trace frame linked list. + unsafe { + Self::try_with_current(|context| f(&context.collector)).expect(FAIL_NO_THREAD_LOCAL) + } + } +} + +impl Trace { + /// Invokes `f`, returning both its result and the collection of backtraces + /// captured at each sub-invocation of [`trace_leaf`]. + #[inline(never)] + pub(crate) fn capture<F, R>(f: F) -> (R, Trace) + where + F: FnOnce() -> R, + { + let collector = Trace { backtraces: vec![] }; + + let previous = Context::with_current_collector(|current| current.replace(Some(collector))); + + let result = f(); + + let collector = + Context::with_current_collector(|current| current.replace(previous)).unwrap(); + + (result, collector) + } + + /// The root of a trace. + #[inline(never)] + pub(crate) fn root<F>(future: F) -> Root<F> { + Root { future } + } +} + +/// If this is a sub-invocation of [`Trace::capture`], capture a backtrace. +/// +/// The captured backtrace will be returned by [`Trace::capture`]. +/// +/// Invoking this function does nothing when it is not a sub-invocation +/// [`Trace::capture`]. +// This function is marked `#[inline(never)]` to ensure that it gets a distinct `Frame` in the +// backtrace, below which frames should not be included in the backtrace (since they reflect the +// internal implementation details of this crate). +#[inline(never)] +pub(crate) fn trace_leaf(cx: &mut task::Context<'_>) -> Poll<()> { + // Safety: We don't manipulate the current context's active frame. + let did_trace = unsafe { + Context::try_with_current(|context_cell| { + if let Some(mut collector) = context_cell.collector.take() { + let mut frames = vec![]; + let mut above_leaf = false; + + if let Some(active_frame) = context_cell.active_frame.get() { + let active_frame = active_frame.as_ref(); + + backtrace::trace(|frame| { + let below_root = !ptr::eq(frame.symbol_address(), active_frame.inner_addr); + + // only capture frames above `Trace::leaf` and below + // `Trace::root`. 
+ if above_leaf && below_root { + frames.push(frame.to_owned().into()); + } + + if ptr::eq(frame.symbol_address(), trace_leaf as *const _) { + above_leaf = true; + } + + // only continue unwinding if we're below `Trace::root` + below_root + }); + } + collector.backtraces.push(frames); + context_cell.collector.set(Some(collector)); + true + } else { + false + } + }) + .unwrap_or(false) + }; + + if did_trace { + // Use the same logic that `yield_now` uses to send out wakeups after + // the task yields. + context::with_scheduler(|scheduler| { + if let Some(scheduler) = scheduler { + match scheduler { + scheduler::Context::CurrentThread(s) => s.defer.defer(cx.waker()), + #[cfg(all(feature = "rt-multi-thread", not(tokio_wasi)))] + scheduler::Context::MultiThread(s) => s.defer.defer(cx.waker()), + } + } + }); + + Poll::Pending + } else { + Poll::Ready(()) + } +} + +impl fmt::Display for Trace { + fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result { + Tree::from_trace(self.clone()).fmt(f) + } +} + +fn defer<F: FnOnce() -> R, R>(f: F) -> impl Drop { + use std::mem::ManuallyDrop; + + struct Defer<F: FnOnce() -> R, R>(ManuallyDrop<F>); + + impl<F: FnOnce() -> R, R> Drop for Defer<F, R> { + #[inline(always)] + fn drop(&mut self) { + unsafe { + ManuallyDrop::take(&mut self.0)(); + } + } + } + + Defer(ManuallyDrop::new(f)) +} + +impl<T: Future> Future for Root<T> { + type Output = T::Output; + + #[inline(never)] + fn poll(self: Pin<&mut Self>, cx: &mut task::Context<'_>) -> Poll<Self::Output> { + // SAFETY: The context's current frame is restored to its original state + // before `frame` is dropped. + unsafe { + let mut frame = Frame { + inner_addr: Self::poll as *const c_void, + parent: None, + }; + + Context::with_current_frame(|current| { + frame.parent = current.take(); + current.set(Some(NonNull::from(&frame))); + }); + + let _restore = defer(|| { + Context::with_current_frame(|current| { + current.set(frame.parent); + }); + }); + + let this = self.project(); + this.future.poll(cx) + } + } +} + +/// Trace and poll all tasks of the current_thread runtime. +pub(in crate::runtime) fn trace_current_thread( + owned: &OwnedTasks<Arc<current_thread::Handle>>, + local: &mut VecDeque<Notified<Arc<current_thread::Handle>>>, + injection: &Inject<Arc<current_thread::Handle>>, +) -> Vec<Trace> { + // clear the local and injection queues + local.clear(); + + while let Some(task) = injection.pop() { + drop(task); + } + + // notify each task + let mut tasks = vec![]; + owned.for_each(|task| { + // set the notified bit + task.as_raw().state().transition_to_notified_for_tracing(); + // store the raw tasks into a vec + tasks.push(task.as_raw()); + }); + + tasks + .into_iter() + .map(|task| { + let ((), trace) = Trace::capture(|| task.poll()); + trace + }) + .collect() +} + +cfg_rt_multi_thread! { + use crate::loom::sync::Mutex; + use crate::runtime::scheduler::multi_thread; + use crate::runtime::scheduler::multi_thread::Synced; + use crate::runtime::scheduler::inject::Shared; + + /// Trace and poll all tasks of the current_thread runtime. + /// + /// ## Safety + /// + /// Must be called with the same `synced` that `injection` was created with. 
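// A std-only sketch of the install/run/restore discipline that
// `Trace::capture` and `Root::poll` above rely on: stash the previous value
// of a thread-local, install a new one, and restore the old value from a
// drop guard so the restore happens even if the wrapped code panics. The
// thread-local and types here are stand-ins, not tokio's `Context`.
use std::cell::Cell;

thread_local! {
    static COLLECTOR: Cell<Option<Vec<&'static str>>> = Cell::new(None);
}

// Drop guard that puts the previous collector back when it goes out of scope.
struct Restore(Option<Vec<&'static str>>);

impl Drop for Restore {
    fn drop(&mut self) {
        COLLECTOR.with(|c| c.set(self.0.take()));
    }
}

// Record an event into the currently installed collector, if any.
fn record(event: &'static str) {
    COLLECTOR.with(|c| {
        if let Some(mut events) = c.take() {
            events.push(event);
            c.set(Some(events));
        }
    });
}

// Run `f` with a fresh collector installed and return what it recorded.
fn capture<R>(f: impl FnOnce() -> R) -> (R, Vec<&'static str>) {
    let previous = COLLECTOR.with(|c| c.replace(Some(Vec::new())));
    let _restore = Restore(previous);

    let result = f();

    let events = COLLECTOR.with(|c| c.replace(None)).unwrap_or_default();
    (result, events)
}

fn main() {
    let (value, events) = capture(|| {
        record("poll");
        record("pending");
        42
    });
    assert_eq!(value, 42);
    assert_eq!(events, ["poll", "pending"]);
}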
+ pub(in crate::runtime) unsafe fn trace_multi_thread( + owned: &OwnedTasks<Arc<multi_thread::Handle>>, + local: &mut multi_thread::queue::Local<Arc<multi_thread::Handle>>, + synced: &Mutex<Synced>, + injection: &Shared<Arc<multi_thread::Handle>>, + ) -> Vec<Trace> { + // clear the local queue + while let Some(notified) = local.pop() { + drop(notified); + } + + // clear the injection queue + let mut synced = synced.lock(); + while let Some(notified) = injection.pop(&mut synced.inject) { + drop(notified); + } + + drop(synced); + + // notify each task + let mut traces = vec![]; + owned.for_each(|task| { + // set the notified bit + task.as_raw().state().transition_to_notified_for_tracing(); + + // trace the task + let ((), trace) = Trace::capture(|| task.as_raw().poll()); + traces.push(trace); + + // reschedule the task + let _ = task.as_raw().state().transition_to_notified_by_ref(); + task.as_raw().schedule(); + }); + + traces + } +} diff --git a/third_party/rust/tokio/src/runtime/task/trace/symbol.rs b/third_party/rust/tokio/src/runtime/task/trace/symbol.rs new file mode 100644 index 0000000000..49d7ba37f7 --- /dev/null +++ b/third_party/rust/tokio/src/runtime/task/trace/symbol.rs @@ -0,0 +1,92 @@ +use backtrace::BacktraceSymbol; +use std::fmt; +use std::hash::{Hash, Hasher}; +use std::ptr; + +/// A symbol in a backtrace. +/// +/// This wrapper type serves two purposes. The first is that it provides a +/// representation of a symbol that can be inserted into hashmaps and hashsets; +/// the [`backtrace`] crate does not define [`Hash`], [`PartialEq`], or [`Eq`] +/// on [`BacktraceSymbol`], and recommends that users define their own wrapper +/// which implements these traits. +/// +/// Second, this wrapper includes a `parent_hash` field that uniquely +/// identifies this symbol's position in its trace. Otherwise, e.g., our code +/// would not be able to distinguish between recursive calls of a function at +/// different depths. 
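// A standalone illustration of the `parent_hash` idea described above: a
// node's identity hashes in the identity of everything above it, so the same
// symbol name occurring at two different depths of a recursive call path gets
// two distinct identities. `PathNode` is an illustrative stand-in, not the
// `Symbol` type below.
use std::collections::hash_map::DefaultHasher;
use std::hash::{Hash, Hasher};

#[derive(Hash)]
struct PathNode<'a> {
    name: &'a str,
    parent_hash: u64,
}

fn identity(node: &PathNode<'_>) -> u64 {
    let mut hasher = DefaultHasher::new();
    node.hash(&mut hasher);
    hasher.finish()
}

fn main() {
    // Model the call path: main -> recurse -> recurse.
    let root = PathNode { name: "main", parent_hash: 0 };
    let depth1 = PathNode { name: "recurse", parent_hash: identity(&root) };
    let depth2 = PathNode { name: "recurse", parent_hash: identity(&depth1) };

    // Same name, different position in the path, hence a different identity.
    assert_ne!(identity(&depth1), identity(&depth2));
}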
+#[derive(Clone)] +pub(super) struct Symbol { + pub(super) symbol: BacktraceSymbol, + pub(super) parent_hash: u64, +} + +impl Hash for Symbol { + fn hash<H>(&self, state: &mut H) + where + H: Hasher, + { + if let Some(name) = self.symbol.name() { + name.as_bytes().hash(state); + } + + if let Some(addr) = self.symbol.addr() { + ptr::hash(addr, state); + } + + self.symbol.filename().hash(state); + self.symbol.lineno().hash(state); + self.symbol.colno().hash(state); + self.parent_hash.hash(state); + } +} + +impl PartialEq for Symbol { + fn eq(&self, other: &Self) -> bool { + (self.parent_hash == other.parent_hash) + && match (self.symbol.name(), other.symbol.name()) { + (None, None) => true, + (Some(lhs_name), Some(rhs_name)) => lhs_name.as_bytes() == rhs_name.as_bytes(), + _ => false, + } + && match (self.symbol.addr(), other.symbol.addr()) { + (None, None) => true, + (Some(lhs_addr), Some(rhs_addr)) => ptr::eq(lhs_addr, rhs_addr), + _ => false, + } + && (self.symbol.filename() == other.symbol.filename()) + && (self.symbol.lineno() == other.symbol.lineno()) + && (self.symbol.colno() == other.symbol.colno()) + } +} + +impl Eq for Symbol {} + +impl fmt::Display for Symbol { + fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result { + if let Some(name) = self.symbol.name() { + let name = name.to_string(); + let name = if let Some((name, _)) = name.rsplit_once("::") { + name + } else { + &name + }; + fmt::Display::fmt(&name, f)?; + } + + if let Some(filename) = self.symbol.filename() { + f.write_str(" at ")?; + filename.to_string_lossy().fmt(f)?; + if let Some(lineno) = self.symbol.lineno() { + f.write_str(":")?; + fmt::Display::fmt(&lineno, f)?; + if let Some(colno) = self.symbol.colno() { + f.write_str(":")?; + fmt::Display::fmt(&colno, f)?; + } + } + } + + Ok(()) + } +} diff --git a/third_party/rust/tokio/src/runtime/task/trace/tree.rs b/third_party/rust/tokio/src/runtime/task/trace/tree.rs new file mode 100644 index 0000000000..7e6f8efeca --- /dev/null +++ b/third_party/rust/tokio/src/runtime/task/trace/tree.rs @@ -0,0 +1,126 @@ +use std::collections::{hash_map::DefaultHasher, HashMap, HashSet}; +use std::fmt; +use std::hash::{Hash, Hasher}; + +use super::{Backtrace, Symbol, SymbolTrace, Trace}; + +/// An adjacency list representation of an execution tree. +/// +/// This tree provides a convenient intermediate representation for formatting +/// [`Trace`] as a tree. +pub(super) struct Tree { + /// The roots of the trees. + /// + /// There should only be one root, but the code is robust to multiple roots. + roots: HashSet<Symbol>, + + /// The adjacency list of symbols in the execution tree(s). + edges: HashMap<Symbol, HashSet<Symbol>>, +} + +impl Tree { + /// Constructs a [`Tree`] from [`Trace`] + pub(super) fn from_trace(trace: Trace) -> Self { + let mut roots: HashSet<Symbol> = HashSet::default(); + let mut edges: HashMap<Symbol, HashSet<Symbol>> = HashMap::default(); + + for trace in trace.backtraces { + let trace = to_symboltrace(trace); + + if let Some(first) = trace.first() { + roots.insert(first.to_owned()); + } + + let mut trace = trace.into_iter().peekable(); + while let Some(frame) = trace.next() { + let subframes = edges.entry(frame).or_default(); + if let Some(subframe) = trace.peek() { + subframes.insert(subframe.clone()); + } + } + } + + Tree { roots, edges } + } + + /// Produces the sub-symbols of a given symbol. 
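// A minimal, std-only sketch of how `Tree::from_trace` above knits linear
// traces into an adjacency list: each backtrace contributes its first frame
// as a root and every consecutive pair of frames as an edge. Plain `&str`
// frames stand in for `Symbol` purely for illustration.
use std::collections::{HashMap, HashSet};

fn build_tree<'a>(
    traces: &[Vec<&'a str>],
) -> (HashSet<&'a str>, HashMap<&'a str, HashSet<&'a str>>) {
    let mut roots = HashSet::new();
    let mut edges: HashMap<&'a str, HashSet<&'a str>> = HashMap::new();

    for trace in traces {
        if let Some(first) = trace.first() {
            roots.insert(*first);
        }

        // Walk the trace, pairing each frame with the frame that follows it.
        let mut frames = trace.iter().copied().peekable();
        while let Some(frame) = frames.next() {
            let children = edges.entry(frame).or_default();
            if let Some(next) = frames.peek() {
                children.insert(*next);
            }
        }
    }

    (roots, edges)
}

fn main() {
    // Two traces sharing a prefix yield a single root that forks at "a".
    let traces = vec![vec!["root", "a", "leaf1"], vec!["root", "a", "leaf2"]];
    let (roots, edges) = build_tree(&traces);

    let expected: HashSet<&str> = ["leaf1", "leaf2"].into_iter().collect();
    assert_eq!(roots.len(), 1);
    assert_eq!(edges["a"], expected);
}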
+ fn consequences(&self, frame: &Symbol) -> Option<impl ExactSizeIterator<Item = &Symbol>> { + Some(self.edges.get(frame)?.iter()) + } + + /// Format this [`Tree`] as a textual tree. + fn display<W: fmt::Write>( + &self, + f: &mut W, + root: &Symbol, + is_last: bool, + prefix: &str, + ) -> fmt::Result { + let root_fmt = format!("{}", root); + + let current; + let next; + + if is_last { + current = format!("{prefix}└╼\u{a0}{root_fmt}"); + next = format!("{}\u{a0}\u{a0}\u{a0}", prefix); + } else { + current = format!("{prefix}├╼\u{a0}{root_fmt}"); + next = format!("{}│\u{a0}\u{a0}", prefix); + } + + write!(f, "{}", { + let mut current = current.chars(); + current.next().unwrap(); + current.next().unwrap(); + ¤t.as_str() + })?; + + if let Some(consequences) = self.consequences(root) { + let len = consequences.len(); + for (i, consequence) in consequences.enumerate() { + let is_last = i == len - 1; + writeln!(f)?; + self.display(f, consequence, is_last, &next)?; + } + } + + Ok(()) + } +} + +impl fmt::Display for Tree { + fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result { + for root in &self.roots { + self.display(f, root, true, " ")?; + } + Ok(()) + } +} + +/// Resolve a sequence of [`backtrace::BacktraceFrame`]s into a sequence of +/// [`Symbol`]s. +fn to_symboltrace(backtrace: Backtrace) -> SymbolTrace { + // Resolve the backtrace frames to symbols. + let backtrace: Backtrace = { + let mut backtrace = backtrace::Backtrace::from(backtrace); + backtrace.resolve(); + backtrace.into() + }; + + // Accumulate the symbols in descending order into `symboltrace`. + let mut symboltrace: SymbolTrace = vec![]; + let mut state = DefaultHasher::new(); + for frame in backtrace.into_iter().rev() { + for symbol in frame.symbols().iter().rev() { + let symbol = Symbol { + symbol: symbol.clone(), + parent_hash: state.finish(), + }; + symbol.hash(&mut state); + symboltrace.push(symbol); + } + } + + symboltrace +} diff --git a/third_party/rust/tokio/src/runtime/task/waker.rs b/third_party/rust/tokio/src/runtime/task/waker.rs new file mode 100644 index 0000000000..b5f5ace9ec --- /dev/null +++ b/third_party/rust/tokio/src/runtime/task/waker.rs @@ -0,0 +1,104 @@ +use crate::future::Future; +use crate::runtime::task::{Header, RawTask, Schedule}; + +use std::marker::PhantomData; +use std::mem::ManuallyDrop; +use std::ops; +use std::ptr::NonNull; +use std::task::{RawWaker, RawWakerVTable, Waker}; + +pub(super) struct WakerRef<'a, S: 'static> { + waker: ManuallyDrop<Waker>, + _p: PhantomData<(&'a Header, S)>, +} + +/// Returns a `WakerRef` which avoids having to preemptively increase the +/// refcount if there is no need to do so. +pub(super) fn waker_ref<T, S>(header: &NonNull<Header>) -> WakerRef<'_, S> +where + T: Future, + S: Schedule, +{ + // `Waker::will_wake` uses the VTABLE pointer as part of the check. This + // means that `will_wake` will always return false when using the current + // task's waker. (discussion at rust-lang/rust#66281). + // + // To fix this, we use a single vtable. Since we pass in a reference at this + // point and not an *owned* waker, we must ensure that `drop` is never + // called on this waker instance. This is done by wrapping it with + // `ManuallyDrop` and then never calling drop. + let waker = unsafe { ManuallyDrop::new(Waker::from_raw(raw_waker(*header))) }; + + WakerRef { + waker, + _p: PhantomData, + } +} + +impl<S> ops::Deref for WakerRef<'_, S> { + type Target = Waker; + + fn deref(&self) -> &Waker { + &self.waker + } +} + +cfg_trace! { + macro_rules! 
trace { + ($header:expr, $op:expr) => { + if let Some(id) = Header::get_tracing_id(&$header) { + tracing::trace!( + target: "tokio::task::waker", + op = $op, + task.id = id.into_u64(), + ); + } + } + } +} + +cfg_not_trace! { + macro_rules! trace { + ($header:expr, $op:expr) => { + // noop + let _ = &$header; + } + } +} + +unsafe fn clone_waker(ptr: *const ()) -> RawWaker { + let header = NonNull::new_unchecked(ptr as *mut Header); + trace!(header, "waker.clone"); + header.as_ref().state.ref_inc(); + raw_waker(header) +} + +unsafe fn drop_waker(ptr: *const ()) { + let ptr = NonNull::new_unchecked(ptr as *mut Header); + trace!(ptr, "waker.drop"); + let raw = RawTask::from_raw(ptr); + raw.drop_reference(); +} + +unsafe fn wake_by_val(ptr: *const ()) { + let ptr = NonNull::new_unchecked(ptr as *mut Header); + trace!(ptr, "waker.wake"); + let raw = RawTask::from_raw(ptr); + raw.wake_by_val(); +} + +// Wake without consuming the waker +unsafe fn wake_by_ref(ptr: *const ()) { + let ptr = NonNull::new_unchecked(ptr as *mut Header); + trace!(ptr, "waker.wake_by_ref"); + let raw = RawTask::from_raw(ptr); + raw.wake_by_ref(); +} + +static WAKER_VTABLE: RawWakerVTable = + RawWakerVTable::new(clone_waker, wake_by_val, wake_by_ref, drop_waker); + +fn raw_waker(header: NonNull<Header>) -> RawWaker { + let ptr = header.as_ptr() as *const (); + RawWaker::new(ptr, &WAKER_VTABLE) +} diff --git a/third_party/rust/tokio/src/runtime/tests/inject.rs b/third_party/rust/tokio/src/runtime/tests/inject.rs new file mode 100644 index 0000000000..ccead5e024 --- /dev/null +++ b/third_party/rust/tokio/src/runtime/tests/inject.rs @@ -0,0 +1,54 @@ +use crate::runtime::scheduler::inject; + +#[test] +fn push_and_pop() { + const N: usize = 2; + + let (inject, mut synced) = inject::Shared::new(); + + for i in 0..N { + assert_eq!(inject.len(), i); + let (task, _) = super::unowned(async {}); + unsafe { inject.push(&mut synced, task) }; + } + + for i in 0..N { + assert_eq!(inject.len(), N - i); + assert!(unsafe { inject.pop(&mut synced) }.is_some()); + } + + println!("--------------"); + + assert!(unsafe { inject.pop(&mut synced) }.is_none()); +} + +#[test] +fn push_batch_and_pop() { + let (inject, mut inject_synced) = inject::Shared::new(); + + unsafe { + inject.push_batch( + &mut inject_synced, + (0..10).map(|_| super::unowned(async {}).0), + ); + + assert_eq!(5, inject.pop_n(&mut inject_synced, 5).count()); + assert_eq!(5, inject.pop_n(&mut inject_synced, 5).count()); + assert_eq!(0, inject.pop_n(&mut inject_synced, 5).count()); + } +} + +#[test] +fn pop_n_drains_on_drop() { + let (inject, mut inject_synced) = inject::Shared::new(); + + unsafe { + inject.push_batch( + &mut inject_synced, + (0..10).map(|_| super::unowned(async {}).0), + ); + let _ = inject.pop_n(&mut inject_synced, 10); + + assert_eq!(inject.len(), 0); + } +} diff --git a/third_party/rust/tokio/src/runtime/tests/loom_blocking.rs b/third_party/rust/tokio/src/runtime/tests/loom_blocking.rs new file mode 100644 index 0000000000..5c4aeae39c --- /dev/null +++ b/third_party/rust/tokio/src/runtime/tests/loom_blocking.rs @@ -0,0 +1,102 @@ +use crate::runtime::{self, Runtime}; + +use std::sync::Arc; + +#[test] +fn blocking_shutdown() { + loom::model(|| { + let v = Arc::new(()); + + let rt = mk_runtime(1); + { + let _enter = rt.enter(); + for _ in 0..2 { + let v = v.clone(); + crate::task::spawn_blocking(move || { + assert!(1 < Arc::strong_count(&v)); + }); + } + } + + drop(rt); + assert_eq!(1, Arc::strong_count(&v)); + }); +} + +#[test] +fn 
spawn_mandatory_blocking_should_always_run() { + use crate::runtime::tests::loom_oneshot; + loom::model(|| { + let rt = runtime::Builder::new_current_thread().build().unwrap(); + + let (tx, rx) = loom_oneshot::channel(); + let _enter = rt.enter(); + runtime::spawn_blocking(|| {}); + runtime::spawn_mandatory_blocking(move || { + let _ = tx.send(()); + }) + .unwrap(); + + drop(rt); + + // This call will deadlock if `spawn_mandatory_blocking` doesn't run. + let () = rx.recv(); + }); +} + +#[test] +fn spawn_mandatory_blocking_should_run_even_when_shutting_down_from_other_thread() { + use crate::runtime::tests::loom_oneshot; + loom::model(|| { + let rt = runtime::Builder::new_current_thread().build().unwrap(); + let handle = rt.handle().clone(); + + // Drop the runtime in a different thread + { + loom::thread::spawn(move || { + drop(rt); + }); + } + + let _enter = handle.enter(); + let (tx, rx) = loom_oneshot::channel(); + let handle = runtime::spawn_mandatory_blocking(move || { + let _ = tx.send(()); + }); + + // handle.is_some() means that `spawn_mandatory_blocking` + // promised us to run the blocking task + if handle.is_some() { + // This call will deadlock if `spawn_mandatory_blocking` doesn't run. + let () = rx.recv(); + } + }); +} + +#[test] +fn spawn_blocking_when_paused() { + use std::time::Duration; + loom::model(|| { + let rt = crate::runtime::Builder::new_current_thread() + .enable_time() + .start_paused(true) + .build() + .unwrap(); + let handle = rt.handle(); + let _enter = handle.enter(); + let a = crate::task::spawn_blocking(|| {}); + let b = crate::task::spawn_blocking(|| {}); + rt.block_on(crate::time::timeout(Duration::from_millis(1), async move { + a.await.expect("blocking task should finish"); + b.await.expect("blocking task should finish"); + })) + .expect("timeout should not trigger"); + }); +} + +fn mk_runtime(num_threads: usize) -> Runtime { + runtime::Builder::new_multi_thread() + .worker_threads(num_threads) + .build() + .unwrap() +} diff --git a/third_party/rust/tokio/src/runtime/tests/loom_current_thread_scheduler.rs b/third_party/rust/tokio/src/runtime/tests/loom_current_thread_scheduler.rs new file mode 100644 index 0000000000..a772603f71 --- /dev/null +++ b/third_party/rust/tokio/src/runtime/tests/loom_current_thread_scheduler.rs @@ -0,0 +1,142 @@ +use crate::loom::sync::atomic::AtomicUsize; +use crate::loom::sync::Arc; +use crate::loom::thread; +use crate::runtime::{Builder, Runtime}; +use crate::sync::oneshot::{self, Receiver}; +use crate::task; +use std::future::Future; +use std::pin::Pin; +use std::sync::atomic::Ordering::{Acquire, Release}; +use std::task::{Context, Poll}; + +fn assert_at_most_num_polls(rt: Arc<Runtime>, at_most_polls: usize) { + let (tx, rx) = oneshot::channel(); + let num_polls = Arc::new(AtomicUsize::new(0)); + rt.spawn(async move { + for _ in 0..12 { + task::yield_now().await; + } + tx.send(()).unwrap(); + }); + + rt.block_on(async { + BlockedFuture { + rx, + num_polls: num_polls.clone(), + } + .await; + }); + + let polls = num_polls.load(Acquire); + assert!(polls <= at_most_polls); +} + +#[test] +fn block_on_num_polls() { + loom::model(|| { + // we expect at most 4 number of polls because there are three points at + // which we poll the future and an opportunity for a false-positive.. At + // any of these points it can be ready: + // + // - when we fail to steal the parker and we block on a notification + // that it is available. 
+ // + // - when we steal the parker and we schedule the future + // + // - when the future is woken up and we have ran the max number of tasks + // for the current tick or there are no more tasks to run. + // + // - a thread is notified that the parker is available but a third + // thread acquires it before the notified thread can. + // + let at_most = 4; + + let rt1 = Arc::new(Builder::new_current_thread().build().unwrap()); + let rt2 = rt1.clone(); + let rt3 = rt1.clone(); + + let th1 = thread::spawn(move || assert_at_most_num_polls(rt1, at_most)); + let th2 = thread::spawn(move || assert_at_most_num_polls(rt2, at_most)); + let th3 = thread::spawn(move || assert_at_most_num_polls(rt3, at_most)); + + th1.join().unwrap(); + th2.join().unwrap(); + th3.join().unwrap(); + }); +} + +#[test] +fn assert_no_unnecessary_polls() { + loom::model(|| { + // // After we poll outer future, woken should reset to false + let rt = Builder::new_current_thread().build().unwrap(); + let (tx, rx) = oneshot::channel(); + let pending_cnt = Arc::new(AtomicUsize::new(0)); + + rt.spawn(async move { + for _ in 0..24 { + task::yield_now().await; + } + tx.send(()).unwrap(); + }); + + let pending_cnt_clone = pending_cnt.clone(); + rt.block_on(async move { + // use task::yield_now() to ensure woken set to true + // ResetFuture will be polled at most once + // Here comes two cases + // 1. recv no message from channel, ResetFuture will be polled + // but get Pending and we record ResetFuture.pending_cnt ++. + // Then when message arrive, ResetFuture returns Ready. So we + // expect ResetFuture.pending_cnt = 1 + // 2. recv message from channel, ResetFuture returns Ready immediately. + // We expect ResetFuture.pending_cnt = 0 + task::yield_now().await; + ResetFuture { + rx, + pending_cnt: pending_cnt_clone, + } + .await; + }); + + let pending_cnt = pending_cnt.load(Acquire); + assert!(pending_cnt <= 1); + }); +} + +struct BlockedFuture { + rx: Receiver<()>, + num_polls: Arc<AtomicUsize>, +} + +impl Future for BlockedFuture { + type Output = (); + + fn poll(mut self: Pin<&mut Self>, cx: &mut Context<'_>) -> Poll<Self::Output> { + self.num_polls.fetch_add(1, Release); + + match Pin::new(&mut self.rx).poll(cx) { + Poll::Pending => Poll::Pending, + _ => Poll::Ready(()), + } + } +} + +struct ResetFuture { + rx: Receiver<()>, + pending_cnt: Arc<AtomicUsize>, +} + +impl Future for ResetFuture { + type Output = (); + + fn poll(mut self: Pin<&mut Self>, cx: &mut Context<'_>) -> Poll<Self::Output> { + match Pin::new(&mut self.rx).poll(cx) { + Poll::Pending => { + self.pending_cnt.fetch_add(1, Release); + Poll::Pending + } + _ => Poll::Ready(()), + } + } +} diff --git a/third_party/rust/tokio/src/runtime/tests/loom_join_set.rs b/third_party/rust/tokio/src/runtime/tests/loom_join_set.rs new file mode 100644 index 0000000000..bd343876a2 --- /dev/null +++ b/third_party/rust/tokio/src/runtime/tests/loom_join_set.rs @@ -0,0 +1,82 @@ +use crate::runtime::Builder; +use crate::task::JoinSet; + +#[test] +fn test_join_set() { + loom::model(|| { + let rt = Builder::new_multi_thread() + .worker_threads(1) + .build() + .unwrap(); + let mut set = JoinSet::new(); + + rt.block_on(async { + assert_eq!(set.len(), 0); + set.spawn(async { () }); + assert_eq!(set.len(), 1); + set.spawn(async { () }); + assert_eq!(set.len(), 2); + let () = set.join_next().await.unwrap().unwrap(); + assert_eq!(set.len(), 1); + set.spawn(async { () }); + assert_eq!(set.len(), 2); + let () = set.join_next().await.unwrap().unwrap(); + assert_eq!(set.len(), 1); + let () = 
set.join_next().await.unwrap().unwrap(); + assert_eq!(set.len(), 0); + set.spawn(async { () }); + assert_eq!(set.len(), 1); + }); + + drop(set); + drop(rt); + }); +} + +#[test] +fn abort_all_during_completion() { + use std::sync::{ + atomic::{AtomicBool, Ordering::SeqCst}, + Arc, + }; + + // These booleans assert that at least one execution had the task complete first, and that at + // least one execution had the task be cancelled before it completed. + let complete_happened = Arc::new(AtomicBool::new(false)); + let cancel_happened = Arc::new(AtomicBool::new(false)); + + { + let complete_happened = complete_happened.clone(); + let cancel_happened = cancel_happened.clone(); + loom::model(move || { + let rt = Builder::new_multi_thread() + .worker_threads(1) + .build() + .unwrap(); + + let mut set = JoinSet::new(); + + rt.block_on(async { + set.spawn(async { () }); + set.abort_all(); + + match set.join_next().await { + Some(Ok(())) => complete_happened.store(true, SeqCst), + Some(Err(err)) if err.is_cancelled() => cancel_happened.store(true, SeqCst), + Some(Err(err)) => panic!("fail: {}", err), + None => { + unreachable!("Aborting the task does not remove it from the JoinSet.") + } + } + + assert!(matches!(set.join_next().await, None)); + }); + + drop(set); + drop(rt); + }); + } + + assert!(complete_happened.load(SeqCst)); + assert!(cancel_happened.load(SeqCst)); +} diff --git a/third_party/rust/tokio/src/runtime/tests/loom_local.rs b/third_party/rust/tokio/src/runtime/tests/loom_local.rs new file mode 100644 index 0000000000..d9a07a45f0 --- /dev/null +++ b/third_party/rust/tokio/src/runtime/tests/loom_local.rs @@ -0,0 +1,47 @@ +use crate::runtime::tests::loom_oneshot as oneshot; +use crate::runtime::Builder; +use crate::task::LocalSet; + +use std::task::Poll; + +/// Waking a runtime will attempt to push a task into a queue of notifications +/// in the runtime, however the tasks in such a queue usually have a reference +/// to the runtime itself. This means that if they are not properly removed at +/// runtime shutdown, this will cause a memory leak. +/// +/// This test verifies that waking something during shutdown of a LocalSet does +/// not result in tasks lingering in the queue once shutdown is complete. This +/// is verified using loom's leak finder. 
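// A std-only illustration of the leak scenario the comment above describes:
// queued wakeups hold a strong reference back to the runtime, so a queue
// that is not drained at shutdown keeps the runtime alive forever.
// `FakeRuntime` is an illustrative stand-in, not a tokio type.
use std::sync::{Arc, Mutex};

struct FakeRuntime {
    // Pending notifications for this runtime; each entry points back at it.
    queue: Mutex<Vec<Arc<FakeRuntime>>>,
}

fn main() {
    let rt = Arc::new(FakeRuntime {
        queue: Mutex::new(Vec::new()),
    });

    // A wakeup arrives while the runtime is shutting down and gets queued.
    rt.queue.lock().unwrap().push(rt.clone());
    assert_eq!(Arc::strong_count(&rt), 2);

    // Without this drain the reference cycle would survive the drop of `rt`
    // below, and the runtime's memory would never be reclaimed.
    rt.queue.lock().unwrap().clear();
    assert_eq!(Arc::strong_count(&rt), 1);
}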
+#[test] +fn wake_during_shutdown() { + loom::model(|| { + let rt = Builder::new_current_thread().build().unwrap(); + let ls = LocalSet::new(); + + let (send, recv) = oneshot::channel(); + + ls.spawn_local(async move { + let mut send = Some(send); + + let () = futures::future::poll_fn(|cx| { + if let Some(send) = send.take() { + send.send(cx.waker().clone()); + } + + Poll::Pending + }) + .await; + }); + + let handle = loom::thread::spawn(move || { + let waker = recv.recv(); + waker.wake(); + }); + + ls.block_on(&rt, crate::task::yield_now()); + + drop(ls); + handle.join().unwrap(); + drop(rt); + }); +} diff --git a/third_party/rust/tokio/src/runtime/tests/loom_oneshot.rs b/third_party/rust/tokio/src/runtime/tests/loom_oneshot.rs new file mode 100644 index 0000000000..87eb638642 --- /dev/null +++ b/third_party/rust/tokio/src/runtime/tests/loom_oneshot.rs @@ -0,0 +1,48 @@ +use crate::loom::sync::{Arc, Mutex}; +use loom::sync::Notify; + +pub(crate) fn channel<T>() -> (Sender<T>, Receiver<T>) { + let inner = Arc::new(Inner { + notify: Notify::new(), + value: Mutex::new(None), + }); + + let tx = Sender { + inner: inner.clone(), + }; + let rx = Receiver { inner }; + + (tx, rx) +} + +pub(crate) struct Sender<T> { + inner: Arc<Inner<T>>, +} + +pub(crate) struct Receiver<T> { + inner: Arc<Inner<T>>, +} + +struct Inner<T> { + notify: Notify, + value: Mutex<Option<T>>, +} + +impl<T> Sender<T> { + pub(crate) fn send(self, value: T) { + *self.inner.value.lock() = Some(value); + self.inner.notify.notify(); + } +} + +impl<T> Receiver<T> { + pub(crate) fn recv(self) -> T { + loop { + if let Some(v) = self.inner.value.lock().take() { + return v; + } + + self.inner.notify.wait(); + } + } +} diff --git a/third_party/rust/tokio/src/runtime/tests/loom_pool.rs b/third_party/rust/tokio/src/runtime/tests/loom_pool.rs new file mode 100644 index 0000000000..fb42e1eb40 --- /dev/null +++ b/third_party/rust/tokio/src/runtime/tests/loom_pool.rs @@ -0,0 +1,458 @@ +/// Full runtime loom tests. These are heavy tests and take significant time to +/// run on CI. +/// +/// Use `LOOM_MAX_PREEMPTIONS=1` to do a "quick" run as a smoke test. +/// +/// In order to speed up the C +use crate::future::poll_fn; +use crate::runtime::tests::loom_oneshot as oneshot; +use crate::runtime::{self, Runtime}; +use crate::{spawn, task}; +use tokio_test::assert_ok; + +use loom::sync::atomic::{AtomicBool, AtomicUsize}; +use loom::sync::Arc; + +use pin_project_lite::pin_project; +use std::future::Future; +use std::pin::Pin; +use std::sync::atomic::Ordering::{Relaxed, SeqCst}; +use std::task::{Context, Poll}; + +mod atomic_take { + use loom::sync::atomic::AtomicBool; + use std::mem::MaybeUninit; + use std::sync::atomic::Ordering::SeqCst; + + pub(super) struct AtomicTake<T> { + inner: MaybeUninit<T>, + taken: AtomicBool, + } + + impl<T> AtomicTake<T> { + pub(super) fn new(value: T) -> Self { + Self { + inner: MaybeUninit::new(value), + taken: AtomicBool::new(false), + } + } + + pub(super) fn take(&self) -> Option<T> { + // safety: Only one thread will see the boolean change from false + // to true, so that thread is able to take the value. 
+ match self.taken.fetch_or(true, SeqCst) { + false => unsafe { Some(std::ptr::read(self.inner.as_ptr())) }, + true => None, + } + } + } + + impl<T> Drop for AtomicTake<T> { + fn drop(&mut self) { + drop(self.take()); + } + } +} + +#[derive(Clone)] +struct AtomicOneshot<T> { + value: std::sync::Arc<atomic_take::AtomicTake<oneshot::Sender<T>>>, +} +impl<T> AtomicOneshot<T> { + fn new(sender: oneshot::Sender<T>) -> Self { + Self { + value: std::sync::Arc::new(atomic_take::AtomicTake::new(sender)), + } + } + + fn assert_send(&self, value: T) { + self.value.take().unwrap().send(value); + } +} + +/// Tests are divided into groups to make the runs faster on CI. +mod group_a { + use super::*; + + #[test] + fn racy_shutdown() { + loom::model(|| { + let pool = mk_pool(1); + + // here's the case we want to exercise: + // + // a worker that still has tasks in its local queue gets sent to the blocking pool (due to + // block_in_place). the blocking pool is shut down, so drops the worker. the worker's + // shutdown method never gets run. + // + // we do this by spawning two tasks on one worker, the first of which does block_in_place, + // and then immediately drop the pool. + + pool.spawn(track(async { + crate::task::block_in_place(|| {}); + })); + pool.spawn(track(async {})); + drop(pool); + }); + } + + #[test] + fn pool_multi_spawn() { + loom::model(|| { + let pool = mk_pool(2); + let c1 = Arc::new(AtomicUsize::new(0)); + + let (tx, rx) = oneshot::channel(); + let tx1 = AtomicOneshot::new(tx); + + // Spawn a task + let c2 = c1.clone(); + let tx2 = tx1.clone(); + pool.spawn(track(async move { + spawn(track(async move { + if 1 == c1.fetch_add(1, Relaxed) { + tx1.assert_send(()); + } + })); + })); + + // Spawn a second task + pool.spawn(track(async move { + spawn(track(async move { + if 1 == c2.fetch_add(1, Relaxed) { + tx2.assert_send(()); + } + })); + })); + + rx.recv(); + }); + } + + fn only_blocking_inner(first_pending: bool) { + loom::model(move || { + let pool = mk_pool(1); + let (block_tx, block_rx) = oneshot::channel(); + + pool.spawn(track(async move { + crate::task::block_in_place(move || { + block_tx.send(()); + }); + if first_pending { + task::yield_now().await + } + })); + + block_rx.recv(); + drop(pool); + }); + } + + #[test] + fn only_blocking_without_pending() { + only_blocking_inner(false) + } + + #[test] + fn only_blocking_with_pending() { + only_blocking_inner(true) + } +} + +mod group_b { + use super::*; + + fn blocking_and_regular_inner(first_pending: bool) { + const NUM: usize = 3; + loom::model(move || { + let pool = mk_pool(1); + let cnt = Arc::new(AtomicUsize::new(0)); + + let (block_tx, block_rx) = oneshot::channel(); + let (done_tx, done_rx) = oneshot::channel(); + let done_tx = AtomicOneshot::new(done_tx); + + pool.spawn(track(async move { + crate::task::block_in_place(move || { + block_tx.send(()); + }); + if first_pending { + task::yield_now().await + } + })); + + for _ in 0..NUM { + let cnt = cnt.clone(); + let done_tx = done_tx.clone(); + + pool.spawn(track(async move { + if NUM == cnt.fetch_add(1, Relaxed) + 1 { + done_tx.assert_send(()); + } + })); + } + + done_rx.recv(); + block_rx.recv(); + + drop(pool); + }); + } + + #[test] + fn blocking_and_regular() { + blocking_and_regular_inner(false); + } + + #[test] + fn blocking_and_regular_with_pending() { + blocking_and_regular_inner(true); + } + + #[test] + fn join_output() { + loom::model(|| { + let rt = mk_pool(1); + + rt.block_on(async { + let t = crate::spawn(track(async { "hello" })); + + let out = 
assert_ok!(t.await); + assert_eq!("hello", out.into_inner()); + }); + }); + } + + #[test] + fn poll_drop_handle_then_drop() { + loom::model(|| { + let rt = mk_pool(1); + + rt.block_on(async move { + let mut t = crate::spawn(track(async { "hello" })); + + poll_fn(|cx| { + let _ = Pin::new(&mut t).poll(cx); + Poll::Ready(()) + }) + .await; + }); + }) + } + + #[test] + fn complete_block_on_under_load() { + loom::model(|| { + let pool = mk_pool(1); + + pool.block_on(async { + // Trigger a re-schedule + crate::spawn(track(async { + for _ in 0..2 { + task::yield_now().await; + } + })); + + gated2(true).await + }); + }); + } + + #[test] + fn shutdown_with_notification() { + use crate::sync::oneshot; + + loom::model(|| { + let rt = mk_pool(2); + let (done_tx, done_rx) = oneshot::channel::<()>(); + + rt.spawn(track(async move { + let (tx, rx) = oneshot::channel::<()>(); + + crate::spawn(async move { + crate::task::spawn_blocking(move || { + let _ = tx.send(()); + }); + + let _ = done_rx.await; + }); + + let _ = rx.await; + + let _ = done_tx.send(()); + })); + }); + } +} + +mod group_c { + use super::*; + + #[test] + fn pool_shutdown() { + loom::model(|| { + let pool = mk_pool(2); + + pool.spawn(track(async move { + gated2(true).await; + })); + + pool.spawn(track(async move { + gated2(false).await; + })); + + drop(pool); + }); + } +} + +mod group_d { + use super::*; + + #[test] + fn pool_multi_notify() { + loom::model(|| { + let pool = mk_pool(2); + + let c1 = Arc::new(AtomicUsize::new(0)); + + let (done_tx, done_rx) = oneshot::channel(); + let done_tx1 = AtomicOneshot::new(done_tx); + let done_tx2 = done_tx1.clone(); + + // Spawn a task + let c2 = c1.clone(); + pool.spawn(track(async move { + multi_gated().await; + + if 1 == c1.fetch_add(1, Relaxed) { + done_tx1.assert_send(()); + } + })); + + // Spawn a second task + pool.spawn(track(async move { + multi_gated().await; + + if 1 == c2.fetch_add(1, Relaxed) { + done_tx2.assert_send(()); + } + })); + + done_rx.recv(); + }); + } +} + +fn mk_pool(num_threads: usize) -> Runtime { + runtime::Builder::new_multi_thread() + .worker_threads(num_threads) + // Set the intervals to avoid tuning logic + .event_interval(2) + .build() + .unwrap() +} + +fn gated2(thread: bool) -> impl Future<Output = &'static str> { + use loom::thread; + use std::sync::Arc; + + let gate = Arc::new(AtomicBool::new(false)); + let mut fired = false; + + poll_fn(move |cx| { + if !fired { + let gate = gate.clone(); + let waker = cx.waker().clone(); + + if thread { + thread::spawn(move || { + gate.store(true, SeqCst); + waker.wake_by_ref(); + }); + } else { + spawn(track(async move { + gate.store(true, SeqCst); + waker.wake_by_ref(); + })); + } + + fired = true; + + return Poll::Pending; + } + + if gate.load(SeqCst) { + Poll::Ready("hello world") + } else { + Poll::Pending + } + }) +} + +async fn multi_gated() { + struct Gate { + waker: loom::future::AtomicWaker, + count: AtomicUsize, + } + + let gate = Arc::new(Gate { + waker: loom::future::AtomicWaker::new(), + count: AtomicUsize::new(0), + }); + + { + let gate = gate.clone(); + spawn(track(async move { + for i in 1..3 { + gate.count.store(i, SeqCst); + gate.waker.wake(); + } + })); + } + + poll_fn(move |cx| { + if gate.count.load(SeqCst) < 2 { + gate.waker.register_by_ref(cx.waker()); + Poll::Pending + } else { + Poll::Ready(()) + } + }) + .await; +} + +fn track<T: Future>(f: T) -> Track<T> { + Track { + inner: f, + arc: Arc::new(()), + } +} + +pin_project! 
{ + struct Track<T> { + #[pin] + inner: T, + // Arc is used to hook into loom's leak tracking. + arc: Arc<()>, + } +} + +impl<T> Track<T> { + fn into_inner(self) -> T { + self.inner + } +} + +impl<T: Future> Future for Track<T> { + type Output = Track<T::Output>; + + fn poll(self: Pin<&mut Self>, cx: &mut Context<'_>) -> Poll<Self::Output> { + let me = self.project(); + + Poll::Ready(Track { + inner: ready!(me.inner.poll(cx)), + arc: me.arc.clone(), + }) + } +} diff --git a/third_party/rust/tokio/src/runtime/tests/loom_queue.rs b/third_party/rust/tokio/src/runtime/tests/loom_queue.rs new file mode 100644 index 0000000000..b60e039b9a --- /dev/null +++ b/third_party/rust/tokio/src/runtime/tests/loom_queue.rs @@ -0,0 +1,205 @@ +use crate::runtime::scheduler::multi_thread::{queue, Stats}; +use crate::runtime::tests::NoopSchedule; + +use loom::thread; +use std::cell::RefCell; + +fn new_stats() -> Stats { + Stats::new(&crate::runtime::WorkerMetrics::new()) +} + +#[test] +fn basic() { + loom::model(|| { + let (steal, mut local) = queue::local(); + let inject = RefCell::new(vec![]); + let mut stats = new_stats(); + + let th = thread::spawn(move || { + let mut stats = new_stats(); + let (_, mut local) = queue::local(); + let mut n = 0; + + for _ in 0..3 { + if steal.steal_into(&mut local, &mut stats).is_some() { + n += 1; + } + + while local.pop().is_some() { + n += 1; + } + } + + n + }); + + let mut n = 0; + + for _ in 0..2 { + for _ in 0..2 { + let (task, _) = super::unowned(async {}); + local.push_back_or_overflow(task, &inject, &mut stats); + } + + if local.pop().is_some() { + n += 1; + } + + // Push another task + let (task, _) = super::unowned(async {}); + local.push_back_or_overflow(task, &inject, &mut stats); + + while local.pop().is_some() { + n += 1; + } + } + + n += inject.borrow_mut().drain(..).count(); + + n += th.join().unwrap(); + + assert_eq!(6, n); + }); +} + +#[test] +fn steal_overflow() { + loom::model(|| { + let (steal, mut local) = queue::local(); + let inject = RefCell::new(vec![]); + let mut stats = new_stats(); + + let th = thread::spawn(move || { + let mut stats = new_stats(); + let (_, mut local) = queue::local(); + let mut n = 0; + + if steal.steal_into(&mut local, &mut stats).is_some() { + n += 1; + } + + while local.pop().is_some() { + n += 1; + } + + n + }); + + let mut n = 0; + + // push a task, pop a task + let (task, _) = super::unowned(async {}); + local.push_back_or_overflow(task, &inject, &mut stats); + + if local.pop().is_some() { + n += 1; + } + + for _ in 0..6 { + let (task, _) = super::unowned(async {}); + local.push_back_or_overflow(task, &inject, &mut stats); + } + + n += th.join().unwrap(); + + while local.pop().is_some() { + n += 1; + } + + n += inject.borrow_mut().drain(..).count(); + + assert_eq!(7, n); + }); +} + +#[test] +fn multi_stealer() { + const NUM_TASKS: usize = 5; + + fn steal_tasks(steal: queue::Steal<NoopSchedule>) -> usize { + let mut stats = new_stats(); + let (_, mut local) = queue::local(); + + if steal.steal_into(&mut local, &mut stats).is_none() { + return 0; + } + + let mut n = 1; + + while local.pop().is_some() { + n += 1; + } + + n + } + + loom::model(|| { + let (steal, mut local) = queue::local(); + let inject = RefCell::new(vec![]); + let mut stats = new_stats(); + + // Push work + for _ in 0..NUM_TASKS { + let (task, _) = super::unowned(async {}); + local.push_back_or_overflow(task, &inject, &mut stats); + } + + let th1 = { + let steal = steal.clone(); + thread::spawn(move || steal_tasks(steal)) + }; + + let th2 = 
thread::spawn(move || steal_tasks(steal)); + + let mut n = 0; + + while local.pop().is_some() { + n += 1; + } + + n += inject.borrow_mut().drain(..).count(); + + n += th1.join().unwrap(); + n += th2.join().unwrap(); + + assert_eq!(n, NUM_TASKS); + }); +} + +#[test] +fn chained_steal() { + loom::model(|| { + let mut stats = new_stats(); + let (s1, mut l1) = queue::local(); + let (s2, mut l2) = queue::local(); + let inject = RefCell::new(vec![]); + + // Load up some tasks + for _ in 0..4 { + let (task, _) = super::unowned(async {}); + l1.push_back_or_overflow(task, &inject, &mut stats); + + let (task, _) = super::unowned(async {}); + l2.push_back_or_overflow(task, &inject, &mut stats); + } + + // Spawn a task to steal from **our** queue + let th = thread::spawn(move || { + let mut stats = new_stats(); + let (_, mut local) = queue::local(); + s1.steal_into(&mut local, &mut stats); + + while local.pop().is_some() {} + }); + + // Drain our tasks, then attempt to steal + while l1.pop().is_some() {} + + s2.steal_into(&mut l1, &mut stats); + + th.join().unwrap(); + + while l1.pop().is_some() {} + while l2.pop().is_some() {} + }); +} diff --git a/third_party/rust/tokio/src/runtime/tests/loom_shutdown_join.rs b/third_party/rust/tokio/src/runtime/tests/loom_shutdown_join.rs new file mode 100644 index 0000000000..6fbc4bfded --- /dev/null +++ b/third_party/rust/tokio/src/runtime/tests/loom_shutdown_join.rs @@ -0,0 +1,28 @@ +use crate::runtime::{Builder, Handle}; + +#[test] +fn join_handle_cancel_on_shutdown() { + let mut builder = loom::model::Builder::new(); + builder.preemption_bound = Some(2); + builder.check(|| { + use futures::future::FutureExt; + + let rt = Builder::new_multi_thread() + .worker_threads(2) + .build() + .unwrap(); + + let handle = rt.block_on(async move { Handle::current() }); + + let jh1 = handle.spawn(futures::future::pending::<()>()); + + drop(rt); + + let jh2 = handle.spawn(futures::future::pending::<()>()); + + let err1 = jh1.now_or_never().unwrap().unwrap_err(); + let err2 = jh2.now_or_never().unwrap().unwrap_err(); + assert!(err1.is_cancelled()); + assert!(err2.is_cancelled()); + }); +} diff --git a/third_party/rust/tokio/src/runtime/tests/loom_yield.rs b/third_party/rust/tokio/src/runtime/tests/loom_yield.rs new file mode 100644 index 0000000000..ba506e5a40 --- /dev/null +++ b/third_party/rust/tokio/src/runtime/tests/loom_yield.rs @@ -0,0 +1,37 @@ +use crate::runtime::park; +use crate::runtime::tests::loom_oneshot as oneshot; +use crate::runtime::{self, Runtime}; + +#[test] +fn yield_calls_park_before_scheduling_again() { + // Don't need to check all permutations + let mut loom = loom::model::Builder::default(); + loom.max_permutations = Some(1); + loom.check(|| { + let rt = mk_runtime(2); + let (tx, rx) = oneshot::channel::<()>(); + + rt.spawn(async { + let tid = loom::thread::current().id(); + let park_count = park::current_thread_park_count(); + + crate::task::yield_now().await; + + if tid == loom::thread::current().id() { + let new_park_count = park::current_thread_park_count(); + assert_eq!(park_count + 1, new_park_count); + } + + tx.send(()); + }); + + rx.recv(); + }); +} + +fn mk_runtime(num_threads: usize) -> Runtime { + runtime::Builder::new_multi_thread() + .worker_threads(num_threads) + .build() + .unwrap() +} diff --git a/third_party/rust/tokio/src/runtime/tests/mod.rs b/third_party/rust/tokio/src/runtime/tests/mod.rs new file mode 100644 index 0000000000..b12a76e268 --- /dev/null +++ b/third_party/rust/tokio/src/runtime/tests/mod.rs @@ -0,0 +1,78 @@ +// Enable 
dead_code / unreachable_pub here. It has been disabled in lib.rs for +// other code when running loom tests. +#![cfg_attr(loom, warn(dead_code, unreachable_pub))] + +use self::noop_scheduler::NoopSchedule; +use self::unowned_wrapper::unowned; + +mod noop_scheduler { + use crate::runtime::task::{self, Task}; + + /// `task::Schedule` implementation that does nothing, for testing. + pub(crate) struct NoopSchedule; + + impl task::Schedule for NoopSchedule { + fn release(&self, _task: &Task<Self>) -> Option<Task<Self>> { + None + } + + fn schedule(&self, _task: task::Notified<Self>) { + unreachable!(); + } + } +} + +mod unowned_wrapper { + use crate::runtime::task::{Id, JoinHandle, Notified}; + use crate::runtime::tests::NoopSchedule; + + #[cfg(all(tokio_unstable, feature = "tracing"))] + pub(crate) fn unowned<T>(task: T) -> (Notified<NoopSchedule>, JoinHandle<T::Output>) + where + T: std::future::Future + Send + 'static, + T::Output: Send + 'static, + { + use tracing::Instrument; + let span = tracing::trace_span!("test_span"); + let task = task.instrument(span); + let (task, handle) = crate::runtime::task::unowned(task, NoopSchedule, Id::next()); + (task.into_notified(), handle) + } + + #[cfg(not(all(tokio_unstable, feature = "tracing")))] + pub(crate) fn unowned<T>(task: T) -> (Notified<NoopSchedule>, JoinHandle<T::Output>) + where + T: std::future::Future + Send + 'static, + T::Output: Send + 'static, + { + let (task, handle) = crate::runtime::task::unowned(task, NoopSchedule, Id::next()); + (task.into_notified(), handle) + } +} + +cfg_loom! { + mod loom_blocking; + mod loom_current_thread_scheduler; + mod loom_local; + mod loom_oneshot; + mod loom_pool; + mod loom_queue; + mod loom_shutdown_join; + mod loom_join_set; + mod loom_yield; + + // Make sure debug assertions are enabled + #[cfg(not(debug_assertions))] + compiler_error!("these tests require debug assertions to be enabled"); +} + +cfg_not_loom! { + mod inject; + mod queue; + + #[cfg(not(miri))] + mod task_combinations; + + #[cfg(miri)] + mod task; +} diff --git a/third_party/rust/tokio/src/runtime/tests/queue.rs b/third_party/rust/tokio/src/runtime/tests/queue.rs new file mode 100644 index 0000000000..5df92b7a29 --- /dev/null +++ b/third_party/rust/tokio/src/runtime/tests/queue.rs @@ -0,0 +1,283 @@ +use crate::runtime::scheduler::multi_thread::{queue, Stats}; +use crate::runtime::task::{self, Schedule, Task}; + +use std::cell::RefCell; +use std::thread; +use std::time::Duration; + +#[allow(unused)] +macro_rules! assert_metrics { + ($stats:ident, $field:ident == $v:expr) => {{ + use crate::runtime::WorkerMetrics; + use std::sync::atomic::Ordering::Relaxed; + + let worker = WorkerMetrics::new(); + $stats.submit(&worker); + + let expect = $v; + let actual = worker.$field.load(Relaxed); + + assert!(actual == expect, "expect = {}; actual = {}", expect, actual) + }}; +} + +fn new_stats() -> Stats { + use crate::runtime::WorkerMetrics; + Stats::new(&WorkerMetrics::new()) +} + +#[test] +fn fits_256_one_at_a_time() { + let (_, mut local) = queue::local(); + let inject = RefCell::new(vec![]); + let mut stats = new_stats(); + + for _ in 0..256 { + let (task, _) = super::unowned(async {}); + local.push_back_or_overflow(task, &inject, &mut stats); + } + + cfg_metrics! 
{ + assert_metrics!(stats, overflow_count == 0); + } + + assert!(inject.borrow_mut().pop().is_none()); + + while local.pop().is_some() {} +} + +#[test] +fn fits_256_all_at_once() { + let (_, mut local) = queue::local(); + + let mut tasks = (0..256) + .map(|_| super::unowned(async {}).0) + .collect::<Vec<_>>(); + local.push_back(tasks.drain(..)); + + let mut i = 0; + while local.pop().is_some() { + i += 1; + } + + assert_eq!(i, 256); +} + +#[test] +fn fits_256_all_in_chunks() { + let (_, mut local) = queue::local(); + + let mut tasks = (0..256) + .map(|_| super::unowned(async {}).0) + .collect::<Vec<_>>(); + + local.push_back(tasks.drain(..10)); + local.push_back(tasks.drain(..100)); + local.push_back(tasks.drain(..46)); + local.push_back(tasks.drain(..100)); + + let mut i = 0; + while local.pop().is_some() { + i += 1; + } + + assert_eq!(i, 256); +} + +#[test] +fn overflow() { + let (_, mut local) = queue::local(); + let inject = RefCell::new(vec![]); + let mut stats = new_stats(); + + for _ in 0..257 { + let (task, _) = super::unowned(async {}); + local.push_back_or_overflow(task, &inject, &mut stats); + } + + cfg_metrics! { + assert_metrics!(stats, overflow_count == 1); + } + + let mut n = 0; + + n += inject.borrow_mut().drain(..).count(); + + while local.pop().is_some() { + n += 1; + } + + assert_eq!(n, 257); +} + +#[test] +fn steal_batch() { + let mut stats = new_stats(); + + let (steal1, mut local1) = queue::local(); + let (_, mut local2) = queue::local(); + let inject = RefCell::new(vec![]); + + for _ in 0..4 { + let (task, _) = super::unowned(async {}); + local1.push_back_or_overflow(task, &inject, &mut stats); + } + + assert!(steal1.steal_into(&mut local2, &mut stats).is_some()); + + cfg_metrics! { + assert_metrics!(stats, steal_count == 2); + } + + for _ in 0..1 { + assert!(local2.pop().is_some()); + } + + assert!(local2.pop().is_none()); + + for _ in 0..2 { + assert!(local1.pop().is_some()); + } + + assert!(local1.pop().is_none()); +} + +const fn normal_or_miri(normal: usize, miri: usize) -> usize { + if cfg!(miri) { + miri + } else { + normal + } +} + +#[test] +fn stress1() { + const NUM_ITER: usize = 5; + const NUM_STEAL: usize = normal_or_miri(1_000, 10); + const NUM_LOCAL: usize = normal_or_miri(1_000, 10); + const NUM_PUSH: usize = normal_or_miri(500, 10); + const NUM_POP: usize = normal_or_miri(250, 10); + + let mut stats = new_stats(); + + for _ in 0..NUM_ITER { + let (steal, mut local) = queue::local(); + let inject = RefCell::new(vec![]); + + let th = thread::spawn(move || { + let mut stats = new_stats(); + let (_, mut local) = queue::local(); + let mut n = 0; + + for _ in 0..NUM_STEAL { + if steal.steal_into(&mut local, &mut stats).is_some() { + n += 1; + } + + while local.pop().is_some() { + n += 1; + } + + thread::yield_now(); + } + + cfg_metrics! 
{ + assert_metrics!(stats, steal_count == n as _); + } + + n + }); + + let mut n = 0; + + for _ in 0..NUM_LOCAL { + for _ in 0..NUM_PUSH { + let (task, _) = super::unowned(async {}); + local.push_back_or_overflow(task, &inject, &mut stats); + } + + for _ in 0..NUM_POP { + if local.pop().is_some() { + n += 1; + } else { + break; + } + } + } + + n += inject.borrow_mut().drain(..).count(); + + n += th.join().unwrap(); + + assert_eq!(n, NUM_LOCAL * NUM_PUSH); + } +} + +#[test] +fn stress2() { + const NUM_ITER: usize = 1; + const NUM_TASKS: usize = normal_or_miri(1_000_000, 50); + const NUM_STEAL: usize = normal_or_miri(1_000, 10); + + let mut stats = new_stats(); + + for _ in 0..NUM_ITER { + let (steal, mut local) = queue::local(); + let inject = RefCell::new(vec![]); + + let th = thread::spawn(move || { + let mut stats = new_stats(); + let (_, mut local) = queue::local(); + let mut n = 0; + + for _ in 0..NUM_STEAL { + if steal.steal_into(&mut local, &mut stats).is_some() { + n += 1; + } + + while local.pop().is_some() { + n += 1; + } + + thread::sleep(Duration::from_micros(10)); + } + + n + }); + + let mut num_pop = 0; + + for i in 0..NUM_TASKS { + let (task, _) = super::unowned(async {}); + local.push_back_or_overflow(task, &inject, &mut stats); + + if i % 128 == 0 && local.pop().is_some() { + num_pop += 1; + } + + num_pop += inject.borrow_mut().drain(..).count(); + } + + num_pop += th.join().unwrap(); + + while local.pop().is_some() { + num_pop += 1; + } + + num_pop += inject.borrow_mut().drain(..).count(); + + assert_eq!(num_pop, NUM_TASKS); + } +} + +struct Runtime; + +impl Schedule for Runtime { + fn release(&self, _task: &Task<Self>) -> Option<Task<Self>> { + None + } + + fn schedule(&self, _task: task::Notified<Self>) { + unreachable!(); + } +} diff --git a/third_party/rust/tokio/src/runtime/tests/task.rs b/third_party/rust/tokio/src/runtime/tests/task.rs new file mode 100644 index 0000000000..a79c0f50d1 --- /dev/null +++ b/third_party/rust/tokio/src/runtime/tests/task.rs @@ -0,0 +1,332 @@ +use crate::runtime::task::{self, unowned, Id, JoinHandle, OwnedTasks, Schedule, Task}; +use crate::runtime::tests::NoopSchedule; +use crate::util::TryLock; + +use std::collections::VecDeque; +use std::future::Future; +use std::sync::atomic::{AtomicBool, Ordering}; +use std::sync::Arc; + +struct AssertDropHandle { + is_dropped: Arc<AtomicBool>, +} +impl AssertDropHandle { + #[track_caller] + fn assert_dropped(&self) { + assert!(self.is_dropped.load(Ordering::SeqCst)); + } + + #[track_caller] + fn assert_not_dropped(&self) { + assert!(!self.is_dropped.load(Ordering::SeqCst)); + } +} + +struct AssertDrop { + is_dropped: Arc<AtomicBool>, +} +impl AssertDrop { + fn new() -> (Self, AssertDropHandle) { + let shared = Arc::new(AtomicBool::new(false)); + ( + AssertDrop { + is_dropped: shared.clone(), + }, + AssertDropHandle { + is_dropped: shared.clone(), + }, + ) + } +} +impl Drop for AssertDrop { + fn drop(&mut self) { + self.is_dropped.store(true, Ordering::SeqCst); + } +} + +// A Notified does not shut down on drop, but it is dropped once the ref-count +// hits zero. 
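// A std-only sketch of the drop-tracking idiom that `AssertDrop` /
// `AssertDropHandle` above implement, applied to the ref-count point in the
// comment above: with two owners of a shared value, dropping one owner does
// not run the destructor; dropping the last one does. All names here are
// illustrative.
use std::sync::atomic::{AtomicBool, Ordering::SeqCst};
use std::sync::Arc;

struct NoisyDrop {
    dropped: Arc<AtomicBool>,
}

impl Drop for NoisyDrop {
    fn drop(&mut self) {
        self.dropped.store(true, SeqCst);
    }
}

fn main() {
    let dropped = Arc::new(AtomicBool::new(false));
    let value = Arc::new(NoisyDrop {
        dropped: dropped.clone(),
    });

    // Two references to the same value, like a `Notified` plus a `JoinHandle`.
    let second_ref = value.clone();

    drop(second_ref);
    assert!(!dropped.load(SeqCst)); // one reference still alive

    drop(value);
    assert!(dropped.load(SeqCst)); // last reference gone, destructor ran
}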
+#[test] +fn create_drop1() { + let (ad, handle) = AssertDrop::new(); + let (notified, join) = unowned( + async { + drop(ad); + unreachable!() + }, + NoopSchedule, + Id::next(), + ); + drop(notified); + handle.assert_not_dropped(); + drop(join); + handle.assert_dropped(); +} + +#[test] +fn create_drop2() { + let (ad, handle) = AssertDrop::new(); + let (notified, join) = unowned( + async { + drop(ad); + unreachable!() + }, + NoopSchedule, + Id::next(), + ); + drop(join); + handle.assert_not_dropped(); + drop(notified); + handle.assert_dropped(); +} + +#[test] +fn drop_abort_handle1() { + let (ad, handle) = AssertDrop::new(); + let (notified, join) = unowned( + async { + drop(ad); + unreachable!() + }, + NoopSchedule, + Id::next(), + ); + let abort = join.abort_handle(); + drop(join); + handle.assert_not_dropped(); + drop(notified); + handle.assert_not_dropped(); + drop(abort); + handle.assert_dropped(); +} + +#[test] +fn drop_abort_handle2() { + let (ad, handle) = AssertDrop::new(); + let (notified, join) = unowned( + async { + drop(ad); + unreachable!() + }, + NoopSchedule, + Id::next(), + ); + let abort = join.abort_handle(); + drop(notified); + handle.assert_not_dropped(); + drop(abort); + handle.assert_not_dropped(); + drop(join); + handle.assert_dropped(); +} + +// Shutting down through Notified works +#[test] +fn create_shutdown1() { + let (ad, handle) = AssertDrop::new(); + let (notified, join) = unowned( + async { + drop(ad); + unreachable!() + }, + NoopSchedule, + Id::next(), + ); + drop(join); + handle.assert_not_dropped(); + notified.shutdown(); + handle.assert_dropped(); +} + +#[test] +fn create_shutdown2() { + let (ad, handle) = AssertDrop::new(); + let (notified, join) = unowned( + async { + drop(ad); + unreachable!() + }, + NoopSchedule, + Id::next(), + ); + handle.assert_not_dropped(); + notified.shutdown(); + handle.assert_dropped(); + drop(join); +} + +#[test] +fn unowned_poll() { + let (task, _) = unowned(async {}, NoopSchedule, Id::next()); + task.run(); +} + +#[test] +fn schedule() { + with(|rt| { + rt.spawn(async { + crate::task::yield_now().await; + }); + + assert_eq!(2, rt.tick()); + rt.shutdown(); + }) +} + +#[test] +fn shutdown() { + with(|rt| { + rt.spawn(async { + loop { + crate::task::yield_now().await; + } + }); + + rt.tick_max(1); + + rt.shutdown(); + }) +} + +#[test] +fn shutdown_immediately() { + with(|rt| { + rt.spawn(async { + loop { + crate::task::yield_now().await; + } + }); + + rt.shutdown(); + }) +} + +#[test] +fn spawn_during_shutdown() { + static DID_SPAWN: AtomicBool = AtomicBool::new(false); + + struct SpawnOnDrop(Runtime); + impl Drop for SpawnOnDrop { + fn drop(&mut self) { + DID_SPAWN.store(true, Ordering::SeqCst); + self.0.spawn(async {}); + } + } + + with(|rt| { + let rt2 = rt.clone(); + rt.spawn(async move { + let _spawn_on_drop = SpawnOnDrop(rt2); + + loop { + crate::task::yield_now().await; + } + }); + + rt.tick_max(1); + rt.shutdown(); + }); + + assert!(DID_SPAWN.load(Ordering::SeqCst)); +} + +fn with(f: impl FnOnce(Runtime)) { + struct Reset; + + impl Drop for Reset { + fn drop(&mut self) { + let _rt = CURRENT.try_lock().unwrap().take(); + } + } + + let _reset = Reset; + + let rt = Runtime(Arc::new(Inner { + owned: OwnedTasks::new(), + core: TryLock::new(Core { + queue: VecDeque::new(), + }), + })); + + *CURRENT.try_lock().unwrap() = Some(rt.clone()); + f(rt) +} + +#[derive(Clone)] +struct Runtime(Arc<Inner>); + +struct Inner { + core: TryLock<Core>, + owned: OwnedTasks<Runtime>, +} + +struct Core { + queue: 
VecDeque<task::Notified<Runtime>>, +} + +static CURRENT: TryLock<Option<Runtime>> = TryLock::new(None); + +impl Runtime { + fn spawn<T>(&self, future: T) -> JoinHandle<T::Output> + where + T: 'static + Send + Future, + T::Output: 'static + Send, + { + let (handle, notified) = self.0.owned.bind(future, self.clone(), Id::next()); + + if let Some(notified) = notified { + self.schedule(notified); + } + + handle + } + + fn tick(&self) -> usize { + self.tick_max(usize::MAX) + } + + fn tick_max(&self, max: usize) -> usize { + let mut n = 0; + + while !self.is_empty() && n < max { + let task = self.next_task(); + n += 1; + let task = self.0.owned.assert_owner(task); + task.run(); + } + + n + } + + fn is_empty(&self) -> bool { + self.0.core.try_lock().unwrap().queue.is_empty() + } + + fn next_task(&self) -> task::Notified<Runtime> { + self.0.core.try_lock().unwrap().queue.pop_front().unwrap() + } + + fn shutdown(&self) { + let mut core = self.0.core.try_lock().unwrap(); + + self.0.owned.close_and_shutdown_all(); + + while let Some(task) = core.queue.pop_back() { + drop(task); + } + + drop(core); + + assert!(self.0.owned.is_empty()); + } +} + +impl Schedule for Runtime { + fn release(&self, task: &Task<Self>) -> Option<Task<Self>> { + self.0.owned.remove(task) + } + + fn schedule(&self, task: task::Notified<Self>) { + self.0.core.try_lock().unwrap().queue.push_back(task); + } +} diff --git a/third_party/rust/tokio/src/runtime/tests/task_combinations.rs b/third_party/rust/tokio/src/runtime/tests/task_combinations.rs new file mode 100644 index 0000000000..73a20d9760 --- /dev/null +++ b/third_party/rust/tokio/src/runtime/tests/task_combinations.rs @@ -0,0 +1,487 @@ +use std::fmt; +use std::future::Future; +use std::panic; +use std::pin::Pin; +use std::task::{Context, Poll}; + +use crate::runtime::task::AbortHandle; +use crate::runtime::Builder; +use crate::sync::oneshot; +use crate::task::JoinHandle; + +use futures::future::FutureExt; + +// Enums for each option in the combinations being tested + +#[derive(Copy, Clone, Debug, PartialEq)] +enum CombiRuntime { + CurrentThread, + Multi1, + Multi2, +} +#[derive(Copy, Clone, Debug, PartialEq)] +enum CombiLocalSet { + Yes, + No, +} +#[derive(Copy, Clone, Debug, PartialEq)] +enum CombiTask { + PanicOnRun, + PanicOnDrop, + PanicOnRunAndDrop, + NoPanic, +} +#[derive(Copy, Clone, Debug, PartialEq)] +enum CombiOutput { + PanicOnDrop, + NoPanic, +} +#[derive(Copy, Clone, Debug, PartialEq)] +enum CombiJoinInterest { + Polled, + NotPolled, +} +#[allow(clippy::enum_variant_names)] // we aren't using glob imports +#[derive(Copy, Clone, Debug, PartialEq)] +enum CombiJoinHandle { + DropImmediately = 1, + DropFirstPoll = 2, + DropAfterNoConsume = 3, + DropAfterConsume = 4, +} +#[derive(Copy, Clone, Debug, PartialEq)] +enum CombiAbort { + NotAborted = 0, + AbortedImmediately = 1, + AbortedFirstPoll = 2, + AbortedAfterFinish = 3, + AbortedAfterConsumeOutput = 4, +} + +#[derive(Copy, Clone, Debug, PartialEq)] +enum CombiAbortSource { + JoinHandle, + AbortHandle, +} + +#[test] +fn test_combinations() { + let mut rt = &[ + CombiRuntime::CurrentThread, + CombiRuntime::Multi1, + CombiRuntime::Multi2, + ][..]; + + if cfg!(miri) { + rt = &[CombiRuntime::CurrentThread]; + } + + let ls = [CombiLocalSet::Yes, CombiLocalSet::No]; + let task = [ + CombiTask::NoPanic, + CombiTask::PanicOnRun, + CombiTask::PanicOnDrop, + CombiTask::PanicOnRunAndDrop, + ]; + let output = [CombiOutput::NoPanic, CombiOutput::PanicOnDrop]; + let ji = [CombiJoinInterest::Polled, 
CombiJoinInterest::NotPolled]; + let jh = [ + CombiJoinHandle::DropImmediately, + CombiJoinHandle::DropFirstPoll, + CombiJoinHandle::DropAfterNoConsume, + CombiJoinHandle::DropAfterConsume, + ]; + let abort = [ + CombiAbort::NotAborted, + CombiAbort::AbortedImmediately, + CombiAbort::AbortedFirstPoll, + CombiAbort::AbortedAfterFinish, + CombiAbort::AbortedAfterConsumeOutput, + ]; + let ah = [ + None, + Some(CombiJoinHandle::DropImmediately), + Some(CombiJoinHandle::DropFirstPoll), + Some(CombiJoinHandle::DropAfterNoConsume), + Some(CombiJoinHandle::DropAfterConsume), + ]; + + for rt in rt.iter().copied() { + for ls in ls.iter().copied() { + for task in task.iter().copied() { + for output in output.iter().copied() { + for ji in ji.iter().copied() { + for jh in jh.iter().copied() { + for abort in abort.iter().copied() { + // abort via join handle --- abort handles + // may be dropped at any point + for ah in ah.iter().copied() { + test_combination( + rt, + ls, + task, + output, + ji, + jh, + ah, + abort, + CombiAbortSource::JoinHandle, + ); + } + // if aborting via AbortHandle, it will + // never be dropped. + test_combination( + rt, + ls, + task, + output, + ji, + jh, + None, + abort, + CombiAbortSource::AbortHandle, + ); + } + } + } + } + } + } + } +} + +fn is_debug<T: fmt::Debug>(_: &T) {} + +#[allow(clippy::too_many_arguments)] +fn test_combination( + rt: CombiRuntime, + ls: CombiLocalSet, + task: CombiTask, + output: CombiOutput, + ji: CombiJoinInterest, + jh: CombiJoinHandle, + ah: Option<CombiJoinHandle>, + abort: CombiAbort, + abort_src: CombiAbortSource, +) { + match (abort_src, ah) { + (CombiAbortSource::JoinHandle, _) if (jh as usize) < (abort as usize) => { + // join handle dropped prior to abort + return; + } + (CombiAbortSource::AbortHandle, Some(_)) => { + // abort handle dropped, we can't abort through the + // abort handle + return; + } + + _ => {} + } + + if (task == CombiTask::PanicOnDrop) && (output == CombiOutput::PanicOnDrop) { + // this causes double panic + return; + } + if (task == CombiTask::PanicOnRunAndDrop) && (abort != CombiAbort::AbortedImmediately) { + // this causes double panic + return; + } + + is_debug(&rt); + is_debug(&ls); + is_debug(&task); + is_debug(&output); + is_debug(&ji); + is_debug(&jh); + is_debug(&ah); + is_debug(&abort); + is_debug(&abort_src); + + // A runtime optionally with a LocalSet + struct Rt { + rt: crate::runtime::Runtime, + ls: Option<crate::task::LocalSet>, + } + impl Rt { + fn new(rt: CombiRuntime, ls: CombiLocalSet) -> Self { + let rt = match rt { + CombiRuntime::CurrentThread => Builder::new_current_thread().build().unwrap(), + CombiRuntime::Multi1 => Builder::new_multi_thread() + .worker_threads(1) + .build() + .unwrap(), + CombiRuntime::Multi2 => Builder::new_multi_thread() + .worker_threads(2) + .build() + .unwrap(), + }; + + let ls = match ls { + CombiLocalSet::Yes => Some(crate::task::LocalSet::new()), + CombiLocalSet::No => None, + }; + + Self { rt, ls } + } + fn block_on<T>(&self, task: T) -> T::Output + where + T: Future, + { + match &self.ls { + Some(ls) => ls.block_on(&self.rt, task), + None => self.rt.block_on(task), + } + } + fn spawn<T>(&self, task: T) -> JoinHandle<T::Output> + where + T: Future + Send + 'static, + T::Output: Send + 'static, + { + match &self.ls { + Some(ls) => ls.spawn_local(task), + None => self.rt.spawn(task), + } + } + } + + // The type used for the output of the future + struct Output { + panic_on_drop: bool, + on_drop: Option<oneshot::Sender<()>>, + } + impl Output { + fn disarm(&mut self) { + 
self.panic_on_drop = false; + } + } + impl Drop for Output { + fn drop(&mut self) { + let _ = self.on_drop.take().unwrap().send(()); + if self.panic_on_drop { + panic!("Panicking in Output"); + } + } + } + + // A wrapper around the future that is spawned + struct FutWrapper<F> { + inner: F, + on_drop: Option<oneshot::Sender<()>>, + panic_on_drop: bool, + } + impl<F: Future> Future for FutWrapper<F> { + type Output = F::Output; + fn poll(self: Pin<&mut Self>, cx: &mut Context<'_>) -> Poll<F::Output> { + unsafe { + let me = Pin::into_inner_unchecked(self); + let inner = Pin::new_unchecked(&mut me.inner); + inner.poll(cx) + } + } + } + impl<F> Drop for FutWrapper<F> { + fn drop(&mut self) { + let _: Result<(), ()> = self.on_drop.take().unwrap().send(()); + if self.panic_on_drop { + panic!("Panicking in FutWrapper"); + } + } + } + + // The channels passed to the task + struct Signals { + on_first_poll: Option<oneshot::Sender<()>>, + wait_complete: Option<oneshot::Receiver<()>>, + on_output_drop: Option<oneshot::Sender<()>>, + } + + // The task we will spawn + async fn my_task(mut signal: Signals, task: CombiTask, out: CombiOutput) -> Output { + // Signal that we have been polled once + let _ = signal.on_first_poll.take().unwrap().send(()); + + // Wait for a signal, then complete the future + let _ = signal.wait_complete.take().unwrap().await; + + // If the task gets past wait_complete without yielding, then aborts + // may not be caught without this yield_now. + crate::task::yield_now().await; + + if task == CombiTask::PanicOnRun || task == CombiTask::PanicOnRunAndDrop { + panic!("Panicking in my_task on {:?}", std::thread::current().id()); + } + + Output { + panic_on_drop: out == CombiOutput::PanicOnDrop, + on_drop: signal.on_output_drop.take(), + } + } + + let rt = Rt::new(rt, ls); + + let (on_first_poll, wait_first_poll) = oneshot::channel(); + let (on_complete, wait_complete) = oneshot::channel(); + let (on_future_drop, wait_future_drop) = oneshot::channel(); + let (on_output_drop, wait_output_drop) = oneshot::channel(); + let signal = Signals { + on_first_poll: Some(on_first_poll), + wait_complete: Some(wait_complete), + on_output_drop: Some(on_output_drop), + }; + + // === Spawn task === + let mut handle = Some(rt.spawn(FutWrapper { + inner: my_task(signal, task, output), + on_drop: Some(on_future_drop), + panic_on_drop: task == CombiTask::PanicOnDrop || task == CombiTask::PanicOnRunAndDrop, + })); + + // Keep track of whether the task has been killed with an abort + let mut aborted = false; + + // If we want to poll the JoinHandle, do it now + if ji == CombiJoinInterest::Polled { + assert!( + handle.as_mut().unwrap().now_or_never().is_none(), + "Polling handle succeeded" + ); + } + + // If we are either aborting the task via an abort handle, or dropping via + // an abort handle, do that now. 
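+    // The AbortHandle has to be minted now, while the JoinHandle still exists;
+    // in the `DropImmediately`/`DropFirstPoll` cases the JoinHandle is dropped
+    // only a few lines further down.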
+ let mut abort_handle = if ah.is_some() || abort_src == CombiAbortSource::AbortHandle { + handle.as_ref().map(JoinHandle::abort_handle) + } else { + None + }; + + let do_abort = |abort_handle: &mut Option<AbortHandle>, + join_handle: Option<&mut JoinHandle<_>>| { + match abort_src { + CombiAbortSource::AbortHandle => abort_handle.take().unwrap().abort(), + CombiAbortSource::JoinHandle => join_handle.unwrap().abort(), + } + }; + + if abort == CombiAbort::AbortedImmediately { + do_abort(&mut abort_handle, handle.as_mut()); + aborted = true; + } + if jh == CombiJoinHandle::DropImmediately { + drop(handle.take().unwrap()); + } + + // === Wait for first poll === + let got_polled = rt.block_on(wait_first_poll).is_ok(); + if !got_polled { + // it's possible that we are aborted but still got polled + assert!( + aborted, + "Task completed without ever being polled but was not aborted." + ); + } + + if abort == CombiAbort::AbortedFirstPoll { + do_abort(&mut abort_handle, handle.as_mut()); + aborted = true; + } + if jh == CombiJoinHandle::DropFirstPoll { + drop(handle.take().unwrap()); + } + if ah == Some(CombiJoinHandle::DropFirstPoll) { + drop(abort_handle.take().unwrap()); + } + + // Signal the future that it can return now + let _ = on_complete.send(()); + // === Wait for future to be dropped === + assert!( + rt.block_on(wait_future_drop).is_ok(), + "The future should always be dropped." + ); + + if abort == CombiAbort::AbortedAfterFinish { + // Don't set aborted to true here as the task already finished + do_abort(&mut abort_handle, handle.as_mut()); + } + if jh == CombiJoinHandle::DropAfterNoConsume { + if ah == Some(CombiJoinHandle::DropAfterNoConsume) { + drop(handle.take().unwrap()); + // The runtime will usually have dropped every ref-count at this point, + // in which case dropping the AbortHandle drops the output. + // + // (But it might race and still hold a ref-count) + let panic = panic::catch_unwind(panic::AssertUnwindSafe(|| { + drop(abort_handle.take().unwrap()); + })); + if panic.is_err() { + assert!( + (output == CombiOutput::PanicOnDrop) + && (!matches!(task, CombiTask::PanicOnRun | CombiTask::PanicOnRunAndDrop)) + && !aborted, + "Dropping AbortHandle shouldn't panic here" + ); + } + } else { + // The runtime will usually have dropped every ref-count at this point, + // in which case dropping the JoinHandle drops the output. + // + // (But it might race and still hold a ref-count) + let panic = panic::catch_unwind(panic::AssertUnwindSafe(|| { + drop(handle.take().unwrap()); + })); + if panic.is_err() { + assert!( + (output == CombiOutput::PanicOnDrop) + && (!matches!(task, CombiTask::PanicOnRun | CombiTask::PanicOnRunAndDrop)) + && !aborted, + "Dropping JoinHandle shouldn't panic here" + ); + } + } + } + + // Check whether we drop after consuming the output + if jh == CombiJoinHandle::DropAfterConsume { + // Using as_mut() to not immediately drop the handle + let result = rt.block_on(handle.as_mut().unwrap()); + + match result { + Ok(mut output) => { + // Don't panic here. 
+ output.disarm(); + assert!(!aborted, "Task was aborted but returned output"); + } + Err(err) if err.is_cancelled() => assert!(aborted, "Cancelled output but not aborted"), + Err(err) if err.is_panic() => { + assert!( + (task == CombiTask::PanicOnRun) + || (task == CombiTask::PanicOnDrop) + || (task == CombiTask::PanicOnRunAndDrop) + || (output == CombiOutput::PanicOnDrop), + "Panic but nothing should panic" + ); + } + _ => unreachable!(), + } + + let mut handle = handle.take().unwrap(); + if abort == CombiAbort::AbortedAfterConsumeOutput { + do_abort(&mut abort_handle, Some(&mut handle)); + } + drop(handle); + + if ah == Some(CombiJoinHandle::DropAfterConsume) { + drop(abort_handle.take()); + } + } + + // The output should have been dropped now. Check whether the output + // object was created at all. + let output_created = rt.block_on(wait_output_drop).is_ok(); + assert_eq!( + output_created, + (!matches!(task, CombiTask::PanicOnRun | CombiTask::PanicOnRunAndDrop)) && !aborted, + "Creation of output object" + ); +} diff --git a/third_party/rust/tokio/src/runtime/thread_id.rs b/third_party/rust/tokio/src/runtime/thread_id.rs new file mode 100644 index 0000000000..ef39289796 --- /dev/null +++ b/third_party/rust/tokio/src/runtime/thread_id.rs @@ -0,0 +1,31 @@ +use std::num::NonZeroU64; + +#[derive(Eq, PartialEq, Clone, Copy, Hash, Debug)] +pub(crate) struct ThreadId(NonZeroU64); + +impl ThreadId { + pub(crate) fn next() -> Self { + use crate::loom::sync::atomic::{Ordering::Relaxed, StaticAtomicU64}; + + static NEXT_ID: StaticAtomicU64 = StaticAtomicU64::new(0); + + let mut last = NEXT_ID.load(Relaxed); + loop { + let id = match last.checked_add(1) { + Some(id) => id, + None => exhausted(), + }; + + match NEXT_ID.compare_exchange_weak(last, id, Relaxed, Relaxed) { + Ok(_) => return ThreadId(NonZeroU64::new(id).unwrap()), + Err(id) => last = id, + } + } + } +} + +#[cold] +#[allow(dead_code)] +fn exhausted() -> ! { + panic!("failed to generate unique thread ID: bitspace exhausted") +} diff --git a/third_party/rust/tokio/src/runtime/time/entry.rs b/third_party/rust/tokio/src/runtime/time/entry.rs new file mode 100644 index 0000000000..798d3c11eb --- /dev/null +++ b/third_party/rust/tokio/src/runtime/time/entry.rs @@ -0,0 +1,644 @@ +//! Timer state structures. +//! +//! This module contains the heart of the intrusive timer implementation, and as +//! such the structures inside are full of tricky concurrency and unsafe code. +//! +//! # Ground rules +//! +//! The heart of the timer implementation here is the [`TimerShared`] structure, +//! shared between the [`TimerEntry`] and the driver. Generally, we permit access +//! to [`TimerShared`] ONLY via either 1) a mutable reference to [`TimerEntry`] or +//! 2) a held driver lock. +//! +//! It follows from this that any changes made while holding BOTH 1 and 2 will +//! be reliably visible, regardless of ordering. This is because of the acq/rel +//! fences on the driver lock ensuring ordering with 2, and rust mutable +//! reference rules for 1 (a mutable reference to an object can't be passed +//! between threads without an acq/rel barrier, and same-thread we have local +//! happens-before ordering). +//! +//! # State field +//! +//! Each timer has a state field associated with it. This field contains either +//! the current scheduled time, or a special flag value indicating its state. +//! This state can either indicate that the timer is on the 'pending' queue (and +//! thus will be fired with an `Ok(())` result soon) or that it has already been +//! 
fired/deregistered. +//! +//! This single state field allows for code that is firing the timer to +//! synchronize with any racing `reset` calls reliably. +//! +//! # Cached vs true timeouts +//! +//! To allow for the use case of a timeout that is periodically reset before +//! expiration to be as lightweight as possible, we support optimistically +//! lock-free timer resets, in the case where a timer is rescheduled to a later +//! point than it was originally scheduled for. +//! +//! This is accomplished by lazily rescheduling timers. That is, we update the +//! state field with the true expiration of the timer from the holder of +//! the [`TimerEntry`]. When the driver services timers (ie, whenever it's +//! walking lists of timers), it checks this "true when" value, and reschedules +//! based on it. +//! +//! We do, however, also need to track what the expiration time was when we +//! originally registered the timer; this is used to locate the right linked +//! list when the timer is being cancelled. This is referred to as the "cached +//! when" internally. +//! +//! There is of course a race condition between timer reset and timer +//! expiration. If the driver fails to observe the updated expiration time, it +//! could trigger expiration of the timer too early. However, because +//! [`mark_pending`][mark_pending] performs a compare-and-swap, it will identify this race and +//! refuse to mark the timer as pending. +//! +//! [mark_pending]: TimerHandle::mark_pending + +use crate::loom::cell::UnsafeCell; +use crate::loom::sync::atomic::AtomicU64; +use crate::loom::sync::atomic::Ordering; + +use crate::runtime::scheduler; +use crate::sync::AtomicWaker; +use crate::time::Instant; +use crate::util::linked_list; + +use std::cell::UnsafeCell as StdUnsafeCell; +use std::task::{Context, Poll, Waker}; +use std::{marker::PhantomPinned, pin::Pin, ptr::NonNull}; + +type TimerResult = Result<(), crate::time::error::Error>; + +const STATE_DEREGISTERED: u64 = u64::MAX; +const STATE_PENDING_FIRE: u64 = STATE_DEREGISTERED - 1; +const STATE_MIN_VALUE: u64 = STATE_PENDING_FIRE; +/// The largest safe integer to use for ticks. +/// +/// This value should be updated if any other signal values are added above. +pub(super) const MAX_SAFE_MILLIS_DURATION: u64 = u64::MAX - 2; + +/// This structure holds the current shared state of the timer - its scheduled +/// time (if registered), or otherwise the result of the timer completing, as +/// well as the registered waker. +/// +/// Generally, the StateCell is only permitted to be accessed from two contexts: +/// Either a thread holding the corresponding &mut TimerEntry, or a thread +/// holding the timer driver lock. The write actions on the StateCell amount to +/// passing "ownership" of the StateCell between these contexts; moving a timer +/// from the TimerEntry to the driver requires _both_ holding the &mut +/// TimerEntry and the driver lock, while moving it back (firing the timer) +/// requires only the driver lock. +pub(super) struct StateCell { + /// Holds either the scheduled expiration time for this timer, or (if the + /// timer has been fired and is unregistered), `u64::MAX`. + state: AtomicU64, + /// If the timer is fired (an Acquire order read on state shows + /// `u64::MAX`), holds the result that should be returned from + /// polling the timer. Otherwise, the contents are unspecified and reading + /// without holding the driver lock is undefined behavior. 
+ result: UnsafeCell<TimerResult>, + /// The currently-registered waker + waker: AtomicWaker, +} + +impl Default for StateCell { + fn default() -> Self { + Self::new() + } +} + +impl std::fmt::Debug for StateCell { + fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result { + write!(f, "StateCell({:?})", self.read_state()) + } +} + +impl StateCell { + fn new() -> Self { + Self { + state: AtomicU64::new(STATE_DEREGISTERED), + result: UnsafeCell::new(Ok(())), + waker: AtomicWaker::new(), + } + } + + fn is_pending(&self) -> bool { + self.state.load(Ordering::Relaxed) == STATE_PENDING_FIRE + } + + /// Returns the current expiration time, or None if not currently scheduled. + fn when(&self) -> Option<u64> { + let cur_state = self.state.load(Ordering::Relaxed); + + if cur_state == STATE_DEREGISTERED { + None + } else { + Some(cur_state) + } + } + + /// If the timer is completed, returns the result of the timer. Otherwise, + /// returns None and registers the waker. + fn poll(&self, waker: &Waker) -> Poll<TimerResult> { + // We must register first. This ensures that either `fire` will + // observe the new waker, or we will observe a racing fire to have set + // the state, or both. + self.waker.register_by_ref(waker); + + self.read_state() + } + + fn read_state(&self) -> Poll<TimerResult> { + let cur_state = self.state.load(Ordering::Acquire); + + if cur_state == STATE_DEREGISTERED { + // SAFETY: The driver has fired this timer; this involves writing + // the result, and then writing (with release ordering) the state + // field. + Poll::Ready(unsafe { self.result.with(|p| *p) }) + } else { + Poll::Pending + } + } + + /// Marks this timer as being moved to the pending list, if its scheduled + /// time is not after `not_after`. + /// + /// If the timer is scheduled for a time after not_after, returns an Err + /// containing the current scheduled time. + /// + /// SAFETY: Must hold the driver lock. + unsafe fn mark_pending(&self, not_after: u64) -> Result<(), u64> { + // Quick initial debug check to see if the timer is already fired. Since + // firing the timer can only happen with the driver lock held, we know + // we shouldn't be able to "miss" a transition to a fired state, even + // with relaxed ordering. + let mut cur_state = self.state.load(Ordering::Relaxed); + + loop { + // improve the error message for things like + // https://github.com/tokio-rs/tokio/issues/3675 + assert!( + cur_state < STATE_MIN_VALUE, + "mark_pending called when the timer entry is in an invalid state" + ); + + if cur_state > not_after { + break Err(cur_state); + } + + match self.state.compare_exchange( + cur_state, + STATE_PENDING_FIRE, + Ordering::AcqRel, + Ordering::Acquire, + ) { + Ok(_) => { + break Ok(()); + } + Err(actual_state) => { + cur_state = actual_state; + } + } + } + } + + /// Fires the timer, setting the result to the provided result. + /// + /// Returns: + /// * `Some(waker) - if fired and a waker needs to be invoked once the + /// driver lock is released + /// * `None` - if fired and a waker does not need to be invoked, or if + /// already fired + /// + /// SAFETY: The driver lock must be held. + unsafe fn fire(&self, result: TimerResult) -> Option<Waker> { + // Quick initial check to see if the timer is already fired. Since + // firing the timer can only happen with the driver lock held, we know + // we shouldn't be able to "miss" a transition to a fired state, even + // with relaxed ordering. 
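+        // For reference, the `state` encoding used throughout this file (see the
+        // constants near the top of the module):
+        //
+        //     u64::MAX                       => STATE_DEREGISTERED (fired / unregistered)
+        //     u64::MAX - 1                   => STATE_PENDING_FIRE (queued to fire)
+        //     0 ..= MAX_SAFE_MILLIS_DURATION => the scheduled tick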
+ let cur_state = self.state.load(Ordering::Relaxed); + if cur_state == STATE_DEREGISTERED { + return None; + } + + // SAFETY: We assume the driver lock is held and the timer is not + // fired, so only the driver is accessing this field. + // + // We perform a release-ordered store to state below, to ensure this + // write is visible before the state update is visible. + unsafe { self.result.with_mut(|p| *p = result) }; + + self.state.store(STATE_DEREGISTERED, Ordering::Release); + + self.waker.take_waker() + } + + /// Marks the timer as registered (poll will return None) and sets the + /// expiration time. + /// + /// While this function is memory-safe, it should only be called from a + /// context holding both `&mut TimerEntry` and the driver lock. + fn set_expiration(&self, timestamp: u64) { + debug_assert!(timestamp < STATE_MIN_VALUE); + + // We can use relaxed ordering because we hold the driver lock and will + // fence when we release the lock. + self.state.store(timestamp, Ordering::Relaxed); + } + + /// Attempts to adjust the timer to a new timestamp. + /// + /// If the timer has already been fired, is pending firing, or the new + /// timestamp is earlier than the old timestamp, (or occasionally + /// spuriously) returns Err without changing the timer's state. In this + /// case, the timer must be deregistered and re-registered. + fn extend_expiration(&self, new_timestamp: u64) -> Result<(), ()> { + let mut prior = self.state.load(Ordering::Relaxed); + loop { + if new_timestamp < prior || prior >= STATE_MIN_VALUE { + return Err(()); + } + + match self.state.compare_exchange_weak( + prior, + new_timestamp, + Ordering::AcqRel, + Ordering::Acquire, + ) { + Ok(_) => { + return Ok(()); + } + Err(true_prior) => { + prior = true_prior; + } + } + } + } + + /// Returns true if the state of this timer indicates that the timer might + /// be registered with the driver. This check is performed with relaxed + /// ordering, but is conservative - if it returns false, the timer is + /// definitely _not_ registered. + pub(super) fn might_be_registered(&self) -> bool { + self.state.load(Ordering::Relaxed) != u64::MAX + } +} + +/// A timer entry. +/// +/// This is the handle to a timer that is controlled by the requester of the +/// timer. As this participates in intrusive data structures, it must be pinned +/// before polling. +#[derive(Debug)] +pub(crate) struct TimerEntry { + /// Arc reference to the runtime handle. We can only free the driver after + /// deregistering everything from their respective timer wheels. + driver: scheduler::Handle, + /// Shared inner structure; this is part of an intrusive linked list, and + /// therefore other references can exist to it while mutable references to + /// Entry exist. + /// + /// This is manipulated only under the inner mutex. TODO: Can we use loom + /// cells for this? + inner: StdUnsafeCell<TimerShared>, + /// Deadline for the timer. This is used to register on the first + /// poll, as we can't register prior to being pinned. + deadline: Instant, + /// Whether the deadline has been registered. + registered: bool, + /// Ensure the type is !Unpin + _m: std::marker::PhantomPinned, +} + +unsafe impl Send for TimerEntry {} +unsafe impl Sync for TimerEntry {} + +/// An TimerHandle is the (non-enforced) "unique" pointer from the driver to the +/// timer entry. Generally, at most one TimerHandle exists for a timer at a time +/// (enforced by the timer state machine). 
+/// +/// SAFETY: An TimerHandle is essentially a raw pointer, and the usual caveats +/// of pointer safety apply. In particular, TimerHandle does not itself enforce +/// that the timer does still exist; however, normally an TimerHandle is created +/// immediately before registering the timer, and is consumed when firing the +/// timer, to help minimize mistakes. Still, because TimerHandle cannot enforce +/// memory safety, all operations are unsafe. +#[derive(Debug)] +pub(crate) struct TimerHandle { + inner: NonNull<TimerShared>, +} + +pub(super) type EntryList = crate::util::linked_list::LinkedList<TimerShared, TimerShared>; + +/// The shared state structure of a timer. This structure is shared between the +/// frontend (`Entry`) and driver backend. +/// +/// Note that this structure is located inside the `TimerEntry` structure. +pub(crate) struct TimerShared { + /// A link within the doubly-linked list of timers on a particular level and + /// slot. Valid only if state is equal to Registered. + /// + /// Only accessed under the entry lock. + pointers: linked_list::Pointers<TimerShared>, + + /// The expiration time for which this entry is currently registered. + /// Generally owned by the driver, but is accessed by the entry when not + /// registered. + cached_when: AtomicU64, + + /// The true expiration time. Set by the timer future, read by the driver. + true_when: AtomicU64, + + /// Current state. This records whether the timer entry is currently under + /// the ownership of the driver, and if not, its current state (not + /// complete, fired, error, etc). + state: StateCell, + + _p: PhantomPinned, +} + +unsafe impl Send for TimerShared {} +unsafe impl Sync for TimerShared {} + +impl std::fmt::Debug for TimerShared { + fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result { + f.debug_struct("TimerShared") + .field("when", &self.true_when.load(Ordering::Relaxed)) + .field("cached_when", &self.cached_when.load(Ordering::Relaxed)) + .field("state", &self.state) + .finish() + } +} + +generate_addr_of_methods! { + impl<> TimerShared { + unsafe fn addr_of_pointers(self: NonNull<Self>) -> NonNull<linked_list::Pointers<TimerShared>> { + &self.pointers + } + } +} + +impl TimerShared { + pub(super) fn new() -> Self { + Self { + cached_when: AtomicU64::new(0), + true_when: AtomicU64::new(0), + pointers: linked_list::Pointers::new(), + state: StateCell::default(), + _p: PhantomPinned, + } + } + + /// Gets the cached time-of-expiration value. + pub(super) fn cached_when(&self) -> u64 { + // Cached-when is only accessed under the driver lock, so we can use relaxed + self.cached_when.load(Ordering::Relaxed) + } + + /// Gets the true time-of-expiration value, and copies it into the cached + /// time-of-expiration value. + /// + /// SAFETY: Must be called with the driver lock held, and when this entry is + /// not in any timer wheel lists. + pub(super) unsafe fn sync_when(&self) -> u64 { + let true_when = self.true_when(); + + self.cached_when.store(true_when, Ordering::Relaxed); + + true_when + } + + /// Sets the cached time-of-expiration value. + /// + /// SAFETY: Must be called with the driver lock held, and when this entry is + /// not in any timer wheel lists. + unsafe fn set_cached_when(&self, when: u64) { + self.cached_when.store(when, Ordering::Relaxed); + } + + /// Returns the true time-of-expiration value, with relaxed memory ordering. 
+ pub(super) fn true_when(&self) -> u64 { + self.state.when().expect("Timer already fired") + } + + /// Sets the true time-of-expiration value, even if it is less than the + /// current expiration or the timer is deregistered. + /// + /// SAFETY: Must only be called with the driver lock held and the entry not + /// in the timer wheel. + pub(super) unsafe fn set_expiration(&self, t: u64) { + self.state.set_expiration(t); + self.cached_when.store(t, Ordering::Relaxed); + } + + /// Sets the true time-of-expiration only if it is after the current. + pub(super) fn extend_expiration(&self, t: u64) -> Result<(), ()> { + self.state.extend_expiration(t) + } + + /// Returns a TimerHandle for this timer. + pub(super) fn handle(&self) -> TimerHandle { + TimerHandle { + inner: NonNull::from(self), + } + } + + /// Returns true if the state of this timer indicates that the timer might + /// be registered with the driver. This check is performed with relaxed + /// ordering, but is conservative - if it returns false, the timer is + /// definitely _not_ registered. + pub(super) fn might_be_registered(&self) -> bool { + self.state.might_be_registered() + } +} + +unsafe impl linked_list::Link for TimerShared { + type Handle = TimerHandle; + + type Target = TimerShared; + + fn as_raw(handle: &Self::Handle) -> NonNull<Self::Target> { + handle.inner + } + + unsafe fn from_raw(ptr: NonNull<Self::Target>) -> Self::Handle { + TimerHandle { inner: ptr } + } + + unsafe fn pointers( + target: NonNull<Self::Target>, + ) -> NonNull<linked_list::Pointers<Self::Target>> { + TimerShared::addr_of_pointers(target) + } +} + +// ===== impl Entry ===== + +impl TimerEntry { + #[track_caller] + pub(crate) fn new(handle: &scheduler::Handle, deadline: Instant) -> Self { + // Panic if the time driver is not enabled + let _ = handle.driver().time(); + + let driver = handle.clone(); + + Self { + driver, + inner: StdUnsafeCell::new(TimerShared::new()), + deadline, + registered: false, + _m: std::marker::PhantomPinned, + } + } + + fn inner(&self) -> &TimerShared { + unsafe { &*self.inner.get() } + } + + pub(crate) fn deadline(&self) -> Instant { + self.deadline + } + + pub(crate) fn is_elapsed(&self) -> bool { + !self.inner().state.might_be_registered() && self.registered + } + + /// Cancels and deregisters the timer. This operation is irreversible. + pub(crate) fn cancel(self: Pin<&mut Self>) { + // We need to perform an acq/rel fence with the driver thread, and the + // simplest way to do so is to grab the driver lock. + // + // Why is this necessary? We're about to release this timer's memory for + // some other non-timer use. However, we've been doing a bunch of + // relaxed (or even non-atomic) writes from the driver thread, and we'll + // be doing more from _this thread_ (as this memory is interpreted as + // something else). + // + // It is critical to ensure that, from the point of view of the driver, + // those future non-timer writes happen-after the timer is fully fired, + // and from the purpose of this thread, the driver's writes all + // happen-before we drop the timer. This in turn requires us to perform + // an acquire-release barrier in _both_ directions between the driver + // and dropping thread. + // + // The lock acquisition in clear_entry serves this purpose. All of the + // driver manipulations happen with the lock held, so we can just take + // the lock and be sure that this drop happens-after everything the + // driver did so far and happens-before everything the driver does in + // the future. 
While we have the lock held, we also go ahead and + // deregister the entry if necessary. + unsafe { self.driver().clear_entry(NonNull::from(self.inner())) }; + } + + pub(crate) fn reset(mut self: Pin<&mut Self>, new_time: Instant, reregister: bool) { + unsafe { self.as_mut().get_unchecked_mut() }.deadline = new_time; + unsafe { self.as_mut().get_unchecked_mut() }.registered = reregister; + + let tick = self.driver().time_source().deadline_to_tick(new_time); + + if self.inner().extend_expiration(tick).is_ok() { + return; + } + + if reregister { + unsafe { + self.driver() + .reregister(&self.driver.driver().io, tick, self.inner().into()); + } + } + } + + pub(crate) fn poll_elapsed( + mut self: Pin<&mut Self>, + cx: &mut Context<'_>, + ) -> Poll<Result<(), super::Error>> { + if self.driver().is_shutdown() { + panic!("{}", crate::util::error::RUNTIME_SHUTTING_DOWN_ERROR); + } + + if !self.registered { + let deadline = self.deadline; + self.as_mut().reset(deadline, true); + } + + let this = unsafe { self.get_unchecked_mut() }; + + this.inner().state.poll(cx.waker()) + } + + pub(crate) fn driver(&self) -> &super::Handle { + self.driver.driver().time() + } + + #[cfg(all(tokio_unstable, feature = "tracing"))] + pub(crate) fn clock(&self) -> &super::Clock { + self.driver.driver().clock() + } +} + +impl TimerHandle { + pub(super) unsafe fn cached_when(&self) -> u64 { + unsafe { self.inner.as_ref().cached_when() } + } + + pub(super) unsafe fn sync_when(&self) -> u64 { + unsafe { self.inner.as_ref().sync_when() } + } + + pub(super) unsafe fn is_pending(&self) -> bool { + unsafe { self.inner.as_ref().state.is_pending() } + } + + /// Forcibly sets the true and cached expiration times to the given tick. + /// + /// SAFETY: The caller must ensure that the handle remains valid, the driver + /// lock is held, and that the timer is not in any wheel linked lists. + pub(super) unsafe fn set_expiration(&self, tick: u64) { + self.inner.as_ref().set_expiration(tick); + } + + /// Attempts to mark this entry as pending. If the expiration time is after + /// `not_after`, however, returns an Err with the current expiration time. + /// + /// If an `Err` is returned, the `cached_when` value will be updated to this + /// new expiration time. + /// + /// SAFETY: The caller must ensure that the handle remains valid, the driver + /// lock is held, and that the timer is not in any wheel linked lists. + /// After returning Ok, the entry must be added to the pending list. + pub(super) unsafe fn mark_pending(&self, not_after: u64) -> Result<(), u64> { + match self.inner.as_ref().state.mark_pending(not_after) { + Ok(()) => { + // mark this as being on the pending queue in cached_when + self.inner.as_ref().set_cached_when(u64::MAX); + Ok(()) + } + Err(tick) => { + self.inner.as_ref().set_cached_when(tick); + Err(tick) + } + } + } + + /// Attempts to transition to a terminal state. If the state is already a + /// terminal state, does nothing. + /// + /// Because the entry might be dropped after the state is moved to a + /// terminal state, this function consumes the handle to ensure we don't + /// access the entry afterwards. + /// + /// Returns the last-registered waker, if any. + /// + /// SAFETY: The driver lock must be held while invoking this function, and + /// the entry must not be in any wheel linked lists. 
+ pub(super) unsafe fn fire(self, completed_state: TimerResult) -> Option<Waker> { + self.inner.as_ref().state.fire(completed_state) + } +} + +impl Drop for TimerEntry { + fn drop(&mut self) { + unsafe { Pin::new_unchecked(self) }.as_mut().cancel() + } +} diff --git a/third_party/rust/tokio/src/runtime/time/handle.rs b/third_party/rust/tokio/src/runtime/time/handle.rs new file mode 100644 index 0000000000..fce791d998 --- /dev/null +++ b/third_party/rust/tokio/src/runtime/time/handle.rs @@ -0,0 +1,62 @@ +use crate::runtime::time::TimeSource; +use std::fmt; + +/// Handle to time driver instance. +pub(crate) struct Handle { + pub(super) time_source: TimeSource, + pub(super) inner: super::Inner, +} + +impl Handle { + /// Returns the time source associated with this handle. + pub(crate) fn time_source(&self) -> &TimeSource { + &self.time_source + } + + /// Checks whether the driver has been shutdown. + pub(super) fn is_shutdown(&self) -> bool { + self.inner.is_shutdown() + } + + /// Track that the driver is being unparked + pub(crate) fn unpark(&self) { + #[cfg(feature = "test-util")] + self.inner + .did_wake + .store(true, std::sync::atomic::Ordering::SeqCst); + } +} + +cfg_not_rt! { + impl Handle { + /// Tries to get a handle to the current timer. + /// + /// # Panics + /// + /// This function panics if there is no current timer set. + /// + /// It can be triggered when [`Builder::enable_time`] or + /// [`Builder::enable_all`] are not included in the builder. + /// + /// It can also panic whenever a timer is created outside of a + /// Tokio runtime. That is why `rt.block_on(sleep(...))` will panic, + /// since the function is executed outside of the runtime. + /// Whereas `rt.block_on(async {sleep(...).await})` doesn't panic. + /// And this is because wrapping the function on an async makes it lazy, + /// and so gets executed inside the runtime successfully without + /// panicking. + /// + /// [`Builder::enable_time`]: crate::runtime::Builder::enable_time + /// [`Builder::enable_all`]: crate::runtime::Builder::enable_all + #[track_caller] + pub(crate) fn current() -> Self { + panic!("{}", crate::util::error::CONTEXT_MISSING_ERROR) + } + } +} + +impl fmt::Debug for Handle { + fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result { + write!(f, "Handle") + } +} diff --git a/third_party/rust/tokio/src/runtime/time/mod.rs b/third_party/rust/tokio/src/runtime/time/mod.rs new file mode 100644 index 0000000000..423ad79ab9 --- /dev/null +++ b/third_party/rust/tokio/src/runtime/time/mod.rs @@ -0,0 +1,424 @@ +// Currently, rust warns when an unsafe fn contains an unsafe {} block. However, +// in the future, this will change to the reverse. For now, suppress this +// warning and generally stick with being explicit about unsafety. +#![allow(unused_unsafe)] +#![cfg_attr(not(feature = "rt"), allow(dead_code))] + +//! Time driver. + +mod entry; +pub(crate) use entry::TimerEntry; +use entry::{EntryList, TimerHandle, TimerShared, MAX_SAFE_MILLIS_DURATION}; + +mod handle; +pub(crate) use self::handle::Handle; + +mod source; +pub(crate) use source::TimeSource; + +mod wheel; + +use crate::loom::sync::atomic::{AtomicBool, Ordering}; +use crate::loom::sync::Mutex; +use crate::runtime::driver::{self, IoHandle, IoStack}; +use crate::time::error::Error; +use crate::time::{Clock, Duration}; + +use std::fmt; +use std::{num::NonZeroU64, ptr::NonNull, task::Waker}; + +/// Time implementation that drives [`Sleep`][sleep], [`Interval`][interval], and [`Timeout`][timeout]. 
+/// +/// A `Driver` instance tracks the state necessary for managing time and +/// notifying the [`Sleep`][sleep] instances once their deadlines are reached. +/// +/// It is expected that a single instance manages many individual [`Sleep`][sleep] +/// instances. The `Driver` implementation is thread-safe and, as such, is able +/// to handle callers from across threads. +/// +/// After creating the `Driver` instance, the caller must repeatedly call `park` +/// or `park_timeout`. The time driver will perform no work unless `park` or +/// `park_timeout` is called repeatedly. +/// +/// The driver has a resolution of one millisecond. Any unit of time that falls +/// between milliseconds are rounded up to the next millisecond. +/// +/// When an instance is dropped, any outstanding [`Sleep`][sleep] instance that has not +/// elapsed will be notified with an error. At this point, calling `poll` on the +/// [`Sleep`][sleep] instance will result in panic. +/// +/// # Implementation +/// +/// The time driver is based on the [paper by Varghese and Lauck][paper]. +/// +/// A hashed timing wheel is a vector of slots, where each slot handles a time +/// slice. As time progresses, the timer walks over the slot for the current +/// instant, and processes each entry for that slot. When the timer reaches the +/// end of the wheel, it starts again at the beginning. +/// +/// The implementation maintains six wheels arranged in a set of levels. As the +/// levels go up, the slots of the associated wheel represent larger intervals +/// of time. At each level, the wheel has 64 slots. Each slot covers a range of +/// time equal to the wheel at the lower level. At level zero, each slot +/// represents one millisecond of time. +/// +/// The wheels are: +/// +/// * Level 0: 64 x 1 millisecond slots. +/// * Level 1: 64 x 64 millisecond slots. +/// * Level 2: 64 x ~4 second slots. +/// * Level 3: 64 x ~4 minute slots. +/// * Level 4: 64 x ~4 hour slots. +/// * Level 5: 64 x ~12 day slots. +/// +/// When the timer processes entries at level zero, it will notify all the +/// `Sleep` instances as their deadlines have been reached. For all higher +/// levels, all entries will be redistributed across the wheel at the next level +/// down. Eventually, as time progresses, entries with [`Sleep`][sleep] instances will +/// either be canceled (dropped) or their associated entries will reach level +/// zero and be notified. +/// +/// [paper]: http://www.cs.columbia.edu/~nahum/w6998/papers/ton97-timing-wheels.pdf +/// [sleep]: crate::time::Sleep +/// [timeout]: crate::time::Timeout +/// [interval]: crate::time::Interval +#[derive(Debug)] +pub(crate) struct Driver { + /// Parker to delegate to. + park: IoStack, +} + +/// Timer state shared between `Driver`, `Handle`, and `Registration`. +struct Inner { + // The state is split like this so `Handle` can access `is_shutdown` without locking the mutex + pub(super) state: Mutex<InnerState>, + + /// True if the driver is being shutdown. + pub(super) is_shutdown: AtomicBool, + + // When `true`, a call to `park_timeout` should immediately return and time + // should not advance. One reason for this to be `true` is if the task + // passed to `Runtime::block_on` called `task::yield_now()`. + // + // While it may look racy, it only has any effect when the clock is paused + // and pausing the clock is restricted to a single-threaded runtime. 
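+    // Set to `true` by `Handle::unpark` and consumed (swapped back to `false`)
+    // by `Handle::did_wake` after the park completes.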
+ #[cfg(feature = "test-util")] + did_wake: AtomicBool, +} + +/// Time state shared which must be protected by a `Mutex` +struct InnerState { + /// The last published timer `elapsed` value. + elapsed: u64, + + /// The earliest time at which we promise to wake up without unparking. + next_wake: Option<NonZeroU64>, + + /// Timer wheel. + wheel: wheel::Wheel, +} + +// ===== impl Driver ===== + +impl Driver { + /// Creates a new `Driver` instance that uses `park` to block the current + /// thread and `time_source` to get the current time and convert to ticks. + /// + /// Specifying the source of time is useful when testing. + pub(crate) fn new(park: IoStack, clock: &Clock) -> (Driver, Handle) { + let time_source = TimeSource::new(clock); + + let handle = Handle { + time_source, + inner: Inner { + state: Mutex::new(InnerState { + elapsed: 0, + next_wake: None, + wheel: wheel::Wheel::new(), + }), + is_shutdown: AtomicBool::new(false), + + #[cfg(feature = "test-util")] + did_wake: AtomicBool::new(false), + }, + }; + + let driver = Driver { park }; + + (driver, handle) + } + + pub(crate) fn park(&mut self, handle: &driver::Handle) { + self.park_internal(handle, None) + } + + pub(crate) fn park_timeout(&mut self, handle: &driver::Handle, duration: Duration) { + self.park_internal(handle, Some(duration)) + } + + pub(crate) fn shutdown(&mut self, rt_handle: &driver::Handle) { + let handle = rt_handle.time(); + + if handle.is_shutdown() { + return; + } + + handle.inner.is_shutdown.store(true, Ordering::SeqCst); + + // Advance time forward to the end of time. + + handle.process_at_time(u64::MAX); + + self.park.shutdown(rt_handle); + } + + fn park_internal(&mut self, rt_handle: &driver::Handle, limit: Option<Duration>) { + let handle = rt_handle.time(); + let mut lock = handle.inner.state.lock(); + + assert!(!handle.is_shutdown()); + + let next_wake = lock.wheel.next_expiration_time(); + lock.next_wake = + next_wake.map(|t| NonZeroU64::new(t).unwrap_or_else(|| NonZeroU64::new(1).unwrap())); + + drop(lock); + + match next_wake { + Some(when) => { + let now = handle.time_source.now(rt_handle.clock()); + // Note that we effectively round up to 1ms here - this avoids + // very short-duration microsecond-resolution sleeps that the OS + // might treat as zero-length. + let mut duration = handle + .time_source + .tick_to_duration(when.saturating_sub(now)); + + if duration > Duration::from_millis(0) { + if let Some(limit) = limit { + duration = std::cmp::min(limit, duration); + } + + self.park_thread_timeout(rt_handle, duration); + } else { + self.park.park_timeout(rt_handle, Duration::from_secs(0)); + } + } + None => { + if let Some(duration) = limit { + self.park_thread_timeout(rt_handle, duration); + } else { + self.park.park(rt_handle); + } + } + } + + // Process pending timers after waking up + handle.process(rt_handle.clock()); + } + + cfg_test_util! { + fn park_thread_timeout(&mut self, rt_handle: &driver::Handle, duration: Duration) { + let handle = rt_handle.time(); + let clock = rt_handle.clock(); + + if clock.can_auto_advance() { + self.park.park_timeout(rt_handle, Duration::from_secs(0)); + + // If the time driver was woken, then the park completed + // before the "duration" elapsed (usually caused by a + // yield in `Runtime::block_on`). In this case, we don't + // advance the clock. 
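+            // `did_wake` consumes the flag (it swaps it back to `false`), so a
+            // single unpark suppresses at most one simulated advance.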
+ if !handle.did_wake() { + // Simulate advancing time + if let Err(msg) = clock.advance(duration) { + panic!("{}", msg); + } + } + } else { + self.park.park_timeout(rt_handle, duration); + } + } + } + + cfg_not_test_util! { + fn park_thread_timeout(&mut self, rt_handle: &driver::Handle, duration: Duration) { + self.park.park_timeout(rt_handle, duration); + } + } +} + +impl Handle { + /// Runs timer related logic, and returns the next wakeup time + pub(self) fn process(&self, clock: &Clock) { + let now = self.time_source().now(clock); + + self.process_at_time(now) + } + + pub(self) fn process_at_time(&self, mut now: u64) { + let mut waker_list: [Option<Waker>; 32] = Default::default(); + let mut waker_idx = 0; + + let mut lock = self.inner.lock(); + + if now < lock.elapsed { + // Time went backwards! This normally shouldn't happen as the Rust language + // guarantees that an Instant is monotonic, but can happen when running + // Linux in a VM on a Windows host due to std incorrectly trusting the + // hardware clock to be monotonic. + // + // See <https://github.com/tokio-rs/tokio/issues/3619> for more information. + now = lock.elapsed; + } + + while let Some(entry) = lock.wheel.poll(now) { + debug_assert!(unsafe { entry.is_pending() }); + + // SAFETY: We hold the driver lock, and just removed the entry from any linked lists. + if let Some(waker) = unsafe { entry.fire(Ok(())) } { + waker_list[waker_idx] = Some(waker); + + waker_idx += 1; + + if waker_idx == waker_list.len() { + // Wake a batch of wakers. To avoid deadlock, we must do this with the lock temporarily dropped. + drop(lock); + + for waker in waker_list.iter_mut() { + waker.take().unwrap().wake(); + } + + waker_idx = 0; + + lock = self.inner.lock(); + } + } + } + + // Update the elapsed cache + lock.elapsed = lock.wheel.elapsed(); + lock.next_wake = lock + .wheel + .poll_at() + .map(|t| NonZeroU64::new(t).unwrap_or_else(|| NonZeroU64::new(1).unwrap())); + + drop(lock); + + for waker in waker_list[0..waker_idx].iter_mut() { + waker.take().unwrap().wake(); + } + } + + /// Removes a registered timer from the driver. + /// + /// The timer will be moved to the cancelled state. Wakers will _not_ be + /// invoked. If the timer is already completed, this function is a no-op. + /// + /// This function always acquires the driver lock, even if the entry does + /// not appear to be registered. + /// + /// SAFETY: The timer must not be registered with some other driver, and + /// `add_entry` must not be called concurrently. + pub(self) unsafe fn clear_entry(&self, entry: NonNull<TimerShared>) { + unsafe { + let mut lock = self.inner.lock(); + + if entry.as_ref().might_be_registered() { + lock.wheel.remove(entry); + } + + entry.as_ref().handle().fire(Ok(())); + } + } + + /// Removes and re-adds an entry to the driver. + /// + /// SAFETY: The timer must be either unregistered, or registered with this + /// driver. No other threads are allowed to concurrently manipulate the + /// timer at all (the current thread should hold an exclusive reference to + /// the `TimerEntry`) + pub(self) unsafe fn reregister( + &self, + unpark: &IoHandle, + new_tick: u64, + entry: NonNull<TimerShared>, + ) { + let waker = unsafe { + let mut lock = self.inner.lock(); + + // We may have raced with a firing/deregistration, so check before + // deregistering. + if unsafe { entry.as_ref().might_be_registered() } { + lock.wheel.remove(entry); + } + + // Now that we have exclusive control of this entry, mint a handle to reinsert it. 
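+            // From here the outcome is one of: fire with a shutdown error if the
+            // driver is shutting down, fire immediately if the new tick has already
+            // elapsed, or insert into the wheel (unparking the driver if this entry
+            // becomes the new earliest deadline).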
+ let entry = entry.as_ref().handle(); + + if self.is_shutdown() { + unsafe { entry.fire(Err(crate::time::error::Error::shutdown())) } + } else { + entry.set_expiration(new_tick); + + // Note: We don't have to worry about racing with some other resetting + // thread, because add_entry and reregister require exclusive control of + // the timer entry. + match unsafe { lock.wheel.insert(entry) } { + Ok(when) => { + if lock + .next_wake + .map(|next_wake| when < next_wake.get()) + .unwrap_or(true) + { + unpark.unpark(); + } + + None + } + Err((entry, crate::time::error::InsertError::Elapsed)) => unsafe { + entry.fire(Ok(())) + }, + } + } + + // Must release lock before invoking waker to avoid the risk of deadlock. + }; + + // The timer was fired synchronously as a result of the reregistration. + // Wake the waker; this is needed because we might reset _after_ a poll, + // and otherwise the task won't be awoken to poll again. + if let Some(waker) = waker { + waker.wake(); + } + } + + cfg_test_util! { + fn did_wake(&self) -> bool { + self.inner.did_wake.swap(false, Ordering::SeqCst) + } + } +} + +// ===== impl Inner ===== + +impl Inner { + /// Locks the driver's inner structure + pub(super) fn lock(&self) -> crate::loom::sync::MutexGuard<'_, InnerState> { + self.state.lock() + } + + // Check whether the driver has been shutdown + pub(super) fn is_shutdown(&self) -> bool { + self.is_shutdown.load(Ordering::SeqCst) + } +} + +impl fmt::Debug for Inner { + fn fmt(&self, fmt: &mut fmt::Formatter<'_>) -> fmt::Result { + fmt.debug_struct("Inner").finish() + } +} + +#[cfg(test)] +mod tests; diff --git a/third_party/rust/tokio/src/runtime/time/source.rs b/third_party/rust/tokio/src/runtime/time/source.rs new file mode 100644 index 0000000000..4647bc4122 --- /dev/null +++ b/third_party/rust/tokio/src/runtime/time/source.rs @@ -0,0 +1,39 @@ +use super::MAX_SAFE_MILLIS_DURATION; +use crate::time::{Clock, Duration, Instant}; + +/// A structure which handles conversion from Instants to u64 timestamps. 
+#[derive(Debug)] +pub(crate) struct TimeSource { + start_time: Instant, +} + +impl TimeSource { + pub(crate) fn new(clock: &Clock) -> Self { + Self { + start_time: clock.now(), + } + } + + pub(crate) fn deadline_to_tick(&self, t: Instant) -> u64 { + // Round up to the end of a ms + self.instant_to_tick(t + Duration::from_nanos(999_999)) + } + + pub(crate) fn instant_to_tick(&self, t: Instant) -> u64 { + // round up + let dur: Duration = t + .checked_duration_since(self.start_time) + .unwrap_or_else(|| Duration::from_secs(0)); + let ms = dur.as_millis(); + + ms.try_into().unwrap_or(MAX_SAFE_MILLIS_DURATION) + } + + pub(crate) fn tick_to_duration(&self, t: u64) -> Duration { + Duration::from_millis(t) + } + + pub(crate) fn now(&self, clock: &Clock) -> u64 { + self.instant_to_tick(clock.now()) + } +} diff --git a/third_party/rust/tokio/src/runtime/time/tests/mod.rs b/third_party/rust/tokio/src/runtime/time/tests/mod.rs new file mode 100644 index 0000000000..155d99a348 --- /dev/null +++ b/third_party/rust/tokio/src/runtime/time/tests/mod.rs @@ -0,0 +1,267 @@ +#![cfg(not(tokio_wasi))] + +use std::{task::Context, time::Duration}; + +#[cfg(not(loom))] +use futures::task::noop_waker_ref; + +use crate::loom::sync::atomic::{AtomicBool, Ordering}; +use crate::loom::sync::Arc; +use crate::loom::thread; + +use super::TimerEntry; + +fn block_on<T>(f: impl std::future::Future<Output = T>) -> T { + #[cfg(loom)] + return loom::future::block_on(f); + + #[cfg(not(loom))] + { + let rt = crate::runtime::Builder::new_current_thread() + .build() + .unwrap(); + rt.block_on(f) + } +} + +fn model(f: impl Fn() + Send + Sync + 'static) { + #[cfg(loom)] + loom::model(f); + + #[cfg(not(loom))] + f(); +} + +fn rt(start_paused: bool) -> crate::runtime::Runtime { + crate::runtime::Builder::new_current_thread() + .enable_time() + .start_paused(start_paused) + .build() + .unwrap() +} + +#[test] +fn single_timer() { + model(|| { + let rt = rt(false); + let handle = rt.handle(); + + let handle_ = handle.clone(); + let jh = thread::spawn(move || { + let entry = TimerEntry::new( + &handle_.inner, + handle_.inner.driver().clock().now() + Duration::from_secs(1), + ); + pin!(entry); + + block_on(futures::future::poll_fn(|cx| { + entry.as_mut().poll_elapsed(cx) + })) + .unwrap(); + }); + + thread::yield_now(); + + let time = handle.inner.driver().time(); + let clock = handle.inner.driver().clock(); + + // This may or may not return Some (depending on how it races with the + // thread). If it does return None, however, the timer should complete + // synchronously. + time.process_at_time(time.time_source().now(clock) + 2_000_000_000); + + jh.join().unwrap(); + }) +} + +#[test] +fn drop_timer() { + model(|| { + let rt = rt(false); + let handle = rt.handle(); + + let handle_ = handle.clone(); + let jh = thread::spawn(move || { + let entry = TimerEntry::new( + &handle_.inner, + handle_.inner.driver().clock().now() + Duration::from_secs(1), + ); + pin!(entry); + + let _ = entry + .as_mut() + .poll_elapsed(&mut Context::from_waker(futures::task::noop_waker_ref())); + let _ = entry + .as_mut() + .poll_elapsed(&mut Context::from_waker(futures::task::noop_waker_ref())); + }); + + thread::yield_now(); + + let time = handle.inner.driver().time(); + let clock = handle.inner.driver().clock(); + + // advance 2s in the future. 
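+        // (Ticks are milliseconds, per `TimeSource::instant_to_tick`, so this
+        // advances the driver well past the one-second deadline registered above.)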
+ time.process_at_time(time.time_source().now(clock) + 2_000_000_000); + + jh.join().unwrap(); + }) +} + +#[test] +fn change_waker() { + model(|| { + let rt = rt(false); + let handle = rt.handle(); + + let handle_ = handle.clone(); + let jh = thread::spawn(move || { + let entry = TimerEntry::new( + &handle_.inner, + handle_.inner.driver().clock().now() + Duration::from_secs(1), + ); + pin!(entry); + + let _ = entry + .as_mut() + .poll_elapsed(&mut Context::from_waker(futures::task::noop_waker_ref())); + + block_on(futures::future::poll_fn(|cx| { + entry.as_mut().poll_elapsed(cx) + })) + .unwrap(); + }); + + thread::yield_now(); + + let time = handle.inner.driver().time(); + let clock = handle.inner.driver().clock(); + + // advance 2s + time.process_at_time(time.time_source().now(clock) + 2_000_000_000); + + jh.join().unwrap(); + }) +} + +#[test] +fn reset_future() { + model(|| { + let finished_early = Arc::new(AtomicBool::new(false)); + + let rt = rt(false); + let handle = rt.handle(); + + let handle_ = handle.clone(); + let finished_early_ = finished_early.clone(); + let start = handle.inner.driver().clock().now(); + + let jh = thread::spawn(move || { + let entry = TimerEntry::new(&handle_.inner, start + Duration::from_secs(1)); + pin!(entry); + + let _ = entry + .as_mut() + .poll_elapsed(&mut Context::from_waker(futures::task::noop_waker_ref())); + + entry.as_mut().reset(start + Duration::from_secs(2), true); + + // shouldn't complete before 2s + block_on(futures::future::poll_fn(|cx| { + entry.as_mut().poll_elapsed(cx) + })) + .unwrap(); + + finished_early_.store(true, Ordering::Relaxed); + }); + + thread::yield_now(); + + let handle = handle.inner.driver().time(); + + // This may or may not return a wakeup time. + handle.process_at_time( + handle + .time_source() + .instant_to_tick(start + Duration::from_millis(1500)), + ); + + assert!(!finished_early.load(Ordering::Relaxed)); + + handle.process_at_time( + handle + .time_source() + .instant_to_tick(start + Duration::from_millis(2500)), + ); + + jh.join().unwrap(); + + assert!(finished_early.load(Ordering::Relaxed)); + }) +} + +#[cfg(not(loom))] +fn normal_or_miri<T>(normal: T, miri: T) -> T { + if cfg!(miri) { + miri + } else { + normal + } +} + +#[test] +#[cfg(not(loom))] +fn poll_process_levels() { + let rt = rt(true); + let handle = rt.handle(); + + let mut entries = vec![]; + + for i in 0..normal_or_miri(1024, 64) { + let mut entry = Box::pin(TimerEntry::new( + &handle.inner, + handle.inner.driver().clock().now() + Duration::from_millis(i), + )); + + let _ = entry + .as_mut() + .poll_elapsed(&mut Context::from_waker(noop_waker_ref())); + + entries.push(entry); + } + + for t in 1..normal_or_miri(1024, 64) { + handle.inner.driver().time().process_at_time(t as u64); + + for (deadline, future) in entries.iter_mut().enumerate() { + let mut context = Context::from_waker(noop_waker_ref()); + if deadline <= t { + assert!(future.as_mut().poll_elapsed(&mut context).is_ready()); + } else { + assert!(future.as_mut().poll_elapsed(&mut context).is_pending()); + } + } + } +} + +#[test] +#[cfg(not(loom))] +fn poll_process_levels_targeted() { + let mut context = Context::from_waker(noop_waker_ref()); + + let rt = rt(true); + let handle = rt.handle(); + + let e1 = TimerEntry::new( + &handle.inner, + handle.inner.driver().clock().now() + Duration::from_millis(193), + ); + pin!(e1); + + let handle = handle.inner.driver().time(); + + handle.process_at_time(62); + assert!(e1.as_mut().poll_elapsed(&mut context).is_pending()); + 
handle.process_at_time(192); + handle.process_at_time(192); +} diff --git a/third_party/rust/tokio/src/runtime/time/wheel/level.rs b/third_party/rust/tokio/src/runtime/time/wheel/level.rs new file mode 100644 index 0000000000..7e48ff5c57 --- /dev/null +++ b/third_party/rust/tokio/src/runtime/time/wheel/level.rs @@ -0,0 +1,274 @@ +use crate::runtime::time::{EntryList, TimerHandle, TimerShared}; + +use std::{fmt, ptr::NonNull}; + +/// Wheel for a single level in the timer. This wheel contains 64 slots. +pub(crate) struct Level { + level: usize, + + /// Bit field tracking which slots currently contain entries. + /// + /// Using a bit field to track slots that contain entries allows avoiding a + /// scan to find entries. This field is updated when entries are added or + /// removed from a slot. + /// + /// The least-significant bit represents slot zero. + occupied: u64, + + /// Slots. We access these via the EntryInner `current_list` as well, so this needs to be an UnsafeCell. + slot: [EntryList; LEVEL_MULT], +} + +/// Indicates when a slot must be processed next. +#[derive(Debug)] +pub(crate) struct Expiration { + /// The level containing the slot. + pub(crate) level: usize, + + /// The slot index. + pub(crate) slot: usize, + + /// The instant at which the slot needs to be processed. + pub(crate) deadline: u64, +} + +/// Level multiplier. +/// +/// Being a power of 2 is very important. +const LEVEL_MULT: usize = 64; + +impl Level { + pub(crate) fn new(level: usize) -> Level { + // A value has to be Copy in order to use syntax like: + // let stack = Stack::default(); + // ... + // slots: [stack; 64], + // + // Alternatively, since Stack is Default one can + // use syntax like: + // let slots: [Stack; 64] = Default::default(); + // + // However, that is only supported for arrays of size + // 32 or fewer. So in our case we have to explicitly + // invoke the constructor for each array element. + let ctor = EntryList::default; + + Level { + level, + occupied: 0, + slot: [ + ctor(), + ctor(), + ctor(), + ctor(), + ctor(), + ctor(), + ctor(), + ctor(), + ctor(), + ctor(), + ctor(), + ctor(), + ctor(), + ctor(), + ctor(), + ctor(), + ctor(), + ctor(), + ctor(), + ctor(), + ctor(), + ctor(), + ctor(), + ctor(), + ctor(), + ctor(), + ctor(), + ctor(), + ctor(), + ctor(), + ctor(), + ctor(), + ctor(), + ctor(), + ctor(), + ctor(), + ctor(), + ctor(), + ctor(), + ctor(), + ctor(), + ctor(), + ctor(), + ctor(), + ctor(), + ctor(), + ctor(), + ctor(), + ctor(), + ctor(), + ctor(), + ctor(), + ctor(), + ctor(), + ctor(), + ctor(), + ctor(), + ctor(), + ctor(), + ctor(), + ctor(), + ctor(), + ctor(), + ctor(), + ], + } + } + + /// Finds the slot that needs to be processed next and returns the slot and + /// `Instant` at which this slot must be processed. + pub(crate) fn next_expiration(&self, now: u64) -> Option<Expiration> { + // Use the `occupied` bit field to get the index of the next slot that + // needs to be processed. + let slot = match self.next_occupied_slot(now) { + Some(slot) => slot, + None => return None, + }; + + // From the slot index, calculate the `Instant` at which it needs to be + // processed. This value *must* be in the future with respect to `now`. + + let level_range = level_range(self.level); + let slot_range = slot_range(self.level); + + // Compute the start date of the current level by masking the low bits + // of `now` (`level_range` is a power of 2). 
+ let level_start = now & !(level_range - 1); + let mut deadline = level_start + slot as u64 * slot_range; + + if deadline <= now { + // A timer is in a slot "prior" to the current time. This can occur + // because we do not have an infinite hierarchy of timer levels, and + // eventually a timer scheduled for a very distant time might end up + // being placed in a slot that is beyond the end of all of the + // arrays. + // + // To deal with this, we first limit timers to being scheduled no + // more than MAX_DURATION ticks in the future; that is, they're at + // most one rotation of the top level away. Then, we force timers + // that logically would go into the top+1 level, to instead go into + // the top level's slots. + // + // What this means is that the top level's slots act as a + // pseudo-ring buffer, and we rotate around them indefinitely. If we + // compute a deadline before now, and it's the top level, it + // therefore means we're actually looking at a slot in the future. + debug_assert_eq!(self.level, super::NUM_LEVELS - 1); + + deadline += level_range; + } + + debug_assert!( + deadline >= now, + "deadline={:016X}; now={:016X}; level={}; lr={:016X}, sr={:016X}, slot={}; occupied={:b}", + deadline, + now, + self.level, + level_range, + slot_range, + slot, + self.occupied + ); + + Some(Expiration { + level: self.level, + slot, + deadline, + }) + } + + fn next_occupied_slot(&self, now: u64) -> Option<usize> { + if self.occupied == 0 { + return None; + } + + // Get the slot for now using Maths + let now_slot = (now / slot_range(self.level)) as usize; + let occupied = self.occupied.rotate_right(now_slot as u32); + let zeros = occupied.trailing_zeros() as usize; + let slot = (zeros + now_slot) % 64; + + Some(slot) + } + + pub(crate) unsafe fn add_entry(&mut self, item: TimerHandle) { + let slot = slot_for(item.cached_when(), self.level); + + self.slot[slot].push_front(item); + + self.occupied |= occupied_bit(slot); + } + + pub(crate) unsafe fn remove_entry(&mut self, item: NonNull<TimerShared>) { + let slot = slot_for(unsafe { item.as_ref().cached_when() }, self.level); + + unsafe { self.slot[slot].remove(item) }; + if self.slot[slot].is_empty() { + // The bit is currently set + debug_assert!(self.occupied & occupied_bit(slot) != 0); + + // Unset the bit + self.occupied ^= occupied_bit(slot); + } + } + + pub(crate) fn take_slot(&mut self, slot: usize) -> EntryList { + self.occupied &= !occupied_bit(slot); + + std::mem::take(&mut self.slot[slot]) + } +} + +impl fmt::Debug for Level { + fn fmt(&self, fmt: &mut fmt::Formatter<'_>) -> fmt::Result { + fmt.debug_struct("Level") + .field("occupied", &self.occupied) + .finish() + } +} + +fn occupied_bit(slot: usize) -> u64 { + 1 << slot +} + +fn slot_range(level: usize) -> u64 { + LEVEL_MULT.pow(level as u32) as u64 +} + +fn level_range(level: usize) -> u64 { + LEVEL_MULT as u64 * slot_range(level) +} + +/// Converts a duration (milliseconds) and a level to a slot position. 
+fn slot_for(duration: u64, level: usize) -> usize {
+    ((duration >> (level * 6)) % LEVEL_MULT as u64) as usize
+}
+
+#[cfg(all(test, not(loom)))]
+mod test {
+    use super::*;
+
+    #[test]
+    fn test_slot_for() {
+        for pos in 0..64 {
+            assert_eq!(pos as usize, slot_for(pos, 0));
+        }
+
+        for level in 1..5 {
+            for pos in level..64 {
+                let a = pos * 64_usize.pow(level as u32);
+                assert_eq!(pos as usize, slot_for(a as u64, level));
+            }
+        }
+    }
+}
diff --git a/third_party/rust/tokio/src/runtime/time/wheel/mod.rs b/third_party/rust/tokio/src/runtime/time/wheel/mod.rs
new file mode 100644
index 0000000000..bf13b7b241
--- /dev/null
+++ b/third_party/rust/tokio/src/runtime/time/wheel/mod.rs
@@ -0,0 +1,349 @@
+use crate::runtime::time::{TimerHandle, TimerShared};
+use crate::time::error::InsertError;
+
+mod level;
+pub(crate) use self::level::Expiration;
+use self::level::Level;
+
+use std::ptr::NonNull;
+
+use super::EntryList;
+
+/// Timing wheel implementation.
+///
+/// This type provides the hashed timing wheel implementation that backs `Timer`
+/// and `DelayQueue`.
+///
+/// The structure is generic over `T: Stack`. This allows handling timeout data
+/// being stored on the heap or in a slab. In order to support the latter case,
+/// the slab must be passed into each function allowing the implementation to
+/// lookup timer entries.
+///
+/// See `Timer` documentation for some implementation notes.
+#[derive(Debug)]
+pub(crate) struct Wheel {
+    /// The number of milliseconds elapsed since the wheel started.
+    elapsed: u64,
+
+    /// Timer wheel.
+    ///
+    /// Levels:
+    ///
+    /// * 1 ms slots / 64 ms range
+    /// * 64 ms slots / ~ 4 sec range
+    /// * ~ 4 sec slots / ~ 4 min range
+    /// * ~ 4 min slots / ~ 4 hr range
+    /// * ~ 4 hr slots / ~ 12 day range
+    /// * ~ 12 day slots / ~ 2 yr range
+    levels: Vec<Level>,
+
+    /// Entries queued for firing
+    pending: EntryList,
+}
+
+/// Number of levels. Each level has 64 slots. By using 6 levels with 64 slots
+/// each, the timer is able to track time up to 2 years into the future with a
+/// precision of 1 millisecond.
+const NUM_LEVELS: usize = 6;
+
+/// The maximum duration of a `Sleep`.
+pub(super) const MAX_DURATION: u64 = (1 << (6 * NUM_LEVELS)) - 1;
+
+impl Wheel {
+    /// Creates a new timing wheel.
+    pub(crate) fn new() -> Wheel {
+        let levels = (0..NUM_LEVELS).map(Level::new).collect();
+
+        Wheel {
+            elapsed: 0,
+            levels,
+            pending: EntryList::new(),
+        }
+    }
+
+    /// Returns the number of milliseconds that have elapsed since the timing
+    /// wheel's creation.
+    pub(crate) fn elapsed(&self) -> u64 {
+        self.elapsed
+    }
+
+    /// Inserts an entry into the timing wheel.
+    ///
+    /// # Arguments
+    ///
+    /// * `item`: The item to insert into the wheel.
+    ///
+    /// # Return
+    ///
+    /// Returns `Ok` when the item is successfully inserted, `Err` otherwise.
+    ///
+    /// `Err(Elapsed)` indicates that `when` represents an instant that has
+    /// already passed. In this case, the caller should fire the timeout
+    /// immediately.
+    ///
+    /// `Err(Invalid)` indicates an invalid `when` argument has been supplied.
+    ///
+    /// # Safety
+    ///
+    /// This function registers item into an intrusive linked list. The caller
+    /// must ensure that `item` is pinned and will not be dropped without first
+    /// being deregistered.
+ pub(crate) unsafe fn insert( + &mut self, + item: TimerHandle, + ) -> Result<u64, (TimerHandle, InsertError)> { + let when = item.sync_when(); + + if when <= self.elapsed { + return Err((item, InsertError::Elapsed)); + } + + // Get the level at which the entry should be stored + let level = self.level_for(when); + + unsafe { + self.levels[level].add_entry(item); + } + + debug_assert!({ + self.levels[level] + .next_expiration(self.elapsed) + .map(|e| e.deadline >= self.elapsed) + .unwrap_or(true) + }); + + Ok(when) + } + + /// Removes `item` from the timing wheel. + pub(crate) unsafe fn remove(&mut self, item: NonNull<TimerShared>) { + unsafe { + let when = item.as_ref().cached_when(); + if when == u64::MAX { + self.pending.remove(item); + } else { + debug_assert!( + self.elapsed <= when, + "elapsed={}; when={}", + self.elapsed, + when + ); + + let level = self.level_for(when); + + self.levels[level].remove_entry(item); + } + } + } + + /// Instant at which to poll. + pub(crate) fn poll_at(&self) -> Option<u64> { + self.next_expiration().map(|expiration| expiration.deadline) + } + + /// Advances the timer up to the instant represented by `now`. + pub(crate) fn poll(&mut self, now: u64) -> Option<TimerHandle> { + loop { + if let Some(handle) = self.pending.pop_back() { + return Some(handle); + } + + match self.next_expiration() { + Some(ref expiration) if expiration.deadline <= now => { + self.process_expiration(expiration); + + self.set_elapsed(expiration.deadline); + } + _ => { + // in this case the poll did not indicate an expiration + // _and_ we were not able to find a next expiration in + // the current list of timers. advance to the poll's + // current time and do nothing else. + self.set_elapsed(now); + break; + } + } + } + + self.pending.pop_back() + } + + /// Returns the instant at which the next timeout expires. + fn next_expiration(&self) -> Option<Expiration> { + if !self.pending.is_empty() { + // Expire immediately as we have things pending firing + return Some(Expiration { + level: 0, + slot: 0, + deadline: self.elapsed, + }); + } + + // Check all levels + for level in 0..NUM_LEVELS { + if let Some(expiration) = self.levels[level].next_expiration(self.elapsed) { + // There cannot be any expirations at a higher level that happen + // before this one. + debug_assert!(self.no_expirations_before(level + 1, expiration.deadline)); + + return Some(expiration); + } + } + + None + } + + /// Returns the tick at which this timer wheel next needs to perform some + /// processing, or None if there are no timers registered. + pub(super) fn next_expiration_time(&self) -> Option<u64> { + self.next_expiration().map(|ex| ex.deadline) + } + + /// Used for debug assertions + fn no_expirations_before(&self, start_level: usize, before: u64) -> bool { + let mut res = true; + + for l2 in start_level..NUM_LEVELS { + if let Some(e2) = self.levels[l2].next_expiration(self.elapsed) { + if e2.deadline < before { + res = false; + } + } + } + + res + } + + /// iteratively find entries that are between the wheel's current + /// time and the expiration time. for each in that population either + /// queue it for notification (in the case of the last level) or tier + /// it down to the next level (in all other cases). + pub(crate) fn process_expiration(&mut self, expiration: &Expiration) { + // Note that we need to take _all_ of the entries off the list before + // processing any of them. This is important because it's possible that + // those entries might need to be reinserted into the same slot. 
+ // + // This happens only on the highest level, when an entry is inserted + // more than MAX_DURATION into the future. When this happens, we wrap + // around, and process some entries a multiple of MAX_DURATION before + // they actually need to be dropped down a level. We then reinsert them + // back into the same position; we must make sure we don't then process + // those entries again or we'll end up in an infinite loop. + let mut entries = self.take_entries(expiration); + + while let Some(item) = entries.pop_back() { + if expiration.level == 0 { + debug_assert_eq!(unsafe { item.cached_when() }, expiration.deadline); + } + + // Try to expire the entry; this is cheap (doesn't synchronize) if + // the timer is not expired, and updates cached_when. + match unsafe { item.mark_pending(expiration.deadline) } { + Ok(()) => { + // Item was expired + self.pending.push_front(item); + } + Err(expiration_tick) => { + let level = level_for(expiration.deadline, expiration_tick); + unsafe { + self.levels[level].add_entry(item); + } + } + } + } + } + + fn set_elapsed(&mut self, when: u64) { + assert!( + self.elapsed <= when, + "elapsed={:?}; when={:?}", + self.elapsed, + when + ); + + if when > self.elapsed { + self.elapsed = when; + } + } + + /// Obtains the list of entries that need processing for the given expiration. + /// + fn take_entries(&mut self, expiration: &Expiration) -> EntryList { + self.levels[expiration.level].take_slot(expiration.slot) + } + + fn level_for(&self, when: u64) -> usize { + level_for(self.elapsed, when) + } +} + +fn level_for(elapsed: u64, when: u64) -> usize { + const SLOT_MASK: u64 = (1 << 6) - 1; + + // Mask in the trailing bits ignored by the level calculation in order to cap + // the possible leading zeros + let mut masked = elapsed ^ when | SLOT_MASK; + + if masked >= MAX_DURATION { + // Fudge the timer into the top level + masked = MAX_DURATION - 1; + } + + let leading_zeros = masked.leading_zeros() as usize; + let significant = 63 - leading_zeros; + + significant / 6 +} + +#[cfg(all(test, not(loom)))] +mod test { + use super::*; + + #[test] + fn test_level_for() { + for pos in 0..64 { + assert_eq!( + 0, + level_for(0, pos), + "level_for({}) -- binary = {:b}", + pos, + pos + ); + } + + for level in 1..5 { + for pos in level..64 { + let a = pos * 64_usize.pow(level as u32); + assert_eq!( + level, + level_for(0, a as u64), + "level_for({}) -- binary = {:b}", + a, + a + ); + + if pos > level { + let a = a - 1; + assert_eq!( + level, + level_for(0, a as u64), + "level_for({}) -- binary = {:b}", + a, + a + ); + } + + if pos < 64 { + let a = a + 1; + assert_eq!( + level, + level_for(0, a as u64), + "level_for({}) -- binary = {:b}", + a, + a + ); + } + } + } + } +} |
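The `TimeSource` in `time/source.rs` above reduces every `Instant` to a millisecond tick counted from the driver's start and rounds deadlines up to the end of their millisecond so a timer never fires early. The standalone sketch below is illustrative only, not part of the vendored code: it mirrors that conversion against `std::time::Instant`, and the real code's `MAX_SAFE_MILLIS_DURATION` clamp is approximated here with `u64::MAX`. It also makes explicit why the tests above drive `process_at_time` with `now(clock) + 2_000_000_000`: ticks are milliseconds, so that offset is roughly 23 days, comfortably past the one-second deadlines the tests register, even though the inline comments describe it as advancing "2s".

use std::time::{Duration, Instant};

/// Illustrative stand-in for the driver's millisecond time source
/// (not the real tokio type).
struct TickSource {
    start_time: Instant,
}

impl TickSource {
    fn new(start_time: Instant) -> Self {
        Self { start_time }
    }

    /// Milliseconds since `start_time`, saturating to 0 for earlier instants
    /// and to `u64::MAX` for implausibly distant ones.
    fn instant_to_tick(&self, t: Instant) -> u64 {
        let dur = t
            .checked_duration_since(self.start_time)
            .unwrap_or(Duration::ZERO);
        u64::try_from(dur.as_millis()).unwrap_or(u64::MAX)
    }

    /// Deadlines are rounded up to the end of their millisecond so a timer
    /// never fires early.
    fn deadline_to_tick(&self, t: Instant) -> u64 {
        self.instant_to_tick(t + Duration::from_nanos(999_999))
    }
}

fn main() {
    let start = Instant::now();
    let source = TickSource::new(start);

    // 1.5 ms after start truncates to tick 1 as a plain instant...
    let t = start + Duration::from_micros(1_500);
    assert_eq!(source.instant_to_tick(t), 1);
    // ...but as a deadline it rounds up to tick 2.
    assert_eq!(source.deadline_to_tick(t), 2);

    // Instants from before the driver started clamp to tick 0.
    let earlier = start.checked_sub(Duration::from_millis(5)).unwrap_or(start);
    assert_eq!(source.instant_to_tick(earlier), 0);

    // Ticks are milliseconds, so the `+ 2_000_000_000` used by the tests is
    // about 23 days, far beyond their one-second timers.
    let days = 2_000_000_000u64 as f64 / (1000.0 * 60.0 * 60.0 * 24.0);
    assert!((23.0..23.2).contains(&days));
    println!("deadline tick = {}, test offset ≈ {days:.1} days", source.deadline_to_tick(t));
}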
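`Level` in `wheel/level.rs` finds work without scanning its 64 slots: the `occupied` bit field is rotated so the search origin becomes bit zero, and a single `trailing_zeros` then yields the next occupied slot, wrap-around included. A minimal sketch of that lookup, with an invented occupancy pattern for the demo:

/// Mirrors the lookup in Level::next_occupied_slot: given a 64-bit occupancy
/// mask, find the first occupied slot at or after `now_slot`, wrapping around
/// the level if necessary.
fn next_occupied_slot(occupied: u64, now_slot: usize) -> Option<usize> {
    if occupied == 0 {
        return None;
    }
    // Rotating by `now_slot` moves "at or after now_slot" into the low bits,
    // so trailing_zeros finds the nearest occupied slot in one step.
    let rotated = occupied.rotate_right(now_slot as u32);
    Some((rotated.trailing_zeros() as usize + now_slot) % 64)
}

fn main() {
    // Hypothetical occupancy: slots 3 and 50 hold entries.
    let occupied = (1u64 << 3) | (1u64 << 50);

    // Searching from slot 10 finds slot 50...
    assert_eq!(next_occupied_slot(occupied, 10), Some(50));
    // ...and searching from slot 51 wraps around to slot 3.
    assert_eq!(next_occupied_slot(occupied, 51), Some(3));
    // An empty level has nothing to process.
    assert_eq!(next_occupied_slot(0, 0), None);

    println!("bitmask slot search behaves as expected");
}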
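Entry placement is pure bit arithmetic: `level_for` in `wheel/mod.rs` picks the highest level in which `when` and the wheel's `elapsed` tick still fall in different slots, and `slot_for` in `wheel/level.rs` extracts the 6-bit slot index for that level. The sketch below mirrors both functions (the demo timer at tick 5000 is invented for illustration) and walks it down the levels as the wheel advances, the same cascading that `process_expiration` performs when it drains a higher-level slot.

/// Number of levels and the derived cap, as in wheel/mod.rs.
const NUM_LEVELS: usize = 6;
const MAX_DURATION: u64 = (1 << (6 * NUM_LEVELS)) - 1;

/// Mirrors level_for: pick the level for an entry firing at `when`, given the
/// wheel's current `elapsed` tick.
fn level_for(elapsed: u64, when: u64) -> usize {
    const SLOT_MASK: u64 = (1 << 6) - 1;

    // Force the low 6 bits on so `masked` is never zero and near-term timers
    // (same 64 ms window as `elapsed`) land in level 0.
    let mut masked = (elapsed ^ when) | SLOT_MASK;
    if masked >= MAX_DURATION {
        // Fudge very distant timers into the top level.
        masked = MAX_DURATION - 1;
    }
    let significant = 63 - masked.leading_zeros() as usize;
    significant / 6
}

/// Mirrors slot_for: the slot index of `when` within `level`.
fn slot_for(when: u64, level: usize) -> usize {
    ((when >> (level * 6)) % 64) as usize
}

fn main() {
    // Six levels of 64 slots span 64 ms, ~4 s, ~4 min, ~4 h, ~12 days and
    // ~2 years; MAX_DURATION is the full range of 2^36 - 1 ms.
    assert_eq!(MAX_DURATION, (1u64 << 36) - 1);

    // A timer due at tick 5000 while the wheel is at tick 0 sits in level 2
    // (slot width 4096 ms), slot 1, i.e. the slot covering ticks 4096..=8191.
    assert_eq!(level_for(0, 5000), 2);
    assert_eq!(slot_for(5000, 2), 1);

    // Once the wheel has advanced to tick 4096 (the start of that slot), the
    // same timer cascades down to level 1 (slot width 64 ms), slot 14, which
    // covers ticks 4992..=5055.
    assert_eq!(level_for(4096, 5000), 1);
    assert_eq!(slot_for(5000, 1), 14);

    // And once elapsed reaches tick 4992 it drops to level 0, slot 8, which
    // is exactly millisecond 5000 (5000 % 64 == 8).
    assert_eq!(level_for(4992, 5000), 0);
    assert_eq!(slot_for(5000, 0), 8);

    println!("wheel placement checks passed");
}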