summary refs log tree commit diff stats
path: root/third_party/rust/tokio/src/runtime
diff options
context:
space:
mode:
author Daniel Baumann <daniel.baumann@progress-linux.org> 2024-04-19 00:47:55 +0000
committer Daniel Baumann <daniel.baumann@progress-linux.org> 2024-04-19 00:47:55 +0000
commit 26a029d407be480d791972afb5975cf62c9360a6 (patch)
tree f435a8308119effd964b339f76abb83a57c29483 /third_party/rust/tokio/src/runtime
parent Initial commit. (diff)
download firefox-26a029d407be480d791972afb5975cf62c9360a6.tar.xz
firefox-26a029d407be480d791972afb5975cf62c9360a6.zip
Adding upstream version 124.0.1. upstream/124.0.1
Signed-off-by: Daniel Baumann <daniel.baumann@progress-linux.org>
Diffstat (limited to 'third_party/rust/tokio/src/runtime')
-rw-r--r--third_party/rust/tokio/src/runtime/blocking/mod.rs26
-rw-r--r--third_party/rust/tokio/src/runtime/blocking/pool.rs602
-rw-r--r--third_party/rust/tokio/src/runtime/blocking/schedule.rs56
-rw-r--r--third_party/rust/tokio/src/runtime/blocking/shutdown.rs71
-rw-r--r--third_party/rust/tokio/src/runtime/blocking/task.rs44
-rw-r--r--third_party/rust/tokio/src/runtime/builder.rs1236
-rw-r--r--third_party/rust/tokio/src/runtime/config.rs37
-rw-r--r--third_party/rust/tokio/src/runtime/context.rs191
-rw-r--r--third_party/rust/tokio/src/runtime/context/blocking.rs121
-rw-r--r--third_party/rust/tokio/src/runtime/context/current.rs99
-rw-r--r--third_party/rust/tokio/src/runtime/context/runtime.rs99
-rw-r--r--third_party/rust/tokio/src/runtime/context/runtime_mt.rs36
-rw-r--r--third_party/rust/tokio/src/runtime/context/scoped.rs56
-rw-r--r--third_party/rust/tokio/src/runtime/coop.rs323
-rw-r--r--third_party/rust/tokio/src/runtime/driver.rs341
-rw-r--r--third_party/rust/tokio/src/runtime/dump.rs76
-rw-r--r--third_party/rust/tokio/src/runtime/handle.rs587
-rw-r--r--third_party/rust/tokio/src/runtime/io/metrics.rs24
-rw-r--r--third_party/rust/tokio/src/runtime/io/mod.rs356
-rw-r--r--third_party/rust/tokio/src/runtime/io/registration.rs252
-rw-r--r--third_party/rust/tokio/src/runtime/io/scheduled_io.rs558
-rw-r--r--third_party/rust/tokio/src/runtime/metrics/batch.rs162
-rw-r--r--third_party/rust/tokio/src/runtime/metrics/histogram.rs502
-rw-r--r--third_party/rust/tokio/src/runtime/metrics/io.rs24
-rw-r--r--third_party/rust/tokio/src/runtime/metrics/mock.rs55
-rw-r--r--third_party/rust/tokio/src/runtime/metrics/mod.rs40
-rw-r--r--third_party/rust/tokio/src/runtime/metrics/runtime.rs883
-rw-r--r--third_party/rust/tokio/src/runtime/metrics/scheduler.rs34
-rw-r--r--third_party/rust/tokio/src/runtime/metrics/worker.rs80
-rw-r--r--third_party/rust/tokio/src/runtime/mod.rs265
-rw-r--r--third_party/rust/tokio/src/runtime/park.rs348
-rw-r--r--third_party/rust/tokio/src/runtime/process.rs44
-rw-r--r--third_party/rust/tokio/src/runtime/runtime.rs445
-rw-r--r--third_party/rust/tokio/src/runtime/scheduler/current_thread.rs750
-rw-r--r--third_party/rust/tokio/src/runtime/scheduler/defer.rs43
-rw-r--r--third_party/rust/tokio/src/runtime/scheduler/inject.rs72
-rw-r--r--third_party/rust/tokio/src/runtime/scheduler/inject/metrics.rs7
-rw-r--r--third_party/rust/tokio/src/runtime/scheduler/inject/pop.rs55
-rw-r--r--third_party/rust/tokio/src/runtime/scheduler/inject/rt_multi_thread.rs98
-rw-r--r--third_party/rust/tokio/src/runtime/scheduler/inject/shared.rs119
-rw-r--r--third_party/rust/tokio/src/runtime/scheduler/inject/synced.rs32
-rw-r--r--third_party/rust/tokio/src/runtime/scheduler/lock.rs6
-rw-r--r--third_party/rust/tokio/src/runtime/scheduler/mod.rs249
-rw-r--r--third_party/rust/tokio/src/runtime/scheduler/multi_thread/counters.rs62
-rw-r--r--third_party/rust/tokio/src/runtime/scheduler/multi_thread/handle.rs68
-rw-r--r--third_party/rust/tokio/src/runtime/scheduler/multi_thread/handle/metrics.rs41
-rw-r--r--third_party/rust/tokio/src/runtime/scheduler/multi_thread/handle/taskdump.rs26
-rw-r--r--third_party/rust/tokio/src/runtime/scheduler/multi_thread/idle.rs240
-rw-r--r--third_party/rust/tokio/src/runtime/scheduler/multi_thread/mod.rs103
-rw-r--r--third_party/rust/tokio/src/runtime/scheduler/multi_thread/overflow.rs26
-rw-r--r--third_party/rust/tokio/src/runtime/scheduler/multi_thread/park.rs232
-rw-r--r--third_party/rust/tokio/src/runtime/scheduler/multi_thread/queue.rs608
-rw-r--r--third_party/rust/tokio/src/runtime/scheduler/multi_thread/stats.rs140
-rw-r--r--third_party/rust/tokio/src/runtime/scheduler/multi_thread/trace.rs61
-rw-r--r--third_party/rust/tokio/src/runtime/scheduler/multi_thread/trace_mock.rs11
-rw-r--r--third_party/rust/tokio/src/runtime/scheduler/multi_thread/worker.rs1216
-rw-r--r--third_party/rust/tokio/src/runtime/scheduler/multi_thread/worker/metrics.rs11
-rw-r--r--third_party/rust/tokio/src/runtime/scheduler/multi_thread/worker/taskdump.rs79
-rw-r--r--third_party/rust/tokio/src/runtime/scheduler/multi_thread/worker/taskdump_mock.rs7
-rw-r--r--third_party/rust/tokio/src/runtime/signal/mod.rs142
-rw-r--r--third_party/rust/tokio/src/runtime/task/abort.rs87
-rw-r--r--third_party/rust/tokio/src/runtime/task/core.rs470
-rw-r--r--third_party/rust/tokio/src/runtime/task/error.rs165
-rw-r--r--third_party/rust/tokio/src/runtime/task/harness.rs501
-rw-r--r--third_party/rust/tokio/src/runtime/task/id.rs87
-rw-r--r--third_party/rust/tokio/src/runtime/task/join.rs366
-rw-r--r--third_party/rust/tokio/src/runtime/task/list.rs319
-rw-r--r--third_party/rust/tokio/src/runtime/task/mod.rs497
-rw-r--r--third_party/rust/tokio/src/runtime/task/raw.rs317
-rw-r--r--third_party/rust/tokio/src/runtime/task/state.rs611
-rw-r--r--third_party/rust/tokio/src/runtime/task/trace/mod.rs330
-rw-r--r--third_party/rust/tokio/src/runtime/task/trace/symbol.rs92
-rw-r--r--third_party/rust/tokio/src/runtime/task/trace/tree.rs126
-rw-r--r--third_party/rust/tokio/src/runtime/task/waker.rs104
-rw-r--r--third_party/rust/tokio/src/runtime/tests/inject.rs54
-rw-r--r--third_party/rust/tokio/src/runtime/tests/loom_blocking.rs102
-rw-r--r--third_party/rust/tokio/src/runtime/tests/loom_current_thread_scheduler.rs142
-rw-r--r--third_party/rust/tokio/src/runtime/tests/loom_join_set.rs82
-rw-r--r--third_party/rust/tokio/src/runtime/tests/loom_local.rs47
-rw-r--r--third_party/rust/tokio/src/runtime/tests/loom_oneshot.rs48
-rw-r--r--third_party/rust/tokio/src/runtime/tests/loom_pool.rs458
-rw-r--r--third_party/rust/tokio/src/runtime/tests/loom_queue.rs205
-rw-r--r--third_party/rust/tokio/src/runtime/tests/loom_shutdown_join.rs28
-rw-r--r--third_party/rust/tokio/src/runtime/tests/loom_yield.rs37
-rw-r--r--third_party/rust/tokio/src/runtime/tests/mod.rs78
-rw-r--r--third_party/rust/tokio/src/runtime/tests/queue.rs283
-rw-r--r--third_party/rust/tokio/src/runtime/tests/task.rs332
-rw-r--r--third_party/rust/tokio/src/runtime/tests/task_combinations.rs487
-rw-r--r--third_party/rust/tokio/src/runtime/thread_id.rs31
-rw-r--r--third_party/rust/tokio/src/runtime/time/entry.rs644
-rw-r--r--third_party/rust/tokio/src/runtime/time/handle.rs62
-rw-r--r--third_party/rust/tokio/src/runtime/time/mod.rs424
-rw-r--r--third_party/rust/tokio/src/runtime/time/source.rs39
-rw-r--r--third_party/rust/tokio/src/runtime/time/tests/mod.rs267
-rw-r--r--third_party/rust/tokio/src/runtime/time/wheel/level.rs274
-rw-r--r--third_party/rust/tokio/src/runtime/time/wheel/mod.rs349
96 files changed, 21122 insertions, 0 deletions
diff --git a/third_party/rust/tokio/src/runtime/blocking/mod.rs b/third_party/rust/tokio/src/runtime/blocking/mod.rs
new file mode 100644
index 0000000000..c42924be77
--- /dev/null
+++ b/third_party/rust/tokio/src/runtime/blocking/mod.rs
@@ -0,0 +1,26 @@
+//! Abstracts out the APIs necessary to `Runtime` for integrating the blocking
+//! pool. When the `blocking` feature flag is **not** enabled, these APIs are
+//! shells. This isolates the complexity of dealing with conditional
+//! compilation.
+
+mod pool;
+pub(crate) use pool::{spawn_blocking, BlockingPool, Spawner};
+
+cfg_fs! {
+ pub(crate) use pool::spawn_mandatory_blocking;
+}
+
+cfg_trace! {
+ pub(crate) use pool::Mandatory;
+}
+
+mod schedule;
+mod shutdown;
+mod task;
+pub(crate) use task::BlockingTask;
+
+use crate::runtime::Builder;
+
+pub(crate) fn create_blocking_pool(builder: &Builder, thread_cap: usize) -> BlockingPool { // Thin wrapper that forwards both arguments to `BlockingPool::new`.
+ BlockingPool::new(builder, thread_cap)
+}
diff --git a/third_party/rust/tokio/src/runtime/blocking/pool.rs b/third_party/rust/tokio/src/runtime/blocking/pool.rs
new file mode 100644
index 0000000000..a23b0a0d2d
--- /dev/null
+++ b/third_party/rust/tokio/src/runtime/blocking/pool.rs
@@ -0,0 +1,602 @@
+//! Thread pool for blocking operations
+
+use crate::loom::sync::{Arc, Condvar, Mutex};
+use crate::loom::thread;
+use crate::runtime::blocking::schedule::BlockingSchedule;
+use crate::runtime::blocking::{shutdown, BlockingTask};
+use crate::runtime::builder::ThreadNameFn;
+use crate::runtime::task::{self, JoinHandle};
+use crate::runtime::{Builder, Callback, Handle};
+
+use std::collections::{HashMap, VecDeque};
+use std::fmt;
+use std::io;
+use std::sync::atomic::{AtomicUsize, Ordering};
+use std::time::Duration;
+
+pub(crate) struct BlockingPool { // Owns the pool's `Spawner` plus the receiving half of its shutdown channel.
+ spawner: Spawner, // handle through which blocking tasks are submitted
+ shutdown_rx: shutdown::Receiver, // waited on in `shutdown` until the workers have dropped their senders
+}
+
+#[derive(Clone)]
+pub(crate) struct Spawner { // Cloneable handle to the pool; clones share one `Inner` through the `Arc`.
+ inner: Arc<Inner>,
+}
+
+#[derive(Default)]
+pub(crate) struct SpawnerMetrics { // Atomic counters describing the pool; `Default` starts each one at zero.
+ num_threads: AtomicUsize, // incremented after a worker is spawned, decremented when a worker exits
+ num_idle_threads: AtomicUsize, // incremented when a worker goes idle, decremented when `spawn_task` notifies one
+ queue_depth: AtomicUsize, // incremented on queue push, decremented on queue pop
+}
+
+impl SpawnerMetrics { // Accessors and mutators for the counters; every operation uses `Ordering::Relaxed`.
+ fn num_threads(&self) -> usize {
+ self.num_threads.load(Ordering::Relaxed)
+ }
+
+ fn num_idle_threads(&self) -> usize {
+ self.num_idle_threads.load(Ordering::Relaxed)
+ }
+
+ cfg_metrics! {
+ fn queue_depth(&self) -> usize {
+ self.queue_depth.load(Ordering::Relaxed)
+ }
+ }
+
+ fn inc_num_threads(&self) {
+ self.num_threads.fetch_add(1, Ordering::Relaxed);
+ }
+
+ fn dec_num_threads(&self) {
+ self.num_threads.fetch_sub(1, Ordering::Relaxed);
+ }
+
+ fn inc_num_idle_threads(&self) {
+ self.num_idle_threads.fetch_add(1, Ordering::Relaxed);
+ }
+
+ fn dec_num_idle_threads(&self) -> usize { // Returns the value held BEFORE the decrement (`fetch_sub` returns the previous value).
+ self.num_idle_threads.fetch_sub(1, Ordering::Relaxed)
+ }
+
+ fn inc_queue_depth(&self) {
+ self.queue_depth.fetch_add(1, Ordering::Relaxed);
+ }
+
+ fn dec_queue_depth(&self) {
+ self.queue_depth.fetch_sub(1, Ordering::Relaxed);
+ }
+}
+
+struct Inner { // Pool state shared by every `Spawner` clone and every worker thread.
+ /// State shared between worker threads.
+ shared: Mutex<Shared>,
+
+ /// Pool threads wait on this.
+ condvar: Condvar,
+
+ /// Spawned threads use this name.
+ thread_name: ThreadNameFn,
+
+ /// Spawned thread stack size.
+ stack_size: Option<usize>,
+
+ /// Call after a thread starts.
+ after_start: Option<Callback>,
+
+ /// Call before a thread stops.
+ before_stop: Option<Callback>,
+
+ /// Maximum number of threads; `spawn_task` spawns no new worker once `num_threads` equals it.
+ thread_cap: usize,
+
+ /// Customizable wait timeout, passed to `Condvar::wait_timeout` by idle workers.
+ keep_alive: Duration,
+
+ /// Metrics about the pool.
+ metrics: SpawnerMetrics,
+}
+
+struct Shared { // Mutable pool state; only accessed through `Inner::shared`'s mutex.
+ queue: VecDeque<Task>, // pending tasks: pushed at the back by `spawn_task`, popped from the front by workers
+ num_notify: u32, // wakeups issued but not yet acknowledged; lets workers tell real wakeups from spurious ones
+ shutdown: bool, // set to `true` by `BlockingPool::shutdown`
+ shutdown_tx: Option<shutdown::Sender>, // cloned for each spawned worker; set to `None` by `BlockingPool::shutdown`
+ /// Prior to shutdown, we clean up JoinHandles by having each timed-out
+ /// thread join on the previous timed-out thread. This is not strictly
+ /// necessary but helps avoid Valgrind false positives, see
+ /// <https://github.com/tokio-rs/tokio/commit/646fbae76535e397ef79dbcaacb945d4c829f666>
+ /// for more information.
+ last_exiting_thread: Option<thread::JoinHandle<()>>,
+ /// This holds the JoinHandles for all running threads; on shutdown, the thread
+ /// calling shutdown handles joining on these.
+ worker_threads: HashMap<usize, thread::JoinHandle<()>>,
+ /// This is a counter used to iterate worker_threads in a consistent order (for loom's
+ /// benefit).
+ worker_thread_index: usize,
+}
+
+pub(crate) struct Task { // A queued unit of blocking work together with its shutdown policy.
+ task: task::UnownedTask<BlockingSchedule>,
+ mandatory: Mandatory, // consulted by `shutdown_or_run_if_mandatory` when the queue is drained at shutdown
+}
+
+#[derive(PartialEq, Eq)]
+pub(crate) enum Mandatory { // Shutdown policy consulted by `Task::shutdown_or_run_if_mandatory`.
+ #[cfg_attr(not(fs), allow(dead_code))]
+ Mandatory, // the task is still run when drained during shutdown
+ NonMandatory, // the task is shut down without running when drained during shutdown
+}
+
+pub(crate) enum SpawnError { // Reasons `Spawner::spawn_task` can report that a task was not scheduled.
+ /// Pool is shutting down and the task was not scheduled
+ ShuttingDown,
+ /// There are no worker threads available to take the task
+ /// and the OS failed to spawn a new one
+ NoThreads(io::Error),
+}
+
+impl From<SpawnError> for io::Error { // Lets spawn failures be surfaced as `io::Error`.
+ fn from(e: SpawnError) -> Self {
+ match e {
+ SpawnError::ShuttingDown => { // no underlying OS error exists, so an `Other` error is synthesized
+ io::Error::new(io::ErrorKind::Other, "blocking pool shutting down")
+ }
+ SpawnError::NoThreads(e) => e, // the wrapped OS error is returned unchanged
+ }
+ }
+}
+
+impl Task {
+ pub(crate) fn new(task: task::UnownedTask<BlockingSchedule>, mandatory: Mandatory) -> Task { // Pairs a task with its shutdown policy.
+ Task { task, mandatory }
+ }
+
+ fn run(self) { // Consumes the wrapper and runs the task, regardless of `mandatory`.
+ self.task.run();
+ }
+
+ fn shutdown_or_run_if_mandatory(self) { // Used while draining the queue at shutdown: only mandatory tasks are still run.
+ match self.mandatory {
+ Mandatory::NonMandatory => self.task.shutdown(),
+ Mandatory::Mandatory => self.task.run(),
+ }
+ }
+}
+
+const KEEP_ALIVE: Duration = Duration::from_secs(10); // Default worker keep-alive, used by `BlockingPool::new` when `builder.keep_alive` is `None`.
+
+/// Runs the provided function on an executor dedicated to blocking operations.
+/// Tasks will be scheduled as non-mandatory, meaning they may not get executed
+/// in case of runtime shutdown.
+#[track_caller]
+#[cfg_attr(tokio_wasi, allow(dead_code))]
+pub(crate) fn spawn_blocking<F, R>(func: F) -> JoinHandle<R>
+where
+ F: FnOnce() -> R + Send + 'static,
+ R: Send + 'static,
+{
+ let rt = Handle::current(); // the handle is looked up here rather than passed in by the caller
+ rt.spawn_blocking(func) // delegates to `Handle::spawn_blocking`
+}
+
+cfg_fs! {
+ #[cfg_attr(any(
+ all(loom, not(test)), // the function is covered by loom tests
+ test
+ ), allow(dead_code))]
+ /// Runs the provided function on an executor dedicated to blocking
+ /// operations. Tasks will be scheduled as mandatory, meaning they are
+ /// guaranteed to run unless a shutdown is already taking place. In case a
+ /// shutdown is already taking place, `None` will be returned.
+ pub(crate) fn spawn_mandatory_blocking<F, R>(func: F) -> Option<JoinHandle<R>>
+ where
+ F: FnOnce() -> R + Send + 'static,
+ R: Send + 'static,
+ {
+ let rt = Handle::current(); // the handle is looked up here rather than passed in by the caller
+ rt.inner.blocking_spawner().spawn_mandatory_blocking(&rt, func) // delegates to `Spawner::spawn_mandatory_blocking`
+ }
+}
+
+// ===== impl BlockingPool =====
+
+impl BlockingPool {
+ pub(crate) fn new(builder: &Builder, thread_cap: usize) -> BlockingPool { // Builds an empty pool from `builder`; no worker thread is spawned here.
+ let (shutdown_tx, shutdown_rx) = shutdown::channel();
+ let keep_alive = builder.keep_alive.unwrap_or(KEEP_ALIVE); // fall back to the `KEEP_ALIVE` default
+
+ BlockingPool {
+ spawner: Spawner {
+ inner: Arc::new(Inner {
+ shared: Mutex::new(Shared {
+ queue: VecDeque::new(),
+ num_notify: 0,
+ shutdown: false,
+ shutdown_tx: Some(shutdown_tx),
+ last_exiting_thread: None,
+ worker_threads: HashMap::new(),
+ worker_thread_index: 0,
+ }),
+ condvar: Condvar::new(),
+ thread_name: builder.thread_name.clone(),
+ stack_size: builder.thread_stack_size,
+ after_start: builder.after_start.clone(),
+ before_stop: builder.before_stop.clone(),
+ thread_cap,
+ keep_alive,
+ metrics: Default::default(),
+ }),
+ },
+ shutdown_rx,
+ }
+ }
+
+ pub(crate) fn spawner(&self) -> &Spawner { // Borrows the pool's spawner.
+ &self.spawner
+ }
+
+ pub(crate) fn shutdown(&mut self, timeout: Option<Duration>) { // Flags shutdown, wakes all workers, then waits (bounded by `timeout`) and joins them.
+ let mut shared = self.spawner.inner.shared.lock();
+
+ // The function can be called multiple times. First, by explicitly
+ // calling `shutdown` then by the drop handler calling `shutdown`. This
+ // prevents shutting down twice.
+ if shared.shutdown {
+ return;
+ }
+
+ shared.shutdown = true;
+ shared.shutdown_tx = None; // drop the pool's own sender so only worker-held clones remain
+ self.spawner.inner.condvar.notify_all(); // wake every waiting worker so it observes `shutdown`
+
+ let last_exited_thread = std::mem::take(&mut shared.last_exiting_thread);
+ let workers = std::mem::take(&mut shared.worker_threads);
+
+ drop(shared); // release the lock before blocking; workers re-acquire it in their loop
+
+ if self.shutdown_rx.wait(timeout) { // on `false` (timeout elapsed) the join handles are dropped without joining
+ let _ = last_exited_thread.map(|th| th.join());
+
+ // Loom requires that execution be deterministic, so sort by thread ID before joining.
+ // (HashMaps use a randomly-seeded hash function, so the order is nondeterministic)
+ let mut workers: Vec<(usize, thread::JoinHandle<()>)> = workers.into_iter().collect();
+ workers.sort_by_key(|(id, _)| *id);
+
+ for (_id, handle) in workers.into_iter() {
+ let _ = handle.join(); // the join result is deliberately ignored
+ }
+ }
+ }
+}
+
+impl Drop for BlockingPool {
+ fn drop(&mut self) {
+ self.shutdown(None); // no timeout; returns immediately if `shutdown` already ran (see its `shared.shutdown` check)
+ }
+}
+
+impl fmt::Debug for BlockingPool { // Opaque output: only the struct name is printed, no fields.
+ fn fmt(&self, fmt: &mut fmt::Formatter<'_>) -> fmt::Result {
+ fmt.debug_struct("BlockingPool").finish()
+ }
+}
+
+// ===== impl Spawner =====
+
+impl Spawner {
+ #[track_caller]
+ pub(crate) fn spawn_blocking<F, R>(&self, rt: &Handle, func: F) -> JoinHandle<R> // Submits `func` as a non-mandatory task; panics only on `SpawnError::NoThreads`.
+ where
+ F: FnOnce() -> R + Send + 'static,
+ R: Send + 'static,
+ {
+ let (join_handle, spawn_result) =
+ if cfg!(debug_assertions) && std::mem::size_of::<F>() > 2048 { // debug builds box closures larger than 2048 bytes
+ self.spawn_blocking_inner(Box::new(func), Mandatory::NonMandatory, None, rt)
+ } else {
+ self.spawn_blocking_inner(func, Mandatory::NonMandatory, None, rt)
+ };
+
+ match spawn_result {
+ Ok(()) => join_handle,
+ // Compat: do not panic here, return the join_handle even though it will never resolve
+ Err(SpawnError::ShuttingDown) => join_handle,
+ Err(SpawnError::NoThreads(e)) => {
+ panic!("OS can't spawn worker thread: {}", e)
+ }
+ }
+ }
+
+ cfg_fs! {
+ #[track_caller]
+ #[cfg_attr(any(
+ all(loom, not(test)), // the function is covered by loom tests
+ test
+ ), allow(dead_code))]
+ pub(crate) fn spawn_mandatory_blocking<F, R>(&self, rt: &Handle, func: F) -> Option<JoinHandle<R>> // Submits `func` as a mandatory task; `None` on any spawn error.
+ where
+ F: FnOnce() -> R + Send + 'static,
+ R: Send + 'static,
+ {
+ let (join_handle, spawn_result) = if cfg!(debug_assertions) && std::mem::size_of::<F>() > 2048 { // same boxing rule as `spawn_blocking`
+ self.spawn_blocking_inner(
+ Box::new(func),
+ Mandatory::Mandatory,
+ None,
+ rt,
+ )
+ } else {
+ self.spawn_blocking_inner(
+ func,
+ Mandatory::Mandatory,
+ None,
+ rt,
+ )
+ };
+
+ if spawn_result.is_ok() {
+ Some(join_handle)
+ } else {
+ None
+ }
+ }
+ }
+
+ #[track_caller]
+ pub(crate) fn spawn_blocking_inner<F, R>( // Wraps `func` in a `BlockingTask`, queues it, and returns the join handle with the spawn result.
+ &self,
+ func: F,
+ is_mandatory: Mandatory,
+ name: Option<&str>,
+ rt: &Handle,
+ ) -> (JoinHandle<R>, Result<(), SpawnError>)
+ where
+ F: FnOnce() -> R + Send + 'static,
+ R: Send + 'static,
+ {
+ let fut = BlockingTask::new(func);
+ let id = task::Id::next();
+ #[cfg(all(tokio_unstable, feature = "tracing"))]
+ let fut = {
+ use tracing::Instrument;
+ let location = std::panic::Location::caller();
+ let span = tracing::trace_span!(
+ target: "tokio::task::blocking",
+ "runtime.spawn",
+ kind = %"blocking",
+ task.name = %name.unwrap_or_default(),
+ task.id = id.as_u64(),
+ "fn" = %std::any::type_name::<F>(),
+ loc.file = location.file(),
+ loc.line = location.line(),
+ loc.col = location.column(),
+ );
+ fut.instrument(span)
+ };
+
+ #[cfg(not(all(tokio_unstable, feature = "tracing")))]
+ let _ = name; // `name` is only consumed by the tracing span above
+
+ let (task, handle) = task::unowned(fut, BlockingSchedule::new(rt), id);
+
+ let spawned = self.spawn_task(Task::new(task, is_mandatory), rt);
+ (handle, spawned)
+ }
+
+ fn spawn_task(&self, task: Task, rt: &Handle) -> Result<(), SpawnError> { // Queues `task`, then either notifies an idle worker or tries to spawn a new one.
+ let mut shared = self.inner.shared.lock();
+
+ if shared.shutdown {
+ // Shutdown the task: it's fine to shutdown this task (even if
+ // mandatory) because it was scheduled after the shutdown of the
+ // runtime began.
+ task.task.shutdown();
+
+ // no need to even push this task; it would never get picked up
+ return Err(SpawnError::ShuttingDown);
+ }
+
+ shared.queue.push_back(task);
+ self.inner.metrics.inc_queue_depth();
+
+ if self.inner.metrics.num_idle_threads() == 0 {
+ // No threads are able to process the task.
+
+ if self.inner.metrics.num_threads() == self.inner.thread_cap {
+ // At max number of threads: nothing is spawned and the task stays in the queue.
+ } else {
+ assert!(shared.shutdown_tx.is_some()); // only `BlockingPool::shutdown` clears it, and `shutdown` was checked above under this lock
+ let shutdown_tx = shared.shutdown_tx.clone();
+
+ if let Some(shutdown_tx) = shutdown_tx {
+ let id = shared.worker_thread_index;
+
+ match self.spawn_thread(shutdown_tx, rt, id) {
+ Ok(handle) => {
+ self.inner.metrics.inc_num_threads();
+ shared.worker_thread_index += 1;
+ shared.worker_threads.insert(id, handle);
+ }
+ Err(ref e)
+ if is_temporary_os_thread_error(e)
+ && self.inner.metrics.num_threads() > 0 =>
+ {
+ // OS temporarily failed to spawn a new thread.
+ // The task will be picked up eventually by a currently
+ // busy thread.
+ }
+ Err(e) => {
+ // The OS refused to spawn the thread and there is no thread
+ // to pick up the task that has just been pushed to the queue.
+ return Err(SpawnError::NoThreads(e));
+ }
+ }
+ }
+ }
+ } else {
+ // Notify an idle worker thread. The notification counter
+ // is used to count the needed amount of notifications
+ // exactly. Thread libraries may generate spurious
+ // wakeups, this counter is used to keep us in a
+ // consistent state.
+ self.inner.metrics.dec_num_idle_threads();
+ shared.num_notify += 1;
+ self.inner.condvar.notify_one();
+ }
+
+ Ok(())
+ }
+
+ fn spawn_thread( // Spawns a named worker thread that enters the runtime context and runs `Inner::run(id)`.
+ &self,
+ shutdown_tx: shutdown::Sender,
+ rt: &Handle,
+ id: usize,
+ ) -> std::io::Result<thread::JoinHandle<()>> {
+ let mut builder = thread::Builder::new().name((self.inner.thread_name)());
+
+ if let Some(stack_size) = self.inner.stack_size {
+ builder = builder.stack_size(stack_size);
+ }
+
+ let rt = rt.clone();
+
+ builder.spawn(move || {
+ // Only the reference should be moved into the closure
+ let _enter = rt.enter();
+ rt.inner.blocking_spawner().inner.run(id);
+ drop(shutdown_tx); // released only after the worker loop returns; the shutdown receiver waits for all senders to drop
+ })
+ }
+}
+
+cfg_metrics! {
+ impl Spawner { // Read-only accessors that forward to the pool's `SpawnerMetrics`.
+ pub(crate) fn num_threads(&self) -> usize {
+ self.inner.metrics.num_threads()
+ }
+
+ pub(crate) fn num_idle_threads(&self) -> usize {
+ self.inner.metrics.num_idle_threads()
+ }
+
+ pub(crate) fn queue_depth(&self) -> usize {
+ self.inner.metrics.queue_depth()
+ }
+ }
+}
+
+// Tells whether the error when spawning a thread is temporary; only `ErrorKind::WouldBlock` is treated as such.
+#[inline]
+fn is_temporary_os_thread_error(error: &std::io::Error) -> bool {
+ matches!(error.kind(), std::io::ErrorKind::WouldBlock)
+}
+
+impl Inner {
+ fn run(&self, worker_thread_id: usize) { // Worker loop: run queued tasks, wait on the condvar when idle, exit on keep-alive timeout or shutdown.
+ if let Some(f) = &self.after_start {
+ f()
+ }
+
+ let mut shared = self.shared.lock();
+ let mut join_on_thread = None; // handle of the previously timed-out worker, joined at the very end
+
+ 'main: loop {
+ // BUSY
+ while let Some(task) = shared.queue.pop_front() {
+ self.metrics.dec_queue_depth();
+ drop(shared); // the lock is not held while the task runs
+ task.run();
+
+ shared = self.shared.lock();
+ }
+
+ // IDLE
+ self.metrics.inc_num_idle_threads();
+
+ while !shared.shutdown {
+ let lock_result = self.condvar.wait_timeout(shared, self.keep_alive).unwrap();
+
+ shared = lock_result.0;
+ let timeout_result = lock_result.1;
+
+ if shared.num_notify != 0 {
+ // We have received a legitimate wakeup,
+ // acknowledge it by decrementing the counter
+ // and transition to the BUSY state.
+ shared.num_notify -= 1;
+ break;
+ }
+
+ // Even if the condvar "timed out", if the pool is entering the
+ // shutdown phase, we want to perform the cleanup logic.
+ if !shared.shutdown && timeout_result.timed_out() {
+ // We'll join the prior timed-out thread's JoinHandle after dropping the lock.
+ // This isn't done when shutting down, because the thread calling shutdown will
+ // handle joining everything.
+ let my_handle = shared.worker_threads.remove(&worker_thread_id);
+ join_on_thread = std::mem::replace(&mut shared.last_exiting_thread, my_handle);
+
+ break 'main;
+ }
+
+ // Spurious wakeup detected, go back to sleep.
+ }
+
+ if shared.shutdown {
+ // Drain the queue
+ while let Some(task) = shared.queue.pop_front() {
+ self.metrics.dec_queue_depth();
+ drop(shared);
+
+ task.shutdown_or_run_if_mandatory();
+
+ shared = self.shared.lock();
+ }
+
+ // Work was produced, and we "took" it (by decrementing num_notify).
+ // This means that num_idle was decremented once for our wakeup.
+ // But, since we are exiting, we need to "undo" that, as we'll stay idle.
+ self.metrics.inc_num_idle_threads();
+ // NOTE: Technically we should also do num_notify++ and notify again,
+ // but since we're shutting down anyway, that won't be necessary.
+ break;
+ }
+ }
+
+ // Thread exit
+ self.metrics.dec_num_threads();
+
+ // num_idle should now be tracked exactly, panic
+ // with a descriptive message if it is not the
+ // case.
+ let prev_idle = self.metrics.dec_num_idle_threads(); // value before the decrement
+ if prev_idle < self.metrics.num_idle_threads() { // the old value is below the new one only if the counter wrapped around
+ panic!("num_idle_threads underflowed on thread exit")
+ }
+
+ if shared.shutdown && self.metrics.num_threads() == 0 {
+ self.condvar.notify_one();
+ }
+
+ drop(shared);
+
+ if let Some(f) = &self.before_stop {
+ f()
+ }
+
+ if let Some(handle) = join_on_thread {
+ let _ = handle.join(); // done after the lock is released; the join result is ignored
+ }
+ }
+}
+
+impl fmt::Debug for Spawner { // Opaque output: only the name "blocking::Spawner" is printed, no fields.
+ fn fmt(&self, fmt: &mut fmt::Formatter<'_>) -> fmt::Result {
+ fmt.debug_struct("blocking::Spawner").finish()
+ }
+}
diff --git a/third_party/rust/tokio/src/runtime/blocking/schedule.rs b/third_party/rust/tokio/src/runtime/blocking/schedule.rs
new file mode 100644
index 0000000000..edf775be8b
--- /dev/null
+++ b/third_party/rust/tokio/src/runtime/blocking/schedule.rs
@@ -0,0 +1,56 @@
+#[cfg(feature = "test-util")]
+use crate::runtime::scheduler;
+use crate::runtime::task::{self, Task};
+use crate::runtime::Handle;
+
+/// `task::Schedule` implementation that does nothing (except some bookkeeping
+/// in test-util builds). This is unique to the blocking scheduler as tasks
+/// scheduled are not really futures but blocking operations.
+///
+/// We avoid storing the task by forgetting it in `bind` and re-materializing it
+/// in `release`. NOTE(review): no `bind` is visible in this file and `release` returns `None`; this sentence may be stale — confirm.
+pub(crate) struct BlockingSchedule {
+ #[cfg(feature = "test-util")]
+ handle: Handle, // only present with `test-util`; `release` uses it to reach the clock and driver
+}
+
+impl BlockingSchedule {
+ #[cfg_attr(not(feature = "test-util"), allow(unused_variables))]
+ pub(crate) fn new(handle: &Handle) -> Self { // Without `test-util` this only constructs the (then field-less) value.
+ #[cfg(feature = "test-util")]
+ {
+ match &handle.inner {
+ scheduler::Handle::CurrentThread(handle) => {
+ handle.driver.clock.inhibit_auto_advance(); // paired with `allow_auto_advance` in `release`
+ }
+ #[cfg(all(feature = "rt-multi-thread", not(tokio_wasi)))]
+ scheduler::Handle::MultiThread(_) => {}
+ }
+ }
+ BlockingSchedule {
+ #[cfg(feature = "test-util")]
+ handle: handle.clone(),
+ }
+ }
+}
+
+impl task::Schedule for BlockingSchedule {
+ fn release(&self, _task: &Task<Self>) -> Option<Task<Self>> { // Always returns `None`; only does clock/driver bookkeeping under `test-util`.
+ #[cfg(feature = "test-util")]
+ {
+ match &self.handle.inner {
+ scheduler::Handle::CurrentThread(handle) => {
+ handle.driver.clock.allow_auto_advance(); // undoes the `inhibit_auto_advance` call made in `new`
+ handle.driver.unpark();
+ }
+ #[cfg(all(feature = "rt-multi-thread", not(tokio_wasi)))]
+ scheduler::Handle::MultiThread(_) => {}
+ }
+ }
+ None
+ }
+
+ fn schedule(&self, _task: task::Notified<Self>) { // Not expected to be called for blocking tasks; panics via `unreachable!` if it is.
+ unreachable!();
+ }
+}
diff --git a/third_party/rust/tokio/src/runtime/blocking/shutdown.rs b/third_party/rust/tokio/src/runtime/blocking/shutdown.rs
new file mode 100644
index 0000000000..fe5abae076
--- /dev/null
+++ b/third_party/rust/tokio/src/runtime/blocking/shutdown.rs
@@ -0,0 +1,71 @@
+//! A shutdown channel.
+//!
+//! Each worker holds the `Sender` half. When all the `Sender` halves are
+//! dropped, the `Receiver` receives a notification.
+
+use crate::loom::sync::Arc;
+use crate::sync::oneshot;
+
+use std::time::Duration;
+
+#[derive(Debug, Clone)]
+pub(super) struct Sender { // Cloneable shutdown token; clones share one oneshot sender through the `Arc`.
+ _tx: Arc<oneshot::Sender<()>>, // never read in this file; held only so the inner sender drops with the last clone
+}
+
+#[derive(Debug)]
+pub(super) struct Receiver { // Waiting half of the shutdown channel; see `wait`.
+ rx: oneshot::Receiver<()>,
+}
+
+pub(super) fn channel() -> (Sender, Receiver) { // Creates a connected pair backed by a single `oneshot` channel.
+ let (tx, rx) = oneshot::channel();
+ let tx = Sender { _tx: Arc::new(tx) }; // wrapped in an `Arc` so the sender can be cloned per worker
+ let rx = Receiver { rx };
+
+ (tx, rx)
+}
+
+impl Receiver {
+ /// Blocks the current thread until all `Sender` handles drop.
+ ///
+ /// If `timeout` is `Some`, the thread is blocked for **at most** `timeout`
+ /// duration. If `timeout` is `None`, then the thread is blocked until the
+ /// shutdown signal is received.
+ ///
+ /// If the timeout has elapsed, it returns `false`, otherwise it returns `true`.
+ pub(crate) fn wait(&mut self, timeout: Option<Duration>) -> bool {
+ use crate::runtime::context::try_enter_blocking_region;
+
+ if timeout == Some(Duration::from_nanos(0)) { // a zero timeout returns `false` immediately, without entering a blocking region
+ return false;
+ }
+
+ let mut e = match try_enter_blocking_region() {
+ Some(enter) => enter,
+ _ => {
+ if std::thread::panicking() {
+ // Don't panic in a panic
+ return false;
+ } else {
+ panic!(
+ "Cannot drop a runtime in a context where blocking is not allowed. \
+ This happens when a runtime is dropped from within an asynchronous context."
+ );
+ }
+ }
+ };
+
+ // The oneshot completes with an Err
+ //
+ // If blocking fails to wait, this indicates a problem parking the
+ // current thread (usually, shutting down a runtime stored in a
+ // thread-local).
+ if let Some(timeout) = timeout {
+ e.block_on_timeout(&mut self.rx, timeout).is_ok()
+ } else {
+ let _ = e.block_on(&mut self.rx); // the result is ignored; this branch always reports `true`
+ true
+ }
+ }
+}
diff --git a/third_party/rust/tokio/src/runtime/blocking/task.rs b/third_party/rust/tokio/src/runtime/blocking/task.rs
new file mode 100644
index 0000000000..c446175400
--- /dev/null
+++ b/third_party/rust/tokio/src/runtime/blocking/task.rs
@@ -0,0 +1,44 @@
+use std::future::Future;
+use std::pin::Pin;
+use std::task::{Context, Poll};
+
+/// Converts a function to a future that completes on poll.
+pub(crate) struct BlockingTask<T> {
+ func: Option<T>, // `Some` until `poll` takes the function out
+}
+
+impl<T> BlockingTask<T> {
+ /// Initializes a new blocking task from the given function; the function is stored, not called.
+ pub(crate) fn new(func: T) -> BlockingTask<T> {
+ BlockingTask { func: Some(func) }
+ }
+}
+
+// The closure `T` is never pinned
+impl<T> Unpin for BlockingTask<T> {}
+
+impl<T, R> Future for BlockingTask<T>
+where
+ T: FnOnce() -> R + Send + 'static,
+ R: Send + 'static,
+{
+ type Output = R;
+
+ fn poll(mut self: Pin<&mut Self>, _cx: &mut Context<'_>) -> Poll<R> { // Runs the function synchronously and always returns `Poll::Ready`; panics on a second poll.
+ let me = &mut *self;
+ let func = me
+ .func
+ .take()
+ .expect("[internal exception] blocking task ran twice.");
+
+ // This is a little subtle:
+ // For convenience, we'd like _every_ call tokio ever makes to Task::poll() to be budgeted
+ // using coop. However, the way things are currently modeled, even running a blocking task
+ // currently goes through Task::poll(), and so is subject to budgeting. That isn't really
+ // what we want; a blocking task may itself want to run tasks (it might be a Worker!), so
+ // we want it to start without any budgeting.
+ crate::runtime::coop::stop();
+
+ Poll::Ready(func())
+ }
+}
diff --git a/third_party/rust/tokio/src/runtime/builder.rs b/third_party/rust/tokio/src/runtime/builder.rs
new file mode 100644
index 0000000000..af9e0e172f
--- /dev/null
+++ b/third_party/rust/tokio/src/runtime/builder.rs
@@ -0,0 +1,1236 @@
+use crate::runtime::handle::Handle;
+use crate::runtime::{blocking, driver, Callback, HistogramBuilder, Runtime};
+use crate::util::rand::{RngSeed, RngSeedGenerator};
+
+use std::fmt;
+use std::io;
+use std::time::Duration;
+
+/// Builds Tokio Runtime with custom configuration values.
+///
+/// Methods can be chained in order to set the configuration values. The
+/// Runtime is constructed by calling [`build`].
+///
+/// New instances of `Builder` are obtained via [`Builder::new_multi_thread`]
+/// or [`Builder::new_current_thread`].
+///
+/// See function level documentation for details on the various configuration
+/// settings.
+///
+/// [`build`]: method@Self::build
+/// [`Builder::new_multi_thread`]: method@Self::new_multi_thread
+/// [`Builder::new_current_thread`]: method@Self::new_current_thread
+///
+/// # Examples
+///
+/// ```
+/// use tokio::runtime::Builder;
+///
+/// fn main() {
+/// // build runtime
+/// let runtime = Builder::new_multi_thread()
+/// .worker_threads(4)
+/// .thread_name("my-custom-name")
+/// .thread_stack_size(3 * 1024 * 1024)
+/// .build()
+/// .unwrap();
+///
+/// // use runtime ...
+/// }
+/// ```
+pub struct Builder {
+ /// Runtime type
+ kind: Kind,
+
+ /// Whether or not to enable the I/O driver
+ enable_io: bool,
+ nevents: usize,
+
+ /// Whether or not to enable the time driver
+ enable_time: bool,
+
+ /// Whether or not the clock should start paused.
+ start_paused: bool,
+
+ /// The number of worker threads, used by Runtime.
+ ///
+ /// Only used when not using the current-thread executor.
+ worker_threads: Option<usize>,
+
+ /// Cap on thread usage.
+ max_blocking_threads: usize,
+
+ /// Name fn used for threads spawned by the runtime.
+ pub(super) thread_name: ThreadNameFn,
+
+ /// Stack size used for threads spawned by the runtime.
+ pub(super) thread_stack_size: Option<usize>,
+
+ /// Callback to run after each thread starts.
+ pub(super) after_start: Option<Callback>,
+
+ /// To run before each worker thread stops
+ pub(super) before_stop: Option<Callback>,
+
+ /// To run before each worker thread is parked.
+ pub(super) before_park: Option<Callback>,
+
+ /// To run after each thread is unparked.
+ pub(super) after_unpark: Option<Callback>,
+
+ /// Customizable keep alive timeout for BlockingPool
+ pub(super) keep_alive: Option<Duration>,
+
+ /// How many ticks before pulling a task from the global/remote queue?
+ ///
+ /// When `None`, the value is unspecified and behavior details are left to
+ /// the scheduler. Each scheduler flavor could choose to either pick its own
+ /// default value or use some other strategy to decide when to poll from the
+ /// global queue. For example, the multi-threaded scheduler uses a
+ /// self-tuning strategy based on mean task poll times.
+ pub(super) global_queue_interval: Option<u32>,
+
+ /// How many ticks before yielding to the driver for timer and I/O events?
+ pub(super) event_interval: u32,
+
+ /// When true, the multi-threaded scheduler LIFO slot should not be used.
+ ///
+ /// This option should only be exposed as unstable.
+ pub(super) disable_lifo_slot: bool,
+
+ /// Specify a random number generator seed to provide deterministic results
+ pub(super) seed_generator: RngSeedGenerator,
+
+ /// When true, enables task poll count histogram instrumentation.
+ pub(super) metrics_poll_count_histogram_enable: bool,
+
+ /// Configures the task poll count histogram
+ pub(super) metrics_poll_count_histogram: HistogramBuilder,
+
+ #[cfg(tokio_unstable)]
+ pub(super) unhandled_panic: UnhandledPanic,
+}
+
+cfg_unstable! {
+ /// How the runtime should respond to unhandled panics.
+ ///
+ /// Instances of `UnhandledPanic` are passed to `Builder::unhandled_panic`
+ /// to configure the runtime behavior when a spawned task panics.
+ ///
+ /// See [`Builder::unhandled_panic`] for more details.
+ #[derive(Debug, Clone)]
+ #[non_exhaustive]
+ pub enum UnhandledPanic {
+ /// The runtime should ignore panics on spawned tasks.
+ ///
+ /// The panic is forwarded to the task's [`JoinHandle`] and all spawned
+ /// tasks continue running normally.
+ ///
+ /// This is the default behavior.
+ ///
+ /// # Examples
+ ///
+ /// ```
+ /// use tokio::runtime::{self, UnhandledPanic};
+ ///
+ /// # pub fn main() {
+ /// let rt = runtime::Builder::new_current_thread()
+ /// .unhandled_panic(UnhandledPanic::Ignore)
+ /// .build()
+ /// .unwrap();
+ ///
+ /// let task1 = rt.spawn(async { panic!("boom"); });
+ /// let task2 = rt.spawn(async {
+ /// // This task completes normally
+ /// "done"
+ /// });
+ ///
+ /// rt.block_on(async {
+ /// // The panic on the first task is forwarded to the `JoinHandle`
+ /// assert!(task1.await.is_err());
+ ///
+ /// // The second task completes normally
+ /// assert!(task2.await.is_ok());
+ /// })
+ /// # }
+ /// ```
+ ///
+ /// [`JoinHandle`]: struct@crate::task::JoinHandle
+ Ignore,
+
+ /// The runtime should immediately shutdown if a spawned task panics.
+ ///
+ /// The runtime will immediately shutdown even if the panicked task's
+ /// [`JoinHandle`] is still available. All further spawned tasks will be
+ /// immediately dropped and calls to [`Runtime::block_on`] will panic.
+ ///
+ /// # Examples
+ ///
+ /// ```should_panic
+ /// use tokio::runtime::{self, UnhandledPanic};
+ ///
+ /// # pub fn main() {
+ /// let rt = runtime::Builder::new_current_thread()
+ /// .unhandled_panic(UnhandledPanic::ShutdownRuntime)
+ /// .build()
+ /// .unwrap();
+ ///
+ /// rt.spawn(async { panic!("boom"); });
+ /// rt.spawn(async {
+ /// // This task never completes.
+ /// });
+ ///
+ /// rt.block_on(async {
+ /// // Do some work
+ /// # loop { tokio::task::yield_now().await; }
+ /// })
+ /// # }
+ /// ```
+ ///
+ /// [`JoinHandle`]: struct@crate::task::JoinHandle
+ ShutdownRuntime,
+ }
+}
+
+pub(crate) type ThreadNameFn = std::sync::Arc<dyn Fn() -> String + Send + Sync + 'static>;
+
+#[derive(Clone, Copy)]
+pub(crate) enum Kind {
+ CurrentThread,
+ #[cfg(all(feature = "rt-multi-thread", not(tokio_wasi)))]
+ MultiThread,
+}
+
+impl Builder {
+ /// Returns a new builder with the current thread scheduler selected.
+ ///
+ /// Configuration methods can be chained on the return value.
+ ///
+ /// To spawn non-`Send` tasks on the resulting runtime, combine it with a
+ /// [`LocalSet`].
+ ///
+ /// [`LocalSet`]: crate::task::LocalSet
+ pub fn new_current_thread() -> Builder {
+ #[cfg(loom)]
+ const EVENT_INTERVAL: u32 = 4;
+ // The number `61` is fairly arbitrary. I believe this value was copied from golang.
+ #[cfg(not(loom))]
+ const EVENT_INTERVAL: u32 = 61;
+
+ Builder::new(Kind::CurrentThread, EVENT_INTERVAL)
+ }
+
+ cfg_not_wasi! {
+ /// Returns a new builder with the multi thread scheduler selected.
+ ///
+ /// Configuration methods can be chained on the return value.
+ #[cfg(feature = "rt-multi-thread")]
+ #[cfg_attr(docsrs, doc(cfg(feature = "rt-multi-thread")))]
+ pub fn new_multi_thread() -> Builder {
+ // The number `61` is fairly arbitrary. I believe this value was copied from golang.
+ Builder::new(Kind::MultiThread, 61)
+ }
+ }
+
+ /// Returns a new runtime builder initialized with default configuration
+ /// values.
+ ///
+ /// Configuration methods can be chained on the return value.
+ pub(crate) fn new(kind: Kind, event_interval: u32) -> Builder {
+ Builder {
+ kind,
+
+ // I/O defaults to "off"
+ enable_io: false,
+ nevents: 1024,
+
+ // Time defaults to "off"
+ enable_time: false,
+
+ // The clock starts not-paused
+ start_paused: false,
+
+ // Read from environment variable first in multi-threaded mode.
+ // Default to lazy auto-detection (one thread per CPU core)
+ worker_threads: None,
+
+ max_blocking_threads: 512,
+
+ // Default thread name
+ thread_name: std::sync::Arc::new(|| "tokio-runtime-worker".into()),
+
+ // Do not set a stack size by default
+ thread_stack_size: None,
+
+ // No worker thread callbacks
+ after_start: None,
+ before_stop: None,
+ before_park: None,
+ after_unpark: None,
+
+ keep_alive: None,
+
+ // Defaults for these values depend on the scheduler kind, so we get them
+ // as parameters.
+ global_queue_interval: None,
+ event_interval,
+
+ seed_generator: RngSeedGenerator::new(RngSeed::new()),
+
+ #[cfg(tokio_unstable)]
+ unhandled_panic: UnhandledPanic::Ignore,
+
+ metrics_poll_count_histogram_enable: false,
+
+ metrics_poll_count_histogram: Default::default(),
+
+ disable_lifo_slot: false,
+ }
+ }
+
+ /// Enables both I/O and time drivers.
+ ///
+ /// Doing this is a shorthand for calling `enable_io` and `enable_time`
+ /// individually. If additional components are added to Tokio in the future,
+ /// `enable_all` will include these future components.
+ ///
+ /// # Examples
+ ///
+ /// ```
+ /// use tokio::runtime;
+ ///
+ /// let rt = runtime::Builder::new_multi_thread()
+ /// .enable_all()
+ /// .build()
+ /// .unwrap();
+ /// ```
+ pub fn enable_all(&mut self) -> &mut Self {
+ #[cfg(any(
+ feature = "net",
+ all(unix, feature = "process"),
+ all(unix, feature = "signal")
+ ))]
+ self.enable_io();
+ #[cfg(feature = "time")]
+ self.enable_time();
+
+ self
+ }
+
+ /// Sets the number of worker threads the `Runtime` will use.
+ ///
+ /// This can be any number above 0 though it is advised to keep this value
+ /// on the smaller side.
+ ///
+ /// This will override the value read from environment variable `TOKIO_WORKER_THREADS`.
+ ///
+ /// # Default
+ ///
+ /// The default value is the number of cores available to the system.
+ ///
+ /// When using the `current_thread` runtime this method has no effect.
+ ///
+ /// # Examples
+ ///
+ /// ## Multi threaded runtime with 4 threads
+ ///
+ /// ```
+ /// use tokio::runtime;
+ ///
+ /// // This will spawn a work-stealing runtime with 4 worker threads.
+ /// let rt = runtime::Builder::new_multi_thread()
+ /// .worker_threads(4)
+ /// .build()
+ /// .unwrap();
+ ///
+ /// rt.spawn(async move {});
+ /// ```
+ ///
+ /// ## Current thread runtime (will only run on the current thread via `Runtime::block_on`)
+ ///
+ /// ```
+ /// use tokio::runtime;
+ ///
+ /// // Create a runtime that _must_ be driven from a call
+ /// // to `Runtime::block_on`.
+ /// let rt = runtime::Builder::new_current_thread()
+ /// .build()
+ /// .unwrap();
+ ///
+ /// // This will run the runtime and future on the current thread
+ /// rt.block_on(async move {});
+ /// ```
+ ///
+ /// # Panics
+ ///
+ /// This will panic if `val` is not larger than `0`.
+ #[track_caller]
+ pub fn worker_threads(&mut self, val: usize) -> &mut Self {
+ assert!(val > 0, "Worker threads cannot be set to 0");
+ self.worker_threads = Some(val);
+ self
+ }
+
+ /// Specifies the limit for additional threads spawned by the Runtime.
+ ///
+ /// These threads are used for blocking operations like tasks spawned
+ /// through [`spawn_blocking`]. Unlike the [`worker_threads`], they are not
+ /// always active and will exit if left idle for too long. You can change
+ /// this timeout duration with [`thread_keep_alive`].
+ ///
+ /// The default value is 512.
+ ///
+ /// # Panics
+ ///
+ /// This will panic if `val` is not larger than `0`.
+ ///
+ /// # Upgrading from 0.x
+ ///
+ /// In old versions `max_threads` limited both blocking and worker threads, but the
+ /// current `max_blocking_threads` does not include async worker threads in the count.
+ ///
+ /// [`spawn_blocking`]: fn@crate::task::spawn_blocking
+ /// [`worker_threads`]: Self::worker_threads
+ /// [`thread_keep_alive`]: Self::thread_keep_alive
+ #[track_caller]
+ #[cfg_attr(docsrs, doc(alias = "max_threads"))]
+ pub fn max_blocking_threads(&mut self, val: usize) -> &mut Self {
+ assert!(val > 0, "Max blocking threads cannot be set to 0");
+ self.max_blocking_threads = val;
+ self
+ }
+
+ /// Sets name of threads spawned by the `Runtime`'s thread pool.
+ ///
+ /// The default name is "tokio-runtime-worker".
+ ///
+ /// # Examples
+ ///
+ /// ```
+ /// # use tokio::runtime;
+ ///
+ /// # pub fn main() {
+ /// let rt = runtime::Builder::new_multi_thread()
+ /// .thread_name("my-pool")
+ /// .build();
+ /// # }
+ /// ```
+ pub fn thread_name(&mut self, val: impl Into<String>) -> &mut Self {
+ let val = val.into();
+ self.thread_name = std::sync::Arc::new(move || val.clone());
+ self
+ }
+
+ /// Sets a function used to generate the name of threads spawned by the `Runtime`'s thread pool.
+ ///
+ /// The default name fn is `|| "tokio-runtime-worker".into()`.
+ ///
+ /// # Examples
+ ///
+ /// ```
+ /// # use tokio::runtime;
+ /// # use std::sync::atomic::{AtomicUsize, Ordering};
+ /// # pub fn main() {
+ /// let rt = runtime::Builder::new_multi_thread()
+ /// .thread_name_fn(|| {
+ /// static ATOMIC_ID: AtomicUsize = AtomicUsize::new(0);
+ /// let id = ATOMIC_ID.fetch_add(1, Ordering::SeqCst);
+ /// format!("my-pool-{}", id)
+ /// })
+ /// .build();
+ /// # }
+ /// ```
+ pub fn thread_name_fn<F>(&mut self, f: F) -> &mut Self
+ where
+ F: Fn() -> String + Send + Sync + 'static,
+ {
+ self.thread_name = std::sync::Arc::new(f);
+ self
+ }
+
+ /// Sets the stack size (in bytes) for worker threads.
+ ///
+ /// The actual stack size may be greater than this value if the platform
+ /// specifies a minimal stack size.
+ ///
+ /// The default stack size for spawned threads is 2 MiB, though this
+ /// particular stack size is subject to change in the future.
+ ///
+ /// # Examples
+ ///
+ /// ```
+ /// # use tokio::runtime;
+ ///
+ /// # pub fn main() {
+ /// let rt = runtime::Builder::new_multi_thread()
+ /// .thread_stack_size(32 * 1024)
+ /// .build();
+ /// # }
+ /// ```
+ pub fn thread_stack_size(&mut self, val: usize) -> &mut Self {
+ self.thread_stack_size = Some(val);
+ self
+ }
+
+ /// Executes function `f` after each thread is started but before it starts
+ /// doing work.
+ ///
+ /// This is intended for bookkeeping and monitoring use cases.
+ ///
+ /// # Examples
+ ///
+ /// ```
+ /// # use tokio::runtime;
+ /// # pub fn main() {
+ /// let runtime = runtime::Builder::new_multi_thread()
+ /// .on_thread_start(|| {
+ /// println!("thread started");
+ /// })
+ /// .build();
+ /// # }
+ /// ```
+ #[cfg(not(loom))]
+ pub fn on_thread_start<F>(&mut self, f: F) -> &mut Self
+ where
+ F: Fn() + Send + Sync + 'static,
+ {
+ self.after_start = Some(std::sync::Arc::new(f));
+ self
+ }
+
+ /// Executes function `f` before each thread stops.
+ ///
+ /// This is intended for bookkeeping and monitoring use cases.
+ ///
+ /// # Examples
+ ///
+ /// ```
+ /// # use tokio::runtime;
+ /// # pub fn main() {
+ /// let runtime = runtime::Builder::new_multi_thread()
+ /// .on_thread_stop(|| {
+ /// println!("thread stopping");
+ /// })
+ /// .build();
+ /// # }
+ /// ```
+ #[cfg(not(loom))]
+ pub fn on_thread_stop<F>(&mut self, f: F) -> &mut Self
+ where
+ F: Fn() + Send + Sync + 'static,
+ {
+ self.before_stop = Some(std::sync::Arc::new(f));
+ self
+ }
+
+ /// Executes function `f` just before a thread is parked (goes idle).
+ /// `f` is called within the Tokio context, so functions like [`tokio::spawn`](crate::spawn)
+ /// can be called, and may result in this thread being unparked immediately.
+ ///
+ /// This can be used to start work only when the executor is idle, or for bookkeeping
+ /// and monitoring purposes.
+ ///
+ /// Note: There can only be one park callback for a runtime; calling this function
+ /// more than once replaces the last callback defined, rather than adding to it.
+ ///
+ /// # Examples
+ ///
+ /// ## Multithreaded executor
+ /// ```
+ /// # use std::sync::Arc;
+ /// # use std::sync::atomic::{AtomicBool, Ordering};
+ /// # use tokio::runtime;
+ /// # use tokio::sync::Barrier;
+ /// # pub fn main() {
+ /// let once = AtomicBool::new(true);
+ /// let barrier = Arc::new(Barrier::new(2));
+ ///
+ /// let runtime = runtime::Builder::new_multi_thread()
+ /// .worker_threads(1)
+ /// .on_thread_park({
+ /// let barrier = barrier.clone();
+ /// move || {
+ /// let barrier = barrier.clone();
+ /// if once.swap(false, Ordering::Relaxed) {
+ /// tokio::spawn(async move { barrier.wait().await; });
+ /// }
+ /// }
+ /// })
+ /// .build()
+ /// .unwrap();
+ ///
+ /// runtime.block_on(async {
+ /// barrier.wait().await;
+ /// })
+ /// # }
+ /// ```
+ /// ## Current thread executor
+ /// ```
+ /// # use std::sync::Arc;
+ /// # use std::sync::atomic::{AtomicBool, Ordering};
+ /// # use tokio::runtime;
+ /// # use tokio::sync::Barrier;
+ /// # pub fn main() {
+ /// let once = AtomicBool::new(true);
+ /// let barrier = Arc::new(Barrier::new(2));
+ ///
+ /// let runtime = runtime::Builder::new_current_thread()
+ /// .on_thread_park({
+ /// let barrier = barrier.clone();
+ /// move || {
+ /// let barrier = barrier.clone();
+ /// if once.swap(false, Ordering::Relaxed) {
+ /// tokio::spawn(async move { barrier.wait().await; });
+ /// }
+ /// }
+ /// })
+ /// .build()
+ /// .unwrap();
+ ///
+ /// runtime.block_on(async {
+ /// barrier.wait().await;
+ /// })
+ /// # }
+ /// ```
+ #[cfg(not(loom))]
+ pub fn on_thread_park<F>(&mut self, f: F) -> &mut Self
+ where
+ F: Fn() + Send + Sync + 'static,
+ {
+ self.before_park = Some(std::sync::Arc::new(f));
+ self
+ }
+
+ /// Executes function `f` just after a thread unparks (starts executing tasks).
+ ///
+ /// This is intended for bookkeeping and monitoring use cases; note that work
+ /// in this callback will increase latencies when the application has allowed one or
+ /// more runtime threads to go idle.
+ ///
+ /// Note: There can only be one unpark callback for a runtime; calling this function
+ /// more than once replaces the last callback defined, rather than adding to it.
+ ///
+ /// # Examples
+ ///
+ /// ```
+ /// # use tokio::runtime;
+ /// # pub fn main() {
+ /// let runtime = runtime::Builder::new_multi_thread()
+ /// .on_thread_unpark(|| {
+ /// println!("thread unparking");
+ /// })
+ /// .build();
+ ///
+ /// runtime.unwrap().block_on(async {
+ /// tokio::task::yield_now().await;
+ /// println!("Hello from Tokio!");
+ /// })
+ /// # }
+ /// ```
+ #[cfg(not(loom))]
+ pub fn on_thread_unpark<F>(&mut self, f: F) -> &mut Self
+ where
+ F: Fn() + Send + Sync + 'static,
+ {
+ self.after_unpark = Some(std::sync::Arc::new(f));
+ self
+ }
+
+ /// Creates the configured `Runtime`.
+ ///
+ /// The returned `Runtime` instance is ready to spawn tasks.
+ ///
+ /// # Examples
+ ///
+ /// ```
+ /// use tokio::runtime::Builder;
+ ///
+ /// let rt = Builder::new_multi_thread().build().unwrap();
+ ///
+ /// rt.block_on(async {
+ /// println!("Hello from the Tokio runtime");
+ /// });
+ /// ```
+ pub fn build(&mut self) -> io::Result<Runtime> {
+ match &self.kind {
+ Kind::CurrentThread => self.build_current_thread_runtime(),
+ #[cfg(all(feature = "rt-multi-thread", not(tokio_wasi)))]
+ Kind::MultiThread => self.build_threaded_runtime(),
+ }
+ }
+
+ fn get_cfg(&self) -> driver::Cfg {
+ driver::Cfg {
+ enable_pause_time: match self.kind {
+ Kind::CurrentThread => true,
+ #[cfg(all(feature = "rt-multi-thread", not(tokio_wasi)))]
+ Kind::MultiThread => false,
+ },
+ enable_io: self.enable_io,
+ enable_time: self.enable_time,
+ start_paused: self.start_paused,
+ nevents: self.nevents,
+ }
+ }
+
+ /// Sets a custom timeout for a thread in the blocking pool.
+ ///
+ /// By default, the timeout for a thread is set to 10 seconds. This can
+ /// be overridden using `.thread_keep_alive()`.
+ ///
+ /// # Example
+ ///
+ /// ```
+ /// # use tokio::runtime;
+ /// # use std::time::Duration;
+ /// # pub fn main() {
+ /// let rt = runtime::Builder::new_multi_thread()
+ /// .thread_keep_alive(Duration::from_millis(100))
+ /// .build();
+ /// # }
+ /// ```
+ pub fn thread_keep_alive(&mut self, duration: Duration) -> &mut Self {
+ self.keep_alive = Some(duration);
+ self
+ }
+
+ /// Sets the number of scheduler ticks after which the scheduler will poll the global
+ /// task queue.
+ ///
+ /// A scheduler "tick" roughly corresponds to one `poll` invocation on a task.
+ ///
+ /// By default the global queue interval is:
+ ///
+ /// * `31` for the current-thread scheduler.
+ /// * `61` for the multithreaded scheduler.
+ ///
+ /// Schedulers have a local queue of already-claimed tasks, and a global queue of incoming
+ /// tasks. Setting the interval to a smaller value increases the fairness of the scheduler,
+ /// at the cost of more synchronization overhead. That can be beneficial for prioritizing
+ /// getting started on new work, especially if tasks frequently yield rather than complete
+ /// or await on further I/O. Conversely, a higher value prioritizes existing work, and
+ /// is a good choice when most tasks quickly complete polling.
+ ///
+ /// # Examples
+ ///
+ /// ```
+ /// # use tokio::runtime;
+ /// # pub fn main() {
+ /// let rt = runtime::Builder::new_multi_thread()
+ /// .global_queue_interval(31)
+ /// .build();
+ /// # }
+ /// ```
+ pub fn global_queue_interval(&mut self, val: u32) -> &mut Self {
+ self.global_queue_interval = Some(val);
+ self
+ }
+
+ /// Sets the number of scheduler ticks after which the scheduler will poll for
+ /// external events (timers, I/O, and so on).
+ ///
+ /// A scheduler "tick" roughly corresponds to one `poll` invocation on a task.
+ ///
+ /// By default, the event interval is `61` for all scheduler types.
+ ///
+ /// Setting the event interval determines the effective "priority" of delivering
+ /// these external events (which may wake up additional tasks), compared to
+ /// executing tasks that are currently ready to run. A smaller value is useful
+ /// when tasks frequently spend a long time in polling, or frequently yield,
+ /// which can result in overly long delays picking up I/O events. Conversely,
+ /// picking up new events requires extra synchronization and syscall overhead,
+ /// so if tasks generally complete their polling quickly, a higher event interval
+ /// will minimize that overhead while still keeping the scheduler responsive to
+ /// events.
+ ///
+ /// # Examples
+ ///
+ /// ```
+ /// # use tokio::runtime;
+ /// # pub fn main() {
+ /// let rt = runtime::Builder::new_multi_thread()
+ /// .event_interval(31)
+ /// .build();
+ /// # }
+ /// ```
+ pub fn event_interval(&mut self, val: u32) -> &mut Self {
+ self.event_interval = val;
+ self
+ }
+
+ cfg_unstable! {
+ /// Configure how the runtime responds to an unhandled panic on a
+ /// spawned task.
+ ///
+ /// By default, an unhandled panic (i.e. a panic not caught by
+ /// [`std::panic::catch_unwind`]) has no impact on the runtime's
+ /// execution. The panic's error value is forwarded to the task's
+ /// [`JoinHandle`] and all other spawned tasks continue running.
+ ///
+ /// The `unhandled_panic` option enables configuring this behavior.
+ ///
+ /// * `UnhandledPanic::Ignore` is the default behavior. Panics on
+ /// spawned tasks have no impact on the runtime's execution.
+ /// * `UnhandledPanic::ShutdownRuntime` will force the runtime to
+ /// shutdown immediately when a spawned task panics even if that
+ /// task's `JoinHandle` has not been dropped. All other spawned tasks
+ /// will immediately terminate and further calls to
+ /// [`Runtime::block_on`] will panic.
+ ///
+ /// # Unstable
+ ///
+ /// This option is currently unstable and its implementation is
+ /// incomplete. The API may change or be removed in the future. See
+ /// tokio-rs/tokio#4516 for more details.
+ ///
+ /// # Examples
+ ///
+ /// The following demonstrates a runtime configured to shutdown on
+ /// panic. The first spawned task panics and results in the runtime
+ /// shutting down. The second spawned task never has a chance to
+ /// execute. The call to `block_on` will panic due to the runtime being
+ /// forcibly shutdown.
+ ///
+ /// ```should_panic
+ /// use tokio::runtime::{self, UnhandledPanic};
+ ///
+ /// # pub fn main() {
+ /// let rt = runtime::Builder::new_current_thread()
+ /// .unhandled_panic(UnhandledPanic::ShutdownRuntime)
+ /// .build()
+ /// .unwrap();
+ ///
+ /// rt.spawn(async { panic!("boom"); });
+ /// rt.spawn(async {
+ /// // This task never completes.
+ /// });
+ ///
+ /// rt.block_on(async {
+ /// // Do some work
+ /// # loop { tokio::task::yield_now().await; }
+ /// })
+ /// # }
+ /// ```
+ ///
+ /// [`JoinHandle`]: struct@crate::task::JoinHandle
+ pub fn unhandled_panic(&mut self, behavior: UnhandledPanic) -> &mut Self {
+ self.unhandled_panic = behavior;
+ self
+ }
+
+ /// Disables the LIFO task scheduler heuristic.
+ ///
+ /// The multi-threaded scheduler includes a heuristic for optimizing
+ /// message-passing patterns. This heuristic results in the **last**
+ /// scheduled task being polled first.
+ ///
+ /// To implement this heuristic, each worker thread has a slot which
+ /// holds the task that should be polled next. However, this slot cannot
+ /// be stolen by other worker threads, which can result in lower total
+ /// throughput when tasks tend to have longer poll times.
+ ///
+ /// This configuration option will disable this heuristic resulting in
+ /// all scheduled tasks being pushed into the worker-local queue, which
+ /// is stealable.
+ ///
+ /// Consider trying this option when the task "scheduled" time is high
+ /// but the runtime is underutilized. Use tokio-rs/tokio-metrics to
+ /// collect this data.
+ ///
+ /// # Unstable
+ ///
+ /// This configuration option is considered a workaround for the LIFO
+ /// slot not being stealable. When the slot becomes stealable, we will
+ /// revisit whether or not this option is necessary. See
+ /// tokio-rs/tokio#4941.
+ ///
+ /// # Examples
+ ///
+ /// ```
+ /// use tokio::runtime;
+ ///
+ /// let rt = runtime::Builder::new_multi_thread()
+ /// .disable_lifo_slot()
+ /// .build()
+ /// .unwrap();
+ /// ```
+ pub fn disable_lifo_slot(&mut self) -> &mut Self {
+ self.disable_lifo_slot = true;
+ self
+ }
+
+ /// Specifies the random number generation seed to use within all
+ /// threads associated with the runtime being built.
+ ///
+ /// This option is intended to make certain parts of the runtime
+ /// deterministic (e.g. the [`tokio::select!`] macro). In the case of
+ /// [`tokio::select!`] it will ensure that the order that branches are
+ /// polled is deterministic.
+ ///
+ /// In addition to the code specifying `rng_seed` and interacting with
+ /// the runtime, the internals of Tokio and the Rust compiler may affect
+ /// the sequences of random numbers. In order to ensure repeatable
+ /// results, the version of Tokio, the versions of all other
+ /// dependencies that interact with Tokio, and the Rust compiler version
+ /// should also all remain constant.
+ ///
+ /// # Examples
+ ///
+ /// ```
+ /// # use tokio::runtime::{self, RngSeed};
+ /// # pub fn main() {
+ /// let seed = RngSeed::from_bytes(b"place your seed here");
+ /// let rt = runtime::Builder::new_current_thread()
+ /// .rng_seed(seed)
+ /// .build();
+ /// # }
+ /// ```
+ ///
+ /// [`tokio::select!`]: crate::select
+ pub fn rng_seed(&mut self, seed: RngSeed) -> &mut Self {
+ self.seed_generator = RngSeedGenerator::new(seed);
+ self
+ }
+ }
+
+ cfg_metrics! {
+ /// Enables tracking the distribution of task poll times.
+ ///
+ /// Task poll times are not instrumented by default as doing so requires
+ /// calling [`Instant::now()`] twice per task poll, which could add
+ /// measurable overhead. Use the [`Handle::metrics()`] to access the
+ /// metrics data.
+ ///
+ /// The histogram uses fixed bucket sizes. In other words, the histogram
+ /// buckets are not dynamic based on input values. Use the
+ /// `metrics_poll_count_histogram_` builder methods to configure the
+ /// histogram details.
+ ///
+ /// # Examples
+ ///
+ /// ```
+ /// use tokio::runtime;
+ ///
+ /// let rt = runtime::Builder::new_multi_thread()
+ /// .enable_metrics_poll_count_histogram()
+ /// .build()
+ /// .unwrap();
+ /// # // Test default values here
+ /// # fn us(n: u64) -> std::time::Duration { std::time::Duration::from_micros(n) }
+ /// # let m = rt.handle().metrics();
+ /// # assert_eq!(m.poll_count_histogram_num_buckets(), 10);
+ /// # assert_eq!(m.poll_count_histogram_bucket_range(0), us(0)..us(100));
+ /// # assert_eq!(m.poll_count_histogram_bucket_range(1), us(100)..us(200));
+ /// ```
+ ///
+ /// [`Handle::metrics()`]: crate::runtime::Handle::metrics
+ /// [`Instant::now()`]: std::time::Instant::now
+ pub fn enable_metrics_poll_count_histogram(&mut self) -> &mut Self {
+ self.metrics_poll_count_histogram_enable = true;
+ self
+ }
+
+ /// Sets the histogram scale for tracking the distribution of task poll
+ /// times.
+ ///
+ /// Tracking the distribution of task poll times can be done using a
+ /// linear or log scale. When using linear scale, each histogram bucket
+ /// will represent the same range of poll times. When using log scale,
+ /// each histogram bucket will cover a range twice as big as the
+ /// previous bucket.
+ ///
+ /// **Default:** linear scale.
+ ///
+ /// # Examples
+ ///
+ /// ```
+ /// use tokio::runtime::{self, HistogramScale};
+ ///
+ /// let rt = runtime::Builder::new_multi_thread()
+ /// .enable_metrics_poll_count_histogram()
+ /// .metrics_poll_count_histogram_scale(HistogramScale::Log)
+ /// .build()
+ /// .unwrap();
+ /// ```
+ pub fn metrics_poll_count_histogram_scale(&mut self, histogram_scale: crate::runtime::HistogramScale) -> &mut Self {
+ self.metrics_poll_count_histogram.scale = histogram_scale;
+ self
+ }
+
+ /// Sets the histogram resolution for tracking the distribution of task
+ /// poll times.
+ ///
+ /// The resolution is the histogram's first bucket's range. When using a
+ /// linear histogram scale, each bucket will cover the same range. When
+ /// using a log scale, each bucket will cover a range twice as big as
+ /// the previous bucket. In the log case, the resolution represents the
+ /// smallest bucket range.
+ ///
+ /// Note that, when using log scale, the resolution is rounded up to the
+ /// nearest power of 2 in nanoseconds.
+ ///
+ /// **Default:** 100 microseconds.
+ ///
+ /// # Examples
+ ///
+ /// ```
+ /// use tokio::runtime;
+ /// use std::time::Duration;
+ ///
+ /// let rt = runtime::Builder::new_multi_thread()
+ /// .enable_metrics_poll_count_histogram()
+ /// .metrics_poll_count_histogram_resolution(Duration::from_micros(100))
+ /// .build()
+ /// .unwrap();
+ /// ```
+ pub fn metrics_poll_count_histogram_resolution(&mut self, resolution: Duration) -> &mut Self {
+ assert!(resolution > Duration::from_secs(0));
+ // Sanity check the argument and also make the cast below safe.
+ assert!(resolution <= Duration::from_secs(1));
+
+ let resolution = resolution.as_nanos() as u64;
+ self.metrics_poll_count_histogram.resolution = resolution;
+ self
+ }
+
+ /// Sets the number of buckets for the histogram tracking the
+ /// distribution of task poll times.
+ ///
+ /// The last bucket tracks all greater values that fall out of other
+ /// ranges. So, configuring the histogram using a linear scale,
+ /// resolution of 50ms, and 10 buckets, the 10th bucket will track task
+ /// polls that take more than 450ms to complete.
+ ///
+ /// **Default:** 10
+ ///
+ /// # Examples
+ ///
+ /// ```
+ /// use tokio::runtime;
+ ///
+ /// let rt = runtime::Builder::new_multi_thread()
+ /// .enable_metrics_poll_count_histogram()
+ /// .metrics_poll_count_histogram_buckets(15)
+ /// .build()
+ /// .unwrap();
+ /// ```
+ pub fn metrics_poll_count_histogram_buckets(&mut self, buckets: usize) -> &mut Self {
+ self.metrics_poll_count_histogram.num_buckets = buckets;
+ self
+ }
+ }
+
+    /// Assembles a current-thread runtime from the builder's settings.
+    ///
+    /// The only step that can return an `Err` is the creation of the driver
+    /// stack; that error is propagated unchanged.
+    fn build_current_thread_runtime(&mut self) -> io::Result<Runtime> {
+        use crate::runtime::scheduler::{self, CurrentThread};
+        use crate::runtime::{runtime::Scheduler, Config};
+
+        let (driver, driver_handle) = driver::Driver::new(self.get_cfg())?;
+
+        // The blocking pool is created with exactly `max_blocking_threads`
+        // as its limit.
+        let blocking_pool = blocking::create_blocking_pool(self, self.max_blocking_threads);
+        let blocking_spawner = blocking_pool.spawner().clone();
+
+        // Derive two child seed generators, in this order: the first one is
+        // stored in the scheduler `Config`, the second one is handed to the
+        // scheduler constructor directly.
+        let config_seed_generator = self.seed_generator.next_generator();
+        let scheduler_seed_generator = self.seed_generator.next_generator();
+
+        let config = Config {
+            before_park: self.before_park.clone(),
+            after_unpark: self.after_unpark.clone(),
+            global_queue_interval: self.global_queue_interval,
+            event_interval: self.event_interval,
+            #[cfg(tokio_unstable)]
+            unhandled_panic: self.unhandled_panic.clone(),
+            disable_lifo_slot: self.disable_lifo_slot,
+            seed_generator: config_seed_generator,
+            metrics_poll_count_histogram: self.metrics_poll_count_histogram_builder(),
+        };
+
+        // Put a single-threaded scheduler on top of the driver. When no
+        // future is ready to make progress, the scheduler lets the timer or
+        // the reactor produce new events for the futures to react to.
+        let (scheduler, handle) = CurrentThread::new(
+            driver,
+            driver_handle,
+            blocking_spawner,
+            scheduler_seed_generator,
+            config,
+        );
+
+        Ok(Runtime::from_parts(
+            Scheduler::CurrentThread(scheduler),
+            Handle {
+                inner: scheduler::Handle::CurrentThread(handle),
+            },
+            blocking_pool,
+        ))
+    }
+
+    /// Returns a copy of the configured histogram builder when the poll count
+    /// histogram is enabled, and `None` when it is disabled.
+    fn metrics_poll_count_histogram_builder(&self) -> Option<HistogramBuilder> {
+        self.metrics_poll_count_histogram_enable
+            .then(|| self.metrics_poll_count_histogram.clone())
+    }
+}
+
+cfg_io_driver! {
+ impl Builder {
+ /// Enables the I/O driver.
+ ///
+ /// Doing this enables using net, process, signal, and some I/O types on
+ /// the runtime.
+ ///
+ /// # Examples
+ ///
+ /// ```
+ /// use tokio::runtime;
+ ///
+ /// let rt = runtime::Builder::new_multi_thread()
+ /// .enable_io()
+ /// .build()
+ /// .unwrap();
+ /// ```
+ pub fn enable_io(&mut self) -> &mut Self {
+ self.enable_io = true;
+ self
+ }
+
+ /// Configures the max number of I/O events to be processed per tick.
+ ///
+ /// This setter only records `capacity` on the builder; it does not turn
+ /// the I/O driver on by itself. Call `enable_io` as well, as the example
+ /// below does.
+ ///
+ /// # Examples
+ ///
+ /// ```
+ /// use tokio::runtime;
+ ///
+ /// let rt = runtime::Builder::new_current_thread()
+ /// .enable_io()
+ /// .max_io_events_per_tick(1024)
+ /// .build()
+ /// .unwrap();
+ /// ```
+ pub fn max_io_events_per_tick(&mut self, capacity: usize) -> &mut Self {
+ self.nevents = capacity;
+ self
+ }
+ }
+}
+
+cfg_time! {
+ impl Builder {
+ /// Enables the time driver.
+ ///
+ /// Doing this enables using `tokio::time` on the runtime.
+ ///
+ /// The call only sets a flag on the builder and returns the builder so
+ /// that further configuration calls can be chained.
+ ///
+ /// # Examples
+ ///
+ /// ```
+ /// use tokio::runtime;
+ ///
+ /// let rt = runtime::Builder::new_multi_thread()
+ /// .enable_time()
+ /// .build()
+ /// .unwrap();
+ /// ```
+ pub fn enable_time(&mut self) -> &mut Self {
+ self.enable_time = true;
+ self
+ }
+ }
+}
+
+cfg_test_util! {
+ impl Builder {
+ /// Controls if the runtime's clock starts paused or advancing.
+ ///
+ /// Pausing time requires the current-thread runtime; construction of
+ /// the runtime will panic otherwise.
+ ///
+ /// The value is only stored on the builder here; the panic mentioned
+ /// above happens later, when the runtime is constructed, not in this
+ /// call.
+ ///
+ /// # Examples
+ ///
+ /// ```
+ /// use tokio::runtime;
+ ///
+ /// let rt = runtime::Builder::new_current_thread()
+ /// .enable_time()
+ /// .start_paused(true)
+ /// .build()
+ /// .unwrap();
+ /// ```
+ pub fn start_paused(&mut self, start_paused: bool) -> &mut Self {
+ self.start_paused = start_paused;
+ self
+ }
+ }
+}
+
+cfg_rt_multi_thread! {
+    impl Builder {
+        /// Assembles a multi-threaded runtime from the builder's settings and
+        /// launches its workers.
+        ///
+        /// The number of workers is `worker_threads` when it was configured
+        /// and the value returned by `num_cpus` otherwise.
+        ///
+        /// # Errors
+        ///
+        /// Returns the `io::Error` produced by `driver::Driver::new` when the
+        /// driver stack cannot be created.
+        fn build_threaded_runtime(&mut self) -> io::Result<Runtime> {
+            use crate::loom::sys::num_cpus;
+            use crate::runtime::{Config, runtime::Scheduler};
+            use crate::runtime::scheduler::{self, MultiThread};
+
+            let core_threads = self.worker_threads.unwrap_or_else(num_cpus);
+
+            let (driver, driver_handle) = driver::Driver::new(self.get_cfg())?;
+
+            // Create the blocking pool. Its limit is the configured blocking
+            // thread cap plus one slot per worker (presumably because the
+            // workers themselves run on this pool -- confirm against
+            // `launch`). The sum saturates: a very large user-supplied cap
+            // such as `usize::MAX` must stay "effectively unlimited" instead
+            // of panicking in debug builds or wrapping around to a tiny limit
+            // in release builds.
+            let blocking_pool = blocking::create_blocking_pool(
+                self,
+                self.max_blocking_threads.saturating_add(core_threads),
+            );
+            let blocking_spawner = blocking_pool.spawner().clone();
+
+            // Generate a rng seed for this runtime. The first generator is
+            // stored in the scheduler `Config`, the second one is handed to
+            // the scheduler constructor directly.
+            let seed_generator_1 = self.seed_generator.next_generator();
+            let seed_generator_2 = self.seed_generator.next_generator();
+
+            let (scheduler, handle, launch) = MultiThread::new(
+                core_threads,
+                driver,
+                driver_handle,
+                blocking_spawner,
+                seed_generator_2,
+                Config {
+                    before_park: self.before_park.clone(),
+                    after_unpark: self.after_unpark.clone(),
+                    global_queue_interval: self.global_queue_interval,
+                    event_interval: self.event_interval,
+                    #[cfg(tokio_unstable)]
+                    unhandled_panic: self.unhandled_panic.clone(),
+                    disable_lifo_slot: self.disable_lifo_slot,
+                    seed_generator: seed_generator_1,
+                    metrics_poll_count_histogram: self.metrics_poll_count_histogram_builder(),
+                },
+            );
+
+            let handle = Handle { inner: scheduler::Handle::MultiThread(handle) };
+
+            // Spawn the thread pool workers. The runtime handle is entered
+            // for the duration of the launch; the guard is released when
+            // this function returns.
+            let _enter = handle.enter();
+            launch.launch();
+
+            Ok(Runtime::from_parts(Scheduler::MultiThread(scheduler), handle, blocking_pool))
+        }
+    }
+}
+
+/// Debug output for `Builder`. Callbacks cannot be printed, so they are
+/// rendered as placeholders: a fixed type description for the thread-name
+/// function and `Some("...")` / `None` for each optional hook.
+impl fmt::Debug for Builder {
+    fn fmt(&self, fmt: &mut fmt::Formatter<'_>) -> fmt::Result {
+        let mut out = fmt.debug_struct("Builder");
+
+        out.field("worker_threads", &self.worker_threads);
+        out.field("max_blocking_threads", &self.max_blocking_threads);
+        out.field(
+            "thread_name",
+            &"<dyn Fn() -> String + Send + Sync + 'static>",
+        );
+        out.field("thread_stack_size", &self.thread_stack_size);
+
+        // Only whether each hook is set is reported.
+        out.field("after_start", &self.after_start.as_ref().map(|_| "..."));
+        out.field("before_stop", &self.before_stop.as_ref().map(|_| "..."));
+        out.field("before_park", &self.before_park.as_ref().map(|_| "..."));
+        out.field("after_unpark", &self.after_unpark.as_ref().map(|_| "..."));
+
+        out.finish()
+    }
+}
diff --git a/third_party/rust/tokio/src/runtime/config.rs b/third_party/rust/tokio/src/runtime/config.rs
new file mode 100644
index 0000000000..c42e4fe5a8
--- /dev/null
+++ b/third_party/rust/tokio/src/runtime/config.rs
@@ -0,0 +1,37 @@
+#![cfg_attr(any(not(feature = "full"), tokio_wasm), allow(dead_code))]
+use crate::runtime::Callback;
+use crate::util::RngSeedGenerator;
+
+/// Scheduler settings assembled by the runtime `Builder` and passed to the
+/// scheduler constructor when a runtime is built.
+pub(crate) struct Config {
+ /// How many ticks before pulling a task from the global/remote queue?
+ pub(crate) global_queue_interval: Option<u32>,
+
+ /// How many ticks before yielding to the driver for timer and I/O events?
+ pub(crate) event_interval: u32,
+
+ /// Callback for a worker parking itself
+ pub(crate) before_park: Option<Callback>,
+
+ /// Callback for a worker unparking itself
+ pub(crate) after_unpark: Option<Callback>,
+
+ /// The multi-threaded scheduler includes a per-worker LIFO slot used to
+ /// store the last scheduled task. This can improve certain usage patterns,
+ /// especially message passing between tasks. However, this LIFO slot is not
+ /// currently stealable.
+ ///
+ /// Eventually, the LIFO slot **will** become stealable, however as a
+ /// stop-gap, this unstable option lets users disable the LIFO task.
+ pub(crate) disable_lifo_slot: bool,
+
+ /// Random number generator seed to configure runtimes to act in a
+ /// deterministic way.
+ pub(crate) seed_generator: RngSeedGenerator,
+
+ /// How to build poll time histograms. `None` when the poll count
+ /// histogram is not enabled.
+ pub(crate) metrics_poll_count_histogram: Option<crate::runtime::HistogramBuilder>,
+
+ #[cfg(tokio_unstable)]
+ /// How to respond to unhandled task panics.
+ pub(crate) unhandled_panic: crate::runtime::UnhandledPanic,
+}
diff --git a/third_party/rust/tokio/src/runtime/context.rs b/third_party/rust/tokio/src/runtime/context.rs
new file mode 100644
index 0000000000..5943e9aa97
--- /dev/null
+++ b/third_party/rust/tokio/src/runtime/context.rs
@@ -0,0 +1,191 @@
+use crate::loom::thread::AccessError;
+use crate::runtime::coop;
+
+use std::cell::Cell;
+
+#[cfg(any(feature = "rt", feature = "macros"))]
+use crate::util::rand::FastRand;
+
+cfg_rt! {
+ mod blocking;
+ pub(crate) use blocking::{disallow_block_in_place, try_enter_blocking_region, BlockingRegionGuard};
+
+ mod current;
+ pub(crate) use current::{with_current, try_set_current, SetCurrentGuard};
+
+ mod runtime;
+ pub(crate) use runtime::{EnterRuntime, enter_runtime};
+
+ mod scoped;
+ use scoped::Scoped;
+
+ use crate::runtime::{scheduler, task::Id};
+
+ use std::task::Waker;
+
+ cfg_taskdump! {
+ use crate::runtime::task::trace;
+ }
+}
+
+cfg_rt_multi_thread! {
+ mod runtime_mt;
+ pub(crate) use runtime_mt::{current_enter_context, exit_runtime};
+}
+
+/// Per-thread runtime state. One instance lives in the `CONTEXT`
+/// thread-local; most fields only exist when the `rt` feature is enabled.
+struct Context {
+ /// Uniquely identifies the current thread
+ #[cfg(feature = "rt")]
+ thread_id: Cell<Option<ThreadId>>,
+
+ /// Handle to the runtime scheduler running on the current thread.
+ #[cfg(feature = "rt")]
+ current: current::HandleCell,
+
+ /// Handle to the scheduler's internal "context"
+ #[cfg(feature = "rt")]
+ scheduler: Scoped<scheduler::Context>,
+
+ /// Task id most recently stored through `set_current_task_id`, if any.
+ #[cfg(feature = "rt")]
+ current_task_id: Cell<Option<Id>>,
+
+ /// Tracks if the current thread is currently driving a runtime.
+ /// Note, that if this is set to "entered", the current scheduler
+ /// handle may not reference the runtime currently executing. This
+ /// is because other runtime handles may be set to current from
+ /// within a runtime.
+ #[cfg(feature = "rt")]
+ runtime: Cell<EnterRuntime>,
+
+ /// Thread-local random number generator. It is created on first use and
+ /// its seed is swapped while a runtime is entered.
+ #[cfg(any(feature = "rt", feature = "macros"))]
+ rng: Cell<Option<FastRand>>,
+
+ /// Tracks the amount of "work" a task may still do before yielding back to
+ /// the scheduler
+ budget: Cell<coop::Budget>,
+
+ /// Task-dump trace state, exposed through `with_trace`.
+ #[cfg(all(
+ tokio_unstable,
+ tokio_taskdump,
+ feature = "rt",
+ target_os = "linux",
+ any(target_arch = "aarch64", target_arch = "x86", target_arch = "x86_64")
+ ))]
+ trace: trace::Context,
+}
+
+// The per-thread `Context`. It is const-initialized to an "empty" state: no
+// thread id, no current handle, no scheduler context, no task id, not entered
+// into any runtime, no RNG yet, and an unconstrained coop budget.
+tokio_thread_local! {
+ static CONTEXT: Context = const {
+ Context {
+ #[cfg(feature = "rt")]
+ thread_id: Cell::new(None),
+
+ /// Tracks the current runtime handle to use when spawning,
+ /// accessing drivers, etc...
+ #[cfg(feature = "rt")]
+ current: current::HandleCell::new(),
+
+ /// Tracks the current scheduler internal context
+ #[cfg(feature = "rt")]
+ scheduler: Scoped::new(),
+
+ #[cfg(feature = "rt")]
+ current_task_id: Cell::new(None),
+
+ /// Tracks if the current thread is currently driving a runtime.
+ /// Note, that if this is set to "entered", the current scheduler
+ /// handle may not reference the runtime currently executing. This
+ /// is because other runtime handles may be set to current from
+ /// within a runtime.
+ #[cfg(feature = "rt")]
+ runtime: Cell::new(EnterRuntime::NotEntered),
+
+ #[cfg(any(feature = "rt", feature = "macros"))]
+ rng: Cell::new(None),
+
+ budget: Cell::new(coop::Budget::unconstrained()),
+
+ #[cfg(all(
+ tokio_unstable,
+ tokio_taskdump,
+ feature = "rt",
+ target_os = "linux",
+ any(
+ target_arch = "aarch64",
+ target_arch = "x86",
+ target_arch = "x86_64"
+ )
+ ))]
+ trace: trace::Context::new(),
+ }
+ }
+}
+
+/// Draws a value from the thread-local `FastRand` via `fastrand_n(n)`,
+/// creating the generator on first use.
+///
+/// Panics if the thread-local context is no longer accessible.
+#[cfg(any(feature = "macros", all(feature = "sync", feature = "rt")))]
+pub(crate) fn thread_rng_n(n: u32) -> u32 {
+    CONTEXT.with(|ctx| {
+        // The generator lives in a `Cell`: copy it out, advance the copy,
+        // then store the advanced state back.
+        let mut generator = match ctx.rng.get() {
+            Some(existing) => existing,
+            None => FastRand::new(),
+        };
+        let value = generator.fastrand_n(n);
+        ctx.rng.set(Some(generator));
+        value
+    })
+}
+
+/// Runs `f` with access to the current thread's coop budget cell.
+///
+/// Returns `Err(AccessError)` instead of panicking when the thread-local
+/// context cannot be accessed; in that case `f` is not called.
+pub(super) fn budget<R>(f: impl FnOnce(&Cell<coop::Budget>) -> R) -> Result<R, AccessError> {
+ CONTEXT.try_with(|ctx| f(&ctx.budget))
+}
+
+cfg_rt! {
+    use crate::runtime::ThreadId;
+
+    /// Returns the id assigned to the current thread, allocating one with
+    /// `ThreadId::next()` the first time it is requested on this thread.
+    ///
+    /// Fails with `AccessError` when the thread-local context is unavailable.
+    pub(crate) fn thread_id() -> Result<ThreadId, AccessError> {
+        CONTEXT.try_with(|ctx| {
+            if let Some(existing) = ctx.thread_id.get() {
+                return existing;
+            }
+
+            let fresh = ThreadId::next();
+            ctx.thread_id.set(Some(fresh));
+            fresh
+        })
+    }
+
+    /// Stores `id` as the current task id and returns the previously stored
+    /// value. Returns `None` (and stores nothing) when the thread-local
+    /// context is unavailable.
+    pub(crate) fn set_current_task_id(id: Option<Id>) -> Option<Id> {
+        match CONTEXT.try_with(|ctx| ctx.current_task_id.replace(id)) {
+            Ok(previous) => previous,
+            Err(_) => None,
+        }
+    }
+
+    /// Returns the stored current task id, or `None` when there is none or
+    /// the thread-local context is unavailable.
+    pub(crate) fn current_task_id() -> Option<Id> {
+        match CONTEXT.try_with(|ctx| ctx.current_task_id.get()) {
+            Ok(current) => current,
+            Err(_) => None,
+        }
+    }
+
+    /// Hands `waker` to the scheduler context of the current thread so the
+    /// wake-up can be deferred; without a scheduler context the task is
+    /// woken immediately.
+    #[track_caller]
+    pub(crate) fn defer(waker: &Waker) {
+        with_scheduler(|maybe_scheduler| match maybe_scheduler {
+            Some(scheduler) => scheduler.defer(waker),
+            // Called from outside of the runtime, immediately wake the
+            // task.
+            None => waker.wake_by_ref(),
+        });
+    }
+
+    /// Makes `v` the scheduler context of this thread while `f` runs.
+    pub(super) fn set_scheduler<R>(v: &scheduler::Context, f: impl FnOnce() -> R) -> R {
+        CONTEXT.with(|ctx| ctx.scheduler.set(v, f))
+    }
+
+    /// Calls `f` with the scheduler context of this thread, or with `None`
+    /// when no scheduler context is set.
+    #[track_caller]
+    pub(super) fn with_scheduler<R>(f: impl FnOnce(Option<&scheduler::Context>) -> R) -> R {
+        CONTEXT.with(|ctx| ctx.scheduler.with(f))
+    }
+
+    cfg_taskdump! {
+        /// Calls `f` with the trace context of this thread. Returns `None`
+        /// when the thread-local context is unavailable.
+        ///
+        /// SAFETY: Callers of this function must ensure that trace frames always
+        /// form a valid linked list.
+        pub(crate) unsafe fn with_trace<R>(f: impl FnOnce(&trace::Context) -> R) -> Option<R> {
+            CONTEXT.try_with(|ctx| f(&ctx.trace)).ok()
+        }
+    }
+}
diff --git a/third_party/rust/tokio/src/runtime/context/blocking.rs b/third_party/rust/tokio/src/runtime/context/blocking.rs
new file mode 100644
index 0000000000..8ae4f570e8
--- /dev/null
+++ b/third_party/rust/tokio/src/runtime/context/blocking.rs
@@ -0,0 +1,121 @@
+use super::{EnterRuntime, CONTEXT};
+
+use crate::loom::thread::AccessError;
+use crate::util::markers::NotSendOrSync;
+
+use std::marker::PhantomData;
+use std::time::Duration;
+
+/// Guard tracking that a caller has entered a blocking region.
+///
+/// The `NotSendOrSync` marker is a `PhantomData` field; judging by its name
+/// it keeps the guard on the thread that created it.
+#[must_use]
+pub(crate) struct BlockingRegionGuard {
+ _p: PhantomData<NotSendOrSync>,
+}
+
+/// Guard returned by `disallow_block_in_place`. The flag records whether the
+/// guard actually switched `allow_block_in_place` off and therefore has to
+/// switch it back on when dropped.
+pub(crate) struct DisallowBlockInPlaceGuard(bool);
+
+/// Tries to enter a blocking region on the current thread.
+///
+/// Returns `None` when the thread is currently inside a runtime context and
+/// a guard otherwise.
+pub(crate) fn try_enter_blocking_region() -> Option<BlockingRegionGuard> {
+    match CONTEXT.try_with(|c| c.runtime.get().is_entered()) {
+        // The thread is driving a runtime: blocking is refused.
+        Ok(true) => None,
+        Ok(false) => Some(BlockingRegionGuard::new()),
+        // If accessing the thread-local fails, the thread is terminating
+        // and thread-locals are being destroyed. Because we don't know if
+        // we are currently in a runtime or not, we default to being
+        // permissive.
+        Err(_) => Some(BlockingRegionGuard::new()),
+    }
+}
+
+/// Disallows blocking in the current runtime context until the guard is dropped.
+///
+/// The flag is only flipped when the thread is inside a runtime that
+/// currently allows `block_in_place`; in every other state the returned guard
+/// does nothing on drop.
+pub(crate) fn disallow_block_in_place() -> DisallowBlockInPlaceGuard {
+    let flipped = CONTEXT.with(|c| {
+        let currently_allowed = matches!(
+            c.runtime.get(),
+            EnterRuntime::Entered {
+                allow_block_in_place: true,
+            }
+        );
+
+        if currently_allowed {
+            c.runtime.set(EnterRuntime::Entered {
+                allow_block_in_place: false,
+            });
+        }
+
+        currently_allowed
+    });
+
+    DisallowBlockInPlaceGuard(flipped)
+}
+
+impl BlockingRegionGuard {
+    /// Creates a new guard. The guard carries no data besides its marker.
+    pub(super) fn new() -> BlockingRegionGuard {
+        BlockingRegionGuard { _p: PhantomData }
+    }
+
+    /// Blocks the thread on the specified future, returning the value with
+    /// which that future completes.
+    ///
+    /// The work is delegated to `CachedParkThread::block_on`; its
+    /// `AccessError` is returned unchanged.
+    pub(crate) fn block_on<F>(&mut self, f: F) -> Result<F::Output, AccessError>
+    where
+        F: std::future::Future,
+    {
+        use crate::runtime::park::CachedParkThread;
+
+        let mut park = CachedParkThread::new();
+        park.block_on(f)
+    }
+
+    /// Blocks the thread on the specified future for **at most** `timeout`
+    ///
+    /// If the future completes before `timeout`, the result is returned. If
+    /// `timeout` elapses, then `Err` is returned. `Err` is also returned when
+    /// the waker of the parking thread cannot be obtained.
+    ///
+    /// A `timeout` so large that `Instant::now() + timeout` is not
+    /// representable (for example `Duration::MAX`) is treated as "no
+    /// deadline" instead of panicking on the overflowing addition.
+    pub(crate) fn block_on_timeout<F>(&mut self, f: F, timeout: Duration) -> Result<F::Output, ()>
+    where
+        F: std::future::Future,
+    {
+        use crate::runtime::park::CachedParkThread;
+        use std::task::Context;
+        use std::task::Poll::Ready;
+        use std::time::Instant;
+
+        // Upper bound for a single park when there is no representable
+        // deadline. Waking up this rarely only costs one extra poll.
+        const MAX_PARK_WITHOUT_DEADLINE: Duration = Duration::from_secs(60 * 60);
+
+        let mut park = CachedParkThread::new();
+        let waker = park.waker().map_err(|_| ())?;
+        let mut cx = Context::from_waker(&waker);
+
+        pin!(f);
+
+        // `Instant + Duration` panics when the result cannot be represented;
+        // `checked_add` reports that case as `None`.
+        let deadline = Instant::now().checked_add(timeout);
+
+        loop {
+            // Each poll runs with a fresh coop budget.
+            if let Ready(v) = crate::runtime::coop::budget(|| f.as_mut().poll(&mut cx)) {
+                return Ok(v);
+            }
+
+            let park_for = match deadline {
+                Some(when) => {
+                    let now = Instant::now();
+
+                    if now >= when {
+                        return Err(());
+                    }
+
+                    when - now
+                }
+                // The deadline lies beyond what `Instant` can express, so it
+                // can never be reached: keep waiting in bounded slices.
+                None => MAX_PARK_WITHOUT_DEADLINE,
+            };
+
+            park.park_timeout(park_for);
+        }
+    }
+}
+
+/// Re-enables `block_in_place` on drop, but only if this guard disabled it
+/// and the thread is still inside a runtime with the flag switched off.
+impl Drop for DisallowBlockInPlaceGuard {
+    fn drop(&mut self) {
+        // This guard never changed anything, so there is nothing to undo.
+        if !self.0 {
+            return;
+        }
+
+        // XXX: Do we want some kind of assertion here, or is "best effort" okay?
+        CONTEXT.with(|c| {
+            let still_disallowed = matches!(
+                c.runtime.get(),
+                EnterRuntime::Entered {
+                    allow_block_in_place: false,
+                }
+            );
+
+            if still_disallowed {
+                c.runtime.set(EnterRuntime::Entered {
+                    allow_block_in_place: true,
+                });
+            }
+        })
+    }
+}
diff --git a/third_party/rust/tokio/src/runtime/context/current.rs b/third_party/rust/tokio/src/runtime/context/current.rs
new file mode 100644
index 0000000000..c3dc5c8994
--- /dev/null
+++ b/third_party/rust/tokio/src/runtime/context/current.rs
@@ -0,0 +1,99 @@
+use super::{Context, CONTEXT};
+
+use crate::runtime::{scheduler, TryCurrentError};
+use crate::util::markers::SyncNotSend;
+
+use std::cell::{Cell, RefCell};
+use std::marker::PhantomData;
+
+/// Guard returned when a scheduler handle is made current. Dropping it
+/// restores the previously current handle; guards must be dropped in the
+/// reverse order of their creation.
+#[derive(Debug)]
+#[must_use]
+pub(crate) struct SetCurrentGuard {
+ // The previous handle
+ prev: Option<scheduler::Handle>,
+
+ // The depth for this guard
+ depth: usize,
+
+ // Don't let the type move across threads.
+ _p: PhantomData<SyncNotSend>,
+}
+
+/// Thread-local slot holding the current scheduler handle together with the
+/// nesting depth of the guards that set it.
+pub(super) struct HandleCell {
+ /// Current handle
+ handle: RefCell<Option<scheduler::Handle>>,
+
+ /// Tracks the number of nested calls to `try_set_current`.
+ depth: Cell<usize>,
+}
+
+/// Sets this [`Handle`] as the current active [`Handle`].
+///
+/// Returns `None` when the thread-local context cannot be accessed; in that
+/// case nothing is changed.
+///
+/// [`Handle`]: crate::runtime::scheduler::Handle
+pub(crate) fn try_set_current(handle: &scheduler::Handle) -> Option<SetCurrentGuard> {
+    match CONTEXT.try_with(|ctx| ctx.set_current(handle)) {
+        Ok(guard) => Some(guard),
+        Err(_) => None,
+    }
+}
+
+/// Calls `f` with the current scheduler handle of this thread.
+///
+/// Fails with a "no context" error when no handle is set, and with a
+/// "thread-local destroyed" error when the thread-local context cannot be
+/// accessed.
+pub(crate) fn with_current<F, R>(f: F) -> Result<R, TryCurrentError>
+where
+    F: FnOnce(&scheduler::Handle) -> R,
+{
+    let outcome = match CONTEXT.try_with(|ctx| ctx.current.handle.borrow().as_ref().map(f)) {
+        Ok(outcome) => outcome,
+        Err(_access_error) => return Err(TryCurrentError::new_thread_local_destroyed()),
+    };
+
+    outcome.ok_or_else(TryCurrentError::new_no_context)
+}
+
+impl Context {
+    /// Installs a clone of `handle` as the current handle and returns a guard
+    /// that remembers the handle it replaced and the new nesting depth.
+    ///
+    /// Panics when the nesting depth is already `usize::MAX`.
+    pub(super) fn set_current(&self, handle: &scheduler::Handle) -> SetCurrentGuard {
+        let prev = self.current.handle.borrow_mut().replace(handle.clone());
+
+        let outer_depth = self.current.depth.get();
+        assert!(outer_depth != usize::MAX, "reached max `enter` depth");
+
+        let depth = outer_depth + 1;
+        self.current.depth.set(depth);
+
+        SetCurrentGuard {
+            prev,
+            depth,
+            _p: PhantomData,
+        }
+    }
+}
+
+impl HandleCell {
+ /// Creates an empty cell: no current handle and a nesting depth of zero.
+ /// The function is `const` so it can be used in the const-initialized
+ /// thread-local.
+ pub(super) const fn new() -> HandleCell {
+ HandleCell {
+ handle: RefCell::new(None),
+ depth: Cell::new(0),
+ }
+ }
+}
+
+/// Restores the previously current handle and decrements the nesting depth.
+///
+/// A guard dropped out of order panics, unless the thread is already
+/// panicking, in which case the state is left untouched.
+impl Drop for SetCurrentGuard {
+    fn drop(&mut self) {
+        CONTEXT.with(|ctx| {
+            let live_depth = ctx.current.depth.get();
+
+            if live_depth == self.depth {
+                // This is the innermost guard: undo what it did.
+                *ctx.current.handle.borrow_mut() = self.prev.take();
+                ctx.current.depth.set(live_depth - 1);
+            } else if !std::thread::panicking() {
+                panic!(
+                    "`EnterGuard` values dropped out of order. Guards returned by \
+                     `tokio::runtime::Handle::enter()` must be dropped in the reverse \
+                     order as they were acquired."
+                );
+            }
+            // Otherwise the thread is already unwinding: just return... this
+            // will leave handles in a wonky state though...
+        });
+    }
+}
diff --git a/third_party/rust/tokio/src/runtime/context/runtime.rs b/third_party/rust/tokio/src/runtime/context/runtime.rs
new file mode 100644
index 0000000000..f2e29899a4
--- /dev/null
+++ b/third_party/rust/tokio/src/runtime/context/runtime.rs
@@ -0,0 +1,99 @@
+use super::{BlockingRegionGuard, SetCurrentGuard, CONTEXT};
+
+use crate::runtime::scheduler;
+use crate::util::rand::{FastRand, RngSeed};
+
+use std::fmt;
+
+/// Per-thread flag describing whether the thread is currently inside a
+/// runtime context.
+#[derive(Debug, Clone, Copy)]
+#[must_use]
+pub(crate) enum EnterRuntime {
+ /// Currently in a runtime context.
+ ///
+ /// `allow_block_in_place` is set when the runtime is entered and can be
+ /// switched off temporarily through `disallow_block_in_place`.
+ #[cfg_attr(not(feature = "rt"), allow(dead_code))]
+ Entered { allow_block_in_place: bool },
+
+ /// Not in a runtime context **or** a blocking region.
+ NotEntered,
+}
+
+/// Guard tracking that a caller has entered a runtime context.
+///
+/// Dropping it marks the thread as not entered again and puts the previous
+/// RNG seed back.
+#[must_use]
+pub(crate) struct EnterRuntimeGuard {
+ /// Tracks that the current thread has entered a blocking function call.
+ pub(crate) blocking: BlockingRegionGuard,
+
+ #[allow(dead_code)] // Only tracking the guard.
+ pub(crate) handle: SetCurrentGuard,
+
+ // Tracks the previous random number generator seed
+ old_seed: RngSeed,
+}
+
+/// Marks the current thread as being within the dynamic extent of an
+/// executor.
+///
+/// While `f` runs, the thread is flagged as entered, `handle` is the current
+/// scheduler handle, and the thread-local RNG uses a seed taken from the
+/// handle's seed generator. All of this is undone when `f` returns.
+///
+/// Panics when the thread is already inside a runtime context.
+#[track_caller]
+pub(crate) fn enter_runtime<F, R>(handle: &scheduler::Handle, allow_block_in_place: bool, f: F) -> R
+where
+    F: FnOnce(&mut BlockingRegionGuard) -> R,
+{
+    let maybe_guard = CONTEXT.with(|c| {
+        // Nested entry is refused; the caller turns this into a panic.
+        if c.runtime.get().is_entered() {
+            return None;
+        }
+
+        // Set the entered flag
+        c.runtime.set(EnterRuntime::Entered {
+            allow_block_in_place,
+        });
+
+        // Generate a new seed
+        let fresh_seed = handle.seed_generator().next_seed();
+
+        // Swap the RNG seed, keeping the old one so the guard can restore it
+        let mut rng = c.rng.get().unwrap_or_else(FastRand::new);
+        let old_seed = rng.replace_seed(fresh_seed);
+        c.rng.set(Some(rng));
+
+        Some(EnterRuntimeGuard {
+            blocking: BlockingRegionGuard::new(),
+            handle: c.set_current(handle),
+            old_seed,
+        })
+    });
+
+    match maybe_guard {
+        // The guard stays alive until `f` has returned.
+        Some(mut guard) => f(&mut guard.blocking),
+        None => panic!(
+            "Cannot start a runtime from within a runtime. This happens \
+             because a function (like `block_on`) attempted to block the \
+             current thread while the thread is being used to drive \
+             asynchronous tasks."
+        ),
+    }
+}
+
+// The guard is printed as an empty struct named "Enter"; none of its fields
+// are included in the output.
+impl fmt::Debug for EnterRuntimeGuard {
+ fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
+ f.debug_struct("Enter").finish()
+ }
+}
+
+/// Leaves the runtime context: clears the entered flag and puts the RNG seed
+/// that was active before entering back in place.
+impl Drop for EnterRuntimeGuard {
+    fn drop(&mut self) {
+        CONTEXT.with(|ctx| {
+            assert!(ctx.runtime.get().is_entered());
+            ctx.runtime.set(EnterRuntime::NotEntered);
+
+            // Replace the previous RNG seed
+            let mut generator = match ctx.rng.get() {
+                Some(existing) => existing,
+                None => FastRand::new(),
+            };
+            generator.replace_seed(self.old_seed.clone());
+            ctx.rng.set(Some(generator));
+        });
+    }
+}
+
+impl EnterRuntime {
+    /// Returns `true` for `Entered`, regardless of `allow_block_in_place`,
+    /// and `false` for `NotEntered`.
+    pub(crate) fn is_entered(self) -> bool {
+        match self {
+            EnterRuntime::Entered { .. } => true,
+            EnterRuntime::NotEntered => false,
+        }
+    }
+}
diff --git a/third_party/rust/tokio/src/runtime/context/runtime_mt.rs b/third_party/rust/tokio/src/runtime/context/runtime_mt.rs
new file mode 100644
index 0000000000..728caeae99
--- /dev/null
+++ b/third_party/rust/tokio/src/runtime/context/runtime_mt.rs
@@ -0,0 +1,36 @@
+use super::{EnterRuntime, CONTEXT};
+
+/// Returns the current thread's `EnterRuntime` state, i.e. whether the thread
+/// is inside a runtime context and, if so, whether `block_in_place` is
+/// allowed.
+///
+/// Panics if the thread-local context is no longer accessible.
+pub(crate) fn current_enter_context() -> EnterRuntime {
+ CONTEXT.with(|c| c.runtime.get())
+}
+
+/// Forces the current "entered" state to be cleared while the closure
+/// is executed.
+///
+/// Panics when the thread is not inside a runtime context on entry, or when
+/// the thread is flagged as entered again at the point where the previous
+/// state is restored.
+pub(crate) fn exit_runtime<F: FnOnce() -> R, R>(f: F) -> R {
+    // Restores the saved state on drop, so it also runs if the closure panics.
+    struct Reset(EnterRuntime);
+
+    impl Drop for Reset {
+        fn drop(&mut self) {
+            CONTEXT.with(|c| {
+                let reentered = c.runtime.get().is_entered();
+                assert!(!reentered, "closure claimed permanent executor");
+                c.runtime.set(self.0);
+            });
+        }
+    }
+
+    let previous = CONTEXT.with(|c| {
+        let previous = c.runtime.get();
+        assert!(previous.is_entered(), "asked to exit when not entered");
+        c.runtime.set(EnterRuntime::NotEntered);
+        previous
+    });
+
+    // Declared before the call so that it is dropped after `f()` returns (or
+    // unwinds), which puts the previous state back.
+    let _reset = Reset(previous);
+    f()
+}
diff --git a/third_party/rust/tokio/src/runtime/context/scoped.rs b/third_party/rust/tokio/src/runtime/context/scoped.rs
new file mode 100644
index 0000000000..7b202a16c0
--- /dev/null
+++ b/third_party/rust/tokio/src/runtime/context/scoped.rs
@@ -0,0 +1,56 @@
+use std::cell::Cell;
+use std::ptr;
+
+/// Scoped thread-local storage
+///
+/// The cell holds either a null pointer ("nothing set") or a pointer to a
+/// value that is borrowed for the duration of a `set` call.
+pub(super) struct Scoped<T> {
+    pub(super) inner: Cell<*const T>,
+}
+
+impl<T> Scoped<T> {
+    /// Creates an empty cell.
+    pub(super) const fn new() -> Scoped<T> {
+        Scoped {
+            inner: Cell::new(ptr::null()),
+        }
+    }
+
+    /// Inserts a value into the scoped cell for the duration of the closure
+    ///
+    /// The previous content of the cell is restored when the closure returns
+    /// or unwinds.
+    pub(super) fn set<F, R>(&self, t: &T, f: F) -> R
+    where
+        F: FnOnce() -> R,
+    {
+        // Puts the saved pointer back when dropped.
+        struct Restore<'a, T> {
+            slot: &'a Cell<*const T>,
+            saved: *const T,
+        }
+
+        impl<T> Drop for Restore<'_, T> {
+            fn drop(&mut self) {
+                self.slot.set(self.saved);
+            }
+        }
+
+        // Install the new pointer and keep the old one for restoration.
+        let _restore = Restore {
+            slot: &self.inner,
+            saved: self.inner.replace(t as *const _),
+        };
+
+        f()
+    }
+
+    /// Gets the value out of the scoped cell;
+    ///
+    /// The closure receives `None` when nothing is currently set.
+    pub(super) fn with<F, R>(&self, f: F) -> R
+    where
+        F: FnOnce(Option<&T>) -> R,
+    {
+        let current = self.inner.get();
+
+        // SAFETY: the cell is only ever non-null while a `set` call is
+        // running, and that call holds a `&T` to the pointee for its whole
+        // duration and resets the cell before returning. A non-null pointer
+        // read here therefore refers to a live `T`.
+        f(unsafe { current.as_ref() })
+    }
+}
diff --git a/third_party/rust/tokio/src/runtime/coop.rs b/third_party/rust/tokio/src/runtime/coop.rs
new file mode 100644
index 0000000000..2dba246159
--- /dev/null
+++ b/third_party/rust/tokio/src/runtime/coop.rs
@@ -0,0 +1,323 @@
+#![cfg_attr(not(feature = "full"), allow(dead_code))]
+
+//! Yield points for improved cooperative scheduling.
+//!
+//! Documentation for this can be found in the [`tokio::task`] module.
+//!
+//! [`tokio::task`]: crate::task
+
+// ```ignore
+// # use tokio_stream::{Stream, StreamExt};
+// async fn drop_all<I: Stream + Unpin>(mut input: I) {
+// while let Some(_) = input.next().await {
+// tokio::coop::proceed().await;
+// }
+// }
+// ```
+//
+// The `proceed` future will coordinate with the executor to make sure that
+// every so often control is yielded back to the executor so it can run other
+// tasks.
+//
+// # Placing yield points
+//
+// Voluntary yield points should be placed _after_ at least some work has been
+// done. If they are not, a future sufficiently deep in the task hierarchy may
+// end up _never_ getting to run because of the number of yield points that
+// inevitably appear before it is reached. In general, you will want yield
+// points to only appear in "leaf" futures -- those that do not themselves poll
+// other futures. By doing this, you avoid double-counting each iteration of
+// the outer future against the cooperating budget.
+
+use crate::runtime::context;
+
+/// Opaque type tracking the amount of "work" a task may still do before
+/// yielding back to the scheduler.
+///
+/// `None` means the budget is unconstrained; `Some(n)` means `n` more
+/// decrements are allowed.
+#[derive(Debug, Copy, Clone)]
+pub(crate) struct Budget(Option<u8>);
+
+/// Outcome of one attempt to decrement a `Budget`.
+pub(crate) struct BudgetDecrement {
+ // `false` when the budget was already exhausted and nothing was decremented.
+ success: bool,
+ // `true` when this decrement brought a constrained budget down to zero.
+ hit_zero: bool,
+}
+
+impl Budget {
+    /// Budget assigned to a task on each poll.
+    ///
+    /// The number is a compromise and was picked somewhat arbitrarily: it
+    /// has to be large enough to amortize wakeup and scheduling costs and to
+    /// let deeply nested tasks get some useful work done, yet small enough
+    /// that other tasks are not starved for too long.
+    ///
+    /// Note that as more yield points are added in the ecosystem, this value
+    /// will probably also have to be raised.
+    const fn initial() -> Budget {
+        Budget(Some(128))
+    }
+
+    /// Returns an unconstrained budget. Operations will not be limited.
+    pub(super) const fn unconstrained() -> Budget {
+        Budget(None)
+    }
+
+    /// Returns `true` when the budget is unconstrained or has at least one
+    /// unit left.
+    fn has_remaining(self) -> bool {
+        match self.0 {
+            Some(left) => left > 0,
+            None => true,
+        }
+    }
+}
+
+/// Runs the given closure with a cooperative task budget. When the function
+/// returns, the budget is reset to the value prior to calling the function.
+#[inline(always)]
+pub(crate) fn budget<R>(f: impl FnOnce() -> R) -> R {
+    with_budget(Budget::initial(), f)
+}
+
+/// Runs the given closure with an unconstrained task budget. When the function returns, the budget
+/// is reset to the value prior to calling the function.
+#[inline(always)]
+pub(crate) fn with_unconstrained<R>(f: impl FnOnce() -> R) -> R {
+    with_budget(Budget::unconstrained(), f)
+}
+
+/// Installs `budget` as the thread's current budget, runs `f`, and puts the
+/// previous budget back afterwards (also when `f` unwinds).
+#[inline(always)]
+fn with_budget<R>(budget: Budget, f: impl FnOnce() -> R) -> R {
+    // Writes the remembered budget back when dropped.
+    struct ResetGuard {
+        prev: Budget,
+    }
+
+    impl Drop for ResetGuard {
+        fn drop(&mut self) {
+            let _ = context::budget(|cell| {
+                cell.set(self.prev);
+            });
+        }
+    }
+
+    // Swap the new budget in and remember the old one. The binding keeps the
+    // guard (if any) alive until the end of this function.
+    let _maybe_guard = context::budget(|cell| ResetGuard {
+        prev: cell.replace(budget),
+    });
+
+    // The function is called regardless even if the budget is not successfully
+    // set due to the thread-local being destroyed.
+    f()
+}
+
+/// Returns whether the current task may still do work before yielding.
+#[inline(always)]
+pub(crate) fn has_budget_remaining() -> bool {
+    match context::budget(|cell| cell.get().has_remaining()) {
+        Ok(remaining) => remaining,
+        // If the current budget cannot be accessed due to the thread-local
+        // being shutdown, then we assume there is budget remaining.
+        Err(_) => true,
+    }
+}
+
+cfg_rt_multi_thread! {
+ /// Sets the current task's budget.
+ ///
+ /// The call is silently ignored when the thread-local context cannot be
+ /// accessed.
+ pub(crate) fn set(budget: Budget) {
+ let _ = context::budget(|cell| cell.set(budget));
+ }
+}
+
+cfg_rt! {
+    /// Forcibly removes the budgeting constraints early.
+    ///
+    /// Returns the remaining budget, or an unconstrained budget when the
+    /// thread-local context cannot be accessed.
+    pub(crate) fn stop() -> Budget {
+        match context::budget(|cell| cell.replace(Budget::unconstrained())) {
+            Ok(remaining) => remaining,
+            Err(_) => Budget::unconstrained(),
+        }
+    }
+}
+
+cfg_coop! {
+ use std::cell::Cell;
+ use std::task::{Context, Poll};
+
+    /// Remembers the budget from before a `poll_proceed` call and writes it
+    /// back on drop, unless progress was reported in the meantime.
+    #[must_use]
+    pub(crate) struct RestoreOnPending(Cell<Budget>);
+
+    impl RestoreOnPending {
+        /// Records that progress was made, so the decremented budget is kept
+        /// instead of being restored on drop.
+        pub(crate) fn made_progress(&self) {
+            self.0.set(Budget::unconstrained());
+        }
+    }
+
+    impl Drop for RestoreOnPending {
+        fn drop(&mut self) {
+            let remembered = self.0.get();
+
+            // Don't reset if budget was unconstrained or if we made progress.
+            // They are both represented as the remembered budget being unconstrained.
+            if remembered.is_unconstrained() {
+                return;
+            }
+
+            let _ = context::budget(|cell| {
+                cell.set(remembered);
+            });
+        }
+    }
+
+    /// Returns `Poll::Pending` if the current task has exceeded its budget and should yield.
+    ///
+    /// Calling this decrements the current budget. To make sure that a task
+    /// makes progress every time it is polled, the budget is automatically
+    /// put back to its former value when the returned `RestoreOnPending` is
+    /// dropped. Callers that did make progress must say so through
+    /// `RestoreOnPending::made_progress`, otherwise the budget never empties.
+    ///
+    /// Note that `RestoreOnPending` restores the budget **as it was before `poll_proceed`**.
+    /// Therefore, if the budget is _further_ adjusted between when `poll_proceed` returns and
+    /// `RestoreOnPending` is dropped, those adjustments are erased unless the caller indicates
+    /// that progress was made.
+    ///
+    /// When the budget is exhausted the task's waker is invoked before
+    /// `Poll::Pending` is returned. When the thread-local context cannot be
+    /// accessed the call behaves as if the budget were unconstrained.
+    #[inline]
+    pub(crate) fn poll_proceed(cx: &mut Context<'_>) -> Poll<RestoreOnPending> {
+        let outcome = context::budget(|cell| {
+            let mut remaining = cell.get();
+            let decrement = remaining.decrement();
+
+            if !decrement.success {
+                // Out of budget: schedule a re-poll and yield.
+                cx.waker().wake_by_ref();
+                return Poll::Pending;
+            }
+
+            // Store the decremented budget and remember the previous one.
+            let restore = RestoreOnPending(Cell::new(cell.replace(remaining)));
+
+            // avoid double counting
+            if decrement.hit_zero {
+                inc_budget_forced_yield_count();
+            }
+
+            Poll::Ready(restore)
+        });
+
+        outcome.unwrap_or(Poll::Ready(RestoreOnPending(Cell::new(Budget::unconstrained()))))
+    }
+
+ // `inc_budget_forced_yield_count` is defined exactly once for every feature
+ // combination: the variant with runtime and metrics support bumps the
+ // counter on the current handle's scheduler metrics (and does nothing when
+ // there is no current handle); the other variants are empty.
+ cfg_rt! {
+ cfg_metrics! {
+ #[inline(always)]
+ fn inc_budget_forced_yield_count() {
+ let _ = context::with_current(|handle| {
+ handle.scheduler_metrics().inc_budget_forced_yield_count();
+ });
+ }
+ }
+
+ cfg_not_metrics! {
+ #[inline(always)]
+ fn inc_budget_forced_yield_count() {}
+ }
+ }
+
+ cfg_not_rt! {
+ #[inline(always)]
+ fn inc_budget_forced_yield_count() {}
+ }
+
+    impl Budget {
+        /// Decrements the budget and reports the outcome.
+        ///
+        /// `success` is `false` only when a constrained budget is already at
+        /// zero; in that case nothing is changed. `hit_zero` is `true` when
+        /// this very decrement used up the last unit. An unconstrained
+        /// budget always succeeds and never hits zero.
+        fn decrement(&mut self) -> BudgetDecrement {
+            match self.0.as_mut() {
+                None => BudgetDecrement { success: true, hit_zero: false },
+                Some(left) if *left == 0 => BudgetDecrement { success: false, hit_zero: false },
+                Some(left) => {
+                    *left -= 1;
+                    BudgetDecrement { success: true, hit_zero: *left == 0 }
+                }
+            }
+        }
+
+        /// Returns `true` when the budget places no limit on the task.
+        fn is_unconstrained(self) -> bool {
+            matches!(self.0, None)
+        }
+    }
+}
+
+#[cfg(all(test, not(loom)))]
+mod test {
+ use super::*;
+
+ #[cfg(tokio_wasm_not_wasi)]
+ use wasm_bindgen_test::wasm_bindgen_test as test;
+
+ // Reads the thread's current budget; falls back to an unconstrained budget
+ // when the thread-local context cannot be accessed.
+ fn get() -> Budget {
+ context::budget(|cell| cell.get()).unwrap_or(Budget::unconstrained())
+ }
+
+ #[test]
+ fn budgeting() {
+ use futures::future::poll_fn;
+ use tokio_test::*;
+
+ // Outside of `budget(..)` the budget is unconstrained, and
+ // `poll_proceed` leaves it that way.
+ assert!(get().0.is_none());
+
+ let coop = assert_ready!(task::spawn(()).enter(|cx, _| poll_proceed(cx)));
+
+ assert!(get().0.is_none());
+ drop(coop);
+ assert!(get().0.is_none());
+
+ budget(|| {
+ assert_eq!(get().0, Budget::initial().0);
+
+ let coop = assert_ready!(task::spawn(()).enter(|cx, _| poll_proceed(cx)));
+ assert_eq!(get().0.unwrap(), Budget::initial().0.unwrap() - 1);
+ drop(coop);
+ // we didn't make progress
+ assert_eq!(get().0, Budget::initial().0);
+
+ let coop = assert_ready!(task::spawn(()).enter(|cx, _| poll_proceed(cx)));
+ assert_eq!(get().0.unwrap(), Budget::initial().0.unwrap() - 1);
+ coop.made_progress();
+ drop(coop);
+ // we _did_ make progress
+ assert_eq!(get().0.unwrap(), Budget::initial().0.unwrap() - 1);
+
+ let coop = assert_ready!(task::spawn(()).enter(|cx, _| poll_proceed(cx)));
+ assert_eq!(get().0.unwrap(), Budget::initial().0.unwrap() - 2);
+ coop.made_progress();
+ drop(coop);
+ assert_eq!(get().0.unwrap(), Budget::initial().0.unwrap() - 2);
+
+ // A nested `budget(..)` starts from the initial budget again and
+ // the outer budget is restored once it returns.
+ budget(|| {
+ assert_eq!(get().0, Budget::initial().0);
+
+ let coop = assert_ready!(task::spawn(()).enter(|cx, _| poll_proceed(cx)));
+ assert_eq!(get().0.unwrap(), Budget::initial().0.unwrap() - 1);
+ coop.made_progress();
+ drop(coop);
+ assert_eq!(get().0.unwrap(), Budget::initial().0.unwrap() - 1);
+ });
+
+ assert_eq!(get().0.unwrap(), Budget::initial().0.unwrap() - 2);
+ });
+
+ assert!(get().0.is_none());
+
+ budget(|| {
+ let n = get().0.unwrap();
+
+ // Use up the whole budget, reporting progress every time.
+ for _ in 0..n {
+ let coop = assert_ready!(task::spawn(()).enter(|cx, _| poll_proceed(cx)));
+ coop.made_progress();
+ }
+
+ let mut task = task::spawn(poll_fn(|cx| {
+ let coop = ready!(poll_proceed(cx));
+ coop.made_progress();
+ Poll::Ready(())
+ }));
+
+ // With the budget exhausted, the next `poll_proceed` must yield.
+ assert_pending!(task.poll());
+ });
+ }
+}
diff --git a/third_party/rust/tokio/src/runtime/driver.rs b/third_party/rust/tokio/src/runtime/driver.rs
new file mode 100644
index 0000000000..572fdefb0d
--- /dev/null
+++ b/third_party/rust/tokio/src/runtime/driver.rs
@@ -0,0 +1,341 @@
+//! Abstracts out the entire chain of runtime sub-drivers into common types.
+
+// Eventually, this file will see significant refactoring / cleanup. For now, we
+// don't need to worry much about dead code with certain feature permutations.
+#![cfg_attr(not(feature = "full"), allow(dead_code))]
+
+use crate::runtime::park::{ParkThread, UnparkThread};
+
+use std::io;
+use std::time::Duration;
+
+#[derive(Debug)]
+pub(crate) struct Driver { // Entry point to the runtime's chain of sub-drivers.
+ inner: TimeDriver, // Outermost layer; `park`, `park_timeout` and `shutdown` are forwarded to it.
+}
+
+#[derive(Debug)]
+pub(crate) struct Handle { // One handle per sub-driver; built together with `Driver` by `Driver::new`.
+ /// IO driver handle; when IO is disabled or compiled out it only wraps an `UnparkThread`.
+ pub(crate) io: IoHandle,
+
+ /// Signal driver handle; the unit type `()` under `cfg_not_signal_internal!`.
+ #[cfg_attr(any(not(unix), loom), allow(dead_code))]
+ pub(crate) signal: SignalHandle,
+
+ /// Time driver handle; `None` when the time driver is not enabled, `()` under `cfg_not_time!`.
+ pub(crate) time: TimeHandle,
+
+ /// Source of `Instant::now()`; the unit type `()` under `cfg_not_time!`.
+ #[cfg_attr(not(all(feature = "time", feature = "test-util")), allow(dead_code))]
+ pub(crate) clock: Clock,
+}
+
+pub(crate) struct Cfg { // Options consumed by `Driver::new`.
+ pub(crate) enable_io: bool, // Passed to `create_io_stack`; `false` selects the thread-park fallback.
+ pub(crate) enable_time: bool, // Passed to `create_time_driver`.
+ pub(crate) enable_pause_time: bool, // Passed to `create_clock` as `enable_pausing`.
+ pub(crate) start_paused: bool, // Passed to `create_clock`.
+ pub(crate) nevents: usize, // Passed to `create_io_stack`, which hands it to the IO driver constructor.
+}
+
+impl Driver { // Construction plus park/shutdown forwarding for the driver stack.
+ pub(crate) fn new(cfg: Cfg) -> io::Result<(Self, Handle)> { // Builds the stack in order: IO stack, clock, then the time layer.
+ let (io_stack, io_handle, signal_handle) = create_io_stack(cfg.enable_io, cfg.nevents)?; // The only fallible step in this function.
+
+ let clock = create_clock(cfg.enable_pause_time, cfg.start_paused);
+
+ let (time_driver, time_handle) = create_time_driver(cfg.enable_time, io_stack, &clock); // Takes ownership of the IO stack.
+
+ Ok((
+ Self { inner: time_driver },
+ Handle {
+ io: io_handle,
+ signal: signal_handle,
+ time: time_handle,
+ clock,
+ },
+ ))
+ }
+
+ pub(crate) fn park(&mut self, handle: &Handle) { // Forwards to `inner`.
+ self.inner.park(handle)
+ }
+
+ pub(crate) fn park_timeout(&mut self, handle: &Handle, duration: Duration) { // Forwards to `inner`, passing `duration` through unchanged.
+ self.inner.park_timeout(handle, duration)
+ }
+
+ pub(crate) fn shutdown(&mut self, handle: &Handle) { // Forwards to `inner`.
+ self.inner.shutdown(handle)
+ }
+}
+
+impl Handle { // Accessors for the individual sub-driver handles.
+ pub(crate) fn unpark(&self) { // Unparks through the time handle (when one is present), then through the IO handle.
+ #[cfg(feature = "time")]
+ if let Some(handle) = &self.time {
+ handle.unpark();
+ }
+
+ self.io.unpark();
+ }
+
+ cfg_io_driver! {
+ #[track_caller]
+ pub(crate) fn io(&self) -> &crate::runtime::io::Handle { // Returns the IO driver handle; panics with the message below when IO is disabled.
+ self.io
+ .as_ref()
+ .expect("A Tokio 1.x context was found, but IO is disabled. Call `enable_io` on the runtime builder to enable IO.")
+ }
+ }
+
+ cfg_signal_internal_and_unix! {
+ #[track_caller]
+ pub(crate) fn signal(&self) -> &crate::runtime::signal::Handle { // Returns the signal driver handle; panics when it is `None`.
+ self.signal
+ .as_ref()
+ .expect("there is no signal driver running, must be called from the context of Tokio runtime")
+ }
+ }
+
+ cfg_time! {
+ /// Returns a reference to the time driver handle.
+ ///
+ /// Panics if no time driver is present.
+ #[track_caller]
+ pub(crate) fn time(&self) -> &crate::runtime::time::Handle {
+ self.time
+ .as_ref()
+ .expect("A Tokio 1.x context was found, but timers are disabled. Call `enable_time` on the runtime builder to enable timers.")
+ }
+
+ pub(crate) fn clock(&self) -> &Clock { // Returns the clock stored in this handle.
+ &self.clock
+ }
+ }
+}
+
+// ===== io driver =====
+
+cfg_io_driver! { // Definitions used when the IO driver is compiled in.
+ pub(crate) type IoDriver = crate::runtime::io::Driver;
+
+ #[derive(Debug)]
+ pub(crate) enum IoStack { // Bottom of the driver stack: either the real IO chain or a plain thread parker.
+ Enabled(ProcessDriver), // Result of `create_process_driver`, layered over the signal and IO drivers.
+ Disabled(ParkThread), // Fallback used when IO was not enabled at runtime.
+ }
+
+ #[derive(Debug)]
+ pub(crate) enum IoHandle { // Handle counterpart of `IoStack`.
+ Enabled(crate::runtime::io::Handle),
+ Disabled(UnparkThread),
+ }
+
+ fn create_io_stack(enabled: bool, nevents: usize) -> io::Result<(IoStack, IoHandle, SignalHandle)> { // Builds the IO -> signal -> process chain, or the park-thread fallback.
+ #[cfg(loom)]
+ assert!(!enabled); // Under loom, the real IO driver must not be requested.
+
+ let ret = if enabled {
+ let (io_driver, io_handle) = crate::runtime::io::Driver::new(nevents)?;
+
+ let (signal_driver, signal_handle) = create_signal_driver(io_driver, &io_handle)?; // Consumes the IO driver.
+ let process_driver = create_process_driver(signal_driver); // Consumes the signal driver.
+
+ (IoStack::Enabled(process_driver), IoHandle::Enabled(io_handle), signal_handle)
+ } else {
+ let park_thread = ParkThread::new();
+ let unpark_thread = park_thread.unpark();
+ (IoStack::Disabled(park_thread), IoHandle::Disabled(unpark_thread), Default::default()) // Default `SignalHandle`: no signal driver.
+ };
+
+ Ok(ret)
+ }
+
+ impl IoStack { // Each method dispatches on the variant; the fallback parker does not take the `Handle`.
+ pub(crate) fn park(&mut self, handle: &Handle) {
+ match self {
+ IoStack::Enabled(v) => v.park(handle),
+ IoStack::Disabled(v) => v.park(),
+ }
+ }
+
+ pub(crate) fn park_timeout(&mut self, handle: &Handle, duration: Duration) {
+ match self {
+ IoStack::Enabled(v) => v.park_timeout(handle, duration),
+ IoStack::Disabled(v) => v.park_timeout(duration),
+ }
+ }
+
+ pub(crate) fn shutdown(&mut self, handle: &Handle) {
+ match self {
+ IoStack::Enabled(v) => v.shutdown(handle),
+ IoStack::Disabled(v) => v.shutdown(),
+ }
+ }
+ }
+
+ impl IoHandle {
+ pub(crate) fn unpark(&self) { // Wakes whichever parker backs this handle.
+ match self {
+ IoHandle::Enabled(handle) => handle.unpark(),
+ IoHandle::Disabled(handle) => handle.unpark(),
+ }
+ }
+
+ pub(crate) fn as_ref(&self) -> Option<&crate::runtime::io::Handle> { // `Some` only for the `Enabled` variant.
+ match self {
+ IoHandle::Enabled(v) => Some(v),
+ IoHandle::Disabled(..) => None,
+ }
+ }
+ }
+}
+
+cfg_not_io_driver! { // Definitions used when the IO driver is compiled out: parking is plain thread parking.
+ pub(crate) type IoHandle = UnparkThread;
+
+ #[derive(Debug)]
+ pub(crate) struct IoStack(ParkThread);
+
+ fn create_io_stack(_enabled: bool, _nevents: usize) -> io::Result<(IoStack, IoHandle, SignalHandle)> { // Both arguments are ignored; this version never returns `Err`.
+ let park_thread = ParkThread::new();
+ let unpark_thread = park_thread.unpark();
+ Ok((IoStack(park_thread), unpark_thread, Default::default()))
+ }
+
+ impl IoStack { // Same method set as the IO-enabled `IoStack`; the `Handle` argument is unused.
+ pub(crate) fn park(&mut self, _handle: &Handle) {
+ self.0.park();
+ }
+
+ pub(crate) fn park_timeout(&mut self, _handle: &Handle, duration: Duration) {
+ self.0.park_timeout(duration);
+ }
+
+ pub(crate) fn shutdown(&mut self, _handle: &Handle) {
+ self.0.shutdown();
+ }
+ }
+}
+
+// ===== signal driver =====
+
+cfg_signal_internal_and_unix! { // Real signal driver, layered over the IO driver.
+ type SignalDriver = crate::runtime::signal::Driver;
+ pub(crate) type SignalHandle = Option<crate::runtime::signal::Handle>;
+
+ fn create_signal_driver(io_driver: IoDriver, io_handle: &crate::runtime::io::Handle) -> io::Result<(SignalDriver, SignalHandle)> { // Wraps `io_driver`; fails if the signal driver constructor fails.
+ let driver = crate::runtime::signal::Driver::new(io_driver, io_handle)?;
+ let handle = driver.handle();
+ Ok((driver, Some(handle)))
+ }
+}
+
+cfg_not_signal_internal! { // No signal driver: the "signal" layer is the IO driver itself.
+ pub(crate) type SignalHandle = ();
+
+ cfg_io_driver! {
+ type SignalDriver = IoDriver;
+
+ fn create_signal_driver(io_driver: IoDriver, _io_handle: &crate::runtime::io::Handle) -> io::Result<(SignalDriver, SignalHandle)> { // Pass-through; never returns `Err`.
+ Ok((io_driver, ()))
+ }
+ }
+}
+
+// ===== process driver =====
+
+cfg_process_driver! { // Real process driver, layered over the signal driver.
+ type ProcessDriver = crate::runtime::process::Driver;
+
+ fn create_process_driver(signal_driver: SignalDriver) -> ProcessDriver { // Wraps `signal_driver`.
+ ProcessDriver::new(signal_driver)
+ }
+}
+
+cfg_not_process_driver! { // No process driver: the "process" layer is the signal driver itself.
+ cfg_io_driver! {
+ type ProcessDriver = SignalDriver;
+
+ fn create_process_driver(signal_driver: SignalDriver) -> ProcessDriver { // Pass-through.
+ signal_driver
+ }
+ }
+}
+
+// ===== time driver =====
+
+cfg_time! { // Definitions used when the time driver is compiled in.
+ #[derive(Debug)]
+ pub(crate) enum TimeDriver { // Top layer of the stack: a real time driver, or the bare IO stack.
+ Enabled {
+ driver: crate::runtime::time::Driver, // Constructed from the IO stack in `create_time_driver`.
+ },
+ Disabled(IoStack), // Time not enabled at runtime; calls go straight to the IO stack.
+ }
+
+ pub(crate) type Clock = crate::time::Clock;
+ pub(crate) type TimeHandle = Option<crate::runtime::time::Handle>;
+
+ fn create_clock(enable_pausing: bool, start_paused: bool) -> Clock { // Thin wrapper over `Clock::new`.
+ crate::time::Clock::new(enable_pausing, start_paused)
+ }
+
+ fn create_time_driver( // Wraps `io_stack` in a time driver when `enable` is true; otherwise returns it unchanged with no handle.
+ enable: bool,
+ io_stack: IoStack,
+ clock: &Clock,
+ ) -> (TimeDriver, TimeHandle) {
+ if enable {
+ let (driver, handle) = crate::runtime::time::Driver::new(io_stack, clock);
+
+ (TimeDriver::Enabled { driver }, Some(handle))
+ } else {
+ (TimeDriver::Disabled(io_stack), None)
+ }
+ }
+
+ impl TimeDriver { // Each method dispatches on the variant and forwards its arguments.
+ pub(crate) fn park(&mut self, handle: &Handle) {
+ match self {
+ TimeDriver::Enabled { driver, .. } => driver.park(handle), // The `..` is redundant here: `driver` is the only field.
+ TimeDriver::Disabled(v) => v.park(handle),
+ }
+ }
+
+ pub(crate) fn park_timeout(&mut self, handle: &Handle, duration: Duration) {
+ match self {
+ TimeDriver::Enabled { driver } => driver.park_timeout(handle, duration),
+ TimeDriver::Disabled(v) => v.park_timeout(handle, duration),
+ }
+ }
+
+ pub(crate) fn shutdown(&mut self, handle: &Handle) {
+ match self {
+ TimeDriver::Enabled { driver } => driver.shutdown(handle),
+ TimeDriver::Disabled(v) => v.shutdown(handle),
+ }
+ }
+ }
+}
+
+cfg_not_time! { // Time driver compiled out: the IO stack is the top layer and the clock/handle are unit types.
+ type TimeDriver = IoStack;
+
+ pub(crate) type Clock = ();
+ pub(crate) type TimeHandle = ();
+
+ fn create_clock(_enable_pausing: bool, _start_paused: bool) -> Clock { // Arguments are ignored; there is no clock to configure.
+ ()
+ }
+
+ fn create_time_driver( // Returns `io_stack` unchanged; `_enable` and `_clock` are ignored.
+ _enable: bool,
+ io_stack: IoStack,
+ _clock: &Clock,
+ ) -> (TimeDriver, TimeHandle) {
+ (io_stack, ())
+ }
+}
diff --git a/third_party/rust/tokio/src/runtime/dump.rs b/third_party/rust/tokio/src/runtime/dump.rs
new file mode 100644
index 0000000000..994b7f9c01
--- /dev/null
+++ b/third_party/rust/tokio/src/runtime/dump.rs
@@ -0,0 +1,76 @@
+//! Snapshots of runtime state.
+//!
+//! See [Handle::dump][crate::runtime::Handle::dump].
+
+use std::fmt;
+
+/// A snapshot of a runtime's state.
+///
+/// See [Handle::dump][crate::runtime::Handle::dump].
+#[derive(Debug)]
+pub struct Dump {
+ tasks: Tasks, // Task snapshots; exposed read-only through `Dump::tasks`.
+}
+
+/// Snapshots of tasks.
+///
+/// See [Handle::dump][crate::runtime::Handle::dump].
+#[derive(Debug)]
+pub struct Tasks {
+ tasks: Vec<Task>, // One entry per captured task; iterate with `Tasks::iter`.
+}
+
+/// A snapshot of a task.
+///
+/// See [Handle::dump][crate::runtime::Handle::dump].
+#[derive(Debug)]
+pub struct Task {
+ trace: Trace, // Public wrapper around the internal trace passed to `Task::new`.
+}
+
+/// An execution trace of a task's last poll.
+///
+/// See [Handle::dump][crate::runtime::Handle::dump].
+#[derive(Debug)]
+pub struct Trace {
+ inner: super::task::trace::Trace, // Internal trace; the `Display` impl of this type forwards to it.
+}
+
+impl Dump {
+ pub(crate) fn new(tasks: Vec<Task>) -> Self { // Wraps the given task snapshots in a `Tasks` collection.
+ Self {
+ tasks: Tasks { tasks },
+ }
+ }
+
+ /// Returns the tasks captured in this snapshot.
+ pub fn tasks(&self) -> &Tasks {
+ &self.tasks
+ }
+}
+
+impl Tasks {
+ /// Returns an iterator over the task snapshots, in stored order.
+ pub fn iter(&self) -> impl Iterator<Item = &Task> {
+ self.tasks.iter()
+ }
+}
+
+impl Task {
+ pub(crate) fn new(trace: super::task::trace::Trace) -> Self { // Wraps the internal trace in the public `Trace` type.
+ Self {
+ trace: Trace { inner: trace },
+ }
+ }
+
+ /// Returns the trace captured for this task.
+ pub fn trace(&self) -> &Trace {
+ &self.trace
+ }
+}
+
+impl fmt::Display for Trace { // Formatting is delegated entirely to the wrapped internal trace.
+ fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
+ self.inner.fmt(f)
+ }
+}
diff --git a/third_party/rust/tokio/src/runtime/handle.rs b/third_party/rust/tokio/src/runtime/handle.rs
new file mode 100644
index 0000000000..be4743d477
--- /dev/null
+++ b/third_party/rust/tokio/src/runtime/handle.rs
@@ -0,0 +1,587 @@
+use crate::runtime::{context, scheduler, RuntimeFlavor};
+
+/// Handle to the runtime.
+///
+/// The handle is internally reference-counted and can be freely cloned. A handle can be
+/// obtained using the [`Runtime::handle`] method.
+///
+/// [`Runtime::handle`]: crate::runtime::Runtime::handle()
+#[derive(Debug, Clone)]
+// When the `rt` feature is *not* enabled, this type is still defined, but not
+// included in the public API.
+pub struct Handle {
+ pub(crate) inner: scheduler::Handle, // Scheduler-specific handle; `runtime_flavor` matches on its variants.
+}
+
+use crate::runtime::task::JoinHandle;
+use crate::util::error::{CONTEXT_MISSING_ERROR, THREAD_LOCAL_DESTROYED_ERROR};
+
+use std::future::Future;
+use std::marker::PhantomData;
+use std::{error, fmt};
+
+/// Runtime context guard.
+///
+/// Returned by [`Runtime::enter`] and [`Handle::enter`], the context guard exits
+/// the runtime context on drop.
+///
+/// [`Runtime::enter`]: fn@crate::runtime::Runtime::enter
+#[derive(Debug)]
+#[must_use = "Creating and dropping a guard does nothing"]
+pub struct EnterGuard<'a> {
+ _guard: context::SetCurrentGuard, // Never read; kept only so it is dropped together with this guard.
+ _handle_lifetime: PhantomData<&'a Handle>, // Ties the guard to a borrow of the `Handle` without storing a reference.
+}
+
+impl Handle {
+ /// Enters the runtime context. This allows you to construct types that must
+ /// have an executor available on creation such as [`Sleep`] or
+ /// [`TcpStream`]. It will also allow you to call methods such as
+ /// [`tokio::spawn`] and [`Handle::current`] without panicking.
+ ///
+ /// # Panics
+ ///
+ /// When calling `Handle::enter` multiple times, the returned guards
+ /// **must** be dropped in the reverse order that they were acquired.
+ /// Failure to do so will result in a panic and possible memory leaks.
+ ///
+ /// # Examples
+ ///
+ /// ```
+ /// use tokio::runtime::Runtime;
+ ///
+ /// let rt = Runtime::new().unwrap();
+ ///
+ /// let _guard = rt.enter();
+ /// tokio::spawn(async {
+ /// println!("Hello world!");
+ /// });
+ /// ```
+ ///
+ /// Do **not** do the following, this shows a scenario that will result in a
+ /// panic and possible memory leak.
+ ///
+ /// ```should_panic
+ /// use tokio::runtime::Runtime;
+ ///
+ /// let rt1 = Runtime::new().unwrap();
+ /// let rt2 = Runtime::new().unwrap();
+ ///
+ /// let enter1 = rt1.enter();
+ /// let enter2 = rt2.enter();
+ ///
+ /// drop(enter1);
+ /// drop(enter2);
+ /// ```
+ ///
+ /// [`Sleep`]: struct@crate::time::Sleep
+ /// [`TcpStream`]: struct@crate::net::TcpStream
+ /// [`tokio::spawn`]: fn@crate::spawn
+ pub fn enter(&self) -> EnterGuard<'_> { // Sets this handle as the thread's current runtime context until the returned guard is dropped.
+ EnterGuard {
+ _guard: match context::try_set_current(&self.inner) {
+ Some(guard) => guard,
+ None => panic!("{}", THREAD_LOCAL_DESTROYED_ERROR), // Same imported constant the `Display` impl of `TryCurrentError` uses.
+ },
+ _handle_lifetime: PhantomData,
+ }
+ }
+
+ /// Returns a `Handle` view over the currently running `Runtime`.
+ ///
+ /// # Panics
+ ///
+ /// This will panic if called outside the context of a Tokio runtime. That means that you must
+ /// call this on one of the threads **being run by the runtime**, or from a thread with an active
+ /// `EnterGuard`. Calling this from within a thread created by `std::thread::spawn` (for example)
+ /// will cause a panic unless that thread has an active `EnterGuard`.
+ ///
+ /// # Examples
+ ///
+ /// This can be used to obtain the handle of the surrounding runtime from an async
+ /// block or function running on that runtime.
+ ///
+ /// ```
+ /// # use std::thread;
+ /// # use tokio::runtime::Runtime;
+ /// # fn dox() {
+ /// # let rt = Runtime::new().unwrap();
+ /// # rt.spawn(async {
+ /// use tokio::runtime::Handle;
+ ///
+ /// // Inside an async block or function.
+ /// let handle = Handle::current();
+ /// handle.spawn(async {
+ /// println!("now running in the existing Runtime");
+ /// });
+ ///
+ /// # let handle =
+ /// thread::spawn(move || {
+ /// // Notice that the handle is created outside of this thread and then moved in
+ /// handle.spawn(async { /* ... */ });
+ /// // This next line would cause a panic because we haven't entered the runtime
+ /// // and created an EnterGuard
+ /// // let handle2 = Handle::current(); // panic
+ /// // So we create a guard here with Handle::enter();
+ /// let _guard = handle.enter();
+ /// // Now we can call Handle::current();
+ /// let handle2 = Handle::current();
+ /// });
+ /// # handle.join().unwrap();
+ /// # });
+ /// # }
+ /// ```
+ #[track_caller]
+ pub fn current() -> Self { // Wraps `scheduler::Handle::current()`; see the `# Panics` section above for when this panics.
+ Handle {
+ inner: scheduler::Handle::current(),
+ }
+ }
+
+ /// Returns a `Handle` view over the currently running `Runtime`.
+ ///
+ /// Returns an error if there is no runtime context on this thread, or if the context thread-local was destroyed.
+ ///
+ /// Contrary to [`Handle::current`], this never panics.
+ pub fn try_current() -> Result<Self, TryCurrentError> { // Clones the scheduler handle that `context::with_current` passes to the closure.
+ context::with_current(|inner| Handle {
+ inner: inner.clone(),
+ })
+ }
+
+ /// Spawns a future onto the Tokio runtime.
+ ///
+ /// This spawns the given future onto the runtime's executor, usually a
+ /// thread pool. The thread pool is then responsible for polling the future
+ /// until it completes.
+ ///
+ /// The provided future will start running in the background immediately
+ /// when `spawn` is called, even if you don't await the returned
+ /// `JoinHandle`.
+ ///
+ /// See [module level][mod] documentation for more details.
+ ///
+ /// [mod]: index.html
+ ///
+ /// # Examples
+ ///
+ /// ```
+ /// use tokio::runtime::Runtime;
+ ///
+ /// # fn dox() {
+ /// // Create the runtime
+ /// let rt = Runtime::new().unwrap();
+ /// // Get a handle from this runtime
+ /// let handle = rt.handle();
+ ///
+ /// // Spawn a future onto the runtime using the handle
+ /// handle.spawn(async {
+ /// println!("now running on a worker thread");
+ /// });
+ /// # }
+ /// ```
+ #[track_caller]
+ pub fn spawn<F>(&self, future: F) -> JoinHandle<F::Output>
+ where
+ F: Future + Send + 'static,
+ F::Output: Send + 'static,
+ {
+ self.spawn_named(future, None) // Unnamed task: `None` is passed as the task name.
+ }
+
+ /// Runs the provided function on an executor dedicated to blocking
+ /// operations.
+ ///
+ /// # Examples
+ ///
+ /// ```
+ /// use tokio::runtime::Runtime;
+ ///
+ /// # fn dox() {
+ /// // Create the runtime
+ /// let rt = Runtime::new().unwrap();
+ /// // Get a handle from this runtime
+ /// let handle = rt.handle();
+ /// // Spawn a blocking function onto the runtime using the handle
+ /// handle.spawn_blocking(|| {
+ /// println!("now running on a worker thread");
+ /// });
+ /// # }
+ /// ```
+ #[track_caller]
+ pub fn spawn_blocking<F, R>(&self, func: F) -> JoinHandle<R>
+ where
+ F: FnOnce() -> R + Send + 'static,
+ R: Send + 'static,
+ {
+ self.inner.blocking_spawner().spawn_blocking(self, func) // Uses the blocking spawner obtained from the scheduler handle.
+ }
+
+ /// Runs a future to completion on this `Handle`'s associated `Runtime`.
+ ///
+ /// This runs the given future on the current thread, blocking until it is
+ /// complete, and yielding its resolved result. Any tasks or timers which
+ /// the future spawns internally will be executed on the runtime.
+ ///
+ /// When this is used on a `current_thread` runtime, only the
+ /// [`Runtime::block_on`] method can drive the IO and timer drivers, but the
+ /// `Handle::block_on` method cannot drive them. This means that, when using
+ /// this method on a `current_thread` runtime, anything that relies on IO or
+ /// timers will not work unless there is another thread currently calling
+ /// [`Runtime::block_on`] on the same runtime.
+ ///
+ /// # If the runtime has been shut down
+ ///
+ /// If the `Handle`'s associated `Runtime` has been shut down (through
+ /// [`Runtime::shutdown_background`], [`Runtime::shutdown_timeout`], or by
+ /// dropping it) and `Handle::block_on` is used it might return an error or
+ /// panic. Specifically IO resources will return an error and timers will
+ /// panic. Runtime independent futures will run as normal.
+ ///
+ /// # Panics
+ ///
+ /// This function panics if the provided future panics, if called within an
+ /// asynchronous execution context, or if a timer future is executed on a
+ /// runtime that has been shut down.
+ ///
+ /// # Examples
+ ///
+ /// ```
+ /// use tokio::runtime::Runtime;
+ ///
+ /// // Create the runtime
+ /// let rt = Runtime::new().unwrap();
+ ///
+ /// // Get a handle from this runtime
+ /// let handle = rt.handle();
+ ///
+ /// // Execute the future, blocking the current thread until completion
+ /// handle.block_on(async {
+ /// println!("hello");
+ /// });
+ /// ```
+ ///
+ /// Or using `Handle::current`:
+ ///
+ /// ```
+ /// use tokio::runtime::Handle;
+ ///
+ /// #[tokio::main]
+ /// async fn main () {
+ /// let handle = Handle::current();
+ /// std::thread::spawn(move || {
+ /// // Using Handle::block_on to run async code in the new thread.
+ /// handle.block_on(async {
+ /// println!("hello");
+ /// });
+ /// });
+ /// }
+ /// ```
+ ///
+ /// [`JoinError`]: struct@crate::task::JoinError
+ /// [`JoinHandle`]: struct@crate::task::JoinHandle
+ /// [`Runtime::block_on`]: fn@crate::runtime::Runtime::block_on
+ /// [`Runtime::shutdown_background`]: fn@crate::runtime::Runtime::shutdown_background
+ /// [`Runtime::shutdown_timeout`]: fn@crate::runtime::Runtime::shutdown_timeout
+ /// [`spawn_blocking`]: crate::task::spawn_blocking
+ /// [`tokio::fs`]: crate::fs
+ /// [`tokio::net`]: crate::net
+ /// [`tokio::time`]: crate::time
+ #[track_caller]
+ pub fn block_on<F: Future>(&self, future: F) -> F::Output {
+ #[cfg(all(
+ tokio_unstable,
+ tokio_taskdump,
+ feature = "rt",
+ target_os = "linux",
+ any(target_arch = "aarch64", target_arch = "x86", target_arch = "x86_64")
+ ))]
+ let future = super::task::trace::Trace::root(future); // Taskdump builds only: shadow `future` with its trace-root wrapper.
+
+ #[cfg(all(tokio_unstable, feature = "tracing"))]
+ let future =
+ crate::util::trace::task(future, "block_on", None, super::task::Id::next().as_u64()); // Tracing builds only: wrap with a fresh task id and no name.
+
+ // Enter the runtime context. This sets the current driver handles and
+ // prevents blocking an existing runtime.
+ context::enter_runtime(&self.inner, true, |blocking| {
+ blocking.block_on(future).expect("failed to park thread") // Panics if the blocking region reports an error.
+ })
+ }
+
+ #[track_caller]
+ pub(crate) fn spawn_named<F>(&self, future: F, _name: Option<&str>) -> JoinHandle<F::Output> // `_name` is only read when tracing is compiled in.
+ where
+ F: Future + Send + 'static,
+ F::Output: Send + 'static,
+ {
+ let id = crate::runtime::task::Id::next(); // Allocated before any wrapping so tracing and the scheduler see the same id.
+ #[cfg(all(
+ tokio_unstable,
+ tokio_taskdump,
+ feature = "rt",
+ target_os = "linux",
+ any(target_arch = "aarch64", target_arch = "x86", target_arch = "x86_64")
+ ))]
+ let future = super::task::trace::Trace::root(future); // Taskdump builds only.
+ #[cfg(all(tokio_unstable, feature = "tracing"))]
+ let future = crate::util::trace::task(future, "task", _name, id.as_u64()); // Tracing builds only.
+ self.inner.spawn(future, id)
+ }
+
+ /// Returns the flavor of the current `Runtime`.
+ ///
+ /// # Examples
+ ///
+ /// ```
+ /// use tokio::runtime::{Handle, RuntimeFlavor};
+ ///
+ /// #[tokio::main(flavor = "current_thread")]
+ /// async fn main() {
+ /// assert_eq!(RuntimeFlavor::CurrentThread, Handle::current().runtime_flavor());
+ /// }
+ /// ```
+ ///
+ /// ```
+ /// use tokio::runtime::{Handle, RuntimeFlavor};
+ ///
+ /// #[tokio::main(flavor = "multi_thread", worker_threads = 4)]
+ /// async fn main() {
+ /// assert_eq!(RuntimeFlavor::MultiThread, Handle::current().runtime_flavor());
+ /// }
+ /// ```
+ pub fn runtime_flavor(&self) -> RuntimeFlavor { // Maps the scheduler handle variant to the public flavor enum.
+ match self.inner {
+ scheduler::Handle::CurrentThread(_) => RuntimeFlavor::CurrentThread,
+ #[cfg(all(feature = "rt-multi-thread", not(tokio_wasi)))]
+ scheduler::Handle::MultiThread(_) => RuntimeFlavor::MultiThread, // This arm only exists with `rt-multi-thread` and outside WASI.
+ }
+ }
+}
+
+cfg_metrics! {
+ use crate::runtime::RuntimeMetrics;
+
+ impl Handle {
+ /// Returns a view that lets you get information about how the runtime
+ /// is performing. The view is built from a clone of this handle.
+ pub fn metrics(&self) -> RuntimeMetrics {
+ RuntimeMetrics::new(self.clone())
+ }
+ }
+}
+
+cfg_taskdump! {
+ impl Handle {
+ /// Captures a snapshot of the runtime's state.
+ ///
+ /// This functionality is experimental, and comes with a number of
+ /// requirements and limitations.
+ ///
+ /// # Examples
+ ///
+ /// This can be used to get call traces of each task in the runtime.
+ /// Calls to `Handle::dump` should usually be enclosed in a
+ /// [timeout][crate::time::timeout], so that dumping does not escalate a
+ /// single blocked runtime thread into an entirely blocked runtime.
+ ///
+ /// ```
+ /// # use tokio::runtime::Runtime;
+ /// # fn dox() {
+ /// # let rt = Runtime::new().unwrap();
+ /// # rt.spawn(async {
+ /// use tokio::runtime::Handle;
+ /// use tokio::time::{timeout, Duration};
+ ///
+ /// // Inside an async block or function.
+ /// let handle = Handle::current();
+ /// if let Ok(dump) = timeout(Duration::from_secs(2), handle.dump()).await {
+ /// for (i, task) in dump.tasks().iter().enumerate() {
+ /// let trace = task.trace();
+ /// println!("TASK {i}:");
+ /// println!("{trace}\n");
+ /// }
+ /// }
+ /// # });
+ /// # }
+ /// ```
+ ///
+ /// This produces highly detailed traces of tasks; e.g.:
+ ///
+ /// ```plain
+ /// TASK 0:
+ /// ╼ dump::main::{{closure}}::a::{{closure}} at /tokio/examples/dump.rs:18:20
+ /// └╼ dump::main::{{closure}}::b::{{closure}} at /tokio/examples/dump.rs:23:20
+ /// └╼ dump::main::{{closure}}::c::{{closure}} at /tokio/examples/dump.rs:28:24
+ /// └╼ tokio::sync::barrier::Barrier::wait::{{closure}} at /tokio/tokio/src/sync/barrier.rs:129:10
+ /// └╼ <tokio::util::trace::InstrumentedAsyncOp<F> as core::future::future::Future>::poll at /tokio/tokio/src/util/trace.rs:77:46
+ /// └╼ tokio::sync::barrier::Barrier::wait_internal::{{closure}} at /tokio/tokio/src/sync/barrier.rs:183:36
+ /// └╼ tokio::sync::watch::Receiver<T>::changed::{{closure}} at /tokio/tokio/src/sync/watch.rs:604:55
+ /// └╼ tokio::sync::watch::changed_impl::{{closure}} at /tokio/tokio/src/sync/watch.rs:755:18
+ /// └╼ <tokio::sync::notify::Notified as core::future::future::Future>::poll at /tokio/tokio/src/sync/notify.rs:1103:9
+ /// └╼ tokio::sync::notify::Notified::poll_notified at /tokio/tokio/src/sync/notify.rs:996:32
+ /// ```
+ ///
+ /// # Requirements
+ ///
+ /// ## Debug Info Must Be Available
+ ///
+ /// To produce task traces, the application must **not** be compiled
+ /// with split debuginfo. On Linux, including debuginfo within the
+ /// application binary is the (correct) default. You can further ensure
+ /// this behavior with the following directive in your `Cargo.toml`:
+ ///
+ /// ```toml
+ /// [profile.*]
+ /// split-debuginfo = "off"
+ /// ```
+ ///
+ /// ## Unstable Features
+ ///
+ /// This functionality is **unstable**, and requires both the
+ /// `tokio_unstable` and `tokio_taskdump` cfg flags to be set.
+ ///
+ /// You can do this by setting the `RUSTFLAGS` environment variable
+ /// before invoking `cargo`; e.g.:
+ /// ```bash
+ /// RUSTFLAGS="--cfg tokio_unstable --cfg tokio_taskdump" cargo run --example dump
+ /// ```
+ ///
+ /// Or by [configuring][cargo-config] `rustflags` in
+ /// `.cargo/config.toml`:
+ /// ```text
+ /// [build]
+ /// rustflags = ["--cfg tokio_unstable", "--cfg tokio_taskdump"]
+ /// ```
+ ///
+ /// [cargo-config]:
+ /// https://doc.rust-lang.org/cargo/reference/config.html
+ ///
+ /// ## Platform Requirements
+ ///
+ /// Task dumps are supported on Linux atop aarch64, x86 and x86_64.
+ ///
+ /// ## Current Thread Runtime Requirements
+ ///
+ /// On the `current_thread` runtime, task dumps may only be requested
+ /// from *within* the context of the runtime being dumped. Do not, for
+ /// example, await `Handle::dump()` on a different runtime.
+ ///
+ /// # Limitations
+ ///
+ /// ## Performance
+ ///
+ /// Although enabling the `tokio_taskdump` feature imposes virtually no
+ /// additional runtime overhead, actually calling `Handle::dump` is
+ /// expensive. The runtime must synchronize and pause its workers, then
+ /// re-poll every task in a special tracing mode. Avoid requesting dumps
+ /// often.
+ ///
+ /// ## Local Executors
+ ///
+ /// Tasks managed by local executors (e.g., `FuturesUnordered` and
+ /// [`LocalSet`][crate::task::LocalSet]) may not appear in task dumps.
+ ///
+ /// ## Non-Termination When Workers Are Blocked
+ ///
+ /// The future produced by `Handle::dump` may never produce `Ready` if
+ /// another runtime worker is blocked for more than 250ms. This may
+ /// occur if a dump is requested during shutdown, or if another runtime
+ /// worker is infinite looping or synchronously deadlocked. For these
+ /// reasons, task dumping should usually be paired with an explicit
+ /// [timeout][crate::time::timeout].
+ pub async fn dump(&self) -> crate::runtime::Dump {
+ match &self.inner {
+ scheduler::Handle::CurrentThread(handle) => handle.dump(), // Not awaited: this arm yields the `Dump` directly.
+ #[cfg(all(feature = "rt-multi-thread", not(tokio_wasi)))]
+ scheduler::Handle::MultiThread(handle) => {
+ // Perform the trace in a separate thread so that the
+ // trace itself does not appear in the taskdump.
+ let handle = handle.clone(); // Owned clone, as `spawn_thread` requires a `'static` future.
+ spawn_thread(async {
+ let handle = handle; // Moves the clone into the async block.
+ handle.dump().await
+ }).await
+ },
+ }
+ }
+ }
+
+ cfg_rt_multi_thread! {
+ /// Spawns a thread, drives `f` on a new current-thread runtime there, and awaits its output.
+ async fn spawn_thread<F>(f: F) -> <F as Future>::Output
+ where
+ F: Future + Send + 'static,
+ <F as Future>::Output: Send + 'static
+ {
+ let (tx, rx) = crate::sync::oneshot::channel(); // Carries the output back to the awaiting caller.
+ crate::loom::thread::spawn(|| {
+ let rt = crate::runtime::Builder::new_current_thread().build().unwrap(); // Panics on the spawned thread if the runtime cannot be built.
+ rt.block_on(async {
+ let _ = tx.send(f.await); // A send error is ignored on purpose.
+ });
+ });
+ rx.await.unwrap() // Panics if the sender was dropped without sending.
+ }
+ }
+}
+
+/// Error returned by `try_current` when no runtime context is available; see the `is_*` methods for the cause.
+#[derive(Debug)]
+pub struct TryCurrentError {
+ kind: TryCurrentErrorKind, // Private cause; queried through the `is_*` methods.
+}
+
+impl TryCurrentError {
+ pub(crate) fn new_no_context() -> Self { // Error for the "no runtime in the Tokio context" case.
+ Self {
+ kind: TryCurrentErrorKind::NoContext,
+ }
+ }
+
+ pub(crate) fn new_thread_local_destroyed() -> Self { // Error for the "context thread-local destroyed" case.
+ Self {
+ kind: TryCurrentErrorKind::ThreadLocalDestroyed,
+ }
+ }
+
+ /// Returns true if the call failed because there is currently no runtime in
+ /// the Tokio context.
+ pub fn is_missing_context(&self) -> bool {
+ matches!(self.kind, TryCurrentErrorKind::NoContext)
+ }
+
+ /// Returns true if the call failed because the Tokio context thread-local
+ /// had been destroyed. This can usually only happen when called from the
+ /// destructor of another thread-local.
+ pub fn is_thread_local_destroyed(&self) -> bool {
+ matches!(self.kind, TryCurrentErrorKind::ThreadLocalDestroyed)
+ }
+}
+
+enum TryCurrentErrorKind { // Private cause of a `TryCurrentError`.
+ NoContext, // Displayed as `CONTEXT_MISSING_ERROR`.
+ ThreadLocalDestroyed, // Displayed as `THREAD_LOCAL_DESTROYED_ERROR`.
+}
+
+impl fmt::Debug for TryCurrentErrorKind { // Hand-written impl that prints the bare variant name.
+ fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
+ use TryCurrentErrorKind::*;
+ match self {
+ NoContext => f.write_str("NoContext"),
+ ThreadLocalDestroyed => f.write_str("ThreadLocalDestroyed"),
+ }
+ }
+}
+
+impl fmt::Display for TryCurrentError { // Writes the shared error-message constant that matches the cause.
+ fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
+ use TryCurrentErrorKind::*;
+ match self.kind {
+ NoContext => f.write_str(CONTEXT_MISSING_ERROR),
+ ThreadLocalDestroyed => f.write_str(THREAD_LOCAL_DESTROYED_ERROR),
+ }
+ }
+}
+
+impl error::Error for TryCurrentError {} // Default trait methods only; no methods are overridden.
diff --git a/third_party/rust/tokio/src/runtime/io/metrics.rs b/third_party/rust/tokio/src/runtime/io/metrics.rs
new file mode 100644
index 0000000000..ec341efe68
--- /dev/null
+++ b/third_party/rust/tokio/src/runtime/io/metrics.rs
@@ -0,0 +1,24 @@
+//! This file contains mocks of the metrics types used in the I/O driver.
+//!
+//! These mocks do not live in `src/runtime/mock.rs` because they
+//! need to be available in the case when `net` is enabled but
+//! `rt` is not.
+
+cfg_not_rt_and_metrics_and_net! { // No-op stand-in with the same method names the I/O driver calls.
+ #[derive(Default)]
+ pub(crate) struct IoDriverMetrics {}
+
+ impl IoDriverMetrics {
+ pub(crate) fn incr_fd_count(&self) {} // Intentionally empty.
+ pub(crate) fn dec_fd_count(&self) {} // Intentionally empty.
+ pub(crate) fn incr_ready_count_by(&self, _amt: u64) {} // Intentionally empty; `_amt` is ignored.
+ }
+}
+
+cfg_net! { // With `net`, `rt` and metrics all enabled, re-export the runtime's `IoDriverMetrics` type instead.
+ cfg_rt! {
+ cfg_metrics! {
+ pub(crate) use crate::runtime::IoDriverMetrics;
+ }
+ }
+}
diff --git a/third_party/rust/tokio/src/runtime/io/mod.rs b/third_party/rust/tokio/src/runtime/io/mod.rs
new file mode 100644
index 0000000000..2dd426f111
--- /dev/null
+++ b/third_party/rust/tokio/src/runtime/io/mod.rs
@@ -0,0 +1,356 @@
+#![cfg_attr(not(all(feature = "rt", feature = "net")), allow(dead_code))]
+
+mod registration;
+pub(crate) use registration::Registration;
+
+mod scheduled_io;
+use scheduled_io::ScheduledIo;
+
+mod metrics;
+
+use crate::io::interest::Interest;
+use crate::io::ready::Ready;
+use crate::runtime::driver;
+use crate::util::slab::{self, Slab};
+use crate::{loom::sync::RwLock, util::bit};
+
+use metrics::IoDriverMetrics;
+
+use std::fmt;
+use std::io;
+use std::time::Duration;
+
+/// I/O driver, backed by Mio.
+pub(crate) struct Driver {
+ /// Tracks the number of times `turn` is called. It is safe for this to wrap
+ /// as it is mostly used to determine when to call `compact()`. The current
+ /// value is also handed to `dispatch` along with each readiness event.
+ tick: u8,
+
+ /// True when an event with the signal token is received
+ signal_ready: bool,
+
+ /// Reuse the `mio::Events` value across calls to poll.
+ events: mio::Events,
+
+ /// Primary slab handle containing the state for each resource registered
+ /// with this driver.
+ resources: Slab<ScheduledIo>,
+
+ /// The system event queue.
+ poll: mio::Poll,
+}
+
+/// A reference to an I/O driver.
+pub(crate) struct Handle {
+ /// Registers I/O resources.
+ registry: mio::Registry,
+
+ /// Allocates `ScheduledIo` handles when creating new resources.
+ io_dispatch: RwLock<IoDispatcher>,
+
+ /// Used to wake up the reactor from a call to `turn`.
+ /// Not supported on Wasi due to lack of threading support.
+ #[cfg(not(tokio_wasi))]
+ waker: mio::Waker,
+
+ /// Counters updated by the driver: the fd count on register/deregister
+ /// and the ready-event count after each `turn`.
+ pub(crate) metrics: IoDriverMetrics,
+}
+
+/// A snapshot of a resource's readiness, as handed to tasks.
+#[derive(Debug)]
+pub(crate) struct ReadyEvent {
+ /// Driver tick that was stored in the readiness word when this event was
+ /// built; used later to reject clearing readiness with a stale event.
+ tick: u8,
+ /// The readiness bits carried by this event.
+ pub(crate) ready: Ready,
+ /// Whether the shutdown bit was set on the resource.
+ is_shutdown: bool,
+}
+
+cfg_net_unix!(
+     impl ReadyEvent {
+         /// Builds a copy of this event whose readiness set is replaced by
+         /// `ready`; the tick and shutdown flag are carried over unchanged.
+         pub(crate) fn with_ready(&self, ready: Ready) -> Self {
+             let tick = self.tick;
+             let is_shutdown = self.is_shutdown;
+
+             Self {
+                 tick,
+                 ready,
+                 is_shutdown,
+             }
+         }
+     }
+);
+
+/// State guarded by `Handle::io_dispatch`.
+struct IoDispatcher {
+ /// Hands out new `ScheduledIo` slots from the driver's slab.
+ allocator: slab::Allocator<ScheduledIo>,
+ /// Set once by `Handle::shutdown`; `Handle::allocate` fails afterwards.
+ is_shutdown: bool,
+}
+
+/// Selects which of the two readiness streams (read or write) is polled.
+#[derive(Debug, Eq, PartialEq, Clone, Copy)]
+enum Direction {
+ /// Maps to `READABLE | READ_CLOSED` (see `Direction::mask`).
+ Read,
+ /// Maps to `WRITABLE | WRITE_CLOSED` (see `Direction::mask`).
+ Write,
+}
+
+/// How `ScheduledIo::set_readiness` should treat the tick bits.
+enum Tick {
+ /// Store the given tick together with the new readiness.
+ Set(u8),
+ /// Only update readiness if the currently stored tick equals the given
+ /// one; otherwise the update is rejected.
+ Clear(u8),
+}
+
+// TODO: Don't use a fake token. Instead, reserve a slot entry for the wakeup
+// token.
+const TOKEN_WAKEUP: mio::Token = mio::Token(1 << 31);
+const TOKEN_SIGNAL: mio::Token = mio::Token(1 + (1 << 31));
+
+// The 24 least-significant bits of a token hold the slab address.
+const ADDRESS: bit::Pack = bit::Pack::least_significant(24);
+
+// Packs the generation value in the `readiness` field.
+//
+// The generation prevents a race condition where a slab slot is reused for a
+// new socket while the I/O driver is about to apply a readiness event. The
+// generation value is checked when setting new readiness. If the generations
+// do not match, then the readiness event is discarded.
+const GENERATION: bit::Pack = ADDRESS.then(7);
+
+fn _assert_kinds() {
+     // Never called for its effect: it only compiles if `Handle` is
+     // `Send + Sync`.
+     fn _is_send_sync<T>()
+     where
+         T: Send + Sync,
+     {
+     }
+
+     _is_send_sync::<Handle>();
+}
+
+// ===== impl Driver =====
+
+impl Driver {
+ /// Creates a new event loop, returning any error that happened during the
+ /// creation.
+ ///
+ /// `nevents` is the capacity given to the reusable `mio::Events` buffer.
+ /// On success the driver is returned together with a `Handle` holding a
+ /// clone of the registry and the slab allocator.
+ pub(crate) fn new(nevents: usize) -> io::Result<(Driver, Handle)> {
+ let poll = mio::Poll::new()?;
+ // The waker is registered under `TOKEN_WAKEUP`; not built on Wasi.
+ #[cfg(not(tokio_wasi))]
+ let waker = mio::Waker::new(poll.registry(), TOKEN_WAKEUP)?;
+ let registry = poll.registry().try_clone()?;
+
+ // The driver keeps the slab; the handle keeps its allocator.
+ let slab = Slab::new();
+ let allocator = slab.allocator();
+
+ let driver = Driver {
+ tick: 0,
+ signal_ready: false,
+ events: mio::Events::with_capacity(nevents),
+ poll,
+ resources: slab,
+ };
+
+ let handle = Handle {
+ registry,
+ io_dispatch: RwLock::new(IoDispatcher::new(allocator)),
+ #[cfg(not(tokio_wasi))]
+ waker,
+ metrics: IoDriverMetrics::default(),
+ };
+
+ Ok((driver, handle))
+ }
+
+ /// Runs one turn of the driver with no wait limit.
+ pub(crate) fn park(&mut self, rt_handle: &driver::Handle) {
+     self.turn(rt_handle.io(), None);
+ }
+
+ /// Runs one turn of the driver, passing `duration` as the maximum wait.
+ pub(crate) fn park_timeout(&mut self, rt_handle: &driver::Handle, duration: Duration) {
+     let max_wait = Some(duration);
+     self.turn(rt_handle.io(), max_wait);
+ }
+
+ /// Shuts the driver down. Only the call that actually flips the
+ /// dispatcher into the shutdown state notifies the registered resources.
+ pub(crate) fn shutdown(&mut self, rt_handle: &driver::Handle) {
+     if !rt_handle.io().shutdown() {
+         // The dispatcher was already shut down.
+         return;
+     }
+
+     // If a task is waiting on an I/O resource, notify it that the runtime
+     // is being shut down. Shutting a resource down also clears its wakers.
+     self.resources.for_each(|io| {
+         io.shutdown();
+     });
+ }
+
+ /// Runs one iteration of the driver: advances the tick, compacts the slab
+ /// when the tick hits `COMPACT_INTERVAL`, polls Mio with `max_wait`, and
+ /// dispatches every received event.
+ ///
+ /// # Panics
+ ///
+ /// Panics on any poll error other than `Interrupted` (and, on
+ /// `tokio_wasi`, `InvalidInput`).
+ fn turn(&mut self, handle: &Handle, max_wait: Option<Duration>) {
+ // How often to call `compact()` on the resource slab
+ const COMPACT_INTERVAL: u8 = 255;
+
+ self.tick = self.tick.wrapping_add(1);
+
+ // `tick` is a wrapping `u8`, so this holds once per 256 turns.
+ if self.tick == COMPACT_INTERVAL {
+ self.resources.compact()
+ }
+
+ let events = &mut self.events;
+
+ // Block waiting for an event to happen, peeling out how many events
+ // happened.
+ match self.poll.poll(events, max_wait) {
+ Ok(_) => {}
+ Err(ref e) if e.kind() == io::ErrorKind::Interrupted => {}
+ #[cfg(tokio_wasi)]
+ Err(e) if e.kind() == io::ErrorKind::InvalidInput => {
+ // In case of wasm32_wasi this error happens, when trying to poll without subscriptions
+ // just return from the park, as there would be nothing, which wakes us up.
+ }
+ Err(e) => panic!("unexpected error when polling the I/O driver: {:?}", e),
+ }
+
+ // Process all the events that came in, dispatching appropriately
+ // Only events dispatched to resources are counted; wakeup and signal
+ // tokens are not.
+ let mut ready_count = 0;
+ for event in events.iter() {
+ let token = event.token();
+
+ if token == TOKEN_WAKEUP {
+ // Nothing to do, the event is used to unblock the I/O driver
+ } else if token == TOKEN_SIGNAL {
+ self.signal_ready = true;
+ } else {
+ Self::dispatch(
+ &mut self.resources,
+ self.tick,
+ token,
+ Ready::from_mio(event),
+ );
+ ready_count += 1;
+ }
+ }
+
+ handle.metrics.incr_ready_count_by(ready_count);
+ }
+
+ /// Applies a readiness event to the resource addressed by `token` and
+ /// wakes the tasks interested in `ready`.
+ ///
+ /// The event is dropped silently when the slab no longer holds the
+ /// address or when the token is no longer valid for the slot.
+ fn dispatch(resources: &mut Slab<ScheduledIo>, tick: u8, token: mio::Token, ready: Ready) {
+     let raw = token.0;
+     let addr = slab::Address::from_usize(ADDRESS.unpack(raw));
+
+     if let Some(io) = resources.get(addr) {
+         let applied = io.set_readiness(Some(raw), Tick::Set(tick), |curr| curr | ready);
+
+         // An `Err` means the token is no longer valid, so nobody is woken.
+         if applied.is_ok() {
+             io.wake(ready);
+         }
+     }
+ }
+}
+
+impl fmt::Debug for Driver {
+     /// Prints only the type name; no fields are shown.
+     fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
+         f.write_str("Driver")
+     }
+}
+
+impl Handle {
+ /// Forces a reactor blocked in a call to `turn` to wakeup, or otherwise
+ /// makes the next call to `turn` return immediately.
+ ///
+ /// This method is intended to be used in situations where a notification
+ /// needs to otherwise be sent to the main reactor. If the reactor is
+ /// currently blocked inside of `turn` then it will wake up and soon return
+ /// after this method has been called. If the reactor is not currently
+ /// blocked in `turn`, then the next call to `turn` will not block and
+ /// return immediately.
+ ///
+ /// On `tokio_wasi` builds the body is compiled out and this does nothing.
+ ///
+ /// # Panics
+ ///
+ /// Panics if the underlying waker fails to wake.
+ pub(crate) fn unpark(&self) {
+ #[cfg(not(tokio_wasi))]
+ self.waker.wake().expect("failed to wake I/O driver");
+ }
+
+ /// Registers an I/O resource with the reactor for the given `Interest`.
+ ///
+ /// A `ScheduledIo` slot is allocated, a token packing the slot's generation
+ /// and address is built, and the source is registered with Mio under that
+ /// token. On success the fd count metric is incremented and the reference
+ /// to the shared `ScheduledIo` is returned.
+ ///
+ /// # Errors
+ ///
+ /// Returns an error if a slot cannot be allocated (see `allocate`) or if
+ /// the Mio registration fails.
+ pub(super) fn add_source(
+ &self,
+ source: &mut impl mio::event::Source,
+ interest: Interest,
+ ) -> io::Result<slab::Ref<ScheduledIo>> {
+ let (address, shared) = self.allocate()?;
+
+ // Token layout: address in the low bits, generation packed above it.
+ let token = GENERATION.pack(shared.generation(), ADDRESS.pack(address.as_usize(), 0));
+
+ self.registry
+ .register(source, mio::Token(token), interest.to_mio())?;
+
+ self.metrics.incr_fd_count();
+
+ Ok(shared)
+ }
+
+ /// Deregisters an I/O resource from the reactor, decrementing the fd count
+ /// metric only when the deregistration succeeds.
+ pub(super) fn deregister_source(&self, source: &mut impl mio::event::Source) -> io::Result<()> {
+     match self.registry.deregister(source) {
+         Ok(()) => {
+             self.metrics.dec_fd_count();
+             Ok(())
+         }
+         Err(err) => Err(err),
+     }
+ }
+
+ /// Marks the dispatcher as shut down.
+ ///
+ /// Returns `true` if this call performed the transition and `false` if
+ /// the dispatcher had already been shut down.
+ fn shutdown(&self) -> bool {
+     let mut dispatcher = self.io_dispatch.write().unwrap();
+     let first_call = !dispatcher.is_shutdown;
+     dispatcher.is_shutdown = true;
+     first_call
+ }
+
+ /// Allocates a new `ScheduledIo` slot from the slab.
+ ///
+ /// # Errors
+ ///
+ /// Fails if the dispatcher has been shut down or if the allocator has no
+ /// slot to hand out.
+ fn allocate(&self) -> io::Result<(slab::Address, slab::Ref<ScheduledIo>)> {
+     let dispatcher = self.io_dispatch.read().unwrap();
+
+     if dispatcher.is_shutdown {
+         let err = io::Error::new(
+             io::ErrorKind::Other,
+             crate::util::error::RUNTIME_SHUTTING_DOWN_ERROR,
+         );
+         return Err(err);
+     }
+
+     match dispatcher.allocator.allocate() {
+         Some(slot) => Ok(slot),
+         None => Err(io::Error::new(
+             io::ErrorKind::Other,
+             "reactor at max registered I/O resources",
+         )),
+     }
+ }
+}
+
+impl fmt::Debug for Handle {
+     /// Prints only the type name; no fields are shown.
+     fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
+         f.write_str("Handle")
+     }
+}
+
+// ===== impl IoDispatcher =====
+
+impl IoDispatcher {
+     /// Wraps `allocator` in a dispatcher that is not yet shut down.
+     fn new(allocator: slab::Allocator<ScheduledIo>) -> Self {
+         let is_shutdown = false;
+         IoDispatcher {
+             allocator,
+             is_shutdown,
+         }
+     }
+}
+
+impl Direction {
+     /// Returns the readiness bits that belong to this direction: the
+     /// "ready" bit together with the matching "closed" bit.
+     pub(super) fn mask(self) -> Ready {
+         let (open, closed) = match self {
+             Direction::Read => (Ready::READABLE, Ready::READ_CLOSED),
+             Direction::Write => (Ready::WRITABLE, Ready::WRITE_CLOSED),
+         };
+
+         open | closed
+     }
+}
+
+// Signal handling
+cfg_signal_internal_and_unix! {
+     impl Handle {
+         /// Registers the signal receiver stream for read readiness under
+         /// `TOKEN_SIGNAL`.
+         pub(crate) fn register_signal_receiver(&self, receiver: &mut mio::net::UnixStream) -> io::Result<()> {
+             self.registry.register(receiver, TOKEN_SIGNAL, mio::Interest::READABLE)
+         }
+     }
+
+     impl Driver {
+         /// Returns the current value of the signal-ready flag and resets
+         /// it to `false`.
+         pub(crate) fn consume_signal_ready(&mut self) -> bool {
+             std::mem::replace(&mut self.signal_ready, false)
+         }
+     }
+}
diff --git a/third_party/rust/tokio/src/runtime/io/registration.rs b/third_party/rust/tokio/src/runtime/io/registration.rs
new file mode 100644
index 0000000000..341fa0539a
--- /dev/null
+++ b/third_party/rust/tokio/src/runtime/io/registration.rs
@@ -0,0 +1,252 @@
+#![cfg_attr(not(feature = "net"), allow(dead_code))]
+
+use crate::io::interest::Interest;
+use crate::runtime::io::{Direction, Handle, ReadyEvent, ScheduledIo};
+use crate::runtime::scheduler;
+use crate::util::slab;
+
+use mio::event::Source;
+use std::io;
+use std::task::{Context, Poll};
+
+cfg_io_driver! {
+ /// Associates an I/O resource with the reactor instance that drives it.
+ ///
+ /// A registration represents an I/O resource registered with a Reactor such
+ /// that it will receive task notifications on readiness. This is the lowest
+ /// level API for integrating with a reactor.
+ ///
+ /// The association between an I/O resource and a reactor is made by calling
+ /// [`new_with_interest_and_handle`].
+ /// Once the association is established, it remains established until the
+ /// registration instance is dropped.
+ ///
+ /// A registration instance represents two separate readiness streams. One
+ /// for the read readiness and one for write readiness. These streams are
+ /// independent and can be consumed from separate tasks.
+ ///
+ /// **Note**: while `Registration` is `Sync`, the caller must ensure that
+ /// there are at most two tasks that use a registration instance
+ /// concurrently. One task for [`poll_read_ready`] and one task for
+ /// [`poll_write_ready`]. While violating this requirement is "safe" from a
+ /// Rust memory safety point of view, it will result in unexpected behavior
+ /// in the form of lost notifications and tasks hanging.
+ ///
+ /// ## Platform-specific events
+ ///
+ /// `Registration` also allows receiving platform-specific `mio::Ready`
+ /// events. These events are included as part of the read readiness event
+ /// stream. The write readiness event stream is only for `Ready::writable()`
+ /// events.
+ ///
+ /// [`new_with_interest_and_handle`]: method@Self::new_with_interest_and_handle
+ /// [`poll_read_ready`]: method@Self::poll_read_ready
+ /// [`poll_write_ready`]: method@Self::poll_write_ready
+ #[derive(Debug)]
+ pub(crate) struct Registration {
+ /// Handle to the associated runtime.
+ handle: scheduler::Handle,
+
+ /// Reference to state stored by the driver.
+ shared: slab::Ref<ScheduledIo>,
+ }
+}
+
+// NOTE(review): the invariant that makes these impls sound is not stated
+// here. Presumably it rests on how `slab::Ref<ScheduledIo>` is shared with
+// the driver; confirm and record a `SAFETY:` justification.
+unsafe impl Send for Registration {}
+unsafe impl Sync for Registration {}
+
+// ===== impl Registration =====
+
+impl Registration {
+ /// Registers the I/O resource with the reactor behind `handle` for the
+ /// given `Interest`. `hup` and `error` are not added implicitly; callers
+ /// interested in those states must include them in the readiness state
+ /// they pass in.
+ ///
+ /// # Return
+ ///
+ /// - `Ok` if the registration happened successfully
+ /// - `Err` if an error was encountered during registration
+ #[track_caller]
+ pub(crate) fn new_with_interest_and_handle(
+     io: &mut impl Source,
+     interest: Interest,
+     handle: scheduler::Handle,
+ ) -> io::Result<Registration> {
+     let registered = handle.driver().io().add_source(io, interest);
+
+     match registered {
+         Ok(shared) => Ok(Registration { handle, shared }),
+         Err(err) => Err(err),
+     }
+ }
+
+ /// Deregisters the I/O resource from the reactor it is associated with.
+ ///
+ /// This must be called before the I/O resource associated with the
+ /// registration is dropped.
+ ///
+ /// Deregistering does not guarantee that the resource can afterwards be
+ /// registered with a different reactor: some I/O resource types can only
+ /// be associated with a single reactor instance for their lifetime.
+ ///
+ /// # Return
+ ///
+ /// `Ok` is returned if the deregistration was successful. Calls to
+ /// `Reactor::turn` that happen after a successful `deregister` no longer
+ /// result in notifications being sent for this registration.
+ ///
+ /// `Err` is returned if an error is encountered.
+ pub(crate) fn deregister(&mut self, io: &mut impl Source) -> io::Result<()> {
+     let driver = self.handle.driver().io();
+     driver.deregister_source(io)
+ }
+
+ /// Forwards `event` to the shared `ScheduledIo` so the readiness it
+ /// carries can be cleared.
+ pub(crate) fn clear_readiness(&self, event: ReadyEvent) {
+ self.shared.clear_readiness(event);
+ }
+
+ // Polls the read readiness stream (`Direction::Read`).
+ //
+ // Uses the poll path, requiring the caller to ensure mutual exclusion for
+ // correctness. Only the last task to call this function is notified.
+ pub(crate) fn poll_read_ready(&self, cx: &mut Context<'_>) -> Poll<io::Result<ReadyEvent>> {
+ self.poll_ready(cx, Direction::Read)
+ }
+
+ // Polls the write readiness stream (`Direction::Write`).
+ //
+ // Uses the poll path, requiring the caller to ensure mutual exclusion for
+ // correctness. Only the last task to call this function is notified.
+ pub(crate) fn poll_write_ready(&self, cx: &mut Context<'_>) -> Poll<io::Result<ReadyEvent>> {
+ self.poll_ready(cx, Direction::Write)
+ }
+
+ // Runs `f` through `poll_io` on the read direction. Not compiled on
+ // `tokio_wasi`.
+ //
+ // Uses the poll path, requiring the caller to ensure mutual exclusion for
+ // correctness. Only the last task to call this function is notified.
+ #[cfg(not(tokio_wasi))]
+ pub(crate) fn poll_read_io<R>(
+ &self,
+ cx: &mut Context<'_>,
+ f: impl FnMut() -> io::Result<R>,
+ ) -> Poll<io::Result<R>> {
+ self.poll_io(cx, Direction::Read, f)
+ }
+
+ // Runs `f` through `poll_io` on the write direction.
+ //
+ // Uses the poll path, requiring the caller to ensure mutual exclusion for
+ // correctness. Only the last task to call this function is notified.
+ pub(crate) fn poll_write_io<R>(
+ &self,
+ cx: &mut Context<'_>,
+ f: impl FnMut() -> io::Result<R>,
+ ) -> Poll<io::Result<R>> {
+ self.poll_io(cx, Direction::Write, f)
+ }
+
+ /// Polls for events on the I/O resource's `direction` readiness stream.
+ ///
+ /// If called with a task context, notify the task when a new event is
+ /// received.
+ ///
+ /// Returns `Err` (see `gone`) when the returned event has its shutdown
+ /// flag set.
+ fn poll_ready(
+ &self,
+ cx: &mut Context<'_>,
+ direction: Direction,
+ ) -> Poll<io::Result<ReadyEvent>> {
+ ready!(crate::trace::trace_leaf(cx));
+ // Keep track of task budget
+ let coop = ready!(crate::runtime::coop::poll_proceed(cx));
+ let ev = ready!(self.shared.poll_readiness(cx, direction));
+
+ // Progress is only recorded on the success path below, not here.
+ if ev.is_shutdown {
+ return Poll::Ready(Err(gone()));
+ }
+
+ coop.made_progress();
+ Poll::Ready(Ok(ev))
+ }
+
+ /// Repeatedly waits for readiness in `direction` and then runs `f`.
+ ///
+ /// A `WouldBlock` error from `f` clears the readiness that was just
+ /// observed and goes back to waiting; any other outcome of `f` is
+ /// returned as-is.
+ fn poll_io<R>(
+     &self,
+     cx: &mut Context<'_>,
+     direction: Direction,
+     mut f: impl FnMut() -> io::Result<R>,
+ ) -> Poll<io::Result<R>> {
+     loop {
+         let event = ready!(self.poll_ready(cx, direction))?;
+
+         let err = match f() {
+             Ok(value) => return Poll::Ready(Ok(value)),
+             Err(err) => err,
+         };
+
+         if err.kind() != io::ErrorKind::WouldBlock {
+             return Poll::Ready(Err(err));
+         }
+
+         // The resource was not actually ready: drop the stale readiness.
+         self.clear_readiness(event);
+     }
+ }
+
+ /// Attempts the operation `f` once, without waiting.
+ ///
+ /// Returns `WouldBlock` immediately when no readiness matching `interest`
+ /// is recorded. If `f` itself reports `WouldBlock`, the observed readiness
+ /// is cleared and a fresh `WouldBlock` error is returned.
+ pub(crate) fn try_io<R>(
+     &self,
+     interest: Interest,
+     f: impl FnOnce() -> io::Result<R>,
+ ) -> io::Result<R> {
+     let event = self.shared.ready_event(interest);
+
+     // Don't attempt the operation if the resource is not ready.
+     if event.ready.is_empty() {
+         return Err(io::ErrorKind::WouldBlock.into());
+     }
+
+     match f() {
+         Ok(value) => Ok(value),
+         Err(err) => {
+             if err.kind() == io::ErrorKind::WouldBlock {
+                 self.clear_readiness(event);
+                 Err(io::ErrorKind::WouldBlock.into())
+             } else {
+                 Err(err)
+             }
+         }
+     }
+ }
+
+ /// Returns the I/O driver handle reached through the stored scheduler
+ /// handle.
+ fn handle(&self) -> &Handle {
+ self.handle.driver().io()
+ }
+}
+
+impl Drop for Registration {
+ /// Clears the reader and writer wakers stored for this registration.
+ fn drop(&mut self) {
+ // It is possible for a cycle to be created between wakers stored in
+ // `ScheduledIo` instances and `Arc<driver::Inner>`. To break this
+ // cycle, wakers are cleared. This is an imperfect solution as it is
+ // possible to store a `Registration` in a waker. In this case, the
+ // cycle would remain.
+ //
+ // See tokio-rs/tokio#3481 for more details.
+ self.shared.clear_wakers();
+ }
+}
+
+fn gone() -> io::Error {
+ io::Error::new(
+ io::ErrorKind::Other,
+ crate::util::error::RUNTIME_SHUTTING_DOWN_ERROR,
+ )
+}
+
+cfg_io_readiness! {
+ impl Registration {
+ /// Waits for a readiness event matching `interest`.
+ ///
+ /// Returns `Err` (see `gone`) when the event has its shutdown flag set.
+ pub(crate) async fn readiness(&self, interest: Interest) -> io::Result<ReadyEvent> {
+ let ev = self.shared.readiness(interest).await;
+
+ if ev.is_shutdown {
+ return Err(gone())
+ }
+
+ Ok(ev)
+ }
+
+ /// Waits for readiness and then runs `f`, repeating for as long as `f`
+ /// reports `WouldBlock`; any other result of `f` is returned as-is.
+ pub(crate) async fn async_io<R>(&self, interest: Interest, mut f: impl FnMut() -> io::Result<R>) -> io::Result<R> {
+ loop {
+ let event = self.readiness(interest).await?;
+
+ match f() {
+ Err(ref e) if e.kind() == io::ErrorKind::WouldBlock => {
+ // Stale readiness: clear it and wait again.
+ self.clear_readiness(event);
+ }
+ x => return x,
+ }
+ }
+ }
+ }
+}
diff --git a/third_party/rust/tokio/src/runtime/io/scheduled_io.rs b/third_party/rust/tokio/src/runtime/io/scheduled_io.rs
new file mode 100644
index 0000000000..197a4e0e21
--- /dev/null
+++ b/third_party/rust/tokio/src/runtime/io/scheduled_io.rs
@@ -0,0 +1,558 @@
+use super::{ReadyEvent, Tick};
+use crate::io::interest::Interest;
+use crate::io::ready::Ready;
+use crate::loom::sync::atomic::AtomicUsize;
+use crate::loom::sync::Mutex;
+use crate::util::bit;
+use crate::util::slab::Entry;
+use crate::util::WakeList;
+
+use std::sync::atomic::Ordering::{AcqRel, Acquire, Release};
+use std::task::{Context, Poll, Waker};
+
+use super::Direction;
+
+cfg_io_readiness! {
+ use crate::util::linked_list::{self, LinkedList};
+
+ use std::cell::UnsafeCell;
+ use std::future::Future;
+ use std::marker::PhantomPinned;
+ use std::pin::Pin;
+ use std::ptr::NonNull;
+}
+
+/// Stored in the I/O driver resource slab.
+#[derive(Debug)]
+pub(crate) struct ScheduledIo {
+ /// Packs the resource's readiness with the resource's generation.
+ readiness: AtomicUsize,
+
+ /// Wakers of the tasks waiting on this resource, behind a mutex.
+ waiters: Mutex<Waiters>,
+}
+
+cfg_io_readiness! {
+ // Intrusive list of `Waiter` nodes used by the async `readiness` path.
+ type WaitList = LinkedList<Waiter, <Waiter as linked_list::Link>::Target>;
+}
+
+/// Everything that may need to be woken for one `ScheduledIo`.
+#[derive(Debug, Default)]
+struct Waiters {
+ #[cfg(feature = "net")]
+ /// List of all current waiters.
+ list: WaitList,
+
+ /// Waker used for AsyncRead.
+ reader: Option<Waker>,
+
+ /// Waker used for AsyncWrite.
+ writer: Option<Waker>,
+}
+
+cfg_io_readiness! {
+ /// A node of the waiter list, embedded in a `Readiness` future.
+ #[derive(Debug)]
+ struct Waiter {
+ pointers: linked_list::Pointers<Waiter>,
+
+ /// The waker for this task.
+ waker: Option<Waker>,
+
+ /// The interest this waiter is waiting on.
+ interest: Interest,
+
+ /// Set by `ScheduledIo::wake` when this waiter's waker is taken.
+ is_ready: bool,
+
+ /// Should never be `Unpin`.
+ _p: PhantomPinned,
+ }
+
+ generate_addr_of_methods! {
+ impl<> Waiter {
+ unsafe fn addr_of_pointers(self: NonNull<Self>) -> NonNull<linked_list::Pointers<Waiter>> {
+ &self.pointers
+ }
+ }
+ }
+
+ /// Future returned by `readiness()`.
+ struct Readiness<'a> {
+ scheduled_io: &'a ScheduledIo,
+
+ state: State,
+
+ /// Entry in the waiter `LinkedList`.
+ waiter: UnsafeCell<Waiter>,
+ }
+
+ /// Progress of a `Readiness` future.
+ enum State {
+ /// Not yet polled; the waiter is not in the list.
+ Init,
+ /// The waiter has been pushed onto the list.
+ Waiting,
+ /// Readiness was observed or the waiter was notified.
+ Done,
+ }
+}
+
+// The `ScheduledIo::readiness` (`AtomicUsize`) is packed full of goodness.
+//
+// | shutdown | generation | driver tick | readiness |
+// |----------+------------+-------------+-----------|
+// |   1 bit  |   7 bits   |   8 bits    |  16 bits  |
+
+// Lowest 16 bits: the readiness set.
+const READINESS: bit::Pack = bit::Pack::least_significant(16);
+
+// Next 8 bits: the driver tick.
+const TICK: bit::Pack = READINESS.then(8);
+
+// Next 7 bits: the slot generation.
+const GENERATION: bit::Pack = TICK.then(7);
+
+// Next bit: the shutdown flag.
+const SHUTDOWN: bit::Pack = GENERATION.then(1);
+
+// The parent module packs the generation into tokens and this module packs
+// it into the readiness word; both definitions must be equal.
+#[test]
+fn test_generations_assert_same() {
+ assert_eq!(super::GENERATION, GENERATION);
+}
+
+// ===== impl ScheduledIo =====
+
+impl Entry for ScheduledIo {
+ /// Replaces the readiness word with one holding only the next generation
+ /// value; every other bit is packed as zero.
+ fn reset(&self) {
+ let state = self.readiness.load(Acquire);
+
+ let generation = GENERATION.unpack(state);
+ // NOTE(review): `pack_lossy` presumably truncates a generation that no
+ // longer fits in its 7 bits — confirm against `bit::Pack`.
+ let next = GENERATION.pack_lossy(generation + 1, 0);
+
+ self.readiness.store(next, Release);
+ }
+}
+
+impl Default for ScheduledIo {
+     /// Creates an entry with an all-zero readiness word and no waiters.
+     fn default() -> ScheduledIo {
+         let readiness = AtomicUsize::new(0);
+         let waiters = Mutex::new(Waiters::default());
+
+         ScheduledIo { readiness, waiters }
+     }
+}
+
+impl ScheduledIo {
+ /// Returns the generation currently stored in the readiness word.
+ pub(crate) fn generation(&self) -> usize {
+ GENERATION.unpack(self.readiness.load(Acquire))
+ }
+
+ /// Invoked when the IO driver is shut down; forces this ScheduledIo into a
+ /// permanently shutdown state.
+ ///
+ /// Sets the shutdown bit and then wakes waiters with `Ready::ALL`.
+ pub(super) fn shutdown(&self) {
+ let mask = SHUTDOWN.pack(1, 0);
+ self.readiness.fetch_or(mask, AcqRel);
+ self.wake(Ready::ALL);
+ }
+
+ /// Sets the readiness on this `ScheduledIo` by invoking the given closure on
+ /// the current value and storing the result with a compare-and-swap loop.
+ ///
+ /// # Arguments
+ /// - `token`: the token for this `ScheduledIo`. When `None`, the generation
+ /// check is skipped.
+ /// - `tick`: whether setting the tick or trying to clear readiness for a
+ /// specific tick.
+ /// - `f`: a closure returning a new readiness value given the previous
+ /// readiness. It may be invoked more than once if the update is retried.
+ ///
+ /// # Returns
+ ///
+ /// If the given token's generation no longer matches the `ScheduledIo`'s
+ /// generation, then the corresponding IO resource has been removed and
+ /// replaced with a new resource. In that case, this method returns `Err`.
+ /// `Err` is also returned for `Tick::Clear` when the stored tick differs
+ /// from the given one. Otherwise, this returns `Ok(())`.
+ pub(super) fn set_readiness(
+ &self,
+ token: Option<usize>,
+ tick: Tick,
+ f: impl Fn(Ready) -> Ready,
+ ) -> Result<(), ()> {
+ let mut current = self.readiness.load(Acquire);
+
+ loop {
+ let current_generation = GENERATION.unpack(current);
+
+ if let Some(token) = token {
+ // Check that the generation for this access is still the
+ // current one.
+ if GENERATION.unpack(token) != current_generation {
+ return Err(());
+ }
+ }
+
+ // Mask out the tick/generation bits so that the modifying
+ // function doesn't see them.
+ let current_readiness = Ready::from_usize(current);
+ let new = f(current_readiness);
+
+ let packed = match tick {
+ Tick::Set(t) => TICK.pack(t as usize, new.as_usize()),
+ Tick::Clear(t) => {
+ if TICK.unpack(current) as u8 != t {
+ // Trying to clear readiness with an old event!
+ return Err(());
+ }
+
+ TICK.pack(t as usize, new.as_usize())
+ }
+ };
+
+ // Only generation, tick and readiness are packed into the new value.
+ let next = GENERATION.pack(current_generation, packed);
+
+ match self
+ .readiness
+ .compare_exchange(current, next, AcqRel, Acquire)
+ {
+ Ok(_) => return Ok(()),
+ // we lost the race, retry!
+ Err(actual) => current = actual,
+ }
+ }
+ }
+
+ /// Notifies all pending waiters that have registered interest in `ready`.
+ ///
+ /// There may be many waiters to notify. Waking the pending task **must** be
+ /// done from outside of the lock otherwise there is a potential for a
+ /// deadlock.
+ ///
+ /// A stack array of wakers is created and filled with wakers to notify, the
+ /// lock is released, and the wakers are notified. Because there may be more
+ /// than 32 wakers to notify, if the stack array fills up, the lock is
+ /// released, the array is cleared, and the iteration continues.
+ pub(super) fn wake(&self, ready: Ready) {
+ let mut wakers = WakeList::new();
+
+ let mut waiters = self.waiters.lock();
+
+ // check for AsyncRead slot
+ if ready.is_readable() {
+ if let Some(waker) = waiters.reader.take() {
+ wakers.push(waker);
+ }
+ }
+
+ // check for AsyncWrite slot
+ if ready.is_writable() {
+ if let Some(waker) = waiters.writer.take() {
+ wakers.push(waker);
+ }
+ }
+
+ // The waiter list only exists with the `net` feature, so this loop is
+ // compiled out otherwise.
+ #[cfg(feature = "net")]
+ 'outer: loop {
+ let mut iter = waiters.list.drain_filter(|w| ready.satisfies(w.interest));
+
+ while wakers.can_push() {
+ match iter.next() {
+ Some(waiter) => {
+ // NOTE(review): the dereference relies on the waiter node
+ // staying valid while the lock is held — confirm.
+ let waiter = unsafe { &mut *waiter.as_ptr() };
+
+ if let Some(waker) = waiter.waker.take() {
+ waiter.is_ready = true;
+ wakers.push(waker);
+ }
+ }
+ None => {
+ break 'outer;
+ }
+ }
+ }
+
+ // The batch is full: release the lock before waking.
+ drop(waiters);
+
+ wakers.wake_all();
+
+ // Acquire the lock again.
+ waiters = self.waiters.lock();
+ }
+
+ // Release the lock before notifying
+ drop(waiters);
+
+ wakers.wake_all();
+ }
+
+ /// Builds a `ReadyEvent` from the current readiness word without
+ /// registering a waker. The readiness is restricted to `interest.mask()`.
+ pub(super) fn ready_event(&self, interest: Interest) -> ReadyEvent {
+ let curr = self.readiness.load(Acquire);
+
+ ReadyEvent {
+ tick: TICK.unpack(curr) as u8,
+ ready: interest.mask() & Ready::from_usize(READINESS.unpack(curr)),
+ is_shutdown: SHUTDOWN.unpack(curr) != 0,
+ }
+ }
+
+ /// Polls for readiness events in a given direction.
+ ///
+ /// These are to support `AsyncRead` and `AsyncWrite` polling methods,
+ /// which cannot use the `async fn` version. This uses reserved reader
+ /// and writer slots.
+ ///
+ /// Returns `Poll::Pending` only after the task's waker has been stored in
+ /// the slot for `direction` and a second load still shows no readiness and
+ /// no shutdown.
+ pub(super) fn poll_readiness(
+ &self,
+ cx: &mut Context<'_>,
+ direction: Direction,
+ ) -> Poll<ReadyEvent> {
+ let curr = self.readiness.load(Acquire);
+
+ let ready = direction.mask() & Ready::from_usize(READINESS.unpack(curr));
+ let is_shutdown = SHUTDOWN.unpack(curr) != 0;
+
+ if ready.is_empty() && !is_shutdown {
+ // Update the task info
+ let mut waiters = self.waiters.lock();
+ let slot = match direction {
+ Direction::Read => &mut waiters.reader,
+ Direction::Write => &mut waiters.writer,
+ };
+
+ // Avoid cloning the waker if one is already stored that matches the
+ // current task.
+ match slot {
+ Some(existing) => {
+ if !existing.will_wake(cx.waker()) {
+ *existing = cx.waker().clone();
+ }
+ }
+ None => {
+ *slot = Some(cx.waker().clone());
+ }
+ }
+
+ // Try again, in case the readiness was changed while we were
+ // taking the waiters lock
+ let curr = self.readiness.load(Acquire);
+ let ready = direction.mask() & Ready::from_usize(READINESS.unpack(curr));
+ let is_shutdown = SHUTDOWN.unpack(curr) != 0;
+ if is_shutdown {
+ // On shutdown the whole direction mask is reported as ready.
+ Poll::Ready(ReadyEvent {
+ tick: TICK.unpack(curr) as u8,
+ ready: direction.mask(),
+ is_shutdown,
+ })
+ } else if ready.is_empty() {
+ Poll::Pending
+ } else {
+ Poll::Ready(ReadyEvent {
+ tick: TICK.unpack(curr) as u8,
+ ready,
+ is_shutdown,
+ })
+ }
+ } else {
+ // Fast path: already ready (or shut down) on the first load.
+ Poll::Ready(ReadyEvent {
+ tick: TICK.unpack(curr) as u8,
+ ready,
+ is_shutdown,
+ })
+ }
+ }
+
+ /// Clears the readiness bits carried by `event`, except the closed bits.
+ ///
+ /// No token is passed, so the generation check is skipped; the update is
+ /// rejected (and ignored) when the stored tick differs from `event.tick`.
+ pub(crate) fn clear_readiness(&self, event: ReadyEvent) {
+ // This consumes the current readiness state **except** for closed
+ // states. Closed states are excluded because they are final states.
+ let mask_no_closed = event.ready - Ready::READ_CLOSED - Ready::WRITE_CLOSED;
+
+ // result isn't important
+ let _ = self.set_readiness(None, Tick::Clear(event.tick), |curr| curr - mask_no_closed);
+ }
+
+ /// Drops the stored reader and writer wakers while holding the waiters
+ /// lock. The waiter list is left untouched.
+ pub(crate) fn clear_wakers(&self) {
+     let mut guard = self.waiters.lock();
+     guard.reader = None;
+     guard.writer = None;
+ }
+}
+
+impl Drop for ScheduledIo {
+ /// Wakes every remaining waiter with `Ready::ALL` before the entry goes
+ /// away.
+ fn drop(&mut self) {
+ self.wake(Ready::ALL);
+ }
+}
+
+// NOTE(review): no justification is recorded for these impls. Presumably
+// they are needed because the waiter list holds raw pointers that are only
+// touched under the `waiters` mutex; confirm and add a `SAFETY:` note.
+unsafe impl Send for ScheduledIo {}
+unsafe impl Sync for ScheduledIo {}
+
+cfg_io_readiness! {
+ impl ScheduledIo {
+ /// An async version of `poll_readiness` which uses a linked list of wakers.
+ pub(crate) async fn readiness(&self, interest: Interest) -> ReadyEvent {
+ self.readiness_fut(interest).await
+ }
+
+ // Builds the `Readiness` future in its `Init` state with a waiter that
+ // has no waker yet.
+ //
+ // This is in a separate function so that the borrow checker doesn't think
+ // we are borrowing the `UnsafeCell` possibly over await boundaries.
+ //
+ // Go figure.
+ fn readiness_fut(&self, interest: Interest) -> Readiness<'_> {
+ Readiness {
+ scheduled_io: self,
+ state: State::Init,
+ waiter: UnsafeCell::new(Waiter {
+ pointers: linked_list::Pointers::new(),
+ waker: None,
+ is_ready: false,
+ interest,
+ _p: PhantomPinned,
+ }),
+ }
+ }
+ }
+
+ // Lets `Waiter` be stored in the intrusive list; handles are plain
+ // `NonNull<Waiter>` pointers, so the conversions below are identities.
+ unsafe impl linked_list::Link for Waiter {
+ type Handle = NonNull<Waiter>;
+ type Target = Waiter;
+
+ fn as_raw(handle: &NonNull<Waiter>) -> NonNull<Waiter> {
+ *handle
+ }
+
+ unsafe fn from_raw(ptr: NonNull<Waiter>) -> NonNull<Waiter> {
+ ptr
+ }
+
+ unsafe fn pointers(target: NonNull<Waiter>) -> NonNull<linked_list::Pointers<Waiter>> {
+ Waiter::addr_of_pointers(target)
+ }
+ }
+
+ // ===== impl Readiness =====
+
+ impl Future for Readiness<'_> {
+ type Output = ReadyEvent;
+
+ /// Drives the `Init` -> `Waiting` -> `Done` state machine.
+ ///
+ /// `Init` checks readiness (once without and once with the lock) and
+ /// otherwise enqueues the waiter; `Waiting` returns `Pending` until the
+ /// waiter has been marked ready; `Done` builds the event from the current
+ /// readiness word.
+ fn poll(self: Pin<&mut Self>, cx: &mut Context<'_>) -> Poll<Self::Output> {
+ use std::sync::atomic::Ordering::SeqCst;
+
+ let (scheduled_io, state, waiter) = unsafe {
+ let me = self.get_unchecked_mut();
+ (&me.scheduled_io, &mut me.state, &me.waiter)
+ };
+
+ loop {
+ match *state {
+ State::Init => {
+ // Optimistically check existing readiness
+ let curr = scheduled_io.readiness.load(SeqCst);
+ let ready = Ready::from_usize(READINESS.unpack(curr));
+ let is_shutdown = SHUTDOWN.unpack(curr) != 0;
+
+ // Safety: `waiter.interest` never changes
+ let interest = unsafe { (*waiter.get()).interest };
+ let ready = ready.intersection(interest);
+
+ if !ready.is_empty() || is_shutdown {
+ // Currently ready!
+ let tick = TICK.unpack(curr) as u8;
+ *state = State::Done;
+ return Poll::Ready(ReadyEvent { tick, ready, is_shutdown });
+ }
+
+ // Wasn't ready, take the lock (and check again while locked).
+ let mut waiters = scheduled_io.waiters.lock();
+
+ let curr = scheduled_io.readiness.load(SeqCst);
+ let mut ready = Ready::from_usize(READINESS.unpack(curr));
+ let is_shutdown = SHUTDOWN.unpack(curr) != 0;
+
+ // On shutdown every readiness bit is treated as set.
+ if is_shutdown {
+ ready = Ready::ALL;
+ }
+
+ let ready = ready.intersection(interest);
+
+ if !ready.is_empty() || is_shutdown {
+ // Currently ready!
+ let tick = TICK.unpack(curr) as u8;
+ *state = State::Done;
+ return Poll::Ready(ReadyEvent { tick, ready, is_shutdown });
+ }
+
+ // Not ready even after locked, insert into list...
+
+ // Safety: called while locked
+ unsafe {
+ (*waiter.get()).waker = Some(cx.waker().clone());
+ }
+
+ // Insert the waiter into the linked list
+ //
+ // safety: pointers from `UnsafeCell` are never null.
+ waiters
+ .list
+ .push_front(unsafe { NonNull::new_unchecked(waiter.get()) });
+ *state = State::Waiting;
+ }
+ State::Waiting => {
+ // Currently in the "Waiting" state, implying the caller has
+ // a waiter stored in the waiter list (guarded by
+ // `scheduled_io.waiters`). In order to access the waker
+ // fields, we must hold the lock.
+
+ let waiters = scheduled_io.waiters.lock();
+
+ // Safety: called while locked
+ let w = unsafe { &mut *waiter.get() };
+
+ if w.is_ready {
+ // Our waker has been notified.
+ *state = State::Done;
+ } else {
+ // Update the waker, if necessary.
+ if !w.waker.as_ref().unwrap().will_wake(cx.waker()) {
+ w.waker = Some(cx.waker().clone());
+ }
+
+ return Poll::Pending;
+ }
+
+ // Explicit drop of the lock to indicate the scope that the
+ // lock is held. Because holding the lock is required to
+ // ensure safe access to fields not held within the lock, it
+ // is helpful to visualize the scope of the critical
+ // section.
+ drop(waiters);
+ }
+ State::Done => {
+ // Safety: State::Done means it is no longer shared
+ let w = unsafe { &mut *waiter.get() };
+
+ let curr = scheduled_io.readiness.load(Acquire);
+ let is_shutdown = SHUTDOWN.unpack(curr) != 0;
+
+ // The returned tick might be newer than the event
+ // which notified our waker. This is ok because the future
+ // still didn't return `Poll::Ready`.
+ let tick = TICK.unpack(curr) as u8;
+
+ // The readiness state could have been cleared in the meantime,
+ // but we allow the returned ready set to be empty.
+ let curr_ready = Ready::from_usize(READINESS.unpack(curr));
+ let ready = curr_ready.intersection(w.interest);
+
+ return Poll::Ready(ReadyEvent {
+ tick,
+ ready,
+ is_shutdown,
+ });
+ }
+ }
+ }
+ }
+ }
+
+ impl Drop for Readiness<'_> {
+ /// Removes this future's waiter from the waiter list, under the lock,
+ /// regardless of the state the future is in.
+ fn drop(&mut self) {
+ let mut waiters = self.scheduled_io.waiters.lock();
+
+ // Safety: `waiter` is only ever stored in `waiters`
+ unsafe {
+ waiters
+ .list
+ .remove(NonNull::new_unchecked(self.waiter.get()))
+ };
+ }
+ }
+
+ // NOTE(review): no justification is recorded here. Presumably these are
+ // sound because the `UnsafeCell<Waiter>` is only accessed while holding
+ // the `waiters` lock or once the future is `Done`; confirm.
+ unsafe impl Send for Readiness<'_> {}
+ unsafe impl Sync for Readiness<'_> {}
+}
diff --git a/third_party/rust/tokio/src/runtime/metrics/batch.rs b/third_party/rust/tokio/src/runtime/metrics/batch.rs
new file mode 100644
index 0000000000..1bb4e261f7
--- /dev/null
+++ b/third_party/rust/tokio/src/runtime/metrics/batch.rs
@@ -0,0 +1,162 @@
+use crate::runtime::metrics::{HistogramBatch, WorkerMetrics};
+
+use std::sync::atomic::Ordering::Relaxed;
+use std::time::{Duration, Instant};
+
+pub(crate) struct MetricsBatch {
+ /// Number of times the worker parked.
+ park_count: u64,
+
+ /// Number of times the worker woke w/o doing work.
+ noop_count: u64,
+
+ /// Number of tasks stolen.
+ steal_count: u64,
+
+ /// Number of times tasks were stolen.
+ steal_operations: u64,
+
+ /// Number of tasks that were polled by the worker.
+ poll_count: u64,
+
+ /// Number of tasks polled when the worker entered park. This is used to
+ /// track the noop count.
+ poll_count_on_last_park: u64,
+
+ /// Number of tasks that were scheduled locally on this worker.
+ local_schedule_count: u64,
+
+ /// Number of tasks moved to the global queue to make space in the local
+ /// queue.
+ overflow_count: u64,
+
+ /// The total busy duration in nanoseconds.
+ busy_duration_total: u64,
+
+ /// Instant at which work last resumed (continued after park).
+ processing_scheduled_tasks_started_at: Instant,
+
+ /// If `Some`, tracks poll times in nanoseconds
+ poll_timer: Option<PollTimer>,
+}
+
+struct PollTimer {
+ /// Histogram of poll counts within each band.
+ poll_counts: HistogramBatch,
+
+ /// Instant when the most recent task started polling.
+ poll_started_at: Instant,
+}
+
+impl MetricsBatch {
+ pub(crate) fn new(worker_metrics: &WorkerMetrics) -> MetricsBatch {
+ let now = Instant::now();
+
+ MetricsBatch {
+ park_count: 0,
+ noop_count: 0,
+ steal_count: 0,
+ steal_operations: 0,
+ poll_count: 0,
+ poll_count_on_last_park: 0,
+ local_schedule_count: 0,
+ overflow_count: 0,
+ busy_duration_total: 0,
+ processing_scheduled_tasks_started_at: now,
+ poll_timer: worker_metrics
+ .poll_count_histogram
+ .as_ref()
+ .map(|worker_poll_counts| PollTimer {
+ poll_counts: HistogramBatch::from_histogram(worker_poll_counts),
+ poll_started_at: now,
+ }),
+ }
+ }
+
+ pub(crate) fn submit(&mut self, worker: &WorkerMetrics) {
+ worker.park_count.store(self.park_count, Relaxed);
+ worker.noop_count.store(self.noop_count, Relaxed);
+ worker.steal_count.store(self.steal_count, Relaxed);
+ worker
+ .steal_operations
+ .store(self.steal_operations, Relaxed);
+ worker.poll_count.store(self.poll_count, Relaxed);
+
+ worker
+ .busy_duration_total
+ .store(self.busy_duration_total, Relaxed);
+
+ worker
+ .local_schedule_count
+ .store(self.local_schedule_count, Relaxed);
+ worker.overflow_count.store(self.overflow_count, Relaxed);
+
+ if let Some(poll_timer) = &self.poll_timer {
+ let dst = worker.poll_count_histogram.as_ref().unwrap();
+ poll_timer.poll_counts.submit(dst);
+ }
+ }
+
+ /// The worker is about to park.
+ pub(crate) fn about_to_park(&mut self) {
+ self.park_count += 1;
+
+ if self.poll_count_on_last_park == self.poll_count {
+ self.noop_count += 1;
+ } else {
+ self.poll_count_on_last_park = self.poll_count;
+ }
+ }
+
+ /// Start processing a batch of tasks
+ pub(crate) fn start_processing_scheduled_tasks(&mut self) {
+ self.processing_scheduled_tasks_started_at = Instant::now();
+ }
+
+ /// Stop processing a batch of tasks
+ pub(crate) fn end_processing_scheduled_tasks(&mut self) {
+ let busy_duration = self.processing_scheduled_tasks_started_at.elapsed();
+ self.busy_duration_total += duration_as_u64(busy_duration);
+ }
+
+ /// Start polling an individual task
+ pub(crate) fn start_poll(&mut self) {
+ self.poll_count += 1;
+
+ if let Some(poll_timer) = &mut self.poll_timer {
+ poll_timer.poll_started_at = Instant::now();
+ }
+ }
+
+ /// Stop polling an individual task
+ pub(crate) fn end_poll(&mut self) {
+ if let Some(poll_timer) = &mut self.poll_timer {
+ let elapsed = duration_as_u64(poll_timer.poll_started_at.elapsed());
+ poll_timer.poll_counts.measure(elapsed, 1);
+ }
+ }
+
+ pub(crate) fn inc_local_schedule_count(&mut self) {
+ self.local_schedule_count += 1;
+ }
+}
+
+cfg_rt_multi_thread! {
+ impl MetricsBatch {
+ pub(crate) fn incr_steal_count(&mut self, by: u16) {
+ self.steal_count += by as u64;
+ }
+
+ pub(crate) fn incr_steal_operations(&mut self) {
+ self.steal_operations += 1;
+ }
+
+ pub(crate) fn incr_overflow_count(&mut self) {
+ self.overflow_count += 1;
+ }
+ }
+}
+
+fn duration_as_u64(dur: Duration) -> u64 {
+ u64::try_from(dur.as_nanos()).unwrap_or(u64::MAX)
+}
diff --git a/third_party/rust/tokio/src/runtime/metrics/histogram.rs b/third_party/rust/tokio/src/runtime/metrics/histogram.rs
new file mode 100644
index 0000000000..976f54fe85
--- /dev/null
+++ b/third_party/rust/tokio/src/runtime/metrics/histogram.rs
@@ -0,0 +1,502 @@
+use crate::loom::sync::atomic::{AtomicU64, Ordering::Relaxed};
+
+use std::cmp;
+use std::ops::Range;
+
+#[derive(Debug)]
+pub(crate) struct Histogram {
+ /// The histogram buckets
+ buckets: Box<[AtomicU64]>,
+
+ /// Bucket scale, linear or log
+ scale: HistogramScale,
+
+ /// Minimum resolution
+ resolution: u64,
+}
+
+#[derive(Debug, Clone)]
+pub(crate) struct HistogramBuilder {
+ /// Histogram scale
+ pub(crate) scale: HistogramScale,
+
+ /// Bucket resolution, must be > 0; `build` rounds it up to a power of 2 for `Log` scale
+ pub(crate) resolution: u64,
+
+ /// Number of buckets
+ pub(crate) num_buckets: usize,
+}
+
+#[derive(Debug)]
+pub(crate) struct HistogramBatch {
+ buckets: Box<[u64]>,
+ scale: HistogramScale,
+ resolution: u64,
+}
+
+cfg_unstable! {
+ /// Whether the histogram used to aggregate a metric uses a linear or
+ /// logarithmic scale.
+ #[derive(Debug, Copy, Clone, Eq, PartialEq)]
+ #[non_exhaustive]
+ pub enum HistogramScale {
+ /// Linear bucket scale
+ Linear,
+
+ /// Logarithmic bucket scale
+ Log,
+ }
+}
+
+impl Histogram {
+ pub(crate) fn num_buckets(&self) -> usize {
+ self.buckets.len()
+ }
+
+ pub(crate) fn get(&self, bucket: usize) -> u64 {
+ self.buckets[bucket].load(Relaxed)
+ }
+
+ pub(crate) fn bucket_range(&self, bucket: usize) -> Range<u64> {
+ match self.scale {
+ HistogramScale::Log => Range {
+ start: if bucket == 0 {
+ 0
+ } else {
+ self.resolution << (bucket - 1)
+ },
+ end: if bucket == self.buckets.len() - 1 {
+ u64::MAX
+ } else {
+ self.resolution << bucket
+ },
+ },
+ HistogramScale::Linear => Range {
+ start: self.resolution * bucket as u64,
+ end: if bucket == self.buckets.len() - 1 {
+ u64::MAX
+ } else {
+ self.resolution * (bucket as u64 + 1)
+ },
+ },
+ }
+ }
+}
+
+impl HistogramBatch {
+ pub(crate) fn from_histogram(histogram: &Histogram) -> HistogramBatch {
+ let buckets = vec![0; histogram.buckets.len()].into_boxed_slice();
+
+ HistogramBatch {
+ buckets,
+ scale: histogram.scale,
+ resolution: histogram.resolution,
+ }
+ }
+
+ pub(crate) fn measure(&mut self, value: u64, count: u64) {
+ self.buckets[self.value_to_bucket(value)] += count;
+ }
+
+ pub(crate) fn submit(&self, histogram: &Histogram) {
+ debug_assert_eq!(self.scale, histogram.scale);
+ debug_assert_eq!(self.resolution, histogram.resolution);
+ debug_assert_eq!(self.buckets.len(), histogram.buckets.len());
+
+ for i in 0..self.buckets.len() {
+ histogram.buckets[i].store(self.buckets[i], Relaxed);
+ }
+ }
+
+ fn value_to_bucket(&self, value: u64) -> usize {
+ match self.scale {
+ HistogramScale::Linear => {
+ let max = self.buckets.len() - 1;
+ cmp::min(value / self.resolution, max as u64) as usize
+ }
+ HistogramScale::Log => {
+ let max = self.buckets.len() - 1;
+
+ if value < self.resolution {
+ 0
+ } else {
+ let significant_digits = 64 - value.leading_zeros();
+ let bucket_digits = 64 - (self.resolution - 1).leading_zeros();
+ cmp::min(significant_digits as usize - bucket_digits as usize, max)
+ }
+ }
+ }
+ }
+}
+
+impl HistogramBuilder {
+ pub(crate) fn new() -> HistogramBuilder {
+ HistogramBuilder {
+ scale: HistogramScale::Linear,
+ // Resolution is in nanoseconds.
+ resolution: 100_000,
+ num_buckets: 10,
+ }
+ }
+
+ pub(crate) fn build(&self) -> Histogram {
+ let mut resolution = self.resolution;
+
+ assert!(resolution > 0);
+
+ if matches!(self.scale, HistogramScale::Log) {
+ resolution = resolution.next_power_of_two();
+ }
+
+ Histogram {
+ buckets: (0..self.num_buckets)
+ .map(|_| AtomicU64::new(0))
+ .collect::<Vec<_>>()
+ .into_boxed_slice(),
+ resolution,
+ scale: self.scale,
+ }
+ }
+}
+
+impl Default for HistogramBuilder {
+ fn default() -> HistogramBuilder {
+ HistogramBuilder::new()
+ }
+}
+
+#[cfg(test)]
+mod test {
+ use super::*;
+
+ macro_rules! assert_bucket_eq {
+ ($h:expr, $bucket:expr, $val:expr) => {{
+ assert_eq!($h.buckets[$bucket], $val);
+ }};
+ }
+
+ #[test]
+ fn log_scale_resolution_1() {
+ let h = HistogramBuilder {
+ scale: HistogramScale::Log,
+ resolution: 1,
+ num_buckets: 10,
+ }
+ .build();
+
+ assert_eq!(h.bucket_range(0), 0..1);
+ assert_eq!(h.bucket_range(1), 1..2);
+ assert_eq!(h.bucket_range(2), 2..4);
+ assert_eq!(h.bucket_range(3), 4..8);
+ assert_eq!(h.bucket_range(9), 256..u64::MAX);
+
+ let mut b = HistogramBatch::from_histogram(&h);
+
+ b.measure(0, 1);
+ assert_bucket_eq!(b, 0, 1);
+ assert_bucket_eq!(b, 1, 0);
+
+ b.measure(1, 1);
+ assert_bucket_eq!(b, 0, 1);
+ assert_bucket_eq!(b, 1, 1);
+ assert_bucket_eq!(b, 2, 0);
+
+ b.measure(2, 1);
+ assert_bucket_eq!(b, 0, 1);
+ assert_bucket_eq!(b, 1, 1);
+ assert_bucket_eq!(b, 2, 1);
+
+ b.measure(3, 1);
+ assert_bucket_eq!(b, 0, 1);
+ assert_bucket_eq!(b, 1, 1);
+ assert_bucket_eq!(b, 2, 2);
+
+ b.measure(4, 1);
+ assert_bucket_eq!(b, 0, 1);
+ assert_bucket_eq!(b, 1, 1);
+ assert_bucket_eq!(b, 2, 2);
+ assert_bucket_eq!(b, 3, 1);
+
+ b.measure(100, 1);
+ assert_bucket_eq!(b, 7, 1);
+
+ b.measure(128, 1);
+ assert_bucket_eq!(b, 8, 1);
+
+ b.measure(4096, 1);
+ assert_bucket_eq!(b, 9, 1);
+ }
+
+ #[test]
+ fn log_scale_resolution_2() {
+ let h = HistogramBuilder {
+ scale: HistogramScale::Log,
+ resolution: 2,
+ num_buckets: 10,
+ }
+ .build();
+
+ assert_eq!(h.bucket_range(0), 0..2);
+ assert_eq!(h.bucket_range(1), 2..4);
+ assert_eq!(h.bucket_range(2), 4..8);
+ assert_eq!(h.bucket_range(3), 8..16);
+ assert_eq!(h.bucket_range(9), 512..u64::MAX);
+
+ let mut b = HistogramBatch::from_histogram(&h);
+
+ b.measure(0, 1);
+ assert_bucket_eq!(b, 0, 1);
+ assert_bucket_eq!(b, 1, 0);
+
+ b.measure(1, 1);
+ assert_bucket_eq!(b, 0, 2);
+ assert_bucket_eq!(b, 1, 0);
+
+ b.measure(2, 1);
+ assert_bucket_eq!(b, 0, 2);
+ assert_bucket_eq!(b, 1, 1);
+ assert_bucket_eq!(b, 2, 0);
+
+ b.measure(3, 1);
+ assert_bucket_eq!(b, 0, 2);
+ assert_bucket_eq!(b, 1, 2);
+ assert_bucket_eq!(b, 2, 0);
+
+ b.measure(4, 1);
+ assert_bucket_eq!(b, 0, 2);
+ assert_bucket_eq!(b, 1, 2);
+ assert_bucket_eq!(b, 2, 1);
+
+ b.measure(5, 1);
+ assert_bucket_eq!(b, 0, 2);
+ assert_bucket_eq!(b, 1, 2);
+ assert_bucket_eq!(b, 2, 2);
+
+ b.measure(6, 1);
+ assert_bucket_eq!(b, 0, 2);
+ assert_bucket_eq!(b, 1, 2);
+ assert_bucket_eq!(b, 2, 3);
+
+ b.measure(7, 1);
+ assert_bucket_eq!(b, 0, 2);
+ assert_bucket_eq!(b, 1, 2);
+ assert_bucket_eq!(b, 2, 4);
+
+ b.measure(8, 1);
+ assert_bucket_eq!(b, 0, 2);
+ assert_bucket_eq!(b, 1, 2);
+ assert_bucket_eq!(b, 2, 4);
+ assert_bucket_eq!(b, 3, 1);
+
+ b.measure(100, 1);
+ assert_bucket_eq!(b, 6, 1);
+
+ b.measure(128, 1);
+ assert_bucket_eq!(b, 7, 1);
+
+ b.measure(4096, 1);
+ assert_bucket_eq!(b, 9, 1);
+
+ for bucket in h.buckets.iter() {
+ assert_eq!(bucket.load(Relaxed), 0);
+ }
+
+ b.submit(&h);
+
+ for i in 0..h.buckets.len() {
+ assert_eq!(h.buckets[i].load(Relaxed), b.buckets[i]);
+ }
+
+ b.submit(&h);
+
+ for i in 0..h.buckets.len() {
+ assert_eq!(h.buckets[i].load(Relaxed), b.buckets[i]);
+ }
+ }
+
+ #[test]
+ fn linear_scale_resolution_1() {
+ let h = HistogramBuilder {
+ scale: HistogramScale::Linear,
+ resolution: 1,
+ num_buckets: 10,
+ }
+ .build();
+
+ assert_eq!(h.bucket_range(0), 0..1);
+ assert_eq!(h.bucket_range(1), 1..2);
+ assert_eq!(h.bucket_range(2), 2..3);
+ assert_eq!(h.bucket_range(3), 3..4);
+ assert_eq!(h.bucket_range(9), 9..u64::MAX);
+
+ let mut b = HistogramBatch::from_histogram(&h);
+
+ b.measure(0, 1);
+ assert_bucket_eq!(b, 0, 1);
+ assert_bucket_eq!(b, 1, 0);
+
+ b.measure(1, 1);
+ assert_bucket_eq!(b, 0, 1);
+ assert_bucket_eq!(b, 1, 1);
+ assert_bucket_eq!(b, 2, 0);
+
+ b.measure(2, 1);
+ assert_bucket_eq!(b, 0, 1);
+ assert_bucket_eq!(b, 1, 1);
+ assert_bucket_eq!(b, 2, 1);
+ assert_bucket_eq!(b, 3, 0);
+
+ b.measure(3, 1);
+ assert_bucket_eq!(b, 0, 1);
+ assert_bucket_eq!(b, 1, 1);
+ assert_bucket_eq!(b, 2, 1);
+ assert_bucket_eq!(b, 3, 1);
+
+ b.measure(5, 1);
+ assert_bucket_eq!(b, 5, 1);
+
+ b.measure(4096, 1);
+ assert_bucket_eq!(b, 9, 1);
+
+ for bucket in h.buckets.iter() {
+ assert_eq!(bucket.load(Relaxed), 0);
+ }
+
+ b.submit(&h);
+
+ for i in 0..h.buckets.len() {
+ assert_eq!(h.buckets[i].load(Relaxed), b.buckets[i]);
+ }
+
+ b.submit(&h);
+
+ for i in 0..h.buckets.len() {
+ assert_eq!(h.buckets[i].load(Relaxed), b.buckets[i]);
+ }
+ }
+
+ #[test]
+ fn linear_scale_resolution_100() {
+ let h = HistogramBuilder {
+ scale: HistogramScale::Linear,
+ resolution: 100,
+ num_buckets: 10,
+ }
+ .build();
+
+ assert_eq!(h.bucket_range(0), 0..100);
+ assert_eq!(h.bucket_range(1), 100..200);
+ assert_eq!(h.bucket_range(2), 200..300);
+ assert_eq!(h.bucket_range(3), 300..400);
+ assert_eq!(h.bucket_range(9), 900..u64::MAX);
+
+ let mut b = HistogramBatch::from_histogram(&h);
+
+ b.measure(0, 1);
+ assert_bucket_eq!(b, 0, 1);
+ assert_bucket_eq!(b, 1, 0);
+
+ b.measure(50, 1);
+ assert_bucket_eq!(b, 0, 2);
+ assert_bucket_eq!(b, 1, 0);
+
+ b.measure(100, 1);
+ assert_bucket_eq!(b, 0, 2);
+ assert_bucket_eq!(b, 1, 1);
+ assert_bucket_eq!(b, 2, 0);
+
+ b.measure(101, 1);
+ assert_bucket_eq!(b, 0, 2);
+ assert_bucket_eq!(b, 1, 2);
+ assert_bucket_eq!(b, 2, 0);
+
+ b.measure(200, 1);
+ assert_bucket_eq!(b, 0, 2);
+ assert_bucket_eq!(b, 1, 2);
+ assert_bucket_eq!(b, 2, 1);
+
+ b.measure(299, 1);
+ assert_bucket_eq!(b, 0, 2);
+ assert_bucket_eq!(b, 1, 2);
+ assert_bucket_eq!(b, 2, 2);
+
+ b.measure(222, 1);
+ assert_bucket_eq!(b, 0, 2);
+ assert_bucket_eq!(b, 1, 2);
+ assert_bucket_eq!(b, 2, 3);
+
+ b.measure(300, 1);
+ assert_bucket_eq!(b, 0, 2);
+ assert_bucket_eq!(b, 1, 2);
+ assert_bucket_eq!(b, 2, 3);
+ assert_bucket_eq!(b, 3, 1);
+
+ b.measure(888, 1);
+ assert_bucket_eq!(b, 8, 1);
+
+ b.measure(4096, 1);
+ assert_bucket_eq!(b, 9, 1);
+
+ for bucket in h.buckets.iter() {
+ assert_eq!(bucket.load(Relaxed), 0);
+ }
+
+ b.submit(&h);
+
+ for i in 0..h.buckets.len() {
+ assert_eq!(h.buckets[i].load(Relaxed), b.buckets[i]);
+ }
+
+ b.submit(&h);
+
+ for i in 0..h.buckets.len() {
+ assert_eq!(h.buckets[i].load(Relaxed), b.buckets[i]);
+ }
+ }
+
+ #[test]
+ fn inc_by_more_than_one() {
+ let h = HistogramBuilder {
+ scale: HistogramScale::Linear,
+ resolution: 100,
+ num_buckets: 10,
+ }
+ .build();
+
+ let mut b = HistogramBatch::from_histogram(&h);
+
+ b.measure(0, 3);
+ assert_bucket_eq!(b, 0, 3);
+ assert_bucket_eq!(b, 1, 0);
+
+ b.measure(50, 5);
+ assert_bucket_eq!(b, 0, 8);
+ assert_bucket_eq!(b, 1, 0);
+
+ b.measure(100, 2);
+ assert_bucket_eq!(b, 0, 8);
+ assert_bucket_eq!(b, 1, 2);
+ assert_bucket_eq!(b, 2, 0);
+
+ b.measure(101, 19);
+ assert_bucket_eq!(b, 0, 8);
+ assert_bucket_eq!(b, 1, 21);
+ assert_bucket_eq!(b, 2, 0);
+
+ for bucket in h.buckets.iter() {
+ assert_eq!(bucket.load(Relaxed), 0);
+ }
+
+ b.submit(&h);
+
+ for i in 0..h.buckets.len() {
+ assert_eq!(h.buckets[i].load(Relaxed), b.buckets[i]);
+ }
+
+ b.submit(&h);
+
+ for i in 0..h.buckets.len() {
+ assert_eq!(h.buckets[i].load(Relaxed), b.buckets[i]);
+ }
+ }
+}
diff --git a/third_party/rust/tokio/src/runtime/metrics/io.rs b/third_party/rust/tokio/src/runtime/metrics/io.rs
new file mode 100644
index 0000000000..06efdd42d7
--- /dev/null
+++ b/third_party/rust/tokio/src/runtime/metrics/io.rs
@@ -0,0 +1,24 @@
+#![cfg_attr(not(feature = "net"), allow(dead_code))]
+
+use crate::loom::sync::atomic::{AtomicU64, Ordering::Relaxed};
+
+#[derive(Default)]
+pub(crate) struct IoDriverMetrics {
+ pub(super) fd_registered_count: AtomicU64,
+ pub(super) fd_deregistered_count: AtomicU64,
+ pub(super) ready_count: AtomicU64,
+}
+
+impl IoDriverMetrics {
+ pub(crate) fn incr_fd_count(&self) {
+ self.fd_registered_count.fetch_add(1, Relaxed);
+ }
+
+ pub(crate) fn dec_fd_count(&self) {
+ self.fd_deregistered_count.fetch_add(1, Relaxed);
+ }
+
+ pub(crate) fn incr_ready_count_by(&self, amt: u64) {
+ self.ready_count.fetch_add(amt, Relaxed);
+ }
+}
diff --git a/third_party/rust/tokio/src/runtime/metrics/mock.rs b/third_party/rust/tokio/src/runtime/metrics/mock.rs
new file mode 100644
index 0000000000..8f8345c08b
--- /dev/null
+++ b/third_party/rust/tokio/src/runtime/metrics/mock.rs
@@ -0,0 +1,55 @@
+//! This file contains mocks of the types in src/runtime/metrics
+
+pub(crate) struct SchedulerMetrics {}
+
+pub(crate) struct WorkerMetrics {}
+
+pub(crate) struct MetricsBatch {}
+
+#[derive(Clone, Default)]
+pub(crate) struct HistogramBuilder {}
+
+impl SchedulerMetrics {
+ pub(crate) fn new() -> Self {
+ Self {}
+ }
+
+ /// Increment the number of tasks scheduled externally
+ pub(crate) fn inc_remote_schedule_count(&self) {}
+}
+
+impl WorkerMetrics {
+ pub(crate) fn new() -> Self {
+ Self {}
+ }
+
+ pub(crate) fn from_config(config: &crate::runtime::Config) -> Self {
+ // Prevent the dead-code warning from being triggered
+ let _ = &config.metrics_poll_count_histogram;
+ Self::new()
+ }
+
+ pub(crate) fn set_queue_depth(&self, _len: usize) {}
+}
+
+impl MetricsBatch {
+ pub(crate) fn new(_: &WorkerMetrics) -> Self {
+ Self {}
+ }
+
+ pub(crate) fn submit(&mut self, _to: &WorkerMetrics) {}
+ pub(crate) fn about_to_park(&mut self) {}
+ pub(crate) fn inc_local_schedule_count(&mut self) {}
+ pub(crate) fn start_processing_scheduled_tasks(&mut self) {}
+ pub(crate) fn end_processing_scheduled_tasks(&mut self) {}
+ pub(crate) fn start_poll(&mut self) {}
+ pub(crate) fn end_poll(&mut self) {}
+}
+
+cfg_rt_multi_thread! {
+ impl MetricsBatch {
+ pub(crate) fn incr_steal_count(&mut self, _by: u16) {}
+ pub(crate) fn incr_steal_operations(&mut self) {}
+ pub(crate) fn incr_overflow_count(&mut self) {}
+ }
+}
diff --git a/third_party/rust/tokio/src/runtime/metrics/mod.rs b/third_party/rust/tokio/src/runtime/metrics/mod.rs
new file mode 100644
index 0000000000..88be4a5211
--- /dev/null
+++ b/third_party/rust/tokio/src/runtime/metrics/mod.rs
@@ -0,0 +1,40 @@
+//! This module contains the types needed to view information about how the
+//! runtime is performing.
+//!
+//! **Note**: This is an [unstable API][unstable]. The public API of types in
+//! this module may break in 1.x releases. See [the documentation on unstable
+//! features][unstable] for details.
+//!
+//! [unstable]: crate#unstable-features
+#![allow(clippy::module_inception)]
+
+cfg_metrics! {
+ mod batch;
+ pub(crate) use batch::MetricsBatch;
+
+ mod histogram;
+ pub(crate) use histogram::{Histogram, HistogramBatch, HistogramBuilder};
+ #[allow(unreachable_pub)] // rust-lang/rust#57411
+ pub use histogram::HistogramScale;
+
+ mod runtime;
+ #[allow(unreachable_pub)] // rust-lang/rust#57411
+ pub use runtime::RuntimeMetrics;
+
+ mod scheduler;
+ pub(crate) use scheduler::SchedulerMetrics;
+
+ mod worker;
+ pub(crate) use worker::WorkerMetrics;
+
+ cfg_net! {
+ mod io;
+ pub(crate) use io::IoDriverMetrics;
+ }
+}
+
+cfg_not_metrics! {
+ mod mock;
+
+ pub(crate) use mock::{SchedulerMetrics, WorkerMetrics, MetricsBatch, HistogramBuilder};
+}
diff --git a/third_party/rust/tokio/src/runtime/metrics/runtime.rs b/third_party/rust/tokio/src/runtime/metrics/runtime.rs
new file mode 100644
index 0000000000..1f990a1f85
--- /dev/null
+++ b/third_party/rust/tokio/src/runtime/metrics/runtime.rs
@@ -0,0 +1,883 @@
+use crate::runtime::Handle;
+
+use std::ops::Range;
+use std::sync::atomic::Ordering::Relaxed;
+use std::time::Duration;
+
+/// Handle to the runtime's metrics.
+///
+/// This handle is internally reference-counted and can be freely cloned. A
+/// `RuntimeMetrics` handle is obtained using the [`Runtime::metrics`] method.
+///
+/// [`Runtime::metrics`]: crate::runtime::Runtime::metrics()
+#[derive(Clone, Debug)]
+pub struct RuntimeMetrics {
+ handle: Handle,
+}
+
+impl RuntimeMetrics {
+ pub(crate) fn new(handle: Handle) -> RuntimeMetrics {
+ RuntimeMetrics { handle }
+ }
+
+ /// Returns the number of worker threads used by the runtime.
+ ///
+ /// The number of workers is set by configuring `worker_threads` on
+ /// `runtime::Builder`. When using the `current_thread` runtime, the return
+ /// value is always `1`.
+ ///
+ /// # Examples
+ ///
+ /// ```
+ /// use tokio::runtime::Handle;
+ ///
+ /// #[tokio::main]
+ /// async fn main() {
+ /// let metrics = Handle::current().metrics();
+ ///
+ /// let n = metrics.num_workers();
+ /// println!("Runtime is using {} workers", n);
+ /// }
+ /// ```
+ pub fn num_workers(&self) -> usize {
+ self.handle.inner.num_workers()
+ }
+
+ /// Returns the number of additional threads spawned by the runtime.
+ ///
+ /// The upper limit on these threads is set by configuring
+ /// `max_blocking_threads` on `runtime::Builder`.
+ ///
+ /// # Examples
+ ///
+ /// ```
+ /// use tokio::runtime::Handle;
+ ///
+ /// #[tokio::main]
+ /// async fn main() {
+ /// let _ = tokio::task::spawn_blocking(move || {
+ /// // Stand-in for compute-heavy work or using synchronous APIs
+ /// 1 + 1
+ /// }).await;
+ /// let metrics = Handle::current().metrics();
+ ///
+ /// let n = metrics.num_blocking_threads();
+ /// println!("Runtime has created {} threads", n);
+ /// }
+ /// ```
+ pub fn num_blocking_threads(&self) -> usize {
+ self.handle.inner.num_blocking_threads()
+ }
+
+ /// Returns the number of active tasks in the runtime.
+ ///
+ /// # Examples
+ ///
+ /// ```
+ /// use tokio::runtime::Handle;
+ ///
+ /// #[tokio::main]
+ /// async fn main() {
+ /// let metrics = Handle::current().metrics();
+ ///
+ /// let n = metrics.active_tasks_count();
+ /// println!("Runtime has {} active tasks", n);
+ /// }
+ /// ```
+ pub fn active_tasks_count(&self) -> usize {
+ self.handle.inner.active_tasks_count()
+ }
+
+ /// Returns the number of idle threads, which have been spawned by the runtime
+ /// for `spawn_blocking` calls.
+ ///
+ /// # Examples
+ ///
+ /// ```
+ /// use tokio::runtime::Handle;
+ ///
+ /// #[tokio::main]
+ /// async fn main() {
+ /// let _ = tokio::task::spawn_blocking(move || {
+ /// // Stand-in for compute-heavy work or using synchronous APIs
+ /// 1 + 1
+ /// }).await;
+ /// let metrics = Handle::current().metrics();
+ ///
+ /// let n = metrics.num_idle_blocking_threads();
+ /// println!("Runtime has {} idle blocking thread pool threads", n);
+ /// }
+ /// ```
+ pub fn num_idle_blocking_threads(&self) -> usize {
+ self.handle.inner.num_idle_blocking_threads()
+ }
+
+ /// Returns the number of tasks scheduled from **outside** of the runtime.
+ ///
+ /// The remote schedule count starts at zero when the runtime is created and
+ /// increases by one each time a task is woken from **outside** of the
+ /// runtime. This usually means that a task is spawned or notified from a
+ /// non-runtime thread and must be queued using the Runtime's injection
+ /// queue, which tends to be slower.
+ ///
+ /// The counter is monotonically increasing. It is never decremented or
+ /// reset to zero.
+ ///
+ /// # Examples
+ ///
+ /// ```
+ /// use tokio::runtime::Handle;
+ ///
+ /// #[tokio::main]
+ /// async fn main() {
+ /// let metrics = Handle::current().metrics();
+ ///
+ /// let n = metrics.remote_schedule_count();
+ /// println!("{} tasks were scheduled from outside the runtime", n);
+ /// }
+ /// ```
+ pub fn remote_schedule_count(&self) -> u64 {
+ self.handle
+ .inner
+ .scheduler_metrics()
+ .remote_schedule_count
+ .load(Relaxed)
+ }
+
+ /// Returns the number of times that tasks have been forced to yield back to the scheduler
+ /// after exhausting their task budgets.
+ ///
+ /// This count starts at zero when the runtime is created and increases by one each time a task yields due to exhausting its budget.
+ ///
+ /// The counter is monotonically increasing. It is never decremented or
+ /// reset to zero.
+ pub fn budget_forced_yield_count(&self) -> u64 {
+ self.handle
+ .inner
+ .scheduler_metrics()
+ .budget_forced_yield_count
+ .load(Relaxed)
+ }
+
+ /// Returns the total number of times the given worker thread has parked.
+ ///
+ /// The worker park count starts at zero when the runtime is created and
+ /// increases by one each time the worker parks the thread waiting for new
+ /// inbound events to process. This usually means the worker has processed
+ /// all pending work and is currently idle.
+ ///
+ /// The counter is monotonically increasing. It is never decremented or
+ /// reset to zero.
+ ///
+ /// # Arguments
+ ///
+ /// `worker` is the index of the worker being queried. The given value must
+ /// be between 0 and `num_workers()`. The index uniquely identifies a single
+ /// worker and will continue to identify the worker throughout the lifetime
+ /// of the runtime instance.
+ ///
+ /// # Panics
+ ///
+ /// The method panics when `worker` represents an invalid worker, i.e. is
+ /// greater than or equal to `num_workers()`.
+ ///
+ /// # Examples
+ ///
+ /// ```
+ /// use tokio::runtime::Handle;
+ ///
+ /// #[tokio::main]
+ /// async fn main() {
+ /// let metrics = Handle::current().metrics();
+ ///
+ /// let n = metrics.worker_park_count(0);
+ /// println!("worker 0 parked {} times", n);
+ /// }
+ /// ```
+ pub fn worker_park_count(&self, worker: usize) -> u64 {
+ self.handle
+ .inner
+ .worker_metrics(worker)
+ .park_count
+ .load(Relaxed)
+ }
+
+ /// Returns the number of times the given worker thread unparked but
+ /// performed no work before parking again.
+ ///
+ /// The worker no-op count starts at zero when the runtime is created and
+ /// increases by one each time the worker unparks the thread but finds no
+ /// new work and goes back to sleep. This indicates a false-positive wake up.
+ ///
+ /// The counter is monotonically increasing. It is never decremented or
+ /// reset to zero.
+ ///
+ /// # Arguments
+ ///
+ /// `worker` is the index of the worker being queried. The given value must
+ /// be between 0 and `num_workers()`. The index uniquely identifies a single
+ /// worker and will continue to identify the worker throughout the lifetime
+ /// of the runtime instance.
+ ///
+ /// # Panics
+ ///
+ /// The method panics when `worker` represents an invalid worker, i.e. is
+ /// greater than or equal to `num_workers()`.
+ ///
+ /// # Examples
+ ///
+ /// ```
+ /// use tokio::runtime::Handle;
+ ///
+ /// #[tokio::main]
+ /// async fn main() {
+ /// let metrics = Handle::current().metrics();
+ ///
+ /// let n = metrics.worker_noop_count(0);
+ /// println!("worker 0 had {} no-op unparks", n);
+ /// }
+ /// ```
+ pub fn worker_noop_count(&self, worker: usize) -> u64 {
+ self.handle
+ .inner
+ .worker_metrics(worker)
+ .noop_count
+ .load(Relaxed)
+ }
+
+ /// Returns the number of tasks the given worker thread stole from
+ /// another worker thread.
+ ///
+ /// This metric only applies to the **multi-threaded** runtime and will
+ /// always return `0` when using the current thread runtime.
+ ///
+ /// The worker steal count starts at zero when the runtime is created and
+ /// increases by `N` each time the worker has processed its scheduled queue
+ /// and successfully steals `N` more pending tasks from another worker.
+ ///
+ /// The counter is monotonically increasing. It is never decremented or
+ /// reset to zero.
+ ///
+ /// # Arguments
+ ///
+ /// `worker` is the index of the worker being queried. The given value must
+ /// be between 0 and `num_workers()`. The index uniquely identifies a single
+ /// worker and will continue to identify the worker throughout the lifetime
+ /// of the runtime instance.
+ ///
+ /// # Panics
+ ///
+ /// The method panics when `worker` represents an invalid worker, i.e. is
+ /// greater than or equal to `num_workers()`.
+ ///
+ /// # Examples
+ ///
+ /// ```
+ /// use tokio::runtime::Handle;
+ ///
+ /// #[tokio::main]
+ /// async fn main() {
+ /// let metrics = Handle::current().metrics();
+ ///
+ /// let n = metrics.worker_steal_count(0);
+ /// println!("worker 0 has stolen {} tasks", n);
+ /// }
+ /// ```
+ pub fn worker_steal_count(&self, worker: usize) -> u64 {
+ self.handle
+ .inner
+ .worker_metrics(worker)
+ .steal_count
+ .load(Relaxed)
+ }
+
+ /// Returns the number of times the given worker thread stole tasks from
+ /// another worker thread.
+ ///
+ /// This metric only applies to the **multi-threaded** runtime and will
+ /// always return `0` when using the current thread runtime.
+ ///
+ /// The steal operations count starts at zero when the runtime is created and
+ /// increases by one each time the worker has processed its scheduled queue
+ /// and successfully steals more pending tasks from another worker.
+ ///
+ /// The counter is monotonically increasing. It is never decremented or
+ /// reset to zero.
+ ///
+ /// # Arguments
+ ///
+ /// `worker` is the index of the worker being queried. The given value must
+ /// be between 0 and `num_workers()`. The index uniquely identifies a single
+ /// worker and will continue to identify the worker throughout the lifetime
+ /// of the runtime instance.
+ ///
+ /// # Panics
+ ///
+ /// The method panics when `worker` represents an invalid worker, i.e. is
+ /// greater than or equal to `num_workers()`.
+ ///
+ /// # Examples
+ ///
+ /// ```
+ /// use tokio::runtime::Handle;
+ ///
+ /// #[tokio::main]
+ /// async fn main() {
+ /// let metrics = Handle::current().metrics();
+ ///
+ /// let n = metrics.worker_steal_operations(0);
+ /// println!("worker 0 has stolen tasks {} times", n);
+ /// }
+ /// ```
+ pub fn worker_steal_operations(&self, worker: usize) -> u64 {
+ self.handle
+ .inner
+ .worker_metrics(worker)
+ .steal_operations
+ .load(Relaxed)
+ }
+
+ /// Returns the number of tasks the given worker thread has polled.
+ ///
+ /// The worker poll count starts at zero when the runtime is created and
+ /// increases by one each time the worker polls a scheduled task.
+ ///
+ /// The counter is monotonically increasing. It is never decremented or
+ /// reset to zero.
+ ///
+ /// # Arguments
+ ///
+ /// `worker` is the index of the worker being queried. The given value must
+ /// be between 0 and `num_workers()`. The index uniquely identifies a single
+ /// worker and will continue to identify the worker throughout the lifetime
+ /// of the runtime instance.
+ ///
+ /// # Panics
+ ///
+ /// The method panics when `worker` represents an invalid worker, i.e. is
+ /// greater than or equal to `num_workers()`.
+ ///
+ /// # Examples
+ ///
+ /// ```
+ /// use tokio::runtime::Handle;
+ ///
+ /// #[tokio::main]
+ /// async fn main() {
+ /// let metrics = Handle::current().metrics();
+ ///
+ /// let n = metrics.worker_poll_count(0);
+ /// println!("worker 0 has polled {} tasks", n);
+ /// }
+ /// ```
+ pub fn worker_poll_count(&self, worker: usize) -> u64 {
+ self.handle
+ .inner
+ .worker_metrics(worker)
+ .poll_count
+ .load(Relaxed)
+ }
+
+ /// Returns the amount of time the given worker thread has been busy.
+ ///
+ /// The worker busy duration starts at zero when the runtime is created and
+ /// increases whenever the worker is spending time processing work. Using
+ /// this value can indicate the load of the given worker. If a lot of time
+ /// is spent busy, then the worker is under load and will check for inbound
+ /// events less often.
+ ///
+ /// The timer is monotonically increasing. It is never decremented or reset
+ /// to zero.
+ ///
+ /// # Arguments
+ ///
+ /// `worker` is the index of the worker being queried. The given value must
+ /// be between 0 and `num_workers()`. The index uniquely identifies a single
+ /// worker and will continue to identify the worker throughout the lifetime
+ /// of the runtime instance.
+ ///
+ /// # Panics
+ ///
+ /// The method panics when `worker` represents an invalid worker, i.e. is
+ /// greater than or equal to `num_workers()`.
+ ///
+ /// # Examples
+ ///
+ /// ```
+ /// use tokio::runtime::Handle;
+ ///
+ /// #[tokio::main]
+ /// async fn main() {
+ /// let metrics = Handle::current().metrics();
+ ///
+ /// let n = metrics.worker_total_busy_duration(0);
+ /// println!("worker 0 was busy for a total of {:?}", n);
+ /// }
+ /// ```
+ pub fn worker_total_busy_duration(&self, worker: usize) -> Duration {
+     let metrics = self.handle.inner.worker_metrics(worker);
+     // The accumulated busy time is read as a nanosecond count.
+     Duration::from_nanos(metrics.busy_duration_total.load(Relaxed))
+ }
+
+ /// Returns the number of tasks scheduled from **within** the runtime on the
+ /// given worker's local queue.
+ ///
+ /// The local schedule count starts at zero when the runtime is created and
+ /// increases by one each time a task is woken from **inside** of the
+ /// runtime on the given worker. This usually means that a task is spawned
+ /// or notified from within a runtime thread and will be queued on the
+ /// worker-local queue.
+ ///
+ /// The counter is monotonically increasing. It is never decremented or
+ /// reset to zero.
+ ///
+ /// # Arguments
+ ///
+ /// `worker` is the index of the worker being queried. The given value must
+ /// be between 0 and `num_workers()`. The index uniquely identifies a single
+ /// worker and will continue to identify the worker throughout the lifetime
+ /// of the runtime instance.
+ ///
+ /// # Panics
+ ///
+ /// The method panics when `worker` represents an invalid worker, i.e. is
+ /// greater than or equal to `num_workers()`.
+ ///
+ /// # Examples
+ ///
+ /// ```
+ /// use tokio::runtime::Handle;
+ ///
+ /// #[tokio::main]
+ /// async fn main() {
+ /// let metrics = Handle::current().metrics();
+ ///
+ /// let n = metrics.worker_local_schedule_count(0);
+ /// println!("{} tasks were scheduled on the worker's local queue", n);
+ /// }
+ /// ```
+ pub fn worker_local_schedule_count(&self, worker: usize) -> u64 {
+     // Look up the per-worker metrics first, then read its local schedule counter.
+     let metrics = self.handle.inner.worker_metrics(worker);
+     metrics.local_schedule_count.load(Relaxed)
+ }
+
+ /// Returns the number of times the given worker thread saturated its local
+ /// queue.
+ ///
+ /// This metric only applies to the **multi-threaded** scheduler.
+ ///
+ /// The worker overflow count starts at zero when the runtime is created and
+ /// increases by one each time the worker attempts to schedule a task
+ /// locally, but its local queue is full. When this happens, half of the
+ /// local queue is moved to the injection queue.
+ ///
+ /// The counter is monotonically increasing. It is never decremented or
+ /// reset to zero.
+ ///
+ /// # Arguments
+ ///
+ /// `worker` is the index of the worker being queried. The given value must
+ /// be between 0 and `num_workers()`. The index uniquely identifies a single
+ /// worker and will continue to identify the worker throughout the lifetime
+ /// of the runtime instance.
+ ///
+ /// # Panics
+ ///
+ /// The method panics when `worker` represents an invalid worker, i.e. is
+ /// greater than or equal to `num_workers()`.
+ ///
+ /// # Examples
+ ///
+ /// ```
+ /// use tokio::runtime::Handle;
+ ///
+ /// #[tokio::main]
+ /// async fn main() {
+ /// let metrics = Handle::current().metrics();
+ ///
+ /// let n = metrics.worker_overflow_count(0);
+ /// println!("worker 0 has overflowed its queue {} times", n);
+ /// }
+ /// ```
+ pub fn worker_overflow_count(&self, worker: usize) -> u64 {
+     // Look up the per-worker metrics first, then read its overflow counter.
+     let metrics = self.handle.inner.worker_metrics(worker);
+     metrics.overflow_count.load(Relaxed)
+ }
+
+ /// Returns the number of tasks currently scheduled in the runtime's
+ /// injection queue.
+ ///
+ /// Tasks that are spawned or notified from a non-runtime thread are
+ /// scheduled using the runtime's injection queue. This metric returns the
+ /// **current** number of tasks pending in the injection queue. As such, the
+ /// returned value may increase or decrease as new tasks are scheduled and
+ /// processed.
+ ///
+ /// # Examples
+ ///
+ /// ```
+ /// use tokio::runtime::Handle;
+ ///
+ /// #[tokio::main]
+ /// async fn main() {
+ /// let metrics = Handle::current().metrics();
+ ///
+ /// let n = metrics.injection_queue_depth();
+ /// println!("{} tasks currently pending in the runtime's injection queue", n);
+ /// }
+ /// ```
+ pub fn injection_queue_depth(&self) -> usize {
+ // Thin forwarder: the value comes from `injection_queue_depth()` on `self.handle.inner`.
+ self.handle.inner.injection_queue_depth()
+ }
+
+ /// Returns the number of tasks currently scheduled in the given worker's
+ /// local queue.
+ ///
+ /// Tasks that are spawned or notified from within a runtime thread are
+ /// scheduled using that worker's local queue. This metric returns the
+ /// **current** number of tasks pending in the worker's local queue. As
+ /// such, the returned value may increase or decrease as new tasks are
+ /// scheduled and processed.
+ ///
+ /// # Arguments
+ ///
+ /// `worker` is the index of the worker being queried. The given value must
+ /// be between 0 and `num_workers()`. The index uniquely identifies a single
+ /// worker and will continue to identify the worker throughout the lifetime
+ /// of the runtime instance.
+ ///
+ /// # Panics
+ ///
+ /// The method panics when `worker` represents an invalid worker, i.e. is
+ /// greater than or equal to `num_workers()`.
+ ///
+ /// # Examples
+ ///
+ /// ```
+ /// use tokio::runtime::Handle;
+ ///
+ /// #[tokio::main]
+ /// async fn main() {
+ /// let metrics = Handle::current().metrics();
+ ///
+ /// let n = metrics.worker_local_queue_depth(0);
+ /// println!("{} tasks currently pending in worker 0's local queue", n);
+ /// }
+ /// ```
+ pub fn worker_local_queue_depth(&self, worker: usize) -> usize {
+ // Thin forwarder: `worker` is passed through unchanged to `self.handle.inner`.
+ self.handle.inner.worker_local_queue_depth(worker)
+ }
+
+ /// Returns `true` if the runtime is tracking the distribution of task poll
+ /// times.
+ ///
+ /// Task poll times are not instrumented by default as doing so requires
+ /// calling [`Instant::now()`] twice per task poll. The feature is enabled
+ /// by calling [`enable_metrics_poll_count_histogram()`] when building the
+ /// runtime.
+ ///
+ /// # Examples
+ ///
+ /// ```
+ /// use tokio::runtime::{self, Handle};
+ ///
+ /// fn main() {
+ /// runtime::Builder::new_current_thread()
+ /// .enable_metrics_poll_count_histogram()
+ /// .build()
+ /// .unwrap()
+ /// .block_on(async {
+ /// let metrics = Handle::current().metrics();
+ /// let enabled = metrics.poll_count_histogram_enabled();
+ ///
+ /// println!("Tracking task poll time distribution: {:?}", enabled);
+ /// });
+ /// }
+ /// ```
+ ///
+ /// [`enable_metrics_poll_count_histogram()`]: crate::runtime::Builder::enable_metrics_poll_count_histogram
+ /// [`Instant::now()`]: std::time::Instant::now
+ pub fn poll_count_histogram_enabled(&self) -> bool {
+     // Only worker 0 is inspected here.
+     let first_worker = self.handle.inner.worker_metrics(0);
+     first_worker.poll_count_histogram.is_some()
+ }
+
+ /// Returns the number of histogram buckets tracking the distribution of
+ /// task poll times.
+ ///
+ /// This value is configured by calling
+ /// [`metrics_poll_count_histogram_buckets()`] when building the runtime.
+ ///
+ /// # Examples
+ ///
+ /// ```
+ /// use tokio::runtime::{self, Handle};
+ ///
+ /// fn main() {
+ /// runtime::Builder::new_current_thread()
+ /// .enable_metrics_poll_count_histogram()
+ /// .build()
+ /// .unwrap()
+ /// .block_on(async {
+ /// let metrics = Handle::current().metrics();
+ /// let buckets = metrics.poll_count_histogram_num_buckets();
+ ///
+ /// println!("Histogram buckets: {:?}", buckets);
+ /// });
+ /// }
+ /// ```
+ ///
+ /// [`metrics_poll_count_histogram_buckets()`]:
+ /// crate::runtime::Builder::metrics_poll_count_histogram_buckets
+ pub fn poll_count_histogram_num_buckets(&self) -> usize {
+     let first_worker = self.handle.inner.worker_metrics(0);
+     match &first_worker.poll_count_histogram {
+         Some(histogram) => histogram.num_buckets(),
+         // No histogram on worker 0: report zero buckets.
+         None => 0,
+     }
+ }
+
+ /// Returns the range of task poll times tracked by the given bucket.
+ ///
+ /// This value is configured by calling
+ /// [`metrics_poll_count_histogram_resolution()`] when building the runtime.
+ ///
+ /// # Panics
+ ///
+ /// The method panics if `bucket` represents an invalid bucket index, i.e.
+ /// is greater than or equal to `poll_count_histogram_num_buckets()`.
+ ///
+ /// # Examples
+ ///
+ /// ```
+ /// use tokio::runtime::{self, Handle};
+ ///
+ /// fn main() {
+ /// runtime::Builder::new_current_thread()
+ /// .enable_metrics_poll_count_histogram()
+ /// .build()
+ /// .unwrap()
+ /// .block_on(async {
+ /// let metrics = Handle::current().metrics();
+ /// let buckets = metrics.poll_count_histogram_num_buckets();
+ ///
+ /// for i in 0..buckets {
+ /// let range = metrics.poll_count_histogram_bucket_range(i);
+ /// println!("Histogram bucket {} range: {:?}", i, range);
+ /// }
+ /// });
+ /// }
+ /// ```
+ ///
+ /// [`metrics_poll_count_histogram_resolution()`]:
+ /// crate::runtime::Builder::metrics_poll_count_histogram_resolution
+ #[track_caller]
+ pub fn poll_count_histogram_bucket_range(&self, bucket: usize) -> Range<Duration> {
+     let first_worker = self.handle.inner.worker_metrics(0);
+     match first_worker.poll_count_histogram.as_ref() {
+         Some(histogram) => {
+             // The histogram reports the bucket bounds as nanosecond counts.
+             let nanos = histogram.bucket_range(bucket);
+             Duration::from_nanos(nanos.start)..Duration::from_nanos(nanos.end)
+         }
+         // No histogram on worker 0: fall back to the default (empty) range.
+         None => Range::default(),
+     }
+ }
+
+ /// Returns the number of times the given worker polled tasks with a poll
+ /// duration within the given bucket's range.
+ ///
+ /// Each worker maintains its own histogram and the counts for each bucket
+ /// start at zero when the runtime is created. Each time the worker polls a
+ /// task, it measures how long the poll took and increments the associated
+ /// bucket by 1.
+ ///
+ /// Each bucket is a monotonically increasing counter. It is never
+ /// decremented or reset to zero.
+ ///
+ /// # Arguments
+ ///
+ /// `worker` is the index of the worker being queried. The given value must
+ /// be between 0 and `num_workers()`. The index uniquely identifies a single
+ /// worker and will continue to identify the worker throughout the lifetime
+ /// of the runtime instance.
+ ///
+ /// `bucket` is the index of the bucket being queried. The bucket is scoped
+ /// to the worker. The range represented by the bucket can be queried by
+ /// calling [`poll_count_histogram_bucket_range()`]. Each worker maintains
+ /// identical bucket ranges.
+ ///
+ /// # Panics
+ ///
+ /// The method panics when `worker` represents an invalid worker, i.e. is
+ /// greater than or equal to `num_workers()` or if `bucket` represents an
+ /// invalid bucket.
+ ///
+ /// # Examples
+ ///
+ /// ```
+ /// use tokio::runtime::{self, Handle};
+ ///
+ /// fn main() {
+ /// runtime::Builder::new_current_thread()
+ /// .enable_metrics_poll_count_histogram()
+ /// .build()
+ /// .unwrap()
+ /// .block_on(async {
+ /// let metrics = Handle::current().metrics();
+ /// let buckets = metrics.poll_count_histogram_num_buckets();
+ ///
+ /// for worker in 0..metrics.num_workers() {
+ /// for i in 0..buckets {
+ /// let count = metrics.poll_count_histogram_bucket_count(worker, i);
+ /// println!("Poll count {}", count);
+ /// }
+ /// }
+ /// });
+ /// }
+ /// ```
+ ///
+ /// [`poll_count_histogram_bucket_range()`]: crate::runtime::RuntimeMetrics::poll_count_histogram_bucket_range
+ #[track_caller]
+ pub fn poll_count_histogram_bucket_count(&self, worker: usize, bucket: usize) -> u64 {
+     let metrics = self.handle.inner.worker_metrics(worker);
+     match metrics.poll_count_histogram.as_ref() {
+         Some(histogram) => histogram.get(bucket),
+         // This worker has no histogram: every bucket reads as zero.
+         None => 0,
+     }
+ }
+
+ /// Returns the number of tasks currently scheduled in the blocking
+ /// thread pool, spawned using `spawn_blocking`.
+ ///
+ /// This metric returns the **current** number of tasks pending in
+ /// the blocking thread pool. As such, the returned value may increase
+ /// or decrease as new tasks are scheduled and processed.
+ ///
+ /// # Examples
+ ///
+ /// ```
+ /// use tokio::runtime::Handle;
+ ///
+ /// #[tokio::main]
+ /// async fn main() {
+ /// let metrics = Handle::current().metrics();
+ ///
+ /// let n = metrics.blocking_queue_depth();
+ /// println!("{} tasks currently pending in the blocking thread pool", n);
+ /// }
+ /// ```
+ pub fn blocking_queue_depth(&self) -> usize {
+ // Thin forwarder: the value comes from `blocking_queue_depth()` on `self.handle.inner`.
+ self.handle.inner.blocking_queue_depth()
+ }
+}
+
+cfg_net! {
+ impl RuntimeMetrics {
+ /// Returns the number of file descriptors that have been registered with the
+ /// runtime's I/O driver.
+ ///
+ /// # Examples
+ ///
+ /// ```
+ /// use tokio::runtime::Handle;
+ ///
+ /// #[tokio::main]
+ /// async fn main() {
+ /// let metrics = Handle::current().metrics();
+ ///
+ /// let registered_fds = metrics.io_driver_fd_registered_count();
+ /// println!("{} fds have been registered with the runtime's I/O driver.", registered_fds);
+ ///
+ /// let deregistered_fds = metrics.io_driver_fd_deregistered_count();
+ ///
+ /// let current_fd_count = registered_fds - deregistered_fds;
+ /// println!("{} fds are currently registered by the runtime's I/O driver.", current_fd_count);
+ /// }
+ /// ```
+ pub fn io_driver_fd_registered_count(&self) -> u64 {
+     self.with_io_driver_metrics(|io_metrics| io_metrics.fd_registered_count.load(Relaxed))
+ }
+
+ /// Returns the number of file descriptors that have been deregistered by the
+ /// runtime's I/O driver.
+ ///
+ /// # Examples
+ ///
+ /// ```
+ /// use tokio::runtime::Handle;
+ ///
+ /// #[tokio::main]
+ /// async fn main() {
+ /// let metrics = Handle::current().metrics();
+ ///
+ /// let n = metrics.io_driver_fd_deregistered_count();
+ /// println!("{} fds have been deregistered by the runtime's I/O driver.", n);
+ /// }
+ /// ```
+ pub fn io_driver_fd_deregistered_count(&self) -> u64 {
+     self.with_io_driver_metrics(|io_metrics| io_metrics.fd_deregistered_count.load(Relaxed))
+ }
+
+ /// Returns the number of ready events processed by the runtime's
+ /// I/O driver.
+ ///
+ /// # Examples
+ ///
+ /// ```
+ /// use tokio::runtime::Handle;
+ ///
+ /// #[tokio::main]
+ /// async fn main() {
+ /// let metrics = Handle::current().metrics();
+ ///
+ /// let n = metrics.io_driver_ready_count();
+ /// println!("{} ready events processed by the runtime's I/O driver.", n);
+ /// }
+ /// ```
+ pub fn io_driver_ready_count(&self) -> u64 {
+     self.with_io_driver_metrics(|io_metrics| {
+         io_metrics.ready_count.load(Relaxed)
+     })
+ }
+
+ /// Runs `f` against the I/O driver's metrics, or yields `0` when the
+ /// driver handle exposes no I/O handle.
+ fn with_io_driver_metrics<F>(&self, f: F) -> u64
+ where
+     F: Fn(&super::IoDriverMetrics) -> u64,
+ {
+     // TODO: Investigate if this should return 0, most of our metrics always increase
+     // thus this breaks that guarantee.
+     match self.handle.inner.driver().io.as_ref() {
+         Some(io_handle) => f(&io_handle.metrics),
+         None => 0,
+     }
+ }
+ }
+}
diff --git a/third_party/rust/tokio/src/runtime/metrics/scheduler.rs b/third_party/rust/tokio/src/runtime/metrics/scheduler.rs
new file mode 100644
index 0000000000..d9f8edfaab
--- /dev/null
+++ b/third_party/rust/tokio/src/runtime/metrics/scheduler.rs
@@ -0,0 +1,34 @@
+use crate::loom::sync::atomic::{AtomicU64, Ordering::Relaxed};
+
+/// Retrieves metrics from the Tokio runtime.
+///
+/// **Note**: This is an [unstable API][unstable]. The public API of this type
+/// may break in 1.x releases. See [the documentation on unstable
+/// features][unstable] for details.
+///
+/// [unstable]: crate#unstable-features
+#[derive(Debug)]
+pub(crate) struct SchedulerMetrics {
+ /// Number of tasks that are scheduled from outside the runtime.
+ pub(super) remote_schedule_count: AtomicU64,
+ /// Number of tasks forced to yield due to budget exhaustion.
+ pub(super) budget_forced_yield_count: AtomicU64,
+}
+
+impl SchedulerMetrics {
+    /// Creates a metrics set with both counters starting at zero.
+    pub(crate) fn new() -> SchedulerMetrics {
+        let remote_schedule_count = AtomicU64::new(0);
+        let budget_forced_yield_count = AtomicU64::new(0);
+
+        Self {
+            remote_schedule_count,
+            budget_forced_yield_count,
+        }
+    }
+
+    /// Adds one to the number of tasks scheduled externally.
+    pub(crate) fn inc_remote_schedule_count(&self) {
+        let _ = self.remote_schedule_count.fetch_add(1, Relaxed);
+    }
+
+    /// Adds one to the number of tasks forced to yield due to budget exhaustion.
+    pub(crate) fn inc_budget_forced_yield_count(&self) {
+        let _ = self.budget_forced_yield_count.fetch_add(1, Relaxed);
+    }
+}
diff --git a/third_party/rust/tokio/src/runtime/metrics/worker.rs b/third_party/rust/tokio/src/runtime/metrics/worker.rs
new file mode 100644
index 0000000000..e0f23e6a08
--- /dev/null
+++ b/third_party/rust/tokio/src/runtime/metrics/worker.rs
@@ -0,0 +1,80 @@
+use crate::loom::sync::atomic::Ordering::Relaxed;
+use crate::loom::sync::atomic::{AtomicU64, AtomicUsize};
+use crate::runtime::metrics::Histogram;
+use crate::runtime::Config;
+
+/// Retrieve runtime worker metrics.
+///
+/// **Note**: This is an [unstable API][unstable]. The public API of this type
+/// may break in 1.x releases. See [the documentation on unstable
+/// features][unstable] for details.
+///
+/// [unstable]: crate#unstable-features
+#[derive(Debug)]
+#[repr(align(128))]
+pub(crate) struct WorkerMetrics {
+ /// Number of times the worker parked.
+ pub(crate) park_count: AtomicU64,
+
+ /// Number of times the worker woke then parked again without doing work.
+ pub(crate) noop_count: AtomicU64,
+
+ /// Number of tasks the worker stole.
+ pub(crate) steal_count: AtomicU64,
+
+ /// Number of times the worker stole.
+ pub(crate) steal_operations: AtomicU64,
+
+ /// Number of tasks the worker polled.
+ pub(crate) poll_count: AtomicU64,
+
+ /// Amount of time the worker spent doing work vs. parking.
+ pub(crate) busy_duration_total: AtomicU64,
+
+ /// Number of tasks scheduled for execution on the worker's local queue.
+ pub(crate) local_schedule_count: AtomicU64,
+
+ /// Number of tasks moved from the local queue to the global queue to free space.
+ pub(crate) overflow_count: AtomicU64,
+
+ /// Number of tasks currently in the local queue. Used only by the
+ /// current-thread scheduler.
+ pub(crate) queue_depth: AtomicUsize,
+
+ /// If `Some`, tracks the number of polls by duration range.
+ pub(super) poll_count_histogram: Option<Histogram>,
+}
+
+impl WorkerMetrics {
+    /// Builds worker metrics for `config`, attaching a poll-count histogram
+    /// when the config carries a histogram builder.
+    pub(crate) fn from_config(config: &Config) -> WorkerMetrics {
+        let base = WorkerMetrics::new();
+        let poll_count_histogram = config
+            .metrics_poll_count_histogram
+            .as_ref()
+            .map(|builder| builder.build());
+
+        WorkerMetrics {
+            poll_count_histogram,
+            ..base
+        }
+    }
+
+    /// Creates worker metrics with every counter at zero and no histogram.
+    pub(crate) fn new() -> WorkerMetrics {
+        Self {
+            park_count: AtomicU64::new(0),
+            noop_count: AtomicU64::new(0),
+            steal_count: AtomicU64::new(0),
+            steal_operations: AtomicU64::new(0),
+            poll_count: AtomicU64::new(0),
+            overflow_count: AtomicU64::new(0),
+            busy_duration_total: AtomicU64::new(0),
+            local_schedule_count: AtomicU64::new(0),
+            queue_depth: AtomicUsize::new(0),
+            poll_count_histogram: None,
+        }
+    }
+
+    /// Reads the last recorded queue depth.
+    pub(crate) fn queue_depth(&self) -> usize {
+        let depth = &self.queue_depth;
+        depth.load(Relaxed)
+    }
+
+    /// Records `len` as the queue depth.
+    pub(crate) fn set_queue_depth(&self, len: usize) {
+        let depth = &self.queue_depth;
+        depth.store(len, Relaxed);
+    }
+}
diff --git a/third_party/rust/tokio/src/runtime/mod.rs b/third_party/rust/tokio/src/runtime/mod.rs
new file mode 100644
index 0000000000..cb198f51f0
--- /dev/null
+++ b/third_party/rust/tokio/src/runtime/mod.rs
@@ -0,0 +1,265 @@
+//! The Tokio runtime.
+//!
+//! Unlike other Rust programs, asynchronous applications require runtime
+//! support. In particular, the following runtime services are necessary:
+//!
+//! * An **I/O event loop**, called the driver, which drives I/O resources and
+//! dispatches I/O events to tasks that depend on them.
+//! * A **scheduler** to execute [tasks] that use these I/O resources.
+//! * A **timer** for scheduling work to run after a set period of time.
+//!
+//! Tokio's [`Runtime`] bundles all of these services as a single type, allowing
+//! them to be started, shut down, and configured together. However, often it is
+//! not required to configure a [`Runtime`] manually, and a user may just use the
+//! [`tokio::main`] attribute macro, which creates a [`Runtime`] under the hood.
+//!
+//! # Usage
+//!
+//! When no fine tuning is required, the [`tokio::main`] attribute macro can be
+//! used.
+//!
+//! ```no_run
+//! use tokio::net::TcpListener;
+//! use tokio::io::{AsyncReadExt, AsyncWriteExt};
+//!
+//! #[tokio::main]
+//! async fn main() -> Result<(), Box<dyn std::error::Error>> {
+//! let listener = TcpListener::bind("127.0.0.1:8080").await?;
+//!
+//! loop {
+//! let (mut socket, _) = listener.accept().await?;
+//!
+//! tokio::spawn(async move {
+//! let mut buf = [0; 1024];
+//!
+//! // In a loop, read data from the socket and write the data back.
+//! loop {
+//! let n = match socket.read(&mut buf).await {
+//! // socket closed
+//! Ok(n) if n == 0 => return,
+//! Ok(n) => n,
+//! Err(e) => {
+//! println!("failed to read from socket; err = {:?}", e);
+//! return;
+//! }
+//! };
+//!
+//! // Write the data back
+//! if let Err(e) = socket.write_all(&buf[0..n]).await {
+//! println!("failed to write to socket; err = {:?}", e);
+//! return;
+//! }
+//! }
+//! });
+//! }
+//! }
+//! ```
+//!
+//! From within the context of the runtime, additional tasks are spawned using
+//! the [`tokio::spawn`] function. Futures spawned using this function will be
+//! executed on the same thread pool used by the [`Runtime`].
+//!
+//! A [`Runtime`] instance can also be used directly.
+//!
+//! ```no_run
+//! use tokio::net::TcpListener;
+//! use tokio::io::{AsyncReadExt, AsyncWriteExt};
+//! use tokio::runtime::Runtime;
+//!
+//! fn main() -> Result<(), Box<dyn std::error::Error>> {
+//! // Create the runtime
+//! let rt = Runtime::new()?;
+//!
+//! // Spawn the root task
+//! rt.block_on(async {
+//! let listener = TcpListener::bind("127.0.0.1:8080").await?;
+//!
+//! loop {
+//! let (mut socket, _) = listener.accept().await?;
+//!
+//! tokio::spawn(async move {
+//! let mut buf = [0; 1024];
+//!
+//! // In a loop, read data from the socket and write the data back.
+//! loop {
+//! let n = match socket.read(&mut buf).await {
+//! // socket closed
+//! Ok(n) if n == 0 => return,
+//! Ok(n) => n,
+//! Err(e) => {
+//! println!("failed to read from socket; err = {:?}", e);
+//! return;
+//! }
+//! };
+//!
+//! // Write the data back
+//! if let Err(e) = socket.write_all(&buf[0..n]).await {
+//! println!("failed to write to socket; err = {:?}", e);
+//! return;
+//! }
+//! }
+//! });
+//! }
+//! })
+//! }
+//! ```
+//!
+//! ## Runtime Configurations
+//!
+//! Tokio provides multiple task scheduling strategies, suitable for different
+//! applications. The [runtime builder] or `#[tokio::main]` attribute may be
+//! used to select which scheduler to use.
+//!
+//! #### Multi-Thread Scheduler
+//!
+//! The multi-thread scheduler executes futures on a _thread pool_, using a
+//! work-stealing strategy. By default, it will start a worker thread for each
+//! CPU core available on the system. This tends to be the ideal configuration
+//! for most applications. The multi-thread scheduler requires the `rt-multi-thread`
+//! feature flag, and is selected by default:
+//! ```
+//! use tokio::runtime;
+//!
+//! # fn main() -> Result<(), Box<dyn std::error::Error>> {
+//! let threaded_rt = runtime::Runtime::new()?;
+//! # Ok(()) }
+//! ```
+//!
+//! Most applications should use the multi-thread scheduler, except in some
+//! niche use-cases, such as when running only a single thread is required.
+//!
+//! #### Current-Thread Scheduler
+//!
+//! The current-thread scheduler provides a _single-threaded_ future executor.
+//! All tasks will be created and executed on the current thread. This requires
+//! the `rt` feature flag.
+//! ```
+//! use tokio::runtime;
+//!
+//! # fn main() -> Result<(), Box<dyn std::error::Error>> {
+//! let rt = runtime::Builder::new_current_thread()
+//! .build()?;
+//! # Ok(()) }
+//! ```
+//!
+//! #### Resource drivers
+//!
+//! When configuring a runtime by hand, no resource drivers are enabled by
+//! default. In this case, attempting to use networking types or time types will
+//! fail. In order to enable these types, the resource drivers must be enabled.
+//! This is done with [`Builder::enable_io`] and [`Builder::enable_time`]. As a
+//! shorthand, [`Builder::enable_all`] enables both resource drivers.
+//!
+//! ## Lifetime of spawned threads
+//!
+//! The runtime may spawn threads depending on its configuration and usage. The
+//! multi-thread scheduler spawns threads to schedule tasks and for `spawn_blocking`
+//! calls.
+//!
+//! While the `Runtime` is active, threads may shut down after periods of being
+//! idle. Once `Runtime` is dropped, all runtime threads have usually been
+//! terminated, but in the presence of unstoppable spawned work are not
+//! guaranteed to have been terminated. See the
+//! [struct level documentation](Runtime#shutdown) for more details.
+//!
+//! [tasks]: crate::task
+//! [`Runtime`]: Runtime
+//! [`tokio::spawn`]: crate::spawn
+//! [`tokio::main`]: ../attr.main.html
+//! [runtime builder]: crate::runtime::Builder
+//! [`Runtime::new`]: crate::runtime::Runtime::new
+//! [`Builder::threaded_scheduler`]: crate::runtime::Builder::threaded_scheduler
+//! [`Builder::enable_io`]: crate::runtime::Builder::enable_io
+//! [`Builder::enable_time`]: crate::runtime::Builder::enable_time
+//! [`Builder::enable_all`]: crate::runtime::Builder::enable_all
+
+// At the top due to macros
+#[cfg(test)]
+#[cfg(not(tokio_wasm))]
+#[macro_use]
+mod tests;
+
+pub(crate) mod context;
+
+pub(crate) mod coop;
+
+pub(crate) mod park;
+
+mod driver;
+
+pub(crate) mod scheduler;
+
+cfg_io_driver_impl! {
+ pub(crate) mod io;
+}
+
+cfg_process_driver! {
+ mod process;
+}
+
+cfg_time! {
+ pub(crate) mod time;
+}
+
+cfg_signal_internal_and_unix! {
+ pub(crate) mod signal;
+}
+
+cfg_rt! {
+ pub(crate) mod task;
+
+ mod config;
+ use config::Config;
+
+ mod blocking;
+ #[cfg_attr(tokio_wasi, allow(unused_imports))]
+ pub(crate) use blocking::spawn_blocking;
+
+ cfg_trace! {
+ pub(crate) use blocking::Mandatory;
+ }
+
+ cfg_fs! {
+ pub(crate) use blocking::spawn_mandatory_blocking;
+ }
+
+ mod builder;
+ pub use self::builder::Builder;
+ cfg_unstable! {
+ pub use self::builder::UnhandledPanic;
+ pub use crate::util::rand::RngSeed;
+ }
+
+ cfg_taskdump! {
+ pub mod dump;
+ pub use dump::Dump;
+ }
+
+ mod handle;
+ pub use handle::{EnterGuard, Handle, TryCurrentError};
+
+ mod runtime;
+ pub use runtime::{Runtime, RuntimeFlavor};
+
+ mod thread_id;
+ pub(crate) use thread_id::ThreadId;
+
+ cfg_metrics! {
+ mod metrics;
+ pub use metrics::{RuntimeMetrics, HistogramScale};
+
+ pub(crate) use metrics::{MetricsBatch, SchedulerMetrics, WorkerMetrics, HistogramBuilder};
+
+ cfg_net! {
+ pub(crate) use metrics::IoDriverMetrics;
+ }
+ }
+
+ cfg_not_metrics! {
+ pub(crate) mod metrics;
+ pub(crate) use metrics::{SchedulerMetrics, WorkerMetrics, MetricsBatch, HistogramBuilder};
+ }
+
+ /// After thread starts / before thread stops
+ type Callback = std::sync::Arc<dyn Fn() + Send + Sync>;
+}
diff --git a/third_party/rust/tokio/src/runtime/park.rs b/third_party/rust/tokio/src/runtime/park.rs
new file mode 100644
index 0000000000..2392846abe
--- /dev/null
+++ b/third_party/rust/tokio/src/runtime/park.rs
@@ -0,0 +1,348 @@
+#![cfg_attr(not(feature = "full"), allow(dead_code))]
+
+use crate::loom::sync::atomic::AtomicUsize;
+use crate::loom::sync::{Arc, Condvar, Mutex};
+
+use std::sync::atomic::Ordering::SeqCst;
+use std::time::Duration;
+
+/// Blocks the current thread until it is unparked through an `UnparkThread`
+/// handle obtained from `unpark()`; both share the same `Inner`.
+#[derive(Debug)]
+pub(crate) struct ParkThread {
+ inner: Arc<Inner>,
+}
+
+/// Unblocks a thread that was blocked by `ParkThread`.
+#[derive(Clone, Debug)]
+pub(crate) struct UnparkThread {
+ inner: Arc<Inner>,
+}
+
+/// State shared between a `ParkThread` and its `UnparkThread` handles.
+#[derive(Debug)]
+struct Inner {
+ // One of `EMPTY`, `PARKED` or `NOTIFIED`.
+ state: AtomicUsize,
+ // Locked by the parking thread while it moves to `PARKED` and waits;
+ // `unpark` briefly acquires it before notifying.
+ mutex: Mutex<()>,
+ // Waited on by the parked thread; signalled by `unpark` and `shutdown`.
+ condvar: Condvar,
+}
+
+// Values stored in `Inner::state`.
+// `EMPTY`: no thread is parked and no notification is pending.
+const EMPTY: usize = 0;
+// `PARKED`: a thread has committed to waiting on the condvar.
+const PARKED: usize = 1;
+// `NOTIFIED`: `unpark` was called; the parking side consumes this and resets to `EMPTY`.
+const NOTIFIED: usize = 2;
+
+tokio_thread_local! {
+ static CURRENT_PARKER: ParkThread = ParkThread::new();
+}
+
+// Bit of a hack, but it is only for loom
+#[cfg(loom)]
+tokio_thread_local! {
+ static CURRENT_THREAD_PARK_COUNT: AtomicUsize = AtomicUsize::new(0);
+}
+
+// ==== impl ParkThread ====
+
+impl ParkThread {
+    /// Creates a parker in the `EMPTY` state with its own mutex/condvar pair.
+    pub(crate) fn new() -> Self {
+        let inner = Inner {
+            state: AtomicUsize::new(EMPTY),
+            mutex: Mutex::new(()),
+            condvar: Condvar::new(),
+        };
+
+        Self {
+            inner: Arc::new(inner),
+        }
+    }
+
+    /// Returns a handle that unparks this parker; it shares the same `Inner`.
+    pub(crate) fn unpark(&self) -> UnparkThread {
+        UnparkThread {
+            inner: self.inner.clone(),
+        }
+    }
+
+    /// Parks the current thread until it is notified.
+    pub(crate) fn park(&mut self) {
+        // Under loom, count every park call made on this thread.
+        #[cfg(loom)]
+        CURRENT_THREAD_PARK_COUNT.with(|parks| parks.fetch_add(1, SeqCst));
+
+        self.inner.park();
+    }
+
+    /// Parks the current thread until it is notified or `duration` elapses.
+    pub(crate) fn park_timeout(&mut self, duration: Duration) {
+        // Under loom, count every park call made on this thread.
+        #[cfg(loom)]
+        CURRENT_THREAD_PARK_COUNT.with(|parks| parks.fetch_add(1, SeqCst));
+
+        // Wasm doesn't have threads, so just sleep.
+        #[cfg(tokio_wasm)]
+        std::thread::sleep(duration);
+        #[cfg(not(tokio_wasm))]
+        self.inner.park_timeout(duration);
+    }
+
+    /// Forwards to `Inner::shutdown`.
+    pub(crate) fn shutdown(&mut self) {
+        self.inner.shutdown();
+    }
+}
+
+// ==== impl Inner ====
+
+impl Inner {
+ /// Parks the current thread until a notification from `unpark` is consumed.
+ ///
+ /// Returns immediately if a notification is already pending. Spurious
+ /// condvar wakeups are absorbed by the wait loop.
+ fn park(&self) {
+ // If we were previously notified then we consume this notification and
+ // return quickly.
+ if self
+ .state
+ .compare_exchange(NOTIFIED, EMPTY, SeqCst, SeqCst)
+ .is_ok()
+ {
+ return;
+ }
+
+ // Otherwise we need to coordinate going to sleep
+ let mut m = self.mutex.lock();
+
+ match self.state.compare_exchange(EMPTY, PARKED, SeqCst, SeqCst) {
+ Ok(_) => {}
+ Err(NOTIFIED) => {
+ // We must read here, even though we know it will be `NOTIFIED`.
+ // This is because `unpark` may have been called again since we read
+ // `NOTIFIED` in the `compare_exchange` above. We must perform an
+ // acquire operation that synchronizes with that `unpark` to observe
+ // any writes it made before the call to unpark. To do that we must
+ // read from the write it made to `state`.
+ let old = self.state.swap(EMPTY, SeqCst);
+ debug_assert_eq!(old, NOTIFIED, "park state changed unexpectedly");
+
+ return;
+ }
+ Err(actual) => panic!("inconsistent park state; actual = {}", actual),
+ }
+
+ loop {
+ m = self.condvar.wait(m).unwrap();
+
+ if self
+ .state
+ .compare_exchange(NOTIFIED, EMPTY, SeqCst, SeqCst)
+ .is_ok()
+ {
+ // got a notification
+ return;
+ }
+
+ // spurious wakeup, go back to sleep
+ }
+ }
+
+ /// Parks the current thread until it is notified or the `dur` timeout
+ /// expires, whichever comes first. A zero `dur` returns without waiting.
+ fn park_timeout(&self, dur: Duration) {
+ // Like `park` above, there is a fast path for an already-notified
+ // thread: consume the notification and return quickly. Only after that
+ // do we start coordinating for a sleep.
+ if self
+ .state
+ .compare_exchange(NOTIFIED, EMPTY, SeqCst, SeqCst)
+ .is_ok()
+ {
+ return;
+ }
+
+ if dur == Duration::from_millis(0) {
+ return;
+ }
+
+ let m = self.mutex.lock();
+
+ match self.state.compare_exchange(EMPTY, PARKED, SeqCst, SeqCst) {
+ Ok(_) => {}
+ Err(NOTIFIED) => {
+ // We must read again here, see `park`.
+ let old = self.state.swap(EMPTY, SeqCst);
+ debug_assert_eq!(old, NOTIFIED, "park state changed unexpectedly");
+
+ return;
+ }
+ Err(actual) => panic!("inconsistent park_timeout state; actual = {}", actual),
+ }
+
+ // Wait with a timeout, and if we spuriously wake up or otherwise wake up
+ // from a notification, we just want to unconditionally set the state back to
+ // empty, either consuming a notification or un-flagging ourselves as
+ // parked.
+ let (_m, _result) = self.condvar.wait_timeout(m, dur).unwrap();
+
+ match self.state.swap(EMPTY, SeqCst) {
+ NOTIFIED => {} // got a notification, hurray!
+ PARKED => {} // no notification, alas
+ n => panic!("inconsistent park_timeout state: {}", n),
+ }
+ }
+
+ /// Marks the state as `NOTIFIED` and, if a thread was `PARKED`, wakes it
+ /// through the condvar.
+ fn unpark(&self) {
+ // To ensure the unparked thread will observe any writes we made before
+ // this call, we must perform a release operation that `park` can
+ // synchronize with. To do that we must write `NOTIFIED` even if `state`
+ // is already `NOTIFIED`. That is why this must be a swap rather than a
+ // compare-and-swap that returns if it reads `NOTIFIED` on failure.
+ match self.state.swap(NOTIFIED, SeqCst) {
+ EMPTY => return, // no one was waiting
+ NOTIFIED => return, // already unparked
+ PARKED => {} // gotta go wake someone up
+ _ => panic!("inconsistent state in unpark"),
+ }
+
+ // There is a period between when the parked thread sets `state` to
+ // `PARKED` (or last checked `state` in the case of a spurious wake
+ // up) and when it actually waits on `condvar`. If we were to notify
+ // during this period it would be ignored and then when the parked
+ // thread went to sleep it would never wake up. Fortunately, it has
+ // `mutex` locked at this stage so we can acquire `mutex` to wait until
+ // it is ready to receive the notification.
+ //
+ // Releasing `mutex` before the call to `notify_one` means that when the
+ // parked thread wakes it doesn't get woken only to have to wait for us
+ // to release `mutex`.
+ drop(self.mutex.lock());
+
+ self.condvar.notify_one()
+ }
+
+ /// Wakes every thread currently waiting on `condvar`. `state` is left
+ /// untouched.
+ fn shutdown(&self) {
+ self.condvar.notify_all();
+ }
+}
+
+impl Default for ParkThread {
+ /// Equivalent to `ParkThread::new()`.
+ fn default() -> Self {
+ Self::new()
+ }
+}
+
+// ===== impl UnparkThread =====
+
+impl UnparkThread {
+ /// Notifies the associated parker, waking its thread if it is parked
+ /// (forwards to `Inner::unpark`).
+ pub(crate) fn unpark(&self) {
+ self.inner.unpark();
+ }
+}
+
+use crate::loom::thread::AccessError;
+use std::future::Future;
+use std::marker::PhantomData;
+use std::mem;
+use std::rc::Rc;
+use std::task::{RawWaker, RawWakerVTable, Waker};
+
+/// Blocks the current thread using a condition variable.
+///
+/// The parker itself lives in the `CURRENT_PARKER` thread-local; this type
+/// only carries a marker.
+#[derive(Debug)]
+pub(crate) struct CachedParkThread {
+ // `Rc` is neither `Send` nor `Sync`, so this marker keeps the type pinned
+ // to the thread it was created on.
+ _anchor: PhantomData<Rc<()>>,
+}
+
+impl CachedParkThread {
+    /// Creates a new `ParkThread` handle for the current thread.
+    ///
+    /// This type cannot be moved to other threads, so it should be created on
+    /// the thread that the caller intends to park.
+    pub(crate) fn new() -> CachedParkThread {
+        Self {
+            _anchor: PhantomData,
+        }
+    }
+
+    /// Builds a `Waker` that unparks the current thread's parker.
+    pub(crate) fn waker(&self) -> Result<Waker, AccessError> {
+        let unparker = self.unpark()?;
+        Ok(unparker.into_waker())
+    }
+
+    /// Obtains an unpark handle for the current thread's parker.
+    fn unpark(&self) -> Result<UnparkThread, AccessError> {
+        self.with_current(|parker| parker.unpark())
+    }
+
+    /// Parks the current thread until notified.
+    ///
+    /// Panics if the thread-local parker cannot be accessed.
+    pub(crate) fn park(&mut self) {
+        let outcome = self.with_current(|parker| parker.inner.park());
+        outcome.unwrap();
+    }
+
+    /// Parks the current thread until notified or until `duration` elapses.
+    ///
+    /// Panics if the thread-local parker cannot be accessed.
+    pub(crate) fn park_timeout(&mut self, duration: Duration) {
+        let outcome = self.with_current(|parker| parker.inner.park_timeout(duration));
+        outcome.unwrap();
+    }
+
+    /// Gets a reference to the `ParkThread` handle for this thread.
+    fn with_current<F, R>(&self, f: F) -> Result<R, AccessError>
+    where
+        F: FnOnce(&ParkThread) -> R,
+    {
+        CURRENT_PARKER.try_with(|parker| f(parker))
+    }
+
+    /// Drives `f` to completion on the current thread, parking between polls.
+    pub(crate) fn block_on<F: Future>(&mut self, f: F) -> Result<F::Output, AccessError> {
+        use std::task::Context;
+        use std::task::Poll::{Pending, Ready};
+
+        // Propagates `AccessError` if the thread-local parker cannot be accessed.
+        let waker = self.waker()?;
+        let mut cx = Context::from_waker(&waker);
+
+        pin!(f);
+
+        loop {
+            match crate::runtime::coop::budget(|| f.as_mut().poll(&mut cx)) {
+                Ready(output) => return Ok(output),
+                // Not ready yet: sleep until the waker unparks this thread.
+                Pending => self.park(),
+            }
+        }
+    }
+}
+
+impl UnparkThread {
+ /// Converts this handle into a `Waker` whose wake operations call
+ /// `Inner::unpark`.
+ pub(crate) fn into_waker(self) -> Waker {
+ // SAFETY: the raw waker is built from the owned `Arc<Inner>` held by
+ // `self`, and its vtable (`clone`, `wake`, `wake_by_ref`, `drop_waker`)
+ // only ever reinterprets the data pointer as that same `Arc<Inner>`.
+ unsafe {
+ let raw = unparker_to_raw_waker(self.inner);
+ Waker::from_raw(raw)
+ }
+ }
+}
+
+impl Inner {
+ /// Consumes the `Arc` and returns it as a type-erased raw pointer; the
+ /// reference it held is reclaimed by `from_raw`.
+ #[allow(clippy::wrong_self_convention)]
+ fn into_raw(this: Arc<Inner>) -> *const () {
+ Arc::into_raw(this) as *const ()
+ }
+
+ /// Rebuilds the `Arc<Inner>` from a pointer produced by `into_raw`.
+ ///
+ /// # Safety
+ ///
+ /// `ptr` must come from `Inner::into_raw`. The returned `Arc` takes over
+ /// one reference, so the caller must forget it if it does not own one.
+ unsafe fn from_raw(ptr: *const ()) -> Arc<Inner> {
+ Arc::from_raw(ptr as *const Inner)
+ }
+}
+
+/// Wraps `unparker` in a `RawWaker` whose data pointer is the raw `Arc<Inner>`
+/// and whose vtable is `clone` / `wake` / `wake_by_ref` / `drop_waker`.
+unsafe fn unparker_to_raw_waker(unparker: Arc<Inner>) -> RawWaker {
+ RawWaker::new(
+ Inner::into_raw(unparker),
+ // `RawWakerVTable::new` is a `const fn`, so this temporary is promoted
+ // to the `'static` reference that `RawWaker::new` requires.
+ &RawWakerVTable::new(clone, wake, wake_by_ref, drop_waker),
+ )
+}
+
+/// `RawWakerVTable` clone entry: returns a second `RawWaker` for the same
+/// `Inner`, with the reference count increased by one.
+unsafe fn clone(raw: *const ()) -> RawWaker {
+ let unparker = Inner::from_raw(raw);
+
+ // Increment the ref count. The forgotten clone accounts for the original
+ // waker's reference, while `unparker` itself is handed to the new waker.
+ mem::forget(unparker.clone());
+
+ unparker_to_raw_waker(unparker)
+}
+
+/// `RawWakerVTable` drop entry: rebuilds the `Arc` and lets it drop, which
+/// releases the waker's reference.
+unsafe fn drop_waker(raw: *const ()) {
+ let _ = Inner::from_raw(raw);
+}
+
+/// `RawWakerVTable` wake entry: unparks and consumes the waker's reference
+/// (the rebuilt `Arc` is dropped when the function returns).
+unsafe fn wake(raw: *const ()) {
+ let unparker = Inner::from_raw(raw);
+ unparker.unpark();
+}
+
+/// `RawWakerVTable` wake-by-reference entry: unparks without consuming the
+/// waker's reference.
+unsafe fn wake_by_ref(raw: *const ()) {
+    // We don't actually own a reference to the unparker, so the `Arc` is
+    // wrapped in `ManuallyDrop` right away. Its ref count is then left
+    // untouched on every exit path, including an unwinding panic inside
+    // `unpark` (a `mem::forget` placed after the call would be skipped and
+    // the borrowed reference would be released by mistake).
+    let unparker = mem::ManuallyDrop::new(Inner::from_raw(raw));
+    unparker.unpark();
+}
+
+/// Returns how many times `ParkThread::park` / `ParkThread::park_timeout`
+/// have been called on the current thread (loom builds only).
+#[cfg(loom)]
+pub(crate) fn current_thread_park_count() -> usize {
+ CURRENT_THREAD_PARK_COUNT.with(|count| count.load(SeqCst))
+}
diff --git a/third_party/rust/tokio/src/runtime/process.rs b/third_party/rust/tokio/src/runtime/process.rs
new file mode 100644
index 0000000000..df339b0e72
--- /dev/null
+++ b/third_party/rust/tokio/src/runtime/process.rs
@@ -0,0 +1,44 @@
+#![cfg_attr(not(feature = "rt"), allow(dead_code))]
+
+//! Process driver.
+
+use crate::process::unix::GlobalOrphanQueue;
+use crate::runtime::driver;
+use crate::runtime::signal::{Driver as SignalDriver, Handle as SignalHandle};
+
+use std::time::Duration;
+
+ /// Responsible for cleaning up orphaned child processes on Unix platforms.
+ #[derive(Debug)]
+ pub(crate) struct Driver {
+ // Signal driver that `park`, `park_timeout` and `shutdown` delegate to.
+ park: SignalDriver,
+ // Handle taken from `park` in `new`; passed to
+ // `GlobalOrphanQueue::reap_orphans` after every park.
+ signal_handle: SignalHandle,
+ }
+
+// ===== impl Driver =====
+
+ impl Driver {
+ /// Creates a new process `Driver` instance that delegates wakeups to `park`.
+ ///
+ /// A handle to the signal driver is captured up front so that it can be
+ /// passed to `GlobalOrphanQueue::reap_orphans` later.
+ pub(crate) fn new(park: SignalDriver) -> Self {
+ let signal_handle = park.handle();
+
+ Self {
+ park,
+ signal_handle,
+ }
+ }
+
+ /// Parks on the wrapped signal driver, then calls
+ /// `GlobalOrphanQueue::reap_orphans` once the park returns.
+ pub(crate) fn park(&mut self, handle: &driver::Handle) {
+ self.park.park(handle);
+ GlobalOrphanQueue::reap_orphans(&self.signal_handle);
+ }
+
+ /// Same as `park`, but forwards `duration` to the signal driver's
+ /// `park_timeout`.
+ pub(crate) fn park_timeout(&mut self, handle: &driver::Handle, duration: Duration) {
+ self.park.park_timeout(handle, duration);
+ GlobalOrphanQueue::reap_orphans(&self.signal_handle);
+ }
+
+ /// Shuts down the wrapped signal driver.
+ pub(crate) fn shutdown(&mut self, handle: &driver::Handle) {
+ self.park.shutdown(handle)
+ }
+ }
diff --git a/third_party/rust/tokio/src/runtime/runtime.rs b/third_party/rust/tokio/src/runtime/runtime.rs
new file mode 100644
index 0000000000..3f34999758
--- /dev/null
+++ b/third_party/rust/tokio/src/runtime/runtime.rs
@@ -0,0 +1,445 @@
+use crate::runtime::blocking::BlockingPool;
+use crate::runtime::scheduler::CurrentThread;
+use crate::runtime::{context, EnterGuard, Handle};
+use crate::task::JoinHandle;
+
+use std::future::Future;
+use std::time::Duration;
+
+cfg_rt_multi_thread! {
+ use crate::runtime::Builder;
+ use crate::runtime::scheduler::MultiThread;
+}
+
+ /// The Tokio runtime.
+ ///
+ /// The runtime provides an I/O driver, task scheduler, [timer], and
+ /// blocking pool, necessary for running asynchronous tasks.
+ ///
+ /// Instances of `Runtime` can be created using [`new`], or [`Builder`].
+ /// However, most users will use the `#[tokio::main]` annotation on their
+ /// entry point instead.
+ ///
+ /// See [module level][mod] documentation for more details.
+ ///
+ /// # Shutdown
+ ///
+ /// Shutting down the runtime is done by dropping the value, or calling
+ /// [`Runtime::shutdown_background`] or [`Runtime::shutdown_timeout`].
+ ///
+ /// Tasks spawned through [`Runtime::spawn`] keep running until they yield.
+ /// Then they are dropped. They are not *guaranteed* to run to completion, but
+ /// *might* do so if they do not yield until completion.
+ ///
+ /// Blocking functions spawned through [`Runtime::spawn_blocking`] keep running
+ /// until they return.
+ ///
+ /// The thread initiating the shutdown blocks until all spawned work has been
+ /// stopped. This can take an indefinite amount of time. The `Drop`
+ /// implementation waits forever for this.
+ ///
+ /// `shutdown_background` and `shutdown_timeout` can be used if waiting forever
+ /// is undesired. When the timeout is reached, spawned work that did not stop
+ /// in time and threads running it are leaked. The work continues to run until
+ /// one of the stopping conditions is fulfilled, but the thread initiating the
+ /// shutdown is unblocked.
+ ///
+ /// Once the runtime has been dropped, any outstanding I/O resources bound to
+ /// it will no longer function. Calling any method on them will result in an
+ /// error.
+ ///
+ /// # Sharing
+ ///
+ /// The Tokio runtime implements `Sync` and `Send` to allow you to wrap it
+ /// in an `Arc`. Most functions take `&self` to allow you to call them
+ /// concurrently across multiple threads.
+ ///
+ /// Calls to `shutdown_background` and `shutdown_timeout` require exclusive
+ /// ownership of the runtime type and this can be achieved via
+ /// `Arc::try_unwrap` when only one strong count reference is left over.
+ ///
+ /// [timer]: crate::time
+ /// [mod]: index.html
+ /// [`new`]: method@Self::new
+ /// [`Builder`]: struct@Builder
+ #[derive(Debug)]
+ pub struct Runtime {
+ /// Task scheduler
+ scheduler: Scheduler,
+
+ /// Handle to runtime, also contains driver handles
+ handle: Handle,
+
+ /// Blocking pool handle, used to signal shutdown
+ blocking_pool: BlockingPool,
+ }
+
+ /// The flavor of a `Runtime`.
+ ///
+ /// This is the return type for [`Handle::runtime_flavor`](crate::runtime::Handle::runtime_flavor()).
+ ///
+ /// The enum is marked `#[non_exhaustive]`, so code outside this crate that
+ /// matches on it needs a wildcard arm.
+ #[derive(Debug, PartialEq, Eq)]
+ #[non_exhaustive]
+ pub enum RuntimeFlavor {
+ /// The flavor that executes all tasks on the current thread.
+ CurrentThread,
+ /// The flavor that executes tasks across multiple threads.
+ MultiThread,
+ }
+
+ /// The runtime scheduler is either a multi-thread or a current-thread executor.
+ ///
+ /// The `MultiThread` variant only exists when the `rt-multi-thread` feature
+ /// is enabled and the `tokio_wasi` cfg is not set.
+ #[derive(Debug)]
+ pub(super) enum Scheduler {
+ /// Execute all tasks on the current-thread.
+ CurrentThread(CurrentThread),
+
+ /// Execute tasks across multiple threads.
+ #[cfg(all(feature = "rt-multi-thread", not(tokio_wasi)))]
+ MultiThread(MultiThread),
+ }
+
+ impl Runtime {
+ /// Assembles a `Runtime` from its already-constructed parts.
+ pub(super) fn from_parts(
+ scheduler: Scheduler,
+ handle: Handle,
+ blocking_pool: BlockingPool,
+ ) -> Runtime {
+ Runtime {
+ scheduler,
+ handle,
+ blocking_pool,
+ }
+ }
+
+ cfg_not_wasi! {
+ /// Creates a new runtime instance with default configuration values.
+ ///
+ /// This results in the multi threaded scheduler, I/O driver, and time driver being
+ /// initialized.
+ ///
+ /// Most applications will not need to call this function directly. Instead,
+ /// they will use the [`#[tokio::main]` attribute][main]. When a more complex
+ /// configuration is necessary, the [runtime builder] may be used.
+ ///
+ /// See [module level][mod] documentation for more details.
+ ///
+ /// # Examples
+ ///
+ /// Creating a new `Runtime` with default configuration values.
+ ///
+ /// ```
+ /// use tokio::runtime::Runtime;
+ ///
+ /// let rt = Runtime::new()
+ /// .unwrap();
+ ///
+ /// // Use the runtime...
+ /// ```
+ ///
+ /// [mod]: index.html
+ /// [main]: ../attr.main.html
+ /// [threaded scheduler]: index.html#threaded-scheduler
+ /// [runtime builder]: crate::runtime::Builder
+ #[cfg(feature = "rt-multi-thread")]
+ #[cfg_attr(docsrs, doc(cfg(feature = "rt-multi-thread")))]
+ pub fn new() -> std::io::Result<Runtime> {
+ Builder::new_multi_thread().enable_all().build()
+ }
+ }
+
+ /// Returns a handle to the runtime's spawner.
+ ///
+ /// The returned handle can be used to spawn tasks that run on this runtime, and can
+ /// be cloned to allow moving the `Handle` to other threads.
+ ///
+ /// Calling [`Handle::block_on`] on a handle to a `current_thread` runtime is error-prone.
+ /// Refer to the documentation of [`Handle::block_on`] for more.
+ ///
+ /// # Examples
+ ///
+ /// ```
+ /// use tokio::runtime::Runtime;
+ ///
+ /// let rt = Runtime::new()
+ /// .unwrap();
+ ///
+ /// let handle = rt.handle();
+ ///
+ /// // Use the handle...
+ /// ```
+ pub fn handle(&self) -> &Handle {
+ &self.handle
+ }
+
+ /// Spawns a future onto the Tokio runtime.
+ ///
+ /// This spawns the given future onto the runtime's executor, usually a
+ /// thread pool. The thread pool is then responsible for polling the future
+ /// until it completes.
+ ///
+ /// The provided future will start running in the background immediately
+ /// when `spawn` is called, even if you don't await the returned
+ /// `JoinHandle`.
+ ///
+ /// See [module level][mod] documentation for more details.
+ ///
+ /// [mod]: index.html
+ ///
+ /// # Examples
+ ///
+ /// ```
+ /// use tokio::runtime::Runtime;
+ ///
+ /// # fn dox() {
+ /// // Create the runtime
+ /// let rt = Runtime::new().unwrap();
+ ///
+ /// // Spawn a future onto the runtime
+ /// rt.spawn(async {
+ /// println!("now running on a worker thread");
+ /// });
+ /// # }
+ /// ```
+ #[track_caller]
+ pub fn spawn<F>(&self, future: F) -> JoinHandle<F::Output>
+ where
+ F: Future + Send + 'static,
+ F::Output: Send + 'static,
+ {
+ self.handle.spawn(future)
+ }
+
+ /// Runs the provided function on an executor dedicated to blocking operations.
+ ///
+ /// # Examples
+ ///
+ /// ```
+ /// use tokio::runtime::Runtime;
+ ///
+ /// # fn dox() {
+ /// // Create the runtime
+ /// let rt = Runtime::new().unwrap();
+ ///
+ /// // Spawn a blocking function onto the runtime
+ /// rt.spawn_blocking(|| {
+ /// println!("now running on a worker thread");
+ /// });
+ /// # }
+ /// ```
+ #[track_caller]
+ pub fn spawn_blocking<F, R>(&self, func: F) -> JoinHandle<R>
+ where
+ F: FnOnce() -> R + Send + 'static,
+ R: Send + 'static,
+ {
+ self.handle.spawn_blocking(func)
+ }
+
+ /// Runs a future to completion on the Tokio runtime. This is the
+ /// runtime's entry point.
+ ///
+ /// This runs the given future on the current thread, blocking until it is
+ /// complete, and yielding its resolved result. Any tasks or timers
+ /// which the future spawns internally will be executed on the runtime.
+ ///
+ /// # Non-worker future
+ ///
+ /// Note that the future required by this function does not run as a
+ /// worker. The expectation is that other tasks are spawned by the future here.
+ /// Awaiting on other futures from the future provided here will not
+ /// perform as fast as those spawned as workers.
+ ///
+ /// # Multi thread scheduler
+ ///
+ /// When the multi thread scheduler is used this will allow futures
+ /// to run within the io driver and timer context of the overall runtime.
+ ///
+ /// Any spawned tasks will continue running after `block_on` returns.
+ ///
+ /// # Current thread scheduler
+ ///
+ /// When the current thread scheduler is enabled `block_on`
+ /// can be called concurrently from multiple threads. The first call
+ /// will take ownership of the io and timer drivers. This means
+ /// other threads which do not own the drivers will hook into that one.
+ /// When the first `block_on` completes, other threads will be able to
+ /// "steal" the driver to allow continued execution of their futures.
+ ///
+ /// Any spawned tasks will be suspended after `block_on` returns. Calling
+ /// `block_on` again will resume previously spawned tasks.
+ ///
+ /// # Panics
+ ///
+ /// This function panics if the provided future panics, or if called within an
+ /// asynchronous execution context.
+ ///
+ /// # Examples
+ ///
+ /// ```no_run
+ /// use tokio::runtime::Runtime;
+ ///
+ /// // Create the runtime
+ /// let rt = Runtime::new().unwrap();
+ ///
+ /// // Execute the future, blocking the current thread until completion
+ /// rt.block_on(async {
+ /// println!("hello");
+ /// });
+ /// ```
+ ///
+ /// [handle]: fn@Handle::block_on
+ #[track_caller]
+ pub fn block_on<F: Future>(&self, future: F) -> F::Output {
+ #[cfg(all(
+ tokio_unstable,
+ tokio_taskdump,
+ feature = "rt",
+ target_os = "linux",
+ any(target_arch = "aarch64", target_arch = "x86", target_arch = "x86_64")
+ ))]
+ let future = super::task::trace::Trace::root(future);
+
+ #[cfg(all(tokio_unstable, feature = "tracing"))]
+ let future = crate::util::trace::task(
+ future,
+ "block_on",
+ None,
+ crate::runtime::task::Id::next().as_u64(),
+ );
+
+ // Enter the runtime context for the duration of the call; the guard is
+ // dropped when this function returns.
+ let _enter = self.enter();
+
+ match &self.scheduler {
+ Scheduler::CurrentThread(exec) => exec.block_on(&self.handle.inner, future),
+ #[cfg(all(feature = "rt-multi-thread", not(tokio_wasi)))]
+ Scheduler::MultiThread(exec) => exec.block_on(&self.handle.inner, future),
+ }
+ }
+
+ /// Enters the runtime context.
+ ///
+ /// This allows you to construct types that must have an executor
+ /// available on creation such as [`Sleep`] or [`TcpStream`]. It will
+ /// also allow you to call methods such as [`tokio::spawn`].
+ ///
+ /// [`Sleep`]: struct@crate::time::Sleep
+ /// [`TcpStream`]: struct@crate::net::TcpStream
+ /// [`tokio::spawn`]: fn@crate::spawn
+ ///
+ /// # Example
+ ///
+ /// ```
+ /// use tokio::runtime::Runtime;
+ ///
+ /// fn function_that_spawns(msg: String) {
+ /// // Had we not used `rt.enter` below, this would panic.
+ /// tokio::spawn(async move {
+ /// println!("{}", msg);
+ /// });
+ /// }
+ ///
+ /// fn main() {
+ /// let rt = Runtime::new().unwrap();
+ ///
+ /// let s = "Hello World!".to_string();
+ ///
+ /// // By entering the context, we tie `tokio::spawn` to this executor.
+ /// let _guard = rt.enter();
+ /// function_that_spawns(s);
+ /// }
+ /// ```
+ pub fn enter(&self) -> EnterGuard<'_> {
+ self.handle.enter()
+ }
+
+ /// Shuts down the runtime, waiting for at most `duration` for all spawned
+ /// work to stop.
+ ///
+ /// See the [struct level documentation](Runtime#shutdown) for more details.
+ ///
+ /// # Examples
+ ///
+ /// ```
+ /// use tokio::runtime::Runtime;
+ /// use tokio::task;
+ ///
+ /// use std::thread;
+ /// use std::time::Duration;
+ ///
+ /// fn main() {
+ /// let runtime = Runtime::new().unwrap();
+ ///
+ /// runtime.block_on(async move {
+ /// task::spawn_blocking(move || {
+ /// thread::sleep(Duration::from_secs(10_000));
+ /// });
+ /// });
+ ///
+ /// runtime.shutdown_timeout(Duration::from_millis(100));
+ /// }
+ /// ```
+ pub fn shutdown_timeout(mut self, duration: Duration) {
+ // Wakeup and shutdown all the worker threads
+ self.handle.inner.shutdown();
+ self.blocking_pool.shutdown(Some(duration));
+ }
+
+ /// Shuts down the runtime, without waiting for any spawned work to stop.
+ ///
+ /// This can be useful if you want to drop a runtime from within another runtime.
+ /// Normally, dropping a runtime will block indefinitely for spawned blocking tasks
+ /// to complete, which would normally not be permitted within an asynchronous context.
+ /// By calling `shutdown_background()`, you can drop the runtime from such a context.
+ ///
+ /// Note however, that because we do not wait for any blocking tasks to complete, this
+ /// may result in a resource leak (in that any blocking tasks are still running until they
+ /// return).
+ ///
+ /// See the [struct level documentation](Runtime#shutdown) for more details.
+ ///
+ /// This function is equivalent to calling `shutdown_timeout(Duration::from_nanos(0))`.
+ ///
+ /// ```
+ /// use tokio::runtime::Runtime;
+ ///
+ /// fn main() {
+ /// let runtime = Runtime::new().unwrap();
+ ///
+ /// runtime.block_on(async move {
+ /// let inner_runtime = Runtime::new().unwrap();
+ /// // ...
+ /// inner_runtime.shutdown_background();
+ /// });
+ /// }
+ /// ```
+ pub fn shutdown_background(self) {
+ self.shutdown_timeout(Duration::from_nanos(0))
+ }
+ }
+
+ /// Dropping the runtime shuts down whichever scheduler it owns.
+ #[allow(clippy::single_match)] // there are comments in the error branch, so we don't want if-let
+ impl Drop for Runtime {
+ fn drop(&mut self) {
+ match &mut self.scheduler {
+ Scheduler::CurrentThread(current_thread) => {
+ // This ensures that tasks spawned on the current-thread
+ // runtime are dropped inside the runtime's context.
+ let _guard = context::try_set_current(&self.handle.inner);
+ current_thread.shutdown(&self.handle.inner);
+ }
+ #[cfg(all(feature = "rt-multi-thread", not(tokio_wasi)))]
+ Scheduler::MultiThread(multi_thread) => {
+ // The threaded scheduler drops its tasks on its worker threads, which is
+ // already in the runtime's context.
+ multi_thread.shutdown(&self.handle.inner);
+ }
+ }
+ }
+ }
+
+ cfg_metrics! {
+ impl Runtime {
+ /// Returns the metrics of this runtime.
+ ///
+ /// This simply forwards to the `metrics` method of the runtime's `Handle`.
+ pub fn metrics(&self) -> crate::runtime::RuntimeMetrics {
+ self.handle.metrics()
+ }
+ }
+ }
diff --git a/third_party/rust/tokio/src/runtime/scheduler/current_thread.rs b/third_party/rust/tokio/src/runtime/scheduler/current_thread.rs
new file mode 100644
index 0000000000..ac4a8d6fac
--- /dev/null
+++ b/third_party/rust/tokio/src/runtime/scheduler/current_thread.rs
@@ -0,0 +1,750 @@
+use crate::future::poll_fn;
+use crate::loom::sync::atomic::AtomicBool;
+use crate::loom::sync::Arc;
+use crate::runtime::driver::{self, Driver};
+use crate::runtime::scheduler::{self, Defer, Inject};
+use crate::runtime::task::{self, JoinHandle, OwnedTasks, Schedule, Task};
+use crate::runtime::{blocking, context, Config, MetricsBatch, SchedulerMetrics, WorkerMetrics};
+use crate::sync::notify::Notify;
+use crate::util::atomic_cell::AtomicCell;
+use crate::util::{waker_ref, RngSeedGenerator, Wake, WakerRef};
+
+use std::cell::RefCell;
+use std::collections::VecDeque;
+use std::fmt;
+use std::future::Future;
+use std::sync::atomic::Ordering::{AcqRel, Release};
+use std::task::Poll::{Pending, Ready};
+use std::task::Waker;
+use std::time::Duration;
+
+ /// Executes tasks on the current thread
+ pub(crate) struct CurrentThread {
+ /// Core scheduler data is acquired by a thread entering `block_on`.
+ ///
+ /// `take_core` removes it from this cell, and dropping the resulting
+ /// `CoreGuard` stores it back.
+ core: AtomicCell<Core>,
+
+ /// Notifier for waking up other threads to steal the
+ /// driver.
+ notify: Notify,
+ }
+
+ /// Handle to the current thread scheduler
+ ///
+ /// It is shared as `Arc<Handle>`; that type implements `Schedule` and
+ /// `Handle` itself implements `Wake`.
+ pub(crate) struct Handle {
+ /// Scheduler state shared across threads
+ shared: Shared,
+
+ /// Resource driver handles
+ pub(crate) driver: driver::Handle,
+
+ /// Blocking pool spawner
+ pub(crate) blocking_spawner: blocking::Spawner,
+
+ /// Current random number generator seed
+ pub(crate) seed_generator: RngSeedGenerator,
+ }
+
+ /// Data required for executing the scheduler. The struct is passed around to
+ /// a function that will perform the scheduling work and acts as a capability token.
+ struct Core {
+ /// Scheduler run queue
+ tasks: VecDeque<Notified>,
+
+ /// Current tick
+ ///
+ /// Incremented (wrapping) by `Core::tick` and compared against
+ /// `global_queue_interval` in `Core::next_task`.
+ tick: u32,
+
+ /// Runtime driver
+ ///
+ /// The driver is removed before starting to park the thread
+ /// and put back once parking is over.
+ driver: Option<Driver>,
+
+ /// Metrics batch
+ metrics: MetricsBatch,
+
+ /// How often to check the global queue
+ global_queue_interval: u32,
+
+ /// True if a task panicked without being handled and the runtime is
+ /// configured to shutdown on unhandled panic.
+ unhandled_panic: bool,
+ }
+
+ /// Scheduler state shared between threads.
+ struct Shared {
+ /// Remote run queue
+ ///
+ /// Tasks scheduled from outside the scheduler's own context are pushed here.
+ inject: Inject<Arc<Handle>>,
+
+ /// Collection of all active tasks spawned onto this executor.
+ owned: OwnedTasks<Arc<Handle>>,
+
+ /// Indicates whether the blocked on thread was woken.
+ ///
+ /// Set by the `Wake` implementation and by `Handle::waker_ref`, cleared
+ /// by `Handle::reset_woken`.
+ woken: AtomicBool,
+
+ /// Scheduler configuration options
+ config: Config,
+
+ /// Keeps track of various runtime metrics.
+ scheduler_metrics: SchedulerMetrics,
+
+ /// This scheduler only has one worker.
+ worker_metrics: WorkerMetrics,
+ }
+
+ /// Thread-local context.
+ ///
+ /// pub(crate) to store in `runtime::context`.
+ pub(crate) struct Context {
+ /// Scheduler handle
+ handle: Arc<Handle>,
+
+ /// Scheduler core, enabling the holder of `Context` to execute the
+ /// scheduler.
+ ///
+ /// `None` while the core has been taken out to be passed around by value.
+ core: RefCell<Option<Box<Core>>>,
+
+ /// Deferred tasks, usually ones that called `task::yield_now()`.
+ pub(crate) defer: Defer,
+ }
+
+ /// A notified task bound to this scheduler's handle type.
+ type Notified = task::Notified<Arc<Handle>>;
+
+ /// Initial queue capacity.
+ const INITIAL_CAPACITY: usize = 64;
+
+ /// Used if none is specified. This is a temporary constant and will be removed
+ /// as we unify tuning logic between the multi-thread and current-thread
+ /// schedulers.
+ const DEFAULT_GLOBAL_QUEUE_INTERVAL: u32 = 31;
+
+ impl CurrentThread {
+ /// Creates the scheduler together with its shared `Handle`.
+ ///
+ /// The `Core` (run queue, driver, metrics batch, ...) is stored in the
+ /// scheduler's `core` cell, from which `block_on` and `shutdown` take it.
+ pub(crate) fn new(
+ driver: Driver,
+ driver_handle: driver::Handle,
+ blocking_spawner: blocking::Spawner,
+ seed_generator: RngSeedGenerator,
+ config: Config,
+ ) -> (CurrentThread, Arc<Handle>) {
+ let worker_metrics = WorkerMetrics::from_config(&config);
+
+ // Get the configured global queue interval, or use the default.
+ let global_queue_interval = config
+ .global_queue_interval
+ .unwrap_or(DEFAULT_GLOBAL_QUEUE_INTERVAL);
+
+ let handle = Arc::new(Handle {
+ shared: Shared {
+ inject: Inject::new(),
+ owned: OwnedTasks::new(),
+ woken: AtomicBool::new(false),
+ config,
+ scheduler_metrics: SchedulerMetrics::new(),
+ worker_metrics,
+ },
+ driver: driver_handle,
+ blocking_spawner,
+ seed_generator,
+ });
+
+ let core = AtomicCell::new(Some(Box::new(Core {
+ tasks: VecDeque::with_capacity(INITIAL_CAPACITY),
+ tick: 0,
+ driver: Some(driver),
+ metrics: MetricsBatch::new(&handle.shared.worker_metrics),
+ global_queue_interval,
+ unhandled_panic: false,
+ })));
+
+ let scheduler = CurrentThread {
+ core,
+ notify: Notify::new(),
+ };
+
+ (scheduler, handle)
+ }
+
+ /// Runs `future` to completion on the calling thread.
+ ///
+ /// If the scheduler core is available this thread takes it and drives the
+ /// scheduler itself; otherwise it polls `future` while also waiting for a
+ /// notification that the core has been put back, then tries again.
+ #[track_caller]
+ pub(crate) fn block_on<F: Future>(&self, handle: &scheduler::Handle, future: F) -> F::Output {
+ pin!(future);
+
+ crate::runtime::context::enter_runtime(handle, false, |blocking| {
+ let handle = handle.as_current_thread();
+
+ // Attempt to steal the scheduler core and block_on the future if we can
+ // there, otherwise, lets select on a notification that the core is
+ // available or the future is complete.
+ loop {
+ if let Some(core) = self.take_core(handle) {
+ return core.block_on(future);
+ } else {
+ let notified = self.notify.notified();
+ pin!(notified);
+
+ if let Some(out) = blocking
+ .block_on(poll_fn(|cx| {
+ // `None` means "the core may be available now": go
+ // around the outer loop and try to take it again.
+ if notified.as_mut().poll(cx).is_ready() {
+ return Ready(None);
+ }
+
+ if let Ready(out) = future.as_mut().poll(cx) {
+ return Ready(Some(out));
+ }
+
+ Pending
+ }))
+ .expect("Failed to `Enter::block_on`")
+ {
+ return out;
+ }
+ }
+ }
+ })
+ }
+
+ /// Takes the core out of the scheduler, if present, and wraps it in a
+ /// `CoreGuard` that returns it when dropped.
+ fn take_core(&self, handle: &Arc<Handle>) -> Option<CoreGuard<'_>> {
+ let core = self.core.take()?;
+
+ Some(CoreGuard {
+ context: scheduler::Context::CurrentThread(Context {
+ handle: handle.clone(),
+ core: RefCell::new(Some(core)),
+ defer: Defer::new(),
+ }),
+ scheduler: self,
+ })
+ }
+
+ /// Shuts the scheduler down by running `shutdown2` on its core.
+ ///
+ /// # Panics
+ ///
+ /// Panics if the core is missing and the thread is not already panicking.
+ pub(crate) fn shutdown(&mut self, handle: &scheduler::Handle) {
+ let handle = handle.as_current_thread();
+
+ // Avoid a double panic if we are currently panicking and
+ // the lock may be poisoned.
+
+ let core = match self.take_core(handle) {
+ Some(core) => core,
+ None if std::thread::panicking() => return,
+ None => panic!("Oh no! We never placed the Core back, this is a bug!"),
+ };
+
+ // Check that the thread-local is not being destroyed
+ let tls_available = context::with_current(|_| ()).is_ok();
+
+ if tls_available {
+ core.enter(|core, _context| {
+ let core = shutdown2(core, handle);
+ (core, ())
+ });
+ } else {
+ // Shutdown without setting the context. `tokio::spawn` calls will
+ // fail, but those will fail either way because the thread-local is
+ // not available anymore.
+ let context = core.context.expect_current_thread();
+ let core = context.core.borrow_mut().take().unwrap();
+
+ let core = shutdown2(core, handle);
+ *context.core.borrow_mut() = Some(core);
+ }
+ }
+ }
+
+ /// Performs the shutdown sequence on `core` and hands it back to the caller.
+ ///
+ /// The steps run in a fixed order: shut down all owned tasks, drain the local
+ /// queue, close and drain the injection queue, submit metrics, and finally
+ /// shut down the resource drivers.
+ fn shutdown2(mut core: Box<Core>, handle: &Handle) -> Box<Core> {
+ // Drain the OwnedTasks collection. This call also closes the
+ // collection, ensuring that no tasks are ever pushed after this
+ // call returns.
+ handle.shared.owned.close_and_shutdown_all();
+
+ // Drain local queue
+ // We already shut down every task, so we just need to drop the task.
+ while let Some(task) = core.next_local_task(handle) {
+ drop(task);
+ }
+
+ // Close the injection queue
+ handle.shared.inject.close();
+
+ // Drain remote queue
+ while let Some(task) = handle.shared.inject.pop() {
+ drop(task);
+ }
+
+ assert!(handle.shared.owned.is_empty());
+
+ // Submit metrics
+ core.submit_metrics(handle);
+
+ // Shutdown the resource drivers
+ if let Some(driver) = core.driver.as_mut() {
+ driver.shutdown(&handle.driver);
+ }
+
+ core
+ }
+
+ /// Opaque `Debug` output: only the type name is printed, no fields.
+ impl fmt::Debug for CurrentThread {
+ fn fmt(&self, fmt: &mut fmt::Formatter<'_>) -> fmt::Result {
+ fmt.debug_struct("CurrentThread").finish()
+ }
+ }
+
+// ===== impl Core =====
+
+ impl Core {
+     /// Advances the tick counter by one, wrapping around on overflow.
+     fn tick(&mut self) {
+         self.tick = self.tick.wrapping_add(1);
+     }
+
+     /// Picks the next task to run.
+     ///
+     /// Every `global_queue_interval` ticks the remote queue is consulted
+     /// first; on all other ticks the local queue has priority. Whichever
+     /// queue is tried first, the other one serves as the fallback.
+     fn next_task(&mut self, handle: &Handle) -> Option<Notified> {
+         let remote_first = self.tick % self.global_queue_interval == 0;
+
+         if remote_first {
+             match handle.next_remote_task() {
+                 Some(task) => Some(task),
+                 None => self.next_local_task(handle),
+             }
+         } else {
+             match self.next_local_task(handle) {
+                 Some(task) => Some(task),
+                 None => handle.next_remote_task(),
+             }
+         }
+     }
+
+     /// Pops the front of the local run queue and publishes the new queue
+     /// depth to the worker metrics (also when the queue was empty).
+     fn next_local_task(&mut self, handle: &Handle) -> Option<Notified> {
+         let popped = self.tasks.pop_front();
+         let depth = self.tasks.len();
+         handle.shared.worker_metrics.set_queue_depth(depth);
+         popped
+     }
+
+     /// Appends `task` to the local run queue, counts the local schedule and
+     /// publishes the new queue depth to the worker metrics.
+     fn push_task(&mut self, handle: &Handle, task: Notified) {
+         self.tasks.push_back(task);
+         self.metrics.inc_local_schedule_count();
+         let depth = self.tasks.len();
+         handle.shared.worker_metrics.set_queue_depth(depth);
+     }
+
+     /// Submits the batched metrics to the shared worker metrics.
+     fn submit_metrics(&mut self, handle: &Handle) {
+         let target = &handle.shared.worker_metrics;
+         self.metrics.submit(target);
+     }
+ }
+
+ /// Takes every deferred waker out of `context` and wakes them in order.
+ ///
+ /// The wakers are obtained with `take_deferred`, so the vector is moved out
+ /// of the `Defer` rather than popped from in place.
+ #[cfg(tokio_taskdump)]
+ fn wake_deferred_tasks_and_free(context: &Context) {
+     context
+         .defer
+         .take_deferred()
+         .into_iter()
+         .for_each(|deferred| deferred.wake());
+ }
+
+// ===== impl Context =====
+
+ impl Context {
+ /// Execute the closure with the given scheduler core stored in the
+ /// thread-local context.
+ ///
+ /// The closure runs inside `coop::budget`, and the poll is bracketed by
+ /// `metrics.start_poll()` / `metrics.end_poll()`.
+ fn run_task<R>(&self, mut core: Box<Core>, f: impl FnOnce() -> R) -> (Box<Core>, R) {
+ core.metrics.start_poll();
+ let mut ret = self.enter(core, || crate::runtime::coop::budget(f));
+ ret.0.metrics.end_poll();
+ ret
+ }
+
+ /// Blocks the current thread until an event is received by the driver,
+ /// including I/O events, timer events, ...
+ fn park(&self, mut core: Box<Core>, handle: &Handle) -> Box<Core> {
+ // The driver is taken out of the core for the duration of the park and
+ // restored at the end of this function.
+ let mut driver = core.driver.take().expect("driver missing");
+
+ if let Some(f) = &handle.shared.config.before_park {
+ // Incorrect lint, the closures are actually different types so `f`
+ // cannot be passed as an argument to `enter`.
+ #[allow(clippy::redundant_closure)]
+ let (c, _) = self.enter(core, || f());
+ core = c;
+ }
+
+ // This check will fail if `before_park` spawns a task for us to run
+ // instead of parking the thread
+ if core.tasks.is_empty() {
+ // Park until the thread is signaled
+ core.metrics.about_to_park();
+ core.submit_metrics(handle);
+
+ let (c, _) = self.enter(core, || {
+ driver.park(&handle.driver);
+ self.defer.wake();
+ });
+
+ core = c;
+ }
+
+ if let Some(f) = &handle.shared.config.after_unpark {
+ // Incorrect lint, the closures are actually different types so `f`
+ // cannot be passed as an argument to `enter`.
+ #[allow(clippy::redundant_closure)]
+ let (c, _) = self.enter(core, || f());
+ core = c;
+ }
+
+ core.driver = Some(driver);
+ core
+ }
+
+ /// Checks the driver for new events without blocking the thread.
+ ///
+ /// This is a `park_timeout` with a zero duration, followed by waking the
+ /// deferred wakers.
+ fn park_yield(&self, mut core: Box<Core>, handle: &Handle) -> Box<Core> {
+ let mut driver = core.driver.take().expect("driver missing");
+
+ core.submit_metrics(handle);
+
+ let (mut core, _) = self.enter(core, || {
+ driver.park_timeout(&handle.driver, Duration::from_millis(0));
+ self.defer.wake();
+ });
+
+ core.driver = Some(driver);
+ core
+ }
+
+ /// Stores `core` in this context while `f` runs, then takes it back out
+ /// and returns it together with the closure's result.
+ fn enter<R>(&self, core: Box<Core>, f: impl FnOnce() -> R) -> (Box<Core>, R) {
+ // Store the scheduler core in the thread-local context
+ //
+ // A drop-guard is employed at a higher level.
+ *self.core.borrow_mut() = Some(core);
+
+ // Execute the closure. Budget tracking, where wanted, is applied by
+ // the caller (see `run_task`).
+ let ret = f();
+
+ // Take the scheduler core back
+ let core = self.core.borrow_mut().take().expect("core missing");
+ (core, ret)
+ }
+
+ /// Records `waker` in this context's list of deferred wakers.
+ pub(crate) fn defer(&self, waker: &Waker) {
+ self.defer.defer(waker);
+ }
+ }
+
+// ===== impl Handle =====
+
+ impl Handle {
+ /// Spawns a future onto the `CurrentThread` scheduler
+ ///
+ /// The future is bound to the `owned` task collection; if binding yields a
+ /// notified task, it is scheduled right away.
+ pub(crate) fn spawn<F>(
+ me: &Arc<Self>,
+ future: F,
+ id: crate::runtime::task::Id,
+ ) -> JoinHandle<F::Output>
+ where
+ F: crate::future::Future + Send + 'static,
+ F::Output: Send + 'static,
+ {
+ let (handle, notified) = me.shared.owned.bind(future, me.clone(), id);
+
+ if let Some(notified) = notified {
+ me.schedule(notified);
+ }
+
+ handle
+ }
+
+ /// Capture a snapshot of this runtime's state.
+ ///
+ /// The resulting dump is empty when there is no scheduler context, when
+ /// the context currently holds no core, or when the injection queue has
+ /// already been closed.
+ #[cfg(all(
+ tokio_unstable,
+ tokio_taskdump,
+ target_os = "linux",
+ any(target_arch = "aarch64", target_arch = "x86", target_arch = "x86_64")
+ ))]
+ pub(crate) fn dump(&self) -> crate::runtime::Dump {
+ use crate::runtime::dump;
+ use task::trace::trace_current_thread;
+
+ let mut traces = vec![];
+
+ // todo: how to make this work outside of a runtime context?
+ context::with_scheduler(|maybe_context| {
+ // drain the local queue
+ let context = if let Some(context) = maybe_context {
+ context.expect_current_thread()
+ } else {
+ return;
+ };
+ let mut maybe_core = context.core.borrow_mut();
+ let core = if let Some(core) = maybe_core.as_mut() {
+ core
+ } else {
+ return;
+ };
+ let local = &mut core.tasks;
+
+ if self.shared.inject.is_closed() {
+ return;
+ }
+
+ traces = trace_current_thread(&self.shared.owned, local, &self.shared.inject)
+ .into_iter()
+ .map(dump::Task::new)
+ .collect();
+
+ // Avoid double borrow panic
+ drop(maybe_core);
+
+ // Taking a taskdump could wake every task, but we probably don't want
+ // the `yield_now` vector to be that large under normal circumstances.
+ // Therefore, we free its allocation.
+ wake_deferred_tasks_and_free(context);
+ });
+
+ dump::Dump::new(traces)
+ }
+
+ /// Pops the next task from the remote (injection) queue, if any.
+ fn next_remote_task(&self) -> Option<Notified> {
+ self.shared.inject.pop()
+ }
+
+ /// Returns a waker reference for `me` and marks the scheduler as woken.
+ fn waker_ref(me: &Arc<Self>) -> WakerRef<'_> {
+ // Set `woken` to true when entering `block_on`, so that the outer
+ // future is polled on the first iteration of the scheduler loop.
+ me.shared.woken.store(true, Release);
+ waker_ref(me)
+ }
+
+ // Resets `woken` to false and returns its previous value.
+ pub(crate) fn reset_woken(&self) -> bool {
+ self.shared.woken.swap(false, AcqRel)
+ }
+ }
+
+ cfg_metrics! {
+ impl Handle {
+ /// Returns the scheduler-wide metrics.
+ pub(crate) fn scheduler_metrics(&self) -> &SchedulerMetrics {
+ &self.shared.scheduler_metrics
+ }
+
+ /// Returns the current length of the injection queue.
+ pub(crate) fn injection_queue_depth(&self) -> usize {
+ self.shared.inject.len()
+ }
+
+ /// Returns the metrics of the single worker.
+ ///
+ /// Panics unless `worker` is `0`.
+ pub(crate) fn worker_metrics(&self, worker: usize) -> &WorkerMetrics {
+ assert_eq!(0, worker);
+ &self.shared.worker_metrics
+ }
+
+ /// Forwards to `num_threads` on the blocking spawner.
+ pub(crate) fn num_blocking_threads(&self) -> usize {
+ self.blocking_spawner.num_threads()
+ }
+
+ /// Forwards to `num_idle_threads` on the blocking spawner.
+ pub(crate) fn num_idle_blocking_threads(&self) -> usize {
+ self.blocking_spawner.num_idle_threads()
+ }
+
+ /// Forwards to `queue_depth` on the blocking spawner.
+ pub(crate) fn blocking_queue_depth(&self) -> usize {
+ self.blocking_spawner.queue_depth()
+ }
+
+ /// Forwards to `active_tasks_count` on the owned-task collection.
+ pub(crate) fn active_tasks_count(&self) -> usize {
+ self.shared.owned.active_tasks_count()
+ }
+ }
+ }
+
+ /// Opaque `Debug` output: a fixed placeholder name is printed, no fields.
+ impl fmt::Debug for Handle {
+ fn fmt(&self, fmt: &mut fmt::Formatter<'_>) -> fmt::Result {
+ fmt.debug_struct("current_thread::Handle { ... }").finish()
+ }
+ }
+
+// ===== impl Shared =====
+
+ impl Schedule for Arc<Handle> {
+ /// Removes `task` from the collection of owned tasks.
+ fn release(&self, task: &Task<Self>) -> Option<Task<Self>> {
+ self.shared.owned.remove(task)
+ }
+
+ /// Schedules `task` for execution.
+ ///
+ /// When called from this scheduler's own context the task goes onto the
+ /// local run queue; otherwise it is pushed to the injection queue and the
+ /// driver is unparked.
+ fn schedule(&self, task: task::Notified<Self>) {
+ use scheduler::Context::CurrentThread;
+
+ context::with_scheduler(|maybe_cx| match maybe_cx {
+ Some(CurrentThread(cx)) if Arc::ptr_eq(self, &cx.handle) => {
+ let mut core = cx.core.borrow_mut();
+
+ // If `None`, the runtime is shutting down, so there is no need
+ // to schedule the task.
+ if let Some(core) = core.as_mut() {
+ core.push_task(self, task);
+ }
+ }
+ _ => {
+ // Track that a task was scheduled from **outside** of the runtime.
+ self.shared.scheduler_metrics.inc_remote_schedule_count();
+
+ // Schedule the task
+ self.shared.inject.push(task);
+ self.driver.unpark();
+ }
+ });
+ }
+
+ cfg_unstable! {
+ /// Reacts to a task panic according to the configured `UnhandledPanic`
+ /// policy: either ignores it, or flags the core and shuts down all
+ /// owned tasks.
+ fn unhandled_panic(&self) {
+ use crate::runtime::UnhandledPanic;
+
+ match self.shared.config.unhandled_panic {
+ UnhandledPanic::Ignore => {
+ // Do nothing
+ }
+ UnhandledPanic::ShutdownRuntime => {
+ use scheduler::Context::CurrentThread;
+
+ // This hook is only called from within the runtime, so
+ // `context::with_scheduler` should match with `&self`, i.e.
+ // there is no opportunity for a nested scheduler to be
+ // called.
+ context::with_scheduler(|maybe_cx| match maybe_cx {
+ Some(CurrentThread(cx)) if Arc::ptr_eq(self, &cx.handle) => {
+ let mut core = cx.core.borrow_mut();
+
+ // If `None`, the runtime is shutting down, so there is no need to signal shutdown
+ if let Some(core) = core.as_mut() {
+ core.unhandled_panic = true;
+ self.shared.owned.close_and_shutdown_all();
+ }
+ }
+ _ => unreachable!("runtime core not set in CURRENT thread-local"),
+ })
+ }
+ }
+ }
+ }
+ }
+
+ impl Wake for Handle {
+ /// Wake by value; simply forwards to `wake_by_ref`.
+ fn wake(arc_self: Arc<Self>) {
+ Wake::wake_by_ref(&arc_self)
+ }
+
+ /// Wake by reference
+ ///
+ /// Sets the `woken` flag (with `Release` ordering) and unparks the driver.
+ fn wake_by_ref(arc_self: &Arc<Self>) {
+ arc_self.shared.woken.store(true, Release);
+ arc_self.driver.unpark();
+ }
+ }
+
+// ===== CoreGuard =====
+
+ /// Used to ensure we always place the `Core` value back into its slot in
+ /// `CurrentThread`, even if the future panics.
+ struct CoreGuard<'a> {
+ // Scheduler context that holds the `Core` while the guard is alive.
+ context: scheduler::Context,
+ // Scheduler whose `core` slot receives the `Core` back when the guard drops.
+ scheduler: &'a CurrentThread,
+ }
+
+ impl CoreGuard<'_> {
+ /// Runs the scheduler loop on this thread until `future` completes.
+ ///
+ /// Each outer iteration polls `future` if the scheduler was woken, then
+ /// runs up to `event_interval` tasks, parking when no task is available,
+ /// and finally yields to the driver.
+ ///
+ /// # Panics
+ ///
+ /// Panics if the loop stops because `core.unhandled_panic` was set.
+ #[track_caller]
+ fn block_on<F: Future>(self, future: F) -> F::Output {
+ let ret = self.enter(|mut core, context| {
+ let waker = Handle::waker_ref(&context.handle);
+ let mut cx = std::task::Context::from_waker(&waker);
+
+ pin!(future);
+
+ core.metrics.start_processing_scheduled_tasks();
+
+ 'outer: loop {
+ let handle = &context.handle;
+
+ // Only poll the `block_on` future when it has been woken since
+ // the last poll.
+ if handle.reset_woken() {
+ let (c, res) = context.enter(core, || {
+ crate::runtime::coop::budget(|| future.as_mut().poll(&mut cx))
+ });
+
+ core = c;
+
+ if let Ready(v) = res {
+ return (core, Some(v));
+ }
+ }
+
+ for _ in 0..handle.shared.config.event_interval {
+ // Make sure we didn't hit an unhandled_panic
+ if core.unhandled_panic {
+ return (core, None);
+ }
+
+ core.tick();
+
+ let entry = core.next_task(handle);
+
+ let task = match entry {
+ Some(entry) => entry,
+ None => {
+ core.metrics.end_processing_scheduled_tasks();
+
+ // With deferred wakers pending, only check the driver
+ // without blocking; otherwise park for real.
+ core = if !context.defer.is_empty() {
+ context.park_yield(core, handle)
+ } else {
+ context.park(core, handle)
+ };
+
+ core.metrics.start_processing_scheduled_tasks();
+
+ // Try polling the `block_on` future next
+ continue 'outer;
+ }
+ };
+
+ let task = context.handle.shared.owned.assert_owner(task);
+
+ let (c, _) = context.run_task(core, || {
+ task.run();
+ });
+
+ core = c;
+ }
+
+ core.metrics.end_processing_scheduled_tasks();
+
+ // Yield to the driver, this drives the timer and pulls any
+ // pending I/O events.
+ core = context.park_yield(core, handle);
+
+ core.metrics.start_processing_scheduled_tasks();
+ }
+ });
+
+ match ret {
+ Some(ret) => ret,
+ None => {
+ // The loop only returns `None` after observing
+ // `core.unhandled_panic`.
+ panic!("a spawned task panicked and the runtime is configured to shut down on unhandled panic");
+ }
+ }
+ }
+
+ /// Enters the scheduler context. This sets the queue and other necessary
+ /// scheduler state in the thread-local.
+ fn enter<F, R>(self, f: F) -> R
+ where
+ F: FnOnce(Box<Core>, &Context) -> (Box<Core>, R),
+ {
+ let context = self.context.expect_current_thread();
+
+ // Remove `core` from `context` to pass into the closure.
+ let core = context.core.borrow_mut().take().expect("core missing");
+
+ // Call the closure and place `core` back
+ let (core, ret) = context::set_scheduler(&self.context, || f(core, context));
+
+ *context.core.borrow_mut() = Some(core);
+
+ ret
+ }
+ }
+
+ /// On drop, moves the `Core` (if the context still holds one) back into the
+ /// scheduler's slot and notifies one waiter.
+ impl Drop for CoreGuard<'_> {
+ fn drop(&mut self) {
+ let context = self.context.expect_current_thread();
+
+ if let Some(core) = context.core.borrow_mut().take() {
+ // Replace old scheduler back into the state to allow
+ // other threads to pick it up and drive it.
+ self.scheduler.core.set(core);
+
+ // Wake up other possible threads that could steal the driver.
+ self.scheduler.notify.notify_one()
+ }
+ }
+ }
diff --git a/third_party/rust/tokio/src/runtime/scheduler/defer.rs b/third_party/rust/tokio/src/runtime/scheduler/defer.rs
new file mode 100644
index 0000000000..a4be8ef2e5
--- /dev/null
+++ b/third_party/rust/tokio/src/runtime/scheduler/defer.rs
@@ -0,0 +1,43 @@
+use std::cell::RefCell;
+use std::task::Waker;
+
+ /// A list of wakers whose wake-up is postponed until `wake` is called.
+ pub(crate) struct Defer {
+     deferred: RefCell<Vec<Waker>>,
+ }
+
+ impl Defer {
+     /// Creates an empty list.
+     pub(crate) fn new() -> Defer {
+         Defer {
+             deferred: Default::default(),
+         }
+     }
+
+     /// Stores a clone of `waker` so it is woken by the next call to `wake`.
+     ///
+     /// Only the most recently deferred waker is compared against, so
+     /// consecutive requests for the same task are recorded once.
+     pub(crate) fn defer(&self, waker: &Waker) {
+         let mut deferred = self.deferred.borrow_mut();
+
+         // If the same task adds itself a bunch of times, then only add it once.
+         if let Some(last) = deferred.last() {
+             if last.will_wake(waker) {
+                 return;
+             }
+         }
+
+         deferred.push(waker.clone());
+     }
+
+     /// Returns `true` when no waker is currently deferred.
+     pub(crate) fn is_empty(&self) -> bool {
+         self.deferred.borrow().is_empty()
+     }
+
+     /// Wakes every deferred waker, most recently deferred first.
+     pub(crate) fn wake(&self) {
+         loop {
+             // Pop in a statement of its own: the `RefCell` borrow must end
+             // before the waker runs. A `while let` scrutinee would keep the
+             // borrow alive for the whole loop body, and a waker that calls
+             // back into `defer` or `is_empty` would then panic.
+             let next = self.deferred.borrow_mut().pop();
+
+             match next {
+                 Some(waker) => waker.wake(),
+                 None => break,
+             }
+         }
+     }
+
+     /// Removes and returns all deferred wakers, leaving the list empty.
+     #[cfg(tokio_taskdump)]
+     pub(crate) fn take_deferred(&self) -> Vec<Waker> {
+         let mut deferred = self.deferred.borrow_mut();
+         std::mem::take(&mut *deferred)
+     }
+ }
diff --git a/third_party/rust/tokio/src/runtime/scheduler/inject.rs b/third_party/rust/tokio/src/runtime/scheduler/inject.rs
new file mode 100644
index 0000000000..39976fcd7a
--- /dev/null
+++ b/third_party/rust/tokio/src/runtime/scheduler/inject.rs
@@ -0,0 +1,72 @@
+//! Inject queue used to send wakeups to a work-stealing scheduler
+
+use crate::loom::sync::Mutex;
+use crate::runtime::task;
+
+mod pop;
+pub(crate) use pop::Pop;
+
+mod shared;
+pub(crate) use shared::Shared;
+
+mod synced;
+pub(crate) use synced::Synced;
+
+cfg_rt_multi_thread! {
+ mod rt_multi_thread;
+}
+
+cfg_metrics! {
+ mod metrics;
+}
+
+ /// Growable, MPMC queue used to inject new tasks into the scheduler and as an
+ /// overflow queue when the local, fixed-size, array queue overflows.
+ ///
+ /// Pairs the `Shared` half with the `Synced` half, the latter kept behind a
+ /// mutex.
+ pub(crate) struct Inject<T: 'static> {
+     shared: Shared<T>,
+     synced: Mutex<Synced>,
+ }
+
+ impl<T: 'static> Inject<T> {
+     /// Creates a new queue from a fresh `Shared`/`Synced` pair.
+     pub(crate) fn new() -> Inject<T> {
+         let (shared, synced) = Shared::new();
+         let synced = Mutex::new(synced);
+
+         Inject { shared, synced }
+     }
+
+     /// Returns `true` if the queue has been closed.
+     // Kind of annoying to have to include the cfg here
+     #[cfg(tokio_taskdump)]
+     pub(crate) fn is_closed(&self) -> bool {
+         let guard = self.synced.lock();
+         self.shared.is_closed(&guard)
+     }
+
+     /// Closes the injection queue, returns `true` if the queue is open when the
+     /// transition is made.
+     pub(crate) fn close(&self) -> bool {
+         let mut guard = self.synced.lock();
+         self.shared.close(&mut guard)
+     }
+
+     /// Pushes a value into the queue.
+     ///
+     /// This does nothing if the queue is closed.
+     pub(crate) fn push(&self, task: task::Notified<T>) {
+         let mut guard = self.synced.lock();
+
+         // safety: `guard` protects the `Synced` created together with
+         // `self.shared` in `new`.
+         unsafe { self.shared.push(&mut guard, task) }
+     }
+
+     /// Pops one task, or returns `None` when the queue reports itself empty.
+     pub(crate) fn pop(&self) -> Option<task::Notified<T>> {
+         // The emptiness check does not take the mutex.
+         if !self.shared.is_empty() {
+             let mut guard = self.synced.lock();
+
+             // safety: `guard` protects the `Synced` created together with
+             // `self.shared` in `new`.
+             unsafe { self.shared.pop(&mut guard) }
+         } else {
+             None
+         }
+     }
+ }
diff --git a/third_party/rust/tokio/src/runtime/scheduler/inject/metrics.rs b/third_party/rust/tokio/src/runtime/scheduler/inject/metrics.rs
new file mode 100644
index 0000000000..76f045fdbd
--- /dev/null
+++ b/third_party/rust/tokio/src/runtime/scheduler/inject/metrics.rs
@@ -0,0 +1,7 @@
+use super::Inject;
+
+ impl<T: 'static> Inject<T> {
+     /// Returns the number of tasks the shared half currently counts as queued.
+     pub(crate) fn len(&self) -> usize {
+         let shared = &self.shared;
+         shared.len()
+     }
+ }
diff --git a/third_party/rust/tokio/src/runtime/scheduler/inject/pop.rs b/third_party/rust/tokio/src/runtime/scheduler/inject/pop.rs
new file mode 100644
index 0000000000..4e6d5d3be3
--- /dev/null
+++ b/third_party/rust/tokio/src/runtime/scheduler/inject/pop.rs
@@ -0,0 +1,55 @@
+use super::Synced;
+
+use crate::runtime::task;
+
+use std::marker::PhantomData;
+
+ /// Iterator that pops up to `len` tasks out of a borrowed `Synced`.
+ pub(crate) struct Pop<'a, T: 'static> {
+     len: usize,
+     synced: &'a mut Synced,
+     _p: PhantomData<T>,
+ }
+
+ impl<'a, T: 'static> Pop<'a, T> {
+     /// Builds an iterator that yields at most `len` tasks from `synced`.
+     pub(super) fn new(len: usize, synced: &'a mut Synced) -> Pop<'a, T> {
+         let _p = PhantomData;
+
+         Self { len, synced, _p }
+     }
+ }
+
+ impl<'a, T: 'static> Iterator for Pop<'a, T> {
+     type Item = task::Notified<T>;
+
+     /// Pops the next task while the remaining budget is non-zero.
+     fn next(&mut self) -> Option<Self::Item> {
+         match self.len {
+             0 => None,
+             remaining => {
+                 let task = self.synced.pop();
+
+                 // Should be `Some` when `len > 0`
+                 debug_assert!(task.is_some());
+
+                 self.len = remaining - 1;
+                 task
+             }
+         }
+     }
+
+     /// The remaining budget is both the lower and the upper bound.
+     fn size_hint(&self) -> (usize, Option<usize>) {
+         let remaining = self.len;
+         (remaining, Some(remaining))
+     }
+ }
+
+ impl<'a, T: 'static> ExactSizeIterator for Pop<'a, T> {
+     /// Number of tasks this iterator will still yield.
+     fn len(&self) -> usize {
+         let Pop { len, .. } = self;
+         *len
+     }
+ }
+
+ impl<'a, T: 'static> Drop for Pop<'a, T> {
+     /// Pops and discards whatever the iterator had not yielded yet.
+     fn drop(&mut self) {
+         while self.next().is_some() {}
+     }
+ }
diff --git a/third_party/rust/tokio/src/runtime/scheduler/inject/rt_multi_thread.rs b/third_party/rust/tokio/src/runtime/scheduler/inject/rt_multi_thread.rs
new file mode 100644
index 0000000000..07d1063c5d
--- /dev/null
+++ b/third_party/rust/tokio/src/runtime/scheduler/inject/rt_multi_thread.rs
@@ -0,0 +1,98 @@
+use super::{Shared, Synced};
+
+use crate::runtime::scheduler::Lock;
+use crate::runtime::task;
+
+use std::sync::atomic::Ordering::Release;
+
+ impl<'a> Lock<Synced> for &'a mut Synced { // an exclusive reference acts as its own lock handle
+ type Handle = &'a mut Synced;
+
+ fn lock(self) -> Self::Handle {
+ self // nothing to acquire: the reference is handed back unchanged
+ }
+ }
+
+ impl AsMut<Synced> for Synced { // identity conversion
+ fn as_mut(&mut self) -> &mut Synced {
+ self // NOTE(review): presumably what lets `&mut Synced` meet the `Handle: AsMut<T>` bound of `Lock` - confirm
+ }
+ }
+
+impl<T: 'static> Shared<T> {
+ /// Pushes several values into the queue.
+ ///
+ /// The tasks are first linked to each other through their `queue_next`
+ /// field and are then handed to `push_batch_inner` as one chain.
+ ///
+ /// # Safety
+ ///
+ /// Must be called with the same `Synced` instance returned by `Inject::new`
+ #[inline]
+ pub(crate) unsafe fn push_batch<L, I>(&self, shared: L, mut iter: I)
+ where
+     L: Lock<Synced>,
+     I: Iterator<Item = task::Notified<T>>,
+ {
+     // An empty iterator leaves the queue untouched.
+     let head = match iter.next() {
+         None => return,
+         Some(task) => task.into_raw(),
+     };
+
+     // `tail` follows the end of the chain, `count` its length.
+     let mut tail = head;
+     let mut count = 1;
+
+     // We are going to be called with an `std::iter::Chain`, and that
+     // iterator overrides `for_each` to something that is easier for the
+     // compiler to optimize than a loop.
+     iter.for_each(|task| {
+         let raw = task.into_raw();
+
+         // safety: Holding the Notified for a task guarantees exclusive
+         // access to the `queue_next` field.
+         unsafe { tail.set_queue_next(Some(raw)) };
+         tail = raw;
+         count += 1;
+     });
+
+     // The chain is complete; splice it into the linked list.
+     self.push_batch_inner(shared, head, tail, count);
+ }
+
+ /// Inserts several tasks that have been linked together into the queue.
+ ///
+ /// The provided head and tail may be the same task. In this case, a
+ /// single task is inserted.
+ ///
+ /// If the queue is already closed, nothing is inserted: every task of the
+ /// batch is dropped instead. This matches `push`, which also discards a
+ /// task offered to a closed queue.
+ ///
+ /// # Safety
+ ///
+ /// Must be called with the same `Synced` instance returned by `Inject::new`.
+ /// `batch_head` must reach `batch_tail` by following `queue_next`, and `num`
+ /// must be the number of tasks in that chain.
+ #[inline]
+ unsafe fn push_batch_inner<L>(
+     &self,
+     shared: L,
+     batch_head: task::RawTask,
+     batch_tail: task::RawTask,
+     num: usize,
+ ) where
+     L: Lock<Synced>,
+ {
+     debug_assert!(unsafe { batch_tail.get_queue_next().is_none() });
+
+     let mut synced = shared.lock();
+
+     if synced.as_mut().is_closed {
+         // Dropping the tasks does not need the lock, so release it first.
+         drop(synced);
+
+         let mut curr = Some(batch_head);
+
+         while let Some(task) = curr {
+             // safety: every task in the chain was produced by
+             // `Notified::into_raw`, so it can be turned back into a
+             // `Notified` exactly once. The next pointer is read before the
+             // task is dropped.
+             curr = unsafe { task.get_queue_next() };
+
+             let _ = unsafe { task::Notified::<T>::from_raw(task) };
+         }
+
+         return;
+     }
+
+     let synced = synced.as_mut();
+
+     if let Some(tail) = synced.tail {
+         unsafe {
+             tail.set_queue_next(Some(batch_head));
+         }
+     } else {
+         synced.head = Some(batch_head);
+     }
+
+     synced.tail = Some(batch_tail);
+
+     // Increment the count.
+     //
+     // safety: All updates to the len atomic are guarded by the mutex. As
+     // such, a non-atomic load followed by a store is safe.
+     let len = self.len.unsync_load();
+
+     self.len.store(len + num, Release);
+ }
+}
diff --git a/third_party/rust/tokio/src/runtime/scheduler/inject/shared.rs b/third_party/rust/tokio/src/runtime/scheduler/inject/shared.rs
new file mode 100644
index 0000000000..7fdd2839dd
--- /dev/null
+++ b/third_party/rust/tokio/src/runtime/scheduler/inject/shared.rs
@@ -0,0 +1,119 @@
+use super::{Pop, Synced};
+
+use crate::loom::sync::atomic::AtomicUsize;
+use crate::runtime::task;
+
+use std::marker::PhantomData;
+use std::sync::atomic::Ordering::{Acquire, Release};
+
+ pub(crate) struct Shared<T: 'static> {
+ /// Number of pending tasks in the queue. This helps prevent unnecessary
+ /// locking in the hot path: `is_empty` and `len` only read this counter.
+ pub(super) len: AtomicUsize,
+
+ _p: PhantomData<T>, // marker only; no `T` value is stored here
+ }
+
+ unsafe impl<T> Send for Shared<T> {} // NOTE(review): presumably sound because the fields are just a counter and a marker - confirm
+ unsafe impl<T> Sync for Shared<T> {} // NOTE(review): same assumption as for `Send` - confirm
+
+ impl<T: 'static> Shared<T> {
+     /// Creates the `Shared` half together with the `Synced` half that callers
+     /// must pass back into the other methods. The queue starts open and empty.
+     pub(crate) fn new() -> (Shared<T>, Synced) {
+         let shared = Shared {
+             len: AtomicUsize::new(0),
+             _p: PhantomData,
+         };
+
+         let synced = Synced {
+             is_closed: false,
+             head: None,
+             tail: None,
+         };
+
+         (shared, synced)
+     }
+
+     /// Returns `true` when the task counter reads zero.
+     pub(crate) fn is_empty(&self) -> bool {
+         self.len.load(Acquire) == 0
+     }
+
+     /// Returns `true` if the queue has been closed.
+     // Kind of annoying to have to include the cfg here
+     #[cfg(any(tokio_taskdump, all(feature = "rt-multi-thread", not(tokio_wasi))))]
+     pub(crate) fn is_closed(&self, synced: &Synced) -> bool {
+         synced.is_closed
+     }
+
+     /// Closes the injection queue, returns `true` if the queue is open when the
+     /// transition is made.
+     pub(crate) fn close(&self, synced: &mut Synced) -> bool {
+         // Set the flag and report whether it was clear beforehand.
+         let was_closed = std::mem::replace(&mut synced.is_closed, true);
+         !was_closed
+     }
+
+     /// Returns the current value of the task counter.
+     pub(crate) fn len(&self) -> usize {
+         self.len.load(Acquire)
+     }
+
+     /// Pushes a value into the queue.
+     ///
+     /// This does nothing if the queue is closed.
+     ///
+     /// # Safety
+     ///
+     /// Must be called with the same `Synced` instance returned by `Inject::new`
+     pub(crate) unsafe fn push(&self, synced: &mut Synced, task: task::Notified<T>) {
+         if synced.is_closed {
+             return;
+         }
+
+         // safety: only mutated with the lock held
+         let prev_len = self.len.unsync_load();
+         let raw = task.into_raw();
+
+         // The next pointer should already be null
+         debug_assert!(unsafe { raw.get_queue_next().is_none() });
+
+         match synced.tail {
+             Some(tail) => {
+                 // safety: Holding the Notified for a task guarantees exclusive
+                 // access to the `queue_next` field.
+                 unsafe { tail.set_queue_next(Some(raw)) };
+             }
+             None => {
+                 synced.head = Some(raw);
+             }
+         }
+
+         synced.tail = Some(raw);
+         self.len.store(prev_len + 1, Release);
+     }
+
+     /// Pop a value from the queue.
+     ///
+     /// # Safety
+     ///
+     /// Must be called with the same `Synced` instance returned by `Inject::new`
+     pub(crate) unsafe fn pop(&self, synced: &mut Synced) -> Option<task::Notified<T>> {
+         self.pop_n(synced, 1).next()
+     }
+
+     /// Pop `n` values from the queue
+     ///
+     /// The counter is lowered up front by the number of tasks that will be
+     /// handed out; the returned `Pop` then removes them from `synced`.
+     ///
+     /// # Safety
+     ///
+     /// Must be called with the same `Synced` instance returned by `Inject::new`
+     pub(crate) unsafe fn pop_n<'a>(&'a self, synced: &'a mut Synced, n: usize) -> Pop<'a, T> {
+         // safety: All updates to the len atomic are guarded by the mutex. As
+         // such, a non-atomic load followed by a store is safe.
+         let available = self.len.unsync_load();
+         let granted = n.min(available);
+
+         // Decrement the count.
+         self.len.store(available - granted, Release);
+
+         Pop::new(granted, synced)
+     }
+ }
diff --git a/third_party/rust/tokio/src/runtime/scheduler/inject/synced.rs b/third_party/rust/tokio/src/runtime/scheduler/inject/synced.rs
new file mode 100644
index 0000000000..6847f68e5d
--- /dev/null
+++ b/third_party/rust/tokio/src/runtime/scheduler/inject/synced.rs
@@ -0,0 +1,32 @@
+use crate::runtime::task;
+
+ /// The part of the inject queue that callers must hold exclusively: the
+ /// closed flag and both ends of the intrusive task list.
+ pub(crate) struct Synced {
+     /// True if the queue is closed.
+     pub(super) is_closed: bool,
+
+     /// Linked-list head.
+     pub(super) head: Option<task::RawTask>,
+
+     /// Linked-list tail.
+     pub(super) tail: Option<task::RawTask>,
+ }
+
+ unsafe impl Send for Synced {}
+ unsafe impl Sync for Synced {}
+
+ impl Synced {
+     /// Unlinks the task at the head of the list and returns it, or returns
+     /// `None` when the list is empty.
+     pub(super) fn pop<T: 'static>(&mut self) -> Option<task::Notified<T>> {
+         let front = self.head?;
+
+         // Advance the head; once the list runs empty the tail is cleared too.
+         let following = unsafe { front.get_queue_next() };
+         self.head = following;
+
+         if self.head.is_none() {
+             self.tail = None;
+         }
+
+         // Detach the popped task from the rest of the list.
+         unsafe { front.set_queue_next(None) };
+
+         // safety: a `Notified` is pushed into the queue and now it is popped!
+         let notified = unsafe { task::Notified::from_raw(front) };
+         Some(notified)
+     }
+ }
diff --git a/third_party/rust/tokio/src/runtime/scheduler/lock.rs b/third_party/rust/tokio/src/runtime/scheduler/lock.rs
new file mode 100644
index 0000000000..0901c2b37c
--- /dev/null
+++ b/third_party/rust/tokio/src/runtime/scheduler/lock.rs
@@ -0,0 +1,6 @@
+ /// A lock (mutex) yielding generic data: `lock` consumes the lock value and returns a handle that can be borrowed as `&mut T`.
+ pub(crate) trait Lock<T> {
+ type Handle: AsMut<T>; // value returned by `lock`; gives `&mut T` through `AsMut`
+
+ fn lock(self) -> Self::Handle; // takes `self` by value
+ }
diff --git a/third_party/rust/tokio/src/runtime/scheduler/mod.rs b/third_party/rust/tokio/src/runtime/scheduler/mod.rs
new file mode 100644
index 0000000000..3e3151711f
--- /dev/null
+++ b/third_party/rust/tokio/src/runtime/scheduler/mod.rs
@@ -0,0 +1,249 @@
+cfg_rt! {
+ pub(crate) mod current_thread;
+ pub(crate) use current_thread::CurrentThread;
+
+ mod defer;
+ use defer::Defer;
+
+ pub(crate) mod inject;
+ pub(crate) use inject::Inject;
+}
+
+cfg_rt_multi_thread! {
+ mod lock;
+ use lock::Lock;
+
+ pub(crate) mod multi_thread;
+ pub(crate) use multi_thread::MultiThread;
+}
+
+use crate::runtime::driver;
+
+ #[derive(Debug, Clone)]
+ pub(crate) enum Handle { // scheduler handle; the available variants depend on the enabled features
+ #[cfg(feature = "rt")]
+ CurrentThread(Arc<current_thread::Handle>), // shared handle of the current-thread scheduler
+
+ #[cfg(all(feature = "rt-multi-thread", not(tokio_wasi)))]
+ MultiThread(Arc<multi_thread::Handle>), // shared handle of the multi-thread scheduler
+
+ // TODO: This is to avoid triggering "dead code" warnings in many other places
+ // in the codebase. Remove this during a later cleanup.
+ #[cfg(not(feature = "rt"))]
+ #[allow(dead_code)]
+ Disabled, // only exists without `rt`; `driver()` treats it as unreachable
+ }
+
+ #[cfg(feature = "rt")]
+ pub(super) enum Context { // scheduler context, one variant per scheduler flavor
+ CurrentThread(current_thread::Context), // context of the current-thread scheduler
+
+ #[cfg(all(feature = "rt-multi-thread", not(tokio_wasi)))]
+ MultiThread(multi_thread::Context), // context of the multi-thread scheduler
+ }
+
+ impl Handle {
+     /// Returns the driver handle stored in the wrapped scheduler handle.
+     #[cfg_attr(not(feature = "full"), allow(dead_code))]
+     pub(crate) fn driver(&self) -> &driver::Handle {
+         match self {
+             #[cfg(feature = "rt")]
+             Handle::CurrentThread(handle) => &handle.driver,
+
+             #[cfg(all(feature = "rt-multi-thread", not(tokio_wasi)))]
+             Handle::MultiThread(handle) => &handle.driver,
+
+             #[cfg(not(feature = "rt"))]
+             Handle::Disabled => unreachable!(),
+         }
+     }
+ }
+
+cfg_rt! {
+ use crate::future::Future;
+ use crate::loom::sync::Arc;
+ use crate::runtime::{blocking, task::Id};
+ use crate::runtime::context;
+ use crate::task::JoinHandle;
+ use crate::util::RngSeedGenerator;
+ use std::task::Waker;
+
+ impl Handle {
+     /// Returns a clone of the handle found by `context::with_current`.
+     ///
+     /// # Panics
+     ///
+     /// Panics with the error's message when `with_current` fails.
+     #[track_caller]
+     pub(crate) fn current() -> Handle {
+         match context::with_current(Clone::clone) {
+             Err(e) => panic!("{}", e),
+             Ok(current) => current,
+         }
+     }
+
+     /// Returns the blocking pool spawner of the wrapped scheduler handle.
+     pub(crate) fn blocking_spawner(&self) -> &blocking::Spawner {
+         match self {
+             Handle::CurrentThread(handle) => &handle.blocking_spawner,
+
+             #[cfg(all(feature = "rt-multi-thread", not(tokio_wasi)))]
+             Handle::MultiThread(handle) => &handle.blocking_spawner,
+         }
+     }
+
+     /// Spawns `future` with the given task `id` on the wrapped scheduler.
+     pub(crate) fn spawn<F>(&self, future: F, id: Id) -> JoinHandle<F::Output>
+     where
+         F: Future + Send + 'static,
+         F::Output: Send + 'static,
+     {
+         match self {
+             Handle::CurrentThread(handle) => current_thread::Handle::spawn(handle, future, id),
+
+             #[cfg(all(feature = "rt-multi-thread", not(tokio_wasi)))]
+             Handle::MultiThread(handle) => multi_thread::Handle::spawn(handle, future, id),
+         }
+     }
+
+     /// Forwards to the multi-thread handle's `shutdown`; does nothing for the
+     /// current-thread flavor.
+     pub(crate) fn shutdown(&self) {
+         match self {
+             Handle::CurrentThread(_) => {}
+
+             #[cfg(all(feature = "rt-multi-thread", not(tokio_wasi)))]
+             Handle::MultiThread(handle) => handle.shutdown(),
+         }
+     }
+
+     /// Returns the RNG seed generator of the wrapped scheduler handle.
+     pub(crate) fn seed_generator(&self) -> &RngSeedGenerator {
+         match self {
+             Handle::CurrentThread(handle) => &handle.seed_generator,
+
+             #[cfg(all(feature = "rt-multi-thread", not(tokio_wasi)))]
+             Handle::MultiThread(handle) => &handle.seed_generator,
+         }
+     }
+
+     /// Returns the inner current-thread handle.
+     ///
+     /// # Panics
+     ///
+     /// Panics if this is not a `CurrentThread` handle.
+     pub(crate) fn as_current_thread(&self) -> &Arc<current_thread::Handle> {
+         match self {
+             Handle::CurrentThread(inner) => inner,
+             #[cfg(all(feature = "rt-multi-thread", not(tokio_wasi)))]
+             _ => panic!("not a CurrentThread handle"),
+         }
+     }
+ }
+
+ cfg_metrics! {
+ use crate::runtime::{SchedulerMetrics, WorkerMetrics};
+
+ impl Handle {
+     /// Number of workers: always 1 for the current-thread flavor.
+     pub(crate) fn num_workers(&self) -> usize {
+         match self {
+             Handle::CurrentThread(_) => 1,
+             #[cfg(all(feature = "rt-multi-thread", not(tokio_wasi)))]
+             Handle::MultiThread(h) => h.num_workers(),
+         }
+     }
+
+     /// Forwards to the wrapped handle's `num_blocking_threads`.
+     pub(crate) fn num_blocking_threads(&self) -> usize {
+         match self {
+             Handle::CurrentThread(h) => h.num_blocking_threads(),
+             #[cfg(all(feature = "rt-multi-thread", not(tokio_wasi)))]
+             Handle::MultiThread(h) => h.num_blocking_threads(),
+         }
+     }
+
+     /// Forwards to the wrapped handle's `num_idle_blocking_threads`.
+     pub(crate) fn num_idle_blocking_threads(&self) -> usize {
+         match self {
+             Handle::CurrentThread(h) => h.num_idle_blocking_threads(),
+             #[cfg(all(feature = "rt-multi-thread", not(tokio_wasi)))]
+             Handle::MultiThread(h) => h.num_idle_blocking_threads(),
+         }
+     }
+
+     /// Forwards to the wrapped handle's `active_tasks_count`.
+     pub(crate) fn active_tasks_count(&self) -> usize {
+         match self {
+             Handle::CurrentThread(h) => h.active_tasks_count(),
+             #[cfg(all(feature = "rt-multi-thread", not(tokio_wasi)))]
+             Handle::MultiThread(h) => h.active_tasks_count(),
+         }
+     }
+
+     /// Forwards to the wrapped handle's `scheduler_metrics`.
+     pub(crate) fn scheduler_metrics(&self) -> &SchedulerMetrics {
+         match self {
+             Handle::CurrentThread(h) => h.scheduler_metrics(),
+             #[cfg(all(feature = "rt-multi-thread", not(tokio_wasi)))]
+             Handle::MultiThread(h) => h.scheduler_metrics(),
+         }
+     }
+
+     /// Forwards to the wrapped handle's `worker_metrics` for `worker`.
+     pub(crate) fn worker_metrics(&self, worker: usize) -> &WorkerMetrics {
+         match self {
+             Handle::CurrentThread(h) => h.worker_metrics(worker),
+             #[cfg(all(feature = "rt-multi-thread", not(tokio_wasi)))]
+             Handle::MultiThread(h) => h.worker_metrics(worker),
+         }
+     }
+
+     /// Forwards to the wrapped handle's `injection_queue_depth`.
+     pub(crate) fn injection_queue_depth(&self) -> usize {
+         match self {
+             Handle::CurrentThread(h) => h.injection_queue_depth(),
+             #[cfg(all(feature = "rt-multi-thread", not(tokio_wasi)))]
+             Handle::MultiThread(h) => h.injection_queue_depth(),
+         }
+     }
+
+     /// Local queue depth of `worker`. The current-thread flavor reads it from
+     /// the worker metrics; the multi-thread flavor asks its handle directly.
+     pub(crate) fn worker_local_queue_depth(&self, worker: usize) -> usize {
+         match self {
+             Handle::CurrentThread(h) => h.worker_metrics(worker).queue_depth(),
+             #[cfg(all(feature = "rt-multi-thread", not(tokio_wasi)))]
+             Handle::MultiThread(h) => h.worker_local_queue_depth(worker),
+         }
+     }
+
+     /// Forwards to the wrapped handle's `blocking_queue_depth`.
+     pub(crate) fn blocking_queue_depth(&self) -> usize {
+         match self {
+             Handle::CurrentThread(h) => h.blocking_queue_depth(),
+             #[cfg(all(feature = "rt-multi-thread", not(tokio_wasi)))]
+             Handle::MultiThread(h) => h.blocking_queue_depth(),
+         }
+     }
+ }
+ }
+
+ impl Context {
+     /// Returns the inner current-thread context.
+     ///
+     /// # Panics
+     ///
+     /// Panics if this is not a `CurrentThread` context.
+     #[track_caller]
+     pub(crate) fn expect_current_thread(&self) -> &current_thread::Context {
+         match self {
+             Context::CurrentThread(cx) => cx,
+             #[cfg(all(feature = "rt-multi-thread", not(tokio_wasi)))]
+             _ => panic!("expected `CurrentThread::Context`"),
+         }
+     }
+
+     /// Forwards `waker` to the `defer` method of the wrapped context.
+     pub(crate) fn defer(&self, waker: &Waker) {
+         match self {
+             Context::CurrentThread(cx) => cx.defer(waker),
+             #[cfg(all(feature = "rt-multi-thread", not(tokio_wasi)))]
+             Context::MultiThread(cx) => cx.defer(waker),
+         }
+     }
+
+     cfg_rt_multi_thread! {
+         /// Returns the inner multi-thread context.
+         ///
+         /// # Panics
+         ///
+         /// Panics if this is not a `MultiThread` context.
+         #[track_caller]
+         pub(crate) fn expect_multi_thread(&self) -> &multi_thread::Context {
+             match self {
+                 Context::MultiThread(cx) => cx,
+                 _ => panic!("expected `MultiThread::Context`"),
+             }
+         }
+     }
+ }
+}
+
+cfg_not_rt! {
+ #[cfg(any(
+ feature = "net",
+ all(unix, feature = "process"),
+ all(unix, feature = "signal"),
+ feature = "time",
+ ))]
+ impl Handle {
+ #[track_caller]
+ pub(crate) fn current() -> Handle { // this variant never returns a handle
+ panic!("{}", crate::util::error::CONTEXT_MISSING_ERROR) // always panics with the context-missing message
+ }
+ }
+}
diff --git a/third_party/rust/tokio/src/runtime/scheduler/multi_thread/counters.rs b/third_party/rust/tokio/src/runtime/scheduler/multi_thread/counters.rs
new file mode 100644
index 0000000000..50bcc11985
--- /dev/null
+++ b/third_party/rust/tokio/src/runtime/scheduler/multi_thread/counters.rs
@@ -0,0 +1,62 @@
+ #[cfg(tokio_internal_mt_counters)]
+ mod imp {
+     use std::sync::atomic::AtomicUsize;
+     use std::sync::atomic::Ordering::Relaxed;
+
+     // Process-wide counters, bumped by the `inc_*` functions below.
+     static NUM_MAINTENANCE: AtomicUsize = AtomicUsize::new(0);
+     static NUM_NOTIFY_LOCAL: AtomicUsize = AtomicUsize::new(0);
+     static NUM_UNPARKS_LOCAL: AtomicUsize = AtomicUsize::new(0);
+     static NUM_LIFO_SCHEDULES: AtomicUsize = AtomicUsize::new(0);
+     static NUM_LIFO_CAPPED: AtomicUsize = AtomicUsize::new(0);
+
+     impl Drop for super::Counters {
+         /// Prints the current value of every counter to stdout.
+         fn drop(&mut self) {
+             // Read all counters first, then print them.
+             let (notifies, unparks, maintenance, lifo_schedules, lifo_capped) = (
+                 NUM_NOTIFY_LOCAL.load(Relaxed),
+                 NUM_UNPARKS_LOCAL.load(Relaxed),
+                 NUM_MAINTENANCE.load(Relaxed),
+                 NUM_LIFO_SCHEDULES.load(Relaxed),
+                 NUM_LIFO_CAPPED.load(Relaxed),
+             );
+
+             println!("---");
+             println!("notifies (local): {}", notifies);
+             println!(" unparks (local): {}", unparks);
+             println!(" maintenance: {}", maintenance);
+             println!(" LIFO schedules: {}", lifo_schedules);
+             println!(" LIFO capped: {}", lifo_capped);
+         }
+     }
+
+     /// Adds one to the local-notify counter.
+     pub(crate) fn inc_num_inc_notify_local() {
+         NUM_NOTIFY_LOCAL.fetch_add(1, Relaxed);
+     }
+
+     /// Adds one to the local-unpark counter.
+     pub(crate) fn inc_num_unparks_local() {
+         NUM_UNPARKS_LOCAL.fetch_add(1, Relaxed);
+     }
+
+     /// Adds one to the maintenance counter.
+     pub(crate) fn inc_num_maintenance() {
+         NUM_MAINTENANCE.fetch_add(1, Relaxed);
+     }
+
+     /// Adds one to the LIFO-schedule counter.
+     pub(crate) fn inc_lifo_schedules() {
+         NUM_LIFO_SCHEDULES.fetch_add(1, Relaxed);
+     }
+
+     /// Adds one to the LIFO-capped counter.
+     pub(crate) fn inc_lifo_capped() {
+         NUM_LIFO_CAPPED.fetch_add(1, Relaxed);
+     }
+ }
+
+ #[cfg(not(tokio_internal_mt_counters))]
+ mod imp { // no-op stand-ins with the same names as the counting versions, so call sites need no cfg
+ pub(crate) fn inc_num_inc_notify_local() {}
+ pub(crate) fn inc_num_unparks_local() {}
+ pub(crate) fn inc_num_maintenance() {}
+ pub(crate) fn inc_lifo_schedules() {}
+ pub(crate) fn inc_lifo_capped() {}
+ }
+
+ #[derive(Debug)]
+ pub(crate) struct Counters; // unit type; with `tokio_internal_mt_counters` set, dropping it prints the counters
+
+pub(super) use imp::*;
diff --git a/third_party/rust/tokio/src/runtime/scheduler/multi_thread/handle.rs b/third_party/rust/tokio/src/runtime/scheduler/multi_thread/handle.rs
new file mode 100644
index 0000000000..98e4765856
--- /dev/null
+++ b/third_party/rust/tokio/src/runtime/scheduler/multi_thread/handle.rs
@@ -0,0 +1,68 @@
+use crate::future::Future;
+use crate::loom::sync::Arc;
+use crate::runtime::scheduler::multi_thread::worker;
+use crate::runtime::{
+ blocking, driver,
+ task::{self, JoinHandle},
+};
+use crate::util::RngSeedGenerator;
+
+use std::fmt;
+
+cfg_metrics! {
+ mod metrics;
+}
+
+cfg_taskdump! {
+ mod taskdump;
+}
+
+ /// Handle to the multi thread scheduler
+ pub(crate) struct Handle {
+ /// Task spawner (worker-shared state; new tasks are bound through `shared.owned`)
+ pub(super) shared: worker::Shared,
+
+ /// Resource driver handles
+ pub(crate) driver: driver::Handle,
+
+ /// Blocking pool spawner
+ pub(crate) blocking_spawner: blocking::Spawner,
+
+ /// Current random number generator seed
+ pub(crate) seed_generator: RngSeedGenerator,
+ }
+
+ impl Handle {
+     /// Spawns a future onto the thread pool
+     pub(crate) fn spawn<F>(me: &Arc<Self>, future: F, id: task::Id) -> JoinHandle<F::Output>
+     where
+         F: Future + Send + 'static,
+         F::Output: Send + 'static,
+     {
+         Handle::bind_new_task(me, future, id)
+     }
+
+     /// Shuts the scheduler down by calling `close`.
+     pub(crate) fn shutdown(&self) {
+         self.close();
+     }
+
+     /// Binds `future` to the owned-task set and schedules the resulting task
+     /// when `bind` hands back a notification for it.
+     pub(super) fn bind_new_task<T>(me: &Arc<Self>, future: T, id: task::Id) -> JoinHandle<T::Output>
+     where
+         T: Future + Send + 'static,
+         T::Output: Send + 'static,
+     {
+         let (join, notified) = me.shared.owned.bind(future, Arc::clone(me), id);
+
+         match notified {
+             Some(task) => me.schedule_task(task, false),
+             None => {}
+         }
+
+         join
+     }
+ }
+
+ impl fmt::Debug for Handle {
+     /// Writes a fixed placeholder; no field is printed.
+     fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
+         let mut builder = f.debug_struct("multi_thread::Handle { ... }");
+         builder.finish()
+     }
+ }
diff --git a/third_party/rust/tokio/src/runtime/scheduler/multi_thread/handle/metrics.rs b/third_party/rust/tokio/src/runtime/scheduler/multi_thread/handle/metrics.rs
new file mode 100644
index 0000000000..838694fc89
--- /dev/null
+++ b/third_party/rust/tokio/src/runtime/scheduler/multi_thread/handle/metrics.rs
@@ -0,0 +1,41 @@
+use super::Handle;
+
+use crate::runtime::{SchedulerMetrics, WorkerMetrics};
+
+ impl Handle {
+     /// Number of workers, taken from the length of the worker metrics list.
+     pub(crate) fn num_workers(&self) -> usize {
+         let per_worker = &self.shared.worker_metrics;
+         per_worker.len()
+     }
+
+     /// Thread count reported by the blocking spawner.
+     pub(crate) fn num_blocking_threads(&self) -> usize {
+         let spawner = &self.blocking_spawner;
+         spawner.num_threads()
+     }
+
+     /// Idle thread count reported by the blocking spawner.
+     pub(crate) fn num_idle_blocking_threads(&self) -> usize {
+         let spawner = &self.blocking_spawner;
+         spawner.num_idle_threads()
+     }
+
+     /// Active task count reported by the owned-task set.
+     pub(crate) fn active_tasks_count(&self) -> usize {
+         let owned = &self.shared.owned;
+         owned.active_tasks_count()
+     }
+
+     /// Scheduler-wide metrics.
+     pub(crate) fn scheduler_metrics(&self) -> &SchedulerMetrics {
+         let shared = &self.shared;
+         &shared.scheduler_metrics
+     }
+
+     /// Metrics of the worker at index `worker` (indexes the metrics list).
+     pub(crate) fn worker_metrics(&self, worker: usize) -> &WorkerMetrics {
+         let per_worker = &self.shared.worker_metrics;
+         &per_worker[worker]
+     }
+
+     /// Injection queue depth reported by the shared state.
+     pub(crate) fn injection_queue_depth(&self) -> usize {
+         let shared = &self.shared;
+         shared.injection_queue_depth()
+     }
+
+     /// Local queue depth of `worker`, reported by the shared state.
+     pub(crate) fn worker_local_queue_depth(&self, worker: usize) -> usize {
+         let shared = &self.shared;
+         shared.worker_local_queue_depth(worker)
+     }
+
+     /// Queue depth reported by the blocking spawner.
+     pub(crate) fn blocking_queue_depth(&self) -> usize {
+         let spawner = &self.blocking_spawner;
+         spawner.queue_depth()
+     }
+ }
diff --git a/third_party/rust/tokio/src/runtime/scheduler/multi_thread/handle/taskdump.rs b/third_party/rust/tokio/src/runtime/scheduler/multi_thread/handle/taskdump.rs
new file mode 100644
index 0000000000..477d857d88
--- /dev/null
+++ b/third_party/rust/tokio/src/runtime/scheduler/multi_thread/handle/taskdump.rs
@@ -0,0 +1,26 @@
+use super::Handle;
+
+use crate::runtime::Dump;
+
+ impl Handle {
+ pub(crate) async fn dump(&self) -> Dump { // requests a dump through `trace_status` and returns its result
+ let trace_status = &self.shared.trace_status;
+
+ // If a dump is in progress, block (by awaiting `start_trace_request`).
+ trace_status.start_trace_request(&self).await;
+
+ let result = loop { // retry until `take_result` yields a value
+ if let Some(result) = trace_status.take_result() {
+ break result;
+ } else {
+ self.notify_all(); // NOTE(review): presumably wakes the workers so they produce the trace - confirm
+ trace_status.result_ready.notified().await; // wait for a `result_ready` notification before checking again
+ }
+ };
+
+ // Allow other queued dumps to proceed.
+ trace_status.end_trace_request(&self).await;
+
+ result
+ }
+ }
diff --git a/third_party/rust/tokio/src/runtime/scheduler/multi_thread/idle.rs b/third_party/rust/tokio/src/runtime/scheduler/multi_thread/idle.rs
new file mode 100644
index 0000000000..834bc2b66f
--- /dev/null
+++ b/third_party/rust/tokio/src/runtime/scheduler/multi_thread/idle.rs
@@ -0,0 +1,240 @@
+//! Coordinates idling workers
+
+use crate::loom::sync::atomic::AtomicUsize;
+use crate::runtime::scheduler::multi_thread::Shared;
+
+use std::fmt;
+use std::sync::atomic::Ordering::{self, SeqCst};
+
+ pub(super) struct Idle {
+ /// Tracks both the number of searching workers and the number of unparked
+ /// workers, packed into one word: the low `UNPARK_SHIFT` bits hold the
+ /// searching count and the bits above them hold the unparked count.
+ /// Used as a fast-path to avoid acquiring the lock when it is not needed.
+ state: AtomicUsize,
+
+ /// Total number of workers.
+ num_workers: usize,
+ }
+
+ /// Data synchronized by the scheduler mutex
+ pub(super) struct Synced {
+ /// Indices of the sleeping workers
+ sleepers: Vec<usize>,
+ }
+
+ const UNPARK_SHIFT: usize = 16; // bit position where the unparked-worker count starts
+ const UNPARK_MASK: usize = !SEARCH_MASK; // every bit above the searching count
+ const SEARCH_MASK: usize = (1 << UNPARK_SHIFT) - 1; // low 16 bits: the searching-worker count
+
+ #[derive(Copy, Clone)]
+ struct State(usize); // decoded view of the packed `Idle::state` word
+
+ impl Idle {
+     /// Creates the idle tracker for `num_workers` workers together with the
+     /// sleeper list that lives under the scheduler mutex. Every worker starts
+     /// out unparked and the sleeper list starts out empty.
+     pub(super) fn new(num_workers: usize) -> (Idle, Synced) {
+         let state = AtomicUsize::new(State::new(num_workers).into());
+         let sleepers = Vec::with_capacity(num_workers);
+
+         (Idle { state, num_workers }, Synced { sleepers })
+     }
+
+     /// If there are no workers actively searching, returns the index of a
+     /// worker currently sleeping.
+     pub(super) fn worker_to_notify(&self, shared: &Shared) -> Option<usize> {
+         // If at least one worker is spinning, work being notified will
+         // eventually be found. A searching thread will find **some** work and
+         // notify another worker, eventually leading to our work being found.
+         //
+         // For this to happen, this load must happen before the thread
+         // transitioning `num_searching` to zero. Acquire / Release does not
+         // provide sufficient guarantees, so this load is done with `SeqCst` and
+         // will pair with the `fetch_sub(1)` when transitioning out of
+         // searching.
+         if !self.notify_should_wakeup() {
+             return None;
+         }
+
+         // Take the scheduler mutex before touching the sleeper list.
+         let mut synced = shared.synced.lock();
+
+         // The state may have changed while waiting for the lock; check again.
+         if !self.notify_should_wakeup() {
+             return None;
+         }
+
+         // A worker should be woken up, atomically increment the number of
+         // searching workers as well as the number of unparked workers.
+         State::unpark_one(&self.state, 1);
+
+         // Pick the worker to unpark.
+         let worker = synced.idle.sleepers.pop();
+         debug_assert!(worker.is_some());
+
+         worker
+     }
+
+     /// Returns `true` if the worker needs to do a final check for submitted
+     /// work.
+     pub(super) fn transition_worker_to_parked(
+         &self,
+         shared: &Shared,
+         worker: usize,
+         is_searching: bool,
+     ) -> bool {
+         // Both updates below happen with the scheduler mutex held.
+         let mut synced = shared.synced.lock();
+
+         // One fewer unparked worker (and one fewer searcher, if it was one).
+         let was_last_searcher = State::dec_num_unparked(&self.state, is_searching);
+
+         // Record the worker as sleeping.
+         synced.idle.sleepers.push(worker);
+
+         was_last_searcher
+     }
+
+     /// Tries to mark the calling worker as searching. Returns `false`, without
+     /// changing anything, when half or more of the workers already search.
+     pub(super) fn transition_worker_to_searching(&self) -> bool {
+         let searching = State::load(&self.state, SeqCst).num_searching();
+         let allowed = 2 * searching < self.num_workers;
+
+         if allowed {
+             // It is possible for this routine to allow more than 50% of the
+             // workers to search. That is OK. Limiting searchers is only an
+             // optimization to prevent too much contention.
+             State::inc_num_searching(&self.state, SeqCst);
+         }
+
+         allowed
+     }
+
+     /// A lightweight transition from searching -> running.
+     ///
+     /// Returns `true` if this is the final searching worker. The caller
+     /// **must** notify a new worker.
+     pub(super) fn transition_worker_from_searching(&self) -> bool {
+         State::dec_num_searching(&self.state)
+     }
+
+     /// Unpark a specific worker. This happens if tasks are submitted from
+     /// within the worker's park routine.
+     ///
+     /// Returns `true` if the worker was parked before calling the method.
+     pub(super) fn unpark_worker_by_id(&self, shared: &Shared, worker_id: usize) -> bool {
+         let mut synced = shared.synced.lock();
+         let sleepers = &mut synced.idle.sleepers;
+
+         let found = sleepers.iter().position(|&id| id == worker_id);
+
+         match found {
+             Some(index) => {
+                 sleepers.swap_remove(index);
+
+                 // Update the state accordingly while the lock is held.
+                 State::unpark_one(&self.state, 0);
+
+                 true
+             }
+             None => false,
+         }
+     }
+
+     /// Returns `true` if `worker_id` is contained in the sleep set.
+     pub(super) fn is_parked(&self, shared: &Shared, worker_id: usize) -> bool {
+         let synced = shared.synced.lock();
+         synced.idle.sleepers.iter().any(|&id| id == worker_id)
+     }
+
+     /// Returns `true` when no worker is searching and at least one worker is
+     /// not unparked. The state is read with a `SeqCst` `fetch_add(0)`.
+     fn notify_should_wakeup(&self) -> bool {
+         let snapshot = State(self.state.fetch_add(0, SeqCst));
+
+         let nobody_searching = snapshot.num_searching() == 0;
+         let someone_parked = snapshot.num_unparked() < self.num_workers;
+
+         nobody_searching && someone_parked
+     }
+ }
+
+ impl State {
+     /// Builds the initial state: `num_workers` unparked, none searching.
+     fn new(num_workers: usize) -> State {
+         // All workers start in the unparked state
+         let initial = State(num_workers << UNPARK_SHIFT);
+
+         debug_assert_eq!(num_workers, initial.num_unparked());
+         debug_assert_eq!(0, initial.num_searching());
+
+         initial
+     }
+
+     /// Reads the packed word from `cell` with the given ordering.
+     fn load(cell: &AtomicUsize, ordering: Ordering) -> State {
+         let raw = cell.load(ordering);
+         State(raw)
+     }
+
+     /// Adds one unparked worker and `num_searching` searching workers in a
+     /// single atomic add.
+     fn unpark_one(cell: &AtomicUsize, num_searching: usize) {
+         let delta = num_searching | (1 << UNPARK_SHIFT);
+         cell.fetch_add(delta, SeqCst);
+     }
+
+     /// Adds one searching worker.
+     fn inc_num_searching(cell: &AtomicUsize, ordering: Ordering) {
+         cell.fetch_add(1, ordering);
+     }
+
+     /// Returns `true` if this is the final searching worker
+     fn dec_num_searching(cell: &AtomicUsize) -> bool {
+         let before = State(cell.fetch_sub(1, SeqCst));
+         before.num_searching() == 1
+     }
+
+     /// Track a sleeping worker
+     ///
+     /// Returns `true` if this is the final searching worker.
+     fn dec_num_unparked(cell: &AtomicUsize, is_searching: bool) -> bool {
+         // Always remove one unparked worker; remove one searcher as well when
+         // the worker had been searching.
+         let dec = (1 << UNPARK_SHIFT) + usize::from(is_searching);
+
+         let before = State(cell.fetch_sub(dec, SeqCst));
+         is_searching && before.num_searching() == 1
+     }
+
+     /// Number of workers currently searching
+     fn num_searching(self) -> usize {
+         let State(raw) = self;
+         raw & SEARCH_MASK
+     }
+
+     /// Number of workers currently unparked
+     fn num_unparked(self) -> usize {
+         let State(raw) = self;
+         (raw & UNPARK_MASK) >> UNPARK_SHIFT
+     }
+ }
+
+ impl From<usize> for State {
+     /// Wraps a raw packed word.
+     fn from(raw: usize) -> Self {
+         Self(raw)
+     }
+ }
+
+ impl From<State> for usize {
+     /// Unwraps the raw packed word.
+     fn from(state: State) -> Self {
+         let State(raw) = state;
+         raw
+     }
+ }
+
+ impl fmt::Debug for State {
+     /// Prints the two decoded counts instead of the raw packed word.
+     fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
+         let unparked = self.num_unparked();
+         let searching = self.num_searching();
+
+         f.debug_struct("worker::State")
+             .field("num_unparked", &unparked)
+             .field("num_searching", &searching)
+             .finish()
+     }
+ }
+
+ #[test]
+ fn test_state() {
+ assert_eq!(0, UNPARK_MASK & SEARCH_MASK); // the two masks share no bit
+ assert_eq!(0, !(UNPARK_MASK | SEARCH_MASK)); // together the masks cover every bit
+
+ let state = State::new(10);
+ assert_eq!(10, state.num_unparked()); // all workers start unparked
+ assert_eq!(0, state.num_searching()); // no worker starts searching
+ }
diff --git a/third_party/rust/tokio/src/runtime/scheduler/multi_thread/mod.rs b/third_party/rust/tokio/src/runtime/scheduler/multi_thread/mod.rs
new file mode 100644
index 0000000000..d85a0ae0a2
--- /dev/null
+++ b/third_party/rust/tokio/src/runtime/scheduler/multi_thread/mod.rs
@@ -0,0 +1,103 @@
+//! Multi-threaded runtime
+
+mod counters;
+use counters::Counters;
+
+mod handle;
+pub(crate) use handle::Handle;
+
+mod overflow;
+pub(crate) use overflow::Overflow;
+
+mod idle;
+use self::idle::Idle;
+
+mod stats;
+pub(crate) use stats::Stats;
+
+mod park;
+pub(crate) use park::{Parker, Unparker};
+
+pub(crate) mod queue;
+
+mod worker;
+pub(crate) use worker::{Context, Launch, Shared};
+
+cfg_taskdump! {
+ mod trace;
+ use trace::TraceStatus;
+
+ pub(crate) use worker::Synced;
+}
+
+cfg_not_taskdump! {
+ mod trace_mock;
+ use trace_mock::TraceStatus;
+}
+
+pub(crate) use worker::block_in_place;
+
+use crate::loom::sync::Arc;
+use crate::runtime::{
+ blocking,
+ driver::{self, Driver},
+ scheduler, Config,
+};
+use crate::util::RngSeedGenerator;
+
+use std::fmt;
+use std::future::Future;
+
+ /// Work-stealing based thread pool for executing futures.
+ ///
+ /// This is a field-less type: `MultiThread::new` returns it together with
+ /// the scheduler `Handle` and the `Launch` value, which are held separately.
+ pub(crate) struct MultiThread;
+
+// ===== impl MultiThread =====
+
+ impl MultiThread {
+     /// Builds the multi-thread scheduler.
+     ///
+     /// Wraps `driver` in a `Parker` and forwards every argument to
+     /// `worker::create`, returning the scheduler value together with the
+     /// handle and launch values that call produced.
+     pub(crate) fn new(
+         size: usize,
+         driver: Driver,
+         driver_handle: driver::Handle,
+         blocking_spawner: blocking::Spawner,
+         seed_generator: RngSeedGenerator,
+         config: Config,
+     ) -> (MultiThread, Arc<Handle>, Launch) {
+         let (handle, launch) = worker::create(
+             size,
+             Parker::new(driver),
+             driver_handle,
+             blocking_spawner,
+             seed_generator,
+             config,
+         );
+
+         (MultiThread, handle, launch)
+     }
+
+     /// Blocks the current thread waiting for the future to complete.
+     ///
+     /// The future will execute on the current thread, but all spawned tasks
+     /// will be executed on the thread pool.
+     ///
+     /// # Panics
+     ///
+     /// Panics with "failed to park thread" if the blocking `block_on` call
+     /// returns an error.
+     pub(crate) fn block_on<F>(&self, handle: &scheduler::Handle, future: F) -> F::Output
+     where
+         F: Future,
+     {
+         crate::runtime::context::enter_runtime(handle, true, |blocking| {
+             let outcome = blocking.block_on(future);
+             outcome.expect("failed to park thread")
+         })
+     }
+
+     /// Shuts the scheduler down through its handle.
+     ///
+     /// # Panics
+     ///
+     /// Panics if `handle` is not the `MultiThread` variant.
+     pub(crate) fn shutdown(&mut self, handle: &scheduler::Handle) {
+         match handle {
+             scheduler::Handle::MultiThread(multi_thread) => multi_thread.shutdown(),
+             _ => panic!("expected MultiThread scheduler"),
+         }
+     }
+ }
+
+ impl fmt::Debug for MultiThread {
+     /// Prints the type name only; the struct has no fields.
+     fn fmt(&self, fmt: &mut fmt::Formatter<'_>) -> fmt::Result {
+         let mut out = fmt.debug_struct("MultiThread");
+         out.finish()
+     }
+ }
diff --git a/third_party/rust/tokio/src/runtime/scheduler/multi_thread/overflow.rs b/third_party/rust/tokio/src/runtime/scheduler/multi_thread/overflow.rs
new file mode 100644
index 0000000000..ab664811cf
--- /dev/null
+++ b/third_party/rust/tokio/src/runtime/scheduler/multi_thread/overflow.rs
@@ -0,0 +1,26 @@
+use crate::runtime::task;
+
+#[cfg(test)]
+use std::cell::RefCell;
+
+ /// Destination for tasks that a local run queue hands off instead of
+ /// storing itself (see `Local::push_back_or_overflow` in `queue.rs`).
+ pub(crate) trait Overflow<T: 'static> {
+ /// Accepts a single task.
+ fn push(&self, task: task::Notified<T>);
+
+ /// Accepts every task yielded by `iter`.
+ fn push_batch<I>(&self, iter: I)
+ where
+ I: Iterator<Item = task::Notified<T>>;
+ }
+
+ #[cfg(test)]
+ impl<T: 'static> Overflow<T> for RefCell<Vec<task::Notified<T>>> {
+     /// Appends `task` to the vector.
+     fn push(&self, task: task::Notified<T>) {
+         let mut tasks = self.borrow_mut();
+         tasks.push(task);
+     }
+
+     /// Appends every task yielded by `iter` to the vector.
+     fn push_batch<I>(&self, iter: I)
+     where
+         I: Iterator<Item = task::Notified<T>>,
+     {
+         let mut tasks = self.borrow_mut();
+         tasks.extend(iter);
+     }
+ }
diff --git a/third_party/rust/tokio/src/runtime/scheduler/multi_thread/park.rs b/third_party/rust/tokio/src/runtime/scheduler/multi_thread/park.rs
new file mode 100644
index 0000000000..0a00ea004e
--- /dev/null
+++ b/third_party/rust/tokio/src/runtime/scheduler/multi_thread/park.rs
@@ -0,0 +1,232 @@
+//! Parks the runtime.
+//!
+//! A combination of the various resource driver park handles.
+
+use crate::loom::sync::atomic::AtomicUsize;
+use crate::loom::sync::{Arc, Condvar, Mutex};
+use crate::runtime::driver::{self, Driver};
+use crate::util::TryLock;
+
+use std::sync::atomic::Ordering::SeqCst;
+use std::time::Duration;
+
+ /// Handle used to park the current thread.
+ ///
+ /// Each `Parker` has its own `Inner`; cloning a `Parker` creates a fresh
+ /// `Inner` that shares only the driver with the original.
+ pub(crate) struct Parker {
+ inner: Arc<Inner>,
+ }
+
+ /// Handle used to wake the `Parker` it was created from (via
+ /// `Parker::unpark`); both point at the same `Inner`.
+ pub(crate) struct Unparker {
+ inner: Arc<Inner>,
+ }
+
+ /// State shared between a `Parker` and the `Unparker`s created from it.
+ struct Inner {
+ /// Avoids entering the park if possible
+ ///
+ /// Holds one of `EMPTY`, `PARKED_CONDVAR`, `PARKED_DRIVER` or `NOTIFIED`.
+ state: AtomicUsize,
+
+ /// Used to coordinate access to the driver / condvar
+ mutex: Mutex<()>,
+
+ /// Condvar to block on if the driver is unavailable.
+ condvar: Condvar,
+
+ /// Resource (I/O, time, ...) driver
+ shared: Arc<Shared>,
+ }
+
+ // Values stored in `Inner::state`.
+ //
+ // Initial state: not parked and no pending notification.
+ const EMPTY: usize = 0;
+ // Set by `park_condvar` before the thread waits on the condvar.
+ const PARKED_CONDVAR: usize = 1;
+ // Set by `park_driver` before the thread parks on the driver.
+ const PARKED_DRIVER: usize = 2;
+ // Written by `unpark`; consumed by the next (or current) park.
+ const NOTIFIED: usize = 3;
+
+ /// Shared across multiple Parker handles
+ ///
+ /// Cloning a `Parker` clones the `Arc<Shared>`, so all clones use the same
+ /// driver.
+ struct Shared {
+ /// Shared driver. Only one thread at a time can use this
+ driver: TryLock<Driver>,
+ }
+
+ impl Parker {
+     /// Creates a parker that owns `driver`, starting in the `EMPTY` state.
+     pub(crate) fn new(driver: Driver) -> Parker {
+         let inner = Inner {
+             state: AtomicUsize::new(EMPTY),
+             mutex: Mutex::new(()),
+             condvar: Condvar::new(),
+             shared: Arc::new(Shared {
+                 driver: TryLock::new(driver),
+             }),
+         };
+
+         Parker {
+             inner: Arc::new(inner),
+         }
+     }
+
+     /// Returns an `Unparker` that wakes this parker.
+     pub(crate) fn unpark(&self) -> Unparker {
+         let inner = Arc::clone(&self.inner);
+         Unparker { inner }
+     }
+
+     /// Parks the current thread; see `Inner::park`.
+     pub(crate) fn park(&mut self, handle: &driver::Handle) {
+         let inner = &self.inner;
+         inner.park(handle);
+     }
+
+     /// Runs the driver with a zero timeout if its lock can be taken;
+     /// otherwise does nothing.
+     ///
+     /// # Panics
+     ///
+     /// Panics if `duration` is not zero.
+     pub(crate) fn park_timeout(&mut self, handle: &driver::Handle, duration: Duration) {
+         // Only parking with zero is supported...
+         assert_eq!(duration, Duration::from_millis(0));
+
+         let locked = self.inner.shared.driver.try_lock();
+         if let Some(mut driver) = locked {
+             driver.park_timeout(handle, duration)
+         }
+     }
+
+     /// Shuts the parker down; see `Inner::shutdown`.
+     pub(crate) fn shutdown(&mut self, handle: &driver::Handle) {
+         let inner = &self.inner;
+         inner.shutdown(handle);
+     }
+ }
+
+ impl Clone for Parker {
+     /// Creates a parker with its own fresh state, mutex and condvar that
+     /// shares the driver (`shared`) with `self`.
+     fn clone(&self) -> Parker {
+         let inner = Inner {
+             state: AtomicUsize::new(EMPTY),
+             mutex: Mutex::new(()),
+             condvar: Condvar::new(),
+             shared: self.inner.shared.clone(),
+         };
+
+         Parker {
+             inner: Arc::new(inner),
+         }
+     }
+ }
+
+ impl Unparker {
+     /// Wakes the associated parker; `driver` is the handle used when the
+     /// parked thread is blocked on the driver.
+     pub(crate) fn unpark(&self, driver: &driver::Handle) {
+         let inner = &self.inner;
+         inner.unpark(driver);
+     }
+ }
+
+impl Inner {
+ /// Parks the current thread.
+ ///
+ /// Returns immediately when a notification is already pending. Otherwise
+ /// the thread blocks on the driver if its lock can be taken, or on the
+ /// condvar when the driver lock is unavailable.
+ fn park(&self, handle: &driver::Handle) {
+ // If we were previously notified then we consume this notification and
+ // return quickly.
+ if self
+ .state
+ .compare_exchange(NOTIFIED, EMPTY, SeqCst, SeqCst)
+ .is_ok()
+ {
+ return;
+ }
+
+ if let Some(mut driver) = self.shared.driver.try_lock() {
+ self.park_driver(&mut driver, handle);
+ } else {
+ self.park_condvar();
+ }
+ }
+
+ /// Parks on the condvar until `state` is observed as `NOTIFIED`.
+ ///
+ /// Called by `park` when the driver lock could not be taken. Returns
+ /// without waiting if a notification arrived before the thread went to
+ /// sleep.
+ ///
+ /// # Panics
+ ///
+ /// Panics if `state` is neither `EMPTY` nor `NOTIFIED` on entry.
+ fn park_condvar(&self) {
+ // Otherwise we need to coordinate going to sleep
+ let mut m = self.mutex.lock();
+
+ match self
+ .state
+ .compare_exchange(EMPTY, PARKED_CONDVAR, SeqCst, SeqCst)
+ {
+ Ok(_) => {}
+ Err(NOTIFIED) => {
+ // We must read here, even though we know it will be `NOTIFIED`.
+ // This is because `unpark` may have been called again since we read
+ // `NOTIFIED` in the `compare_exchange` above. We must perform an
+ // acquire operation that synchronizes with that `unpark` to observe
+ // any writes it made before the call to unpark. To do that we must
+ // read from the write it made to `state`.
+ let old = self.state.swap(EMPTY, SeqCst);
+ debug_assert_eq!(old, NOTIFIED, "park state changed unexpectedly");
+
+ return;
+ }
+ Err(actual) => panic!("inconsistent park state; actual = {}", actual),
+ }
+
+ loop {
+ m = self.condvar.wait(m).unwrap();
+
+ if self
+ .state
+ .compare_exchange(NOTIFIED, EMPTY, SeqCst, SeqCst)
+ .is_ok()
+ {
+ // got a notification
+ return;
+ }
+
+ // spurious wakeup, go back to sleep
+ }
+ }
+
+ /// Parks on the resource driver.
+ ///
+ /// Marks the thread as `PARKED_DRIVER`, runs `driver.park(handle)`, and
+ /// resets `state` to `EMPTY` afterwards. Returns without parking if a
+ /// notification was already pending.
+ ///
+ /// # Panics
+ ///
+ /// Panics if `state` holds a value that should be impossible at the point
+ /// where it is inspected.
+ fn park_driver(&self, driver: &mut Driver, handle: &driver::Handle) {
+     match self
+         .state
+         .compare_exchange(EMPTY, PARKED_DRIVER, SeqCst, SeqCst)
+     {
+         Ok(_) => {}
+         Err(NOTIFIED) => {
+             // We must read here, even though we know it will be `NOTIFIED`.
+             // This is because `unpark` may have been called again since we read
+             // `NOTIFIED` in the `compare_exchange` above. We must perform an
+             // acquire operation that synchronizes with that `unpark` to observe
+             // any writes it made before the call to unpark. To do that we must
+             // read from the write it made to `state`.
+             let old = self.state.swap(EMPTY, SeqCst);
+             debug_assert_eq!(old, NOTIFIED, "park state changed unexpectedly");
+
+             return;
+         }
+         Err(actual) => panic!("inconsistent park state; actual = {}", actual),
+     }
+
+     driver.park(handle);
+
+     match self.state.swap(EMPTY, SeqCst) {
+         NOTIFIED => {}      // got a notification, hurray!
+         PARKED_DRIVER => {} // no notification, alas
+         // Name this function in the message so the diagnostic points at
+         // the right code path.
+         n => panic!("inconsistent park_driver state: {}", n),
+     }
+ }
+
+ /// Writes `NOTIFIED` into `state` and wakes the parked thread, if any.
+ ///
+ /// A thread parked on the condvar is woken through `unpark_condvar`; a
+ /// thread parked on the driver is woken through `driver.unpark()`. If no
+ /// thread is parked, the notification stays in `state` for the next park.
+ ///
+ /// # Panics
+ ///
+ /// Panics if the previous `state` value is not one of the four known
+ /// states.
+ fn unpark(&self, driver: &driver::Handle) {
+ // To ensure the unparked thread will observe any writes we made before
+ // this call, we must perform a release operation that `park` can
+ // synchronize with. To do that we must write `NOTIFIED` even if `state`
+ // is already `NOTIFIED`. That is why this must be a swap rather than a
+ // compare-and-swap that returns if it reads `NOTIFIED` on failure.
+ match self.state.swap(NOTIFIED, SeqCst) {
+ EMPTY => {} // no one was waiting
+ NOTIFIED => {} // already unparked
+ PARKED_CONDVAR => self.unpark_condvar(),
+ PARKED_DRIVER => driver.unpark(),
+ actual => panic!("inconsistent state in unpark; actual = {}", actual),
+ }
+ }
+
+ /// Wakes one thread waiting on `condvar`, after briefly taking `mutex`.
+ fn unpark_condvar(&self) {
+ // There is a period between when the parked thread sets `state` to
+ // `PARKED_CONDVAR` (or last checked `state` in the case of a spurious
+ // wake up) and when it actually waits on `condvar`. If we were to
+ // notify during this period it would be ignored and then when the
+ // parked thread went to sleep it would never wake up. Fortunately, it
+ // has `mutex` locked at this stage so we can acquire `mutex` to wait
+ // until it is ready to receive the notification.
+ //
+ // Releasing `mutex` before the call to `notify_one` means that when the
+ // parked thread wakes it doesn't get woken only to have to wait for us
+ // to release `mutex`.
+ drop(self.mutex.lock());
+
+ self.condvar.notify_one()
+ }
+
+ /// Shuts down the driver if its lock can be taken, then wakes every
+ /// thread waiting on this parker's condvar.
+ fn shutdown(&self, handle: &driver::Handle) {
+     let locked = self.shared.driver.try_lock();
+     if let Some(mut driver) = locked {
+         driver.shutdown(handle);
+     }
+
+     self.condvar.notify_all();
+ }
+}
diff --git a/third_party/rust/tokio/src/runtime/scheduler/multi_thread/queue.rs b/third_party/rust/tokio/src/runtime/scheduler/multi_thread/queue.rs
new file mode 100644
index 0000000000..dd66fa2dde
--- /dev/null
+++ b/third_party/rust/tokio/src/runtime/scheduler/multi_thread/queue.rs
@@ -0,0 +1,608 @@
+//! Run-queue structures to support a work-stealing scheduler
+
+use crate::loom::cell::UnsafeCell;
+use crate::loom::sync::Arc;
+use crate::runtime::scheduler::multi_thread::{Overflow, Stats};
+use crate::runtime::task;
+
+use std::mem::{self, MaybeUninit};
+use std::ptr;
+use std::sync::atomic::Ordering::{AcqRel, Acquire, Relaxed, Release};
+
+// Use wider integers when possible to increase ABA resilience.
+//
+// See issue #5041: <https://github.com/tokio-rs/tokio/issues/5041>.
+cfg_has_atomic_u64! {
+ type UnsignedShort = u32;
+ type UnsignedLong = u64;
+ type AtomicUnsignedShort = crate::loom::sync::atomic::AtomicU32;
+ type AtomicUnsignedLong = crate::loom::sync::atomic::AtomicU64;
+}
+cfg_not_has_atomic_u64! {
+ type UnsignedShort = u16;
+ type UnsignedLong = u32;
+ type AtomicUnsignedShort = crate::loom::sync::atomic::AtomicU16;
+ type AtomicUnsignedLong = crate::loom::sync::atomic::AtomicU32;
+}
+
+ /// Producer handle. May only be used from a single thread.
+ ///
+ /// Created by `local()` together with a `Steal` handle that points at the
+ /// same `Inner`.
+ pub(crate) struct Local<T: 'static> {
+ inner: Arc<Inner<T>>,
+ }
+
+ /// Consumer handle. May be used from many threads.
+ ///
+ /// Wraps the same `Inner` as the `Local` handle it was created with.
+ pub(crate) struct Steal<T: 'static>(Arc<Inner<T>>);
+
+ /// Storage shared by the `Local` producer handle and its `Steal` handles.
+ pub(crate) struct Inner<T: 'static> {
+ /// Concurrently updated by many threads.
+ ///
+ /// Contains two `UnsignedShort` values. The `UnsignedShort` in the LSB is
+ /// the "real" head of the queue. The `UnsignedShort` in the MSB is set by
+ /// a stealer in process of stealing values. It represents the first value
+ /// being stolen in the batch. The `UnsignedShort` indices are
+ /// intentionally wider than strictly required for buffer indexing in
+ /// order to provide ABA mitigation and make it possible to distinguish
+ /// between full and empty buffers.
+ ///
+ /// When both `UnsignedShort` values are the same, there is no active
+ /// stealer.
+ ///
+ /// Tracking an in-progress stealer prevents a wrapping scenario.
+ head: AtomicUnsignedLong,
+
+ /// Only updated by producer thread but read by many threads.
+ tail: AtomicUnsignedShort,
+
+ /// Elements
+ buffer: Box<[UnsafeCell<MaybeUninit<task::Notified<T>>>; LOCAL_QUEUE_CAPACITY]>,
+ }
+
+ // `Inner` is shared between the `Local` producer and `Steal` consumers
+ // through an `Arc`, so it has to be `Send` and `Sync`.
+ // NOTE(review): soundness presumably relies on the head/tail protocol in
+ // this file giving each slot a single accessor at a time; re-check these
+ // impls whenever that protocol changes.
+ unsafe impl<T> Send for Inner<T> {}
+ unsafe impl<T> Sync for Inner<T> {}
+
+ // Number of slots in each local run queue.
+ #[cfg(not(loom))]
+ const LOCAL_QUEUE_CAPACITY: usize = 256;
+
+ // Shrink the size of the local queue when using loom. This shouldn't impact
+ // logic, but allows loom to test more edge cases in a reasonable amount of
+ // time.
+ #[cfg(loom)]
+ const LOCAL_QUEUE_CAPACITY: usize = 4;
+
+ // Bit mask that maps a queue position to a buffer slot
+ // (`pos as usize & MASK`). Both capacities above are powers of two, so the
+ // result is the position modulo the capacity.
+ const MASK: usize = LOCAL_QUEUE_CAPACITY - 1;
+
+ // Constructing the fixed size array directly is very awkward. The only way to
+ // do it is to repeat `UnsafeCell::new(MaybeUninit::uninit())` 256 times, as
+ // the contents are not Copy. The trick with defining a const doesn't work for
+ // generic types.
+ //
+ // Converts a boxed slice into a boxed array of `LOCAL_QUEUE_CAPACITY`
+ // elements without copying. Panics if the slice has any other length.
+ fn make_fixed_size<T>(buffer: Box<[T]>) -> Box<[T; LOCAL_QUEUE_CAPACITY]> {
+ assert_eq!(buffer.len(), LOCAL_QUEUE_CAPACITY);
+
+ // safety: We check that the length is correct.
+ unsafe { Box::from_raw(Box::into_raw(buffer).cast()) }
+ }
+
+ /// Create a new local run-queue
+ ///
+ /// Returns the stealer handle and the producer handle; both refer to the
+ /// same freshly allocated, empty queue.
+ pub(crate) fn local<T: 'static>() -> (Steal<T>, Local<T>) {
+     // Every slot starts out uninitialized; `head`/`tail` track which slots
+     // hold a task.
+     let slots: Vec<_> = (0..LOCAL_QUEUE_CAPACITY)
+         .map(|_| UnsafeCell::new(MaybeUninit::uninit()))
+         .collect();
+
+     let inner = Arc::new(Inner {
+         head: AtomicUnsignedLong::new(0),
+         tail: AtomicUnsignedShort::new(0),
+         buffer: make_fixed_size(slots.into_boxed_slice()),
+     });
+
+     let stealer = Steal(Arc::clone(&inner));
+     let producer = Local { inner };
+
+     (stealer, producer)
+ }
+
+impl<T> Local<T> {
+ /// Returns the number of entries in the queue, as reported by
+ /// `Inner::len`.
+ pub(crate) fn len(&self) -> usize {
+     let entries = self.inner.len();
+     entries as usize
+ }
+
+ /// How many tasks can be pushed into the queue
+ ///
+ /// Delegates to `Inner::remaining_slots`.
+ pub(crate) fn remaining_slots(&self) -> usize {
+ self.inner.remaining_slots()
+ }
+
+ /// Total number of slots in the queue (`LOCAL_QUEUE_CAPACITY`).
+ pub(crate) fn max_capacity(&self) -> usize {
+ LOCAL_QUEUE_CAPACITY
+ }
+
+ /// Returns true if there are any entries in the queue
+ ///
+ /// Separate to is_stealable so that refactors of is_stealable to "protect"
+ /// some tasks from stealing won't affect this
+ pub(crate) fn has_tasks(&self) -> bool {
+ !self.inner.is_empty()
+ }
+
+ /// Pushes a batch of tasks to the back of the queue. All tasks must fit in
+ /// the local queue.
+ ///
+ /// The new tail is published with a single `Release` store after every
+ /// task has been written.
+ ///
+ /// # Panics
+ ///
+ /// The method panics if there is not enough capacity to fit in the queue.
+ pub(crate) fn push_back(&mut self, tasks: impl ExactSizeIterator<Item = task::Notified<T>>) {
+ let len = tasks.len();
+ assert!(len <= LOCAL_QUEUE_CAPACITY);
+
+ if len == 0 {
+ // Nothing to do
+ return;
+ }
+
+ let head = self.inner.head.load(Acquire);
+ let (steal, _) = unpack(head);
+
+ // safety: this is the **only** thread that updates this cell.
+ let mut tail = unsafe { self.inner.tail.unsync_load() };
+
+ if tail.wrapping_sub(steal) <= (LOCAL_QUEUE_CAPACITY - len) as UnsignedShort {
+ // Yes, this if condition is structured a bit weird (first block
+ // does nothing, second panics). It is this way to match
+ // `push_back_or_overflow`.
+ } else {
+ panic!()
+ }
+
+ for task in tasks {
+ let idx = tail as usize & MASK;
+
+ self.inner.buffer[idx].with_mut(|ptr| {
+ // Write the task to the slot
+ //
+ // Safety: There is only one producer and the above `if`
+ // condition ensures we don't touch a cell if there is a
+ // value, thus no consumer.
+ unsafe {
+ ptr::write((*ptr).as_mut_ptr(), task);
+ }
+ });
+
+ tail = tail.wrapping_add(1);
+ }
+
+ self.inner.tail.store(tail, Release);
+ }
+
+ /// Pushes a task to the back of the local queue, if there is not enough
+ /// capacity in the queue, this triggers the overflow operation.
+ ///
+ /// When the queue overflows, half of the current contents of the queue is
+ /// moved to the given `overflow` destination. This frees up capacity for
+ /// more tasks to be pushed into the local queue.
+ ///
+ /// If the queue is full while a steal is in progress, only `task` itself
+ /// is handed to `overflow`.
+ pub(crate) fn push_back_or_overflow<O: Overflow<T>>(
+ &mut self,
+ mut task: task::Notified<T>,
+ overflow: &O,
+ stats: &mut Stats,
+ ) {
+ let tail = loop {
+ let head = self.inner.head.load(Acquire);
+ let (steal, real) = unpack(head);
+
+ // safety: this is the **only** thread that updates this cell.
+ let tail = unsafe { self.inner.tail.unsync_load() };
+
+ if tail.wrapping_sub(steal) < LOCAL_QUEUE_CAPACITY as UnsignedShort {
+ // There is capacity for the task
+ break tail;
+ } else if steal != real {
+ // Concurrently stealing, this will free up capacity, so only
+ // push the task onto the inject queue
+ overflow.push(task);
+ return;
+ } else {
+ // Push the current task and half of the queue into the
+ // inject queue.
+ match self.push_overflow(task, real, tail, overflow, stats) {
+ Ok(_) => return,
+ // Lost the race, try again
+ Err(v) => {
+ task = v;
+ }
+ }
+ }
+ };
+
+ self.push_back_finish(task, tail);
+ }
+
+ // Second half of `push_back_or_overflow`: writes `task` into the slot for
+ // `tail` and then publishes `tail + 1`.
+ fn push_back_finish(&self, task: task::Notified<T>, tail: UnsignedShort) {
+ // Map the position to a slot index.
+ let idx = tail as usize & MASK;
+
+ self.inner.buffer[idx].with_mut(|ptr| {
+ // Write the task to the slot
+ //
+ // Safety: There is only one producer and the capacity check in
+ // `push_back_or_overflow` ensures we don't touch a cell if there
+ // is a value, thus no consumer.
+ unsafe {
+ ptr::write((*ptr).as_mut_ptr(), task);
+ }
+ });
+
+ // Make the task available. Synchronizes with a load in
+ // `steal_into2`.
+ self.inner.tail.store(tail.wrapping_add(1), Release);
+ }
+
+ /// Moves a batch of tasks into the inject queue.
+ ///
+ /// This will temporarily make some of the tasks unavailable to stealers.
+ /// Once `push_overflow` is done, a notification is sent out, so if other
+ /// workers "missed" some of the tasks during a steal, they will get
+ /// another opportunity.
+ ///
+ /// Returns `Err(task)` when the claim on the first half of the queue fails
+ /// because `head` changed concurrently; the caller is expected to retry.
+ ///
+ /// # Panics
+ ///
+ /// Panics if the queue is not full (`tail - head != LOCAL_QUEUE_CAPACITY`).
+ #[inline(never)]
+ fn push_overflow<O: Overflow<T>>(
+ &mut self,
+ task: task::Notified<T>,
+ head: UnsignedShort,
+ tail: UnsignedShort,
+ overflow: &O,
+ stats: &mut Stats,
+ ) -> Result<(), task::Notified<T>> {
+ /// How many elements are we taking from the local queue.
+ ///
+ /// This is one less than the number of tasks pushed to the inject
+ /// queue as we are also inserting the `task` argument.
+ const NUM_TASKS_TAKEN: UnsignedShort = (LOCAL_QUEUE_CAPACITY / 2) as UnsignedShort;
+
+ assert_eq!(
+ tail.wrapping_sub(head) as usize,
+ LOCAL_QUEUE_CAPACITY,
+ "queue is not full; tail = {}; head = {}",
+ tail,
+ head
+ );
+
+ let prev = pack(head, head);
+
+ // Claim a bunch of tasks
+ //
+ // We are claiming the tasks **before** reading them out of the buffer.
+ // This is safe because only the **current** thread is able to push new
+ // tasks.
+ //
+ // There isn't really any need for memory ordering... Relaxed would
+ // work. This is because all tasks are pushed into the queue from the
+ // current thread (or memory has been acquired if the local queue handle
+ // moved).
+ if self
+ .inner
+ .head
+ .compare_exchange(
+ prev,
+ pack(
+ head.wrapping_add(NUM_TASKS_TAKEN),
+ head.wrapping_add(NUM_TASKS_TAKEN),
+ ),
+ Release,
+ Relaxed,
+ )
+ .is_err()
+ {
+ // We failed to claim the tasks, losing the race. Return out of
+ // this function and try the full `push` routine again. The queue
+ // may not be full anymore.
+ return Err(task);
+ }
+
+ /// An iterator that takes elements out of the run queue.
+ struct BatchTaskIter<'a, T: 'static> {
+ buffer: &'a [UnsafeCell<MaybeUninit<task::Notified<T>>>; LOCAL_QUEUE_CAPACITY],
+ head: UnsignedLong,
+ i: UnsignedLong,
+ }
+ impl<'a, T: 'static> Iterator for BatchTaskIter<'a, T> {
+ type Item = task::Notified<T>;
+
+ #[inline]
+ fn next(&mut self) -> Option<task::Notified<T>> {
+ if self.i == UnsignedLong::from(NUM_TASKS_TAKEN) {
+ None
+ } else {
+ let i_idx = self.i.wrapping_add(self.head) as usize & MASK;
+ let slot = &self.buffer[i_idx];
+
+ // safety: Our CAS from before has assumed exclusive ownership
+ // of the task pointers in this range.
+ let task = slot.with(|ptr| unsafe { ptr::read((*ptr).as_ptr()) });
+
+ self.i += 1;
+ Some(task)
+ }
+ }
+ }
+
+ // safety: The CAS above ensures that no consumer will look at these
+ // values again, and we are the only producer.
+ let batch_iter = BatchTaskIter {
+ buffer: &self.inner.buffer,
+ head: head as UnsignedLong,
+ i: 0,
+ };
+ overflow.push_batch(batch_iter.chain(std::iter::once(task)));
+
+ // Record the overflow in the worker stats.
+ stats.incr_overflow_count();
+
+ Ok(())
+ }
+
+ /// Pops a task from the local queue.
+ ///
+ /// Claims the task at the "real" head with a compare-exchange on `head`,
+ /// retrying with the observed value when the exchange fails. Returns
+ /// `None` when the real head equals the tail.
+ pub(crate) fn pop(&mut self) -> Option<task::Notified<T>> {
+ let mut head = self.inner.head.load(Acquire);
+
+ let idx = loop {
+ let (steal, real) = unpack(head);
+
+ // safety: this is the **only** thread that updates this cell.
+ let tail = unsafe { self.inner.tail.unsync_load() };
+
+ if real == tail {
+ // queue is empty
+ return None;
+ }
+
+ let next_real = real.wrapping_add(1);
+
+ // If `steal == real` there are no concurrent stealers. Both `steal`
+ // and `real` are updated.
+ let next = if steal == real {
+ pack(next_real, next_real)
+ } else {
+ // A steal is in progress: leave the stealer's half untouched.
+ assert_ne!(steal, next_real);
+ pack(steal, next_real)
+ };
+
+ // Attempt to claim a task.
+ let res = self
+ .inner
+ .head
+ .compare_exchange(head, next, AcqRel, Acquire);
+
+ match res {
+ Ok(_) => break real as usize & MASK,
+ Err(actual) => head = actual,
+ }
+ };
+
+ Some(self.inner.buffer[idx].with(|ptr| unsafe { ptr::read(ptr).assume_init() }))
+ }
+}
+
+impl<T> Steal<T> {
+ /// Returns true if the queue currently holds no entries, as reported by
+ /// `Inner::is_empty`.
+ pub(crate) fn is_empty(&self) -> bool {
+ self.0.is_empty()
+ }
+
+ /// Steals half the tasks from self and place them into `dst`.
+ ///
+ /// Returns one of the stolen tasks (the last one written into `dst`); the
+ /// others are made available in `dst` by advancing its tail. Returns
+ /// `None` if `dst` is more than half full or nothing could be stolen.
+ pub(crate) fn steal_into(
+ &self,
+ dst: &mut Local<T>,
+ dst_stats: &mut Stats,
+ ) -> Option<task::Notified<T>> {
+ // Safety: the caller is the only thread that mutates `dst.tail` and
+ // holds a mutable reference.
+ let dst_tail = unsafe { dst.inner.tail.unsync_load() };
+
+ // To the caller, `dst` may **look** empty but still have values
+ // contained in the buffer. If another thread is concurrently stealing
+ // from `dst` there may not be enough capacity to steal.
+ let (steal, _) = unpack(dst.inner.head.load(Acquire));
+
+ if dst_tail.wrapping_sub(steal) > LOCAL_QUEUE_CAPACITY as UnsignedShort / 2 {
+ // we *could* try to steal less here, but for simplicity, we're just
+ // going to abort.
+ return None;
+ }
+
+ // Steal the tasks into `dst`'s buffer. This does not yet expose the
+ // tasks in `dst`.
+ let mut n = self.steal_into2(dst, dst_tail);
+
+ if n == 0 {
+ // No tasks were stolen
+ return None;
+ }
+
+ // `steal_into2` asserts that `n` is at most half the queue capacity.
+ dst_stats.incr_steal_count(n as u16);
+ dst_stats.incr_steal_operations();
+
+ // We are returning a task here
+ n -= 1;
+
+ let ret_pos = dst_tail.wrapping_add(n);
+ let ret_idx = ret_pos as usize & MASK;
+
+ // safety: the value was written as part of `steal_into2` and not
+ // exposed to stealers, so no other thread can access it.
+ let ret = dst.inner.buffer[ret_idx].with(|ptr| unsafe { ptr::read((*ptr).as_ptr()) });
+
+ if n == 0 {
+ // The `dst` queue is empty, but a single task was stolen
+ return Some(ret);
+ }
+
+ // Make the stolen items available to consumers
+ dst.inner.tail.store(dst_tail.wrapping_add(n), Release);
+
+ Some(ret)
+ }
+
+ // Steal tasks from `self`, placing them into `dst`. Returns the number of
+ // tasks that were stolen.
+ //
+ // The tasks are written into `dst`'s buffer starting at `dst_tail`, but
+ // `dst`'s tail is not advanced here; the caller publishes them. Returns 0
+ // if another steal from `self` is in progress or the queue is empty.
+ fn steal_into2(&self, dst: &mut Local<T>, dst_tail: UnsignedShort) -> UnsignedShort {
+ let mut prev_packed = self.0.head.load(Acquire);
+ let mut next_packed;
+
+ let n = loop {
+ let (src_head_steal, src_head_real) = unpack(prev_packed);
+ let src_tail = self.0.tail.load(Acquire);
+
+ // If these two do not match, another thread is concurrently
+ // stealing from the queue.
+ if src_head_steal != src_head_real {
+ return 0;
+ }
+
+ // Number of available tasks to steal
+ let n = src_tail.wrapping_sub(src_head_real);
+ // Take half, rounded up.
+ let n = n - n / 2;
+
+ if n == 0 {
+ // No tasks available to steal
+ return 0;
+ }
+
+ // Update the real head index to acquire the tasks.
+ let steal_to = src_head_real.wrapping_add(n);
+ assert_ne!(src_head_steal, steal_to);
+ next_packed = pack(src_head_steal, steal_to);
+
+ // Claim all those tasks. This is done by incrementing the "real"
+ // head but not the steal. By doing this, no other thread is able to
+ // steal from this queue until the current thread completes.
+ let res = self
+ .0
+ .head
+ .compare_exchange(prev_packed, next_packed, AcqRel, Acquire);
+
+ match res {
+ Ok(_) => break n,
+ Err(actual) => prev_packed = actual,
+ }
+ };
+
+ assert!(
+ n <= LOCAL_QUEUE_CAPACITY as UnsignedShort / 2,
+ "actual = {}",
+ n
+ );
+
+ // `first` is the steal half of the packed head: the position of the
+ // first claimed task.
+ let (first, _) = unpack(next_packed);
+
+ // Take all the tasks
+ for i in 0..n {
+ // Compute the positions
+ let src_pos = first.wrapping_add(i);
+ let dst_pos = dst_tail.wrapping_add(i);
+
+ // Map to slots
+ let src_idx = src_pos as usize & MASK;
+ let dst_idx = dst_pos as usize & MASK;
+
+ // Read the task
+ //
+ // safety: We acquired the task with the atomic exchange above.
+ let task = self.0.buffer[src_idx].with(|ptr| unsafe { ptr::read((*ptr).as_ptr()) });
+
+ // Write the task to the new slot
+ //
+ // safety: `dst` queue is empty and we are the only producer to
+ // this queue.
+ dst.inner.buffer[dst_idx]
+ .with_mut(|ptr| unsafe { ptr::write((*ptr).as_mut_ptr(), task) });
+ }
+
+ let mut prev_packed = next_packed;
+
+ // Update `src_head_steal` to match `src_head_real` signalling that the
+ // stealing routine is complete.
+ loop {
+ let head = unpack(prev_packed).1;
+ next_packed = pack(head, head);
+
+ let res = self
+ .0
+ .head
+ .compare_exchange(prev_packed, next_packed, AcqRel, Acquire);
+
+ match res {
+ Ok(_) => return n,
+ Err(actual) => {
+ let (actual_steal, actual_real) = unpack(actual);
+
+ // The steal half must still differ from the real half
+ // until this thread completes the steal.
+ assert_ne!(actual_steal, actual_real);
+
+ prev_packed = actual;
+ }
+ }
+ }
+ }
+}
+
+ cfg_metrics! {
+ impl<T> Steal<T> {
+ /// Number of entries currently in the queue, as reported by
+ /// `Inner::len`.
+ pub(crate) fn len(&self) -> usize {
+ self.0.len() as _
+ }
+ }
+ }
+
+ impl<T> Clone for Steal<T> {
+     /// Returns another handle to the same queue.
+     fn clone(&self) -> Steal<T> {
+         let shared = Arc::clone(&self.0);
+         Steal(shared)
+     }
+ }
+
+ impl<T> Drop for Local<T> {
+     /// Asserts that the queue is empty when the producer handle is dropped.
+     /// The check is skipped while the thread is already panicking.
+     fn drop(&mut self) {
+         if std::thread::panicking() {
+             return;
+         }
+
+         let drained = self.pop().is_none();
+         assert!(drained, "queue not empty");
+     }
+ }
+
+ impl<T> Inner<T> {
+     /// Number of free slots, measured from the steal half of `head` so that
+     /// slots still being read by a stealer are counted as occupied.
+     fn remaining_slots(&self) -> usize {
+         let packed_head = self.head.load(Acquire);
+         let (steal, _) = unpack(packed_head);
+         let tail = self.tail.load(Acquire);
+
+         let occupied = tail.wrapping_sub(steal) as usize;
+         LOCAL_QUEUE_CAPACITY - occupied
+     }
+
+     /// Number of entries between the real head and the tail.
+     fn len(&self) -> UnsignedShort {
+         let packed_head = self.head.load(Acquire);
+         let (_, real) = unpack(packed_head);
+         let tail = self.tail.load(Acquire);
+
+         tail.wrapping_sub(real)
+     }
+
+     /// Returns true when `len()` is zero.
+     fn is_empty(&self) -> bool {
+         let entries = self.len();
+         entries == 0
+     }
+ }
+
+ /// Split the head value into the real head and the index a stealer is working
+ /// on.
+ ///
+ /// Returns `(steal, real)`: `steal` comes from the upper half of `n` and
+ /// `real` from the lower half.
+ fn unpack(n: UnsignedLong) -> (UnsignedShort, UnsignedShort) {
+     let half_bits = mem::size_of::<UnsignedShort>() * 8;
+     let low_mask = UnsignedShort::MAX as UnsignedLong;
+
+     let steal = (n >> half_bits) as UnsignedShort;
+     let real = (n & low_mask) as UnsignedShort;
+
+     (steal, real)
+ }
+
+ /// Join the two head values
+ ///
+ /// `steal` is placed in the upper half of the result and `real` in the
+ /// lower half; this is the inverse of `unpack`.
+ fn pack(steal: UnsignedShort, real: UnsignedShort) -> UnsignedLong {
+     let half_bits = mem::size_of::<UnsignedShort>() * 8;
+     let high = (steal as UnsignedLong) << half_bits;
+     let low = real as UnsignedLong;
+
+     low | high
+ }
+
+ #[test]
+ fn test_local_queue_capacity() {
+     // The largest slot index must fit in a `u8`.
+     let largest_index = LOCAL_QUEUE_CAPACITY - 1;
+     assert!(largest_index <= u8::MAX as usize);
+ }
diff --git a/third_party/rust/tokio/src/runtime/scheduler/multi_thread/stats.rs b/third_party/rust/tokio/src/runtime/scheduler/multi_thread/stats.rs
new file mode 100644
index 0000000000..f01daaa1bf
--- /dev/null
+++ b/third_party/rust/tokio/src/runtime/scheduler/multi_thread/stats.rs
@@ -0,0 +1,140 @@
+use crate::runtime::{Config, MetricsBatch, WorkerMetrics};
+
+use std::cmp;
+use std::time::{Duration, Instant};
+
+ /// Per-worker statistics. This is used for both tuning the scheduler and
+ /// reporting runtime-level metrics/stats.
+ pub(crate) struct Stats {
+ /// The metrics batch used to report runtime-level metrics/stats to the
+ /// user.
+ batch: MetricsBatch,
+
+ /// Instant at which work last resumed (continued after park).
+ ///
+ /// This duplicates the value stored in `MetricsBatch`. We will unify
+ /// `Stats` and `MetricsBatch` when we stabilize metrics.
+ processing_scheduled_tasks_started_at: Instant,
+
+ /// Number of tasks polled in the batch of scheduled tasks
+ tasks_polled_in_batch: usize,
+
+ /// Exponentially-weighted moving average of time spent polling a
+ /// scheduled task.
+ ///
+ /// Tracked in nanoseconds, stored as a f64 since that is what we use with
+ /// the EWMA calculations
+ task_poll_time_ewma: f64,
+ }
+
+ /// How to weigh each individual poll time, value is plucked from thin air.
+ const TASK_POLL_TIME_EWMA_ALPHA: f64 = 0.1;
+
+ /// Ideally, we wouldn't go above this, value is plucked from thin air.
+ ///
+ /// Expressed in nanoseconds (200 microseconds).
+ const TARGET_GLOBAL_QUEUE_INTERVAL: f64 = Duration::from_micros(200).as_nanos() as f64;
+
+ /// Max value for the global queue interval. This is roughly 2x the previous
+ /// default
+ const MAX_TASKS_POLLED_PER_GLOBAL_QUEUE_INTERVAL: u32 = 127;
+
+ /// This is the previous default
+ const TARGET_TASKS_POLLED_PER_GLOBAL_QUEUE_INTERVAL: u32 = 61;
+
+ impl Stats {
+     /// Creates the per-worker stats, seeding the poll-time average with the
+     /// target interval divided by the target number of polls per interval.
+     pub(crate) fn new(worker_metrics: &WorkerMetrics) -> Stats {
+         // Seed the value with what we hope to see.
+         let target_polls = TARGET_TASKS_POLLED_PER_GLOBAL_QUEUE_INTERVAL as f64;
+
+         Stats {
+             batch: MetricsBatch::new(worker_metrics),
+             processing_scheduled_tasks_started_at: Instant::now(),
+             tasks_polled_in_batch: 0,
+             task_poll_time_ewma: TARGET_GLOBAL_QUEUE_INTERVAL / target_polls,
+         }
+     }
+
+     /// Returns the global queue interval to use.
+     ///
+     /// An interval set explicitly in `config` is returned unchanged.
+     /// Otherwise the interval is derived from the average poll time and
+     /// kept within `2..=MAX_TASKS_POLLED_PER_GLOBAL_QUEUE_INTERVAL`.
+     pub(crate) fn tuned_global_queue_interval(&self, config: &Config) -> u32 {
+         match config.global_queue_interval {
+             // If an interval is explicitly set, don't tune.
+             Some(configured) => configured,
+             None => {
+                 // As of Rust 1.45, casts from f64 -> u32 are saturating,
+                 // which is fine here.
+                 let tasks_per_interval =
+                     (TARGET_GLOBAL_QUEUE_INTERVAL / self.task_poll_time_ewma) as u32;
+
+                 let capped = cmp::min(
+                     MAX_TASKS_POLLED_PER_GLOBAL_QUEUE_INTERVAL,
+                     tasks_per_interval,
+                 );
+
+                 // We don't want to return less than 2 as that would result
+                 // in the global queue always getting checked first.
+                 cmp::max(2, capped)
+             }
+         }
+     }
+
+     /// Submits the batched metrics to `to`.
+     pub(crate) fn submit(&mut self, to: &WorkerMetrics) {
+         let batch = &mut self.batch;
+         batch.submit(to);
+     }
+
+     /// Forwards the "about to park" event to the metrics batch.
+     pub(crate) fn about_to_park(&mut self) {
+         let batch = &mut self.batch;
+         batch.about_to_park();
+     }
+
+     /// Forwards a local-schedule event to the metrics batch.
+     pub(crate) fn inc_local_schedule_count(&mut self) {
+         let batch = &mut self.batch;
+         batch.inc_local_schedule_count();
+     }
+
+     /// Marks the start of a batch of scheduled tasks: notifies the metrics
+     /// batch, records the start time and resets the per-batch poll counter.
+     pub(crate) fn start_processing_scheduled_tasks(&mut self) {
+         self.batch.start_processing_scheduled_tasks();
+
+         self.tasks_polled_in_batch = 0;
+         self.processing_scheduled_tasks_started_at = Instant::now();
+     }
+
+     /// Marks the end of a batch of scheduled tasks and, if any task was
+     /// polled, folds the batch's mean poll time into the moving average.
+     pub(crate) fn end_processing_scheduled_tasks(&mut self) {
+         self.batch.end_processing_scheduled_tasks();
+
+         // Nothing was polled, so there is nothing to fold into the average.
+         if self.tasks_polled_in_batch == 0 {
+             return;
+         }
+
+         // If we "overflow" this conversion, we have bigger problems than
+         // slightly off stats.
+         let batch_duration = Instant::now() - self.processing_scheduled_tasks_started_at;
+         let elapsed = batch_duration.as_nanos() as f64;
+         let num_polls = self.tasks_polled_in_batch as f64;
+
+         // Calculate the mean poll duration for a single task in the batch
+         let mean_poll_duration = elapsed / num_polls;
+
+         // Compute the alpha weighted by the number of tasks polled this batch.
+         let weighted_alpha = 1.0 - (1.0 - TASK_POLL_TIME_EWMA_ALPHA).powf(num_polls);
+
+         // Now compute the new weighted average task poll time.
+         let fresh_part = weighted_alpha * mean_poll_duration;
+         let carried_part = (1.0 - weighted_alpha) * self.task_poll_time_ewma;
+         self.task_poll_time_ewma = fresh_part + carried_part;
+     }
+
+     /// Notifies the metrics batch that a poll starts and counts it towards
+     /// the current batch.
+     pub(crate) fn start_poll(&mut self) {
+         self.batch.start_poll();
+
+         self.tasks_polled_in_batch += 1;
+     }
+
+     /// Notifies the metrics batch that a poll ended.
+     pub(crate) fn end_poll(&mut self) {
+         let batch = &mut self.batch;
+         batch.end_poll();
+     }
+
+     /// Adds `by` to the steal count in the metrics batch.
+     pub(crate) fn incr_steal_count(&mut self, by: u16) {
+         let batch = &mut self.batch;
+         batch.incr_steal_count(by);
+     }
+
+     /// Records one steal operation in the metrics batch.
+     pub(crate) fn incr_steal_operations(&mut self) {
+         let batch = &mut self.batch;
+         batch.incr_steal_operations();
+     }
+
+     /// Records one overflow in the metrics batch.
+     pub(crate) fn incr_overflow_count(&mut self) {
+         let batch = &mut self.batch;
+         batch.incr_overflow_count();
+     }
+ }
diff --git a/third_party/rust/tokio/src/runtime/scheduler/multi_thread/trace.rs b/third_party/rust/tokio/src/runtime/scheduler/multi_thread/trace.rs
new file mode 100644
index 0000000000..7b4aeb5c1d
--- /dev/null
+++ b/third_party/rust/tokio/src/runtime/scheduler/multi_thread/trace.rs
@@ -0,0 +1,61 @@
+use crate::loom::sync::atomic::{AtomicBool, Ordering};
+use crate::loom::sync::{Barrier, Mutex};
+use crate::runtime::dump::Dump;
+use crate::runtime::scheduler::multi_thread::Handle;
+use crate::sync::notify::Notify;
+
+/// Tracing status of the worker.
+pub(super) struct TraceStatus {
+ pub(super) trace_requested: AtomicBool,
+ pub(super) trace_start: Barrier,
+ pub(super) trace_end: Barrier,
+ pub(super) result_ready: Notify,
+ pub(super) trace_result: Mutex<Option<Dump>>,
+}
+
+impl TraceStatus {
+ pub(super) fn new(remotes_len: usize) -> Self {
+ Self {
+ trace_requested: AtomicBool::new(false),
+ trace_start: Barrier::new(remotes_len),
+ trace_end: Barrier::new(remotes_len),
+ result_ready: Notify::new(),
+ trace_result: Mutex::new(None),
+ }
+ }
+
+ pub(super) fn trace_requested(&self) -> bool {
+ self.trace_requested.load(Ordering::Relaxed)
+ }
+
+ pub(super) async fn start_trace_request(&self, handle: &Handle) {
+ while self
+ .trace_requested
+ .compare_exchange(false, true, Ordering::Acquire, Ordering::Relaxed)
+ .is_err()
+ {
+ handle.notify_all();
+ crate::task::yield_now().await;
+ }
+ }
+
+ pub(super) fn stash_result(&self, dump: Dump) {
+ let _ = self.trace_result.lock().insert(dump);
+ self.result_ready.notify_one();
+ }
+
+ pub(super) fn take_result(&self) -> Option<Dump> {
+ self.trace_result.lock().take()
+ }
+
+ pub(super) async fn end_trace_request(&self, handle: &Handle) {
+ while self
+ .trace_requested
+ .compare_exchange(true, false, Ordering::Acquire, Ordering::Relaxed)
+ .is_err()
+ {
+ handle.notify_all();
+ crate::task::yield_now().await;
+ }
+ }
+}
diff --git a/third_party/rust/tokio/src/runtime/scheduler/multi_thread/trace_mock.rs b/third_party/rust/tokio/src/runtime/scheduler/multi_thread/trace_mock.rs
new file mode 100644
index 0000000000..2c17a4e38b
--- /dev/null
+++ b/third_party/rust/tokio/src/runtime/scheduler/multi_thread/trace_mock.rs
@@ -0,0 +1,11 @@
+pub(super) struct TraceStatus {}
+
+impl TraceStatus {
+ pub(super) fn new(_: usize) -> Self {
+ Self {}
+ }
+
+ pub(super) fn trace_requested(&self) -> bool {
+ false
+ }
+}
diff --git a/third_party/rust/tokio/src/runtime/scheduler/multi_thread/worker.rs b/third_party/rust/tokio/src/runtime/scheduler/multi_thread/worker.rs
new file mode 100644
index 0000000000..6ae1146337
--- /dev/null
+++ b/third_party/rust/tokio/src/runtime/scheduler/multi_thread/worker.rs
@@ -0,0 +1,1216 @@
+//! A scheduler is initialized with a fixed number of workers. Each worker is
+//! driven by a thread. Each worker has a "core" which contains data such as the
+//! run queue and other state. When `block_in_place` is called, the worker's
+//! "core" is handed off to a new thread allowing the scheduler to continue to
+//! make progress while the originating thread blocks.
+//!
+//! # Shutdown
+//!
+//! Shutting down the runtime involves the following steps:
+//!
+//! 1. The Shared::close method is called. This closes the inject queue and
+//! OwnedTasks instance and wakes up all worker threads.
+//!
+//! 2. Each worker thread observes the close signal next time it runs
+//! Core::maintenance by checking whether the inject queue is closed.
+//! The Core::is_shutdown flag is set to true.
+//!
+//! 3. The worker thread calls `pre_shutdown` in parallel. Here, the worker
+//! will keep removing tasks from OwnedTasks until it is empty. No new
+//! tasks can be pushed to the OwnedTasks during or after this step as it
+//! was closed in step 1.
+//!
+//! 5. The workers call Shared::shutdown to enter the single-threaded phase of
+//! shutdown. These calls will push their core to Shared::shutdown_cores,
+//! and the last thread to push its core will finish the shutdown procedure.
+//!
+//! 6. The local run queue of each core is emptied, then the inject queue is
+//! emptied.
+//!
+//! At this point, shutdown has completed. It is not possible for any of the
+//! collections to contain any tasks at this point, as each collection was
+//! closed first, then emptied afterwards.
+//!
+//! ## Spawns during shutdown
+//!
+//! When spawning tasks during shutdown, there are two cases:
+//!
+//! * The spawner observes the OwnedTasks being open, and the inject queue is
+//! closed.
+//! * The spawner observes the OwnedTasks being closed and doesn't check the
+//! inject queue.
+//!
+//! The first case can only happen if the OwnedTasks::bind call happens before
+//! or during step 1 of shutdown. In this case, the runtime will clean up the
+//! task in step 3 of shutdown.
+//!
+//! In the latter case, the task was not spawned and the task is immediately
+//! cancelled by the spawner.
+//!
+//! The correctness of shutdown requires both the inject queue and OwnedTasks
+//! collection to have a closed bit. With a close bit on only the inject queue,
+//! spawning could run in to a situation where a task is successfully bound long
+//! after the runtime has shut down. With a close bit on only the OwnedTasks,
+//! the first spawning situation could result in the notification being pushed
+//! to the inject queue after step 6 of shutdown, which would leave a task in
+//! the inject queue indefinitely. This would be a ref-count cycle and a memory
+//! leak.
+
+use crate::loom::sync::{Arc, Mutex};
+use crate::runtime;
+use crate::runtime::context;
+use crate::runtime::scheduler::multi_thread::{
+ idle, queue, Counters, Handle, Idle, Overflow, Parker, Stats, TraceStatus, Unparker,
+};
+use crate::runtime::scheduler::{inject, Defer, Lock};
+use crate::runtime::task::OwnedTasks;
+use crate::runtime::{
+ blocking, coop, driver, scheduler, task, Config, SchedulerMetrics, WorkerMetrics,
+};
+use crate::util::atomic_cell::AtomicCell;
+use crate::util::rand::{FastRand, RngSeedGenerator};
+
+use std::cell::RefCell;
+use std::task::Waker;
+use std::time::Duration;
+
+cfg_metrics! {
+ mod metrics;
+}
+
+cfg_taskdump! {
+ mod taskdump;
+}
+
+cfg_not_taskdump! {
+ mod taskdump_mock;
+}
+
+/// A scheduler worker
+pub(super) struct Worker {
+ /// Reference to scheduler's handle
+ handle: Arc<Handle>,
+
+ /// Index holding this worker's remote state
+ index: usize,
+
+ /// Used to hand-off a worker's core to another thread.
+ core: AtomicCell<Core>,
+}
+
+/// Core data
+struct Core {
+ /// Used to schedule bookkeeping tasks every so often.
+ tick: u32,
+
+ /// When a task is scheduled from a worker, it is stored in this slot. The
+ /// worker will check this slot for a task **before** checking the run
+ /// queue. This effectively results in the **last** scheduled task to be run
+ /// next (LIFO). This is an optimization for improving locality which
+ /// benefits message passing patterns and helps to reduce latency.
+ lifo_slot: Option<Notified>,
+
+ /// When `true`, locally scheduled tasks go to the LIFO slot. When `false`,
+ /// they go to the back of the `run_queue`.
+ lifo_enabled: bool,
+
+ /// The worker-local run queue.
+ run_queue: queue::Local<Arc<Handle>>,
+
+ /// True if the worker is currently searching for more work. Searching
+ /// involves attempting to steal from other workers.
+ is_searching: bool,
+
+ /// True if the scheduler is being shutdown
+ is_shutdown: bool,
+
+ /// True if the scheduler is being traced
+ is_traced: bool,
+
+ /// Parker
+ ///
+ /// Stored in an `Option` as the parker is added / removed to make the
+ /// borrow checker happy.
+ park: Option<Parker>,
+
+ /// Per-worker runtime stats
+ stats: Stats,
+
+ /// How often to check the global queue
+ global_queue_interval: u32,
+
+ /// Fast random number generator.
+ rand: FastRand,
+}
+
+/// State shared across all workers
+pub(crate) struct Shared {
+ /// Per-worker remote state. All other workers have access to this; it is
+ /// how they communicate with each other.
+ remotes: Box<[Remote]>,
+
+ /// Global task queue used for:
+ /// 1. Submit work to the scheduler while **not** currently on a worker thread.
+ /// 2. Submit work to the scheduler when a worker run queue is saturated
+ pub(super) inject: inject::Shared<Arc<Handle>>,
+
+ /// Coordinates idle workers
+ idle: Idle,
+
+ /// Collection of all active tasks spawned onto this executor.
+ pub(super) owned: OwnedTasks<Arc<Handle>>,
+
+ /// Data synchronized by the scheduler mutex
+ pub(super) synced: Mutex<Synced>,
+
+ /// Cores that have observed the shutdown signal
+ ///
+ /// The core is **not** placed back in the worker to prevent it from being
+ /// stolen by a thread that was spawned as part of `block_in_place`.
+ #[allow(clippy::vec_box)] // we're moving an already-boxed value
+ shutdown_cores: Mutex<Vec<Box<Core>>>,
+
+ /// Tracing status shared by the workers; see `TraceStatus`.
+ pub(super) trace_status: TraceStatus,
+
+ /// Scheduler configuration options
+ config: Config,
+
+ /// Collects metrics from the runtime.
+ pub(super) scheduler_metrics: SchedulerMetrics,
+
+ pub(super) worker_metrics: Box<[WorkerMetrics]>,
+
+ /// Only held to trigger some code on drop. This is used to get internal
+ /// runtime metrics that can be useful when doing performance
+ /// investigations. This does nothing (empty struct, no drop impl) unless
+ /// the `tokio_internal_mt_counters` cfg flag is set.
+ _counters: Counters,
+}
+
+/// Data synchronized by the scheduler mutex
+pub(crate) struct Synced {
+ /// Synchronized state for `Idle`.
+ pub(super) idle: idle::Synced,
+
+ /// Synchronized state for `Inject`.
+ pub(crate) inject: inject::Synced,
+}
+
+/// Used to communicate with a worker from other threads.
+struct Remote {
+ /// Steals tasks from this worker.
+ pub(super) steal: queue::Steal<Arc<Handle>>,
+
+ /// Unparks the associated worker thread
+ unpark: Unparker,
+}
+
+/// Thread-local context
+pub(crate) struct Context {
+ /// Worker
+ worker: Arc<Worker>,
+
+ /// Core data
+ core: RefCell<Option<Box<Core>>>,
+
+ /// Tasks to wake after resource drivers are polled. This is mostly to
+ /// handle yielded tasks.
+ pub(crate) defer: Defer,
+}
+
+/// Starts the workers
+pub(crate) struct Launch(Vec<Arc<Worker>>);
+
+/// Running a task may consume the core. If the core is still available when
+/// running the task completes, it is returned. Otherwise, the worker will need
+/// to stop processing.
+type RunResult = Result<Box<Core>, ()>;
+
+/// A task handle
+type Task = task::Task<Arc<Handle>>;
+
+/// A notified task handle
+type Notified = task::Notified<Arc<Handle>>;
+
+/// Value picked out of thin air. Running the LIFO slot a handful of times
+/// seems sufficient to benefit from locality. More than 3 times is probably
+/// overdoing it. The value can be tuned in the future with data that shows
+/// improvements.
+const MAX_LIFO_POLLS_PER_TICK: usize = 3;
+
+pub(super) fn create(
+ size: usize,
+ park: Parker,
+ driver_handle: driver::Handle,
+ blocking_spawner: blocking::Spawner,
+ seed_generator: RngSeedGenerator,
+ config: Config,
+) -> (Arc<Handle>, Launch) {
+ let mut cores = Vec::with_capacity(size);
+ let mut remotes = Vec::with_capacity(size);
+ let mut worker_metrics = Vec::with_capacity(size);
+
+ // Create the local queues
+ for _ in 0..size {
+ let (steal, run_queue) = queue::local();
+
+ let park = park.clone();
+ let unpark = park.unpark();
+ let metrics = WorkerMetrics::from_config(&config);
+ let stats = Stats::new(&metrics);
+
+ cores.push(Box::new(Core {
+ tick: 0,
+ lifo_slot: None,
+ lifo_enabled: !config.disable_lifo_slot,
+ run_queue,
+ is_searching: false,
+ is_shutdown: false,
+ is_traced: false,
+ park: Some(park),
+ global_queue_interval: stats.tuned_global_queue_interval(&config),
+ stats,
+ rand: FastRand::from_seed(config.seed_generator.next_seed()),
+ }));
+
+ remotes.push(Remote { steal, unpark });
+ worker_metrics.push(metrics);
+ }
+
+ let (idle, idle_synced) = Idle::new(size);
+ let (inject, inject_synced) = inject::Shared::new();
+
+ let remotes_len = remotes.len();
+ let handle = Arc::new(Handle {
+ shared: Shared {
+ remotes: remotes.into_boxed_slice(),
+ inject,
+ idle,
+ owned: OwnedTasks::new(),
+ synced: Mutex::new(Synced {
+ idle: idle_synced,
+ inject: inject_synced,
+ }),
+ shutdown_cores: Mutex::new(vec![]),
+ trace_status: TraceStatus::new(remotes_len),
+ config,
+ scheduler_metrics: SchedulerMetrics::new(),
+ worker_metrics: worker_metrics.into_boxed_slice(),
+ _counters: Counters,
+ },
+ driver: driver_handle,
+ blocking_spawner,
+ seed_generator,
+ });
+
+ let mut launch = Launch(vec![]);
+
+ for (index, core) in cores.drain(..).enumerate() {
+ launch.0.push(Arc::new(Worker {
+ handle: handle.clone(),
+ index,
+ core: AtomicCell::new(Some(core)),
+ }));
+ }
+
+ (handle, launch)
+}
+
+#[track_caller]
+pub(crate) fn block_in_place<F, R>(f: F) -> R
+where
+ F: FnOnce() -> R,
+{
+ // Try to steal the worker core back
+ struct Reset {
+ take_core: bool,
+ budget: coop::Budget,
+ }
+
+ impl Drop for Reset {
+ fn drop(&mut self) {
+ with_current(|maybe_cx| {
+ if let Some(cx) = maybe_cx {
+ if self.take_core {
+ let core = cx.worker.core.take();
+ let mut cx_core = cx.core.borrow_mut();
+ assert!(cx_core.is_none());
+ *cx_core = core;
+ }
+
+ // Reset the task budget as we are re-entering the
+ // runtime.
+ coop::set(self.budget);
+ }
+ });
+ }
+ }
+
+ let mut had_entered = false;
+ let mut take_core = false;
+
+ let setup_result = with_current(|maybe_cx| {
+ match (
+ crate::runtime::context::current_enter_context(),
+ maybe_cx.is_some(),
+ ) {
+ (context::EnterRuntime::Entered { .. }, true) => {
+ // We are on a thread pool runtime thread, so we just need to
+ // set up blocking.
+ had_entered = true;
+ }
+ (
+ context::EnterRuntime::Entered {
+ allow_block_in_place,
+ },
+ false,
+ ) => {
+ // We are on an executor, but _not_ on the thread pool. That is
+ // _only_ okay if we are in a thread pool runtime's block_on
+ // method:
+ if allow_block_in_place {
+ had_entered = true;
+ return Ok(());
+ } else {
+ // This probably means we are on the current_thread runtime or in a
+ // LocalSet, where it is _not_ okay to block.
+ return Err(
+ "can call blocking only when running on the multi-threaded runtime",
+ );
+ }
+ }
+ (context::EnterRuntime::NotEntered, true) => {
+ // This is a nested call to block_in_place (we already exited).
+ // All the necessary setup has already been done.
+ return Ok(());
+ }
+ (context::EnterRuntime::NotEntered, false) => {
+ // We are outside of the tokio runtime, so blocking is fine.
+ // We can also skip all of the thread pool blocking setup steps.
+ return Ok(());
+ }
+ }
+
+ let cx = maybe_cx.expect("no .is_some() == false cases above should lead here");
+
+ // Get the worker core. If none is set, then blocking is fine!
+ let core = match cx.core.borrow_mut().take() {
+ Some(core) => core,
+ None => return Ok(()),
+ };
+
+ // We are taking the core from the context and sending it to another
+ // thread.
+ take_core = true;
+
+ // The parker should be set here
+ assert!(core.park.is_some());
+
+ // In order to block, the core must be sent to another thread for
+ // execution.
+ //
+ // First, move the core back into the worker's shared core slot.
+ cx.worker.core.set(core);
+
+ // Next, clone the worker handle and send it to a new thread for
+ // processing.
+ //
+ // Once the blocking task is done executing, we will attempt to
+ // steal the core back.
+ let worker = cx.worker.clone();
+ runtime::spawn_blocking(move || run(worker));
+ Ok(())
+ });
+
+ if let Err(panic_message) = setup_result {
+ panic!("{}", panic_message);
+ }
+
+ if had_entered {
+ // Unset the current task's budget. Blocking sections are not
+ // constrained by task budgets.
+ let _reset = Reset {
+ take_core,
+ budget: coop::stop(),
+ };
+
+ crate::runtime::context::exit_runtime(f)
+ } else {
+ f()
+ }
+}
+
+impl Launch {
+ pub(crate) fn launch(mut self) {
+ for worker in self.0.drain(..) {
+ runtime::spawn_blocking(move || run(worker));
+ }
+ }
+}
+
+fn run(worker: Arc<Worker>) {
+ struct AbortOnPanic;
+
+ impl Drop for AbortOnPanic {
+ fn drop(&mut self) {
+ if std::thread::panicking() {
+ eprintln!("worker thread panicking; aborting process");
+ std::process::abort();
+ }
+ }
+ }
+
+ // Catching panics on worker threads in tests is quite tricky. Instead, when
+ // debug assertions are enabled, we just abort the process.
+ #[cfg(debug_assertions)]
+ let _abort_on_panic = AbortOnPanic;
+
+ // Acquire a core. If this fails, then another thread is running this
+ // worker and there is nothing further to do.
+ let core = match worker.core.take() {
+ Some(core) => core,
+ None => return,
+ };
+
+ let handle = scheduler::Handle::MultiThread(worker.handle.clone());
+
+ crate::runtime::context::enter_runtime(&handle, true, |_| {
+ // Set the worker context.
+ let cx = scheduler::Context::MultiThread(Context {
+ worker,
+ core: RefCell::new(None),
+ defer: Defer::new(),
+ });
+
+ context::set_scheduler(&cx, || {
+ let cx = cx.expect_multi_thread();
+
+ // This should always be an error. It only returns a `Result` to support
+ // using `?` to short circuit.
+ assert!(cx.run(core).is_err());
+
+ // Check if there are any deferred tasks to notify. This can happen when
+ // the worker core is lost due to `block_in_place()` being called from
+ // within the task.
+ cx.defer.wake();
+ });
+ });
+}
+
+impl Context {
+ fn run(&self, mut core: Box<Core>) -> RunResult {
+ // Reset `lifo_enabled` here in case the core was previously stolen from
+ // a task that had the LIFO slot disabled.
+ self.reset_lifo_enabled(&mut core);
+
+ // Start as "processing" tasks as polling tasks from the local queue
+ // will be one of the first things we do.
+ core.stats.start_processing_scheduled_tasks();
+
+ while !core.is_shutdown {
+ self.assert_lifo_enabled_is_correct(&core);
+
+ if core.is_traced {
+ core = self.worker.handle.trace_core(core);
+ }
+
+ // Increment the tick
+ core.tick();
+
+ // Run maintenance, if needed
+ core = self.maintenance(core);
+
+ // First, check work available to the current worker.
+ if let Some(task) = core.next_task(&self.worker) {
+ core = self.run_task(task, core)?;
+ continue;
+ }
+
+ // We consumed all work in the queues and will start searching for work.
+ core.stats.end_processing_scheduled_tasks();
+
+ // There is no more **local** work to process, try to steal work
+ // from other workers.
+ if let Some(task) = core.steal_work(&self.worker) {
+ // Found work, switch back to processing
+ core.stats.start_processing_scheduled_tasks();
+ core = self.run_task(task, core)?;
+ } else {
+ // Wait for work
+ core = if !self.defer.is_empty() {
+ self.park_timeout(core, Some(Duration::from_millis(0)))
+ } else {
+ self.park(core)
+ };
+ }
+ }
+
+ core.pre_shutdown(&self.worker);
+
+ // Signal shutdown
+ self.worker.handle.shutdown_core(core);
+ Err(())
+ }
+
+ fn run_task(&self, task: Notified, mut core: Box<Core>) -> RunResult {
+ let task = self.worker.handle.shared.owned.assert_owner(task);
+
+ // Make sure the worker is not in the **searching** state. This enables
+ // another idle worker to try to steal work.
+ core.transition_from_searching(&self.worker);
+
+ self.assert_lifo_enabled_is_correct(&core);
+
+ // Measure the poll start time. Note that we may end up polling other
+ // tasks under this measurement. In this case, the tasks came from the
+ // LIFO slot and are considered part of the current task for scheduling
+ // purposes. These tasks inherit the "parent"'s limits.
+ core.stats.start_poll();
+
+ // Make the core available to the runtime context
+ *self.core.borrow_mut() = Some(core);
+
+ // Run the task
+ coop::budget(|| {
+ task.run();
+ let mut lifo_polls = 0;
+
+ // As long as there is budget remaining and a task exists in the
+ // `lifo_slot`, then keep running.
+ loop {
+ // Check if we still have the core. If not, the core was stolen
+ // by another worker.
+ let mut core = match self.core.borrow_mut().take() {
+ Some(core) => core,
+ None => {
+ // In this case, we cannot call `reset_lifo_enabled()`
+ // because the core was stolen. The stealer will handle
+ // that at the top of `Context::run`
+ return Err(());
+ }
+ };
+
+ // Check for a task in the LIFO slot
+ let task = match core.lifo_slot.take() {
+ Some(task) => task,
+ None => {
+ self.reset_lifo_enabled(&mut core);
+ core.stats.end_poll();
+ return Ok(core);
+ }
+ };
+
+ if !coop::has_budget_remaining() {
+ core.stats.end_poll();
+
+ // Not enough budget left to run the LIFO task, push it to
+ // the back of the queue and return.
+ core.run_queue.push_back_or_overflow(
+ task,
+ &*self.worker.handle,
+ &mut core.stats,
+ );
+ // If we hit this point, the LIFO slot should be enabled.
+ // There is no need to reset it.
+ debug_assert!(core.lifo_enabled);
+ return Ok(core);
+ }
+
+ // Track that we are about to run a task from the LIFO slot.
+ lifo_polls += 1;
+ super::counters::inc_lifo_schedules();
+
+ // Disable the LIFO slot if we reach our limit
+ //
+ // In ping-pong style workloads where task A notifies task B,
+ // which notifies task A again, continuously prioritizing the
+ // LIFO slot can cause starvation as these two tasks will
+ // repeatedly schedule the other. To mitigate this, we limit the
+ // number of times the LIFO slot is prioritized.
+ if lifo_polls >= MAX_LIFO_POLLS_PER_TICK {
+ core.lifo_enabled = false;
+ super::counters::inc_lifo_capped();
+ }
+
+ // Run the LIFO task, then loop
+ *self.core.borrow_mut() = Some(core);
+ let task = self.worker.handle.shared.owned.assert_owner(task);
+ task.run();
+ }
+ })
+ }
+
+ fn reset_lifo_enabled(&self, core: &mut Core) {
+ core.lifo_enabled = !self.worker.handle.shared.config.disable_lifo_slot;
+ }
+
+ fn assert_lifo_enabled_is_correct(&self, core: &Core) {
+ debug_assert_eq!(
+ core.lifo_enabled,
+ !self.worker.handle.shared.config.disable_lifo_slot
+ );
+ }
+
+ fn maintenance(&self, mut core: Box<Core>) -> Box<Core> {
+ if core.tick % self.worker.handle.shared.config.event_interval == 0 {
+ super::counters::inc_num_maintenance();
+
+ core.stats.end_processing_scheduled_tasks();
+
+ // Call `park` with a 0 timeout. This enables the I/O driver, timer, ...
+ // to run without actually putting the thread to sleep.
+ core = self.park_timeout(core, Some(Duration::from_millis(0)));
+
+ // Run regularly scheduled maintenance
+ core.maintenance(&self.worker);
+
+ core.stats.start_processing_scheduled_tasks();
+ }
+
+ core
+ }
+
+ /// Parks the worker thread while waiting for tasks to execute.
+ ///
+ /// This function checks if indeed there's no more work left to be done before parking.
+ /// Also important to notice that, before parking, the worker thread will try to take
+ /// ownership of the Driver (IO/Time) and dispatch any events that might have fired.
+ /// Whenever a worker thread executes the Driver loop, all woken tasks are scheduled
+ /// in its own local queue until the queue saturates (ntasks > LOCAL_QUEUE_CAPACITY).
+ /// When the local queue is saturated, the overflow tasks are added to the injection queue
+ /// from where other workers can pick them up.
+ /// Also, we rely on the workstealing algorithm to spread the tasks amongst workers
+ /// after all the IOs get dispatched
+ fn park(&self, mut core: Box<Core>) -> Box<Core> {
+ if let Some(f) = &self.worker.handle.shared.config.before_park {
+ f();
+ }
+
+ if core.transition_to_parked(&self.worker) {
+ while !core.is_shutdown && !core.is_traced {
+ core.stats.about_to_park();
+ core = self.park_timeout(core, None);
+
+ // Run regularly scheduled maintenance
+ core.maintenance(&self.worker);
+
+ if core.transition_from_parked(&self.worker) {
+ break;
+ }
+ }
+ }
+
+ if let Some(f) = &self.worker.handle.shared.config.after_unpark {
+ f();
+ }
+ core
+ }
+
+ fn park_timeout(&self, mut core: Box<Core>, duration: Option<Duration>) -> Box<Core> {
+ self.assert_lifo_enabled_is_correct(&core);
+
+ // Take the parker out of core
+ let mut park = core.park.take().expect("park missing");
+
+ // Store `core` in context
+ *self.core.borrow_mut() = Some(core);
+
+ // Park thread
+ if let Some(timeout) = duration {
+ park.park_timeout(&self.worker.handle.driver, timeout);
+ } else {
+ park.park(&self.worker.handle.driver);
+ }
+
+ self.defer.wake();
+
+ // Remove `core` from context
+ core = self.core.borrow_mut().take().expect("core missing");
+
+ // Place `park` back in `core`
+ core.park = Some(park);
+
+ if core.should_notify_others() {
+ self.worker.handle.notify_parked_local();
+ }
+
+ core
+ }
+
+ pub(crate) fn defer(&self, waker: &Waker) {
+ self.defer.defer(waker);
+ }
+}
+
+impl Core {
+ /// Increment the tick
+ fn tick(&mut self) {
+ self.tick = self.tick.wrapping_add(1);
+ }
+
+ /// Return the next notified task available to this worker.
+ fn next_task(&mut self, worker: &Worker) -> Option<Notified> {
+ if self.tick % self.global_queue_interval == 0 {
+ // Update the global queue interval, if needed
+ self.tune_global_queue_interval(worker);
+
+ worker
+ .handle
+ .next_remote_task()
+ .or_else(|| self.next_local_task())
+ } else {
+ let maybe_task = self.next_local_task();
+
+ if maybe_task.is_some() {
+ return maybe_task;
+ }
+
+ if worker.inject().is_empty() {
+ return None;
+ }
+
+ // Other threads can only **remove** tasks from the current worker's
+ // `run_queue`. So, we can be confident that by the time we call
+ // `run_queue.push_back` below, there will be *at least* `cap`
+ // available slots in the queue.
+ let cap = usize::min(
+ self.run_queue.remaining_slots(),
+ self.run_queue.max_capacity() / 2,
+ );
+
+ // The worker is currently idle, pull a batch of work from the
+ // injection queue. We don't want to pull *all* the work so other
+ // workers can also get some.
+ let n = usize::min(
+ worker.inject().len() / worker.handle.shared.remotes.len() + 1,
+ cap,
+ );
+
+ let mut synced = worker.handle.shared.synced.lock();
+ // safety: passing in the correct `inject::Synced`.
+ let mut tasks = unsafe { worker.inject().pop_n(&mut synced.inject, n) };
+
+ // Pop the first task to return immediately
+ let ret = tasks.next();
+
+ // Push the rest of the tasks onto the run queue
+ self.run_queue.push_back(tasks);
+
+ ret
+ }
+ }
+
+ fn next_local_task(&mut self) -> Option<Notified> {
+ self.lifo_slot.take().or_else(|| self.run_queue.pop())
+ }
+
+ /// Function responsible for stealing tasks from another worker
+ ///
+ /// Note: A worker will only actually try to steal if fewer than half of
+ /// the workers are already searching for tasks to steal. The idea is to
+ /// make sure not all workers are trying to steal at the same time.
+ fn steal_work(&mut self, worker: &Worker) -> Option<Notified> {
+ if !self.transition_to_searching(worker) {
+ return None;
+ }
+
+ let num = worker.handle.shared.remotes.len();
+ // Start from a random worker
+ let start = self.rand.fastrand_n(num as u32) as usize;
+
+ for i in 0..num {
+ let i = (start + i) % num;
+
+ // Don't steal from ourself! We know we don't have work.
+ if i == worker.index {
+ continue;
+ }
+
+ let target = &worker.handle.shared.remotes[i];
+ if let Some(task) = target
+ .steal
+ .steal_into(&mut self.run_queue, &mut self.stats)
+ {
+ return Some(task);
+ }
+ }
+
+ // Fallback on checking the global queue
+ worker.handle.next_remote_task()
+ }
+
+ fn transition_to_searching(&mut self, worker: &Worker) -> bool {
+ if !self.is_searching {
+ self.is_searching = worker.handle.shared.idle.transition_worker_to_searching();
+ }
+
+ self.is_searching
+ }
+
+ fn transition_from_searching(&mut self, worker: &Worker) {
+ if !self.is_searching {
+ return;
+ }
+
+ self.is_searching = false;
+ worker.handle.transition_worker_from_searching();
+ }
+
+ fn has_tasks(&self) -> bool {
+ self.lifo_slot.is_some() || self.run_queue.has_tasks()
+ }
+
+ fn should_notify_others(&self) -> bool {
+ // If there are tasks available to steal, but this worker is not
+ // looking for tasks to steal, notify another worker.
+ if self.is_searching {
+ return false;
+ }
+ self.lifo_slot.is_some() as usize + self.run_queue.len() > 1
+ }
+
+ /// Prepares the worker state for parking.
+ ///
+ /// Returns true if the transition happened, false if there is work to do first.
+ fn transition_to_parked(&mut self, worker: &Worker) -> bool {
+ // Workers should not park if they have work to do
+ if self.has_tasks() || self.is_traced {
+ return false;
+ }
+
+ // When the final worker transitions **out** of searching to parked, it
+ // must check all the queues one last time in case work materialized
+ // between the last work scan and transitioning out of searching.
+ let is_last_searcher = worker.handle.shared.idle.transition_worker_to_parked(
+ &worker.handle.shared,
+ worker.index,
+ self.is_searching,
+ );
+
+ // The worker is no longer searching. Setting this is the local cache
+ // only.
+ self.is_searching = false;
+
+ if is_last_searcher {
+ worker.handle.notify_if_work_pending();
+ }
+
+ true
+ }
+
+ /// Returns `true` if the transition happened.
+ fn transition_from_parked(&mut self, worker: &Worker) -> bool {
+ // If a task is in the lifo slot/run queue, then we must unpark regardless of
+ // being notified
+ if self.has_tasks() {
+ // When a worker wakes, it should only transition to the "searching"
+ // state when the wake originates from another worker *or* a new task
+ // is pushed. We do *not* want the worker to transition to "searching"
+ // when it wakes when the I/O driver receives new events.
+ self.is_searching = !worker
+ .handle
+ .shared
+ .idle
+ .unpark_worker_by_id(&worker.handle.shared, worker.index);
+ return true;
+ }
+
+ if worker
+ .handle
+ .shared
+ .idle
+ .is_parked(&worker.handle.shared, worker.index)
+ {
+ return false;
+ }
+
+ // When unparked, the worker is in the searching state.
+ self.is_searching = true;
+ true
+ }
+
+ /// Runs maintenance work such as checking the pool's state.
+ fn maintenance(&mut self, worker: &Worker) {
+ self.stats
+ .submit(&worker.handle.shared.worker_metrics[worker.index]);
+
+ if !self.is_shutdown {
+ // Check if the scheduler has been shutdown
+ let synced = worker.handle.shared.synced.lock();
+ self.is_shutdown = worker.inject().is_closed(&synced.inject);
+ }
+
+ if !self.is_traced {
+ // Check if the worker should be tracing.
+ self.is_traced = worker.handle.shared.trace_status.trace_requested();
+ }
+ }
+
+ /// Signals all tasks to shut down, and waits for them to complete. Must run
+ /// before we enter the single-threaded phase of shutdown processing.
+ fn pre_shutdown(&mut self, worker: &Worker) {
+ // Signal to all tasks to shut down.
+ worker.handle.shared.owned.close_and_shutdown_all();
+
+ self.stats
+ .submit(&worker.handle.shared.worker_metrics[worker.index]);
+ }
+
+ /// Shuts down the core.
+ fn shutdown(&mut self, handle: &Handle) {
+ // Take the parker out of the core
+ let mut park = self.park.take().expect("park missing");
+
+ // Drain the LIFO slot and the local run queue
+ while self.next_local_task().is_some() {}
+
+ park.shutdown(&handle.driver);
+ }
+
+ fn tune_global_queue_interval(&mut self, worker: &Worker) {
+ let next = self
+ .stats
+ .tuned_global_queue_interval(&worker.handle.shared.config);
+
+ debug_assert!(next > 1);
+
+ // Smooth out jitter
+ if abs_diff(self.global_queue_interval, next) > 2 {
+ self.global_queue_interval = next;
+ }
+ }
+}
+
+impl Worker {
+ /// Returns a reference to the scheduler's injection queue.
+ fn inject(&self) -> &inject::Shared<Arc<Handle>> {
+ &self.handle.shared.inject
+ }
+}
+
+// TODO: Move `Handle` impls into handle.rs
+impl task::Schedule for Arc<Handle> {
+ fn release(&self, task: &Task) -> Option<Task> {
+ self.shared.owned.remove(task)
+ }
+
+ fn schedule(&self, task: Notified) {
+ self.schedule_task(task, false);
+ }
+
+ fn yield_now(&self, task: Notified) {
+ self.schedule_task(task, true);
+ }
+}
+
+impl Handle {
+ pub(super) fn schedule_task(&self, task: Notified, is_yield: bool) { // schedules `task` on the current worker's core when possible, otherwise via the inject queue
+ with_current(|maybe_cx| {
+ if let Some(cx) = maybe_cx {
+ // Make sure the task is part of the **current** scheduler (pointer identity of the handle).
+ if self.ptr_eq(&cx.worker.handle) {
+ // And the current thread still holds a core (the context's core slot is `Some`).
+ if let Some(core) = cx.core.borrow_mut().as_mut() {
+ self.schedule_local(core, task, is_yield);
+ return;
+ }
+ }
+ }
+
+ // Otherwise (no multi-thread context, a different scheduler, or no core held), use the inject queue.
+ self.push_remote_task(task);
+ self.notify_parked_remote(); // then try to unpark a worker so the task gets picked up
+ })
+ }
+
+ fn schedule_local(&self, core: &mut Core, task: Notified, is_yield: bool) { // queues `task` on `core` (run queue or LIFO slot) and notifies another worker when warranted
+ core.stats.inc_local_schedule_count();
+
+ // Spawning from the worker thread. If scheduling a "yield" then the
+ // task must always be pushed to the back of the queue, enabling other
+ // tasks to be executed. If **not** a yield, then there is more
+ // flexibility and the task may go to the front of the queue.
+ let should_notify = if is_yield || !core.lifo_enabled {
+ core.run_queue
+ .push_back_or_overflow(task, self, &mut core.stats);
+ true
+ } else {
+ // Push to the LIFO slot, moving any previous occupant to the back of the run queue.
+ let prev = core.lifo_slot.take();
+ let ret = prev.is_some(); // notify only if a displaced task ended up in the run queue
+
+ if let Some(prev) = prev {
+ core.run_queue
+ .push_back_or_overflow(prev, self, &mut core.stats);
+ }
+
+ core.lifo_slot = Some(task);
+
+ ret
+ };
+
+ // Only notify if not currently parked. If `park` is `None`, then the
+ // scheduling is from a resource driver. As notifications often come in
+ // batches, the notification is delayed until the park is complete.
+ if should_notify && core.park.is_some() {
+ self.notify_parked_local();
+ }
+ }
+
+ fn next_remote_task(&self) -> Option<Notified> { // pops one task from the shared inject queue, if any
+ if self.shared.inject.is_empty() { // fast path: skip taking the lock when the queue reports empty
+ return None;
+ }
+
+ let mut synced = self.shared.synced.lock();
+ // safety: passing in the `inject::Synced` (`synced.inject`) obtained under the `shared.synced` lock
+ unsafe { self.shared.inject.pop(&mut synced.inject) }
+ }
+
+ fn push_remote_task(&self, task: Notified) { // pushes `task` onto the shared inject queue and counts it as a remote schedule
+ self.shared.scheduler_metrics.inc_remote_schedule_count();
+
+ let mut synced = self.shared.synced.lock();
+ // safety: passing in the `inject::Synced` (`synced.inject`) obtained under the `shared.synced` lock
+ unsafe {
+ self.shared.inject.push(&mut synced.inject, task);
+ }
+ }
+
+ pub(super) fn close(&self) { // closes the inject queue under the `synced` lock and, if `close` returns true, unparks every worker
+ if self
+ .shared
+ .inject
+ .close(&mut self.shared.synced.lock().inject)
+ {
+ self.notify_all(); // NOTE(review): presumably `true` means this call performed the close — confirm against `inject::Shared::close`
+ }
+ }
+
+ fn notify_parked_local(&self) {
+ super::counters::inc_num_inc_notify_local();
+
+ if let Some(index) = self.shared.idle.worker_to_notify(&self.shared) {
+ super::counters::inc_num_unparks_local();
+ self.shared.remotes[index].unpark.unpark(&self.driver);
+ }
+ }
+
+ fn notify_parked_remote(&self) {
+ if let Some(index) = self.shared.idle.worker_to_notify(&self.shared) {
+ self.shared.remotes[index].unpark.unpark(&self.driver);
+ }
+ }
+
+ pub(super) fn notify_all(&self) {
+ for remote in &self.shared.remotes[..] {
+ remote.unpark.unpark(&self.driver);
+ }
+ }
+
+ fn notify_if_work_pending(&self) { // issues at most one local notification if any steal queue or the inject queue is non-empty
+ for remote in &self.shared.remotes[..] {
+ if !remote.steal.is_empty() { // some worker's queue still has tasks
+ self.notify_parked_local();
+ return; // one notification is enough; skip the inject-queue check
+ }
+ }
+
+ if !self.shared.inject.is_empty() { // no worker queue had tasks, but the inject queue does
+ self.notify_parked_local();
+ }
+ }
+
+ fn transition_worker_from_searching(&self) {
+ if self.shared.idle.transition_worker_from_searching() {
+ // We are the final searching worker. Because work was found, we
+ // need to notify another worker.
+ self.notify_parked_local();
+ }
+ }
+
+ /// Signals that a worker has observed the shutdown signal and has replaced
+ /// its core back into its handle.
+ ///
+ /// If all workers have reached this point, the final cleanup is performed.
+ fn shutdown_core(&self, core: Box<Core>) {
+ let mut cores = self.shared.shutdown_cores.lock();
+ cores.push(core);
+
+ if cores.len() != self.shared.remotes.len() { // not every worker has handed in its core yet
+ return;
+ }
+
+ debug_assert!(self.shared.owned.is_empty()); // debug builds only: no owned tasks should remain at this point
+
+ for mut core in cores.drain(..) { // last worker in: shut down every collected core while still holding the lock
+ core.shutdown(self);
+ }
+
+ // Drain the injection queue
+ //
+ // We already shut down every task, so we can simply drop the tasks.
+ while let Some(task) = self.next_remote_task() {
+ drop(task);
+ }
+ }
+
+ fn ptr_eq(&self, other: &Handle) -> bool {
+ std::ptr::eq(self, other)
+ }
+}
+
+impl Overflow<Arc<Handle>> for Handle {
+ fn push(&self, task: task::Notified<Arc<Handle>>) { // a single overflowing task goes to the inject queue
+ self.push_remote_task(task);
+ }
+
+ fn push_batch<I>(&self, iter: I) // a batch of overflowing tasks goes to the inject queue in one call
+ where
+ I: Iterator<Item = task::Notified<Arc<Handle>>>,
+ {
+ unsafe { // NOTE(review): no safety note upstream; `self` is passed as the lock source for this same handle's `inject` — confirm against `push_batch`'s contract
+ self.shared.inject.push_batch(self, iter);
+ }
+ }
+}
+
+pub(crate) struct InjectGuard<'a> {
+ lock: crate::loom::sync::MutexGuard<'a, Synced>,
+}
+
+impl<'a> AsMut<inject::Synced> for InjectGuard<'a> {
+ fn as_mut(&mut self) -> &mut inject::Synced {
+ &mut self.lock.inject
+ }
+}
+
+impl<'a> Lock<inject::Synced> for &'a Handle {
+ type Handle = InjectGuard<'a>;
+
+ fn lock(self) -> Self::Handle {
+ InjectGuard {
+ lock: self.shared.synced.lock(),
+ }
+ }
+}
+
+#[track_caller]
+fn with_current<R>(f: impl FnOnce(Option<&Context>) -> R) -> R { // runs `f` with the current multi-thread scheduler context, or `None` if there is none
+ use scheduler::Context::MultiThread;
+
+ context::with_scheduler(|ctx| match ctx {
+ Some(MultiThread(ctx)) => f(Some(ctx)),
+ _ => f(None), // no scheduler context, or a context of a different scheduler kind
+ })
+}
+
+// Absolute difference of `a` and `b`, hand-rolled because `u32::abs_diff` is not available on Tokio's MSRV.
+fn abs_diff(a: u32, b: u32) -> u32 {
+ if a > b { // always subtract the smaller from the larger so the `u32` subtraction cannot underflow
+ a - b
+ } else {
+ b - a
+ }
+}
diff --git a/third_party/rust/tokio/src/runtime/scheduler/multi_thread/worker/metrics.rs b/third_party/rust/tokio/src/runtime/scheduler/multi_thread/worker/metrics.rs
new file mode 100644
index 0000000000..a9a5ab3ed6
--- /dev/null
+++ b/third_party/rust/tokio/src/runtime/scheduler/multi_thread/worker/metrics.rs
@@ -0,0 +1,11 @@
+use super::Shared;
+
+impl Shared {
+ pub(crate) fn injection_queue_depth(&self) -> usize {
+ self.inject.len()
+ }
+
+ pub(crate) fn worker_local_queue_depth(&self, worker: usize) -> usize {
+ self.remotes[worker].steal.len()
+ }
+}
diff --git a/third_party/rust/tokio/src/runtime/scheduler/multi_thread/worker/taskdump.rs b/third_party/rust/tokio/src/runtime/scheduler/multi_thread/worker/taskdump.rs
new file mode 100644
index 0000000000..d310d9f6d3
--- /dev/null
+++ b/third_party/rust/tokio/src/runtime/scheduler/multi_thread/worker/taskdump.rs
@@ -0,0 +1,79 @@
+use super::{Core, Handle, Shared};
+
+use crate::loom::sync::Arc;
+use crate::runtime::scheduler::multi_thread::Stats;
+use crate::runtime::task::trace::trace_multi_thread;
+use crate::runtime::{dump, WorkerMetrics};
+
+use std::time::Duration;
+
+impl Handle {
+ pub(super) fn trace_core(&self, mut core: Box<Core>) -> Box<Core> { // takes part in a task dump; always hands `core` back to the caller
+ core.is_traced = false;
+
+ if core.is_shutdown { // never trace a core that is shutting down
+ return core;
+ }
+
+ // wait for other workers at the `trace_start` barrier, or time out without tracing
+ let timeout = Duration::from_millis(250); // a _very_ generous timeout
+ let barrier =
+ if let Some(barrier) = self.shared.trace_status.trace_start.wait_timeout(timeout) {
+ barrier
+ } else {
+ // timed out: don't attempt to trace
+ return core;
+ };
+
+ if !barrier.is_leader() {
+ // non-leaders only wait for the leader to finish tracing
+ self.shared.trace_status.trace_end.wait();
+ return core;
+ }
+
+ // trace (leader only)
+
+ let owned = &self.shared.owned;
+ let mut local = self.shared.steal_all(); // pull every worker's queued tasks into one local queue
+ let synced = &self.shared.synced;
+ let injection = &self.shared.inject;
+
+ // safety: `trace_multi_thread` is invoked with the same `synced` that `injection`
+ // was created with.
+ let traces = unsafe { trace_multi_thread(owned, &mut local, synced, injection) }
+ .into_iter()
+ .map(dump::Task::new)
+ .collect();
+
+ let result = dump::Dump::new(traces);
+
+ // stash the result in the shared trace status
+ self.shared.trace_status.stash_result(result);
+
+ // allow other workers to proceed past the `trace_end` barrier
+ self.shared.trace_status.trace_end.wait();
+
+ core
+ }
+}
+
+impl Shared {
+ /// Steal all tasks from remotes into a single local queue.
+ pub(super) fn steal_all(&self) -> super::queue::Local<Arc<Handle>> {
+ let (_steal, mut local) = super::queue::local(); // fresh queue pair; only the `Local` half is used
+
+ let worker_metrics = WorkerMetrics::new(); // throwaway metrics, created only to build the `Stats` passed to `steal_into`
+ let mut stats = Stats::new(&worker_metrics);
+
+ for remote in self.remotes.iter() {
+ let steal = &remote.steal;
+ while !steal.is_empty() { // keep stealing until this remote's queue reports empty
+ if let Some(task) = steal.steal_into(&mut local, &mut stats) {
+ local.push_back([task].into_iter()); // also keep the task that `steal_into` returned
+ }
+ }
+ }
+
+ local
+ }
+}
diff --git a/third_party/rust/tokio/src/runtime/scheduler/multi_thread/worker/taskdump_mock.rs b/third_party/rust/tokio/src/runtime/scheduler/multi_thread/worker/taskdump_mock.rs
new file mode 100644
index 0000000000..24c5600ce2
--- /dev/null
+++ b/third_party/rust/tokio/src/runtime/scheduler/multi_thread/worker/taskdump_mock.rs
@@ -0,0 +1,7 @@
+use super::{Core, Handle};
+
+impl Handle {
+ pub(super) fn trace_core(&self, core: Box<Core>) -> Box<Core> {
+ core
+ }
+}
diff --git a/third_party/rust/tokio/src/runtime/signal/mod.rs b/third_party/rust/tokio/src/runtime/signal/mod.rs
new file mode 100644
index 0000000000..24f2f4c6cb
--- /dev/null
+++ b/third_party/rust/tokio/src/runtime/signal/mod.rs
@@ -0,0 +1,142 @@
+#![cfg_attr(not(feature = "rt"), allow(dead_code))]
+
+//! Signal driver
+
+use crate::runtime::{driver, io};
+use crate::signal::registry::globals;
+
+use mio::net::UnixStream;
+use std::io::{self as std_io, Read};
+use std::sync::{Arc, Weak};
+use std::time::Duration;
+
+/// Responsible for registering wakeups when an OS signal is received, and
+/// subsequently dispatching notifications to any signal listeners as appropriate.
+///
+/// Note: this driver relies on having an enabled IO driver in order to listen to
+/// pipe write wakeups.
+#[derive(Debug)]
+pub(crate) struct Driver {
+ /// Thread parker. The `Driver` park implementation delegates to this.
+ io: io::Driver,
+
+ /// A pipe for receiving wake events from the signal handler
+ receiver: UnixStream,
+
+ /// Shared state. The driver keeps a strong ref and the handle keeps a weak
+ /// ref. The weak ref is used to check if the driver is still active before
+ /// trying to register a signal handler.
+ inner: Arc<()>,
+}
+
+#[derive(Debug, Default)]
+pub(crate) struct Handle {
+ /// Paired w/ the `Arc` above and is used to check if the driver is still
+ /// around before attempting to register a signal handler.
+ inner: Weak<()>,
+}
+
+// ===== impl Driver =====
+
+impl Driver {
+ /// Creates a new signal `Driver` instance that delegates parking to the given `io` driver.
+ pub(crate) fn new(io: io::Driver, io_handle: &io::Handle) -> std_io::Result<Self> {
+ use std::mem::ManuallyDrop;
+ use std::os::unix::io::{AsRawFd, FromRawFd};
+
+ // NB: We give each driver a "fresh" receiver file descriptor to avoid
+ // the issues described in alexcrichton/tokio-process#42.
+ //
+ // In the past we would reuse the actual receiver file descriptor and
+ // swallow any errors around double registration of the same descriptor.
+ // I'm not sure if the second (failed) registration simply doesn't end
+ // up receiving wake up notifications, or there could be some race
+ // condition when consuming readiness events, but having distinct
+ // descriptors appears to mitigate this.
+ //
+ // Unfortunately we cannot just use a single global UnixStream instance
+ // either, since we can't assume they will always be registered with the
+ // exact same reactor.
+ //
+ // Mio 0.7 removed `try_clone()` as an API due to unexpected behavior
+ // with registering dups with the same reactor. In this case, duping is
+ // safe as each dup is registered with separate reactors **and** we
+ // only expect at least one dup to receive the notification.
+
+ // Wrapped in `ManuallyDrop` below because we don't actually own this instance of UnixStream.
+ let receiver_fd = globals().receiver.as_raw_fd();
+
+ // safety: there is nothing unsafe about this, but the `from_raw_fd` fn is marked as unsafe.
+ let original =
+ ManuallyDrop::new(unsafe { std::os::unix::net::UnixStream::from_raw_fd(receiver_fd) });
+ let mut receiver = UnixStream::from_std(original.try_clone()?); // the driver gets its own duplicated descriptor
+
+ io_handle.register_signal_receiver(&mut receiver)?;
+
+ Ok(Self {
+ io,
+ receiver,
+ inner: Arc::new(()), // strong ref; `handle()` hands out the matching `Weak`
+ })
+ }
+
+ /// Returns a handle to this event loop which can be sent across threads
+ /// and can be used as a proxy to the event loop itself.
+ pub(crate) fn handle(&self) -> Handle {
+ Handle {
+ inner: Arc::downgrade(&self.inner),
+ }
+ }
+
+ pub(crate) fn park(&mut self, handle: &driver::Handle) {
+ self.io.park(handle);
+ self.process();
+ }
+
+ pub(crate) fn park_timeout(&mut self, handle: &driver::Handle, duration: Duration) {
+ self.io.park_timeout(handle, duration);
+ self.process();
+ }
+
+ pub(crate) fn shutdown(&mut self, handle: &driver::Handle) {
+ self.io.shutdown(handle)
+ }
+
+ fn process(&mut self) { // called after every park: drains the receiver and broadcasts if a signal readiness event was seen
+ // If the signal pipe has not received a readiness event, then there is
+ // nothing else to do.
+ if !self.io.consume_signal_ready() {
+ return;
+ }
+
+ // Drain the pipe completely so we can receive a new readiness event
+ // if another signal has come in.
+ let mut buf = [0; 128];
+ loop {
+ match self.receiver.read(&mut buf) {
+ Ok(0) => panic!("EOF on self-pipe"),
+ Ok(_) => continue, // Keep reading; the bytes themselves are discarded
+ Err(e) if e.kind() == std_io::ErrorKind::WouldBlock => break, // nothing left to read
+ Err(e) => panic!("Bad read on self-pipe: {}", e),
+ }
+ }
+
+ // Broadcast any signals which were received
+ globals().broadcast();
+ }
+}
+
+// ===== impl Handle =====
+
+impl Handle {
+ pub(crate) fn check_inner(&self) -> std_io::Result<()> { // `Ok(())` while the driver's `Arc` is alive, otherwise an `Other` I/O error
+ if self.inner.strong_count() > 0 { // a strong ref still exists for the `Weak` held by this handle
+ Ok(())
+ } else {
+ Err(std_io::Error::new(
+ std_io::ErrorKind::Other,
+ "signal driver gone",
+ ))
+ }
+ }
+}
diff --git a/third_party/rust/tokio/src/runtime/task/abort.rs b/third_party/rust/tokio/src/runtime/task/abort.rs
new file mode 100644
index 0000000000..6edca10040
--- /dev/null
+++ b/third_party/rust/tokio/src/runtime/task/abort.rs
@@ -0,0 +1,87 @@
+use crate::runtime::task::{Header, RawTask};
+use std::fmt;
+use std::panic::{RefUnwindSafe, UnwindSafe};
+
+/// An owned permission to abort a spawned task, without awaiting its completion.
+///
+/// Unlike a [`JoinHandle`], an `AbortHandle` does *not* represent the
+/// permission to await the task's completion, only to terminate it.
+///
+/// The task may be aborted by calling the [`AbortHandle::abort`] method.
+/// Dropping an `AbortHandle` releases the permission to terminate the task
+/// --- it does *not* abort the task.
+///
+/// [`JoinHandle`]: crate::task::JoinHandle
+#[cfg_attr(docsrs, doc(cfg(feature = "rt")))]
+pub struct AbortHandle {
+ raw: RawTask,
+}
+
+impl AbortHandle {
+ pub(super) fn new(raw: RawTask) -> Self {
+ Self { raw }
+ }
+
+ /// Abort the task associated with the handle.
+ ///
+ /// Awaiting a cancelled task might complete as usual if the task was
+ /// already completed at the time it was cancelled, but most likely it
+ /// will fail with a [cancelled] `JoinError`.
+ ///
+ /// If the task was already cancelled, such as by [`JoinHandle::abort`],
+ /// this method will do nothing.
+ ///
+ /// [cancelled]: method@super::error::JoinError::is_cancelled
+ /// [`JoinHandle::abort`]: method@super::JoinHandle::abort
+ pub fn abort(&self) {
+ self.raw.remote_abort();
+ }
+
+ /// Checks if the task associated with this `AbortHandle` has finished.
+ ///
+ /// Please note that this method can return `false` even if `abort` has been
+ /// called on the task. This is because the cancellation process may take
+ /// some time, and this method does not return `true` until it has
+ /// completed.
+ pub fn is_finished(&self) -> bool {
+ let state = self.raw.state().load();
+ state.is_complete()
+ }
+
+ /// Returns a [task ID] that uniquely identifies this task relative to other
+ /// currently spawned tasks.
+ ///
+ /// **Note**: This is an [unstable API][unstable]. The public API of this type
+ /// may break in 1.x releases. See [the documentation on unstable
+ /// features][unstable] for details.
+ ///
+ /// [task ID]: crate::task::Id
+ /// [unstable]: crate#unstable-features
+ #[cfg(tokio_unstable)]
+ #[cfg_attr(docsrs, doc(cfg(tokio_unstable)))]
+ pub fn id(&self) -> super::Id {
+ // Safety: The header pointer is valid.
+ unsafe { Header::get_id(self.raw.header_ptr()) }
+ }
+}
+
+unsafe impl Send for AbortHandle {}
+unsafe impl Sync for AbortHandle {}
+
+impl UnwindSafe for AbortHandle {}
+impl RefUnwindSafe for AbortHandle {}
+
+impl fmt::Debug for AbortHandle {
+ fn fmt(&self, fmt: &mut fmt::Formatter<'_>) -> fmt::Result { // prints as `AbortHandle { id: .. }`
+ // Safety: The header pointer is valid.
+ let id_ptr = unsafe { Header::get_id_ptr(self.raw.header_ptr()) };
+ let id = unsafe { id_ptr.as_ref() }; // NOTE(review): no safety note upstream; presumably the task stays allocated while this handle exists — confirm
+ fmt.debug_struct("AbortHandle").field("id", id).finish()
+ }
+}
+
+impl Drop for AbortHandle {
+ fn drop(&mut self) {
+ self.raw.drop_abort_handle();
+ }
+}
diff --git a/third_party/rust/tokio/src/runtime/task/core.rs b/third_party/rust/tokio/src/runtime/task/core.rs
new file mode 100644
index 0000000000..110933e58f
--- /dev/null
+++ b/third_party/rust/tokio/src/runtime/task/core.rs
@@ -0,0 +1,470 @@
+//! Core task module.
+//!
+//! # Safety
+//!
+//! The functions in this module are private to the `task` module. All of them
+//! should be considered `unsafe` to use, but are not marked as such since it
+//! would be too noisy.
+//!
+//! Make sure to consult the relevant safety section of each function before
+//! use.
+
+use crate::future::Future;
+use crate::loom::cell::UnsafeCell;
+use crate::runtime::context;
+use crate::runtime::task::raw::{self, Vtable};
+use crate::runtime::task::state::State;
+use crate::runtime::task::{Id, Schedule};
+use crate::util::linked_list;
+
+use std::pin::Pin;
+use std::ptr::NonNull;
+use std::task::{Context, Poll, Waker};
+
+/// The task cell. Contains the components of the task.
+///
+/// It is critical for `Header` to be the first field as the task structure will
+/// be referenced by both *mut Cell and *mut Header.
+///
+/// Any changes to the layout of this struct _must_ also be reflected in the
+/// const fns in raw.rs.
+///
+// # This struct should be cache padded to avoid false sharing. The cache padding rules are copied
+// from crossbeam-utils/src/cache_padded.rs
+//
+// Starting from Intel's Sandy Bridge, spatial prefetcher is now pulling pairs of 64-byte cache
+// lines at a time, so we have to align to 128 bytes rather than 64.
+//
+// Sources:
+// - https://www.intel.com/content/dam/www/public/us/en/documents/manuals/64-ia-32-architectures-optimization-manual.pdf
+// - https://github.com/facebook/folly/blob/1b5288e6eea6df074758f877c849b6e73bbb9fbb/folly/lang/Align.h#L107
+//
+// ARM's big.LITTLE architecture has asymmetric cores and "big" cores have 128-byte cache line size.
+//
+// Sources:
+// - https://www.mono-project.com/news/2016/09/12/arm64-icache/
+//
+// powerpc64 has 128-byte cache line size.
+//
+// Sources:
+// - https://github.com/golang/go/blob/3dd58676054223962cd915bb0934d1f9f489d4d2/src/internal/cpu/cpu_ppc64x.go#L9
+#[cfg_attr(
+ any(
+ target_arch = "x86_64",
+ target_arch = "aarch64",
+ target_arch = "powerpc64",
+ ),
+ repr(align(128))
+)]
+// arm, mips, mips64, riscv64, sparc, and hexagon have 32-byte cache line size.
+//
+// Sources:
+// - https://github.com/golang/go/blob/3dd58676054223962cd915bb0934d1f9f489d4d2/src/internal/cpu/cpu_arm.go#L7
+// - https://github.com/golang/go/blob/3dd58676054223962cd915bb0934d1f9f489d4d2/src/internal/cpu/cpu_mips.go#L7
+// - https://github.com/golang/go/blob/3dd58676054223962cd915bb0934d1f9f489d4d2/src/internal/cpu/cpu_mipsle.go#L7
+// - https://github.com/golang/go/blob/3dd58676054223962cd915bb0934d1f9f489d4d2/src/internal/cpu/cpu_mips64x.go#L9
+// - https://github.com/golang/go/blob/3dd58676054223962cd915bb0934d1f9f489d4d2/src/internal/cpu/cpu_riscv64.go#L7
+// - https://github.com/torvalds/linux/blob/3516bd729358a2a9b090c1905bd2a3fa926e24c6/arch/sparc/include/asm/cache.h#L17
+// - https://github.com/torvalds/linux/blob/3516bd729358a2a9b090c1905bd2a3fa926e24c6/arch/hexagon/include/asm/cache.h#L12
+//
+// riscv32 is assumed not to exceed the cache line size of riscv64.
+#[cfg_attr(
+ any(
+ target_arch = "arm",
+ target_arch = "mips",
+ target_arch = "mips64",
+ target_arch = "riscv32",
+ target_arch = "riscv64",
+ target_arch = "sparc",
+ target_arch = "hexagon",
+ ),
+ repr(align(32))
+)]
+// m68k has 16-byte cache line size.
+//
+// Sources:
+// - https://github.com/torvalds/linux/blob/3516bd729358a2a9b090c1905bd2a3fa926e24c6/arch/m68k/include/asm/cache.h#L9
+#[cfg_attr(target_arch = "m68k", repr(align(16)))]
+// s390x has 256-byte cache line size.
+//
+// Sources:
+// - https://github.com/golang/go/blob/3dd58676054223962cd915bb0934d1f9f489d4d2/src/internal/cpu/cpu_s390x.go#L7
+// - https://github.com/torvalds/linux/blob/3516bd729358a2a9b090c1905bd2a3fa926e24c6/arch/s390/include/asm/cache.h#L13
+#[cfg_attr(target_arch = "s390x", repr(align(256)))]
+// x86, wasm, and sparc64 have 64-byte cache line size.
+//
+// Sources:
+// - https://github.com/golang/go/blob/dda2991c2ea0c5914714469c4defc2562a907230/src/internal/cpu/cpu_x86.go#L9
+// - https://github.com/golang/go/blob/3dd58676054223962cd915bb0934d1f9f489d4d2/src/internal/cpu/cpu_wasm.go#L7
+// - https://github.com/torvalds/linux/blob/3516bd729358a2a9b090c1905bd2a3fa926e24c6/arch/sparc/include/asm/cache.h#L19
+//
+// All others are assumed to have 64-byte cache line size.
+#[cfg_attr(
+ not(any(
+ target_arch = "x86_64",
+ target_arch = "aarch64",
+ target_arch = "powerpc64",
+ target_arch = "arm",
+ target_arch = "mips",
+ target_arch = "mips64",
+ target_arch = "riscv32",
+ target_arch = "riscv64",
+ target_arch = "sparc",
+ target_arch = "hexagon",
+ target_arch = "m68k",
+ target_arch = "s390x",
+ )),
+ repr(align(64))
+)]
+#[repr(C)]
+pub(super) struct Cell<T: Future, S> {
+ /// Hot task state data
+ pub(super) header: Header,
+
+ /// Either the future or output, depending on the execution stage.
+ pub(super) core: Core<T, S>,
+
+ /// Cold data
+ pub(super) trailer: Trailer,
+}
+
+pub(super) struct CoreStage<T: Future> {
+ stage: UnsafeCell<Stage<T>>,
+}
+
+/// The core of the task.
+///
+/// Holds the future or output, depending on the stage of execution.
+///
+/// Any changes to the layout of this struct _must_ also be reflected in the
+/// const fns in raw.rs.
+#[repr(C)]
+pub(super) struct Core<T: Future, S> {
+ /// Scheduler used to drive this future.
+ pub(super) scheduler: S,
+
+ /// The task's ID, used for populating `JoinError`s.
+ pub(super) task_id: Id,
+
+ /// Either the future or the output.
+ pub(super) stage: CoreStage<T>,
+}
+
+/// Crate public as this is also needed by the pool.
+#[repr(C)]
+pub(crate) struct Header {
+ /// Task state.
+ pub(super) state: State,
+
+ /// Pointer to next task, used with the injection queue.
+ pub(super) queue_next: UnsafeCell<Option<NonNull<Header>>>,
+
+ /// Table of function pointers for executing actions on the task.
+ pub(super) vtable: &'static Vtable,
+
+ /// This integer contains the id of the OwnedTasks or LocalOwnedTasks that
+ /// this task is stored in. If the task is not in any list, should be the
+ /// id of the list that it was previously in, or zero if it has never been
+ /// in any list.
+ ///
+ /// Once a task has been bound to a list, it can never be bound to another
+ /// list, even if removed from the first list.
+ ///
+ /// The id is not unset when removed from a list because we want to be able
+ /// to read the id without synchronization, even if it is concurrently being
+ /// removed from the list.
+ pub(super) owner_id: UnsafeCell<u64>,
+
+ /// The tracing ID for this instrumented task.
+ #[cfg(all(tokio_unstable, feature = "tracing"))]
+ pub(super) tracing_id: Option<tracing::Id>,
+}
+
+unsafe impl Send for Header {}
+unsafe impl Sync for Header {}
+
+/// Cold data is stored after the future. Data is considered cold if it is only
+/// used during creation or shutdown of the task.
+pub(super) struct Trailer {
+ /// Pointers for the linked list in the `OwnedTasks` that owns this task.
+ pub(super) owned: linked_list::Pointers<Header>,
+ /// Consumer task waiting on completion of this task.
+ pub(super) waker: UnsafeCell<Option<Waker>>,
+}
+
+generate_addr_of_methods! {
+ impl<> Trailer {
+ pub(super) unsafe fn addr_of_owned(self: NonNull<Self>) -> NonNull<linked_list::Pointers<Header>> {
+ &self.owned
+ }
+ }
+}
+
+/// Either the future or the output.
+pub(super) enum Stage<T: Future> {
+ Running(T),
+ Finished(super::Result<T::Output>),
+ Consumed,
+}
+
+impl<T: Future, S: Schedule> Cell<T, S> {
+ /// Allocates a new task cell, containing the header, trailer, and core
+ /// structures. The future is stored as `Stage::Running` and the owner id starts at 0.
+ pub(super) fn new(future: T, scheduler: S, state: State, task_id: Id) -> Box<Cell<T, S>> {
+ #[cfg(all(tokio_unstable, feature = "tracing"))]
+ let tracing_id = future.id();
+ let result = Box::new(Cell {
+ header: Header {
+ state,
+ queue_next: UnsafeCell::new(None),
+ vtable: raw::vtable::<T, S>(),
+ owner_id: UnsafeCell::new(0),
+ #[cfg(all(tokio_unstable, feature = "tracing"))]
+ tracing_id,
+ },
+ core: Core {
+ scheduler,
+ stage: CoreStage {
+ stage: UnsafeCell::new(Stage::Running(future)),
+ },
+ task_id,
+ },
+ trailer: Trailer {
+ waker: UnsafeCell::new(None),
+ owned: linked_list::Pointers::new(),
+ },
+ });
+
+ #[cfg(debug_assertions)]
+ { // debug builds only: check that the vtable offsets used by `Header::get_*` match the real field addresses
+ let trailer_addr = (&result.trailer) as *const Trailer as usize;
+ let trailer_ptr = unsafe { Header::get_trailer(NonNull::from(&result.header)) };
+ assert_eq!(trailer_addr, trailer_ptr.as_ptr() as usize);
+
+ let scheduler_addr = (&result.core.scheduler) as *const S as usize;
+ let scheduler_ptr =
+ unsafe { Header::get_scheduler::<S>(NonNull::from(&result.header)) };
+ assert_eq!(scheduler_addr, scheduler_ptr.as_ptr() as usize);
+
+ let id_addr = (&result.core.task_id) as *const Id as usize;
+ let id_ptr = unsafe { Header::get_id_ptr(NonNull::from(&result.header)) };
+ assert_eq!(id_addr, id_ptr.as_ptr() as usize);
+ }
+
+ result
+ }
+}
+
+impl<T: Future> CoreStage<T> {
+ pub(super) fn with_mut<R>(&self, f: impl FnOnce(*mut Stage<T>) -> R) -> R {
+ self.stage.with_mut(f)
+ }
+}
+
+/// Set and clear the task id in the context when the future is executed or
+/// dropped, or when the output produced by the future is dropped.
+pub(crate) struct TaskIdGuard {
+ parent_task_id: Option<Id>,
+}
+
+impl TaskIdGuard {
+ fn enter(id: Id) -> Self {
+ TaskIdGuard {
+ parent_task_id: context::set_current_task_id(Some(id)),
+ }
+ }
+}
+
+impl Drop for TaskIdGuard {
+ fn drop(&mut self) {
+ context::set_current_task_id(self.parent_task_id);
+ }
+}
+
+impl<T: Future, S: Schedule> Core<T, S> {
+ /// Polls the future.
+ ///
+ /// # Safety
+ ///
+ /// The caller must ensure it is safe to mutate the `stage` field. This
+ /// requires ensuring mutual exclusion between any concurrent thread that
+ /// might modify the future or output field.
+ ///
+ /// The mutual exclusion is implemented by `Harness` and the `Lifecycle`
+ /// component of the task state.
+ ///
+ /// `self` must also be pinned. This is handled by storing the task on the
+ /// heap.
+ pub(super) fn poll(&self, mut cx: Context<'_>) -> Poll<T::Output> {
+ let res = {
+ self.stage.stage.with_mut(|ptr| {
+ // Safety: The caller ensures mutual exclusion to the field.
+ let future = match unsafe { &mut *ptr } {
+ Stage::Running(future) => future,
+ _ => unreachable!("unexpected stage"),
+ };
+
+ // Safety: The caller ensures the future is pinned.
+ let future = unsafe { Pin::new_unchecked(future) };
+
+ let _guard = TaskIdGuard::enter(self.task_id); // set this task's id in the context for the duration of the poll
+ future.poll(&mut cx)
+ })
+ };
+
+ if res.is_ready() { // the future completed: replace it with `Stage::Consumed`
+ self.drop_future_or_output();
+ }
+
+ res
+ }
+
+ /// Drops the future or output by replacing the stage with `Stage::Consumed`.
+ ///
+ /// # Safety
+ ///
+ /// The caller must ensure it is safe to mutate the `stage` field.
+ pub(super) fn drop_future_or_output(&self) {
+ // Safety: the caller ensures mutual exclusion to the field.
+ unsafe {
+ self.set_stage(Stage::Consumed);
+ }
+ }
+
+ /// Stores the task output.
+ ///
+ /// # Safety
+ ///
+ /// The caller must ensure it is safe to mutate the `stage` field.
+ pub(super) fn store_output(&self, output: super::Result<T::Output>) {
+ // Safety: the caller ensures mutual exclusion to the field.
+ unsafe {
+ self.set_stage(Stage::Finished(output));
+ }
+ }
+
+ /// Takes the task output, leaving `Stage::Consumed` behind. Panics if the stage is not `Finished`.
+ ///
+ /// # Safety
+ ///
+ /// The caller must ensure it is safe to mutate the `stage` field.
+ pub(super) fn take_output(&self) -> super::Result<T::Output> {
+ use std::mem;
+
+ self.stage.stage.with_mut(|ptr| {
+ // Safety: the caller ensures mutual exclusion to the field.
+ match mem::replace(unsafe { &mut *ptr }, Stage::Consumed) {
+ Stage::Finished(output) => output,
+ _ => panic!("JoinHandle polled after completion"),
+ }
+ })
+ }
+
+ unsafe fn set_stage(&self, stage: Stage<T>) {
+ let _guard = TaskIdGuard::enter(self.task_id);
+ self.stage.stage.with_mut(|ptr| *ptr = stage)
+ }
+}
+
+impl Header {
+ pub(super) unsafe fn set_next(&self, next: Option<NonNull<Header>>) {
+ self.queue_next.with_mut(|ptr| *ptr = next);
+ }
+
+ // safety: The caller must guarantee exclusive access to this field, and
+ // must ensure that the id is either 0 or the id of the OwnedTasks
+ // containing this task.
+ pub(super) unsafe fn set_owner_id(&self, owner: u64) {
+ self.owner_id.with_mut(|ptr| *ptr = owner);
+ }
+
+ pub(super) fn get_owner_id(&self) -> u64 {
+ // safety: If there are concurrent writes, then that write has violated
+ // the safety requirements on `set_owner_id`.
+ unsafe { self.owner_id.with(|ptr| *ptr) }
+ }
+
+ /// Gets a pointer to the `Trailer` of the task containing this `Header`.
+ ///
+ /// # Safety
+ ///
+ /// The provided raw pointer must point at the header of a task.
+ pub(super) unsafe fn get_trailer(me: NonNull<Header>) -> NonNull<Trailer> {
+ let offset = me.as_ref().vtable.trailer_offset;
+ let trailer = me.as_ptr().cast::<u8>().add(offset).cast::<Trailer>();
+ NonNull::new_unchecked(trailer)
+ }
+
+ /// Gets a pointer to the scheduler of the task containing this `Header`.
+ ///
+ /// # Safety
+ ///
+ /// The provided raw pointer must point at the header of a task.
+ ///
+ /// The generic type S must be set to the correct scheduler type for this
+ /// task.
+ pub(super) unsafe fn get_scheduler<S>(me: NonNull<Header>) -> NonNull<S> {
+ let offset = me.as_ref().vtable.scheduler_offset;
+ let scheduler = me.as_ptr().cast::<u8>().add(offset).cast::<S>();
+ NonNull::new_unchecked(scheduler)
+ }
+
+ /// Gets a pointer to the id of the task containing this `Header`.
+ ///
+ /// # Safety
+ ///
+ /// The provided raw pointer must point at the header of a task.
+ pub(super) unsafe fn get_id_ptr(me: NonNull<Header>) -> NonNull<Id> {
+ let offset = me.as_ref().vtable.id_offset;
+ let id = me.as_ptr().cast::<u8>().add(offset).cast::<Id>();
+ NonNull::new_unchecked(id)
+ }
+
+ /// Gets the id of the task containing this `Header`.
+ ///
+ /// # Safety
+ ///
+ /// The provided raw pointer must point at the header of a task.
+ pub(super) unsafe fn get_id(me: NonNull<Header>) -> Id {
+ let ptr = Header::get_id_ptr(me).as_ptr();
+ *ptr
+ }
+
+ /// Gets the tracing id of the task containing this `Header`.
+ ///
+ /// # Safety
+ ///
+ /// The provided raw pointer must point at the header of a task.
+ #[cfg(all(tokio_unstable, feature = "tracing"))]
+ pub(super) unsafe fn get_tracing_id(me: &NonNull<Header>) -> Option<&tracing::Id> {
+ me.as_ref().tracing_id.as_ref()
+ }
+}
+
+impl Trailer {
+ pub(super) unsafe fn set_waker(&self, waker: Option<Waker>) { // overwrites the stored join waker; the caller must have exclusive access to the field
+ self.waker.with_mut(|ptr| {
+ *ptr = waker;
+ });
+ }
+
+ pub(super) unsafe fn will_wake(&self, waker: &Waker) -> bool { // compares the stored waker with `waker`; panics via `unwrap` if none is stored
+ self.waker
+ .with(|ptr| (*ptr).as_ref().unwrap().will_wake(waker))
+ }
+
+ pub(super) fn wake_join(&self) { // wakes the stored join waker by reference
+ self.waker.with(|ptr| match unsafe { &*ptr } {
+ Some(waker) => waker.wake_by_ref(),
+ None => panic!("waker missing"),
+ });
+ }
+}
+
+#[test]
+#[cfg(not(loom))]
+fn header_lte_cache_line() { // guards the size of the hot `Header` data
+ use std::mem::size_of;
+
+ assert!(size_of::<Header>() <= 8 * size_of::<*const ()>()); // `Header` must occupy no more than eight pointer-sized words
+}
diff --git a/third_party/rust/tokio/src/runtime/task/error.rs b/third_party/rust/tokio/src/runtime/task/error.rs
new file mode 100644
index 0000000000..f7ead77b7c
--- /dev/null
+++ b/third_party/rust/tokio/src/runtime/task/error.rs
@@ -0,0 +1,165 @@
+use std::any::Any;
+use std::fmt;
+use std::io;
+
+use super::Id;
+use crate::util::SyncWrapper;
+cfg_rt! {
+ /// Task failed to execute to completion.
+ pub struct JoinError {
+ repr: Repr,
+ id: Id,
+ }
+}
+
+enum Repr {
+ Cancelled,
+ Panic(SyncWrapper<Box<dyn Any + Send + 'static>>),
+}
+
+impl JoinError {
+ pub(crate) fn cancelled(id: Id) -> JoinError {
+ JoinError {
+ repr: Repr::Cancelled,
+ id,
+ }
+ }
+
+ pub(crate) fn panic(id: Id, err: Box<dyn Any + Send + 'static>) -> JoinError {
+ JoinError {
+ repr: Repr::Panic(SyncWrapper::new(err)),
+ id,
+ }
+ }
+
+ /// Returns true if the error was caused by the task being cancelled.
+ pub fn is_cancelled(&self) -> bool {
+ matches!(&self.repr, Repr::Cancelled)
+ }
+
+ /// Returns true if the error was caused by the task panicking.
+ ///
+ /// # Examples
+ ///
+ /// ```
+ /// use std::panic;
+ ///
+ /// #[tokio::main]
+ /// async fn main() {
+ /// let err = tokio::spawn(async {
+ /// panic!("boom");
+ /// }).await.unwrap_err();
+ ///
+ /// assert!(err.is_panic());
+ /// }
+ /// ```
+ pub fn is_panic(&self) -> bool {
+ matches!(&self.repr, Repr::Panic(_))
+ }
+
+ /// Consumes the join error, returning the object with which the task panicked.
+ ///
+ /// # Panics
+ ///
+ /// `into_panic()` panics if the `JoinError` does not represent the underlying
+ /// task terminating with a panic. Use `is_panic` to check the error reason
+ /// or `try_into_panic` for a variant that does not panic.
+ ///
+ /// # Examples
+ ///
+ /// ```should_panic
+ /// use std::panic;
+ ///
+ /// #[tokio::main]
+ /// async fn main() {
+ /// let err = tokio::spawn(async {
+ /// panic!("boom");
+ /// }).await.unwrap_err();
+ ///
+ /// if err.is_panic() {
+ /// // Resume the panic on the main task
+ /// panic::resume_unwind(err.into_panic());
+ /// }
+ /// }
+ /// ```
+ #[track_caller]
+ pub fn into_panic(self) -> Box<dyn Any + Send + 'static> {
+ self.try_into_panic()
+ .expect("`JoinError` reason is not a panic.")
+ }
+
+ /// Consumes the join error, returning the object with which the task
+ /// panicked if the task terminated due to a panic. Otherwise, `self` is
+ /// returned.
+ ///
+ /// # Examples
+ ///
+ /// ```should_panic
+ /// use std::panic;
+ ///
+ /// #[tokio::main]
+ /// async fn main() {
+ /// let err = tokio::spawn(async {
+ /// panic!("boom");
+ /// }).await.unwrap_err();
+ ///
+ /// if let Ok(reason) = err.try_into_panic() {
+ /// // Resume the panic on the main task
+ /// panic::resume_unwind(reason);
+ /// }
+ /// }
+ /// ```
+ pub fn try_into_panic(self) -> Result<Box<dyn Any + Send + 'static>, JoinError> {
+ match self.repr {
+ Repr::Panic(p) => Ok(p.into_inner()),
+ _ => Err(self),
+ }
+ }
+
+ /// Returns a [task ID] that identifies the task which errored relative to
+ /// other currently spawned tasks.
+ ///
+ /// **Note**: This is an [unstable API][unstable]. The public API of this type
+ /// may break in 1.x releases. See [the documentation on unstable
+ /// features][unstable] for details.
+ ///
+ /// [task ID]: crate::task::Id
+ /// [unstable]: crate#unstable-features
+ #[cfg(tokio_unstable)]
+ #[cfg_attr(docsrs, doc(cfg(tokio_unstable)))]
+ pub fn id(&self) -> Id {
+ self.id
+ }
+}
+
+impl fmt::Display for JoinError {
+ fn fmt(&self, fmt: &mut fmt::Formatter<'_>) -> fmt::Result {
+ match &self.repr {
+ Repr::Cancelled => write!(fmt, "task {} was cancelled", self.id),
+ Repr::Panic(_) => write!(fmt, "task {} panicked", self.id),
+ }
+ }
+}
+
+impl fmt::Debug for JoinError {
+ fn fmt(&self, fmt: &mut fmt::Formatter<'_>) -> fmt::Result {
+ match &self.repr {
+ Repr::Cancelled => write!(fmt, "JoinError::Cancelled({:?})", self.id),
+ Repr::Panic(_) => write!(fmt, "JoinError::Panic({:?}, ...)", self.id),
+ }
+ }
+}
+
+impl std::error::Error for JoinError {}
+
+impl From<JoinError> for io::Error {
+ fn from(src: JoinError) -> io::Error {
+ io::Error::new(
+ io::ErrorKind::Other,
+ match src.repr {
+ Repr::Cancelled => "task was cancelled",
+ Repr::Panic(_) => "task panicked",
+ },
+ )
+ }
+}
diff --git a/third_party/rust/tokio/src/runtime/task/harness.rs b/third_party/rust/tokio/src/runtime/task/harness.rs
new file mode 100644
index 0000000000..8e3c3d14fa
--- /dev/null
+++ b/third_party/rust/tokio/src/runtime/task/harness.rs
@@ -0,0 +1,501 @@
+use crate::future::Future;
+use crate::runtime::task::core::{Cell, Core, Header, Trailer};
+use crate::runtime::task::state::{Snapshot, State};
+use crate::runtime::task::waker::waker_ref;
+use crate::runtime::task::{JoinError, Notified, RawTask, Schedule, Task};
+
+use std::mem;
+use std::mem::ManuallyDrop;
+use std::panic;
+use std::ptr::NonNull;
+use std::task::{Context, Poll, Waker};
+
+/// Typed raw task handle.
+pub(super) struct Harness<T: Future, S: 'static> {
+ cell: NonNull<Cell<T, S>>,
+}
+
+impl<T, S> Harness<T, S>
+where
+ T: Future,
+ S: 'static,
+{
+ pub(super) unsafe fn from_raw(ptr: NonNull<Header>) -> Harness<T, S> {
+ Harness {
+ cell: ptr.cast::<Cell<T, S>>(),
+ }
+ }
+
+ fn header_ptr(&self) -> NonNull<Header> {
+ self.cell.cast()
+ }
+
+ fn header(&self) -> &Header {
+ unsafe { &*self.header_ptr().as_ptr() }
+ }
+
+ fn state(&self) -> &State {
+ &self.header().state
+ }
+
+ fn trailer(&self) -> &Trailer {
+ unsafe { &self.cell.as_ref().trailer }
+ }
+
+ fn core(&self) -> &Core<T, S> {
+ unsafe { &self.cell.as_ref().core }
+ }
+}
+
+/// Task operations that can be implemented without being generic over the
+/// scheduler or task. Only one version of these methods should exist in the
+/// final binary.
+impl RawTask {
+ pub(super) fn drop_reference(self) {
+ if self.state().ref_dec() {
+ self.dealloc();
+ }
+ }
+
+ /// This call consumes a ref-count and notifies the task. This will create a
+ /// new Notified and submit it if necessary.
+ ///
+ /// The caller does not need to hold a ref-count besides the one that was
+ /// passed to this call.
+ pub(super) fn wake_by_val(&self) {
+ use super::state::TransitionToNotifiedByVal;
+
+ match self.state().transition_to_notified_by_val() {
+ TransitionToNotifiedByVal::Submit => {
+ // The caller has given us a ref-count, and the transition has
+ // created a new ref-count, so we now hold two. We turn the new
+ // ref-count into a Notified and pass it to the call to `schedule`.
+ //
+ // The old ref-count is retained for now to ensure that the task
+ // is not dropped during the call to `schedule` if the call
+ // drops the task it was given.
+ self.schedule();
+
+ // Now that we have completed the call to schedule, we can
+ // release our ref-count.
+ self.drop_reference();
+ }
+ TransitionToNotifiedByVal::Dealloc => {
+ self.dealloc();
+ }
+ TransitionToNotifiedByVal::DoNothing => {}
+ }
+ }
+
+ /// This call notifies the task. It will not consume any ref-counts, but the
+ /// caller should hold a ref-count. This will create a new Notified and
+ /// submit it if necessary.
+ pub(super) fn wake_by_ref(&self) {
+ use super::state::TransitionToNotifiedByRef;
+
+ match self.state().transition_to_notified_by_ref() {
+ TransitionToNotifiedByRef::Submit => {
+ // The transition above incremented the ref-count for a new task
+ // and the caller also holds a ref-count. The caller's ref-count
+ // ensures that the task is not destroyed even if the new task
+ // is dropped before `schedule` returns.
+ self.schedule();
+ }
+ TransitionToNotifiedByRef::DoNothing => {}
+ }
+ }
+
+ /// Remotely aborts the task.
+ ///
+ /// The caller should hold a ref-count, but we do not consume it.
+ ///
+ /// This is similar to `shutdown` except that it asks the runtime to perform
+ /// the shutdown. This is necessary to avoid the shutdown happening in the
+ /// wrong thread for non-Send tasks.
+ pub(super) fn remote_abort(&self) {
+ if self.state().transition_to_notified_and_cancel() {
+ // The transition has created a new ref-count, which we turn into
+ // a Notified and pass to the task.
+ //
+ // Since the caller holds a ref-count, the task cannot be destroyed
+ // before the call to `schedule` returns even if the call drops the
+ // `Notified` internally.
+ self.schedule();
+ }
+ }
+
+ /// Try to set the waker notified when the task is complete. Returns true if
+ /// the task has already completed; in that case the waker will not be
+ /// notified. If this call returns false, the waker is notified on completion.
+ pub(super) fn try_set_join_waker(&self, waker: &Waker) -> bool {
+ can_read_output(self.header(), self.trailer(), waker)
+ }
+}
+
+impl<T, S> Harness<T, S>
+where
+ T: Future,
+ S: Schedule,
+{
+ pub(super) fn drop_reference(self) {
+ if self.state().ref_dec() {
+ self.dealloc();
+ }
+ }
+
+ /// Polls the inner future. A ref-count is consumed.
+ ///
+ /// All necessary state checks and transitions are performed.
+ /// Panics raised while polling the future are handled.
+ pub(super) fn poll(self) {
+ // We pass our ref-count to `poll_inner`.
+ match self.poll_inner() {
+ PollFuture::Notified => {
+ // The `poll_inner` call has given us two ref-counts back.
+ // We give one of them to a new task and call `yield_now`.
+ self.core()
+ .scheduler
+ .yield_now(Notified(self.get_new_task()));
+
+ // The remaining ref-count is now dropped. We kept the extra
+ // ref-count until now to ensure that even if the `yield_now`
+ // call drops the provided task, the task isn't deallocated
+ // until after `yield_now` returns.
+ self.drop_reference();
+ }
+ PollFuture::Complete => {
+ self.complete();
+ }
+ PollFuture::Dealloc => {
+ self.dealloc();
+ }
+ PollFuture::Done => (),
+ }
+ }
+
+ /// Polls the task and cancels it if necessary. This takes ownership of a
+ /// ref-count.
+ ///
+ /// If the return value is Notified, the caller is given ownership of two
+ /// ref-counts.
+ ///
+ /// If the return value is Complete, the caller is given ownership of a
+ /// single ref-count, which should be passed on to `complete`.
+ ///
+ /// If the return value is Dealloc, then this call consumed the last
+ /// ref-count and the caller should call `dealloc`.
+ ///
+ /// Otherwise the ref-count is consumed and the caller should not access
+ /// `self` again.
+ fn poll_inner(&self) -> PollFuture {
+ use super::state::{TransitionToIdle, TransitionToRunning};
+
+ match self.state().transition_to_running() {
+ TransitionToRunning::Success => {
+ let header_ptr = self.header_ptr();
+ let waker_ref = waker_ref::<T, S>(&header_ptr);
+ let cx = Context::from_waker(&waker_ref);
+ let res = poll_future(self.core(), cx);
+
+ if res == Poll::Ready(()) {
+ // The future completed. Move on to complete the task.
+ return PollFuture::Complete;
+ }
+
+ match self.state().transition_to_idle() {
+ TransitionToIdle::Ok => PollFuture::Done,
+ TransitionToIdle::OkNotified => PollFuture::Notified,
+ TransitionToIdle::OkDealloc => PollFuture::Dealloc,
+ TransitionToIdle::Cancelled => {
+ // The transition to idle failed because the task was
+ // cancelled during the poll.
+ cancel_task(self.core());
+ PollFuture::Complete
+ }
+ }
+ }
+ TransitionToRunning::Cancelled => {
+ cancel_task(self.core());
+ PollFuture::Complete
+ }
+ TransitionToRunning::Failed => PollFuture::Done,
+ TransitionToRunning::Dealloc => PollFuture::Dealloc,
+ }
+ }
+
+ /// Forcibly shuts down the task.
+ ///
+ /// Attempt to transition to `Running` in order to forcibly shutdown the
+ /// task. If the task is currently running or in a state of completion, then
+ /// there is nothing further to do. When the task completes running, it will
+ /// notice the `CANCELLED` bit and finalize the task.
+ pub(super) fn shutdown(self) {
+ if !self.state().transition_to_shutdown() {
+ // The task is concurrently running. No further work needed.
+ self.drop_reference();
+ return;
+ }
+
+ // By transitioning the lifecycle to `Running`, we have permission to
+ // drop the future.
+ cancel_task(self.core());
+ self.complete();
+ }
+
+ pub(super) fn dealloc(self) {
+ // Release the join waker, if there is one.
+ self.trailer().waker.with_mut(drop);
+
+ // Check causality
+ self.core().stage.with_mut(drop);
+
+ // Safety: The caller of this method just transitioned our ref-count to
+ // zero, so it is our responsibility to release the allocation.
+ //
+ // We don't hold any references into the allocation at this point, but
+ // it is possible for another thread to still hold a `&State` into the
+ // allocation if that other thread has decremented its last ref-count,
+ // but has not yet returned from the relevant method on `State`.
+ //
+ // However, the `State` type consists of just an `AtomicUsize`, and an
+ // `AtomicUsize` wraps the entirety of its contents in an `UnsafeCell`.
+ // As explained in the documentation for `UnsafeCell`, such references
+ // are allowed to be dangling after their last use, even if the
+ // reference has not yet gone out of scope.
+ unsafe {
+ drop(Box::from_raw(self.cell.as_ptr()));
+ }
+ }
+
+ // ===== join handle =====
+
+ /// Read the task output into `dst`.
+ pub(super) fn try_read_output(self, dst: &mut Poll<super::Result<T::Output>>, waker: &Waker) {
+ if can_read_output(self.header(), self.trailer(), waker) {
+ *dst = Poll::Ready(self.core().take_output());
+ }
+ }
+
+ pub(super) fn drop_join_handle_slow(self) {
+ // Try to unset `JOIN_INTEREST`. This must be done as a first step in
+ // case the task concurrently completed.
+ if self.state().unset_join_interested().is_err() {
+ // It is our responsibility to drop the output. This is critical as
+ // the task output may not be `Send` and as such must remain with
+ // the scheduler or `JoinHandle`. i.e. if the output remains in the
+ // task structure until the task is deallocated, it may be dropped
+ // by a Waker on any arbitrary thread.
+ //
+ // Panics are delivered to the user via the `JoinHandle`. Given that
+ // they are dropping the `JoinHandle`, we assume they are not
+ // interested in the panic and swallow it.
+ let _ = panic::catch_unwind(panic::AssertUnwindSafe(|| {
+ self.core().drop_future_or_output();
+ }));
+ }
+
+ // Drop the `JoinHandle` reference, possibly deallocating the task
+ self.drop_reference();
+ }
+
+ // ====== internal ======
+
+ /// Completes the task. This method assumes that the state is RUNNING.
+ fn complete(self) {
+ // The future has completed and its output has been written to the task
+ // stage. We transition from running to complete.
+
+ let snapshot = self.state().transition_to_complete();
+
+ // We catch panics here in case dropping the future or waking the
+ // JoinHandle panics.
+ let _ = panic::catch_unwind(panic::AssertUnwindSafe(|| {
+ if !snapshot.is_join_interested() {
+ // The `JoinHandle` is not interested in the output of
+ // this task. It is our responsibility to drop the
+ // output.
+ self.core().drop_future_or_output();
+ } else if snapshot.is_join_waker_set() {
+ // Notify the waker. Reading the waker field is safe per rule 4
+ // in task/mod.rs, since the JOIN_WAKER bit is set and the call
+ // to transition_to_complete() above set the COMPLETE bit.
+ self.trailer().wake_join();
+ }
+ }));
+
+ // The task has completed execution and will no longer be scheduled.
+ let num_release = self.release();
+
+ if self.state().transition_to_terminal(num_release) {
+ self.dealloc();
+ }
+ }
+
+ /// Releases the task from the scheduler. Returns the number of ref-counts
+ /// that should be decremented.
+ fn release(&self) -> usize {
+ // We don't actually increment the ref-count here, but the new task is
+ // never destroyed, so that's ok.
+ let me = ManuallyDrop::new(self.get_new_task());
+
+ if let Some(task) = self.core().scheduler.release(&me) {
+ mem::forget(task);
+ 2
+ } else {
+ 1
+ }
+ }
+
+ /// Creates a new task that holds its own ref-count.
+ ///
+ /// # Safety
+ ///
+ /// Any use of `self` after this call must ensure that a ref-count to the
+ /// task holds the task alive until after the use of `self`. Passing the
+ /// returned Task to any method on `self` is unsound if dropping the Task
+ /// could drop `self` before the call on `self` returned.
+ fn get_new_task(&self) -> Task<S> {
+ // safety: The header is at the beginning of the cell, so this cast is
+ // safe.
+ unsafe { Task::from_raw(self.cell.cast()) }
+ }
+}
+
+fn can_read_output(header: &Header, trailer: &Trailer, waker: &Waker) -> bool {
+ // Load a snapshot of the current task state
+ let snapshot = header.state.load();
+
+ debug_assert!(snapshot.is_join_interested());
+
+ if !snapshot.is_complete() {
+ // If the task is not complete, try storing the provided waker in the
+ // task's waker field.
+
+ let res = if snapshot.is_join_waker_set() {
+ // If JOIN_WAKER is set, then JoinHandle has previously stored a
+ // waker in the waker field per step (iii) of rule 5 in task/mod.rs.
+
+ // Optimization: if the stored waker and the provided waker wake the
+ // same task, then return without touching the waker field. (Reading
+ // the waker field below is safe per rule 3 in task/mod.rs.)
+ if unsafe { trailer.will_wake(waker) } {
+ return false;
+ }
+
+ // Otherwise swap the stored waker with the provided waker by
+ // following rule 5 in task/mod.rs.
+ header
+ .state
+ .unset_waker()
+ .and_then(|snapshot| set_join_waker(header, trailer, waker.clone(), snapshot))
+ } else {
+ // If JOIN_WAKER is unset, then JoinHandle has mutable access to the
+ // waker field per rule 2 in task/mod.rs; therefore, skip step (i)
+ // of rule 5 and try to store the provided waker in the waker field.
+ set_join_waker(header, trailer, waker.clone(), snapshot)
+ };
+
+ match res {
+ Ok(_) => return false,
+ Err(snapshot) => {
+ assert!(snapshot.is_complete());
+ }
+ }
+ }
+ true
+}
+
+fn set_join_waker(
+ header: &Header,
+ trailer: &Trailer,
+ waker: Waker,
+ snapshot: Snapshot,
+) -> Result<Snapshot, Snapshot> {
+ assert!(snapshot.is_join_interested());
+ assert!(!snapshot.is_join_waker_set());
+
+ // Safety: Only the `JoinHandle` may set the `waker` field. When
+ // `JOIN_WAKER` is **not** set, nothing else will touch the field.
+ unsafe {
+ trailer.set_waker(Some(waker));
+ }
+
+ // Update the `JoinWaker` state accordingly
+ let res = header.state.set_join_waker();
+
+ // If the state could not be updated, then clear the join waker
+ if res.is_err() {
+ unsafe {
+ trailer.set_waker(None);
+ }
+ }
+
+ res
+}
+
+enum PollFuture {
+ Complete,
+ Notified,
+ Done,
+ Dealloc,
+}
+
+ /// Cancels the task and stores the appropriate error in the stage field.
+fn cancel_task<T: Future, S: Schedule>(core: &Core<T, S>) {
+ // Drop the future from a panic guard.
+ let res = panic::catch_unwind(panic::AssertUnwindSafe(|| {
+ core.drop_future_or_output();
+ }));
+
+ match res {
+ Ok(()) => {
+ core.store_output(Err(JoinError::cancelled(core.task_id)));
+ }
+ Err(panic) => {
+ core.store_output(Err(JoinError::panic(core.task_id, panic)));
+ }
+ }
+}
+
+/// Polls the future. If the future completes, the output is written to the
+/// stage field.
+fn poll_future<T: Future, S: Schedule>(core: &Core<T, S>, cx: Context<'_>) -> Poll<()> {
+ // Poll the future.
+ let output = panic::catch_unwind(panic::AssertUnwindSafe(|| {
+ struct Guard<'a, T: Future, S: Schedule> {
+ core: &'a Core<T, S>,
+ }
+ impl<'a, T: Future, S: Schedule> Drop for Guard<'a, T, S> {
+ fn drop(&mut self) {
+ // If the future panics on poll, we drop it inside the panic
+ // guard.
+ self.core.drop_future_or_output();
+ }
+ }
+ let guard = Guard { core };
+ let res = guard.core.poll(cx);
+ mem::forget(guard);
+ res
+ }));
+
+ // Prepare output for being placed in the core stage.
+ let output = match output {
+ Ok(Poll::Pending) => return Poll::Pending,
+ Ok(Poll::Ready(output)) => Ok(output),
+ Err(panic) => {
+ core.scheduler.unhandled_panic();
+ Err(JoinError::panic(core.task_id, panic))
+ }
+ };
+
+ // Catch and ignore panics if the future panics on drop.
+ let res = panic::catch_unwind(panic::AssertUnwindSafe(|| {
+ core.store_output(output);
+ }));
+
+ if res.is_err() {
+ core.scheduler.unhandled_panic();
+ }
+
+ Poll::Ready(())
+}
diff --git a/third_party/rust/tokio/src/runtime/task/id.rs b/third_party/rust/tokio/src/runtime/task/id.rs
new file mode 100644
index 0000000000..2b0d95c024
--- /dev/null
+++ b/third_party/rust/tokio/src/runtime/task/id.rs
@@ -0,0 +1,87 @@
+use crate::runtime::context;
+
+use std::fmt;
+
+/// An opaque ID that uniquely identifies a task relative to all other currently
+/// running tasks.
+///
+/// # Notes
+///
+/// - Task IDs are unique relative to other *currently running* tasks. When a
+/// task completes, the same ID may be used for another task.
+/// - Task IDs are *not* sequential, and do not indicate the order in which
+/// tasks are spawned, what runtime a task is spawned on, or any other data.
+/// - The task ID of the currently running task can be obtained from inside the
+/// task via the [`task::try_id()`](crate::task::try_id()) and
+/// [`task::id()`](crate::task::id()) functions and from outside the task via
+/// the [`JoinHandle::id()`](crate::task::JoinHandle::id()) function.
+///
+/// **Note**: This is an [unstable API][unstable]. The public API of this type
+/// may break in 1.x releases. See [the documentation on unstable
+/// features][unstable] for details.
+///
+/// [unstable]: crate#unstable-features
+#[cfg_attr(docsrs, doc(cfg(all(feature = "rt", tokio_unstable))))]
+#[cfg_attr(not(tokio_unstable), allow(unreachable_pub))]
+#[derive(Clone, Copy, Debug, Hash, Eq, PartialEq)]
+pub struct Id(u64);
+
+/// Returns the [`Id`] of the currently running task.
+///
+/// # Panics
+///
+/// This function panics if called from outside a task. Please note that calls
+/// to `block_on` do not have task IDs, so the method will panic if called from
+/// within a call to `block_on`. For a version of this function that doesn't
+/// panic, see [`task::try_id()`](crate::runtime::task::try_id()).
+///
+/// **Note**: This is an [unstable API][unstable]. The public API of this type
+/// may break in 1.x releases. See [the documentation on unstable
+/// features][unstable] for details.
+///
+/// [task ID]: crate::task::Id
+/// [unstable]: crate#unstable-features
+#[cfg_attr(not(tokio_unstable), allow(unreachable_pub))]
+#[track_caller]
+pub fn id() -> Id {
+ context::current_task_id().expect("Can't get a task id when not inside a task")
+}
+
+/// Returns the [`Id`] of the currently running task, or `None` if called outside
+/// of a task.
+///
+/// This function is similar to [`task::id()`](crate::runtime::task::id()), except
+/// that it returns `None` rather than panicking if called outside of a task
+/// context.
+///
+/// **Note**: This is an [unstable API][unstable]. The public API of this type
+/// may break in 1.x releases. See [the documentation on unstable
+/// features][unstable] for details.
+///
+/// [task ID]: crate::task::Id
+/// [unstable]: crate#unstable-features
+#[cfg_attr(not(tokio_unstable), allow(unreachable_pub))]
+#[track_caller]
+pub fn try_id() -> Option<Id> {
+ context::current_task_id()
+}
+
+impl fmt::Display for Id {
+ fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
+ self.0.fmt(f)
+ }
+}
+
+impl Id {
+ pub(crate) fn next() -> Self {
+ use crate::loom::sync::atomic::{Ordering::Relaxed, StaticAtomicU64};
+
+ static NEXT_ID: StaticAtomicU64 = StaticAtomicU64::new(1);
+
+ Self(NEXT_ID.fetch_add(1, Relaxed))
+ }
+
+ pub(crate) fn as_u64(&self) -> u64 {
+ self.0
+ }
+}
diff --git a/third_party/rust/tokio/src/runtime/task/join.rs b/third_party/rust/tokio/src/runtime/task/join.rs
new file mode 100644
index 0000000000..ee39258846
--- /dev/null
+++ b/third_party/rust/tokio/src/runtime/task/join.rs
@@ -0,0 +1,366 @@
+use crate::runtime::task::{Header, RawTask};
+
+use std::fmt;
+use std::future::Future;
+use std::marker::PhantomData;
+use std::panic::{RefUnwindSafe, UnwindSafe};
+use std::pin::Pin;
+use std::task::{Context, Poll, Waker};
+
+cfg_rt! {
+ /// An owned permission to join on a task (await its termination).
+ ///
+ /// This can be thought of as the equivalent of [`std::thread::JoinHandle`]
+ /// for a Tokio task rather than a thread. Note that the background task
+ /// associated with this `JoinHandle` started running immediately when you
+ /// called spawn, even if you have not yet awaited the `JoinHandle`.
+ ///
+ /// A `JoinHandle` *detaches* the associated task when it is dropped, which
+ /// means that there is no longer any handle to the task, and no way to `join`
+ /// on it.
+ ///
+ /// This `struct` is created by the [`task::spawn`] and [`task::spawn_blocking`]
+ /// functions.
+ ///
+ /// # Cancel safety
+ ///
+ /// The `&mut JoinHandle<T>` type is cancel safe. If it is used as the event
+ /// in a `tokio::select!` statement and some other branch completes first,
+ /// then it is guaranteed that the output of the task is not lost.
+ ///
+ /// If a `JoinHandle` is dropped, then the task continues running in the
+ /// background and its return value is lost.
+ ///
+ /// # Examples
+ ///
+ /// Creation from [`task::spawn`]:
+ ///
+ /// ```
+ /// use tokio::task;
+ ///
+ /// # async fn doc() {
+ /// let join_handle: task::JoinHandle<_> = task::spawn(async {
+ /// // some work here
+ /// });
+ /// # }
+ /// ```
+ ///
+ /// Creation from [`task::spawn_blocking`]:
+ ///
+ /// ```
+ /// use tokio::task;
+ ///
+ /// # async fn doc() {
+ /// let join_handle: task::JoinHandle<_> = task::spawn_blocking(|| {
+ /// // some blocking work here
+ /// });
+ /// # }
+ /// ```
+ ///
+ /// The generic parameter `T` in `JoinHandle<T>` is the return type of the spawned task.
+ /// If the return value is an i32, the join handle has type `JoinHandle<i32>`:
+ ///
+ /// ```
+ /// use tokio::task;
+ ///
+ /// # async fn doc() {
+ /// let join_handle: task::JoinHandle<i32> = task::spawn(async {
+ /// 5 + 3
+ /// });
+ /// # }
+ ///
+ /// ```
+ ///
+ /// If the task does not have a return value, the join handle has type `JoinHandle<()>`:
+ ///
+ /// ```
+ /// use tokio::task;
+ ///
+ /// # async fn doc() {
+ /// let join_handle: task::JoinHandle<()> = task::spawn(async {
+ /// println!("I return nothing.");
+ /// });
+ /// # }
+ /// ```
+ ///
+ /// Note that `handle.await` doesn't give you the return type directly. It is wrapped in a
+ /// `Result` because panics in the spawned task are caught by Tokio. The `?` operator has
+ /// to be double chained to extract the returned value:
+ ///
+ /// ```
+ /// use tokio::task;
+ /// use std::io;
+ ///
+ /// #[tokio::main]
+ /// async fn main() -> io::Result<()> {
+ /// let join_handle: task::JoinHandle<Result<i32, io::Error>> = tokio::spawn(async {
+ /// Ok(5 + 3)
+ /// });
+ ///
+ /// let result = join_handle.await??;
+ /// assert_eq!(result, 8);
+ /// Ok(())
+ /// }
+ /// ```
+ ///
+ /// If the task panics, the error is a [`JoinError`] that contains the panic:
+ ///
+ /// ```
+ /// use tokio::task;
+ /// use std::io;
+ /// use std::panic;
+ ///
+ /// #[tokio::main]
+ /// async fn main() -> io::Result<()> {
+ /// let join_handle: task::JoinHandle<Result<i32, io::Error>> = tokio::spawn(async {
+ /// panic!("boom");
+ /// });
+ ///
+ /// let err = join_handle.await.unwrap_err();
+ /// assert!(err.is_panic());
+ /// Ok(())
+ /// }
+ ///
+ /// ```
+ /// Child being detached and outliving its parent:
+ ///
+ /// ```no_run
+ /// use tokio::task;
+ /// use tokio::time;
+ /// use std::time::Duration;
+ ///
+ /// # #[tokio::main] async fn main() {
+ /// let original_task = task::spawn(async {
+ /// let _detached_task = task::spawn(async {
+ /// // Here we sleep to make sure that the first task returns before.
+ /// time::sleep(Duration::from_millis(10)).await;
+ /// // This will be called, even though the JoinHandle is dropped.
+ /// println!("♫ Still alive ♫");
+ /// });
+ /// });
+ ///
+ /// original_task.await.expect("The task being joined has panicked");
+ /// println!("Original task is joined.");
+ ///
+ /// // We make sure that the new task has time to run, before the main
+ /// // task returns.
+ ///
+ /// time::sleep(Duration::from_millis(1000)).await;
+ /// # }
+ /// ```
+ ///
+ /// [`task::spawn`]: crate::task::spawn()
+ /// [`task::spawn_blocking`]: crate::task::spawn_blocking
+ /// [`std::thread::JoinHandle`]: std::thread::JoinHandle
+ /// [`JoinError`]: crate::task::JoinError
+ pub struct JoinHandle<T> {
+ raw: RawTask,
+ _p: PhantomData<T>,
+ }
+}
+
+unsafe impl<T: Send> Send for JoinHandle<T> {}
+unsafe impl<T: Send> Sync for JoinHandle<T> {}
+
+impl<T> UnwindSafe for JoinHandle<T> {}
+impl<T> RefUnwindSafe for JoinHandle<T> {}
+
+impl<T> JoinHandle<T> {
+ pub(super) fn new(raw: RawTask) -> JoinHandle<T> {
+ JoinHandle {
+ raw,
+ _p: PhantomData,
+ }
+ }
+
+ /// Abort the task associated with the handle.
+ ///
+ /// Awaiting a cancelled task might complete as usual if the task was
+ /// already completed at the time it was cancelled, but most likely it
+ /// will fail with a [cancelled] `JoinError`.
+ ///
+ /// ```rust
+ /// use tokio::time;
+ ///
+ /// # #[tokio::main(flavor = "current_thread", start_paused = true)]
+ /// # async fn main() {
+ /// let mut handles = Vec::new();
+ ///
+ /// handles.push(tokio::spawn(async {
+ /// time::sleep(time::Duration::from_secs(10)).await;
+ /// true
+ /// }));
+ ///
+ /// handles.push(tokio::spawn(async {
+ /// time::sleep(time::Duration::from_secs(10)).await;
+ /// false
+ /// }));
+ ///
+ /// for handle in &handles {
+ /// handle.abort();
+ /// }
+ ///
+ /// for handle in handles {
+ /// assert!(handle.await.unwrap_err().is_cancelled());
+ /// }
+ /// # }
+ /// ```
+ /// [cancelled]: method@super::error::JoinError::is_cancelled
+ pub fn abort(&self) {
+ self.raw.remote_abort();
+ }
+
+ /// Checks if the task associated with this `JoinHandle` has finished.
+ ///
+ /// Please note that this method can return `false` even if [`abort`] has been
+ /// called on the task. This is because the cancellation process may take
+ /// some time, and this method does not return `true` until it has
+ /// completed.
+ ///
+ /// ```rust
+ /// use tokio::time;
+ ///
+ /// # #[tokio::main(flavor = "current_thread", start_paused = true)]
+ /// # async fn main() {
+ /// let handle1 = tokio::spawn(async {
+ /// // do some stuff here
+ /// });
+ /// let handle2 = tokio::spawn(async {
+ /// // do some other stuff here
+ /// time::sleep(time::Duration::from_secs(10)).await;
+ /// });
+ /// // Wait for the task to finish
+ /// handle2.abort();
+ /// time::sleep(time::Duration::from_secs(1)).await;
+ /// assert!(handle1.is_finished());
+ /// assert!(handle2.is_finished());
+ /// # }
+ /// ```
+ /// [`abort`]: method@JoinHandle::abort
+ pub fn is_finished(&self) -> bool {
+ let state = self.raw.header().state.load();
+ state.is_complete()
+ }
+
+ /// Set the waker that is notified when the task completes.
+ pub(crate) fn set_join_waker(&mut self, waker: &Waker) {
+ if self.raw.try_set_join_waker(waker) {
+ // In this case the task has already completed. We wake the waker immediately.
+ waker.wake_by_ref();
+ }
+ }
+
+ /// Returns a new `AbortHandle` that can be used to remotely abort this task.
+ ///
+ /// Awaiting a task cancelled by the `AbortHandle` might complete as usual if the task was
+ /// already completed at the time it was cancelled, but most likely it
+ /// will fail with a [cancelled] `JoinError`.
+ ///
+ /// ```rust
+ /// use tokio::{time, task};
+ ///
+ /// # #[tokio::main(flavor = "current_thread", start_paused = true)]
+ /// # async fn main() {
+ /// let mut handles = Vec::new();
+ ///
+ /// handles.push(tokio::spawn(async {
+ /// time::sleep(time::Duration::from_secs(10)).await;
+ /// true
+ /// }));
+ ///
+ /// handles.push(tokio::spawn(async {
+ /// time::sleep(time::Duration::from_secs(10)).await;
+ /// false
+ /// }));
+ ///
+ /// let abort_handles: Vec<task::AbortHandle> = handles.iter().map(|h| h.abort_handle()).collect();
+ ///
+ /// for handle in abort_handles {
+ /// handle.abort();
+ /// }
+ ///
+ /// for handle in handles {
+ /// assert!(handle.await.unwrap_err().is_cancelled());
+ /// }
+ /// # }
+ /// ```
+ /// [cancelled]: method@super::error::JoinError::is_cancelled
+ pub fn abort_handle(&self) -> super::AbortHandle {
+ self.raw.ref_inc();
+ super::AbortHandle::new(self.raw)
+ }
+
+ /// Returns a [task ID] that uniquely identifies this task relative to other
+ /// currently spawned tasks.
+ ///
+ /// **Note**: This is an [unstable API][unstable]. The public API of this type
+ /// may break in 1.x releases. See [the documentation on unstable
+ /// features][unstable] for details.
+ ///
+ /// [task ID]: crate::task::Id
+ /// [unstable]: crate#unstable-features
+ #[cfg(tokio_unstable)]
+ #[cfg_attr(docsrs, doc(cfg(tokio_unstable)))]
+ pub fn id(&self) -> super::Id {
+ // Safety: The header pointer is valid.
+ unsafe { Header::get_id(self.raw.header_ptr()) }
+ }
+}
+
+impl<T> Unpin for JoinHandle<T> {}
+
+impl<T> Future for JoinHandle<T> {
+ type Output = super::Result<T>;
+
+ fn poll(self: Pin<&mut Self>, cx: &mut Context<'_>) -> Poll<Self::Output> {
+ ready!(crate::trace::trace_leaf(cx));
+ let mut ret = Poll::Pending;
+
+ // Keep track of task budget
+ let coop = ready!(crate::runtime::coop::poll_proceed(cx));
+
+ // Try to read the task output. If the task is not yet complete, the
+ // waker is stored and is notified once the task does complete.
+ //
+ // The function must go via the vtable, which requires erasing generic
+ // types. To do this, the function "return" is placed on the stack
+ // **before** calling the function and is passed into the function using
+ // `*mut ()`.
+ //
+ // Safety:
+ //
+ // The type of `T` must match the task's output type.
+ unsafe {
+ self.raw
+ .try_read_output(&mut ret as *mut _ as *mut (), cx.waker());
+ }
+
+ if ret.is_ready() {
+ coop.made_progress();
+ }
+
+ ret
+ }
+}
+
+impl<T> Drop for JoinHandle<T> {
+ fn drop(&mut self) {
+ if self.raw.state().drop_join_handle_fast().is_ok() {
+ return;
+ }
+
+ self.raw.drop_join_handle_slow();
+ }
+}
+
+impl<T> fmt::Debug for JoinHandle<T>
+where
+ T: fmt::Debug,
+{
+ fn fmt(&self, fmt: &mut fmt::Formatter<'_>) -> fmt::Result {
+ // Safety: The header pointer is valid.
+ let id_ptr = unsafe { Header::get_id_ptr(self.raw.header_ptr()) };
+ let id = unsafe { id_ptr.as_ref() };
+ fmt.debug_struct("JoinHandle").field("id", id).finish()
+ }
+}
diff --git a/third_party/rust/tokio/src/runtime/task/list.rs b/third_party/rust/tokio/src/runtime/task/list.rs
new file mode 100644
index 0000000000..fb7dbdc1d9
--- /dev/null
+++ b/third_party/rust/tokio/src/runtime/task/list.rs
@@ -0,0 +1,319 @@
+//! This module has containers for storing the tasks spawned on a scheduler. The
+//! `OwnedTasks` container is thread-safe but can only store tasks that
+//! implement Send. The `LocalOwnedTasks` container is not thread safe, but can
+//! store non-Send tasks.
+//!
+//! The collections can be closed to prevent adding new tasks during shutdown of
+//! the scheduler with the collection.
+
+use crate::future::Future;
+use crate::loom::cell::UnsafeCell;
+use crate::loom::sync::Mutex;
+use crate::runtime::task::{JoinHandle, LocalNotified, Notified, Schedule, Task};
+use crate::util::linked_list::{CountedLinkedList, Link, LinkedList};
+
+use std::marker::PhantomData;
+
+// The id generated below is used to verify whether a given task is stored in
+// this OwnedTasks or in some other collection. The counter starts at one so we
+// can use zero for tasks not owned by any list.
+//
+// The safety checks in this file can technically be violated if the counter
+// overflows, but the checks are not supposed to ever fail unless there is a
+// bug in Tokio, so we accept that certain bugs would not be caught if the two
+// mixed-up runtimes happen to have the same id.
+
+cfg_has_atomic_u64! {
+ use std::sync::atomic::{AtomicU64, Ordering};
+
+ static NEXT_OWNED_TASKS_ID: AtomicU64 = AtomicU64::new(1); // Starts at one; zero marks unowned tasks.
+
+ fn get_next_id() -> u64 { // Hands out the next non-zero collection id.
+ loop {
+ let id = NEXT_OWNED_TASKS_ID.fetch_add(1, Ordering::Relaxed);
+ if id != 0 { // Skip zero if the counter ever wraps around to it.
+ return id;
+ }
+ }
+ }
+}
+
+cfg_not_has_atomic_u64! {
+ use std::sync::atomic::{AtomicU32, Ordering};
+
+ static NEXT_OWNED_TASKS_ID: AtomicU32 = AtomicU32::new(1); // 32-bit counter used when 64-bit atomics are unavailable.
+
+ fn get_next_id() -> u64 { // Hands out the next non-zero collection id.
+ loop {
+ let id = NEXT_OWNED_TASKS_ID.fetch_add(1, Ordering::Relaxed);
+ if id != 0 { // Skip zero if the counter ever wraps around to it.
+ return u64::from(id); // Widen losslessly to the common u64 id type.
+ }
+ }
+ }
+}
+
+pub(crate) struct OwnedTasks<S: 'static> { // Mutex-guarded task collection.
+ inner: Mutex<CountedOwnedTasksInner<S>>,
+ id: u64, // Taken from get_next_id(); stamped on every bound task as its owner id.
+}
+struct CountedOwnedTasksInner<S: 'static> { // State kept behind the OwnedTasks mutex.
+ list: CountedLinkedList<Task<S>, <Task<S> as Link>::Target>,
+ closed: bool, // Once true, bind() shuts new tasks down instead of storing them.
+}
+pub(crate) struct LocalOwnedTasks<S: 'static> { // Unsynchronized task collection.
+ inner: UnsafeCell<OwnedTasksInner<S>>, // Only accessed through with_inner().
+ id: u64, // Taken from get_next_id(); stamped on every bound task as its owner id.
+ _not_send_or_sync: PhantomData<*const ()>, // Raw-pointer marker: opts the type out of Send and Sync.
+}
+struct OwnedTasksInner<S: 'static> { // State kept inside the LocalOwnedTasks cell.
+ list: LinkedList<Task<S>, <Task<S> as Link>::Target>,
+ closed: bool, // Once true, bind() shuts new tasks down instead of storing them.
+}
+
+impl<S: 'static> OwnedTasks<S> {
+ pub(crate) fn new() -> Self { // Creates an open, empty collection with a fresh id.
+ Self {
+ inner: Mutex::new(CountedOwnedTasksInner {
+ list: CountedLinkedList::new(),
+ closed: false,
+ }),
+ id: get_next_id(),
+ }
+ }
+
+ /// Binds the provided task to this OwnedTasks instance. If the OwnedTasks has
+ /// been closed, the task is shut down instead and no Notified is returned.
+ pub(crate) fn bind<T>(
+ &self,
+ task: T,
+ scheduler: S,
+ id: super::Id,
+ ) -> (JoinHandle<T::Output>, Option<Notified<S>>)
+ where
+ S: Schedule,
+ T: Future + Send + 'static,
+ T::Output: Send + 'static,
+ {
+ let (task, notified, join) = super::new_task(task, scheduler, id);
+
+ unsafe {
+ // safety: We just created the task, so we have exclusive access
+ // to the field.
+ task.header().set_owner_id(self.id);
+ }
+
+ let mut lock = self.inner.lock();
+ if lock.closed {
+ drop(lock); // Release the mutex before shutting the task down.
+ drop(notified);
+ task.shutdown();
+ (join, None)
+ } else {
+ lock.list.push_front(task);
+ (join, Some(notified))
+ }
+ }
+
+ /// Asserts that the given task is owned by this OwnedTasks and converts it to
+ /// a LocalNotified, giving the thread permission to poll this task.
+ #[inline]
+ pub(crate) fn assert_owner(&self, task: Notified<S>) -> LocalNotified<S> {
+ assert_eq!(task.header().get_owner_id(), self.id); // Panics if the owner ids differ.
+
+ // safety: All tasks bound to this OwnedTasks are Send, so it is safe
+ // to poll it on this thread no matter what thread we are on.
+ LocalNotified {
+ task: task.0,
+ _not_send: PhantomData,
+ }
+ }
+
+ /// Shuts down all tasks in the collection. This call also closes the
+ /// collection, preventing new items from being added.
+ pub(crate) fn close_and_shutdown_all(&self)
+ where
+ S: Schedule,
+ {
+ // The first iteration of the loop was unrolled so it can set the
+ // closed bool.
+ let first_task = {
+ let mut lock = self.inner.lock();
+ lock.closed = true;
+ lock.list.pop_back()
+ };
+ match first_task {
+ Some(task) => task.shutdown(),
+ None => return,
+ }
+
+ loop {
+ let task = match self.inner.lock().list.pop_back() { // The guard is a temporary of this statement, so it is released before shutdown() runs.
+ Some(task) => task,
+ None => return,
+ };
+
+ task.shutdown();
+ }
+ }
+
+ pub(crate) fn active_tasks_count(&self) -> usize { // Reads the list's count under the lock.
+ self.inner.lock().list.count()
+ }
+
+ pub(crate) fn remove(&self, task: &Task<S>) -> Option<Task<S>> { // Unlinks `task` from this collection; None for unowned tasks.
+ let task_id = task.header().get_owner_id();
+ if task_id == 0 {
+ // The task is unowned.
+ return None;
+ }
+
+ assert_eq!(task_id, self.id); // Panics if the task is owned by a different collection.
+
+ // safety: We just checked that the provided task is not in some other
+ // linked list.
+ unsafe { self.inner.lock().list.remove(task.header_ptr()) }
+ }
+
+ pub(crate) fn is_empty(&self) -> bool { // True when the list holds no tasks.
+ self.inner.lock().list.is_empty()
+ }
+}
+
+cfg_taskdump! {
+ impl<S: 'static> OwnedTasks<S> {
+ /// Locks the task list and calls `f` on each task stored in it.
+ pub(crate) fn for_each<F>(&self, f: F)
+ where
+ F: FnMut(&Task<S>)
+ {
+ self.inner.lock().list.for_each(f) // The lock guard lives until the traversal returns.
+ }
+ }
+}
+
+impl<S: 'static> LocalOwnedTasks<S> {
+ pub(crate) fn new() -> Self { // Creates an open, empty collection with a fresh id.
+ Self {
+ inner: UnsafeCell::new(OwnedTasksInner {
+ list: LinkedList::new(),
+ closed: false,
+ }),
+ id: get_next_id(),
+ _not_send_or_sync: PhantomData,
+ }
+ }
+
+ pub(crate) fn bind<T>( // Like OwnedTasks::bind, but without Send bounds on the future or its output.
+ &self,
+ task: T,
+ scheduler: S,
+ id: super::Id,
+ ) -> (JoinHandle<T::Output>, Option<Notified<S>>)
+ where
+ S: Schedule,
+ T: Future + 'static,
+ T::Output: 'static,
+ {
+ let (task, notified, join) = super::new_task(task, scheduler, id);
+
+ unsafe {
+ // safety: We just created the task, so we have exclusive access
+ // to the field.
+ task.header().set_owner_id(self.id);
+ }
+
+ if self.is_closed() { // Closed collection: shut the task down and return no Notified.
+ drop(notified);
+ task.shutdown();
+ (join, None)
+ } else {
+ self.with_inner(|inner| {
+ inner.list.push_front(task);
+ });
+ (join, Some(notified))
+ }
+ }
+
+ /// Shuts down all tasks in the collection. This call also closes the
+ /// collection, preventing new items from being added.
+ pub(crate) fn close_and_shutdown_all(&self)
+ where
+ S: Schedule,
+ {
+ self.with_inner(|inner| inner.closed = true);
+
+ while let Some(task) = self.with_inner(|inner| inner.list.pop_back()) { // with_inner returns before shutdown() runs, so the calls never nest.
+ task.shutdown();
+ }
+ }
+
+ pub(crate) fn remove(&self, task: &Task<S>) -> Option<Task<S>> { // Unlinks `task` from this collection; None for unowned tasks.
+ let task_id = task.header().get_owner_id();
+ if task_id == 0 {
+ // The task is unowned.
+ return None;
+ }
+
+ assert_eq!(task_id, self.id); // Panics if the task is owned by a different collection.
+
+ self.with_inner(|inner|
+ // safety: We just checked that the provided task is not in some
+ // other linked list.
+ unsafe { inner.list.remove(task.header_ptr()) })
+ }
+
+ /// Asserts that the given task is owned by this LocalOwnedTasks and converts
+ /// it to a LocalNotified, giving the thread permission to poll this task.
+ #[inline]
+ pub(crate) fn assert_owner(&self, task: Notified<S>) -> LocalNotified<S> {
+ assert_eq!(task.header().get_owner_id(), self.id); // Panics if the owner ids differ.
+
+ // safety: The task was bound to this LocalOwnedTasks, and the
+ // LocalOwnedTasks is not Send or Sync, so we are on the right thread
+ // for polling this task.
+ LocalNotified {
+ task: task.0,
+ _not_send: PhantomData,
+ }
+ }
+
+ #[inline]
+ fn with_inner<F, T>(&self, f: F) -> T // Runs `f` with mutable access to the inner state and returns its result.
+ where
+ F: FnOnce(&mut OwnedTasksInner<S>) -> T,
+ {
+ // safety: This type is not Sync, so concurrent calls of this method
+ // can't happen. Furthermore, all uses of this method in this file make
+ // sure that they don't call `with_inner` recursively.
+ self.inner.with_mut(|ptr| unsafe { f(&mut *ptr) })
+ }
+
+ pub(crate) fn is_closed(&self) -> bool { // True once close_and_shutdown_all() has set the flag.
+ self.with_inner(|inner| inner.closed)
+ }
+
+ pub(crate) fn is_empty(&self) -> bool { // True when the list holds no tasks.
+ self.with_inner(|inner| inner.list.is_empty())
+ }
+}
+
+#[cfg(all(test))]
+mod tests {
+ use super::*;
+
+ // This test may run in parallel with other tests, so we only test that ids
+ // come in increasing order.
+ #[test]
+ fn test_id_not_broken() {
+ let mut last_id = get_next_id();
+ assert_ne!(last_id, 0); // Zero is reserved for unowned tasks.
+
+ for _ in 0..1000 {
+ let next_id = get_next_id();
+ assert_ne!(next_id, 0);
+ assert!(last_id < next_id); // Strictly increasing; gaps from other tests are allowed.
+ last_id = next_id;
+ }
+ }
+}
diff --git a/third_party/rust/tokio/src/runtime/task/mod.rs b/third_party/rust/tokio/src/runtime/task/mod.rs
new file mode 100644
index 0000000000..932552fb91
--- /dev/null
+++ b/third_party/rust/tokio/src/runtime/task/mod.rs
@@ -0,0 +1,497 @@
+//! The task module.
+//!
+//! The task module contains the code that manages spawned tasks and provides a
+//! safe API for the rest of the runtime to use. Each task in a runtime is
+//! stored in an OwnedTasks or LocalOwnedTasks object.
+//!
+//! # Task reference types
+//!
+//! A task is usually referenced by multiple handles, and there are several
+//! types of handles.
+//!
+//! * OwnedTask - tasks stored in an OwnedTasks or LocalOwnedTasks are of this
+//! reference type.
+//!
+//! * JoinHandle - each task has a JoinHandle that allows access to the output
+//! of the task.
+//!
+//! * Waker - every waker for a task has this reference type. There can be any
+//! number of waker references.
+//!
+//! * Notified - tracks whether the task is notified.
+//!
+//! * Unowned - this task reference type is used for tasks not stored in any
+//! runtime. Mainly used for blocking tasks, but also in tests.
+//!
+//! The task uses a reference count to keep track of how many active references
+//! exist. The Unowned reference type takes up two ref-counts. All other
+//! reference types take up a single ref-count.
+//!
+//! Besides the waker type, each task has at most one of each reference type.
+//!
+//! # State
+//!
+//! The task stores its state in an atomic usize with various bitfields for the
+//! necessary information. The state has the following bitfields:
+//!
+//! * RUNNING - Tracks whether the task is currently being polled or cancelled.
+//! This bit functions as a lock around the task.
+//!
+//! * COMPLETE - Is one once the future has fully completed and has been
+//! dropped. Never unset once set. Never set together with RUNNING.
+//!
+//! * NOTIFIED - Tracks whether a Notified object currently exists.
+//!
+//! * CANCELLED - Is set to one for tasks that should be cancelled as soon as
+//! possible. May take any value for completed tasks.
+//!
+//! * JOIN_INTEREST - Is set to one if there exists a JoinHandle.
+//!
+//! * JOIN_WAKER - Acts as an access control bit for the join handle waker. The
+//! protocol for its usage is described below.
+//!
+//! The rest of the bits are used for the ref-count.
+//!
+//! # Fields in the task
+//!
+//! The task has various fields. This section describes how and when it is safe
+//! to access a field.
+//!
+//! * The state field is accessed with atomic instructions.
+//!
+//! * The OwnedTask reference has exclusive access to the `owned` field.
+//!
+//! * The Notified reference has exclusive access to the `queue_next` field.
+//!
+//! * The `owner_id` field can be set as part of construction of the task, but
+//! is otherwise immutable and anyone can access the field immutably without
+//! synchronization.
+//!
+//! * If COMPLETE is one, then the JoinHandle has exclusive access to the
+//! stage field. If COMPLETE is zero, then the RUNNING bitfield functions as
+//! a lock for the stage field, and it can be accessed only by the thread
+//! that set RUNNING to one.
+//!
+//! * The waker field may be concurrently accessed by different threads: in one
+//! thread the runtime may complete a task and *read* the waker field to
+//! invoke the waker, and in another thread the task's JoinHandle may be
+//! polled, and if the task hasn't yet completed, the JoinHandle may *write*
+//! a waker to the waker field. The JOIN_WAKER bit ensures safe access by
+//! multiple threads to the waker field using the following rules:
+//!
+//! 1. JOIN_WAKER is initialized to zero.
+//!
+//! 2. If JOIN_WAKER is zero, then the JoinHandle has exclusive (mutable)
+//! access to the waker field.
+//!
+//! 3. If JOIN_WAKER is one, then the JoinHandle has shared (read-only)
+//! access to the waker field.
+//!
+//! 4. If JOIN_WAKER is one and COMPLETE is one, then the runtime has shared
+//! (read-only) access to the waker field.
+//!
+//! 5. If the JoinHandle needs to write to the waker field, then the
+//! JoinHandle needs to (i) successfully set JOIN_WAKER to zero if it is
+//! not already zero to gain exclusive access to the waker field per rule
+//! 2, (ii) write a waker, and (iii) successfully set JOIN_WAKER to one.
+//!
+//! 6. The JoinHandle can change JOIN_WAKER only if COMPLETE is zero (i.e.
+//! the task hasn't yet completed).
+//!
+//! Rule 6 implies that the steps (i) or (iii) of rule 5 may fail due to a
+//! race. If step (i) fails, then the attempt to write a waker is aborted. If
+//! step (iii) fails because COMPLETE is set to one by another thread after
+//! step (i), then the waker field is cleared. Once COMPLETE is one (i.e.
+//! task has completed), the JoinHandle will not modify JOIN_WAKER. After the
+//! runtime sets COMPLETE to one, it invokes the waker if there is one.
+//!
+//! All other fields are immutable and can be accessed immutably without
+//! synchronization by anyone.
+//!
+//! # Safety
+//!
+//! This section goes through various situations and explains why the API is
+//! safe in that situation.
+//!
+//! ## Polling or dropping the future
+//!
+//! Any mutable access to the future happens after obtaining a lock by modifying
+//! the RUNNING field, so exclusive access is ensured.
+//!
+//! When the task completes, exclusive access to the output is transferred to
+//! the JoinHandle. If the JoinHandle is already dropped when the transition to
+//! complete happens, the thread performing that transition retains exclusive
+//! access to the output and should immediately drop it.
+//!
+//! ## Non-Send futures
+//!
+//! If a future is not Send, then it is bound to a LocalOwnedTasks. The future
+//! will only ever be polled or dropped given a LocalNotified or inside a call
+//! to LocalOwnedTasks::close_and_shutdown_all. In either case, it is guaranteed
+//! that the future is on the right thread.
+//!
+//! If the task is never removed from the LocalOwnedTasks, then it is leaked, so
+//! there is no risk that the task is dropped on some other thread when the last
+//! ref-count drops.
+//!
+//! ## Non-Send output
+//!
+//! When a task completes, the output is placed in the stage of the task. Then,
+//! a transition that sets COMPLETE to true is performed, and the value of
+//! JOIN_INTEREST when this transition happens is read.
+//!
+//! If JOIN_INTEREST is zero when the transition to COMPLETE happens, then the
+//! output is immediately dropped.
+//!
+//! If JOIN_INTEREST is one when the transition to COMPLETE happens, then the
+//! JoinHandle is responsible for cleaning up the output. If the output is not
+//! Send, then this happens:
+//!
+//! 1. The output is created on the thread that the future was polled on. Since
+//! only non-Send futures can have non-Send output, the future was polled on
+//! the thread that the future was spawned from.
+//! 2. Since `JoinHandle<Output>` is not Send if Output is not Send, the
+//! JoinHandle is also on the thread that the future was spawned from.
+//! 3. Thus, the JoinHandle will not move the output across threads when it
+//! takes or drops the output.
+//!
+//! ## Recursive poll/shutdown
+//!
+//! Calling poll from inside a shutdown call or vice-versa is not prevented by
+//! the API exposed by the task module, so this has to be safe. In either case,
+//! the lock in the RUNNING bitfield makes the inner call return immediately. If
+//! the inner call is a `shutdown` call, then the CANCELLED bit is set, and the
+//! poll call will notice it when the poll finishes, and the task is cancelled
+//! at that point.
+
+// Some task infrastructure is here to support `JoinSet`, which is currently
+// unstable. This should be removed once `JoinSet` is stabilized.
+#![cfg_attr(not(tokio_unstable), allow(dead_code))]
+
+mod core;
+use self::core::Cell;
+use self::core::Header;
+
+mod error;
+pub use self::error::JoinError;
+
+mod harness;
+use self::harness::Harness;
+
+mod id;
+#[cfg_attr(not(tokio_unstable), allow(unreachable_pub))]
+pub use id::{id, try_id, Id};
+
+#[cfg(feature = "rt")]
+mod abort;
+mod join;
+
+#[cfg(feature = "rt")]
+pub use self::abort::AbortHandle;
+
+pub use self::join::JoinHandle;
+
+mod list;
+pub(crate) use self::list::{LocalOwnedTasks, OwnedTasks};
+
+mod raw;
+pub(crate) use self::raw::RawTask;
+
+mod state;
+use self::state::State;
+
+mod waker;
+
+cfg_taskdump! {
+ pub(crate) mod trace;
+}
+
+use crate::future::Future;
+use crate::util::linked_list;
+
+use std::marker::PhantomData;
+use std::ptr::NonNull;
+use std::{fmt, mem};
+
+/// An owned handle to the task, tracked by ref count (decremented on drop).
+#[repr(transparent)]
+pub(crate) struct Task<S: 'static> {
+ raw: RawTask,
+ _p: PhantomData<S>, // Carries the scheduler type without storing a scheduler.
+}
+
+unsafe impl<S> Send for Task<S> {}
+unsafe impl<S> Sync for Task<S> {}
+
+/// A handle recording that the task was notified; wraps a `Task` reference.
+#[repr(transparent)]
+pub(crate) struct Notified<S: 'static>(Task<S>);
+
+// safety: This type cannot be used to touch the task without first verifying
+// that the value is on a thread where it is safe to poll the task.
+unsafe impl<S: Schedule> Send for Notified<S> {}
+unsafe impl<S: Schedule> Sync for Notified<S> {}
+
+/// A non-Send variant of Notified with the invariant that it is on a thread
+/// where it is safe to poll it.
+#[repr(transparent)]
+pub(crate) struct LocalNotified<S: 'static> {
+ task: Task<S>,
+ _not_send: PhantomData<*const ()>, // Raw-pointer marker: keeps this type from being Send.
+}
+
+/// A task that is not owned by any OwnedTasks. Used for blocking tasks.
+/// This type holds two ref-counts.
+pub(crate) struct UnownedTask<S: 'static> {
+ raw: RawTask,
+ _p: PhantomData<S>, // Carries the scheduler type without storing a scheduler.
+}
+
+// safety: This type can only be created given a Send task.
+unsafe impl<S> Send for UnownedTask<S> {}
+unsafe impl<S> Sync for UnownedTask<S> {}
+
+/// Result of a task as sent back to the joiner: its output, or a `JoinError`.
+pub(crate) type Result<T> = std::result::Result<T, JoinError>;
+
+pub(crate) trait Schedule: Sync + Sized + 'static {
+ /// The task has completed work and is ready to be released. The scheduler
+ /// should release it immediately and return it. The task module will batch
+ /// the ref-dec with setting other options.
+ ///
+ /// If the scheduler has already released the task, then None is returned.
+ fn release(&self, task: &Task<Self>) -> Option<Task<Self>>;
+
+ /// Schedules the task, taking ownership of its `Notified` handle.
+ fn schedule(&self, task: Notified<Self>);
+
+ /// Schedules the task to run in the near future, yielding the thread to
+ /// other tasks. The default implementation simply forwards to `schedule`.
+ fn yield_now(&self, task: Notified<Self>) {
+ self.schedule(task);
+ }
+
+ /// Polling the task resulted in a panic. Should the runtime shutdown?
+ fn unhandled_panic(&self) {
+ // By default, do nothing. This maintains the 1.0 behavior.
+ }
+}
+
+cfg_rt! {
+ /// This is the constructor for a new task. Three references to the task are
+ /// created: a Task, a Notified, and a JoinHandle. The Task is usually put
+ /// into an OwnedTasks immediately. The Notified is sent to the scheduler as
+ /// an ordinary notification.
+ fn new_task<T, S>(
+ task: T,
+ scheduler: S,
+ id: Id,
+ ) -> (Task<S>, Notified<S>, JoinHandle<T::Output>)
+ where
+ S: Schedule,
+ T: Future + 'static,
+ T::Output: 'static,
+ {
+ let raw = RawTask::new::<T, S>(task, scheduler, id); // `RawTask` is `Copy`: the three handles below share this one pointer.
+ let task = Task {
+ raw,
+ _p: PhantomData,
+ };
+ let notified = Notified(Task {
+ raw,
+ _p: PhantomData,
+ });
+ let join = JoinHandle::new(raw);
+
+ (task, notified, join)
+ }
+
+ /// Creates a new task with an associated join handle. This method is used
+ /// only when the task is not going to be stored in an `OwnedTasks` list.
+ ///
+ /// Currently only blocking tasks use this method.
+ pub(crate) fn unowned<T, S>(task: T, scheduler: S, id: Id) -> (UnownedTask<S>, JoinHandle<T::Output>)
+ where
+ S: Schedule,
+ T: Send + Future + 'static,
+ T::Output: Send + 'static,
+ {
+ let (task, notified, join) = new_task(task, scheduler, id);
+
+ // This transfers the ref-count of task and notified into an UnownedTask.
+ // This is valid because an UnownedTask holds two ref-counts.
+ let unowned = UnownedTask {
+ raw: task.raw,
+ _p: PhantomData,
+ };
+ std::mem::forget(task); // Skip Drop so the ref-count is not decremented.
+ std::mem::forget(notified); // Same for the second ref-count.
+
+ (unowned, join)
+ }
+}
+
+impl<S: 'static> Task<S> {
+ unsafe fn new(raw: RawTask) -> Task<S> { // Wraps `raw` as-is; no ref-count is touched here.
+ Task {
+ raw,
+ _p: PhantomData,
+ }
+ }
+
+ unsafe fn from_raw(ptr: NonNull<Header>) -> Task<S> { // Same as `new`, starting from a bare header pointer.
+ Task::new(RawTask::from_raw(ptr))
+ }
+
+ #[cfg(all(
+ tokio_unstable,
+ tokio_taskdump,
+ feature = "rt",
+ target_os = "linux",
+ any(target_arch = "aarch64", target_arch = "x86", target_arch = "x86_64")
+ ))]
+ pub(super) fn as_raw(&self) -> RawTask { // Returns a copy of the raw handle.
+ self.raw
+ }
+
+ fn header(&self) -> &Header { // Borrows the task header.
+ self.raw.header()
+ }
+
+ fn header_ptr(&self) -> NonNull<Header> { // Returns the header pointer itself.
+ self.raw.header_ptr()
+ }
+}
+
+impl<S: 'static> Notified<S> {
+ fn header(&self) -> &Header { // Forwards to the wrapped Task.
+ self.0.header()
+ }
+}
+
+impl<S: 'static> Notified<S> {
+ pub(crate) unsafe fn from_raw(ptr: RawTask) -> Notified<S> { // Rebuilds a Notified from a raw handle; no ref-count is touched here.
+ Notified(Task::new(ptr))
+ }
+}
+
+impl<S: 'static> Notified<S> {
+ pub(crate) fn into_raw(self) -> RawTask { // Converts into the raw handle without changing the ref-count.
+ let raw = self.0.raw;
+ mem::forget(self); // Skip Drop so the wrapped Task's ref-count is not decremented.
+ raw
+ }
+}
+
+impl<S: Schedule> Task<S> {
+ /// Preemptively cancels the task as part of the shutdown process.
+ pub(crate) fn shutdown(self) {
+ let raw = self.raw;
+ mem::forget(self); // Skip Drop; the ref-count is handed to the vtable shutdown call.
+ raw.shutdown();
+ }
+}
+
+impl<S: Schedule> LocalNotified<S> {
+ /// Runs the task.
+ pub(crate) fn run(self) {
+ let raw = self.task.raw;
+ mem::forget(self); // Skip Drop; the ref-count is handed to the vtable poll call.
+ raw.poll();
+ }
+}
+
+impl<S: Schedule> UnownedTask<S> {
+ // Used in test of the inject queue.
+ #[cfg(test)]
+ #[cfg_attr(tokio_wasm, allow(dead_code))]
+ pub(super) fn into_notified(self) -> Notified<S> {
+ Notified(self.into_task())
+ }
+
+ fn into_task(self) -> Task<S> {
+ // Convert into a task.
+ let task = Task {
+ raw: self.raw,
+ _p: PhantomData,
+ };
+ mem::forget(self); // Skip UnownedTask's Drop, which would release both ref-counts.
+
+ // Drop a ref-count since an UnownedTask holds two.
+ task.header().state.ref_dec();
+
+ task
+ }
+
+ pub(crate) fn run(self) {
+ let raw = self.raw;
+ mem::forget(self); // Skip UnownedTask's Drop, which would release both ref-counts.
+
+ // Transfer one ref-count to a Task object.
+ let task = Task::<S> {
+ raw,
+ _p: PhantomData,
+ };
+
+ // Use the other ref-count to poll the task.
+ raw.poll();
+ // Decrement our extra ref-count
+ drop(task);
+ }
+
+ pub(crate) fn shutdown(self) { // Reduce to a single-reference Task, then shut that down.
+ self.into_task().shutdown()
+ }
+}
+
+impl<S: 'static> Drop for Task<S> {
+ fn drop(&mut self) {
+ // Decrement the ref count
+ if self.header().state.ref_dec() { // A true result is treated as "this was the last reference".
+ // Deallocate if this is the final ref count
+ self.raw.dealloc();
+ }
+ }
+}
+
+impl<S: 'static> Drop for UnownedTask<S> {
+ fn drop(&mut self) {
+ // Decrement the ref count by two, since an UnownedTask holds two ref-counts
+ if self.raw.header().state.ref_dec_twice() { // A true result is treated as "these were the last references".
+ // Deallocate if this is the final ref count
+ self.raw.dealloc();
+ }
+ }
+}
+
+impl<S> fmt::Debug for Task<S> {
+ fn fmt(&self, fmt: &mut fmt::Formatter<'_>) -> fmt::Result {
+ write!(fmt, "Task({:p})", self.header()) // Prints the header address only, not the task contents.
+ }
+}
+
+impl<S> fmt::Debug for Notified<S> {
+ fn fmt(&self, fmt: &mut fmt::Formatter<'_>) -> fmt::Result {
+ write!(fmt, "task::Notified({:p})", self.0.header()) // Prints the header address only, not the task contents.
+ }
+}
+
+/// # Safety
+///
+/// Tasks are pinned.
+unsafe impl<S> linked_list::Link for Task<S> {
+ type Handle = Task<S>;
+ type Target = Header;
+
+ fn as_raw(handle: &Task<S>) -> NonNull<Header> { // The list node is identified by the task's header pointer.
+ handle.raw.header_ptr()
+ }
+
+ unsafe fn from_raw(ptr: NonNull<Header>) -> Task<S> { // Rebuilds the handle from the header pointer.
+ Task::from_raw(ptr)
+ }
+
+ unsafe fn pointers(target: NonNull<Header>) -> NonNull<linked_list::Pointers<Header>> {
+ self::core::Trailer::addr_of_owned(Header::get_trailer(target)) // The list links are obtained from the task's trailer.
+ }
+}
diff --git a/third_party/rust/tokio/src/runtime/task/raw.rs b/third_party/rust/tokio/src/runtime/task/raw.rs
new file mode 100644
index 0000000000..8078859285
--- /dev/null
+++ b/third_party/rust/tokio/src/runtime/task/raw.rs
@@ -0,0 +1,317 @@
+use crate::future::Future;
+use crate::runtime::task::core::{Core, Trailer};
+use crate::runtime::task::{Cell, Harness, Header, Id, Schedule, State};
+
+use std::ptr::NonNull;
+use std::task::{Poll, Waker};
+
+/// Raw task handle: a `Copy` wrapper around the pointer to the task's header.
+pub(crate) struct RawTask {
+ ptr: NonNull<Header>,
+}
+
+pub(super) struct Vtable { // Type-erased function table; one instance per `(T, S)` pair.
+ /// Polls the future.
+ pub(super) poll: unsafe fn(NonNull<Header>),
+
+ /// Schedules the task for execution on the runtime.
+ pub(super) schedule: unsafe fn(NonNull<Header>),
+
+ /// Deallocates the memory.
+ pub(super) dealloc: unsafe fn(NonNull<Header>),
+
+ /// Reads the task output, if complete. The `*mut ()` is the erased output slot.
+ pub(super) try_read_output: unsafe fn(NonNull<Header>, *mut (), &Waker),
+
+ /// The join handle has been dropped.
+ pub(super) drop_join_handle_slow: unsafe fn(NonNull<Header>),
+
+ /// An abort handle has been dropped.
+ pub(super) drop_abort_handle: unsafe fn(NonNull<Header>),
+
+ /// Scheduler is being shutdown.
+ pub(super) shutdown: unsafe fn(NonNull<Header>),
+
+ /// The number of bytes that the `trailer` field is offset from the header.
+ pub(super) trailer_offset: usize,
+
+ /// The number of bytes that the `scheduler` field is offset from the header.
+ pub(super) scheduler_offset: usize,
+
+ /// The number of bytes that the `id` field is offset from the header.
+ pub(super) id_offset: usize,
+}
+
+/// Gets the vtable for the requested `T` and `S` generics.
+pub(super) fn vtable<T: Future, S: Schedule>() -> &'static Vtable {
+ &Vtable { // Every field is a constant expression, so the value can be promoted to a `'static`.
+ poll: poll::<T, S>,
+ schedule: schedule::<S>,
+ dealloc: dealloc::<T, S>,
+ try_read_output: try_read_output::<T, S>,
+ drop_join_handle_slow: drop_join_handle_slow::<T, S>,
+ drop_abort_handle: drop_abort_handle::<T, S>,
+ shutdown: shutdown::<T, S>,
+ trailer_offset: OffsetHelper::<T, S>::TRAILER_OFFSET,
+ scheduler_offset: OffsetHelper::<T, S>::SCHEDULER_OFFSET,
+ id_offset: OffsetHelper::<T, S>::ID_OFFSET,
+ }
+}
+
+/// Calling `get_trailer_offset` directly in vtable doesn't work because it
+/// prevents the vtable from being promoted to a static reference.
+///
+/// See this thread for more info:
+/// <https://users.rust-lang.org/t/custom-vtables-with-integers/78508>
+struct OffsetHelper<T, S>(T, S);
+impl<T: Future, S: Schedule> OffsetHelper<T, S> {
+ // Pass `size_of`/`align_of` as arguments rather than calling them directly
+ // inside `get_trailer_offset` because trait bounds on generic parameters
+ // of const fn are unstable on our MSRV.
+ const TRAILER_OFFSET: usize = get_trailer_offset(
+ std::mem::size_of::<Header>(),
+ std::mem::size_of::<Core<T, S>>(),
+ std::mem::align_of::<Core<T, S>>(),
+ std::mem::align_of::<Trailer>(),
+ );
+
+ // The `scheduler` is the first field of `Core`, so it has the same
+ // offset as `Core`.
+ const SCHEDULER_OFFSET: usize = get_core_offset(
+ std::mem::size_of::<Header>(),
+ std::mem::align_of::<Core<T, S>>(),
+ );
+
+ const ID_OFFSET: usize = get_id_offset( // The computation places the id right after the scheduler, padded to `Id`'s alignment.
+ std::mem::size_of::<Header>(),
+ std::mem::align_of::<Core<T, S>>(),
+ std::mem::size_of::<S>(),
+ std::mem::align_of::<Id>(),
+ );
+}
+
+/// Computes the offset of the `Trailer` field in `Cell<T, S>` using the
+/// `#[repr(C)]` algorithm.
+///
+/// Pseudo-code for the `#[repr(C)]` algorithm can be found here:
+/// <https://doc.rust-lang.org/reference/type-layout.html#reprc-structs>
+const fn get_trailer_offset(
+ header_size: usize,
+ core_size: usize,
+ core_align: usize,
+ trailer_align: usize,
+) -> usize {
+ let mut offset = header_size; // Start just past the header.
+
+ let core_misalign = offset % core_align;
+ if core_misalign > 0 {
+ offset += core_align - core_misalign; // Pad up to the next multiple of `core_align`.
+ }
+ offset += core_size; // Step over the core itself.
+
+ let trailer_misalign = offset % trailer_align;
+ if trailer_misalign > 0 {
+ offset += trailer_align - trailer_misalign; // Pad up to the next multiple of `trailer_align`.
+ }
+
+ offset
+}
+
+/// Computes the offset of the `Core<T, S>` field in `Cell<T, S>` using the
+/// `#[repr(C)]` algorithm.
+///
+/// Pseudo-code for the `#[repr(C)]` algorithm can be found here:
+/// <https://doc.rust-lang.org/reference/type-layout.html#reprc-structs>
+const fn get_core_offset(header_size: usize, core_align: usize) -> usize {
+ let mut offset = header_size; // Start just past the header.
+
+ let core_misalign = offset % core_align;
+ if core_misalign > 0 {
+ offset += core_align - core_misalign; // Pad up to the next multiple of `core_align`.
+ }
+
+ offset
+}
+
+/// Computes the offset of the `Id` field in `Cell<T, S>` using the
+/// `#[repr(C)]` algorithm.
+///
+/// Pseudo-code for the `#[repr(C)]` algorithm can be found here:
+/// <https://doc.rust-lang.org/reference/type-layout.html#reprc-structs>
+const fn get_id_offset(
+ header_size: usize,
+ core_align: usize,
+ scheduler_size: usize,
+ id_align: usize,
+) -> usize {
+ let mut offset = get_core_offset(header_size, core_align); // Offset of the core, where the scheduler starts.
+ offset += scheduler_size; // Step over the scheduler.
+
+ let id_misalign = offset % id_align;
+ if id_misalign > 0 {
+ offset += id_align - id_misalign; // Pad up to the next multiple of `id_align`.
+ }
+
+ offset
+}
+
+impl RawTask {
+ pub(super) fn new<T, S>(task: T, scheduler: S, id: Id) -> RawTask
+ where
+ T: Future,
+ S: Schedule,
+ {
+ let ptr = Box::into_raw(Cell::<_, S>::new(task, scheduler, State::new(), id)); // Take ownership of the boxed cell as a raw pointer.
+ let ptr = unsafe { NonNull::new_unchecked(ptr as *mut Header) }; // `Box::into_raw` is never null. NOTE(review): the cast presumes the header sits at offset zero of the cell — confirm.
+
+ RawTask { ptr }
+ }
+
+ pub(super) unsafe fn from_raw(ptr: NonNull<Header>) -> RawTask { // Wraps the pointer as-is; no validation is performed here.
+ RawTask { ptr }
+ }
+
+ pub(super) fn header_ptr(&self) -> NonNull<Header> { // Returns the stored header pointer.
+ self.ptr
+ }
+
+ pub(super) fn trailer_ptr(&self) -> NonNull<Trailer> { // Locates the trailer starting from the header pointer.
+ unsafe { Header::get_trailer(self.ptr) }
+ }
+
+ /// Returns a reference to the task's header.
+ pub(super) fn header(&self) -> &Header {
+ unsafe { self.ptr.as_ref() }
+ }
+
+ /// Returns a reference to the task's trailer.
+ pub(super) fn trailer(&self) -> &Trailer {
+ unsafe { &*self.trailer_ptr().as_ptr() }
+ }
+
+ /// Returns a reference to the task's state.
+ pub(super) fn state(&self) -> &State {
+ &self.header().state
+ }
+
+ /// Safety: mutual exclusion is required to call this function.
+ pub(crate) fn poll(self) {
+ let vtable = self.header().vtable; // Dispatch through the type-erased vtable stored in the header.
+ unsafe { (vtable.poll)(self.ptr) }
+ }
+
+ pub(super) fn schedule(self) { // Vtable dispatch: schedules the task.
+ let vtable = self.header().vtable;
+ unsafe { (vtable.schedule)(self.ptr) }
+ }
+
+ pub(super) fn dealloc(self) { // Vtable dispatch: deallocates the task memory.
+ let vtable = self.header().vtable;
+ unsafe {
+ (vtable.dealloc)(self.ptr);
+ }
+ }
+
+ /// Safety: `dst` must be a `*mut Poll<super::Result<T::Output>>` where `T`
+ /// is the future stored by the task.
+ pub(super) unsafe fn try_read_output(self, dst: *mut (), waker: &Waker) {
+ let vtable = self.header().vtable;
+ (vtable.try_read_output)(self.ptr, dst, waker);
+ }
+
+ pub(super) fn drop_join_handle_slow(self) { // Vtable dispatch: the join handle has been dropped.
+ let vtable = self.header().vtable;
+ unsafe { (vtable.drop_join_handle_slow)(self.ptr) }
+ }
+
+ pub(super) fn drop_abort_handle(self) { // Vtable dispatch: an abort handle has been dropped.
+ let vtable = self.header().vtable;
+ unsafe { (vtable.drop_abort_handle)(self.ptr) }
+ }
+
+ pub(super) fn shutdown(self) { // Vtable dispatch: the scheduler is shutting down.
+ let vtable = self.header().vtable;
+ unsafe { (vtable.shutdown)(self.ptr) }
+ }
+
+ /// Increments the task's reference count.
+ ///
+ /// Currently, this is used only when creating an `AbortHandle`.
+ pub(super) fn ref_inc(self) {
+ self.header().state.ref_inc();
+ }
+
+ /// Gets the queue-next pointer.
+ ///
+ /// This is for usage by the injection queue
+ ///
+ /// Safety: make sure only one queue uses this and access is synchronized.
+ pub(crate) unsafe fn get_queue_next(self) -> Option<RawTask> {
+ self.header()
+ .queue_next
+ .with(|ptr| *ptr) // Copy out the stored optional header pointer.
+ .map(|p| RawTask::from_raw(p))
+ }
+
+ /// Sets the queue-next pointer
+ ///
+ /// This is for usage by the injection queue
+ ///
+ /// Safety: make sure only one queue uses this and access is synchronized.
+ pub(crate) unsafe fn set_queue_next(self, val: Option<RawTask>) {
+ self.header().set_next(val.map(|task| task.ptr));
+ }
+}
+
+impl Clone for RawTask {
+ fn clone(&self) -> Self {
+ RawTask { ptr: self.ptr } // Copies the pointer only; the task's ref-count is not changed.
+ }
+}
+
+impl Copy for RawTask {}
+
+unsafe fn poll<T: Future, S: Schedule>(ptr: NonNull<Header>) { // Vtable entry: restore the typed harness, then poll it.
+ let harness = Harness::<T, S>::from_raw(ptr);
+ harness.poll();
+}
+
+unsafe fn schedule<S: Schedule>(ptr: NonNull<Header>) { // Vtable entry: needs only `S`, not the future type.
+ use crate::runtime::task::{Notified, Task};
+
+ let scheduler = Header::get_scheduler::<S>(ptr); // Find the scheduler stored inside the task itself.
+ scheduler
+ .as_ref()
+ .schedule(Notified(Task::from_raw(ptr.cast()))); // Wrap the pointer in a Notified and pass it to that scheduler.
+}
+
+unsafe fn dealloc<T: Future, S: Schedule>(ptr: NonNull<Header>) { // Vtable entry: restore the typed harness, then deallocate.
+ let harness = Harness::<T, S>::from_raw(ptr);
+ harness.dealloc();
+}
+
+unsafe fn try_read_output<T: Future, S: Schedule>( // Vtable entry: `dst` is the caller's type-erased output slot.
+ ptr: NonNull<Header>,
+ dst: *mut (),
+ waker: &Waker,
+) {
+ let out = &mut *(dst as *mut Poll<super::Result<T::Output>>); // Undo the erasure; the caller must have passed exactly this type.
+
+ let harness = Harness::<T, S>::from_raw(ptr);
+ harness.try_read_output(out, waker);
+}
+
+unsafe fn drop_join_handle_slow<T: Future, S: Schedule>(ptr: NonNull<Header>) { // Vtable entry: restore the typed harness, then forward.
+ let harness = Harness::<T, S>::from_raw(ptr);
+ harness.drop_join_handle_slow()
+}
+
+unsafe fn drop_abort_handle<T: Future, S: Schedule>(ptr: NonNull<Header>) { // Vtable entry for a dropped abort handle.
+ let harness = Harness::<T, S>::from_raw(ptr);
+ harness.drop_reference(); // Dropping an abort handle is handled as dropping one reference.
+}
+
+unsafe fn shutdown<T: Future, S: Schedule>(ptr: NonNull<Header>) { // Vtable entry: restore the typed harness, then shut down.
+ let harness = Harness::<T, S>::from_raw(ptr);
+ harness.shutdown()
+}
diff --git a/third_party/rust/tokio/src/runtime/task/state.rs b/third_party/rust/tokio/src/runtime/task/state.rs
new file mode 100644
index 0000000000..12f5449181
--- /dev/null
+++ b/third_party/rust/tokio/src/runtime/task/state.rs
@@ -0,0 +1,611 @@
+use crate::loom::sync::atomic::AtomicUsize;
+
+use std::fmt;
+use std::sync::atomic::Ordering::{AcqRel, Acquire, Release};
+use std::usize;
+
+pub(super) struct State {
+ val: AtomicUsize,
+}
+
+/// Current state value.
+#[derive(Copy, Clone)]
+pub(super) struct Snapshot(usize);
+
+type UpdateResult = Result<Snapshot, Snapshot>;
+
+/// The task is currently being run.
+const RUNNING: usize = 0b0001;
+
+/// The task is complete.
+///
+/// Once this bit is set, it is never unset.
+const COMPLETE: usize = 0b0010;
+
+/// Extracts the task's lifecycle value from the state.
+const LIFECYCLE_MASK: usize = 0b11;
+
+/// Flag tracking if the task has been pushed into a run queue.
+const NOTIFIED: usize = 0b100;
+
+/// The join handle is still around.
+#[allow(clippy::unusual_byte_groupings)] // https://github.com/rust-lang/rust-clippy/issues/6556
+const JOIN_INTEREST: usize = 0b1_000;
+
+/// A join handle waker has been set.
+#[allow(clippy::unusual_byte_groupings)] // https://github.com/rust-lang/rust-clippy/issues/6556
+const JOIN_WAKER: usize = 0b10_000;
+
+/// The task has been forcibly cancelled.
+#[allow(clippy::unusual_byte_groupings)] // https://github.com/rust-lang/rust-clippy/issues/6556
+const CANCELLED: usize = 0b100_000;
+
+/// All state flag bits, i.e. every bit that is not part of the ref count.
+const STATE_MASK: usize = LIFECYCLE_MASK | NOTIFIED | JOIN_INTEREST | JOIN_WAKER | CANCELLED;
+
+/// Bits used by the ref count portion of the state.
+const REF_COUNT_MASK: usize = !STATE_MASK;
+
+/// Number of positions to shift the ref count.
+const REF_COUNT_SHIFT: usize = REF_COUNT_MASK.count_zeros() as usize;
+
+/// One ref count.
+const REF_ONE: usize = 1 << REF_COUNT_SHIFT;
+
+/// State a task is initialized with.
+///
+/// A task is initialized with three references:
+///
+/// * A reference that will be stored in an OwnedTasks or LocalOwnedTasks.
+/// * A reference that will be sent to the scheduler as an ordinary notification.
+/// * A reference for the JoinHandle.
+///
+/// As the task starts with a `JoinHandle`, `JOIN_INTEREST` is set.
+/// As the task starts with a `Notified`, `NOTIFIED` is set.
+const INITIAL_STATE: usize = (REF_ONE * 3) | JOIN_INTEREST | NOTIFIED;
+
+/// Outcome of `State::transition_to_running`.
+#[must_use]
+pub(super) enum TransitionToRunning {
+ /// The task was idle: `RUNNING` was set and `NOTIFIED` was cleared.
+ Success,
+ /// Like `Success`, but the `CANCELLED` bit was found set.
+ Cancelled,
+ /// The task was not idle; one ref-count was dropped and others remain.
+ Failed,
+ /// The task was not idle and the dropped ref-count was the last one.
+ Dealloc,
+}
+
+/// Outcome of `State::transition_to_idle`.
+#[must_use]
+pub(super) enum TransitionToIdle {
+ /// `RUNNING` was cleared, no notification was pending, and the dropped
+ /// ref-count was not the last one.
+ Ok,
+ /// `RUNNING` was cleared while `NOTIFIED` was set; a ref-count was added
+ /// for the notification the caller is expected to schedule.
+ OkNotified,
+ /// `RUNNING` was cleared and the dropped ref-count was the last one.
+ OkDealloc,
+ /// The task was flagged as cancelled; the state was left unchanged.
+ Cancelled,
+}
+
+/// Outcome of `State::transition_to_notified_by_val`.
+#[must_use]
+pub(super) enum TransitionToNotifiedByVal {
+ /// Nothing has to be submitted; one ref-count was consumed and others
+ /// remain.
+ DoNothing,
+ /// `NOTIFIED` was set and a ref-count was added for a new notification
+ /// that should be submitted.
+ Submit,
+ /// Nothing has to be submitted and the consumed ref-count was the last one.
+ Dealloc,
+}
+
+/// Outcome of `State::transition_to_notified_by_ref`.
+#[must_use]
+pub(crate) enum TransitionToNotifiedByRef {
+ /// No notification has to be submitted.
+ DoNothing,
+ /// `NOTIFIED` was set and a ref-count was added for a new notification
+ /// that should be submitted.
+ Submit,
+}
+
+/// All transitions are performed via RMW operations. This establishes an
+/// unambiguous modification order.
+impl State {
+ /// Builds the state every task starts out with.
+ ///
+ /// The resulting state carries a ref-count of three; the comment on
+ /// `INITIAL_STATE` explains where each reference goes.
+ pub(super) fn new() -> State {
+     let val = AtomicUsize::new(INITIAL_STATE);
+     State { val }
+ }
+
+ /// Reads the current state with `Acquire` ordering and returns it as a
+ /// `Snapshot`.
+ pub(super) fn load(&self) -> Snapshot {
+     let bits = self.val.load(Acquire);
+     Snapshot(bits)
+ }
+
+ /// Attempts to move the lifecycle into `Running`.
+ ///
+ /// On success the notified bit is cleared, so that a notification arriving
+ /// while the task is being polled can be detected afterwards.
+ pub(super) fn transition_to_running(&self) -> TransitionToRunning {
+     self.fetch_update_action(|mut next| {
+         assert!(next.is_notified());
+
+         let action = if next.is_idle() {
+             // Idle: claim the RUNNING bit and consume the notification.
+             next.set_running();
+             next.unset_notified();
+
+             if next.is_cancelled() {
+                 TransitionToRunning::Cancelled
+             } else {
+                 TransitionToRunning::Success
+             }
+         } else {
+             // The task is either being run right now or has already
+             // completed (e.g. it was cancelled during shutdown). Only the
+             // ref-count is consumed.
+             next.ref_dec();
+
+             if next.ref_count() == 0 {
+                 TransitionToRunning::Dealloc
+             } else {
+                 TransitionToRunning::Failed
+             }
+         };
+
+         (action, Some(next))
+     })
+ }
+
+ /// Transitions the task from `Running` -> `Idle`.
+ ///
+ /// Returns a `TransitionToIdle` describing the outcome. The transition
+ /// fails with `TransitionToIdle::Cancelled`, leaving the state untouched,
+ /// if the task has been flagged to be cancelled.
+ pub(super) fn transition_to_idle(&self) -> TransitionToIdle {
+ self.fetch_update_action(|curr| {
+ assert!(curr.is_running());
+
+ if curr.is_cancelled() {
+ return (TransitionToIdle::Cancelled, None);
+ }
+
+ let mut next = curr;
+ let action;
+ next.unset_running();
+
+ if !next.is_notified() {
+ // Polling the future consumes the ref-count of the Notified.
+ next.ref_dec();
+ if next.ref_count() == 0 {
+ action = TransitionToIdle::OkDealloc;
+ } else {
+ action = TransitionToIdle::Ok;
+ }
+ } else {
+ // The caller will schedule a new notification, so we create a
+ // new ref-count for the notification. Our own ref-count is kept
+ // for now, and the caller will drop it shortly.
+ next.ref_inc();
+ action = TransitionToIdle::OkNotified;
+ }
+
+ (action, Some(next))
+ })
+ }
+
+ /// Transitions the task from `Running` -> `Complete`.
+ ///
+ /// Returns the snapshot produced by this update. Panics if the task was not
+ /// running or was already complete.
+ pub(super) fn transition_to_complete(&self) -> Snapshot {
+ // XOR-ing both lifecycle bits clears `RUNNING` and sets `COMPLETE` in a
+ // single RMW, given the precondition asserted below.
+ const DELTA: usize = RUNNING | COMPLETE;
+
+ let prev = Snapshot(self.val.fetch_xor(DELTA, AcqRel));
+ assert!(prev.is_running());
+ assert!(!prev.is_complete());
+
+ Snapshot(prev.0 ^ DELTA)
+ }
+
+ /// Moves from `Complete` to `Terminal` by releasing `count` references in
+ /// a single step.
+ ///
+ /// Returns `true` when those were the last references, i.e. the task
+ /// should be deallocated.
+ pub(super) fn transition_to_terminal(&self, count: usize) -> bool {
+     let before = Snapshot(self.val.fetch_sub(count * REF_ONE, AcqRel));
+     let held = before.ref_count();
+
+     assert!(held >= count, "current: {}, sub: {}", held, count);
+
+     held == count
+ }
+
+ /// Transitions the state to `NOTIFIED`.
+ ///
+ /// If no task needs to be submitted, a ref-count is consumed.
+ ///
+ /// If a task needs to be submitted, the ref-count is incremented for the
+ /// new Notified.
+ ///
+ /// Returns a `TransitionToNotifiedByVal` telling the caller whether to do
+ /// nothing, submit a notification, or deallocate the task.
+ pub(super) fn transition_to_notified_by_val(&self) -> TransitionToNotifiedByVal {
+ self.fetch_update_action(|mut snapshot| {
+ let action;
+
+ if snapshot.is_running() {
+ // If the task is running, we mark it as notified, but we should
+ // not submit anything as the thread currently running the
+ // future is responsible for that.
+ snapshot.set_notified();
+ snapshot.ref_dec();
+
+ // The thread that set the running bit also holds a ref-count.
+ assert!(snapshot.ref_count() > 0);
+
+ action = TransitionToNotifiedByVal::DoNothing;
+ } else if snapshot.is_complete() || snapshot.is_notified() {
+ // We do not need to submit any notifications, but we have to
+ // decrement the ref-count.
+ snapshot.ref_dec();
+
+ if snapshot.ref_count() == 0 {
+ action = TransitionToNotifiedByVal::Dealloc;
+ } else {
+ action = TransitionToNotifiedByVal::DoNothing;
+ }
+ } else {
+ // We create a new notified that we can submit. The caller
+ // retains ownership of the ref-count they passed in.
+ snapshot.set_notified();
+ snapshot.ref_inc();
+ action = TransitionToNotifiedByVal::Submit;
+ }
+
+ (action, Some(snapshot))
+ })
+ }
+
+ /// Transitions the state to `NOTIFIED` without consuming a ref-count.
+ ///
+ /// Returns `Submit` only when the task was idle and not yet notified; in
+ /// that case a ref-count has been added for the new notification.
+ pub(super) fn transition_to_notified_by_ref(&self) -> TransitionToNotifiedByRef {
+     self.fetch_update_action(|mut snapshot| {
+         // Complete or already-notified tasks need no state change at all.
+         if snapshot.is_complete() || snapshot.is_notified() {
+             return (TransitionToNotifiedByRef::DoNothing, None);
+         }
+
+         snapshot.set_notified();
+
+         if snapshot.is_running() {
+             // The thread currently running the future is responsible for
+             // submitting, so only the bit is recorded.
+             (TransitionToNotifiedByRef::DoNothing, Some(snapshot))
+         } else {
+             // Idle and previously not notified: a notification must be
+             // submitted, and it needs its own ref-count.
+             snapshot.ref_inc();
+             (TransitionToNotifiedByRef::Submit, Some(snapshot))
+         }
+     })
+ }
+
+ /// Transitions the state to `NOTIFIED`, unconditionally increasing the ref count.
+ ///
+ /// Unlike the other notification transitions, this one does not inspect the
+ /// running/complete/notified bits before updating the state.
+ #[cfg(all(
+ tokio_unstable,
+ tokio_taskdump,
+ feature = "rt",
+ target_os = "linux",
+ any(target_arch = "aarch64", target_arch = "x86", target_arch = "x86_64")
+ ))]
+ pub(super) fn transition_to_notified_for_tracing(&self) {
+ self.fetch_update_action(|mut snapshot| {
+ snapshot.set_notified();
+ snapshot.ref_inc();
+ ((), Some(snapshot))
+ });
+ }
+
+ /// Sets the cancelled bit and transitions the state to `NOTIFIED` if idle.
+ ///
+ /// Returns `true` if the task needs to be submitted to the pool for
+ /// execution. When `true` is returned, a ref-count has been added for that
+ /// notification.
+ pub(super) fn transition_to_notified_and_cancel(&self) -> bool {
+ self.fetch_update_action(|mut snapshot| {
+ if snapshot.is_cancelled() || snapshot.is_complete() {
+ // Aborts to completed or cancelled tasks are no-ops.
+ (false, None)
+ } else if snapshot.is_running() {
+ // If the task is running, we mark it as cancelled. The thread
+ // running the task will notice the cancelled bit when it
+ // stops polling and it will kill the task.
+ //
+ // The set_notified() call is not strictly necessary but it will
+ // in some cases let a wake_by_ref call return without having
+ // to perform a compare_exchange.
+ snapshot.set_notified();
+ snapshot.set_cancelled();
+ (false, Some(snapshot))
+ } else {
+ // The task is idle. We set the cancelled and notified bits and
+ // submit a notification if the notified bit was not already
+ // set.
+ snapshot.set_cancelled();
+ if !snapshot.is_notified() {
+ snapshot.set_notified();
+ snapshot.ref_inc();
+ (true, Some(snapshot))
+ } else {
+ (false, Some(snapshot))
+ }
+ }
+ })
+ }
+
+ /// Sets the `CANCELLED` bit and attempts to transition to `Running`.
+ ///
+ /// Returns `true` if the transition to `Running` succeeded, i.e. the task
+ /// was idle in the state this update replaced.
+ pub(super) fn transition_to_shutdown(&self) -> bool {
+ let mut prev = Snapshot(0);
+
+ // The closure always returns `Some`, so `fetch_update` retries until the
+ // CAS succeeds; `prev` ends up holding the state that was replaced.
+ let _ = self.fetch_update(|mut snapshot| {
+ prev = snapshot;
+
+ if snapshot.is_idle() {
+ snapshot.set_running();
+ }
+
+ // If the task was not idle, the thread currently running the task
+ // will notice the cancelled bit and cancel it once the poll
+ // completes.
+ snapshot.set_cancelled();
+ Some(snapshot)
+ });
+
+ prev.is_idle()
+ }
+
+ /// Fast path for a join handle that is dropped __immediately__ after
+ /// spawn.
+ ///
+ /// Succeeds only if the state is still exactly `INITIAL_STATE`; in that
+ /// case one reference is released and `JOIN_INTEREST` is cleared. Any
+ /// failure of the (weak) compare-exchange is reported as `Err(())`.
+ pub(super) fn drop_join_handle_fast(&self) -> Result<(), ()> {
+     use std::sync::atomic::Ordering::Relaxed;
+
+     // The state to install: one reference fewer, no join interest.
+     let without_join_handle = (INITIAL_STATE - REF_ONE) & !JOIN_INTEREST;
+
+     // A relaxed failure ordering is acceptable: if this call succeeds,
+     // nothing has been done with the join handle yet. As soon as the join
+     // handle is used (polled), the `JOIN_WAKER` flag gets set and the CAS
+     // fails, so there is no risk if this operation is reordered.
+     let swapped =
+         self.val
+             .compare_exchange_weak(INITIAL_STATE, without_join_handle, Release, Relaxed);
+
+     match swapped {
+         Ok(_) => Ok(()),
+         Err(_) => Err(()),
+     }
+ }
+
+ /// Attempts to clear the `JOIN_INTEREST` flag.
+ ///
+ /// Yields `Ok` when the flag was cleared before the task reached a
+ /// completed state, and `Err` when the task had already completed.
+ pub(super) fn unset_join_interested(&self) -> UpdateResult {
+     self.fetch_update(|curr| {
+         assert!(curr.is_join_interested());
+
+         if curr.is_complete() {
+             None
+         } else {
+             let mut next = curr;
+             next.unset_join_interested();
+             Some(next)
+         }
+     })
+ }
+
+ /// Attempts to set the `JOIN_WAKER` bit.
+ ///
+ /// Yields `Ok` when the bit was set and `Err` when the task has already
+ /// completed. The bit must not be set yet when this is called.
+ pub(super) fn set_join_waker(&self) -> UpdateResult {
+     self.fetch_update(|curr| {
+         assert!(curr.is_join_interested());
+         assert!(!curr.is_join_waker_set());
+
+         if curr.is_complete() {
+             None
+         } else {
+             let mut next = curr;
+             next.set_join_waker();
+             Some(next)
+         }
+     })
+ }
+
+ /// Unsets the `JOIN_WAKER` bit.
+ ///
+ /// Returns `Ok` if the bit has been unset, `Err` otherwise. This operation
+ /// fails if the task has completed.
+ pub(super) fn unset_waker(&self) -> UpdateResult {
+ self.fetch_update(|curr| {
+ assert!(curr.is_join_interested());
+ assert!(curr.is_join_waker_set());
+
+ if curr.is_complete() {
+ return None;
+ }
+
+ let mut next = curr;
+ next.unset_join_waker();
+
+ Some(next)
+ })
+ }
+
+ /// Adds one reference to the task.
+ ///
+ /// Aborts the process if the state value seen before the increment already
+ /// exceeds `isize::MAX`.
+ pub(super) fn ref_inc(&self) {
+ use std::process;
+ use std::sync::atomic::Ordering::Relaxed;
+
+ // Using a relaxed ordering is alright here, as knowledge of the
+ // original reference prevents other threads from erroneously deleting
+ // the object.
+ //
+ // As explained in the [Boost documentation][1], Increasing the
+ // reference counter can always be done with memory_order_relaxed: New
+ // references to an object can only be formed from an existing
+ // reference, and passing an existing reference from one thread to
+ // another must already provide any required synchronization.
+ //
+ // [1]: (www.boost.org/doc/libs/1_55_0/doc/html/atomic/usage_examples.html)
+ let prev = self.val.fetch_add(REF_ONE, Relaxed);
+
+ // If the reference count overflowed, abort.
+ if prev > isize::MAX as usize {
+ process::abort();
+ }
+ }
+
+ /// Releases one reference.
+ ///
+ /// Returns `true` if the task should be released, i.e. the reference that
+ /// was dropped was the last one. Panics if no reference was held.
+ pub(super) fn ref_dec(&self) -> bool {
+ let prev = Snapshot(self.val.fetch_sub(REF_ONE, AcqRel));
+ assert!(prev.ref_count() >= 1);
+ prev.ref_count() == 1
+ }
+
+ /// Releases two references in a single atomic step.
+ ///
+ /// Returns `true` if the task should be released, i.e. the two references
+ /// that were dropped were the last ones. Panics if fewer than two were held.
+ pub(super) fn ref_dec_twice(&self) -> bool {
+ let prev = Snapshot(self.val.fetch_sub(2 * REF_ONE, AcqRel));
+ assert!(prev.ref_count() >= 2);
+ prev.ref_count() == 2
+ }
+
+ /// Runs a compare-exchange loop driven by `f`.
+ ///
+ /// `f` receives the current snapshot and returns an output plus an optional
+ /// next state. With `None` the output is returned without modifying the
+ /// state. With `Some(next)` a CAS is attempted; on failure `f` is called
+ /// again with the value actually observed, so `f` may run several times.
+ fn fetch_update_action<F, T>(&self, mut f: F) -> T
+ where
+ F: FnMut(Snapshot) -> (T, Option<Snapshot>),
+ {
+ let mut curr = self.load();
+
+ loop {
+ let (output, next) = f(curr);
+ let next = match next {
+ Some(next) => next,
+ None => return output,
+ };
+
+ let res = self.val.compare_exchange(curr.0, next.0, AcqRel, Acquire);
+
+ match res {
+ Ok(_) => return output,
+ // Lost the race: retry against the value that is actually stored.
+ Err(actual) => curr = Snapshot(actual),
+ }
+ }
+ }
+
+ /// Runs a compare-exchange loop driven by `f`.
+ ///
+ /// Returns `Ok(next)` with the state that was installed, or `Err(curr)` with
+ /// the snapshot for which `f` returned `None`. `f` is re-run with the
+ /// observed value whenever the CAS fails, so it may run several times.
+ fn fetch_update<F>(&self, mut f: F) -> Result<Snapshot, Snapshot>
+ where
+ F: FnMut(Snapshot) -> Option<Snapshot>,
+ {
+ let mut curr = self.load();
+
+ loop {
+ let next = match f(curr) {
+ Some(next) => next,
+ None => return Err(curr),
+ };
+
+ let res = self.val.compare_exchange(curr.0, next.0, AcqRel, Acquire);
+
+ match res {
+ Ok(_) => return Ok(next),
+ // Lost the race: retry against the value that is actually stored.
+ Err(actual) => curr = Snapshot(actual),
+ }
+ }
+ }
+}
+
+// ===== impl Snapshot =====
+
+impl Snapshot {
+    /// Reports whether every bit of `flag` is set in this snapshot.
+    fn has_flag(self, flag: usize) -> bool {
+        self.0 & flag == flag
+    }
+
+    /// Sets every bit of `flag`.
+    fn insert_flag(&mut self, flag: usize) {
+        self.0 |= flag;
+    }
+
+    /// Clears every bit of `flag`.
+    fn remove_flag(&mut self, flag: usize) {
+        self.0 &= !flag;
+    }
+
+    /// Returns `true` if the task is in an idle state, i.e. it is neither
+    /// running nor complete.
+    pub(super) fn is_idle(self) -> bool {
+        !(self.is_running() || self.is_complete())
+    }
+
+    /// Returns `true` if the task has been flagged as notified.
+    pub(super) fn is_notified(self) -> bool {
+        self.has_flag(NOTIFIED)
+    }
+
+    fn unset_notified(&mut self) {
+        self.remove_flag(NOTIFIED)
+    }
+
+    fn set_notified(&mut self) {
+        self.insert_flag(NOTIFIED)
+    }
+
+    /// Returns `true` if the `RUNNING` bit is set.
+    pub(super) fn is_running(self) -> bool {
+        self.has_flag(RUNNING)
+    }
+
+    fn set_running(&mut self) {
+        self.insert_flag(RUNNING)
+    }
+
+    fn unset_running(&mut self) {
+        self.remove_flag(RUNNING)
+    }
+
+    /// Returns `true` if the `CANCELLED` bit is set.
+    pub(super) fn is_cancelled(self) -> bool {
+        self.has_flag(CANCELLED)
+    }
+
+    fn set_cancelled(&mut self) {
+        self.insert_flag(CANCELLED)
+    }
+
+    /// Returns `true` if the task's future has completed execution.
+    pub(super) fn is_complete(self) -> bool {
+        self.has_flag(COMPLETE)
+    }
+
+    /// Returns `true` if the `JOIN_INTEREST` bit is set.
+    pub(super) fn is_join_interested(self) -> bool {
+        self.has_flag(JOIN_INTEREST)
+    }
+
+    fn unset_join_interested(&mut self) {
+        self.remove_flag(JOIN_INTEREST)
+    }
+
+    /// Returns `true` if the `JOIN_WAKER` bit is set.
+    pub(super) fn is_join_waker_set(self) -> bool {
+        self.has_flag(JOIN_WAKER)
+    }
+
+    fn set_join_waker(&mut self) {
+        self.insert_flag(JOIN_WAKER)
+    }
+
+    fn unset_join_waker(&mut self) {
+        self.remove_flag(JOIN_WAKER)
+    }
+
+    /// Extracts the reference count stored in the upper bits of the state.
+    pub(super) fn ref_count(self) -> usize {
+        let counter_bits = self.0 & REF_COUNT_MASK;
+        counter_bits >> REF_COUNT_SHIFT
+    }
+
+    /// Adds one reference to this (local) snapshot.
+    fn ref_inc(&mut self) {
+        assert!(self.0 <= isize::MAX as usize);
+        self.0 += REF_ONE;
+    }
+
+    /// Removes one reference from this (local) snapshot.
+    pub(super) fn ref_dec(&mut self) {
+        assert!(self.ref_count() > 0);
+        self.0 -= REF_ONE;
+    }
+}
+
+impl fmt::Debug for State {
+    /// Formats the state as a `Snapshot` of its currently loaded value.
+    fn fmt(&self, fmt: &mut fmt::Formatter<'_>) -> fmt::Result {
+        self.load().fmt(fmt)
+    }
+}
+
+impl fmt::Debug for Snapshot {
+    /// Prints every decoded flag followed by the reference count.
+    fn fmt(&self, fmt: &mut fmt::Formatter<'_>) -> fmt::Result {
+        let mut out = fmt.debug_struct("Snapshot");
+        out.field("is_running", &self.is_running());
+        out.field("is_complete", &self.is_complete());
+        out.field("is_notified", &self.is_notified());
+        out.field("is_cancelled", &self.is_cancelled());
+        out.field("is_join_interested", &self.is_join_interested());
+        out.field("is_join_waker_set", &self.is_join_waker_set());
+        out.field("ref_count", &self.ref_count());
+        out.finish()
+    }
+}
diff --git a/third_party/rust/tokio/src/runtime/task/trace/mod.rs b/third_party/rust/tokio/src/runtime/task/trace/mod.rs
new file mode 100644
index 0000000000..543b7eee98
--- /dev/null
+++ b/third_party/rust/tokio/src/runtime/task/trace/mod.rs
@@ -0,0 +1,330 @@
+use crate::loom::sync::Arc;
+use crate::runtime::context;
+use crate::runtime::scheduler::{self, current_thread, Inject};
+
+use backtrace::BacktraceFrame;
+use std::cell::Cell;
+use std::collections::VecDeque;
+use std::ffi::c_void;
+use std::fmt;
+use std::future::Future;
+use std::pin::Pin;
+use std::ptr::{self, NonNull};
+use std::task::{self, Poll};
+
+mod symbol;
+mod tree;
+
+use symbol::Symbol;
+use tree::Tree;
+
+use super::{Notified, OwnedTasks};
+
+type Backtrace = Vec<BacktraceFrame>;
+type SymbolTrace = Vec<Symbol>;
+
+/// The ambient backtracing context.
+pub(crate) struct Context {
+ /// The address of [`Trace::root`] establishes an upper unwinding bound on
+ /// the backtraces in `Trace`.
+ active_frame: Cell<Option<NonNull<Frame>>>,
+ /// The place to stash backtraces.
+ collector: Cell<Option<Trace>>,
+}
+
+/// A [`Frame`] in an intrusive tree of [`Frame`]s, linked through parent pointers.
+struct Frame {
+ /// The location associated with this frame.
+ inner_addr: *const c_void,
+
+ /// The parent frame, if any.
+ parent: Option<NonNull<Frame>>,
+}
+
+/// A tree execution trace.
+///
+/// Traces are captured with [`Trace::capture`], rooted with [`Trace::root`]
+/// and leaved with [`trace_leaf`].
+#[derive(Clone, Debug)]
+pub(crate) struct Trace {
+ // The linear backtraces that comprise this trace. These linear traces can
+ // be re-knitted into a tree.
+ backtraces: Vec<Backtrace>,
+}
+
+pin_project_lite::pin_project! {
+ #[derive(Debug, Clone)]
+ #[must_use = "futures do nothing unless you `.await` or poll them"]
+ pub(crate) struct Root<T> {
+ #[pin]
+ future: T,
+ }
+}
+
+const FAIL_NO_THREAD_LOCAL: &str = "The Tokio thread-local has been destroyed \
+ as part of shutting down the current \
+ thread, so collecting a taskdump is not \
+ possible.";
+
+impl Context {
+ /// Creates a context with no active frame and no collector installed.
+ pub(crate) const fn new() -> Self {
+ Context {
+ active_frame: Cell::new(None),
+ collector: Cell::new(None),
+ }
+ }
+
+ /// Runs `f` against the current tracing context via
+ /// `crate::runtime::context::with_trace` and returns its result.
+ ///
+ /// NOTE(review): `None` presumably means the thread-local is no longer
+ /// available (see `FAIL_NO_THREAD_LOCAL`) — confirm against `with_trace`.
+ ///
+ /// SAFETY: Callers of this function must ensure that trace frames always
+ /// form a valid linked list.
+ unsafe fn try_with_current<F, R>(f: F) -> Option<R>
+ where
+ F: FnOnce(&Self) -> R,
+ {
+ crate::runtime::context::with_trace(f)
+ }
+
+ /// Runs `f` with the current context's `active_frame` cell.
+ ///
+ /// Panics with `FAIL_NO_THREAD_LOCAL` if the context cannot be accessed.
+ /// The safety requirement of `try_with_current` applies.
+ unsafe fn with_current_frame<F, R>(f: F) -> R
+ where
+ F: FnOnce(&Cell<Option<NonNull<Frame>>>) -> R,
+ {
+ Self::try_with_current(|context| f(&context.active_frame)).expect(FAIL_NO_THREAD_LOCAL)
+ }
+
+ /// Runs `f` with the current context's `collector` cell.
+ ///
+ /// Panics with `FAIL_NO_THREAD_LOCAL` if the context cannot be accessed.
+ fn with_current_collector<F, R>(f: F) -> R
+ where
+ F: FnOnce(&Cell<Option<Trace>>) -> R,
+ {
+ // SAFETY: This call can only access the collector field, so it cannot
+ // break the trace frame linked list.
+ unsafe {
+ Self::try_with_current(|context| f(&context.collector)).expect(FAIL_NO_THREAD_LOCAL)
+ }
+ }
+}
+
+impl Trace {
+ /// Invokes `f`, returning both its result and the collection of backtraces
+ /// captured at each sub-invocation of [`trace_leaf`].
+ ///
+ /// Any collector that was already installed is put back after `f` returns.
+ /// Panics if the collector installed here is no longer present afterwards.
+ #[inline(never)]
+ pub(crate) fn capture<F, R>(f: F) -> (R, Trace)
+ where
+ F: FnOnce() -> R,
+ {
+ let collector = Trace { backtraces: vec![] };
+
+ // Install a fresh collector, remembering whatever was there before.
+ let previous = Context::with_current_collector(|current| current.replace(Some(collector)));
+
+ // NOTE(review): there is no drop guard here, so if `f` unwinds the
+ // previous collector does not appear to be restored — confirm whether
+ // that matters for callers.
+ let result = f();
+
+ // Take the (now populated) collector back out and restore the old one.
+ let collector =
+ Context::with_current_collector(|current| current.replace(previous)).unwrap();
+
+ (result, collector)
+ }
+
+ /// The root of a trace.
+ ///
+ /// Wraps `future` in a [`Root`] without polling it.
+ #[inline(never)]
+ pub(crate) fn root<F>(future: F) -> Root<F> {
+ Root { future }
+ }
+}
+
+/// If this is a sub-invocation of [`Trace::capture`], capture a backtrace.
+///
+/// The captured backtrace will be returned by [`Trace::capture`].
+///
+/// Invoking this function does nothing when it is not a sub-invocation of
+/// [`Trace::capture`].
+///
+/// Returns `Poll::Pending` (after deferring a wake-up of `cx`'s waker) when a
+/// backtrace was recorded, and `Poll::Ready(())` otherwise.
+// This function is marked `#[inline(never)]` to ensure that it gets a distinct `Frame` in the
+// backtrace, below which frames should not be included in the backtrace (since they reflect the
+// internal implementation details of this crate).
+#[inline(never)]
+pub(crate) fn trace_leaf(cx: &mut task::Context<'_>) -> Poll<()> {
+ // Safety: We don't manipulate the current context's active frame.
+ let did_trace = unsafe {
+ Context::try_with_current(|context_cell| {
+ // Temporarily take the collector; it is put back below.
+ if let Some(mut collector) = context_cell.collector.take() {
+ let mut frames = vec![];
+ let mut above_leaf = false;
+
+ if let Some(active_frame) = context_cell.active_frame.get() {
+ let active_frame = active_frame.as_ref();
+
+ backtrace::trace(|frame| {
+ let below_root = !ptr::eq(frame.symbol_address(), active_frame.inner_addr);
+
+ // only capture frames above `trace_leaf` and below
+ // `Trace::root`.
+ if above_leaf && below_root {
+ frames.push(frame.to_owned().into());
+ }
+
+ if ptr::eq(frame.symbol_address(), trace_leaf as *const _) {
+ above_leaf = true;
+ }
+
+ // only continue unwinding if we're below `Trace::root`
+ below_root
+ });
+ }
+ // An (possibly empty) backtrace is recorded even without an active frame.
+ collector.backtraces.push(frames);
+ context_cell.collector.set(Some(collector));
+ true
+ } else {
+ false
+ }
+ })
+ .unwrap_or(false)
+ };
+
+ if did_trace {
+ // Use the same logic that `yield_now` uses to send out wakeups after
+ // the task yields.
+ context::with_scheduler(|scheduler| {
+ if let Some(scheduler) = scheduler {
+ match scheduler {
+ scheduler::Context::CurrentThread(s) => s.defer.defer(cx.waker()),
+ #[cfg(all(feature = "rt-multi-thread", not(tokio_wasi)))]
+ scheduler::Context::MultiThread(s) => s.defer.defer(cx.waker()),
+ }
+ }
+ });
+
+ Poll::Pending
+ } else {
+ Poll::Ready(())
+ }
+}
+
+impl fmt::Display for Trace {
+    /// Renders the trace by converting a clone of it into a `Tree` and
+    /// formatting that tree.
+    fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
+        let tree = Tree::from_trace(self.clone());
+        tree.fmt(f)
+    }
+}
+
+/// Returns a guard that calls `f` exactly once when it is dropped.
+///
+/// The value returned by `f` is discarded.
+fn defer<F: FnOnce() -> R, R>(f: F) -> impl Drop {
+ use std::mem::ManuallyDrop;
+
+ struct Defer<F: FnOnce() -> R, R>(ManuallyDrop<F>);
+
+ impl<F: FnOnce() -> R, R> Drop for Defer<F, R> {
+ #[inline(always)]
+ fn drop(&mut self) {
+ // The closure is moved out of the `ManuallyDrop` slot here and the
+ // slot is never touched again, so it is taken at most once.
+ unsafe {
+ ManuallyDrop::take(&mut self.0)();
+ }
+ }
+ }
+
+ Defer(ManuallyDrop::new(f))
+}
+
+impl<T: Future> Future for Root<T> {
+ type Output = T::Output;
+
+ /// Polls the wrapped future with a fresh `Frame` installed as the
+ /// context's active frame; the previous active frame is restored when this
+ /// call exits.
+ #[inline(never)]
+ fn poll(self: Pin<&mut Self>, cx: &mut task::Context<'_>) -> Poll<Self::Output> {
+ // SAFETY: The context's current frame is restored to its original state
+ // before `frame` is dropped.
+ unsafe {
+ // `inner_addr` is the address of this very function; `trace_leaf`
+ // compares backtrace frames against it to find the root.
+ let mut frame = Frame {
+ inner_addr: Self::poll as *const c_void,
+ parent: None,
+ };
+
+ // Push `frame` on top of whatever frame was active before.
+ Context::with_current_frame(|current| {
+ frame.parent = current.take();
+ current.set(Some(NonNull::from(&frame)));
+ });
+
+ // Pop it again when `_restore` is dropped at the end of this block.
+ let _restore = defer(|| {
+ Context::with_current_frame(|current| {
+ current.set(frame.parent);
+ });
+ });
+
+ let this = self.project();
+ this.future.poll(cx)
+ }
+ }
+}
+
+/// Trace and poll all tasks of the current_thread runtime.
+///
+/// Both queues are emptied first; every owned task is then flagged as
+/// notified and polled under [`Trace::capture`]. Returns one [`Trace`] per
+/// owned task.
+pub(in crate::runtime) fn trace_current_thread(
+ owned: &OwnedTasks<Arc<current_thread::Handle>>,
+ local: &mut VecDeque<Notified<Arc<current_thread::Handle>>>,
+ injection: &Inject<Arc<current_thread::Handle>>,
+) -> Vec<Trace> {
+ // clear the local and injection queues
+ local.clear();
+
+ while let Some(task) = injection.pop() {
+ drop(task);
+ }
+
+ // notify each task
+ let mut tasks = vec![];
+ owned.for_each(|task| {
+ // set the notified bit
+ task.as_raw().state().transition_to_notified_for_tracing();
+ // store the raw tasks into a vec
+ tasks.push(task.as_raw());
+ });
+
+ // Polling happens only after the `for_each` pass above has finished.
+ tasks
+ .into_iter()
+ .map(|task| {
+ let ((), trace) = Trace::capture(|| task.poll());
+ trace
+ })
+ .collect()
+}
+
+cfg_rt_multi_thread! {
+ use crate::loom::sync::Mutex;
+ use crate::runtime::scheduler::multi_thread;
+ use crate::runtime::scheduler::multi_thread::Synced;
+ use crate::runtime::scheduler::inject::Shared;
+
+ /// Trace and poll all tasks of the multi_thread runtime.
+ ///
+ /// The local and injection queues are emptied first; every owned task is
+ /// then flagged as notified, polled under `Trace::capture`, and
+ /// rescheduled. Returns one `Trace` per owned task.
+ ///
+ /// ## Safety
+ ///
+ /// Must be called with the same `synced` that `injection` was created with.
+ pub(in crate::runtime) unsafe fn trace_multi_thread(
+ owned: &OwnedTasks<Arc<multi_thread::Handle>>,
+ local: &mut multi_thread::queue::Local<Arc<multi_thread::Handle>>,
+ synced: &Mutex<Synced>,
+ injection: &Shared<Arc<multi_thread::Handle>>,
+ ) -> Vec<Trace> {
+ // clear the local queue
+ while let Some(notified) = local.pop() {
+ drop(notified);
+ }
+
+ // clear the injection queue
+ let mut synced = synced.lock();
+ while let Some(notified) = injection.pop(&mut synced.inject) {
+ drop(notified);
+ }
+
+ // Release the lock before any task is polled.
+ drop(synced);
+
+ // notify each task
+ let mut traces = vec![];
+ owned.for_each(|task| {
+ // set the notified bit
+ task.as_raw().state().transition_to_notified_for_tracing();
+
+ // trace the task
+ let ((), trace) = Trace::capture(|| task.as_raw().poll());
+ traces.push(trace);
+
+ // reschedule the task
+ let _ = task.as_raw().state().transition_to_notified_by_ref();
+ task.as_raw().schedule();
+ });
+
+ traces
+ }
+}
diff --git a/third_party/rust/tokio/src/runtime/task/trace/symbol.rs b/third_party/rust/tokio/src/runtime/task/trace/symbol.rs
new file mode 100644
index 0000000000..49d7ba37f7
--- /dev/null
+++ b/third_party/rust/tokio/src/runtime/task/trace/symbol.rs
@@ -0,0 +1,92 @@
+use backtrace::BacktraceSymbol;
+use std::fmt;
+use std::hash::{Hash, Hasher};
+use std::ptr;
+
+/// A symbol in a backtrace.
+///
+/// This wrapper type serves two purposes. The first is that it provides a
+/// representation of a symbol that can be inserted into hashmaps and hashsets;
+/// the [`backtrace`] crate does not define [`Hash`], [`PartialEq`], or [`Eq`]
+/// on [`BacktraceSymbol`], and recommends that users define their own wrapper
+/// which implements these traits.
+///
+/// Second, this wrapper includes a `parent_hash` field that uniquely
+/// identifies this symbol's position in its trace. Otherwise, e.g., our code
+/// would not be able to distinguish between recursive calls of a function at
+/// different depths.
+#[derive(Clone)]
+pub(super) struct Symbol {
+ pub(super) symbol: BacktraceSymbol,
+ pub(super) parent_hash: u64,
+}
+
+impl Hash for Symbol {
+ /// Hashes the same components that `PartialEq` compares: name bytes,
+ /// address, file name, line, column and `parent_hash`.
+ fn hash<H>(&self, state: &mut H)
+ where
+ H: Hasher,
+ {
+ // A missing name or address contributes nothing to the hash.
+ if let Some(name) = self.symbol.name() {
+ name.as_bytes().hash(state);
+ }
+
+ if let Some(addr) = self.symbol.addr() {
+ // Hash the pointer value itself, not what it points to.
+ ptr::hash(addr, state);
+ }
+
+ self.symbol.filename().hash(state);
+ self.symbol.lineno().hash(state);
+ self.symbol.colno().hash(state);
+ self.parent_hash.hash(state);
+ }
+}
+
+impl PartialEq for Symbol {
+    /// Two symbols are equal when they sit at the same position in their
+    /// trace (`parent_hash`) and agree on name bytes, address, file name,
+    /// line and column.
+    fn eq(&self, other: &Self) -> bool {
+        if self.parent_hash != other.parent_hash {
+            return false;
+        }
+
+        let names_match = match (self.symbol.name(), other.symbol.name()) {
+            (Some(lhs_name), Some(rhs_name)) => lhs_name.as_bytes() == rhs_name.as_bytes(),
+            (None, None) => true,
+            _ => false,
+        };
+        if !names_match {
+            return false;
+        }
+
+        // Addresses are compared by pointer identity.
+        let addrs_match = match (self.symbol.addr(), other.symbol.addr()) {
+            (Some(lhs_addr), Some(rhs_addr)) => ptr::eq(lhs_addr, rhs_addr),
+            (None, None) => true,
+            _ => false,
+        };
+        if !addrs_match {
+            return false;
+        }
+
+        self.symbol.filename() == other.symbol.filename()
+            && self.symbol.lineno() == other.symbol.lineno()
+            && self.symbol.colno() == other.symbol.colno()
+    }
+}
+
+impl Eq for Symbol {}
+
+impl fmt::Display for Symbol {
+ /// Writes the symbol as `name at file:line:col`, omitting every part that
+ /// is unavailable.
+ fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
+ if let Some(name) = self.symbol.name() {
+ let name = name.to_string();
+ // Keep only what precedes the final `::`, if there is one.
+ // NOTE(review): presumably this drops the trailing hash segment of a
+ // symbol name — confirm.
+ let name = if let Some((name, _)) = name.rsplit_once("::") {
+ name
+ } else {
+ &name
+ };
+ fmt::Display::fmt(&name, f)?;
+ }
+
+ // Line and column are only printed when a file name is known.
+ if let Some(filename) = self.symbol.filename() {
+ f.write_str(" at ")?;
+ filename.to_string_lossy().fmt(f)?;
+ if let Some(lineno) = self.symbol.lineno() {
+ f.write_str(":")?;
+ fmt::Display::fmt(&lineno, f)?;
+ if let Some(colno) = self.symbol.colno() {
+ f.write_str(":")?;
+ fmt::Display::fmt(&colno, f)?;
+ }
+ }
+ }
+
+ Ok(())
+ }
+}
diff --git a/third_party/rust/tokio/src/runtime/task/trace/tree.rs b/third_party/rust/tokio/src/runtime/task/trace/tree.rs
new file mode 100644
index 0000000000..7e6f8efeca
--- /dev/null
+++ b/third_party/rust/tokio/src/runtime/task/trace/tree.rs
@@ -0,0 +1,126 @@
+use std::collections::{hash_map::DefaultHasher, HashMap, HashSet};
+use std::fmt;
+use std::hash::{Hash, Hasher};
+
+use super::{Backtrace, Symbol, SymbolTrace, Trace};
+
+/// An adjacency list representation of an execution tree.
+///
+/// This tree provides a convenient intermediate representation for formatting
+/// [`Trace`] as a tree.
+pub(super) struct Tree {
+ /// The roots of the trees.
+ ///
+ /// There should only be one root, but the code is robust to multiple roots.
+ roots: HashSet<Symbol>,
+
+ /// The adjacency list of symbols in the execution tree(s).
+ edges: HashMap<Symbol, HashSet<Symbol>>,
+}
+
+impl Tree {
+ /// Constructs a [`Tree`] from [`Trace`]
+ ///
+ /// Each backtrace is resolved to symbols; its first symbol becomes a root
+ /// and every symbol gets an edge to the symbol that follows it.
+ pub(super) fn from_trace(trace: Trace) -> Self {
+ let mut roots: HashSet<Symbol> = HashSet::default();
+ let mut edges: HashMap<Symbol, HashSet<Symbol>> = HashMap::default();
+
+ for trace in trace.backtraces {
+ let trace = to_symboltrace(trace);
+
+ if let Some(first) = trace.first() {
+ roots.insert(first.to_owned());
+ }
+
+ let mut trace = trace.into_iter().peekable();
+ while let Some(frame) = trace.next() {
+ // Every symbol gets an entry, even the last one (with no children).
+ let subframes = edges.entry(frame).or_default();
+ if let Some(subframe) = trace.peek() {
+ subframes.insert(subframe.clone());
+ }
+ }
+ }
+
+ Tree { roots, edges }
+ }
+
+ /// Iterates over the direct sub-symbols recorded for `frame`.
+ ///
+ /// Yields `None` when `frame` has no entry in the adjacency list.
+ fn consequences(&self, frame: &Symbol) -> Option<impl ExactSizeIterator<Item = &Symbol>> {
+     self.edges.get(frame).map(|children| children.iter())
+ }
+
+ /// Format this [`Tree`] as a textual tree.
+ ///
+ /// Writes `root` on the current line and then each of its sub-symbols,
+ /// recursively, on a line of its own. `is_last` selects the branch glyph
+ /// and `prefix` is the indentation inherited from the ancestors. No
+ /// trailing newline is written.
+ fn display<W: fmt::Write>(
+ &self,
+ f: &mut W,
+ root: &Symbol,
+ is_last: bool,
+ prefix: &str,
+ ) -> fmt::Result {
+ let root_fmt = format!("{}", root);
+
+ let current;
+ let next;
+
+ if is_last {
+ current = format!("{prefix}└╼\u{a0}{root_fmt}");
+ next = format!("{}\u{a0}\u{a0}\u{a0}", prefix);
+ } else {
+ current = format!("{prefix}├╼\u{a0}{root_fmt}");
+ next = format!("{}│\u{a0}\u{a0}", prefix);
+ }
+
+ // The first two characters of the line are dropped before writing it.
+ write!(f, "{}", {
+ let mut current = current.chars();
+ current.next().unwrap();
+ current.next().unwrap();
+ &current.as_str()
+ })?;
+
+ if let Some(consequences) = self.consequences(root) {
+ let len = consequences.len();
+ for (i, consequence) in consequences.enumerate() {
+ let is_last = i == len - 1;
+ writeln!(f)?;
+ self.display(f, consequence, is_last, &next)?;
+ }
+ }
+
+ Ok(())
+ }
+}
+
+impl fmt::Display for Tree {
+    /// Writes the textual tree of every root.
+    ///
+    /// When there is more than one root, consecutive trees are separated by a
+    /// newline; output for a single root is unchanged.
+    fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
+        for (i, root) in self.roots.iter().enumerate() {
+            // `display` never ends its output with a newline, so without
+            // this separator the next root would be glued onto the last line
+            // of the previous tree.
+            if i > 0 {
+                writeln!(f)?;
+            }
+            self.display(f, root, true, " ")?;
+        }
+        Ok(())
+    }
+}
+
+/// Resolve a sequence of [`backtrace::BacktraceFrame`]s into a sequence of
+/// [`Symbol`]s.
+///
+/// Frames, and the symbols within each frame, are visited in reverse order.
+/// Each symbol's `parent_hash` is the running hash of all symbols pushed
+/// before it.
+fn to_symboltrace(backtrace: Backtrace) -> SymbolTrace {
+ // Resolve the backtrace frames to symbols.
+ let backtrace: Backtrace = {
+ let mut backtrace = backtrace::Backtrace::from(backtrace);
+ backtrace.resolve();
+ backtrace.into()
+ };
+
+ // Accumulate the symbols in descending order into `symboltrace`.
+ let mut symboltrace: SymbolTrace = vec![];
+ let mut state = DefaultHasher::new();
+ for frame in backtrace.into_iter().rev() {
+ for symbol in frame.symbols().iter().rev() {
+ let symbol = Symbol {
+ symbol: symbol.clone(),
+ // Snapshot of the hasher before this symbol is mixed in.
+ parent_hash: state.finish(),
+ };
+ symbol.hash(&mut state);
+ symboltrace.push(symbol);
+ }
+ }
+
+ symboltrace
+}
diff --git a/third_party/rust/tokio/src/runtime/task/waker.rs b/third_party/rust/tokio/src/runtime/task/waker.rs
new file mode 100644
index 0000000000..b5f5ace9ec
--- /dev/null
+++ b/third_party/rust/tokio/src/runtime/task/waker.rs
@@ -0,0 +1,104 @@
+use crate::future::Future;
+use crate::runtime::task::{Header, RawTask, Schedule};
+
+use std::marker::PhantomData;
+use std::mem::ManuallyDrop;
+use std::ops;
+use std::ptr::NonNull;
+use std::task::{RawWaker, RawWakerVTable, Waker};
+
+/// A borrowed [`Waker`] for a task.
+///
+/// The waker is stored in `ManuallyDrop`, so dropping a `WakerRef` does not
+/// run the waker's own drop. The `PhantomData` ties the value to the lifetime
+/// of the borrowed `Header` and to the scheduler type `S`.
+pub(super) struct WakerRef<'a, S: 'static> {
+ waker: ManuallyDrop<Waker>,
+ _p: PhantomData<(&'a Header, S)>,
+}
+
+/// Builds a [`WakerRef`] for the task behind `header` without incrementing
+/// the task's reference count up front.
+pub(super) fn waker_ref<T, S>(header: &NonNull<Header>) -> WakerRef<'_, S>
+where
+    T: Future,
+    S: Schedule,
+{
+    // `Waker::will_wake` takes the vtable pointer into account, so a separate
+    // vtable for borrowed wakers would make `will_wake` report `false` for the
+    // current task's own waker (discussion at rust-lang/rust#66281).
+    //
+    // A single vtable is used instead. Since this waker only borrows the task
+    // rather than owning a reference, `drop` must never be called on it; it is
+    // wrapped in `ManuallyDrop`, which is then never dropped by hand.
+    let borrowed = unsafe { Waker::from_raw(raw_waker(*header)) };
+
+    WakerRef {
+        waker: ManuallyDrop::new(borrowed),
+        _p: PhantomData,
+    }
+}
+
+// Lets a `WakerRef` be used wherever a `&Waker` is expected.
+impl<S> ops::Deref for WakerRef<'_, S> {
+ type Target = Waker;
+
+ fn deref(&self) -> &Waker {
+ // `ManuallyDrop<Waker>` derefs to the inner `Waker`.
+ &self.waker
+ }
+}
+
+// Variant of `trace!` compiled under `cfg_trace!`: if the header carries a
+// tracing id, emit a `tracing::trace!` event on target `tokio::task::waker`
+// with the operation name and the task id.
+cfg_trace! {
+ macro_rules! trace {
+ ($header:expr, $op:expr) => {
+ if let Some(id) = Header::get_tracing_id(&$header) {
+ tracing::trace!(
+ target: "tokio::task::waker",
+ op = $op,
+ task.id = id.into_u64(),
+ );
+ }
+ }
+ }
+}
+
+// Variant of `trace!` compiled under `cfg_not_trace!`: emits nothing. The
+// header expression is still borrowed so that it counts as used.
+cfg_not_trace! {
+ macro_rules! trace {
+ ($header:expr, $op:expr) => {
+ // noop
+ let _ = &$header;
+ }
+ }
+}
+
+/// `clone` entry of [`WAKER_VTABLE`]: calls `ref_inc` on the header's state
+/// and returns a new `RawWaker` for the same header.
+///
+/// # Safety
+///
+/// `ptr` must be non-null and must point to a `Header` that is valid to
+/// dereference; it is passed to `NonNull::new_unchecked` and `as_ref`
+/// without any check.
+unsafe fn clone_waker(ptr: *const ()) -> RawWaker {
+ let header = NonNull::new_unchecked(ptr as *mut Header);
+ trace!(header, "waker.clone");
+ header.as_ref().state.ref_inc();
+ raw_waker(header)
+}
+
+/// `drop` entry of [`WAKER_VTABLE`]: rebuilds the `RawTask` from `ptr` and
+/// calls `drop_reference` on it.
+///
+/// # Safety
+///
+/// `ptr` must be non-null (it is passed to `NonNull::new_unchecked`) and
+/// must satisfy whatever `RawTask::from_raw` requires of a header pointer.
+unsafe fn drop_waker(ptr: *const ()) {
+ let ptr = NonNull::new_unchecked(ptr as *mut Header);
+ trace!(ptr, "waker.drop");
+ let raw = RawTask::from_raw(ptr);
+ raw.drop_reference();
+}
+
+/// `wake` entry of [`WAKER_VTABLE`]: rebuilds the `RawTask` from `ptr` and
+/// forwards to `RawTask::wake_by_val`.
+///
+/// # Safety
+///
+/// `ptr` must be non-null (it is passed to `NonNull::new_unchecked`) and
+/// must satisfy whatever `RawTask::from_raw` requires of a header pointer.
+unsafe fn wake_by_val(ptr: *const ()) {
+ let ptr = NonNull::new_unchecked(ptr as *mut Header);
+ trace!(ptr, "waker.wake");
+ let raw = RawTask::from_raw(ptr);
+ raw.wake_by_val();
+}
+
+// Wake without consuming the waker
+//
+// `wake_by_ref` entry of `WAKER_VTABLE`: rebuilds the `RawTask` from `ptr`
+// and forwards to `RawTask::wake_by_ref`.
+//
+// Safety: `ptr` must be non-null (it is passed to `NonNull::new_unchecked`)
+// and must satisfy whatever `RawTask::from_raw` requires of a header pointer.
+unsafe fn wake_by_ref(ptr: *const ()) {
+ let ptr = NonNull::new_unchecked(ptr as *mut Header);
+ trace!(ptr, "waker.wake_by_ref");
+ let raw = RawTask::from_raw(ptr);
+ raw.wake_by_ref();
+}
+
+// The one vtable used for every waker built by `raw_waker`. The arguments to
+// `RawWakerVTable::new` are, in order: clone, wake, wake_by_ref, drop.
+static WAKER_VTABLE: RawWakerVTable =
+ RawWakerVTable::new(clone_waker, wake_by_val, wake_by_ref, drop_waker);
+
+/// Wraps the header pointer in a `RawWaker` backed by [`WAKER_VTABLE`].
+fn raw_waker(header: NonNull<Header>) -> RawWaker {
+    RawWaker::new(header.as_ptr() as *const (), &WAKER_VTABLE)
+}
diff --git a/third_party/rust/tokio/src/runtime/tests/inject.rs b/third_party/rust/tokio/src/runtime/tests/inject.rs
new file mode 100644
index 0000000000..ccead5e024
--- /dev/null
+++ b/third_party/rust/tokio/src/runtime/tests/inject.rs
@@ -0,0 +1,54 @@
+use crate::runtime::scheduler::inject;
+
+/// Pushes `N` tasks one at a time and pops them again, checking the reported
+/// length before every operation and that the queue ends up empty.
+#[test]
+fn push_and_pop() {
+    const N: usize = 2;
+
+    let (inject, mut synced) = inject::Shared::new();
+
+    // The length grows by one with every push.
+    for i in 0..N {
+        assert_eq!(inject.len(), i);
+        let (task, _) = super::unowned(async {});
+        unsafe { inject.push(&mut synced, task) };
+    }
+
+    // The length shrinks by one with every pop.
+    for i in 0..N {
+        assert_eq!(inject.len(), N - i);
+        assert!(unsafe { inject.pop(&mut synced) }.is_some());
+    }
+
+    // Everything has been popped; one more pop yields nothing.
+    assert!(unsafe { inject.pop(&mut synced) }.is_none());
+}
+
+/// Pushes ten tasks as one batch, then pops them in chunks of five; the third
+/// chunk must be empty.
+#[test]
+fn push_batch_and_pop() {
+    let (queue, mut synced) = inject::Shared::new();
+
+    unsafe {
+        let batch = (0..10).map(|_| super::unowned(async {}).0);
+        queue.push_batch(&mut synced, batch);
+
+        for expected in [5, 5, 0] {
+            assert_eq!(expected, queue.pop_n(&mut synced, 5).count());
+        }
+    }
+}
+
+/// Dropping the iterator returned by `pop_n` without consuming it must still
+/// leave the queue empty.
+#[test]
+fn pop_n_drains_on_drop() {
+    let (queue, mut synced) = inject::Shared::new();
+
+    unsafe {
+        let batch = (0..10).map(|_| super::unowned(async {}).0);
+        queue.push_batch(&mut synced, batch);
+
+        // Discard the iterator right away without pulling any item from it.
+        drop(queue.pop_n(&mut synced, 10));
+
+        assert_eq!(queue.len(), 0);
+    }
+}
diff --git a/third_party/rust/tokio/src/runtime/tests/loom_blocking.rs b/third_party/rust/tokio/src/runtime/tests/loom_blocking.rs
new file mode 100644
index 0000000000..5c4aeae39c
--- /dev/null
+++ b/third_party/rust/tokio/src/runtime/tests/loom_blocking.rs
@@ -0,0 +1,102 @@
+use crate::runtime::{self, Runtime};
+
+use std::sync::Arc;
+
+/// After the runtime is dropped, no blocking task may still hold a clone of
+/// the shared `Arc`.
+#[test]
+fn blocking_shutdown() {
+    loom::model(|| {
+        let shared = Arc::new(());
+        let rt = mk_runtime(1);
+
+        {
+            let _guard = rt.enter();
+            for _ in 0..2 {
+                let held = shared.clone();
+                crate::task::spawn_blocking(move || {
+                    // The closure's own clone keeps the count above one.
+                    assert!(1 < Arc::strong_count(&held));
+                });
+            }
+        }
+
+        drop(rt);
+        assert_eq!(1, Arc::strong_count(&shared));
+    });
+}
+
+/// A task accepted by `spawn_mandatory_blocking` must run even when the
+/// runtime is dropped right after spawning it.
+#[test]
+fn spawn_mandatory_blocking_should_always_run() {
+    use crate::runtime::tests::loom_oneshot;
+
+    loom::model(|| {
+        let rt = runtime::Builder::new_current_thread().build().unwrap();
+        let (done_tx, done_rx) = loom_oneshot::channel();
+
+        let _guard = rt.enter();
+        runtime::spawn_blocking(|| {});
+        let accepted = runtime::spawn_mandatory_blocking(move || {
+            let _ = done_tx.send(());
+        });
+        accepted.unwrap();
+
+        drop(rt);
+
+        // This call will deadlock if `spawn_mandatory_blocking` doesn't run.
+        let _: () = done_rx.recv();
+    });
+}
+
+/// Same guarantee as above, but the runtime is dropped concurrently from
+/// another thread while the task is being spawned through a cloned handle.
+#[test]
+fn spawn_mandatory_blocking_should_run_even_when_shutting_down_from_other_thread() {
+    use crate::runtime::tests::loom_oneshot;
+
+    loom::model(|| {
+        let rt = runtime::Builder::new_current_thread().build().unwrap();
+        let rt_handle = rt.handle().clone();
+
+        // Drop the runtime in a different thread
+        {
+            loom::thread::spawn(move || {
+                drop(rt);
+            });
+        }
+
+        let _guard = rt_handle.enter();
+        let (done_tx, done_rx) = loom_oneshot::channel();
+        let spawned = runtime::spawn_mandatory_blocking(move || {
+            let _ = done_tx.send(());
+        });
+
+        // `spawned.is_some()` means that `spawn_mandatory_blocking`
+        // promised us to run the blocking task
+        if spawned.is_some() {
+            // This call will deadlock if `spawn_mandatory_blocking` doesn't run.
+            let _: () = done_rx.recv();
+        }
+    });
+}
+
+/// With the clock paused, awaiting two blocking tasks under a 1ms timeout
+/// must complete without the timeout firing.
+#[test]
+fn spawn_blocking_when_paused() {
+    use std::time::Duration;
+
+    loom::model(|| {
+        let mut builder = crate::runtime::Builder::new_current_thread();
+        builder.enable_time().start_paused(true);
+        let rt = builder.build().unwrap();
+
+        let handle = rt.handle();
+        let _guard = handle.enter();
+
+        let first = crate::task::spawn_blocking(|| {});
+        let second = crate::task::spawn_blocking(|| {});
+        let both = async move {
+            first.await.expect("blocking task should finish");
+            second.await.expect("blocking task should finish");
+        };
+
+        rt.block_on(crate::time::timeout(Duration::from_millis(1), both))
+            .expect("timeout should not trigger");
+    });
+}
+
+/// Builds a multi-thread runtime with `num_threads` worker threads.
+fn mk_runtime(num_threads: usize) -> Runtime {
+    let mut builder = runtime::Builder::new_multi_thread();
+    builder.worker_threads(num_threads);
+    builder.build().unwrap()
+}
diff --git a/third_party/rust/tokio/src/runtime/tests/loom_current_thread_scheduler.rs b/third_party/rust/tokio/src/runtime/tests/loom_current_thread_scheduler.rs
new file mode 100644
index 0000000000..a772603f71
--- /dev/null
+++ b/third_party/rust/tokio/src/runtime/tests/loom_current_thread_scheduler.rs
@@ -0,0 +1,142 @@
+use crate::loom::sync::atomic::AtomicUsize;
+use crate::loom::sync::Arc;
+use crate::loom::thread;
+use crate::runtime::{Builder, Runtime};
+use crate::sync::oneshot::{self, Receiver};
+use crate::task;
+use std::future::Future;
+use std::pin::Pin;
+use std::sync::atomic::Ordering::{Acquire, Release};
+use std::task::{Context, Poll};
+
+/// Spawns a task that yields twelve times before signalling completion, blocks
+/// on a [`BlockedFuture`] waiting for that signal, and asserts the blocked
+/// future was polled no more than `at_most_polls` times.
+fn assert_at_most_num_polls(rt: Arc<Runtime>, at_most_polls: usize) {
+    let (done_tx, done_rx) = oneshot::channel();
+    let poll_counter = Arc::new(AtomicUsize::new(0));
+
+    rt.spawn(async move {
+        for _ in 0..12 {
+            task::yield_now().await;
+        }
+        done_tx.send(()).unwrap();
+    });
+
+    rt.block_on(async {
+        BlockedFuture {
+            rx: done_rx,
+            num_polls: poll_counter.clone(),
+        }
+        .await;
+    });
+
+    let polls = poll_counter.load(Acquire);
+    assert!(polls <= at_most_polls);
+}
+
+#[test]
+fn block_on_num_polls() {
+ loom::model(|| {
+ // We expect at most 4 polls: the future can be polled, and can turn out
+ // to be ready, at any of the following points (the last one being an
+ // opportunity for a false positive):
+ //
+ // - when we fail to steal the parker and we block on a notification
+ // that it is available.
+ //
+ // - when we steal the parker and we schedule the future.
+ //
+ // - when the future is woken up and we have run the max number of tasks
+ // for the current tick or there are no more tasks to run.
+ //
+ // - when a thread is notified that the parker is available but a third
+ // thread acquires it before the notified thread can.
+ //
+ let at_most = 4;
+
+ let rt1 = Arc::new(Builder::new_current_thread().build().unwrap());
+ let rt2 = rt1.clone();
+ let rt3 = rt1.clone();
+
+ let th1 = thread::spawn(move || assert_at_most_num_polls(rt1, at_most));
+ let th2 = thread::spawn(move || assert_at_most_num_polls(rt2, at_most));
+ let th3 = thread::spawn(move || assert_at_most_num_polls(rt3, at_most));
+
+ th1.join().unwrap();
+ th2.join().unwrap();
+ th3.join().unwrap();
+ });
+}
+
+#[test]
+fn assert_no_unnecessary_polls() {
+ loom::model(|| {
+ // After we poll the outer future, `woken` should reset to false.
+ let rt = Builder::new_current_thread().build().unwrap();
+ let (tx, rx) = oneshot::channel();
+ let pending_cnt = Arc::new(AtomicUsize::new(0));
+
+ rt.spawn(async move {
+ for _ in 0..24 {
+ task::yield_now().await;
+ }
+ tx.send(()).unwrap();
+ });
+
+ let pending_cnt_clone = pending_cnt.clone();
+ rt.block_on(async move {
+ // Use `task::yield_now()` to ensure `woken` is set to true.
+ // `ResetFuture` should return `Pending` at most once.
+ // There are two cases:
+ // 1. No message has been received from the channel yet: `ResetFuture`
+ // is polled, returns `Pending`, and increments `pending_cnt`.
+ // When the message arrives, `ResetFuture` returns `Ready`, so we
+ // expect `pending_cnt` to be 1.
+ // 2. The message has already been received: `ResetFuture` returns
+ // `Ready` immediately, so we expect `pending_cnt` to be 0.
+ task::yield_now().await;
+ ResetFuture {
+ rx,
+ pending_cnt: pending_cnt_clone,
+ }
+ .await;
+ });
+
+ let pending_cnt = pending_cnt.load(Acquire);
+ assert!(pending_cnt <= 1);
+ });
+}
+
+/// Future that waits on a oneshot receiver and counts how often it is polled.
+struct BlockedFuture {
+    rx: Receiver<()>,
+    num_polls: Arc<AtomicUsize>,
+}
+
+impl Future for BlockedFuture {
+    type Output = ();
+
+    fn poll(mut self: Pin<&mut Self>, cx: &mut Context<'_>) -> Poll<Self::Output> {
+        // Every poll is counted, whether or not the receiver is ready.
+        self.num_polls.fetch_add(1, Release);
+
+        // Any ready result (value or error) completes this future.
+        Pin::new(&mut self.rx).poll(cx).map(|_| ())
+    }
+}
+
+/// Future that waits on a oneshot receiver and counts how often polling it
+/// returned `Pending`.
+struct ResetFuture {
+    rx: Receiver<()>,
+    pending_cnt: Arc<AtomicUsize>,
+}
+
+impl Future for ResetFuture {
+    type Output = ();
+
+    fn poll(mut self: Pin<&mut Self>, cx: &mut Context<'_>) -> Poll<Self::Output> {
+        let polled = Pin::new(&mut self.rx).poll(cx);
+
+        if let Poll::Pending = polled {
+            // Only pending polls are counted.
+            self.pending_cnt.fetch_add(1, Release);
+            return Poll::Pending;
+        }
+
+        // Any ready result (value or error) completes this future.
+        Poll::Ready(())
+    }
+}
diff --git a/third_party/rust/tokio/src/runtime/tests/loom_join_set.rs b/third_party/rust/tokio/src/runtime/tests/loom_join_set.rs
new file mode 100644
index 0000000000..bd343876a2
--- /dev/null
+++ b/third_party/rust/tokio/src/runtime/tests/loom_join_set.rs
@@ -0,0 +1,82 @@
+use crate::runtime::Builder;
+use crate::task::JoinSet;
+
+/// Interleaves spawning and joining on a `JoinSet` and checks `len()` after
+/// every step; the set is dropped with one task still in it.
+#[test]
+fn test_join_set() {
+    loom::model(|| {
+        let mut builder = Builder::new_multi_thread();
+        builder.worker_threads(1);
+        let rt = builder.build().unwrap();
+
+        let mut set = JoinSet::new();
+
+        rt.block_on(async {
+            assert_eq!(set.len(), 0);
+            set.spawn(async {});
+            assert_eq!(set.len(), 1);
+            set.spawn(async {});
+            assert_eq!(set.len(), 2);
+            let _: () = set.join_next().await.unwrap().unwrap();
+            assert_eq!(set.len(), 1);
+            set.spawn(async {});
+            assert_eq!(set.len(), 2);
+            let _: () = set.join_next().await.unwrap().unwrap();
+            assert_eq!(set.len(), 1);
+            let _: () = set.join_next().await.unwrap().unwrap();
+            assert_eq!(set.len(), 0);
+            set.spawn(async {});
+            assert_eq!(set.len(), 1);
+        });
+
+        drop(set);
+        drop(rt);
+    });
+}
+
+/// Races `abort_all` against task completion and requires that, across all
+/// explored executions, both outcomes (completed and cancelled) are observed.
+#[test]
+fn abort_all_during_completion() {
+    use std::sync::{
+        atomic::{AtomicBool, Ordering::SeqCst},
+        Arc,
+    };
+
+    // These flags record that at least one execution had the task complete
+    // first, and that at least one execution had the task be cancelled before
+    // it completed.
+    let saw_complete = Arc::new(AtomicBool::new(false));
+    let saw_cancel = Arc::new(AtomicBool::new(false));
+
+    {
+        let saw_complete = saw_complete.clone();
+        let saw_cancel = saw_cancel.clone();
+        loom::model(move || {
+            let mut builder = Builder::new_multi_thread();
+            builder.worker_threads(1);
+            let rt = builder.build().unwrap();
+
+            let mut set = JoinSet::new();
+
+            rt.block_on(async {
+                set.spawn(async {});
+                set.abort_all();
+
+                let outcome = set.join_next().await;
+                match outcome {
+                    Some(Ok(())) => saw_complete.store(true, SeqCst),
+                    Some(Err(err)) if err.is_cancelled() => saw_cancel.store(true, SeqCst),
+                    Some(Err(err)) => panic!("fail: {}", err),
+                    None => {
+                        unreachable!("Aborting the task does not remove it from the JoinSet.")
+                    }
+                }
+
+                assert!(matches!(set.join_next().await, None));
+            });
+
+            drop(set);
+            drop(rt);
+        });
+    }
+
+    assert!(saw_complete.load(SeqCst));
+    assert!(saw_cancel.load(SeqCst));
+}
diff --git a/third_party/rust/tokio/src/runtime/tests/loom_local.rs b/third_party/rust/tokio/src/runtime/tests/loom_local.rs
new file mode 100644
index 0000000000..d9a07a45f0
--- /dev/null
+++ b/third_party/rust/tokio/src/runtime/tests/loom_local.rs
@@ -0,0 +1,47 @@
+use crate::runtime::tests::loom_oneshot as oneshot;
+use crate::runtime::Builder;
+use crate::task::LocalSet;
+
+use std::task::Poll;
+
+/// Waking a runtime pushes a task into the runtime's notification queue, and
+/// tasks in that queue usually hold a reference back to the runtime. If they
+/// are not removed properly when the runtime shuts down, memory is leaked.
+///
+/// This test checks that waking something while a `LocalSet` is shutting down
+/// leaves no task lingering in the queue once shutdown has finished. The
+/// check itself is performed by loom's leak finder.
+#[test]
+fn wake_during_shutdown() {
+    loom::model(|| {
+        let rt = Builder::new_current_thread().build().unwrap();
+        let local = LocalSet::new();
+
+        let (waker_tx, waker_rx) = oneshot::channel();
+
+        // The local task hands its waker out on first poll and never finishes.
+        local.spawn_local(async move {
+            let mut waker_tx = Some(waker_tx);
+
+            let () = futures::future::poll_fn(|cx| {
+                if let Some(tx) = waker_tx.take() {
+                    tx.send(cx.waker().clone());
+                }
+
+                Poll::Pending
+            })
+            .await;
+        });
+
+        // A second thread wakes the task as soon as it receives the waker.
+        let waking_thread = loom::thread::spawn(move || {
+            let waker = waker_rx.recv();
+            waker.wake();
+        });
+
+        local.block_on(&rt, crate::task::yield_now());
+
+        drop(local);
+        waking_thread.join().unwrap();
+        drop(rt);
+    });
+}
diff --git a/third_party/rust/tokio/src/runtime/tests/loom_oneshot.rs b/third_party/rust/tokio/src/runtime/tests/loom_oneshot.rs
new file mode 100644
index 0000000000..87eb638642
--- /dev/null
+++ b/third_party/rust/tokio/src/runtime/tests/loom_oneshot.rs
@@ -0,0 +1,48 @@
+use crate::loom::sync::{Arc, Mutex};
+use loom::sync::Notify;
+
+/// Creates a connected [`Sender`] / [`Receiver`] pair sharing one empty slot.
+pub(crate) fn channel<T>() -> (Sender<T>, Receiver<T>) {
+    let shared = Arc::new(Inner {
+        notify: Notify::new(),
+        value: Mutex::new(None),
+    });
+
+    let sender = Sender {
+        inner: shared.clone(),
+    };
+
+    (sender, Receiver { inner: shared })
+}
+
+/// Sending half of the loom oneshot channel; consumed by `send`.
+pub(crate) struct Sender<T> {
+ inner: Arc<Inner<T>>,
+}
+
+/// Receiving half of the loom oneshot channel; consumed by `recv`.
+pub(crate) struct Receiver<T> {
+ inner: Arc<Inner<T>>,
+}
+
+/// State shared by both halves: the value slot and the notifier used to wake
+/// a waiting receiver.
+struct Inner<T> {
+ notify: Notify,
+ value: Mutex<Option<T>>,
+}
+
+impl<T> Sender<T> {
+    /// Stores `value` in the shared slot, then notifies the receiver.
+    pub(crate) fn send(self, value: T) {
+        {
+            // The lock is released before notifying.
+            let mut slot = self.inner.value.lock();
+            *slot = Some(value);
+        }
+        self.inner.notify.notify();
+    }
+}
+
+impl<T> Receiver<T> {
+    /// Blocks until a value has been stored in the shared slot and returns it.
+    pub(crate) fn recv(self) -> T {
+        loop {
+            let taken = self.inner.value.lock().take();
+            match taken {
+                Some(value) => return value,
+                // Nothing there yet: wait for a notification and retry.
+                None => self.inner.notify.wait(),
+            }
+        }
+    }
+}
diff --git a/third_party/rust/tokio/src/runtime/tests/loom_pool.rs b/third_party/rust/tokio/src/runtime/tests/loom_pool.rs
new file mode 100644
index 0000000000..fb42e1eb40
--- /dev/null
+++ b/third_party/rust/tokio/src/runtime/tests/loom_pool.rs
@@ -0,0 +1,458 @@
+/// Full runtime loom tests. These are heavy tests and take significant time to
+/// run on CI.
+///
+/// Use `LOOM_MAX_PREEMPTIONS=1` to do a "quick" run as a smoke test.
+///
+/// In order to speed up the CI runs, the tests below are divided into groups.
+use crate::future::poll_fn;
+use crate::runtime::tests::loom_oneshot as oneshot;
+use crate::runtime::{self, Runtime};
+use crate::{spawn, task};
+use tokio_test::assert_ok;
+
+use loom::sync::atomic::{AtomicBool, AtomicUsize};
+use loom::sync::Arc;
+
+use pin_project_lite::pin_project;
+use std::future::Future;
+use std::pin::Pin;
+use std::sync::atomic::Ordering::{Relaxed, SeqCst};
+use std::task::{Context, Poll};
+
+// Small "take at most once" cell used by `AtomicOneshot` below.
+mod atomic_take {
+ use loom::sync::atomic::AtomicBool;
+ use std::mem::MaybeUninit;
+ use std::sync::atomic::Ordering::SeqCst;
+
+ /// A value that can be moved out at most once through a shared reference.
+ /// `taken` records whether the value has already been moved out.
+ pub(super) struct AtomicTake<T> {
+ inner: MaybeUninit<T>,
+ taken: AtomicBool,
+ }
+
+ impl<T> AtomicTake<T> {
+ /// Wraps `value`, initially not taken.
+ pub(super) fn new(value: T) -> Self {
+ Self {
+ inner: MaybeUninit::new(value),
+ taken: AtomicBool::new(false),
+ }
+ }
+
+ /// Returns the value on the first call and `None` on every later call.
+ pub(super) fn take(&self) -> Option<T> {
+ // safety: Only one thread will see the boolean change from false
+ // to true, so that thread is able to take the value.
+ match self.taken.fetch_or(true, SeqCst) {
+ false => unsafe { Some(std::ptr::read(self.inner.as_ptr())) },
+ true => None,
+ }
+ }
+ }
+
+ impl<T> Drop for AtomicTake<T> {
+ // `MaybeUninit` never drops its contents, so a value that was never
+ // taken is taken here and dropped.
+ fn drop(&mut self) {
+ drop(self.take());
+ }
+ }
+}
+
+/// Cloneable wrapper around a oneshot sender. All clones share the same
+/// sender; only the first `assert_send` across the clones succeeds.
+#[derive(Clone)]
+struct AtomicOneshot<T> {
+    value: std::sync::Arc<atomic_take::AtomicTake<oneshot::Sender<T>>>,
+}
+
+impl<T> AtomicOneshot<T> {
+    fn new(sender: oneshot::Sender<T>) -> Self {
+        let cell = atomic_take::AtomicTake::new(sender);
+        Self {
+            value: std::sync::Arc::new(cell),
+        }
+    }
+
+    /// Sends `value`, panicking if the sender has already been taken.
+    fn assert_send(&self, value: T) {
+        let sender = self.value.take().unwrap();
+        sender.send(value);
+    }
+}
+
+/// Tests are divided into groups to make the runs faster on CI.
+mod group_a {
+    use super::*;
+
+    /// Drops the pool right after spawning a `block_in_place` task and a
+    /// second task on a single worker.
+    #[test]
+    fn racy_shutdown() {
+        loom::model(|| {
+            let rt = mk_pool(1);
+
+            // Scenario being exercised:
+            //
+            // A worker that still has tasks in its local queue is sent to the
+            // blocking pool (because of `block_in_place`). The blocking pool
+            // is shut down and therefore drops the worker, so the worker's
+            // shutdown method never gets run.
+            //
+            // To provoke this, two tasks are spawned on one worker, the first
+            // of which calls `block_in_place`, and the pool is dropped
+            // immediately afterwards.
+            rt.spawn(track(async {
+                crate::task::block_in_place(|| {});
+            }));
+            rt.spawn(track(async {}));
+            drop(rt);
+        });
+    }
+
+    /// Two tasks each spawn a nested task; whichever nested task increments
+    /// the shared counter second signals completion.
+    #[test]
+    fn pool_multi_spawn() {
+        loom::model(|| {
+            let rt = mk_pool(2);
+            let counter_a = Arc::new(AtomicUsize::new(0));
+
+            let (tx, rx) = oneshot::channel();
+            let tx_a = AtomicOneshot::new(tx);
+
+            // Handles for the second task
+            let counter_b = counter_a.clone();
+            let tx_b = tx_a.clone();
+
+            // Spawn a task
+            rt.spawn(track(async move {
+                spawn(track(async move {
+                    if 1 == counter_a.fetch_add(1, Relaxed) {
+                        tx_a.assert_send(());
+                    }
+                }));
+            }));
+
+            // Spawn a second task
+            rt.spawn(track(async move {
+                spawn(track(async move {
+                    if 1 == counter_b.fetch_add(1, Relaxed) {
+                        tx_b.assert_send(());
+                    }
+                }));
+            }));
+
+            rx.recv();
+        });
+    }
+
+    /// Runs a single `block_in_place` task, optionally followed by a yield,
+    /// and drops the pool once the blocking section has signalled.
+    fn only_blocking_inner(first_pending: bool) {
+        loom::model(move || {
+            let rt = mk_pool(1);
+            let (entered_tx, entered_rx) = oneshot::channel();
+
+            rt.spawn(track(async move {
+                crate::task::block_in_place(move || {
+                    entered_tx.send(());
+                });
+                if first_pending {
+                    task::yield_now().await
+                }
+            }));
+
+            entered_rx.recv();
+            drop(rt);
+        });
+    }
+
+    #[test]
+    fn only_blocking_without_pending() {
+        only_blocking_inner(false)
+    }
+
+    #[test]
+    fn only_blocking_with_pending() {
+        only_blocking_inner(true)
+    }
+}
+
+// Second group of pool tests: blocking mixed with regular tasks, join
+// handles, `block_on` under load, and shutdown with pending notifications.
+mod group_b {
+ use super::*;
+
+ // Spawns one `block_in_place` task (optionally followed by a yield) plus
+ // `NUM` plain tasks on a single worker. The plain task that brings the
+ // counter to `NUM` signals `done_tx`; the pool is dropped once both the
+ // done signal and the blocking signal have been received.
+ fn blocking_and_regular_inner(first_pending: bool) {
+ const NUM: usize = 3;
+ loom::model(move || {
+ let pool = mk_pool(1);
+ let cnt = Arc::new(AtomicUsize::new(0));
+
+ let (block_tx, block_rx) = oneshot::channel();
+ let (done_tx, done_rx) = oneshot::channel();
+ let done_tx = AtomicOneshot::new(done_tx);
+
+ pool.spawn(track(async move {
+ crate::task::block_in_place(move || {
+ block_tx.send(());
+ });
+ if first_pending {
+ task::yield_now().await
+ }
+ }));
+
+ for _ in 0..NUM {
+ let cnt = cnt.clone();
+ let done_tx = done_tx.clone();
+
+ pool.spawn(track(async move {
+ if NUM == cnt.fetch_add(1, Relaxed) + 1 {
+ done_tx.assert_send(());
+ }
+ }));
+ }
+
+ done_rx.recv();
+ block_rx.recv();
+
+ drop(pool);
+ });
+ }
+
+ #[test]
+ fn blocking_and_regular() {
+ blocking_and_regular_inner(false);
+ }
+
+ #[test]
+ fn blocking_and_regular_with_pending() {
+ blocking_and_regular_inner(true);
+ }
+
+ // The output of a spawned task is delivered through its join handle.
+ #[test]
+ fn join_output() {
+ loom::model(|| {
+ let rt = mk_pool(1);
+
+ rt.block_on(async {
+ let t = crate::spawn(track(async { "hello" }));
+
+ let out = assert_ok!(t.await);
+ assert_eq!("hello", out.into_inner());
+ });
+ });
+ }
+
+ // Polls a join handle exactly once, ignoring the result, and then lets
+ // the handle go out of scope.
+ #[test]
+ fn poll_drop_handle_then_drop() {
+ loom::model(|| {
+ let rt = mk_pool(1);
+
+ rt.block_on(async move {
+ let mut t = crate::spawn(track(async { "hello" }));
+
+ poll_fn(|cx| {
+ let _ = Pin::new(&mut t).poll(cx);
+ Poll::Ready(())
+ })
+ .await;
+ });
+ })
+ }
+
+ // `block_on` must complete while a spawned task keeps yielding.
+ #[test]
+ fn complete_block_on_under_load() {
+ loom::model(|| {
+ let pool = mk_pool(1);
+
+ pool.block_on(async {
+ // Trigger a re-schedule
+ crate::spawn(track(async {
+ for _ in 0..2 {
+ task::yield_now().await;
+ }
+ }));
+
+ gated2(true).await
+ });
+ });
+ }
+
+ // The runtime goes out of scope at the end of the model closure while
+ // tasks may still be waiting on the tokio oneshot channels (note that
+ // `oneshot` is shadowed by `crate::sync::oneshot` inside this test).
+ #[test]
+ fn shutdown_with_notification() {
+ use crate::sync::oneshot;
+
+ loom::model(|| {
+ let rt = mk_pool(2);
+ let (done_tx, done_rx) = oneshot::channel::<()>();
+
+ rt.spawn(track(async move {
+ let (tx, rx) = oneshot::channel::<()>();
+
+ crate::spawn(async move {
+ crate::task::spawn_blocking(move || {
+ let _ = tx.send(());
+ });
+
+ let _ = done_rx.await;
+ });
+
+ let _ = rx.await;
+
+ let _ = done_tx.send(());
+ }));
+ });
+ }
+}
+
+mod group_c {
+    use super::*;
+
+    /// Drops the pool while two gated tasks (one woken from a thread, one
+    /// woken from a spawned task) may still be in flight.
+    #[test]
+    fn pool_shutdown() {
+        loom::model(|| {
+            let rt = mk_pool(2);
+
+            for wake_from_thread in [true, false] {
+                rt.spawn(track(async move {
+                    gated2(wake_from_thread).await;
+                }));
+            }
+
+            drop(rt);
+        });
+    }
+}
+
+mod group_d {
+    use super::*;
+
+    /// Two tasks each wait on `multi_gated`; whichever increments the shared
+    /// counter second signals completion.
+    #[test]
+    fn pool_multi_notify() {
+        loom::model(|| {
+            let rt = mk_pool(2);
+
+            let counter_a = Arc::new(AtomicUsize::new(0));
+
+            let (done_tx, done_rx) = oneshot::channel();
+            let done_a = AtomicOneshot::new(done_tx);
+            let done_b = done_a.clone();
+
+            // Spawn a task
+            let counter_b = counter_a.clone();
+            rt.spawn(track(async move {
+                multi_gated().await;
+
+                if 1 == counter_a.fetch_add(1, Relaxed) {
+                    done_a.assert_send(());
+                }
+            }));
+
+            // Spawn a second task
+            rt.spawn(track(async move {
+                multi_gated().await;
+
+                if 1 == counter_b.fetch_add(1, Relaxed) {
+                    done_b.assert_send(());
+                }
+            }));
+
+            done_rx.recv();
+        });
+    }
+}
+
+/// Builds a multi-thread runtime with `num_threads` workers and a fixed
+/// event interval.
+fn mk_pool(num_threads: usize) -> Runtime {
+    let mut builder = runtime::Builder::new_multi_thread();
+    builder.worker_threads(num_threads);
+    // Set the intervals to avoid tuning logic
+    builder.event_interval(2);
+    builder.build().unwrap()
+}
+
+/// Returns a future that stays pending until a gate is opened elsewhere.
+///
+/// On its first poll the future arranges for the gate to be opened and its
+/// waker to be woken, either from a new loom thread (`thread == true`) or
+/// from a spawned task (`thread == false`). Later polls resolve to
+/// `"hello world"` once the gate is observed open.
+fn gated2(thread: bool) -> impl Future<Output = &'static str> {
+    use loom::thread;
+    use std::sync::Arc;
+
+    let gate = Arc::new(AtomicBool::new(false));
+    let mut fired = false;
+
+    poll_fn(move |cx| {
+        if fired {
+            // The opener was already launched: just check the gate.
+            return if gate.load(SeqCst) {
+                Poll::Ready("hello world")
+            } else {
+                Poll::Pending
+            };
+        }
+
+        // First poll: launch whatever opens the gate and wakes us.
+        let gate = gate.clone();
+        let waker = cx.waker().clone();
+
+        if thread {
+            thread::spawn(move || {
+                gate.store(true, SeqCst);
+                waker.wake_by_ref();
+            });
+        } else {
+            spawn(track(async move {
+                gate.store(true, SeqCst);
+                waker.wake_by_ref();
+            }));
+        }
+
+        fired = true;
+        Poll::Pending
+    })
+}
+
+/// Waits until a spawned helper task has advanced a shared counter to 2.
+///
+/// The helper stores 1 and then 2 into the counter, waking the registered
+/// waker after each store, so the waiting side may be notified more than once.
+async fn multi_gated() {
+    struct Gate {
+        waker: loom::future::AtomicWaker,
+        count: AtomicUsize,
+    }
+
+    let shared = Arc::new(Gate {
+        waker: loom::future::AtomicWaker::new(),
+        count: AtomicUsize::new(0),
+    });
+
+    {
+        let shared = shared.clone();
+        spawn(track(async move {
+            for step in 1..3 {
+                shared.count.store(step, SeqCst);
+                shared.waker.wake();
+            }
+        }));
+    }
+
+    poll_fn(move |cx| {
+        if shared.count.load(SeqCst) >= 2 {
+            return Poll::Ready(());
+        }
+
+        // Not there yet: (re-)register for the next wake-up.
+        shared.waker.register_by_ref(cx.waker());
+        Poll::Pending
+    })
+    .await;
+}
+
+/// Wraps `f` in a [`Track`] carrying a fresh loom `Arc`.
+fn track<T: Future>(f: T) -> Track<T> {
+    let arc = Arc::new(());
+    Track { inner: f, arc }
+}
+
+// Wrapper that pairs a value (a future, or a future's output) with a loom
+// `Arc` so that loom's leak tracking can observe it.
+pin_project! {
+ struct Track<T> {
+ #[pin]
+ inner: T,
+ // Arc is used to hook into loom's leak tracking.
+ arc: Arc<()>,
+ }
+}
+
+impl<T> Track<T> {
+ // Unwraps the tracked value, discarding the tracking `Arc`.
+ fn into_inner(self) -> T {
+ self.inner
+ }
+}
+
+impl<T: Future> Future for Track<T> {
+    type Output = Track<T::Output>;
+
+    /// Polls the wrapped future; once it is ready, its output is wrapped in a
+    /// new `Track` that shares this wrapper's `Arc`.
+    fn poll(self: Pin<&mut Self>, cx: &mut Context<'_>) -> Poll<Self::Output> {
+        let this = self.project();
+        let output = ready!(this.inner.poll(cx));
+
+        Poll::Ready(Track {
+            inner: output,
+            arc: this.arc.clone(),
+        })
+    }
+}
diff --git a/third_party/rust/tokio/src/runtime/tests/loom_queue.rs b/third_party/rust/tokio/src/runtime/tests/loom_queue.rs
new file mode 100644
index 0000000000..b60e039b9a
--- /dev/null
+++ b/third_party/rust/tokio/src/runtime/tests/loom_queue.rs
@@ -0,0 +1,205 @@
+use crate::runtime::scheduler::multi_thread::{queue, Stats};
+use crate::runtime::tests::NoopSchedule;
+
+use loom::thread;
+use std::cell::RefCell;
+
+/// Creates a `Stats` value backed by freshly created worker metrics.
+fn new_stats() -> Stats {
+    let metrics = crate::runtime::WorkerMetrics::new();
+    Stats::new(&metrics)
+}
+
+/// One producer pushes and pops six tasks in total while a stealer thread
+/// makes three steal attempts; together they must account for all six.
+#[test]
+fn basic() {
+    loom::model(|| {
+        let (steal, mut local) = queue::local();
+        let inject = RefCell::new(vec![]);
+        let mut stats = new_stats();
+
+        let stealer = thread::spawn(move || {
+            let mut stats = new_stats();
+            let (_, mut local) = queue::local();
+            let mut seen = 0;
+
+            for _ in 0..3 {
+                if steal.steal_into(&mut local, &mut stats).is_some() {
+                    seen += 1;
+                }
+
+                while local.pop().is_some() {
+                    seen += 1;
+                }
+            }
+
+            seen
+        });
+
+        let mut total = 0;
+
+        for _round in 0..2 {
+            // Two pushes followed by a single pop
+            for _ in 0..2 {
+                let (task, _) = super::unowned(async {});
+                local.push_back_or_overflow(task, &inject, &mut stats);
+            }
+
+            if local.pop().is_some() {
+                total += 1;
+            }
+
+            // Push another task
+            let (task, _) = super::unowned(async {});
+            local.push_back_or_overflow(task, &inject, &mut stats);
+
+            // Drain whatever has not been stolen
+            while local.pop().is_some() {
+                total += 1;
+            }
+        }
+
+        total += inject.borrow_mut().drain(..).count();
+
+        total += stealer.join().unwrap();
+
+        assert_eq!(6, total);
+    });
+}
+
+/// Pushes seven tasks in total while a stealer makes a single steal attempt;
+/// local pops, stolen tasks and overflowed tasks must add up to seven.
+#[test]
+fn steal_overflow() {
+    loom::model(|| {
+        let (steal, mut local) = queue::local();
+        let inject = RefCell::new(vec![]);
+        let mut stats = new_stats();
+
+        let stealer = thread::spawn(move || {
+            let mut stats = new_stats();
+            let (_, mut local) = queue::local();
+            let mut seen = 0;
+
+            if steal.steal_into(&mut local, &mut stats).is_some() {
+                seen += 1;
+            }
+
+            while local.pop().is_some() {
+                seen += 1;
+            }
+
+            seen
+        });
+
+        let mut total = 0;
+
+        // push a task, pop a task
+        let (task, _) = super::unowned(async {});
+        local.push_back_or_overflow(task, &inject, &mut stats);
+
+        if local.pop().is_some() {
+            total += 1;
+        }
+
+        // six more pushes, none of them popped yet
+        for _ in 0..6 {
+            let (task, _) = super::unowned(async {});
+            local.push_back_or_overflow(task, &inject, &mut stats);
+        }
+
+        total += stealer.join().unwrap();
+
+        while local.pop().is_some() {
+            total += 1;
+        }
+
+        total += inject.borrow_mut().drain(..).count();
+
+        assert_eq!(7, total);
+    });
+}
+
+/// Two stealer threads compete with the owner for `NUM_TASKS` tasks; every
+/// task must be accounted for exactly once.
+#[test]
+fn multi_stealer() {
+    const NUM_TASKS: usize = 5;
+
+    /// Makes one steal attempt and returns how many tasks were obtained.
+    fn steal_tasks(steal: queue::Steal<NoopSchedule>) -> usize {
+        let mut stats = new_stats();
+        let (_, mut local) = queue::local();
+
+        let got_one = steal.steal_into(&mut local, &mut stats).is_some();
+        if !got_one {
+            return 0;
+        }
+
+        // One task was returned directly; the rest landed in `local`.
+        let mut obtained = 1;
+
+        while local.pop().is_some() {
+            obtained += 1;
+        }
+
+        obtained
+    }
+
+    loom::model(|| {
+        let (steal, mut local) = queue::local();
+        let inject = RefCell::new(vec![]);
+        let mut stats = new_stats();
+
+        // Push work
+        for _ in 0..NUM_TASKS {
+            let (task, _) = super::unowned(async {});
+            local.push_back_or_overflow(task, &inject, &mut stats);
+        }
+
+        let first = {
+            let steal = steal.clone();
+            thread::spawn(move || steal_tasks(steal))
+        };
+
+        let second = thread::spawn(move || steal_tasks(steal));
+
+        let mut total = 0;
+
+        while local.pop().is_some() {
+            total += 1;
+        }
+
+        total += inject.borrow_mut().drain(..).count();
+
+        total += first.join().unwrap();
+        total += second.join().unwrap();
+
+        assert_eq!(total, NUM_TASKS);
+    });
+}
+
+/// While another thread steals from our queue, we drain it and then steal
+/// from a second queue into it.
+#[test]
+fn chained_steal() {
+    loom::model(|| {
+        let mut stats = new_stats();
+        let (ours_steal, mut ours) = queue::local();
+        let (theirs_steal, mut theirs) = queue::local();
+        let inject = RefCell::new(vec![]);
+
+        // Load up some tasks
+        for _ in 0..4 {
+            let (task, _) = super::unowned(async {});
+            ours.push_back_or_overflow(task, &inject, &mut stats);
+
+            let (task, _) = super::unowned(async {});
+            theirs.push_back_or_overflow(task, &inject, &mut stats);
+        }
+
+        // Spawn a task to steal from **our** queue
+        let stealer = thread::spawn(move || {
+            let mut stats = new_stats();
+            let (_, mut local) = queue::local();
+            ours_steal.steal_into(&mut local, &mut stats);
+
+            while local.pop().is_some() {}
+        });
+
+        // Drain our tasks, then attempt to steal
+        while ours.pop().is_some() {}
+
+        theirs_steal.steal_into(&mut ours, &mut stats);
+
+        stealer.join().unwrap();
+
+        // Empty both queues before they go out of scope
+        while ours.pop().is_some() {}
+        while theirs.pop().is_some() {}
+    });
+}
diff --git a/third_party/rust/tokio/src/runtime/tests/loom_shutdown_join.rs b/third_party/rust/tokio/src/runtime/tests/loom_shutdown_join.rs
new file mode 100644
index 0000000000..6fbc4bfded
--- /dev/null
+++ b/third_party/rust/tokio/src/runtime/tests/loom_shutdown_join.rs
@@ -0,0 +1,28 @@
+use crate::runtime::{Builder, Handle};
+
+/// Join handles of tasks spawned before and after runtime shutdown must both
+/// resolve immediately with a "cancelled" error.
+#[test]
+fn join_handle_cancel_on_shutdown() {
+    let mut model = loom::model::Builder::new();
+    model.preemption_bound = Some(2);
+    model.check(|| {
+        use futures::future::FutureExt;
+
+        let mut builder = Builder::new_multi_thread();
+        builder.worker_threads(2);
+        let rt = builder.build().unwrap();
+
+        let handle = rt.block_on(async move { Handle::current() });
+
+        // Spawned while the runtime is still alive
+        let before_shutdown = handle.spawn(futures::future::pending::<()>());
+
+        drop(rt);
+
+        // Spawned through the handle after the runtime has been dropped
+        let after_shutdown = handle.spawn(futures::future::pending::<()>());
+
+        let err1 = before_shutdown.now_or_never().unwrap().unwrap_err();
+        let err2 = after_shutdown.now_or_never().unwrap().unwrap_err();
+        assert!(err1.is_cancelled());
+        assert!(err2.is_cancelled());
+    });
+}
diff --git a/third_party/rust/tokio/src/runtime/tests/loom_yield.rs b/third_party/rust/tokio/src/runtime/tests/loom_yield.rs
new file mode 100644
index 0000000000..ba506e5a40
--- /dev/null
+++ b/third_party/rust/tokio/src/runtime/tests/loom_yield.rs
@@ -0,0 +1,37 @@
+use crate::runtime::park;
+use crate::runtime::tests::loom_oneshot as oneshot;
+use crate::runtime::{self, Runtime};
+
+/// If a task resumes on the same thread after `yield_now`, that thread's park
+/// count must have grown by exactly one in between.
+#[test]
+fn yield_calls_park_before_scheduling_again() {
+    // Don't need to check all permutations
+    let mut model = loom::model::Builder::default();
+    model.max_permutations = Some(1);
+    model.check(|| {
+        let rt = mk_runtime(2);
+        let (done_tx, done_rx) = oneshot::channel::<()>();
+
+        rt.spawn(async {
+            let thread_before = loom::thread::current().id();
+            let parks_before = park::current_thread_park_count();
+
+            crate::task::yield_now().await;
+
+            // The comparison only makes sense if we resumed on the same thread.
+            if thread_before == loom::thread::current().id() {
+                let parks_after = park::current_thread_park_count();
+                assert_eq!(parks_before + 1, parks_after);
+            }
+
+            done_tx.send(());
+        });
+
+        done_rx.recv();
+    });
+}
+
+/// Builds a multi-thread runtime with `num_threads` worker threads.
+fn mk_runtime(num_threads: usize) -> Runtime {
+    let mut builder = runtime::Builder::new_multi_thread();
+    builder.worker_threads(num_threads);
+    builder.build().unwrap()
+}
diff --git a/third_party/rust/tokio/src/runtime/tests/mod.rs b/third_party/rust/tokio/src/runtime/tests/mod.rs
new file mode 100644
index 0000000000..b12a76e268
--- /dev/null
+++ b/third_party/rust/tokio/src/runtime/tests/mod.rs
@@ -0,0 +1,78 @@
+// Enable dead_code / unreachable_pub here. It has been disabled in lib.rs for
+// other code when running loom tests.
+#![cfg_attr(loom, warn(dead_code, unreachable_pub))]
+
+use self::noop_scheduler::NoopSchedule;
+use self::unowned_wrapper::unowned;
+
+ // Scheduler stub shared by the runtime tests.
+ mod noop_scheduler {
+ use crate::runtime::task::{self, Task};
+
+ /// `task::Schedule` implementation that does nothing, for testing.
+ pub(crate) struct NoopSchedule;
+
+ impl task::Schedule for NoopSchedule {
+ // Never hands a task back on release.
+ fn release(&self, _task: &Task<Self>) -> Option<Task<Self>> {
+ None
+ }
+
+ // Tests using this scheduler must never trigger a reschedule; doing so
+ // panics through `unreachable!`.
+ fn schedule(&self, _task: task::Notified<Self>) {
+ unreachable!();
+ }
+ }
+ }
+
+ // Test helper `unowned`: wraps `crate::runtime::task::unowned`, always using
+ // `NoopSchedule` and a fresh `Id::next()`, and returns the task converted with
+ // `into_notified()` together with its `JoinHandle`.
+ mod unowned_wrapper {
+ use crate::runtime::task::{Id, JoinHandle, Notified};
+ use crate::runtime::tests::NoopSchedule;
+
+ // Variant for `tokio_unstable` + `tracing` builds: the future is additionally
+ // instrumented with a `trace_span!("test_span")` span before being wrapped.
+ #[cfg(all(tokio_unstable, feature = "tracing"))]
+ pub(crate) fn unowned<T>(task: T) -> (Notified<NoopSchedule>, JoinHandle<T::Output>)
+ where
+ T: std::future::Future + Send + 'static,
+ T::Output: Send + 'static,
+ {
+ use tracing::Instrument;
+ let span = tracing::trace_span!("test_span");
+ let task = task.instrument(span);
+ let (task, handle) = crate::runtime::task::unowned(task, NoopSchedule, Id::next());
+ (task.into_notified(), handle)
+ }
+
+ // Variant for all other builds: the future is wrapped as-is.
+ #[cfg(not(all(tokio_unstable, feature = "tracing")))]
+ pub(crate) fn unowned<T>(task: T) -> (Notified<NoopSchedule>, JoinHandle<T::Output>)
+ where
+ T: std::future::Future + Send + 'static,
+ T::Output: Send + 'static,
+ {
+ let (task, handle) = crate::runtime::task::unowned(task, NoopSchedule, Id::next());
+ (task.into_notified(), handle)
+ }
+ }
+
+ // Loom-specific test modules, gated by the `cfg_loom!` macro.
+ cfg_loom! {
+ mod loom_blocking;
+ mod loom_current_thread_scheduler;
+ mod loom_local;
+ mod loom_oneshot;
+ mod loom_pool;
+ mod loom_queue;
+ mod loom_shutdown_join;
+ mod loom_join_set;
+ mod loom_yield;
+
+ // Make sure debug assertions are enabled. `compile_error!` is the built-in
+ // macro that aborts compilation with the given message (`compiler_error!`
+ // is not a defined macro and would only yield a "cannot find macro" error).
+ #[cfg(not(debug_assertions))]
+ compile_error!("these tests require debug assertions to be enabled");
+ }
+
+ // Test modules gated by `cfg_not_loom!` (presumably: compiled only when not
+ // building for loom -- confirm against the macro definition).
+ cfg_not_loom! {
+ mod inject;
+ mod queue;
+
+ // `task_combinations` is compiled only when not running under miri.
+ #[cfg(not(miri))]
+ mod task_combinations;
+
+ // `task` is compiled only when running under miri.
+ #[cfg(miri)]
+ mod task;
+ }
diff --git a/third_party/rust/tokio/src/runtime/tests/queue.rs b/third_party/rust/tokio/src/runtime/tests/queue.rs
new file mode 100644
index 0000000000..5df92b7a29
--- /dev/null
+++ b/third_party/rust/tokio/src/runtime/tests/queue.rs
@@ -0,0 +1,283 @@
+use crate::runtime::scheduler::multi_thread::{queue, Stats};
+use crate::runtime::task::{self, Schedule, Task};
+
+use std::cell::RefCell;
+use std::thread;
+use std::time::Duration;
+
+ // `assert_metrics!(stats, field == value)`: submits `stats` into a freshly
+ // created `WorkerMetrics`, loads `field` from it with `Relaxed` ordering and
+ // asserts that it equals `value`, printing both values on failure.
+ #[allow(unused)]
+ macro_rules! assert_metrics {
+ ($stats:ident, $field:ident == $v:expr) => {{
+ use crate::runtime::WorkerMetrics;
+ use std::sync::atomic::Ordering::Relaxed;
+
+ let worker = WorkerMetrics::new();
+ $stats.submit(&worker);
+
+ let expect = $v;
+ let actual = worker.$field.load(Relaxed);
+
+ assert!(actual == expect, "expect = {}; actual = {}", expect, actual)
+ }};
+ }
+
+ // Creates a `Stats` value from a fresh, throwaway `WorkerMetrics`.
+ fn new_stats() -> Stats {
+ use crate::runtime::WorkerMetrics;
+ Stats::new(&WorkerMetrics::new())
+ }
+
+ // Pushing 256 tasks one by one must not overflow: the overflow metric stays
+ // at zero and nothing ends up in the `inject` vector.
+ #[test]
+ fn fits_256_one_at_a_time() {
+ let (_, mut local) = queue::local();
+ let inject = RefCell::new(vec![]);
+ let mut stats = new_stats();
+
+ for _ in 0..256 {
+ let (task, _) = super::unowned(async {});
+ local.push_back_or_overflow(task, &inject, &mut stats);
+ }
+
+ cfg_metrics! {
+ assert_metrics!(stats, overflow_count == 0);
+ }
+
+ assert!(inject.borrow_mut().pop().is_none());
+
+ // Drain the queue before it goes out of scope.
+ while local.pop().is_some() {}
+ }
+
+ // A single `push_back` of 256 tasks must make all 256 poppable again.
+ #[test]
+ fn fits_256_all_at_once() {
+ let (_, mut local) = queue::local();
+
+ let mut tasks = (0..256)
+ .map(|_| super::unowned(async {}).0)
+ .collect::<Vec<_>>();
+ local.push_back(tasks.drain(..));
+
+ // Count how many tasks come back out.
+ let mut i = 0;
+ while local.pop().is_some() {
+ i += 1;
+ }
+
+ assert_eq!(i, 256);
+ }
+
+ // Same as pushing 256 tasks at once, but through four `push_back` calls of
+ // 10 + 100 + 46 + 100 = 256 tasks; all 256 must be poppable afterwards.
+ #[test]
+ fn fits_256_all_in_chunks() {
+ let (_, mut local) = queue::local();
+
+ let mut tasks = (0..256)
+ .map(|_| super::unowned(async {}).0)
+ .collect::<Vec<_>>();
+
+ local.push_back(tasks.drain(..10));
+ local.push_back(tasks.drain(..100));
+ local.push_back(tasks.drain(..46));
+ local.push_back(tasks.drain(..100));
+
+ let mut i = 0;
+ while local.pop().is_some() {
+ i += 1;
+ }
+
+ assert_eq!(i, 256);
+ }
+
+ // Pushing 257 tasks must trigger exactly one overflow, and no task may be
+ // lost: the tasks in `inject` plus those left in `local` must total 257.
+ #[test]
+ fn overflow() {
+ let (_, mut local) = queue::local();
+ let inject = RefCell::new(vec![]);
+ let mut stats = new_stats();
+
+ for _ in 0..257 {
+ let (task, _) = super::unowned(async {});
+ local.push_back_or_overflow(task, &inject, &mut stats);
+ }
+
+ cfg_metrics! {
+ assert_metrics!(stats, overflow_count == 1);
+ }
+
+ let mut n = 0;
+
+ // Tasks that were moved to the overflow destination.
+ n += inject.borrow_mut().drain(..).count();
+
+ // Tasks that stayed in the local queue.
+ while local.pop().is_some() {
+ n += 1;
+ }
+
+ assert_eq!(n, 257);
+ }
+
+ // With four tasks queued in `local1`, one `steal_into` call must succeed and
+ // report a steal count of 2. Afterwards exactly one task is poppable from
+ // `local2` and exactly two remain in `local1`.
+ #[test]
+ fn steal_batch() {
+ let mut stats = new_stats();
+
+ let (steal1, mut local1) = queue::local();
+ let (_, mut local2) = queue::local();
+ let inject = RefCell::new(vec![]);
+
+ for _ in 0..4 {
+ let (task, _) = super::unowned(async {});
+ local1.push_back_or_overflow(task, &inject, &mut stats);
+ }
+
+ // NOTE(review): the `Some` returned here presumably carries one of the two
+ // stolen tasks directly, which is why only one lands in `local2` -- confirm.
+ assert!(steal1.steal_into(&mut local2, &mut stats).is_some());
+
+ cfg_metrics! {
+ assert_metrics!(stats, steal_count == 2);
+ }
+
+ for _ in 0..1 {
+ assert!(local2.pop().is_some());
+ }
+
+ assert!(local2.pop().is_none());
+
+ for _ in 0..2 {
+ assert!(local1.pop().is_some());
+ }
+
+ assert!(local1.pop().is_none());
+ }
+
+ // Selects a test parameter at compile time: returns `miri` when built with
+ // `cfg(miri)` and `normal` otherwise.
+ const fn normal_or_miri(normal: usize, miri: usize) -> usize {
+ if cfg!(miri) {
+ miri
+ } else {
+ normal
+ }
+ }
+
+ // Stress test with a concurrent stealer. Per iteration the main thread runs
+ // NUM_LOCAL rounds of NUM_PUSH pushes followed by up to NUM_POP pops, while a
+ // second thread performs NUM_STEAL steal attempts. The sum of locally popped
+ // tasks, tasks drained from `inject` and tasks counted by the stealer must
+ // equal the number of tasks pushed.
+ #[test]
+ fn stress1() {
+ const NUM_ITER: usize = 5;
+ const NUM_STEAL: usize = normal_or_miri(1_000, 10);
+ const NUM_LOCAL: usize = normal_or_miri(1_000, 10);
+ const NUM_PUSH: usize = normal_or_miri(500, 10);
+ const NUM_POP: usize = normal_or_miri(250, 10);
+
+ let mut stats = new_stats();
+
+ for _ in 0..NUM_ITER {
+ let (steal, mut local) = queue::local();
+ let inject = RefCell::new(vec![]);
+
+ // Stealer thread: owns its own queue and stats, returns its task count.
+ let th = thread::spawn(move || {
+ let mut stats = new_stats();
+ let (_, mut local) = queue::local();
+ let mut n = 0;
+
+ for _ in 0..NUM_STEAL {
+ // A successful steal counts as one task ...
+ if steal.steal_into(&mut local, &mut stats).is_some() {
+ n += 1;
+ }
+
+ // ... plus every task that can then be popped from the stealer's queue.
+ while local.pop().is_some() {
+ n += 1;
+ }
+
+ thread::yield_now();
+ }
+
+ cfg_metrics! {
+ assert_metrics!(stats, steal_count == n as _);
+ }
+
+ n
+ });
+
+ let mut n = 0;
+
+ for _ in 0..NUM_LOCAL {
+ for _ in 0..NUM_PUSH {
+ let (task, _) = super::unowned(async {});
+ local.push_back_or_overflow(task, &inject, &mut stats);
+ }
+
+ // Pop at most NUM_POP tasks, stopping early once the queue is empty.
+ for _ in 0..NUM_POP {
+ if local.pop().is_some() {
+ n += 1;
+ } else {
+ break;
+ }
+ }
+ }
+
+ // Add overflowed tasks and the stealer's count to the local total.
+ n += inject.borrow_mut().drain(..).count();
+
+ n += th.join().unwrap();
+
+ assert_eq!(n, NUM_LOCAL * NUM_PUSH);
+ }
+ }
+
+ // Stress test with a concurrent stealer and a long run of pushes. The main
+ // thread pushes NUM_TASKS tasks, popping one on every 128th push and draining
+ // `inject` after each push, while a second thread performs NUM_STEAL steal
+ // attempts separated by 10 microsecond sleeps. Every pushed task must be
+ // accounted for exactly once.
+ #[test]
+ fn stress2() {
+ const NUM_ITER: usize = 1;
+ const NUM_TASKS: usize = normal_or_miri(1_000_000, 50);
+ const NUM_STEAL: usize = normal_or_miri(1_000, 10);
+
+ let mut stats = new_stats();
+
+ for _ in 0..NUM_ITER {
+ let (steal, mut local) = queue::local();
+ let inject = RefCell::new(vec![]);
+
+ // Stealer thread: owns its own queue and stats, returns its task count.
+ let th = thread::spawn(move || {
+ let mut stats = new_stats();
+ let (_, mut local) = queue::local();
+ let mut n = 0;
+
+ for _ in 0..NUM_STEAL {
+ if steal.steal_into(&mut local, &mut stats).is_some() {
+ n += 1;
+ }
+
+ while local.pop().is_some() {
+ n += 1;
+ }
+
+ thread::sleep(Duration::from_micros(10));
+ }
+
+ n
+ });
+
+ let mut num_pop = 0;
+
+ for i in 0..NUM_TASKS {
+ let (task, _) = super::unowned(async {});
+ local.push_back_or_overflow(task, &inject, &mut stats);
+
+ if i % 128 == 0 && local.pop().is_some() {
+ num_pop += 1;
+ }
+
+ num_pop += inject.borrow_mut().drain(..).count();
+ }
+
+ num_pop += th.join().unwrap();
+
+ // Once the stealer has finished, count whatever is left locally.
+ while local.pop().is_some() {
+ num_pop += 1;
+ }
+
+ num_pop += inject.borrow_mut().drain(..).count();
+
+ assert_eq!(num_pop, NUM_TASKS);
+ }
+ }
+
+ // Unit-struct scheduler stub: `release` never returns a task and `schedule`
+ // panics through `unreachable!`.
+ // NOTE(review): not referenced by any test visible in this file -- confirm
+ // whether it is still needed.
+ struct Runtime;
+
+ impl Schedule for Runtime {
+ fn release(&self, _task: &Task<Self>) -> Option<Task<Self>> {
+ None
+ }
+
+ fn schedule(&self, _task: task::Notified<Self>) {
+ unreachable!();
+ }
+ }
diff --git a/third_party/rust/tokio/src/runtime/tests/task.rs b/third_party/rust/tokio/src/runtime/tests/task.rs
new file mode 100644
index 0000000000..a79c0f50d1
--- /dev/null
+++ b/third_party/rust/tokio/src/runtime/tests/task.rs
@@ -0,0 +1,332 @@
+use crate::runtime::task::{self, unowned, Id, JoinHandle, OwnedTasks, Schedule, Task};
+use crate::runtime::tests::NoopSchedule;
+use crate::util::TryLock;
+
+use std::collections::VecDeque;
+use std::future::Future;
+use std::sync::atomic::{AtomicBool, Ordering};
+use std::sync::Arc;
+
+ // Observer side of an `AssertDrop` guard: reads the shared `is_dropped` flag.
+ struct AssertDropHandle {
+ is_dropped: Arc<AtomicBool>,
+ }
+ impl AssertDropHandle {
+ // Panics unless the flag has been set. `#[track_caller]` makes the panic
+ // point at the calling test line.
+ #[track_caller]
+ fn assert_dropped(&self) {
+ assert!(self.is_dropped.load(Ordering::SeqCst));
+ }
+
+ // Panics if the flag has already been set.
+ #[track_caller]
+ fn assert_not_dropped(&self) {
+ assert!(!self.is_dropped.load(Ordering::SeqCst));
+ }
+ }
+
+ // Guard whose `Drop` impl sets a shared flag, so that a test can observe
+ // through the paired `AssertDropHandle` whether the guard has been dropped.
+ struct AssertDrop {
+ is_dropped: Arc<AtomicBool>,
+ }
+ impl AssertDrop {
+ // Creates a guard together with the handle observing it. Both share one
+ // flag, which starts out `false`.
+ fn new() -> (Self, AssertDropHandle) {
+ let shared = Arc::new(AtomicBool::new(false));
+ (
+ AssertDrop {
+ is_dropped: shared.clone(),
+ },
+ // Move the original `Arc` into the handle; a second clone is redundant.
+ AssertDropHandle { is_dropped: shared },
+ )
+ }
+ }
+ impl Drop for AssertDrop {
+ // Records the drop so that `AssertDropHandle::assert_dropped` passes.
+ fn drop(&mut self) {
+ self.is_dropped.store(true, Ordering::SeqCst);
+ }
+ }
+
+ // A Notified does not shut down on drop, but it is dropped once the ref-count
+ // hits zero.
+ //
+ // Order checked here: drop the notified task first (future still alive), then
+ // the join handle (future is dropped). The future itself is never polled.
+ #[test]
+ fn create_drop1() {
+ let (ad, handle) = AssertDrop::new();
+ let (notified, join) = unowned(
+ async {
+ drop(ad);
+ unreachable!()
+ },
+ NoopSchedule,
+ Id::next(),
+ );
+ drop(notified);
+ handle.assert_not_dropped();
+ drop(join);
+ handle.assert_dropped();
+ }
+
+ // Reverse order of `create_drop1`: drop the join handle first (future still
+ // alive), then the notified task (future is dropped).
+ #[test]
+ fn create_drop2() {
+ let (ad, handle) = AssertDrop::new();
+ let (notified, join) = unowned(
+ async {
+ drop(ad);
+ unreachable!()
+ },
+ NoopSchedule,
+ Id::next(),
+ );
+ drop(join);
+ handle.assert_not_dropped();
+ drop(notified);
+ handle.assert_dropped();
+ }
+
+ // An outstanding abort handle keeps the future alive: after the join handle
+ // and the notified task are gone, the future is dropped only once the abort
+ // handle is dropped too.
+ #[test]
+ fn drop_abort_handle1() {
+ let (ad, handle) = AssertDrop::new();
+ let (notified, join) = unowned(
+ async {
+ drop(ad);
+ unreachable!()
+ },
+ NoopSchedule,
+ Id::next(),
+ );
+ let abort = join.abort_handle();
+ drop(join);
+ handle.assert_not_dropped();
+ drop(notified);
+ handle.assert_not_dropped();
+ drop(abort);
+ handle.assert_dropped();
+ }
+
+ // Same as `drop_abort_handle1` with a different drop order: notified task,
+ // then abort handle, then join handle. Only the last drop releases the future.
+ #[test]
+ fn drop_abort_handle2() {
+ let (ad, handle) = AssertDrop::new();
+ let (notified, join) = unowned(
+ async {
+ drop(ad);
+ unreachable!()
+ },
+ NoopSchedule,
+ Id::next(),
+ );
+ let abort = join.abort_handle();
+ drop(notified);
+ handle.assert_not_dropped();
+ drop(abort);
+ handle.assert_not_dropped();
+ drop(join);
+ handle.assert_dropped();
+ }
+
+ // Shutting down through Notified works
+ //
+ // Here the join handle is dropped before `shutdown()` is called; the future
+ // must be dropped by the shutdown itself.
+ #[test]
+ fn create_shutdown1() {
+ let (ad, handle) = AssertDrop::new();
+ let (notified, join) = unowned(
+ async {
+ drop(ad);
+ unreachable!()
+ },
+ NoopSchedule,
+ Id::next(),
+ );
+ drop(join);
+ handle.assert_not_dropped();
+ notified.shutdown();
+ handle.assert_dropped();
+ }
+
+ // `shutdown()` drops the future even while the join handle is still alive;
+ // the join handle is dropped only afterwards.
+ #[test]
+ fn create_shutdown2() {
+ let (ad, handle) = AssertDrop::new();
+ let (notified, join) = unowned(
+ async {
+ drop(ad);
+ unreachable!()
+ },
+ NoopSchedule,
+ Id::next(),
+ );
+ handle.assert_not_dropped();
+ notified.shutdown();
+ handle.assert_dropped();
+ drop(join);
+ }
+
+ // Smoke test: running an unowned task with an empty future must not panic.
+ // The join handle is discarded immediately.
+ #[test]
+ fn unowned_poll() {
+ let (task, _) = unowned(async {}, NoopSchedule, Id::next());
+ task.run();
+ }
+
+ // A task that yields once must be run exactly twice by `tick()`.
+ #[test]
+ fn schedule() {
+ with(|rt| {
+ rt.spawn(async {
+ crate::task::yield_now().await;
+ });
+
+ assert_eq!(2, rt.tick());
+ rt.shutdown();
+ })
+ }
+
+ // Shutting down after a never-finishing task has been run once must succeed
+ // (`Runtime::shutdown` asserts that no owned tasks remain).
+ #[test]
+ fn shutdown() {
+ with(|rt| {
+ rt.spawn(async {
+ loop {
+ crate::task::yield_now().await;
+ }
+ });
+
+ rt.tick_max(1);
+
+ rt.shutdown();
+ })
+ }
+
+ // Shutting down a never-finishing task that was spawned but never run must
+ // succeed as well.
+ #[test]
+ fn shutdown_immediately() {
+ with(|rt| {
+ rt.spawn(async {
+ loop {
+ crate::task::yield_now().await;
+ }
+ });
+
+ rt.shutdown();
+ })
+ }
+
+ // Spawning from a destructor that runs while the runtime shuts down must not
+ // break shutdown. `DID_SPAWN` proves that the destructor really ran.
+ #[test]
+ fn spawn_during_shutdown() {
+ static DID_SPAWN: AtomicBool = AtomicBool::new(false);
+
+ // Spawns a new task on the wrapped runtime when dropped.
+ struct SpawnOnDrop(Runtime);
+ impl Drop for SpawnOnDrop {
+ fn drop(&mut self) {
+ DID_SPAWN.store(true, Ordering::SeqCst);
+ self.0.spawn(async {});
+ }
+ }
+
+ with(|rt| {
+ let rt2 = rt.clone();
+ rt.spawn(async move {
+ // Lives inside the future, so it is dropped when the future is dropped.
+ let _spawn_on_drop = SpawnOnDrop(rt2);
+
+ loop {
+ crate::task::yield_now().await;
+ }
+ });
+
+ // Run the task once so that `_spawn_on_drop` has been created.
+ rt.tick_max(1);
+ rt.shutdown();
+ });
+
+ assert!(DID_SPAWN.load(Ordering::SeqCst));
+ }
+
+ // Runs `f` with a fresh test `Runtime` (no owned tasks, empty run queue).
+ // A clone of the runtime is stored in `CURRENT` for the duration of the call;
+ // the `Reset` guard takes it out again when `with` returns or unwinds.
+ fn with(f: impl FnOnce(Runtime)) {
+ struct Reset;
+
+ impl Drop for Reset {
+ fn drop(&mut self) {
+ let _rt = CURRENT.try_lock().unwrap().take();
+ }
+ }
+
+ let _reset = Reset;
+
+ let rt = Runtime(Arc::new(Inner {
+ owned: OwnedTasks::new(),
+ core: TryLock::new(Core {
+ queue: VecDeque::new(),
+ }),
+ }));
+
+ *CURRENT.try_lock().unwrap() = Some(rt.clone());
+ f(rt)
+ }
+
+ // Minimal scheduler used by the tests in this file; a cheaply clonable
+ // handle to the shared `Inner` state.
+ #[derive(Clone)]
+ struct Runtime(Arc<Inner>);
+
+ // Shared state: the run queue (behind a `TryLock`) and the set of tasks bound
+ // to this runtime.
+ struct Inner {
+ core: TryLock<Core>,
+ owned: OwnedTasks<Runtime>,
+ }
+
+ // Run queue of notified tasks: `schedule` pushes to the back, `next_task`
+ // pops from the front.
+ struct Core {
+ queue: VecDeque<task::Notified<Runtime>>,
+ }
+
+ // Runtime installed by `with` for the duration of a test.
+ static CURRENT: TryLock<Option<Runtime>> = TryLock::new(None);
+
+ impl Runtime {
+ // Binds `future` to this runtime's `OwnedTasks` under a fresh `Id` and, if
+ // `bind` hands back a notified task, queues it. Returns the join handle.
+ fn spawn<T>(&self, future: T) -> JoinHandle<T::Output>
+ where
+ T: 'static + Send + Future,
+ T::Output: 'static + Send,
+ {
+ let (handle, notified) = self.0.owned.bind(future, self.clone(), Id::next());
+
+ if let Some(notified) = notified {
+ self.schedule(notified);
+ }
+
+ handle
+ }
+
+ // Runs queued tasks until the queue is empty; returns how many were run.
+ fn tick(&self) -> usize {
+ self.tick_max(usize::MAX)
+ }
+
+ // Runs queued tasks until the queue is empty or `max` tasks have been run;
+ // returns how many were run.
+ fn tick_max(&self, max: usize) -> usize {
+ let mut n = 0;
+
+ while !self.is_empty() && n < max {
+ let task = self.next_task();
+ n += 1;
+ let task = self.0.owned.assert_owner(task);
+ task.run();
+ }
+
+ n
+ }
+
+ // Whether the run queue is empty. Panics if the core lock is already held.
+ fn is_empty(&self) -> bool {
+ self.0.core.try_lock().unwrap().queue.is_empty()
+ }
+
+ // Pops the next task from the front of the queue. Panics if the queue is
+ // empty or the core lock is already held.
+ fn next_task(&self) -> task::Notified<Runtime> {
+ self.0.core.try_lock().unwrap().queue.pop_front().unwrap()
+ }
+
+ // Closes the owned-task set and shuts down every task in it, drops all
+ // queued notifications, then asserts that no owned task is left.
+ fn shutdown(&self) {
+ // The core lock is held from here until the explicit `drop(core)` below.
+ let mut core = self.0.core.try_lock().unwrap();
+
+ self.0.owned.close_and_shutdown_all();
+
+ while let Some(task) = core.queue.pop_back() {
+ drop(task);
+ }
+
+ drop(core);
+
+ assert!(self.0.owned.is_empty());
+ }
+ }
+
+ impl Schedule for Runtime {
+ // Removes the task from this runtime's `OwnedTasks` and returns what
+ // `remove` yields.
+ fn release(&self, task: &Task<Self>) -> Option<Task<Self>> {
+ self.0.owned.remove(task)
+ }
+
+ // Appends the task to the back of the run queue. Panics if the core lock
+ // is already held.
+ fn schedule(&self, task: task::Notified<Self>) {
+ self.0.core.try_lock().unwrap().queue.push_back(task);
+ }
+ }
diff --git a/third_party/rust/tokio/src/runtime/tests/task_combinations.rs b/third_party/rust/tokio/src/runtime/tests/task_combinations.rs
new file mode 100644
index 0000000000..73a20d9760
--- /dev/null
+++ b/third_party/rust/tokio/src/runtime/tests/task_combinations.rs
@@ -0,0 +1,487 @@
+use std::fmt;
+use std::future::Future;
+use std::panic;
+use std::pin::Pin;
+use std::task::{Context, Poll};
+
+use crate::runtime::task::AbortHandle;
+use crate::runtime::Builder;
+use crate::sync::oneshot;
+use crate::task::JoinHandle;
+
+use futures::future::FutureExt;
+
+ // Enums for each option in the combinations being tested
+
+ // Which runtime flavour to build: current-thread, or multi-thread with one
+ // or two worker threads.
+ #[derive(Copy, Clone, Debug, PartialEq)]
+ enum CombiRuntime {
+ CurrentThread,
+ Multi1,
+ Multi2,
+ }
+ // Whether the task is spawned on a `LocalSet` or directly on the runtime.
+ #[derive(Copy, Clone, Debug, PartialEq)]
+ enum CombiLocalSet {
+ Yes,
+ No,
+ }
+ // Where the spawned task panics: while running, when the future wrapper is
+ // dropped, both, or never.
+ #[derive(Copy, Clone, Debug, PartialEq)]
+ enum CombiTask {
+ PanicOnRun,
+ PanicOnDrop,
+ PanicOnRunAndDrop,
+ NoPanic,
+ }
+ // Whether the task's output value panics when it is dropped.
+ #[derive(Copy, Clone, Debug, PartialEq)]
+ enum CombiOutput {
+ PanicOnDrop,
+ NoPanic,
+ }
+ // Whether the `JoinHandle` is polled once right after spawning.
+ #[derive(Copy, Clone, Debug, PartialEq)]
+ enum CombiJoinInterest {
+ Polled,
+ NotPolled,
+ }
+ // Point in the test at which a handle is dropped. The discriminants are
+ // compared numerically with those of `CombiAbort` to skip combinations in
+ // which the join handle is dropped before the abort would happen.
+ #[allow(clippy::enum_variant_names)] // we aren't using glob imports
+ #[derive(Copy, Clone, Debug, PartialEq)]
+ enum CombiJoinHandle {
+ DropImmediately = 1,
+ DropFirstPoll = 2,
+ DropAfterNoConsume = 3,
+ DropAfterConsume = 4,
+ }
+ // Point in the test at which the task is aborted, if at all.
+ #[derive(Copy, Clone, Debug, PartialEq)]
+ enum CombiAbort {
+ NotAborted = 0,
+ AbortedImmediately = 1,
+ AbortedFirstPoll = 2,
+ AbortedAfterFinish = 3,
+ AbortedAfterConsumeOutput = 4,
+ }
+
+ // Which handle is used to perform the abort.
+ #[derive(Copy, Clone, Debug, PartialEq)]
+ enum CombiAbortSource {
+ JoinHandle,
+ AbortHandle,
+ }
+
+ // Drives `test_combination` over the cross product of all option enums.
+ // For every (runtime, local set, task, output, join interest, join handle,
+ // abort) tuple it runs one case per abort-handle drop point with the abort
+ // going through the `JoinHandle`, plus one case with the abort going through
+ // the `AbortHandle`. Under miri only the current-thread runtime is exercised.
+ #[test]
+ fn test_combinations() {
+ let mut rt = &[
+ CombiRuntime::CurrentThread,
+ CombiRuntime::Multi1,
+ CombiRuntime::Multi2,
+ ][..];
+
+ if cfg!(miri) {
+ rt = &[CombiRuntime::CurrentThread];
+ }
+
+ let ls = [CombiLocalSet::Yes, CombiLocalSet::No];
+ let task = [
+ CombiTask::NoPanic,
+ CombiTask::PanicOnRun,
+ CombiTask::PanicOnDrop,
+ CombiTask::PanicOnRunAndDrop,
+ ];
+ let output = [CombiOutput::NoPanic, CombiOutput::PanicOnDrop];
+ let ji = [CombiJoinInterest::Polled, CombiJoinInterest::NotPolled];
+ let jh = [
+ CombiJoinHandle::DropImmediately,
+ CombiJoinHandle::DropFirstPoll,
+ CombiJoinHandle::DropAfterNoConsume,
+ CombiJoinHandle::DropAfterConsume,
+ ];
+ let abort = [
+ CombiAbort::NotAborted,
+ CombiAbort::AbortedImmediately,
+ CombiAbort::AbortedFirstPoll,
+ CombiAbort::AbortedAfterFinish,
+ CombiAbort::AbortedAfterConsumeOutput,
+ ];
+ // Drop point of the abort handle; `None` means no drop point is requested.
+ let ah = [
+ None,
+ Some(CombiJoinHandle::DropImmediately),
+ Some(CombiJoinHandle::DropFirstPoll),
+ Some(CombiJoinHandle::DropAfterNoConsume),
+ Some(CombiJoinHandle::DropAfterConsume),
+ ];
+
+ for rt in rt.iter().copied() {
+ for ls in ls.iter().copied() {
+ for task in task.iter().copied() {
+ for output in output.iter().copied() {
+ for ji in ji.iter().copied() {
+ for jh in jh.iter().copied() {
+ for abort in abort.iter().copied() {
+ // abort via join handle --- abort handles
+ // may be dropped at any point
+ for ah in ah.iter().copied() {
+ test_combination(
+ rt,
+ ls,
+ task,
+ output,
+ ji,
+ jh,
+ ah,
+ abort,
+ CombiAbortSource::JoinHandle,
+ );
+ }
+ // if aborting via AbortHandle, it will
+ // never be dropped.
+ test_combination(
+ rt,
+ ls,
+ task,
+ output,
+ ji,
+ jh,
+ None,
+ abort,
+ CombiAbortSource::AbortHandle,
+ );
+ }
+ }
+ }
+ }
+ }
+ }
+ }
+ }
+
+ // Compile-time check that `T` implements `fmt::Debug`; does nothing at runtime.
+ fn is_debug<T: fmt::Debug>(_: &T) {}
+
+#[allow(clippy::too_many_arguments)]
+fn test_combination(
+ rt: CombiRuntime,
+ ls: CombiLocalSet,
+ task: CombiTask,
+ output: CombiOutput,
+ ji: CombiJoinInterest,
+ jh: CombiJoinHandle,
+ ah: Option<CombiJoinHandle>,
+ abort: CombiAbort,
+ abort_src: CombiAbortSource,
+) {
+ match (abort_src, ah) {
+ (CombiAbortSource::JoinHandle, _) if (jh as usize) < (abort as usize) => {
+ // join handle dropped prior to abort
+ return;
+ }
+ (CombiAbortSource::AbortHandle, Some(_)) => {
+ // abort handle dropped, we can't abort through the
+ // abort handle
+ return;
+ }
+
+ _ => {}
+ }
+
+ if (task == CombiTask::PanicOnDrop) && (output == CombiOutput::PanicOnDrop) {
+ // this causes double panic
+ return;
+ }
+ if (task == CombiTask::PanicOnRunAndDrop) && (abort != CombiAbort::AbortedImmediately) {
+ // this causes double panic
+ return;
+ }
+
+ is_debug(&rt);
+ is_debug(&ls);
+ is_debug(&task);
+ is_debug(&output);
+ is_debug(&ji);
+ is_debug(&jh);
+ is_debug(&ah);
+ is_debug(&abort);
+ is_debug(&abort_src);
+
+ // A runtime optionally with a LocalSet
+ struct Rt {
+ rt: crate::runtime::Runtime,
+ ls: Option<crate::task::LocalSet>,
+ }
+ impl Rt {
+ // Builds the runtime flavour selected by `rt` (panicking if the builder
+ // fails) and creates a `LocalSet` when `ls` is `Yes`.
+ fn new(rt: CombiRuntime, ls: CombiLocalSet) -> Self {
+ let rt = match rt {
+ CombiRuntime::CurrentThread => Builder::new_current_thread().build().unwrap(),
+ CombiRuntime::Multi1 => Builder::new_multi_thread()
+ .worker_threads(1)
+ .build()
+ .unwrap(),
+ CombiRuntime::Multi2 => Builder::new_multi_thread()
+ .worker_threads(2)
+ .build()
+ .unwrap(),
+ };
+
+ let ls = match ls {
+ CombiLocalSet::Yes => Some(crate::task::LocalSet::new()),
+ CombiLocalSet::No => None,
+ };
+
+ Self { rt, ls }
+ }
+ // Blocks on `task`, going through the `LocalSet` when there is one.
+ fn block_on<T>(&self, task: T) -> T::Output
+ where
+ T: Future,
+ {
+ match &self.ls {
+ Some(ls) => ls.block_on(&self.rt, task),
+ None => self.rt.block_on(task),
+ }
+ }
+ // Spawns `task` with `spawn_local` on the `LocalSet` when there is one,
+ // otherwise directly on the runtime.
+ fn spawn<T>(&self, task: T) -> JoinHandle<T::Output>
+ where
+ T: Future + Send + 'static,
+ T::Output: Send + 'static,
+ {
+ match &self.ls {
+ Some(ls) => ls.spawn_local(task),
+ None => self.rt.spawn(task),
+ }
+ }
+ }
+
+ // The type used for the output of the future
+ //
+ // Dropping it sends on `on_drop` (the send result is ignored) and then
+ // panics if `panic_on_drop` is still set.
+ struct Output {
+ panic_on_drop: bool,
+ on_drop: Option<oneshot::Sender<()>>,
+ }
+ impl Output {
+ // Clears `panic_on_drop` so that dropping this value no longer panics.
+ fn disarm(&mut self) {
+ self.panic_on_drop = false;
+ }
+ }
+ impl Drop for Output {
+ fn drop(&mut self) {
+ // Signal first, so the drop is reported even when the panic below fires.
+ let _ = self.on_drop.take().unwrap().send(());
+ if self.panic_on_drop {
+ panic!("Panicking in Output");
+ }
+ }
+ }
+
+ // A wrapper around the future that is spawned
+ //
+ // Polling is forwarded to `inner`. Dropping the wrapper sends on `on_drop`
+ // (the send result is ignored) and then panics if `panic_on_drop` is set.
+ struct FutWrapper<F> {
+ inner: F,
+ on_drop: Option<oneshot::Sender<()>>,
+ panic_on_drop: bool,
+ }
+ impl<F: Future> Future for FutWrapper<F> {
+ type Output = F::Output;
+ fn poll(self: Pin<&mut Self>, cx: &mut Context<'_>) -> Poll<F::Output> {
+ // SAFETY: `inner` is only ever accessed through the re-pinned reference
+ // created here; neither this method nor the `Drop` impl below moves it
+ // out of the wrapper.
+ unsafe {
+ let me = Pin::into_inner_unchecked(self);
+ let inner = Pin::new_unchecked(&mut me.inner);
+ inner.poll(cx)
+ }
+ }
+ }
+ impl<F> Drop for FutWrapper<F> {
+ fn drop(&mut self) {
+ // Signal first, so the drop is reported even when the panic below fires.
+ let _: Result<(), ()> = self.on_drop.take().unwrap().send(());
+ if self.panic_on_drop {
+ panic!("Panicking in FutWrapper");
+ }
+ }
+ }
+
+ // The channels passed to the task
+ struct Signals {
+ // Sent by the task as soon as it starts running.
+ on_first_poll: Option<oneshot::Sender<()>>,
+ // Awaited by the task before it is allowed to complete.
+ wait_complete: Option<oneshot::Receiver<()>>,
+ // Moved into the task's `Output`, which sends on it when dropped.
+ on_output_drop: Option<oneshot::Sender<()>>,
+ }
+
+ // The task we will spawn
+ //
+ // Reports its first poll, waits for the completion signal, yields once, then
+ // either panics (for the `PanicOnRun*` variants of `task`) or returns an
+ // `Output` whose drop behaviour is selected by `out`.
+ async fn my_task(mut signal: Signals, task: CombiTask, out: CombiOutput) -> Output {
+ // Signal that we have been polled once
+ let _ = signal.on_first_poll.take().unwrap().send(());
+
+ // Wait for a signal, then complete the future
+ let _ = signal.wait_complete.take().unwrap().await;
+
+ // If the task gets past wait_complete without yielding, then aborts
+ // may not be caught without this yield_now.
+ crate::task::yield_now().await;
+
+ if task == CombiTask::PanicOnRun || task == CombiTask::PanicOnRunAndDrop {
+ panic!("Panicking in my_task on {:?}", std::thread::current().id());
+ }
+
+ Output {
+ panic_on_drop: out == CombiOutput::PanicOnDrop,
+ on_drop: signal.on_output_drop.take(),
+ }
+ }
+
+ let rt = Rt::new(rt, ls);
+
+ let (on_first_poll, wait_first_poll) = oneshot::channel();
+ let (on_complete, wait_complete) = oneshot::channel();
+ let (on_future_drop, wait_future_drop) = oneshot::channel();
+ let (on_output_drop, wait_output_drop) = oneshot::channel();
+ let signal = Signals {
+ on_first_poll: Some(on_first_poll),
+ wait_complete: Some(wait_complete),
+ on_output_drop: Some(on_output_drop),
+ };
+
+ // === Spawn task ===
+ let mut handle = Some(rt.spawn(FutWrapper {
+ inner: my_task(signal, task, output),
+ on_drop: Some(on_future_drop),
+ panic_on_drop: task == CombiTask::PanicOnDrop || task == CombiTask::PanicOnRunAndDrop,
+ }));
+
+ // Keep track of whether the task has been killed with an abort
+ let mut aborted = false;
+
+ // If we want to poll the JoinHandle, do it now
+ if ji == CombiJoinInterest::Polled {
+ assert!(
+ handle.as_mut().unwrap().now_or_never().is_none(),
+ "Polling handle succeeded"
+ );
+ }
+
+ // If we are either aborting the task via an abort handle, or dropping via
+ // an abort handle, do that now.
+ let mut abort_handle = if ah.is_some() || abort_src == CombiAbortSource::AbortHandle {
+ handle.as_ref().map(JoinHandle::abort_handle)
+ } else {
+ None
+ };
+
+ let do_abort = |abort_handle: &mut Option<AbortHandle>,
+ join_handle: Option<&mut JoinHandle<_>>| {
+ match abort_src {
+ CombiAbortSource::AbortHandle => abort_handle.take().unwrap().abort(),
+ CombiAbortSource::JoinHandle => join_handle.unwrap().abort(),
+ }
+ };
+
+ if abort == CombiAbort::AbortedImmediately {
+ do_abort(&mut abort_handle, handle.as_mut());
+ aborted = true;
+ }
+ if jh == CombiJoinHandle::DropImmediately {
+ drop(handle.take().unwrap());
+ }
+
+ // === Wait for first poll ===
+ let got_polled = rt.block_on(wait_first_poll).is_ok();
+ if !got_polled {
+ // it's possible that we are aborted but still got polled
+ assert!(
+ aborted,
+ "Task completed without ever being polled but was not aborted."
+ );
+ }
+
+ if abort == CombiAbort::AbortedFirstPoll {
+ do_abort(&mut abort_handle, handle.as_mut());
+ aborted = true;
+ }
+ if jh == CombiJoinHandle::DropFirstPoll {
+ drop(handle.take().unwrap());
+ }
+ if ah == Some(CombiJoinHandle::DropFirstPoll) {
+ drop(abort_handle.take().unwrap());
+ }
+
+ // Signal the future that it can return now
+ let _ = on_complete.send(());
+ // === Wait for future to be dropped ===
+ assert!(
+ rt.block_on(wait_future_drop).is_ok(),
+ "The future should always be dropped."
+ );
+
+ if abort == CombiAbort::AbortedAfterFinish {
+ // Don't set aborted to true here as the task already finished
+ do_abort(&mut abort_handle, handle.as_mut());
+ }
+ if jh == CombiJoinHandle::DropAfterNoConsume {
+ if ah == Some(CombiJoinHandle::DropAfterNoConsume) {
+ drop(handle.take().unwrap());
+ // The runtime will usually have dropped every ref-count at this point,
+ // in which case dropping the AbortHandle drops the output.
+ //
+ // (But it might race and still hold a ref-count)
+ let panic = panic::catch_unwind(panic::AssertUnwindSafe(|| {
+ drop(abort_handle.take().unwrap());
+ }));
+ if panic.is_err() {
+ assert!(
+ (output == CombiOutput::PanicOnDrop)
+ && (!matches!(task, CombiTask::PanicOnRun | CombiTask::PanicOnRunAndDrop))
+ && !aborted,
+ "Dropping AbortHandle shouldn't panic here"
+ );
+ }
+ } else {
+ // The runtime will usually have dropped every ref-count at this point,
+ // in which case dropping the JoinHandle drops the output.
+ //
+ // (But it might race and still hold a ref-count)
+ let panic = panic::catch_unwind(panic::AssertUnwindSafe(|| {
+ drop(handle.take().unwrap());
+ }));
+ if panic.is_err() {
+ assert!(
+ (output == CombiOutput::PanicOnDrop)
+ && (!matches!(task, CombiTask::PanicOnRun | CombiTask::PanicOnRunAndDrop))
+ && !aborted,
+ "Dropping JoinHandle shouldn't panic here"
+ );
+ }
+ }
+ }
+
+ // Check whether we drop after consuming the output
+ if jh == CombiJoinHandle::DropAfterConsume {
+ // Using as_mut() to not immediately drop the handle
+ let result = rt.block_on(handle.as_mut().unwrap());
+
+ match result {
+ Ok(mut output) => {
+ // Don't panic here.
+ output.disarm();
+ assert!(!aborted, "Task was aborted but returned output");
+ }
+ Err(err) if err.is_cancelled() => assert!(aborted, "Cancelled output but not aborted"),
+ Err(err) if err.is_panic() => {
+ assert!(
+ (task == CombiTask::PanicOnRun)
+ || (task == CombiTask::PanicOnDrop)
+ || (task == CombiTask::PanicOnRunAndDrop)
+ || (output == CombiOutput::PanicOnDrop),
+ "Panic but nothing should panic"
+ );
+ }
+ _ => unreachable!(),
+ }
+
+ let mut handle = handle.take().unwrap();
+ if abort == CombiAbort::AbortedAfterConsumeOutput {
+ do_abort(&mut abort_handle, Some(&mut handle));
+ }
+ drop(handle);
+
+ if ah == Some(CombiJoinHandle::DropAfterConsume) {
+ drop(abort_handle.take());
+ }
+ }
+
+ // The output should have been dropped now. Check whether the output
+ // object was created at all.
+ let output_created = rt.block_on(wait_output_drop).is_ok();
+ assert_eq!(
+ output_created,
+ (!matches!(task, CombiTask::PanicOnRun | CombiTask::PanicOnRunAndDrop)) && !aborted,
+ "Creation of output object"
+ );
+}
diff --git a/third_party/rust/tokio/src/runtime/thread_id.rs b/third_party/rust/tokio/src/runtime/thread_id.rs
new file mode 100644
index 0000000000..ef39289796
--- /dev/null
+++ b/third_party/rust/tokio/src/runtime/thread_id.rs
@@ -0,0 +1,31 @@
+use std::num::NonZeroU64;
+
+ /// Process-wide unique, non-zero identifier handed out by [`ThreadId::next`].
+ #[derive(Eq, PartialEq, Clone, Copy, Hash, Debug)]
+ pub(crate) struct ThreadId(NonZeroU64);
+
+ impl ThreadId {
+ /// Returns a fresh identifier by incrementing a static counter.
+ ///
+ /// The counter starts at 0 and the *incremented* value is returned, so the
+ /// first identifier is 1 and the `NonZeroU64` conversion cannot fail.
+ ///
+ /// # Panics
+ ///
+ /// Panics (via `exhausted`) if incrementing the counter would overflow `u64`.
+ pub(crate) fn next() -> Self {
+ use crate::loom::sync::atomic::{Ordering::Relaxed, StaticAtomicU64};
+
+ static NEXT_ID: StaticAtomicU64 = StaticAtomicU64::new(0);
+
+ // Compare-and-swap loop: retry with the freshly observed value whenever
+ // the exchange fails.
+ let mut last = NEXT_ID.load(Relaxed);
+ loop {
+ let id = match last.checked_add(1) {
+ Some(id) => id,
+ None => exhausted(),
+ };
+
+ match NEXT_ID.compare_exchange_weak(last, id, Relaxed, Relaxed) {
+ Ok(_) => return ThreadId(NonZeroU64::new(id).unwrap()),
+ Err(id) => last = id,
+ }
+ }
+ }
+ }
+
+ /// Panics with a message saying that the thread ID space is exhausted.
+ /// Never returns; marked `#[cold]` because it is only reached on overflow.
+ #[cold]
+ #[allow(dead_code)]
+ fn exhausted() -> ! {
+ panic!("failed to generate unique thread ID: bitspace exhausted")
+ }
diff --git a/third_party/rust/tokio/src/runtime/time/entry.rs b/third_party/rust/tokio/src/runtime/time/entry.rs
new file mode 100644
index 0000000000..798d3c11eb
--- /dev/null
+++ b/third_party/rust/tokio/src/runtime/time/entry.rs
@@ -0,0 +1,644 @@
+//! Timer state structures.
+//!
+//! This module contains the heart of the intrusive timer implementation, and as
+//! such the structures inside are full of tricky concurrency and unsafe code.
+//!
+//! # Ground rules
+//!
+//! The heart of the timer implementation here is the [`TimerShared`] structure,
+//! shared between the [`TimerEntry`] and the driver. Generally, we permit access
+//! to [`TimerShared`] ONLY via either 1) a mutable reference to [`TimerEntry`] or
+//! 2) a held driver lock.
+//!
+//! It follows from this that any changes made while holding BOTH 1 and 2 will
+//! be reliably visible, regardless of ordering. This is because of the acq/rel
+//! fences on the driver lock ensuring ordering with 2, and rust mutable
+//! reference rules for 1 (a mutable reference to an object can't be passed
+//! between threads without an acq/rel barrier, and same-thread we have local
+//! happens-before ordering).
+//!
+//! # State field
+//!
+//! Each timer has a state field associated with it. This field contains either
+//! the current scheduled time, or a special flag value indicating its state.
+//! This state can either indicate that the timer is on the 'pending' queue (and
+//! thus will be fired with an `Ok(())` result soon) or that it has already been
+//! fired/deregistered.
+//!
+//! This single state field allows for code that is firing the timer to
+//! synchronize with any racing `reset` calls reliably.
+//!
+//! # Cached vs true timeouts
+//!
+//! To allow for the use case of a timeout that is periodically reset before
+//! expiration to be as lightweight as possible, we support optimistically
+//! lock-free timer resets, in the case where a timer is rescheduled to a later
+//! point than it was originally scheduled for.
+//!
+//! This is accomplished by lazily rescheduling timers. That is, we update the
+//! state field with the true expiration of the timer from the holder of
+//! the [`TimerEntry`]. When the driver services timers (i.e., whenever it's
+//! walking lists of timers), it checks this "true when" value, and reschedules
+//! based on it.
+//!
+//! We do, however, also need to track what the expiration time was when we
+//! originally registered the timer; this is used to locate the right linked
+//! list when the timer is being cancelled. This is referred to as the "cached
+//! when" internally.
+//!
+//! There is of course a race condition between timer reset and timer
+//! expiration. If the driver fails to observe the updated expiration time, it
+//! could trigger expiration of the timer too early. However, because
+//! [`mark_pending`][mark_pending] performs a compare-and-swap, it will identify this race and
+//! refuse to mark the timer as pending.
+//!
+//! [mark_pending]: TimerHandle::mark_pending
+
+use crate::loom::cell::UnsafeCell;
+use crate::loom::sync::atomic::AtomicU64;
+use crate::loom::sync::atomic::Ordering;
+
+use crate::runtime::scheduler;
+use crate::sync::AtomicWaker;
+use crate::time::Instant;
+use crate::util::linked_list;
+
+use std::cell::UnsafeCell as StdUnsafeCell;
+use std::task::{Context, Poll, Waker};
+use std::{marker::PhantomPinned, pin::Pin, ptr::NonNull};
+
+type TimerResult = Result<(), crate::time::error::Error>;
+
+const STATE_DEREGISTERED: u64 = u64::MAX;
+const STATE_PENDING_FIRE: u64 = STATE_DEREGISTERED - 1;
+const STATE_MIN_VALUE: u64 = STATE_PENDING_FIRE;
+/// The largest safe integer to use for ticks.
+///
+/// Derived from `STATE_MIN_VALUE` so it stays below every sentinel state value.
+pub(super) const MAX_SAFE_MILLIS_DURATION: u64 = STATE_MIN_VALUE - 1;
+
+/// This structure holds the current shared state of the timer - its scheduled
+/// time (if registered), or otherwise the result of the timer completing, as
+/// well as the registered waker.
+///
+/// Generally, the StateCell is only permitted to be accessed from two contexts:
+/// Either a thread holding the corresponding &mut TimerEntry, or a thread
+/// holding the timer driver lock. The write actions on the StateCell amount to
+/// passing "ownership" of the StateCell between these contexts; moving a timer
+/// from the TimerEntry to the driver requires _both_ holding the &mut
+/// TimerEntry and the driver lock, while moving it back (firing the timer)
+/// requires only the driver lock.
+pub(super) struct StateCell {
+ /// Holds either the scheduled expiration time for this timer, or one of the
+ /// sentinels `STATE_PENDING_FIRE` / `STATE_DEREGISTERED` (`u64::MAX`).
+ state: AtomicU64,
+ /// If the timer is fired (an Acquire order read on state shows
+ /// `u64::MAX`), holds the result that should be returned from
+ /// polling the timer. Otherwise, the contents are unspecified and reading
+ /// without holding the driver lock is undefined behavior.
+ result: UnsafeCell<TimerResult>,
+ /// The currently-registered waker.
+ waker: AtomicWaker,
+}
+
+impl Default for StateCell {
+ fn default() -> Self {
+ Self::new()
+ }
+}
+
+impl std::fmt::Debug for StateCell {
+ fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
+ write!(f, "StateCell({:?})", self.read_state())
+ }
+}
+
+impl StateCell {
+ fn new() -> Self {
+ Self {
+ state: AtomicU64::new(STATE_DEREGISTERED),
+ result: UnsafeCell::new(Ok(())),
+ waker: AtomicWaker::new(),
+ }
+ }
+
+ fn is_pending(&self) -> bool {
+ self.state.load(Ordering::Relaxed) == STATE_PENDING_FIRE
+ }
+
+ /// Reports the scheduled expiration tick using a relaxed load.
+ ///
+ /// Yields `None` only when the state is `STATE_DEREGISTERED`; every other
+ /// value, including the pending-fire sentinel, is passed through in `Some`.
+ fn when(&self) -> Option<u64> {
+     match self.state.load(Ordering::Relaxed) {
+         STATE_DEREGISTERED => None,
+         scheduled => Some(scheduled),
+     }
+ }
+
+ /// Registers the waker, then returns the timer's result if it has completed,
+ /// or `Poll::Pending` otherwise.
+ fn poll(&self, waker: &Waker) -> Poll<TimerResult> {
+ // We must register first. This ensures that either `fire` will
+ // observe the new waker, or we will observe a racing fire to have set
+ // the state, or both.
+ self.waker.register_by_ref(waker);
+
+ self.read_state()
+ }
+
+ fn read_state(&self) -> Poll<TimerResult> {
+     if self.state.load(Ordering::Acquire) != STATE_DEREGISTERED {
+         // Still scheduled or pending fire: no result has been published yet.
+         return Poll::Pending;
+     }
+
+     // SAFETY: `fire` writes the result first and only then stores the
+     // deregistered state with release ordering; the acquire load above
+     // pairs with that store, so the result is visible here.
+     let outcome = unsafe { self.result.with(|p| *p) };
+     Poll::Ready(outcome)
+ }
+
+ /// Marks this timer as being moved to the pending list, if its scheduled
+ /// time is not after `not_after`.
+ ///
+ /// If the timer is scheduled for a time after not_after, returns an Err
+ /// containing the current scheduled time.
+ ///
+ /// SAFETY: Must hold the driver lock.
+ unsafe fn mark_pending(&self, not_after: u64) -> Result<(), u64> {
+ // Quick initial debug check to see if the timer is already fired. Since
+ // firing the timer can only happen with the driver lock held, we know
+ // we shouldn't be able to "miss" a transition to a fired state, even
+ // with relaxed ordering.
+ let mut cur_state = self.state.load(Ordering::Relaxed);
+
+ loop {
+ // improve the error message for things like
+ // https://github.com/tokio-rs/tokio/issues/3675
+ assert!(
+ cur_state < STATE_MIN_VALUE,
+ "mark_pending called when the timer entry is in an invalid state"
+ );
+
+ if cur_state > not_after {
+ break Err(cur_state);
+ }
+
+ match self.state.compare_exchange(
+ cur_state,
+ STATE_PENDING_FIRE,
+ Ordering::AcqRel,
+ Ordering::Acquire,
+ ) {
+ Ok(_) => {
+ break Ok(());
+ }
+ Err(actual_state) => {
+ cur_state = actual_state;
+ }
+ }
+ }
+ }
+
+ /// Fires the timer, setting the result to the provided result.
+ ///
+ /// Returns:
+ /// * `Some(waker)` - if fired and a waker needs to be invoked once the
+ /// driver lock is released
+ /// * `None` - if fired and a waker does not need to be invoked, or if
+ /// already fired
+ ///
+ /// SAFETY: The driver lock must be held.
+ unsafe fn fire(&self, result: TimerResult) -> Option<Waker> {
+ // Quick initial check to see if the timer is already fired. Since
+ // firing the timer can only happen with the driver lock held, we know
+ // we shouldn't be able to "miss" a transition to a fired state, even
+ // with relaxed ordering.
+ let cur_state = self.state.load(Ordering::Relaxed);
+ if cur_state == STATE_DEREGISTERED {
+ return None;
+ }
+
+ // SAFETY: We assume the driver lock is held and the timer is not
+ // fired, so only the driver is accessing this field.
+ //
+ // We perform a release-ordered store to state below, to ensure this
+ // write is visible before the state update is visible.
+ unsafe { self.result.with_mut(|p| *p = result) };
+
+ self.state.store(STATE_DEREGISTERED, Ordering::Release);
+
+ self.waker.take_waker()
+ }
+
+ /// Marks the timer as registered (`poll` will return `Poll::Pending`) and
+ /// sets the expiration time.
+ ///
+ /// While this function is memory-safe, it should only be called from a
+ /// context holding both `&mut TimerEntry` and the driver lock.
+ fn set_expiration(&self, timestamp: u64) {
+ debug_assert!(timestamp < STATE_MIN_VALUE);
+
+ // We can use relaxed ordering because we hold the driver lock and will
+ // fence when we release the lock.
+ self.state.store(timestamp, Ordering::Relaxed);
+ }
+
+ /// Attempts to adjust the timer to a new timestamp.
+ ///
+ /// If the timer has already been fired, is pending firing, or the new
+ /// timestamp is earlier than the old timestamp, (or occasionally
+ /// spuriously) returns Err without changing the timer's state. In this
+ /// case, the timer must be deregistered and re-registered.
+ fn extend_expiration(&self, new_timestamp: u64) -> Result<(), ()> {
+ let mut prior = self.state.load(Ordering::Relaxed);
+ loop {
+ if new_timestamp < prior || prior >= STATE_MIN_VALUE {
+ return Err(());
+ }
+
+ match self.state.compare_exchange_weak(
+ prior,
+ new_timestamp,
+ Ordering::AcqRel,
+ Ordering::Acquire,
+ ) {
+ Ok(_) => {
+ return Ok(());
+ }
+ Err(true_prior) => {
+ prior = true_prior;
+ }
+ }
+ }
+ }
+
+ /// Returns true if the state of this timer indicates that the timer might
+ /// be registered with the driver. This check is performed with relaxed
+ /// ordering, but is conservative - if it returns false, the timer is
+ /// definitely _not_ registered.
+ pub(super) fn might_be_registered(&self) -> bool {
+     self.state.load(Ordering::Relaxed) != STATE_DEREGISTERED
+ }
+}
+
+/// A timer entry.
+///
+/// This is the handle to a timer that is controlled by the requester of the
+/// timer. As this participates in intrusive data structures, it must be pinned
+/// before polling.
+#[derive(Debug)]
+pub(crate) struct TimerEntry {
+ /// Arc reference to the runtime handle. We can only free the driver after
+ /// deregistering everything from their respective timer wheels.
+ driver: scheduler::Handle,
+ /// Shared inner structure; this is part of an intrusive linked list, and
+ /// therefore other references can exist to it while mutable references to
+ /// Entry exist.
+ ///
+ /// This is manipulated only under the inner mutex. TODO: Can we use loom
+ /// cells for this?
+ inner: StdUnsafeCell<TimerShared>,
+ /// Deadline for the timer. This is used to register on the first
+ /// poll, as we can't register prior to being pinned.
+ deadline: Instant,
+ /// Whether the deadline has been registered.
+ registered: bool,
+ /// Ensure the type is !Unpin
+ _m: std::marker::PhantomPinned,
+}
+
+unsafe impl Send for TimerEntry {}
+unsafe impl Sync for TimerEntry {}
+
+/// A TimerHandle is the (non-enforced) "unique" pointer from the driver to the
+/// timer entry. Generally, at most one TimerHandle exists for a timer at a time
+/// (enforced by the timer state machine).
+///
+/// SAFETY: A TimerHandle is essentially a raw pointer, and the usual caveats
+/// of pointer safety apply. In particular, TimerHandle does not itself enforce
+/// that the timer does still exist; however, normally a TimerHandle is created
+/// immediately before registering the timer, and is consumed when firing the
+/// timer, to help minimize mistakes. Still, because TimerHandle cannot enforce
+/// memory safety, all operations are unsafe.
+#[derive(Debug)]
+pub(crate) struct TimerHandle {
+ inner: NonNull<TimerShared>,
+}
+
+pub(super) type EntryList = crate::util::linked_list::LinkedList<TimerShared, TimerShared>;
+
+/// The shared state structure of a timer. This structure is shared between the
+/// frontend (`TimerEntry`) and driver backend.
+///
+/// Note that this structure is located inside the `TimerEntry` structure.
+pub(crate) struct TimerShared {
+ /// A link within the doubly-linked list of timers on a particular level and
+ /// slot. Valid only if state is equal to Registered.
+ ///
+ /// Only accessed under the entry lock.
+ pointers: linked_list::Pointers<TimerShared>,
+
+ /// The expiration time for which this entry is currently registered.
+ /// Generally owned by the driver, but is accessed by the entry when not
+ /// registered.
+ cached_when: AtomicU64,
+
+ /// The true expiration time. Set by the timer future, read by the driver.
+ true_when: AtomicU64,
+
+ /// Current state. This records whether the timer entry is currently under
+ /// the ownership of the driver, and if not, its current state (not
+ /// complete, fired, error, etc).
+ state: StateCell,
+
+ _p: PhantomPinned,
+}
+
+unsafe impl Send for TimerShared {}
+unsafe impl Sync for TimerShared {}
+
+impl std::fmt::Debug for TimerShared {
+ fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
+ f.debug_struct("TimerShared")
+ .field("when", &self.true_when.load(Ordering::Relaxed))
+ .field("cached_when", &self.cached_when.load(Ordering::Relaxed))
+ .field("state", &self.state)
+ .finish()
+ }
+}
+
+generate_addr_of_methods! {
+ impl<> TimerShared {
+ unsafe fn addr_of_pointers(self: NonNull<Self>) -> NonNull<linked_list::Pointers<TimerShared>> {
+ &self.pointers
+ }
+ }
+}
+
+impl TimerShared {
+ pub(super) fn new() -> Self {
+ Self {
+ cached_when: AtomicU64::new(0),
+ true_when: AtomicU64::new(0),
+ pointers: linked_list::Pointers::new(),
+ state: StateCell::default(),
+ _p: PhantomPinned,
+ }
+ }
+
+ /// Gets the cached time-of-expiration value.
+ pub(super) fn cached_when(&self) -> u64 {
+ // Cached-when is only accessed under the driver lock, so we can use relaxed
+ self.cached_when.load(Ordering::Relaxed)
+ }
+
+ /// Gets the true time-of-expiration value, and copies it into the cached
+ /// time-of-expiration value.
+ ///
+ /// SAFETY: Must be called with the driver lock held, and when this entry is
+ /// not in any timer wheel lists.
+ pub(super) unsafe fn sync_when(&self) -> u64 {
+ let true_when = self.true_when();
+
+ self.cached_when.store(true_when, Ordering::Relaxed);
+
+ true_when
+ }
+
+ /// Sets the cached time-of-expiration value.
+ ///
+ /// SAFETY: Must be called with the driver lock held, and when this entry is
+ /// not in any timer wheel lists.
+ unsafe fn set_cached_when(&self, when: u64) {
+ self.cached_when.store(when, Ordering::Relaxed);
+ }
+
+ /// Returns the true time-of-expiration value (relaxed load). Panics if deregistered.
+ pub(super) fn true_when(&self) -> u64 {
+ self.state.when().expect("Timer already fired")
+ }
+
+ /// Sets the true time-of-expiration value, even if it is less than the
+ /// current expiration or the timer is deregistered.
+ ///
+ /// SAFETY: Must only be called with the driver lock held and the entry not
+ /// in the timer wheel.
+ pub(super) unsafe fn set_expiration(&self, t: u64) {
+ self.state.set_expiration(t);
+ self.cached_when.store(t, Ordering::Relaxed);
+ }
+
+ /// Sets the true time-of-expiration only if it is after the current.
+ pub(super) fn extend_expiration(&self, t: u64) -> Result<(), ()> {
+ self.state.extend_expiration(t)
+ }
+
+ /// Returns a TimerHandle for this timer.
+ pub(super) fn handle(&self) -> TimerHandle {
+ TimerHandle {
+ inner: NonNull::from(self),
+ }
+ }
+
+ /// Returns true if the state of this timer indicates that the timer might
+ /// be registered with the driver. This check is performed with relaxed
+ /// ordering, but is conservative - if it returns false, the timer is
+ /// definitely _not_ registered.
+ pub(super) fn might_be_registered(&self) -> bool {
+ self.state.might_be_registered()
+ }
+}
+
+unsafe impl linked_list::Link for TimerShared {
+ type Handle = TimerHandle;
+
+ type Target = TimerShared;
+
+ fn as_raw(handle: &Self::Handle) -> NonNull<Self::Target> {
+ handle.inner
+ }
+
+ unsafe fn from_raw(ptr: NonNull<Self::Target>) -> Self::Handle {
+ TimerHandle { inner: ptr }
+ }
+
+ unsafe fn pointers(
+ target: NonNull<Self::Target>,
+ ) -> NonNull<linked_list::Pointers<Self::Target>> {
+ TimerShared::addr_of_pointers(target)
+ }
+}
+
+// ===== impl Entry =====
+
+impl TimerEntry {
+ #[track_caller]
+ pub(crate) fn new(handle: &scheduler::Handle, deadline: Instant) -> Self {
+ // Panic if the time driver is not enabled
+ let _ = handle.driver().time();
+
+ let driver = handle.clone();
+
+ Self {
+ driver,
+ inner: StdUnsafeCell::new(TimerShared::new()),
+ deadline,
+ registered: false,
+ _m: std::marker::PhantomPinned,
+ }
+ }
+
+ fn inner(&self) -> &TimerShared {
+ unsafe { &*self.inner.get() }
+ }
+
+ pub(crate) fn deadline(&self) -> Instant {
+ self.deadline
+ }
+
+ pub(crate) fn is_elapsed(&self) -> bool {
+     matches!((self.inner().might_be_registered(), self.registered), (false, true))
+ }
+
+ /// Cancels and deregisters the timer. This operation is irreversible.
+ pub(crate) fn cancel(self: Pin<&mut Self>) {
+ // We need to perform an acq/rel fence with the driver thread, and the
+ // simplest way to do so is to grab the driver lock.
+ //
+ // Why is this necessary? We're about to release this timer's memory for
+ // some other non-timer use. However, we've been doing a bunch of
+ // relaxed (or even non-atomic) writes from the driver thread, and we'll
+ // be doing more from _this thread_ (as this memory is interpreted as
+ // something else).
+ //
+ // It is critical to ensure that, from the point of view of the driver,
+ // those future non-timer writes happen-after the timer is fully fired,
+ // and from the purpose of this thread, the driver's writes all
+ // happen-before we drop the timer. This in turn requires us to perform
+ // an acquire-release barrier in _both_ directions between the driver
+ // and dropping thread.
+ //
+ // The lock acquisition in clear_entry serves this purpose. All of the
+ // driver manipulations happen with the lock held, so we can just take
+ // the lock and be sure that this drop happens-after everything the
+ // driver did so far and happens-before everything the driver does in
+ // the future. While we have the lock held, we also go ahead and
+ // deregister the entry if necessary.
+ unsafe { self.driver().clear_entry(NonNull::from(self.inner())) };
+ }
+
+ pub(crate) fn reset(mut self: Pin<&mut Self>, new_time: Instant, reregister: bool) {
+     // SAFETY: only plain fields are overwritten; nothing is moved out of the pin.
+     let this = unsafe { self.as_mut().get_unchecked_mut() };
+     this.deadline = new_time;
+     this.registered = reregister;
+
+     let tick = self.driver().time_source().deadline_to_tick(new_time);
+
+     // Fast path: extending the deadline in place needs no driver round trip.
+     // Otherwise go back through the driver, but only if the caller asked for it.
+     if self.inner().extend_expiration(tick).is_err() && reregister {
+         let shared = NonNull::from(self.inner());
+         let io = &self.driver.driver().io;
+         // SAFETY: `shared` points at this entry's own `TimerShared` - NOTE(review): confirm vs `reregister`.
+         unsafe { self.driver().reregister(io, tick, shared) };
+     }
+ }
+
+ /// Polls the timer, registering it with the driver if that has not happened yet.
+ /// Panics if the time driver has been shut down.
+ pub(crate) fn poll_elapsed(
+     mut self: Pin<&mut Self>,
+     cx: &mut Context<'_>,
+ ) -> Poll<Result<(), super::Error>> {
+     if self.driver().is_shutdown() {
+         panic!("{}", crate::util::error::RUNTIME_SHUTTING_DOWN_ERROR);
+     }
+     if !self.registered {
+         let deadline = self.deadline;
+         self.as_mut().reset(deadline, true);
+     }
+
+     // Only shared access is needed here, so deref the pin instead of using `unsafe`.
+     self.inner().state.poll(cx.waker())
+ }
+
+ pub(crate) fn driver(&self) -> &super::Handle {
+ self.driver.driver().time()
+ }
+
+ #[cfg(all(tokio_unstable, feature = "tracing"))]
+ pub(crate) fn clock(&self) -> &super::Clock {
+ self.driver.driver().clock()
+ }
+}
+
+impl TimerHandle {
+ pub(super) unsafe fn cached_when(&self) -> u64 {
+ unsafe { self.inner.as_ref().cached_when() }
+ }
+
+ pub(super) unsafe fn sync_when(&self) -> u64 {
+ unsafe { self.inner.as_ref().sync_when() }
+ }
+
+ pub(super) unsafe fn is_pending(&self) -> bool {
+ unsafe { self.inner.as_ref().state.is_pending() }
+ }
+
+ /// Forcibly sets the true and cached expiration times to the given tick.
+ ///
+ /// SAFETY: The caller must ensure that the handle remains valid, the driver
+ /// lock is held, and that the timer is not in any wheel linked lists.
+ pub(super) unsafe fn set_expiration(&self, tick: u64) {
+ self.inner.as_ref().set_expiration(tick);
+ }
+
+ /// Tries to move this entry into the pending state.
+ ///
+ /// Succeeds unless the entry's expiration is later than `not_after`, in which
+ /// case `Err` carries the entry's current expiration tick. Either way
+ /// `cached_when` is refreshed: to `u64::MAX` on success (the marker for the
+ /// pending queue), or to the returned tick on failure.
+ ///
+ /// SAFETY: The caller must ensure that the handle remains valid, the driver
+ /// lock is held, and that the timer is not in any wheel linked lists.
+ /// After returning Ok, the entry must be added to the pending list.
+ pub(super) unsafe fn mark_pending(&self, not_after: u64) -> Result<(), u64> {
+     let shared = self.inner.as_ref();
+     let outcome = shared.state.mark_pending(not_after);
+
+     let cached = match outcome {
+         Ok(()) => u64::MAX,
+         Err(tick) => tick,
+     };
+     shared.set_cached_when(cached);
+
+     outcome
+ }
+
+ /// Attempts to transition to a terminal state. If the state is already a
+ /// terminal state, does nothing.
+ ///
+ /// Because the entry might be dropped after the state is moved to a
+ /// terminal state, this function consumes the handle to ensure we don't
+ /// access the entry afterwards.
+ ///
+ /// Returns the last-registered waker, if any.
+ ///
+ /// SAFETY: The driver lock must be held while invoking this function, and
+ /// the entry must not be in any wheel linked lists.
+ pub(super) unsafe fn fire(self, completed_state: TimerResult) -> Option<Waker> {
+ self.inner.as_ref().state.fire(completed_state)
+ }
+}
+
+impl Drop for TimerEntry {
+    fn drop(&mut self) {
+        TimerEntry::cancel(unsafe { Pin::new_unchecked(self) })
+    }
+}
diff --git a/third_party/rust/tokio/src/runtime/time/handle.rs b/third_party/rust/tokio/src/runtime/time/handle.rs
new file mode 100644
index 0000000000..fce791d998
--- /dev/null
+++ b/third_party/rust/tokio/src/runtime/time/handle.rs
@@ -0,0 +1,62 @@
+use crate::runtime::time::TimeSource;
+use std::fmt;
+
+/// Handle to time driver instance.
+pub(crate) struct Handle {
+ pub(super) time_source: TimeSource,
+ pub(super) inner: super::Inner,
+}
+
+impl Handle {
+ /// Returns the time source associated with this handle.
+ pub(crate) fn time_source(&self) -> &TimeSource {
+ &self.time_source
+ }
+
+ /// Checks whether the driver has been shutdown.
+ pub(super) fn is_shutdown(&self) -> bool {
+ self.inner.is_shutdown()
+ }
+
+ /// Track that the driver is being unparked
+ pub(crate) fn unpark(&self) {
+ #[cfg(feature = "test-util")]
+ self.inner
+ .did_wake
+ .store(true, std::sync::atomic::Ordering::SeqCst);
+ }
+}
+
+cfg_not_rt! {
+ impl Handle {
+ /// Tries to get a handle to the current timer.
+ ///
+ /// # Panics
+ ///
+ /// This function panics if there is no current timer set.
+ ///
+ /// It can be triggered when [`Builder::enable_time`] or
+ /// [`Builder::enable_all`] are not included in the builder.
+ ///
+ /// It can also panic whenever a timer is created outside of a
+ /// Tokio runtime. That is why `rt.block_on(sleep(...))` will panic,
+ /// since the function is executed outside of the runtime.
+ /// Whereas `rt.block_on(async {sleep(...).await})` doesn't panic.
+ /// And this is because wrapping the function on an async makes it lazy,
+ /// and so gets executed inside the runtime successfully without
+ /// panicking.
+ ///
+ /// [`Builder::enable_time`]: crate::runtime::Builder::enable_time
+ /// [`Builder::enable_all`]: crate::runtime::Builder::enable_all
+ #[track_caller]
+ pub(crate) fn current() -> Self {
+ panic!("{}", crate::util::error::CONTEXT_MISSING_ERROR)
+ }
+ }
+}
+
+impl fmt::Debug for Handle {
+ fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
+ write!(f, "Handle")
+ }
+}
diff --git a/third_party/rust/tokio/src/runtime/time/mod.rs b/third_party/rust/tokio/src/runtime/time/mod.rs
new file mode 100644
index 0000000000..423ad79ab9
--- /dev/null
+++ b/third_party/rust/tokio/src/runtime/time/mod.rs
@@ -0,0 +1,424 @@
+// Currently, rust warns when an unsafe fn contains an unsafe {} block. However,
+// in the future, this will change to the reverse. For now, suppress this
+// warning and generally stick with being explicit about unsafety.
+#![allow(unused_unsafe)]
+#![cfg_attr(not(feature = "rt"), allow(dead_code))]
+
+//! Time driver.
+
+mod entry;
+pub(crate) use entry::TimerEntry;
+use entry::{EntryList, TimerHandle, TimerShared, MAX_SAFE_MILLIS_DURATION};
+
+mod handle;
+pub(crate) use self::handle::Handle;
+
+mod source;
+pub(crate) use source::TimeSource;
+
+mod wheel;
+
+use crate::loom::sync::atomic::{AtomicBool, Ordering};
+use crate::loom::sync::Mutex;
+use crate::runtime::driver::{self, IoHandle, IoStack};
+use crate::time::error::Error;
+use crate::time::{Clock, Duration};
+
+use std::fmt;
+use std::{num::NonZeroU64, ptr::NonNull, task::Waker};
+
+/// Time implementation that drives [`Sleep`][sleep], [`Interval`][interval], and [`Timeout`][timeout].
+///
+/// A `Driver` instance tracks the state necessary for managing time and
+/// notifying the [`Sleep`][sleep] instances once their deadlines are reached.
+///
+/// It is expected that a single instance manages many individual [`Sleep`][sleep]
+/// instances. The `Driver` implementation is thread-safe and, as such, is able
+/// to handle callers from across threads.
+///
+/// After creating the `Driver` instance, the caller must repeatedly call `park`
+/// or `park_timeout`. The time driver will perform no work unless `park` or
+/// `park_timeout` is called repeatedly.
+///
+/// The driver has a resolution of one millisecond. Any unit of time that falls
+/// between milliseconds is rounded up to the next millisecond.
+///
+/// When an instance is dropped, any outstanding [`Sleep`][sleep] instance that has not
+/// elapsed will be notified with an error. At this point, calling `poll` on the
+/// [`Sleep`][sleep] instance will result in panic.
+///
+/// # Implementation
+///
+/// The time driver is based on the [paper by Varghese and Lauck][paper].
+///
+/// A hashed timing wheel is a vector of slots, where each slot handles a time
+/// slice. As time progresses, the timer walks over the slot for the current
+/// instant, and processes each entry for that slot. When the timer reaches the
+/// end of the wheel, it starts again at the beginning.
+///
+/// The implementation maintains six wheels arranged in a set of levels. As the
+/// levels go up, the slots of the associated wheel represent larger intervals
+/// of time. At each level, the wheel has 64 slots. Each slot covers a range of
+/// time equal to the wheel at the lower level. At level zero, each slot
+/// represents one millisecond of time.
+///
+/// The wheels are:
+///
+/// * Level 0: 64 x 1 millisecond slots.
+/// * Level 1: 64 x 64 millisecond slots.
+/// * Level 2: 64 x ~4 second slots.
+/// * Level 3: 64 x ~4 minute slots.
+/// * Level 4: 64 x ~4 hour slots.
+/// * Level 5: 64 x ~12 day slots.
+///
+/// When the timer processes entries at level zero, it will notify all the
+/// `Sleep` instances as their deadlines have been reached. For all higher
+/// levels, all entries will be redistributed across the wheel at the next level
+/// down. Eventually, as time progresses, entries with [`Sleep`][sleep] instances will
+/// either be canceled (dropped) or their associated entries will reach level
+/// zero and be notified.
+///
+/// [paper]: http://www.cs.columbia.edu/~nahum/w6998/papers/ton97-timing-wheels.pdf
+/// [sleep]: crate::time::Sleep
+/// [timeout]: crate::time::Timeout
+/// [interval]: crate::time::Interval
+#[derive(Debug)]
+pub(crate) struct Driver {
+ /// Parker to delegate to.
+ park: IoStack,
+}
+
+/// Timer state shared between `Driver`, `Handle`, and `Registration`.
+struct Inner {
+ // The state is split like this so `Handle` can access `is_shutdown` without locking the mutex
+ pub(super) state: Mutex<InnerState>,
+
+ /// True if the driver is being shutdown.
+ pub(super) is_shutdown: AtomicBool,
+
+ // When `true`, a call to `park_timeout` should immediately return and time
+ // should not advance. One reason for this to be `true` is if the task
+ // passed to `Runtime::block_on` called `task::yield_now()`.
+ //
+ // While it may look racy, it only has any effect when the clock is paused
+ // and pausing the clock is restricted to a single-threaded runtime.
+ #[cfg(feature = "test-util")]
+ did_wake: AtomicBool,
+}
+
+/// Time state shared which must be protected by a `Mutex`
+struct InnerState {
+ /// The last published timer `elapsed` value.
+ elapsed: u64,
+
+ /// The earliest time at which we promise to wake up without unparking.
+ next_wake: Option<NonZeroU64>,
+
+ /// Timer wheel.
+ wheel: wheel::Wheel,
+}
+
+// ===== impl Driver =====
+
+impl Driver {
+ /// Creates a new `Driver` instance that uses `park` to block the current
+ /// thread and a `TimeSource` built from `clock` to convert time to ticks.
+ ///
+ /// Specifying the source of time is useful when testing.
+ pub(crate) fn new(park: IoStack, clock: &Clock) -> (Driver, Handle) {
+ let time_source = TimeSource::new(clock);
+
+ let handle = Handle {
+ time_source,
+ inner: Inner {
+ state: Mutex::new(InnerState {
+ elapsed: 0,
+ next_wake: None,
+ wheel: wheel::Wheel::new(),
+ }),
+ is_shutdown: AtomicBool::new(false),
+
+ #[cfg(feature = "test-util")]
+ did_wake: AtomicBool::new(false),
+ },
+ };
+
+ let driver = Driver { park };
+
+ (driver, handle)
+ }
+
+ pub(crate) fn park(&mut self, handle: &driver::Handle) {
+ self.park_internal(handle, None)
+ }
+
+ pub(crate) fn park_timeout(&mut self, handle: &driver::Handle, duration: Duration) {
+ self.park_internal(handle, Some(duration))
+ }
+
+ pub(crate) fn shutdown(&mut self, rt_handle: &driver::Handle) {
+ let handle = rt_handle.time();
+
+ if handle.is_shutdown() {
+ return;
+ }
+
+ handle.inner.is_shutdown.store(true, Ordering::SeqCst);
+
+ // Advance time forward to the end of time.
+
+ handle.process_at_time(u64::MAX);
+
+ self.park.shutdown(rt_handle);
+ }
+
+ fn park_internal(&mut self, rt_handle: &driver::Handle, limit: Option<Duration>) {
+ let handle = rt_handle.time();
+ let mut lock = handle.inner.state.lock();
+
+ assert!(!handle.is_shutdown());
+
+ let next_wake = lock.wheel.next_expiration_time();
+ lock.next_wake =
+ next_wake.map(|t| NonZeroU64::new(t).unwrap_or_else(|| NonZeroU64::new(1).unwrap()));
+
+ drop(lock);
+
+ match next_wake {
+ Some(when) => {
+ let now = handle.time_source.now(rt_handle.clock());
+ // Note that we effectively round up to 1ms here - this avoids
+ // very short-duration microsecond-resolution sleeps that the OS
+ // might treat as zero-length.
+ let mut duration = handle
+ .time_source
+ .tick_to_duration(when.saturating_sub(now));
+
+ if duration > Duration::from_millis(0) {
+ if let Some(limit) = limit {
+ duration = std::cmp::min(limit, duration);
+ }
+
+ self.park_thread_timeout(rt_handle, duration);
+ } else {
+ self.park.park_timeout(rt_handle, Duration::from_secs(0));
+ }
+ }
+ None => {
+ if let Some(duration) = limit {
+ self.park_thread_timeout(rt_handle, duration);
+ } else {
+ self.park.park(rt_handle);
+ }
+ }
+ }
+
+ // Process pending timers after waking up
+ handle.process(rt_handle.clock());
+ }
+
+ // Variant wrapped in `cfg_test_util!`: when the clock reports that it can
+ // auto-advance, the wait is simulated by advancing the clock instead of
+ // parking for `duration`.
+ cfg_test_util! {
+ fn park_thread_timeout(&mut self, rt_handle: &driver::Handle, duration: Duration) {
+ let handle = rt_handle.time();
+ let clock = rt_handle.clock();
+
+ if clock.can_auto_advance() {
+ // Park with a zero timeout first, then decide whether to advance.
+ self.park.park_timeout(rt_handle, Duration::from_secs(0));
+
+ // If the time driver was woken, then the park completed
+ // before the "duration" elapsed (usually caused by a
+ // yield in `Runtime::block_on`). In this case, we don't
+ // advance the clock.
+ if !handle.did_wake() {
+ // Simulate advancing time
+ if let Err(msg) = clock.advance(duration) {
+ panic!("{}", msg);
+ }
+ }
+ } else {
+ self.park.park_timeout(rt_handle, duration);
+ }
+ }
+ }
+
+ // Variant wrapped in `cfg_not_test_util!`: forwards straight to the
+ // wrapped park driver with the requested timeout.
+ cfg_not_test_util! {
+ fn park_thread_timeout(&mut self, rt_handle: &driver::Handle, duration: Duration) {
+ self.park.park_timeout(rt_handle, duration);
+ }
+ }
+}
+
+impl Handle {
+ /// Runs timer related logic up to the current tick reported by `clock`.
+ ///
+ /// Nothing is returned; the call is forwarded to `process_at_time`, which
+ /// stores the next wakeup in the shared driver state.
+ pub(self) fn process(&self, clock: &Clock) {
+ let now = self.time_source().now(clock);
+
+ self.process_at_time(now)
+ }
+
+ /// Fires every timer the wheel yields for tick `now` and wakes its waker.
+ ///
+ /// Wakers are collected into a fixed buffer of 32 and are only invoked
+ /// while the driver lock is released. Afterwards the cached `elapsed` and
+ /// `next_wake` values in the shared state are refreshed from the wheel.
+ pub(self) fn process_at_time(&self, mut now: u64) {
+ let mut waker_list: [Option<Waker>; 32] = Default::default();
+ let mut waker_idx = 0;
+
+ let mut lock = self.inner.lock();
+
+ if now < lock.elapsed {
+ // Time went backwards! This normally shouldn't happen as the Rust language
+ // guarantees that an Instant is monotonic, but can happen when running
+ // Linux in a VM on a Windows host due to std incorrectly trusting the
+ // hardware clock to be monotonic.
+ //
+ // See <https://github.com/tokio-rs/tokio/issues/3619> for more information.
+ now = lock.elapsed;
+ }
+
+ while let Some(entry) = lock.wheel.poll(now) {
+ debug_assert!(unsafe { entry.is_pending() });
+
+ // SAFETY: We hold the driver lock, and just removed the entry from any linked lists.
+ if let Some(waker) = unsafe { entry.fire(Ok(())) } {
+ waker_list[waker_idx] = Some(waker);
+
+ waker_idx += 1;
+
+ if waker_idx == waker_list.len() {
+ // Wake a batch of wakers. To avoid deadlock, we must do this with the lock temporarily dropped.
+ drop(lock);
+
+ // The buffer is full here, so every element is `Some`.
+ for waker in waker_list.iter_mut() {
+ waker.take().unwrap().wake();
+ }
+
+ waker_idx = 0;
+
+ lock = self.inner.lock();
+ }
+ }
+ }
+
+ // Update the elapsed cache
+ lock.elapsed = lock.wheel.elapsed();
+ // `next_wake` is a `NonZeroU64`, so a tick of 0 is recorded as 1.
+ lock.next_wake = lock
+ .wheel
+ .poll_at()
+ .map(|t| NonZeroU64::new(t).unwrap_or_else(|| NonZeroU64::new(1).unwrap()));
+
+ drop(lock);
+
+ // Wake whatever is left in the partially filled buffer, lock released.
+ for waker in waker_list[0..waker_idx].iter_mut() {
+ waker.take().unwrap().wake();
+ }
+ }
+
+ /// Removes a registered timer from the driver.
+ ///
+ /// The timer will be moved to the cancelled state. Wakers will _not_ be
+ /// invoked. If the timer is already completed, this function is a no-op.
+ ///
+ /// This function always acquires the driver lock, even if the entry does
+ /// not appear to be registered.
+ ///
+ /// SAFETY: The timer must not be registered with some other driver, and
+ /// `add_entry` must not be called concurrently.
+ pub(self) unsafe fn clear_entry(&self, entry: NonNull<TimerShared>) {
+ unsafe {
+ let mut lock = self.inner.lock();
+
+ // Only unlink from the wheel when the entry may still be linked.
+ if entry.as_ref().might_be_registered() {
+ lock.wheel.remove(entry);
+ }
+
+ // The value returned by `fire` is discarded, so no waker is invoked.
+ entry.as_ref().handle().fire(Ok(()));
+ }
+ }
+
+ /// Removes and re-adds an entry to the driver.
+ ///
+ /// The entry is re-inserted with `new_tick` as its expiration. If the
+ /// driver is shut down the entry is fired with a shutdown error instead;
+ /// if the wheel reports the tick as already elapsed the entry is fired
+ /// with `Ok(())`. `unpark` is signalled when the new expiration is earlier
+ /// than the recorded `next_wake` (or when none is recorded).
+ ///
+ /// SAFETY: The timer must be either unregistered, or registered with this
+ /// driver. No other threads are allowed to concurrently manipulate the
+ /// timer at all (the current thread should hold an exclusive reference to
+ /// the `TimerEntry`)
+ pub(self) unsafe fn reregister(
+ &self,
+ unpark: &IoHandle,
+ new_tick: u64,
+ entry: NonNull<TimerShared>,
+ ) {
+ // The block evaluates to the waker (if any) that must be woken once the
+ // driver lock, which lives only inside the block, has been released.
+ let waker = unsafe {
+ let mut lock = self.inner.lock();
+
+ // We may have raced with a firing/deregistration, so check before
+ // deregistering.
+ if unsafe { entry.as_ref().might_be_registered() } {
+ lock.wheel.remove(entry);
+ }
+
+ // Now that we have exclusive control of this entry, mint a handle to reinsert it.
+ let entry = entry.as_ref().handle();
+
+ if self.is_shutdown() {
+ unsafe { entry.fire(Err(crate::time::error::Error::shutdown())) }
+ } else {
+ entry.set_expiration(new_tick);
+
+ // Note: We don't have to worry about racing with some other resetting
+ // thread, because add_entry and reregister require exclusive control of
+ // the timer entry.
+ match unsafe { lock.wheel.insert(entry) } {
+ Ok(when) => {
+ // Unpark when the new timer expires before the currently
+ // recorded wakeup, or when no wakeup is recorded at all.
+ if lock
+ .next_wake
+ .map(|next_wake| when < next_wake.get())
+ .unwrap_or(true)
+ {
+ unpark.unpark();
+ }
+
+ None
+ }
+ // The requested tick has already elapsed: complete the timer now.
+ Err((entry, crate::time::error::InsertError::Elapsed)) => unsafe {
+ entry.fire(Ok(()))
+ },
+ }
+ }
+
+ // Must release lock before invoking waker to avoid the risk of deadlock.
+ };
+
+ // The timer was fired synchronously as a result of the reregistration.
+ // Wake the waker; this is needed because we might reset _after_ a poll,
+ // and otherwise the task won't be awoken to poll again.
+ if let Some(waker) = waker {
+ waker.wake();
+ }
+ }
+
+ // Returns the current value of the `did_wake` flag and resets it to
+ // `false` in the same atomic swap.
+ cfg_test_util! {
+ fn did_wake(&self) -> bool {
+ self.inner.did_wake.swap(false, Ordering::SeqCst)
+ }
+ }
+}
+
+// ===== impl Inner =====
+
+impl Inner {
+ /// Acquires the mutex guarding the driver's `InnerState`.
+ pub(super) fn lock(&self) -> crate::loom::sync::MutexGuard<'_, InnerState> {
+ let state = &self.state;
+ state.lock()
+ }
+
+ /// Reports whether the shutdown flag has been set.
+ pub(super) fn is_shutdown(&self) -> bool {
+ let flag = &self.is_shutdown;
+ flag.load(Ordering::SeqCst)
+ }
+}
+
+impl fmt::Debug for Inner {
+ /// Formats as the bare struct name; no fields are printed.
+ fn fmt(&self, fmt: &mut fmt::Formatter<'_>) -> fmt::Result {
+ let mut builder = fmt.debug_struct("Inner");
+ builder.finish()
+ }
+}
+
+#[cfg(test)]
+mod tests;
diff --git a/third_party/rust/tokio/src/runtime/time/source.rs b/third_party/rust/tokio/src/runtime/time/source.rs
new file mode 100644
index 0000000000..4647bc4122
--- /dev/null
+++ b/third_party/rust/tokio/src/runtime/time/source.rs
@@ -0,0 +1,39 @@
+use super::MAX_SAFE_MILLIS_DURATION;
+use crate::time::{Clock, Duration, Instant};
+
+/// A structure which handles conversion from Instants to u64 timestamps.
+///
+/// Timestamps ("ticks") are whole milliseconds measured from `start_time`.
+#[derive(Debug)]
+pub(crate) struct TimeSource {
+ /// Reference instant that corresponds to tick zero.
+ start_time: Instant,
+}
+
+impl TimeSource {
+ /// Creates a time source whose tick zero is the clock's current instant.
+ pub(crate) fn new(clock: &Clock) -> Self {
+ let start_time = clock.now();
+ Self { start_time }
+ }
+
+ /// Converts a deadline to a tick, rounding up to the end of its
+ /// millisecond by adding 999,999 ns before the conversion.
+ pub(crate) fn deadline_to_tick(&self, t: Instant) -> u64 {
+ let end_of_ms = t + Duration::from_nanos(999_999);
+ self.instant_to_tick(end_of_ms)
+ }
+
+ /// Converts an instant to the whole milliseconds elapsed since
+ /// `start_time`.
+ ///
+ /// Fractional milliseconds are truncated, instants earlier than
+ /// `start_time` map to tick 0, and a millisecond count that does not fit
+ /// in a `u64` is replaced by `MAX_SAFE_MILLIS_DURATION`.
+ pub(crate) fn instant_to_tick(&self, t: Instant) -> u64 {
+ let since_start: Duration = match t.checked_duration_since(self.start_time) {
+ Some(elapsed) => elapsed,
+ None => Duration::from_secs(0),
+ };
+
+ let ticks: Result<u64, _> = since_start.as_millis().try_into();
+ match ticks {
+ Ok(ticks) => ticks,
+ Err(_) => MAX_SAFE_MILLIS_DURATION,
+ }
+ }
+
+ /// Converts a tick count back into a `Duration` of that many milliseconds.
+ pub(crate) fn tick_to_duration(&self, t: u64) -> Duration {
+ let millis = t;
+ Duration::from_millis(millis)
+ }
+
+ /// Returns the tick corresponding to the clock's current instant.
+ pub(crate) fn now(&self, clock: &Clock) -> u64 {
+ let current = clock.now();
+ self.instant_to_tick(current)
+ }
+}
diff --git a/third_party/rust/tokio/src/runtime/time/tests/mod.rs b/third_party/rust/tokio/src/runtime/time/tests/mod.rs
new file mode 100644
index 0000000000..155d99a348
--- /dev/null
+++ b/third_party/rust/tokio/src/runtime/time/tests/mod.rs
@@ -0,0 +1,267 @@
+#![cfg(not(tokio_wasi))]
+
+use std::{task::Context, time::Duration};
+
+#[cfg(not(loom))]
+use futures::task::noop_waker_ref;
+
+use crate::loom::sync::atomic::{AtomicBool, Ordering};
+use crate::loom::sync::Arc;
+use crate::loom::thread;
+
+use super::TimerEntry;
+
+/// Drives `f` to completion and returns its output.
+///
+/// Under `cfg(loom)` this uses `loom::future::block_on`; otherwise a fresh
+/// current-thread runtime is built for the call.
+fn block_on<T>(f: impl std::future::Future<Output = T>) -> T {
+ #[cfg(loom)]
+ return loom::future::block_on(f);
+
+ #[cfg(not(loom))]
+ {
+ let rt = crate::runtime::Builder::new_current_thread()
+ .build()
+ .unwrap();
+ rt.block_on(f)
+ }
+}
+
+/// Runs the test body `f`: through `loom::model` under `cfg(loom)`, or by
+/// calling it once directly otherwise.
+fn model(f: impl Fn() + Send + Sync + 'static) {
+ #[cfg(loom)]
+ loom::model(f);
+
+ #[cfg(not(loom))]
+ f();
+}
+
+/// Builds a current-thread runtime with the time driver enabled, starting
+/// paused or not according to `start_paused`.
+fn rt(start_paused: bool) -> crate::runtime::Runtime {
+ let mut builder = crate::runtime::Builder::new_current_thread();
+ builder.enable_time().start_paused(start_paused);
+
+ let runtime = builder.build();
+ runtime.unwrap()
+}
+
+/// A timer with a 1s deadline, polled from another thread, completes once the
+/// driver is processed far past that deadline.
+#[test]
+fn single_timer() {
+ model(|| {
+ let rt = rt(false);
+ let handle = rt.handle();
+
+ let handle_ = handle.clone();
+ let jh = thread::spawn(move || {
+ let entry = TimerEntry::new(
+ &handle_.inner,
+ handle_.inner.driver().clock().now() + Duration::from_secs(1),
+ );
+ pin!(entry);
+
+ block_on(futures::future::poll_fn(|cx| {
+ entry.as_mut().poll_elapsed(cx)
+ }))
+ .unwrap();
+ });
+
+ thread::yield_now();
+
+ let time = handle.inner.driver().time();
+ let clock = handle.inner.driver().clock();
+
+ // This may or may not find the timer registered (depending on how it
+ // races with the thread). If it does not, the wheel has still been
+ // advanced past the deadline, so the timer should complete
+ // synchronously when it is registered afterwards.
+ //
+ // Ticks are milliseconds, so this is far more than the 1s deadline.
+ time.process_at_time(time.time_source().now(clock) + 2_000_000_000);
+
+ jh.join().unwrap();
+ })
+}
+
+/// A timer that is polled twice and then dropped by its thread while the
+/// driver is concurrently advanced past its deadline.
+#[test]
+fn drop_timer() {
+ model(|| {
+ let rt = rt(false);
+ let handle = rt.handle();
+
+ let handle_ = handle.clone();
+ let jh = thread::spawn(move || {
+ let entry = TimerEntry::new(
+ &handle_.inner,
+ handle_.inner.driver().clock().now() + Duration::from_secs(1),
+ );
+ pin!(entry);
+
+ let _ = entry
+ .as_mut()
+ .poll_elapsed(&mut Context::from_waker(futures::task::noop_waker_ref()));
+ let _ = entry
+ .as_mut()
+ .poll_elapsed(&mut Context::from_waker(futures::task::noop_waker_ref()));
+ });
+
+ thread::yield_now();
+
+ let time = handle.inner.driver().time();
+ let clock = handle.inner.driver().clock();
+
+ // advance 2_000_000_000 ticks (milliseconds) into the future, which is
+ // well past the 1s deadline.
+ time.process_at_time(time.time_source().now(clock) + 2_000_000_000);
+
+ jh.join().unwrap();
+ })
+}
+
+/// A timer first polled with a no-op waker and then polled to completion
+/// through `block_on`, i.e. with a different waker.
+#[test]
+fn change_waker() {
+ model(|| {
+ let rt = rt(false);
+ let handle = rt.handle();
+
+ let handle_ = handle.clone();
+ let jh = thread::spawn(move || {
+ let entry = TimerEntry::new(
+ &handle_.inner,
+ handle_.inner.driver().clock().now() + Duration::from_secs(1),
+ );
+ pin!(entry);
+
+ let _ = entry
+ .as_mut()
+ .poll_elapsed(&mut Context::from_waker(futures::task::noop_waker_ref()));
+
+ block_on(futures::future::poll_fn(|cx| {
+ entry.as_mut().poll_elapsed(cx)
+ }))
+ .unwrap();
+ });
+
+ thread::yield_now();
+
+ let time = handle.inner.driver().time();
+ let clock = handle.inner.driver().clock();
+
+ // advance 2_000_000_000 ticks (milliseconds), well past the 1s deadline
+ time.process_at_time(time.time_source().now(clock) + 2_000_000_000);
+
+ jh.join().unwrap();
+ })
+}
+
+/// A timer reset from 1s to 2s must not complete when the driver is processed
+/// at 1.5s, and must complete once it is processed at 2.5s.
+#[test]
+fn reset_future() {
+ model(|| {
+ // Set by the spawned thread once its timer has completed.
+ let finished_early = Arc::new(AtomicBool::new(false));
+
+ let rt = rt(false);
+ let handle = rt.handle();
+
+ let handle_ = handle.clone();
+ let finished_early_ = finished_early.clone();
+ let start = handle.inner.driver().clock().now();
+
+ let jh = thread::spawn(move || {
+ let entry = TimerEntry::new(&handle_.inner, start + Duration::from_secs(1));
+ pin!(entry);
+
+ let _ = entry
+ .as_mut()
+ .poll_elapsed(&mut Context::from_waker(futures::task::noop_waker_ref()));
+
+ entry.as_mut().reset(start + Duration::from_secs(2), true);
+
+ // shouldn't complete before 2s
+ block_on(futures::future::poll_fn(|cx| {
+ entry.as_mut().poll_elapsed(cx)
+ }))
+ .unwrap();
+
+ finished_early_.store(true, Ordering::Relaxed);
+ });
+
+ thread::yield_now();
+
+ let handle = handle.inner.driver().time();
+
+ // Process up to 1.5s after `start`; the spawned thread may or may not
+ // have registered its timer yet.
+ handle.process_at_time(
+ handle
+ .time_source()
+ .instant_to_tick(start + Duration::from_millis(1500)),
+ );
+
+ assert!(!finished_early.load(Ordering::Relaxed));
+
+ // Process up to 2.5s after `start`, past the reset deadline.
+ handle.process_at_time(
+ handle
+ .time_source()
+ .instant_to_tick(start + Duration::from_millis(2500)),
+ );
+
+ jh.join().unwrap();
+
+ assert!(finished_early.load(Ordering::Relaxed));
+ })
+}
+
+/// Picks `miri` when compiled with `cfg(miri)` and `normal` otherwise.
+#[cfg(not(loom))]
+fn normal_or_miri<T>(normal: T, miri: T) -> T {
+ if !cfg!(miri) {
+ return normal;
+ }
+
+ miri
+}
+
+/// Registers one timer per millisecond offset, then advances the driver one
+/// tick at a time and checks that exactly the timers whose offset is at or
+/// before the current tick report ready.
+#[test]
+#[cfg(not(loom))]
+fn poll_process_levels() {
+ let rt = rt(true);
+ let handle = rt.handle();
+
+ let mut entries = vec![];
+
+ for i in 0..normal_or_miri(1024, 64) {
+ let mut entry = Box::pin(TimerEntry::new(
+ &handle.inner,
+ handle.inner.driver().clock().now() + Duration::from_millis(i),
+ ));
+
+ let _ = entry
+ .as_mut()
+ .poll_elapsed(&mut Context::from_waker(noop_waker_ref()));
+
+ entries.push(entry);
+ }
+
+ for t in 1..normal_or_miri(1024, 64) {
+ handle.inner.driver().time().process_at_time(t as u64);
+
+ // The index in `entries` equals the millisecond offset used above.
+ for (deadline, future) in entries.iter_mut().enumerate() {
+ let mut context = Context::from_waker(noop_waker_ref());
+ if deadline <= t {
+ assert!(future.as_mut().poll_elapsed(&mut context).is_ready());
+ } else {
+ assert!(future.as_mut().poll_elapsed(&mut context).is_pending());
+ }
+ }
+ }
+}
+
+/// A timer 193 ms out is still pending after processing tick 62; processing
+/// tick 192 twice in a row must then not panic.
+#[test]
+#[cfg(not(loom))]
+fn poll_process_levels_targeted() {
+ let mut context = Context::from_waker(noop_waker_ref());
+
+ let rt = rt(true);
+ let handle = rt.handle();
+
+ let e1 = TimerEntry::new(
+ &handle.inner,
+ handle.inner.driver().clock().now() + Duration::from_millis(193),
+ );
+ pin!(e1);
+
+ let handle = handle.inner.driver().time();
+
+ handle.process_at_time(62);
+ assert!(e1.as_mut().poll_elapsed(&mut context).is_pending());
+ handle.process_at_time(192);
+ handle.process_at_time(192);
+}
diff --git a/third_party/rust/tokio/src/runtime/time/wheel/level.rs b/third_party/rust/tokio/src/runtime/time/wheel/level.rs
new file mode 100644
index 0000000000..7e48ff5c57
--- /dev/null
+++ b/third_party/rust/tokio/src/runtime/time/wheel/level.rs
@@ -0,0 +1,274 @@
+use crate::runtime::time::{EntryList, TimerHandle, TimerShared};
+
+use std::{fmt, ptr::NonNull};
+
+/// Wheel for a single level in the timer. This wheel contains 64 slots.
+pub(crate) struct Level {
+ /// Index of this level within the wheel; level 0 has the finest slots.
+ level: usize,
+
+ /// Bit field tracking which slots currently contain entries.
+ ///
+ /// Using a bit field to track slots that contain entries allows avoiding a
+ /// scan to find entries. This field is updated when entries are added or
+ /// removed from a slot.
+ ///
+ /// The least-significant bit represents slot zero.
+ occupied: u64,
+
+ /// Slots. Each one is an `EntryList` holding the entries that map to it.
+ ///
+ /// NOTE(review): this comment used to say the field "needs to be an
+ /// UnsafeCell", but it is a plain array; presumably a stale remark.
+ slot: [EntryList; LEVEL_MULT],
+}
+
+/// Indicates when a slot must be processed next.
+#[derive(Debug)]
+pub(crate) struct Expiration {
+ /// The level containing the slot.
+ pub(crate) level: usize,
+
+ /// The slot index.
+ pub(crate) slot: usize,
+
+ /// The instant at which the slot needs to be processed, expressed as a
+ /// `u64` tick in the same units as the wheel's `elapsed` value.
+ pub(crate) deadline: u64,
+}
+
+/// Level multiplier.
+///
+/// This is both the number of slots in a level and the factor by which the
+/// slot width grows from one level to the next.
+///
+/// Being a power of 2 is very important.
+const LEVEL_MULT: usize = 64;
+
+impl Level {
+ /// Creates the wheel level with index `level`, with every slot empty and
+ /// no bit set in `occupied`.
+ pub(crate) fn new(level: usize) -> Level {
+ // The array-repeat syntax `[value; N]` requires the element to be
+ // `Copy`, and `Default` is only implemented for arrays of up to 32
+ // elements, so neither can build the slot array directly.
+ // `std::array::from_fn` (Rust 1.63+) builds an array of any length by
+ // invoking the constructor once per element.
+ Level {
+ level,
+ occupied: 0,
+ slot: std::array::from_fn(|_| EntryList::default()),
+ }
+ }
+
+ /// Finds the slot that needs to be processed next and returns the slot and
+ /// `Instant` at which this slot must be processed.
+ ///
+ /// Returns `None` when no slot of this level is occupied.
+ pub(crate) fn next_expiration(&self, now: u64) -> Option<Expiration> {
+ // Use the `occupied` bit field to get the index of the next slot that
+ // needs to be processed.
+ let slot = match self.next_occupied_slot(now) {
+ Some(slot) => slot,
+ None => return None,
+ };
+
+ // From the slot index, calculate the `Instant` at which it needs to be
+ // processed. This value *must* be in the future with respect to `now`.
+
+ let level_range = level_range(self.level);
+ let slot_range = slot_range(self.level);
+
+ // Compute the start date of the current level by masking the low bits
+ // of `now` (`level_range` is a power of 2).
+ let level_start = now & !(level_range - 1);
+ let mut deadline = level_start + slot as u64 * slot_range;
+
+ if deadline <= now {
+ // A timer is in a slot "prior" to the current time. This can occur
+ // because we do not have an infinite hierarchy of timer levels, and
+ // eventually a timer scheduled for a very distant time might end up
+ // being placed in a slot that is beyond the end of all of the
+ // arrays.
+ //
+ // To deal with this, we first limit timers to being scheduled no
+ // more than MAX_DURATION ticks in the future; that is, they're at
+ // most one rotation of the top level away. Then, we force timers
+ // that logically would go into the top+1 level, to instead go into
+ // the top level's slots.
+ //
+ // What this means is that the top level's slots act as a
+ // pseudo-ring buffer, and we rotate around them indefinitely. If we
+ // compute a deadline before now, and it's the top level, it
+ // therefore means we're actually looking at a slot in the future.
+ debug_assert_eq!(self.level, super::NUM_LEVELS - 1);
+
+ // Move the deadline one full rotation of this level ahead.
+ deadline += level_range;
+ }
+
+ debug_assert!(
+ deadline >= now,
+ "deadline={:016X}; now={:016X}; level={}; lr={:016X}, sr={:016X}, slot={}; occupied={:b}",
+ deadline,
+ now,
+ self.level,
+ level_range,
+ slot_range,
+ slot,
+ self.occupied
+ );
+
+ Some(Expiration {
+ level: self.level,
+ slot,
+ deadline,
+ })
+ }
+
+ /// Returns the index of the first occupied slot at or after the slot that
+ /// contains `now`, wrapping around the level, or `None` when the level
+ /// holds no entries.
+ fn next_occupied_slot(&self, now: u64) -> Option<usize> {
+ if self.occupied == 0 {
+ return None;
+ }
+
+ // Position of `now` within this level. Reducing modulo `LEVEL_MULT`
+ // up front keeps the value below the slot count, so the addition
+ // below cannot overflow however large `now` is. The rotation and the
+ // final modulo only depend on this reduced value.
+ let now_slot = ((now / slot_range(self.level)) % LEVEL_MULT as u64) as usize;
+
+ // Rotate so that `now_slot` becomes bit zero; the number of trailing
+ // zeros is then the distance to the next occupied slot.
+ let occupied = self.occupied.rotate_right(now_slot as u32);
+ let zeros = occupied.trailing_zeros() as usize;
+ let slot = (zeros + now_slot) % LEVEL_MULT;
+
+ Some(slot)
+ }
+
+ /// Pushes `item` onto the slot selected by its cached expiration and marks
+ /// that slot as occupied.
+ ///
+ /// SAFETY: NOTE(review): no contract is stated here; presumably the same
+ /// requirements as `Wheel::insert` apply, since that is the caller. Confirm.
+ pub(crate) unsafe fn add_entry(&mut self, item: TimerHandle) {
+ let slot = slot_for(item.cached_when(), self.level);
+
+ self.slot[slot].push_front(item);
+
+ self.occupied |= occupied_bit(slot);
+ }
+
+ /// Unlinks `item` from the slot selected by its cached expiration and
+ /// clears the slot's occupied bit when the slot becomes empty.
+ ///
+ /// SAFETY: NOTE(review): no contract is stated here; presumably `item` must
+ /// currently be linked in this level's slot for its cached expiration.
+ pub(crate) unsafe fn remove_entry(&mut self, item: NonNull<TimerShared>) {
+ let slot = slot_for(unsafe { item.as_ref().cached_when() }, self.level);
+
+ unsafe { self.slot[slot].remove(item) };
+ if self.slot[slot].is_empty() {
+ // The bit is currently set
+ debug_assert!(self.occupied & occupied_bit(slot) != 0);
+
+ // Unset the bit
+ self.occupied ^= occupied_bit(slot);
+ }
+ }
+
+ /// Empties slot `slot`: clears its occupied bit and returns the entries it
+ /// held, leaving a default `EntryList` in its place.
+ pub(crate) fn take_slot(&mut self, slot: usize) -> EntryList {
+ let keep_mask = !occupied_bit(slot);
+ self.occupied &= keep_mask;
+
+ let entries = &mut self.slot[slot];
+ std::mem::take(entries)
+ }
+}
+
+impl fmt::Debug for Level {
+ /// Prints only the `occupied` bit field; the slots are omitted.
+ fn fmt(&self, fmt: &mut fmt::Formatter<'_>) -> fmt::Result {
+ let mut builder = fmt.debug_struct("Level");
+ builder.field("occupied", &self.occupied);
+ builder.finish()
+ }
+}
+
+/// Returns the mask with only bit `slot` set.
+fn occupied_bit(slot: usize) -> u64 {
+ let one: u64 = 1;
+ one << slot
+}
+
+/// Width of a single slot at `level`, i.e. `LEVEL_MULT` raised to `level`.
+fn slot_range(level: usize) -> u64 {
+ let exponent = level as u32;
+ let width = LEVEL_MULT.pow(exponent);
+ width as u64
+}
+
+/// Span covered by all slots of `level`: `LEVEL_MULT` times the slot width.
+fn level_range(level: usize) -> u64 {
+ let slots = LEVEL_MULT as u64;
+ slots * slot_range(level)
+}
+
+/// Maps a duration (milliseconds) to its slot index within `level`.
+///
+/// Each level consumes six bits of the duration, so the value is shifted
+/// right by `6 * level` and reduced modulo the number of slots.
+fn slot_for(duration: u64, level: usize) -> usize {
+ let shifted = duration >> (level * 6);
+ let slot = shifted % LEVEL_MULT as u64;
+ slot as usize
+}
+
+#[cfg(all(test, not(loom)))]
+mod test {
+ use super::*;
+
+ /// Checks `slot_for` on level 0 for every slot, and on levels 1 to 4 for
+ /// durations that are exact multiples of that level's slot width.
+ #[test]
+ fn test_slot_for() {
+ for pos in 0..64 {
+ assert_eq!(pos as usize, slot_for(pos, 0));
+ }
+
+ for level in 1..5 {
+ for pos in level..64 {
+ // `a` is the start of slot `pos` at this level.
+ let a = pos * 64_usize.pow(level as u32);
+ assert_eq!(pos as usize, slot_for(a as u64, level));
+ }
+ }
+ }
+}
diff --git a/third_party/rust/tokio/src/runtime/time/wheel/mod.rs b/third_party/rust/tokio/src/runtime/time/wheel/mod.rs
new file mode 100644
index 0000000000..bf13b7b241
--- /dev/null
+++ b/third_party/rust/tokio/src/runtime/time/wheel/mod.rs
@@ -0,0 +1,349 @@
+use crate::runtime::time::{TimerHandle, TimerShared};
+use crate::time::error::InsertError;
+
+mod level;
+pub(crate) use self::level::Expiration;
+use self::level::Level;
+
+use std::ptr::NonNull;
+
+use super::EntryList;
+
+/// Timing wheel implementation.
+///
+/// This type provides the hashed timing wheel implementation that backs `Timer`
+/// and `DelayQueue`.
+///
+/// Entries are `TimerHandle`s kept in per-slot `EntryList`s; see `insert` for
+/// the pinning requirements this places on callers.
+///
+/// NOTE(review): this comment used to describe the structure as generic over
+/// `T: Stack` with a slab passed to each function. The type shown here has no
+/// type parameter, so that paragraph was presumably stale.
+///
+/// See `Timer` documentation for some implementation notes.
+#[derive(Debug)]
+pub(crate) struct Wheel {
+ /// The number of milliseconds elapsed since the wheel started.
+ elapsed: u64,
+
+ /// Timer wheel.
+ ///
+ /// Levels:
+ ///
+ /// * 1 ms slots / 64 ms range
+ /// * 64 ms slots / ~ 4 sec range
+ /// * ~ 4 sec slots / ~ 4 min range
+ /// * ~ 4 min slots / ~ 4 hr range
+ /// * ~ 4 hr slots / ~ 12 day range
+ /// * ~ 12 day slots / ~ 2 yr range
+ levels: Vec<Level>,
+
+ /// Entries queued for firing
+ pending: EntryList,
+}
+
+/// Number of levels. Each level has 64 slots. By using 6 levels with 64 slots
+/// each, the timer is able to track time up to 2 years into the future with a
+/// precision of 1 millisecond.
+const NUM_LEVELS: usize = 6;
+
+/// The maximum duration of a `Sleep`.
+///
+/// Six bits per level over `NUM_LEVELS` levels gives `2^36 - 1` ticks.
+pub(super) const MAX_DURATION: u64 = (1 << (6 * NUM_LEVELS)) - 1;
+
+impl Wheel {
+ /// Creates a timing wheel with `NUM_LEVELS` empty levels, nothing pending
+ /// and an elapsed time of zero.
+ pub(crate) fn new() -> Wheel {
+ let mut levels = Vec::with_capacity(NUM_LEVELS);
+ for index in 0..NUM_LEVELS {
+ levels.push(Level::new(index));
+ }
+
+ let pending = EntryList::new();
+
+ Wheel {
+ elapsed: 0,
+ levels,
+ pending,
+ }
+ }
+
+ /// Returns the number of milliseconds that have elapsed since the timing
+ /// wheel's creation.
+ ///
+ /// This is the value last stored by `set_elapsed`; it does not read a clock.
+ pub(crate) fn elapsed(&self) -> u64 {
+ self.elapsed
+ }
+
+ /// Inserts an entry into the timing wheel.
+ ///
+ /// # Arguments
+ ///
+ /// * `item`: The item to insert into the wheel.
+ ///
+ /// # Return
+ ///
+ /// Returns `Ok` with the item's expiration tick when the item is
+ /// successfully inserted, `Err` otherwise.
+ ///
+ /// `Err(Elapsed)` indicates that `when` represents an instant that has
+ /// already passed. In this case, the caller should fire the timeout
+ /// immediately.
+ ///
+ /// NOTE(review): this comment also used to list `Err(Invalid)` for an
+ /// invalid `when` argument, but `Elapsed` is the only error this function
+ /// returns.
+ ///
+ /// # Safety
+ ///
+ /// This function registers item into an intrusive linked list. The caller
+ /// must ensure that `item` is pinned and will not be dropped without first
+ /// being deregistered.
+ pub(crate) unsafe fn insert(
+ &mut self,
+ item: TimerHandle,
+ ) -> Result<u64, (TimerHandle, InsertError)> {
+ let when = item.sync_when();
+
+ // A deadline at or before the wheel's current time cannot be stored.
+ if when <= self.elapsed {
+ return Err((item, InsertError::Elapsed));
+ }
+
+ // Get the level at which the entry should be stored
+ let level = self.level_for(when);
+
+ unsafe {
+ self.levels[level].add_entry(item);
+ }
+
+ // Sanity check: the level's next expiration must not lie in the past.
+ debug_assert!({
+ self.levels[level]
+ .next_expiration(self.elapsed)
+ .map(|e| e.deadline >= self.elapsed)
+ .unwrap_or(true)
+ });
+
+ Ok(when)
+ }
+
+ /// Removes `item` from the timing wheel.
+ ///
+ /// An entry whose cached expiration is `u64::MAX` is unlinked from the
+ /// `pending` list; any other entry is unlinked from the level selected by
+ /// its cached expiration.
+ ///
+ /// SAFETY: NOTE(review): no contract is stated here; presumably `item` must
+ /// currently be linked into this wheel. Confirm against the callers.
+ pub(crate) unsafe fn remove(&mut self, item: NonNull<TimerShared>) {
+ unsafe {
+ let when = item.as_ref().cached_when();
+ if when == u64::MAX {
+ self.pending.remove(item);
+ } else {
+ debug_assert!(
+ self.elapsed <= when,
+ "elapsed={}; when={}",
+ self.elapsed,
+ when
+ );
+
+ let level = self.level_for(when);
+
+ self.levels[level].remove_entry(item);
+ }
+ }
+ }
+
+ /// Tick at which the wheel next needs polling, or `None` when there is no
+ /// upcoming expiration.
+ pub(crate) fn poll_at(&self) -> Option<u64> {
+ let next = self.next_expiration()?;
+ Some(next.deadline)
+ }
+
+ /// Advances the timer up to the instant represented by `now`.
+ ///
+ /// Returns one entry that is ready to fire, or `None` when nothing is due
+ /// at `now`. Callers invoke this repeatedly to drain all ready entries.
+ pub(crate) fn poll(&mut self, now: u64) -> Option<TimerHandle> {
+ loop {
+ // Hand out already-queued entries before advancing any further.
+ if let Some(handle) = self.pending.pop_back() {
+ return Some(handle);
+ }
+
+ match self.next_expiration() {
+ Some(ref expiration) if expiration.deadline <= now => {
+ self.process_expiration(expiration);
+
+ self.set_elapsed(expiration.deadline);
+ }
+ _ => {
+ // in this case the poll did not indicate an expiration
+ // _and_ we were not able to find a next expiration in
+ // the current list of timers. advance to the poll's
+ // current time and do nothing else.
+ self.set_elapsed(now);
+ break;
+ }
+ }
+ }
+
+ self.pending.pop_back()
+ }
+
+ /// Returns the instant at which the next timeout expires.
+ ///
+ /// When entries are already pending, the wheel's current `elapsed` time is
+ /// reported as the deadline. Otherwise the levels are scanned from lowest
+ /// to highest and the first expiration found is returned.
+ fn next_expiration(&self) -> Option<Expiration> {
+ if !self.pending.is_empty() {
+ // Expire immediately as we have things pending firing
+ return Some(Expiration {
+ level: 0,
+ slot: 0,
+ deadline: self.elapsed,
+ });
+ }
+
+ // Check all levels
+ for level in 0..NUM_LEVELS {
+ if let Some(expiration) = self.levels[level].next_expiration(self.elapsed) {
+ // There cannot be any expirations at a higher level that happen
+ // before this one.
+ debug_assert!(self.no_expirations_before(level + 1, expiration.deadline));
+
+ return Some(expiration);
+ }
+ }
+
+ None
+ }
+
+ /// Tick at which the wheel next has work to do, or `None` if no timers
+ /// are registered.
+ pub(super) fn next_expiration_time(&self) -> Option<u64> {
+ match self.next_expiration() {
+ Some(upcoming) => Some(upcoming.deadline),
+ None => None,
+ }
+ }
+
+ /// Debug-assertion helper: `true` when no level from `start_level` upwards
+ /// has a next expiration earlier than `before`.
+ fn no_expirations_before(&self, start_level: usize, before: u64) -> bool {
+ // `count` visits every level, so all levels are always inspected.
+ let earlier = (start_level..NUM_LEVELS)
+ .filter_map(|index| self.levels[index].next_expiration(self.elapsed))
+ .filter(|candidate| candidate.deadline < before)
+ .count();
+
+ earlier == 0
+ }
+
+ /// iteratively find entries that are between the wheel's current
+ /// time and the expiration time. for each in that population either
+ /// queue it for notification (in the case of the last level) or tier
+ /// it down to the next level (in all other cases).
+ pub(crate) fn process_expiration(&mut self, expiration: &Expiration) {
+ // Note that we need to take _all_ of the entries off the list before
+ // processing any of them. This is important because it's possible that
+ // those entries might need to be reinserted into the same slot.
+ //
+ // This happens only on the highest level, when an entry is inserted
+ // more than MAX_DURATION into the future. When this happens, we wrap
+ // around, and process some entries a multiple of MAX_DURATION before
+ // they actually need to be dropped down a level. We then reinsert them
+ // back into the same position; we must make sure we don't then process
+ // those entries again or we'll end up in an infinite loop.
+ let mut entries = self.take_entries(expiration);
+
+ while let Some(item) = entries.pop_back() {
+ // On level 0 every entry in the slot shares the slot's deadline.
+ if expiration.level == 0 {
+ debug_assert_eq!(unsafe { item.cached_when() }, expiration.deadline);
+ }
+
+ // Try to expire the entry; this is cheap (doesn't synchronize) if
+ // the timer is not expired, and updates cached_when.
+ match unsafe { item.mark_pending(expiration.deadline) } {
+ Ok(()) => {
+ // Item was expired
+ self.pending.push_front(item);
+ }
+ Err(expiration_tick) => {
+ // Not expired yet: re-file it in the level that matches
+ // its expiration relative to the slot's deadline.
+ let level = level_for(expiration.deadline, expiration_tick);
+ unsafe {
+ self.levels[level].add_entry(item);
+ }
+ }
+ }
+ }
+ }
+
+ /// Moves the wheel's elapsed time forward to `when`.
+ ///
+ /// Panics if `when` is earlier than the current elapsed time.
+ fn set_elapsed(&mut self, when: u64) {
+ let current = self.elapsed;
+
+ assert!(
+ current <= when,
+ "elapsed={:?}; when={:?}",
+ current,
+ when
+ );
+
+ self.elapsed = std::cmp::max(current, when);
+ }
+
+ /// Obtains the list of entries that need processing for the given expiration.
+ ///
+ /// The slot named by `expiration` is emptied in the process.
+ fn take_entries(&mut self, expiration: &Expiration) -> EntryList {
+ self.levels[expiration.level].take_slot(expiration.slot)
+ }
+
+ /// Returns the level in which an entry expiring at `when` belongs, relative
+ /// to the wheel's current elapsed time.
+ fn level_for(&self, when: u64) -> usize {
+ level_for(self.elapsed, when)
+ }
+}
+
+/// Computes the wheel level for an entry expiring at `when`, given that the
+/// wheel is currently at `elapsed`.
+///
+/// The level is chosen from the position of the highest bit in which the two
+/// values differ, six bits per level.
+fn level_for(elapsed: u64, when: u64) -> usize {
+ const SLOT_MASK: u64 = (1 << 6) - 1;
+
+ // XOR exposes the bits in which `elapsed` and `when` differ. OR-ing in
+ // the low six bits, which the level calculation ignores anyway, keeps
+ // the value non-zero and so caps the number of leading zeros.
+ let differing = (elapsed ^ when) | SLOT_MASK;
+
+ // Fudge anything beyond the wheel's range into the top level.
+ let capped = if differing >= MAX_DURATION {
+ MAX_DURATION - 1
+ } else {
+ differing
+ };
+
+ let highest_set_bit = 63 - capped.leading_zeros() as usize;
+
+ highest_set_bit / 6
+}
+
+#[cfg(all(test, not(loom)))]
+mod test {
+ use super::*;
+
+ /// Checks `level_for` with `elapsed` fixed at 0: every value below 64 maps
+ /// to level 0, and values at or next to a slot boundary of levels 1 to 4
+ /// map to that level.
+ #[test]
+ fn test_level_for() {
+ for pos in 0..64 {
+ assert_eq!(
+ 0,
+ level_for(0, pos),
+ "level_for({}) -- binary = {:b}",
+ pos,
+ pos
+ );
+ }
+
+ for level in 1..5 {
+ for pos in level..64 {
+ // `a` is the start of slot `pos` at this level.
+ let a = pos * 64_usize.pow(level as u32);
+ assert_eq!(
+ level,
+ level_for(0, a as u64),
+ "level_for({}) -- binary = {:b}",
+ a,
+ a
+ );
+
+ // One tick below the slot start.
+ if pos > level {
+ let a = a - 1;
+ assert_eq!(
+ level,
+ level_for(0, a as u64),
+ "level_for({}) -- binary = {:b}",
+ a,
+ a
+ );
+ }
+
+ // One tick above the slot start. The condition is always true
+ // for this loop range (`pos` never reaches 64).
+ if pos < 64 {
+ let a = a + 1;
+ assert_eq!(
+ level,
+ level_for(0, a as u64),
+ "level_for({}) -- binary = {:b}",
+ a,
+ a
+ );
+ }
+ }
+ }
+ }
+}