// Thread parker implementation for Windows. // // This uses WaitOnAddress and WakeByAddressSingle if available (Windows 8+). // This modern API is exactly the same as the futex syscalls the Linux thread // parker uses. When These APIs are available, the implementation of this // thread parker matches the Linux thread parker exactly. // // However, when the modern API is not available, this implementation falls // back to NT Keyed Events, which are similar, but have some important // differences. These are available since Windows XP. // // WaitOnAddress first checks the state of the thread parker to make sure it no // WakeByAddressSingle calls can be missed between updating the parker state // and calling the function. // // NtWaitForKeyedEvent does not have this option, and unconditionally blocks // without checking the parker state first. Instead, NtReleaseKeyedEvent // (unlike WakeByAddressSingle) *blocks* until it woke up a thread waiting for // it by NtWaitForKeyedEvent. This way, we can be sure no events are missed, // but we need to be careful not to block unpark() if park_timeout() was woken // up by a timeout instead of unpark(). // // Unlike WaitOnAddress, NtWaitForKeyedEvent/NtReleaseKeyedEvent operate on a // HANDLE (created with NtCreateKeyedEvent). This means that we can be sure // a successfully awoken park() was awoken by unpark() and not a // NtReleaseKeyedEvent call from some other code, as these events are not only // matched by the key (address of the parker (state)), but also by this HANDLE. // We lazily allocate this handle the first time it is needed. // // The fast path (calling park() after unpark() was already called) and the // possible states are the same for both implementations. This is used here to // make sure the fast path does not even check which API to use, but can return // right away, independent of the used API. Only the slow paths (which will // actually block/wake a thread) check which API is available and have // different implementations. // // Unfortunately, NT Keyed Events are an undocumented Windows API. However: // - This API is relatively simple with obvious behaviour, and there are // several (unofficial) articles documenting the details. [1] // - `parking_lot` has been using this API for years (on Windows versions // before Windows 8). [2] Many big projects extensively use parking_lot, // such as servo and the Rust compiler itself. // - It is the underlying API used by Windows SRW locks and Windows critical // sections. [3] [4] // - The source code of the implementations of Wine, ReactOs, and Windows XP // are available and match the expected behaviour. // - The main risk with an undocumented API is that it might change in the // future. But since we only use it for older versions of Windows, that's not // a problem. // - Even if these functions do not block or wake as we expect (which is // unlikely, see all previous points), this implementation would still be // memory safe. The NT Keyed Events API is only used to sleep/block in the // right place. // // [1]: http://www.locklessinc.com/articles/keyed_events/ // [2]: https://github.com/Amanieu/parking_lot/commit/43abbc964e // [3]: https://docs.microsoft.com/en-us/archive/msdn-magazine/2012/november/windows-with-c-the-evolution-of-synchronization-in-windows-and-c // [4]: Windows Internals, Part 1, ISBN 9780735671300 use crate::pin::Pin; use crate::ptr; use crate::sync::atomic::{ AtomicI8, AtomicPtr, Ordering::{Acquire, Relaxed, Release}, }; use crate::sys::{c, dur2timeout}; use crate::time::Duration; pub struct Parker { state: AtomicI8, } const PARKED: i8 = -1; const EMPTY: i8 = 0; const NOTIFIED: i8 = 1; // Notes about memory ordering: // // Memory ordering is only relevant for the relative ordering of operations // between different variables. Even Ordering::Relaxed guarantees a // monotonic/consistent order when looking at just a single atomic variable. // // So, since this parker is just a single atomic variable, we only need to look // at the ordering guarantees we need to provide to the 'outside world'. // // The only memory ordering guarantee that parking and unparking provide, is // that things which happened before unpark() are visible on the thread // returning from park() afterwards. Otherwise, it was effectively unparked // before unpark() was called while still consuming the 'token'. // // In other words, unpark() needs to synchronize with the part of park() that // consumes the token and returns. // // This is done with a release-acquire synchronization, by using // Ordering::Release when writing NOTIFIED (the 'token') in unpark(), and using // Ordering::Acquire when reading this state in park() after waking up. impl Parker { /// Construct the Windows parker. The UNIX parker implementation /// requires this to happen in-place. pub unsafe fn new_in_place(parker: *mut Parker) { parker.write(Self { state: AtomicI8::new(EMPTY) }); } // Assumes this is only called by the thread that owns the Parker, // which means that `self.state != PARKED`. This implementation doesn't require `Pin`, // but other implementations do. pub unsafe fn park(self: Pin<&Self>) { // Change NOTIFIED=>EMPTY or EMPTY=>PARKED, and directly return in the // first case. if self.state.fetch_sub(1, Acquire) == NOTIFIED { return; } if let Some(wait_on_address) = c::WaitOnAddress::option() { loop { // Wait for something to happen, assuming it's still set to PARKED. wait_on_address(self.ptr(), &PARKED as *const _ as c::LPVOID, 1, c::INFINITE); // Change NOTIFIED=>EMPTY but leave PARKED alone. if self.state.compare_exchange(NOTIFIED, EMPTY, Acquire, Acquire).is_ok() { // Actually woken up by unpark(). return; } else { // Spurious wake up. We loop to try again. } } } else { // Wait for unpark() to produce this event. c::NtWaitForKeyedEvent(keyed_event_handle(), self.ptr(), 0, ptr::null_mut()); // Set the state back to EMPTY (from either PARKED or NOTIFIED). // Note that we don't just write EMPTY, but use swap() to also // include an acquire-ordered read to synchronize with unpark()'s // release-ordered write. self.state.swap(EMPTY, Acquire); } } // Assumes this is only called by the thread that owns the Parker, // which means that `self.state != PARKED`. This implementation doesn't require `Pin`, // but other implementations do. pub unsafe fn park_timeout(self: Pin<&Self>, timeout: Duration) { // Change NOTIFIED=>EMPTY or EMPTY=>PARKED, and directly return in the // first case. if self.state.fetch_sub(1, Acquire) == NOTIFIED { return; } if let Some(wait_on_address) = c::WaitOnAddress::option() { // Wait for something to happen, assuming it's still set to PARKED. wait_on_address(self.ptr(), &PARKED as *const _ as c::LPVOID, 1, dur2timeout(timeout)); // Set the state back to EMPTY (from either PARKED or NOTIFIED). // Note that we don't just write EMPTY, but use swap() to also // include an acquire-ordered read to synchronize with unpark()'s // release-ordered write. if self.state.swap(EMPTY, Acquire) == NOTIFIED { // Actually woken up by unpark(). } else { // Timeout or spurious wake up. // We return either way, because we can't easily tell if it was the // timeout or not. } } else { // Need to wait for unpark() using NtWaitForKeyedEvent. let handle = keyed_event_handle(); // NtWaitForKeyedEvent uses a unit of 100ns, and uses negative // values to indicate a relative time on the monotonic clock. // This is documented here for the underlying KeWaitForSingleObject function: // https://docs.microsoft.com/en-us/windows-hardware/drivers/ddi/wdm/nf-wdm-kewaitforsingleobject let mut timeout = match i64::try_from((timeout.as_nanos() + 99) / 100) { Ok(t) => -t, Err(_) => i64::MIN, }; // Wait for unpark() to produce this event. let unparked = c::NtWaitForKeyedEvent(handle, self.ptr(), 0, &mut timeout) == c::STATUS_SUCCESS; // Set the state back to EMPTY (from either PARKED or NOTIFIED). let prev_state = self.state.swap(EMPTY, Acquire); if !unparked && prev_state == NOTIFIED { // We were awoken by a timeout, not by unpark(), but the state // was set to NOTIFIED, which means we *just* missed an // unpark(), which is now blocked on us to wait for it. // Wait for it to consume the event and unblock that thread. c::NtWaitForKeyedEvent(handle, self.ptr(), 0, ptr::null_mut()); } } } // This implementation doesn't require `Pin`, but other implementations do. pub fn unpark(self: Pin<&Self>) { // Change PARKED=>NOTIFIED, EMPTY=>NOTIFIED, or NOTIFIED=>NOTIFIED, and // wake the thread in the first case. // // Note that even NOTIFIED=>NOTIFIED results in a write. This is on // purpose, to make sure every unpark() has a release-acquire ordering // with park(). if self.state.swap(NOTIFIED, Release) == PARKED { unsafe { if let Some(wake_by_address_single) = c::WakeByAddressSingle::option() { wake_by_address_single(self.ptr()); } else { // If we run NtReleaseKeyedEvent before the waiting thread runs // NtWaitForKeyedEvent, this (shortly) blocks until we can wake it up. // If the waiting thread wakes up before we run NtReleaseKeyedEvent // (e.g. due to a timeout), this blocks until we do wake up a thread. // To prevent this thread from blocking indefinitely in that case, // park_impl() will, after seeing the state set to NOTIFIED after // waking up, call NtWaitForKeyedEvent again to unblock us. c::NtReleaseKeyedEvent(keyed_event_handle(), self.ptr(), 0, ptr::null_mut()); } } } } fn ptr(&self) -> c::LPVOID { &self.state as *const _ as c::LPVOID } } fn keyed_event_handle() -> c::HANDLE { const INVALID: c::HANDLE = ptr::invalid_mut(!0); static HANDLE: AtomicPtr = AtomicPtr::new(INVALID); match HANDLE.load(Relaxed) { INVALID => { let mut handle = c::INVALID_HANDLE_VALUE; unsafe { match c::NtCreateKeyedEvent( &mut handle, c::GENERIC_READ | c::GENERIC_WRITE, ptr::null_mut(), 0, ) { c::STATUS_SUCCESS => {} r => panic!("Unable to create keyed event handle: error {r}"), } } match HANDLE.compare_exchange(INVALID, handle, Relaxed, Relaxed) { Ok(_) => handle, Err(h) => { // Lost the race to another thread initializing HANDLE before we did. // Closing our handle and using theirs instead. unsafe { c::CloseHandle(handle); } h } } } handle => handle, } }