From 218caa410aa38c29984be31a5229b9fa717560ee Mon Sep 17 00:00:00 2001 From: Daniel Baumann Date: Wed, 17 Apr 2024 14:19:13 +0200 Subject: Merging upstream version 1.68.2+dfsg1. Signed-off-by: Daniel Baumann --- library/std/src/sys/windows/c.rs | 13 +- library/std/src/sys/windows/mod.rs | 2 +- library/std/src/sys/windows/os.rs | 2 +- library/std/src/sys/windows/pipe.rs | 6 +- library/std/src/sys/windows/process.rs | 5 + library/std/src/sys/windows/rand.rs | 121 +++--------- library/std/src/sys/windows/thread.rs | 2 +- library/std/src/sys/windows/thread_parker.rs | 253 -------------------------- library/std/src/sys/windows/thread_parking.rs | 253 ++++++++++++++++++++++++++ 9 files changed, 297 insertions(+), 360 deletions(-) delete mode 100644 library/std/src/sys/windows/thread_parker.rs create mode 100644 library/std/src/sys/windows/thread_parking.rs (limited to 'library/std/src/sys/windows') diff --git a/library/std/src/sys/windows/c.rs b/library/std/src/sys/windows/c.rs index 81461de4f..f58dcf128 100644 --- a/library/std/src/sys/windows/c.rs +++ b/library/std/src/sys/windows/c.rs @@ -295,8 +295,6 @@ pub fn nt_success(status: NTSTATUS) -> bool { status >= 0 } -// "RNG\0" -pub const BCRYPT_RNG_ALGORITHM: &[u16] = &[b'R' as u16, b'N' as u16, b'G' as u16, 0]; pub const BCRYPT_USE_SYSTEM_PREFERRED_RNG: DWORD = 0x00000002; #[repr(C)] @@ -834,6 +832,10 @@ if #[cfg(not(target_vendor = "uwp"))] { #[link(name = "advapi32")] extern "system" { + // Forbidden when targeting UWP + #[link_name = "SystemFunction036"] + pub fn RtlGenRandom(RandomBuffer: *mut u8, RandomBufferLength: ULONG) -> BOOLEAN; + // Allowed but unused by UWP pub fn OpenProcessToken( ProcessHandle: HANDLE, @@ -1258,13 +1260,6 @@ extern "system" { cbBuffer: ULONG, dwFlags: ULONG, ) -> NTSTATUS; - pub fn BCryptOpenAlgorithmProvider( - phalgorithm: *mut BCRYPT_ALG_HANDLE, - pszAlgId: LPCWSTR, - pszimplementation: LPCWSTR, - dwflags: ULONG, - ) -> NTSTATUS; - pub fn BCryptCloseAlgorithmProvider(hAlgorithm: BCRYPT_ALG_HANDLE, dwFlags: ULONG) -> NTSTATUS; } // Functions that aren't available on every version of Windows that we support, diff --git a/library/std/src/sys/windows/mod.rs b/library/std/src/sys/windows/mod.rs index e67411e16..77359abe4 100644 --- a/library/std/src/sys/windows/mod.rs +++ b/library/std/src/sys/windows/mod.rs @@ -33,7 +33,7 @@ pub mod stdio; pub mod thread; pub mod thread_local_dtor; pub mod thread_local_key; -pub mod thread_parker; +pub mod thread_parking; pub mod time; cfg_if::cfg_if! { if #[cfg(not(target_vendor = "uwp"))] { diff --git a/library/std/src/sys/windows/os.rs b/library/std/src/sys/windows/os.rs index 352337ba3..d7adeb266 100644 --- a/library/std/src/sys/windows/os.rs +++ b/library/std/src/sys/windows/os.rs @@ -157,7 +157,7 @@ impl<'a> Iterator for SplitPaths<'a> { // Double quotes are used as a way of introducing literal semicolons // (since c:\some;dir is a valid Windows path). Double quotes are not // themselves permitted in path names, so there is no way to escape a - // double quote. Quoted regions can appear in arbitrary locations, so + // double quote. Quoted regions can appear in arbitrary locations, so // // c:\foo;c:\som"e;di"r;c:\bar // diff --git a/library/std/src/sys/windows/pipe.rs b/library/std/src/sys/windows/pipe.rs index 9f26acc45..7b25edaa5 100644 --- a/library/std/src/sys/windows/pipe.rs +++ b/library/std/src/sys/windows/pipe.rs @@ -1,7 +1,7 @@ use crate::os::windows::prelude::*; use crate::ffi::OsStr; -use crate::io::{self, IoSlice, IoSliceMut}; +use crate::io::{self, IoSlice, IoSliceMut, Read}; use crate::mem; use crate::path::Path; use crate::ptr; @@ -261,6 +261,10 @@ impl AnonPipe { self.inner.is_read_vectored() } + pub fn read_to_end(&self, buf: &mut Vec) -> io::Result { + self.handle().read_to_end(buf) + } + pub fn write(&self, buf: &[u8]) -> io::Result { unsafe { let len = crate::cmp::min(buf.len(), c::DWORD::MAX as usize) as c::DWORD; diff --git a/library/std/src/sys/windows/process.rs b/library/std/src/sys/windows/process.rs index 31e9b34fb..10bc949e1 100644 --- a/library/std/src/sys/windows/process.rs +++ b/library/std/src/sys/windows/process.rs @@ -351,6 +351,11 @@ impl Command { )) } } + + pub fn output(&mut self) -> io::Result<(ExitStatus, Vec, Vec)> { + let (proc, pipes) = self.spawn(Stdio::MakePipe, false)?; + crate::sys_common::process::wait_with_output(proc, pipes) + } } impl fmt::Debug for Command { diff --git a/library/std/src/sys/windows/rand.rs b/library/std/src/sys/windows/rand.rs index b5a49489d..cdf37cfe9 100644 --- a/library/std/src/sys/windows/rand.rs +++ b/library/std/src/sys/windows/rand.rs @@ -1,106 +1,39 @@ -//! # Random key generation -//! -//! This module wraps the RNG provided by the OS. There are a few different -//! ways to interface with the OS RNG so it's worth exploring each of the options. -//! Note that at the time of writing these all go through the (undocumented) -//! `bcryptPrimitives.dll` but they use different route to get there. -//! -//! Originally we were using [`RtlGenRandom`], however that function is -//! deprecated and warns it "may be altered or unavailable in subsequent versions". -//! -//! So we switched to [`BCryptGenRandom`] with the `BCRYPT_USE_SYSTEM_PREFERRED_RNG` -//! flag to query and find the system configured RNG. However, this change caused a small -//! but significant number of users to experience panics caused by a failure of -//! this function. See [#94098]. -//! -//! The current version falls back to using `BCryptOpenAlgorithmProvider` if -//! `BCRYPT_USE_SYSTEM_PREFERRED_RNG` fails for any reason. -//! -//! [#94098]: https://github.com/rust-lang/rust/issues/94098 -//! [`RtlGenRandom`]: https://docs.microsoft.com/en-us/windows/win32/api/ntsecapi/nf-ntsecapi-rtlgenrandom -//! [`BCryptGenRandom`]: https://docs.microsoft.com/en-us/windows/win32/api/bcrypt/nf-bcrypt-bcryptgenrandom +use crate::io; use crate::mem; use crate::ptr; use crate::sys::c; -/// Generates high quality secure random keys for use by [`HashMap`]. -/// -/// This is used to seed the default [`RandomState`]. -/// -/// [`HashMap`]: crate::collections::HashMap -/// [`RandomState`]: crate::collections::hash_map::RandomState pub fn hashmap_random_keys() -> (u64, u64) { - Rng::SYSTEM.gen_random_keys().unwrap_or_else(fallback_rng) + let mut v = (0, 0); + let ret = unsafe { + c::BCryptGenRandom( + ptr::null_mut(), + &mut v as *mut _ as *mut u8, + mem::size_of_val(&v) as c::ULONG, + c::BCRYPT_USE_SYSTEM_PREFERRED_RNG, + ) + }; + if c::nt_success(ret) { v } else { fallback_rng() } } -struct Rng { - algorithm: c::BCRYPT_ALG_HANDLE, - flags: u32, -} -impl Rng { - const SYSTEM: Self = unsafe { Self::new(ptr::null_mut(), c::BCRYPT_USE_SYSTEM_PREFERRED_RNG) }; - - /// Create the RNG from an existing algorithm handle. - /// - /// # Safety - /// - /// The handle must either be null or a valid algorithm handle. - const unsafe fn new(algorithm: c::BCRYPT_ALG_HANDLE, flags: u32) -> Self { - Self { algorithm, flags } - } - - /// Open a handle to the RNG algorithm. - fn open() -> Result { - use crate::sync::atomic::AtomicPtr; - use crate::sync::atomic::Ordering::{Acquire, Release}; - - // An atomic is used so we don't need to reopen the handle every time. - static HANDLE: AtomicPtr = AtomicPtr::new(ptr::null_mut()); - - let mut handle = HANDLE.load(Acquire); - if handle.is_null() { - let status = unsafe { - c::BCryptOpenAlgorithmProvider( - &mut handle, - c::BCRYPT_RNG_ALGORITHM.as_ptr(), - ptr::null(), - 0, - ) - }; - if c::nt_success(status) { - // If another thread opens a handle first then use that handle instead. - let result = HANDLE.compare_exchange(ptr::null_mut(), handle, Release, Acquire); - if let Err(previous_handle) = result { - // Close our handle and return the previous one. - unsafe { c::BCryptCloseAlgorithmProvider(handle, 0) }; - handle = previous_handle; - } - Ok(unsafe { Self::new(handle, 0) }) - } else { - Err(status) - } - } else { - Ok(unsafe { Self::new(handle, 0) }) - } - } +/// Generate random numbers using the fallback RNG function (RtlGenRandom) +/// +/// This is necessary because of a failure to load the SysWOW64 variant of the +/// bcryptprimitives.dll library from code that lives in bcrypt.dll +/// See +#[cfg(not(target_vendor = "uwp"))] +#[inline(never)] +fn fallback_rng() -> (u64, u64) { + let mut v = (0, 0); + let ret = + unsafe { c::RtlGenRandom(&mut v as *mut _ as *mut u8, mem::size_of_val(&v) as c::ULONG) }; - fn gen_random_keys(self) -> Result<(u64, u64), c::NTSTATUS> { - let mut v = (0, 0); - let status = unsafe { - let size = mem::size_of_val(&v).try_into().unwrap(); - c::BCryptGenRandom(self.algorithm, ptr::addr_of_mut!(v).cast(), size, self.flags) - }; - if c::nt_success(status) { Ok(v) } else { Err(status) } - } + if ret != 0 { v } else { panic!("fallback RNG broken: {}", io::Error::last_os_error()) } } -/// Generate random numbers using the fallback RNG function +/// We can't use RtlGenRandom with UWP, so there is no fallback +#[cfg(target_vendor = "uwp")] #[inline(never)] -fn fallback_rng(rng_status: c::NTSTATUS) -> (u64, u64) { - match Rng::open().and_then(|rng| rng.gen_random_keys()) { - Ok(keys) => keys, - Err(status) => { - panic!("RNG broken: {rng_status:#x}, fallback RNG broken: {status:#x}") - } - } +fn fallback_rng() -> (u64, u64) { + panic!("fallback RNG broken: RtlGenRandom() not supported on UWP"); } diff --git a/library/std/src/sys/windows/thread.rs b/library/std/src/sys/windows/thread.rs index c5c9e97e6..1cb576c95 100644 --- a/library/std/src/sys/windows/thread.rs +++ b/library/std/src/sys/windows/thread.rs @@ -26,7 +26,7 @@ impl Thread { // FIXME On UNIX, we guard against stack sizes that are too small but // that's because pthreads enforces that stacks are at least - // PTHREAD_STACK_MIN bytes big. Windows has no such lower limit, it's + // PTHREAD_STACK_MIN bytes big. Windows has no such lower limit, it's // just that below a certain threshold you can't do anything useful. // That threshold is application and architecture-specific, however. let ret = c::CreateThread( diff --git a/library/std/src/sys/windows/thread_parker.rs b/library/std/src/sys/windows/thread_parker.rs deleted file mode 100644 index 2f7ae863b..000000000 --- a/library/std/src/sys/windows/thread_parker.rs +++ /dev/null @@ -1,253 +0,0 @@ -// Thread parker implementation for Windows. -// -// This uses WaitOnAddress and WakeByAddressSingle if available (Windows 8+). -// This modern API is exactly the same as the futex syscalls the Linux thread -// parker uses. When These APIs are available, the implementation of this -// thread parker matches the Linux thread parker exactly. -// -// However, when the modern API is not available, this implementation falls -// back to NT Keyed Events, which are similar, but have some important -// differences. These are available since Windows XP. -// -// WaitOnAddress first checks the state of the thread parker to make sure it no -// WakeByAddressSingle calls can be missed between updating the parker state -// and calling the function. -// -// NtWaitForKeyedEvent does not have this option, and unconditionally blocks -// without checking the parker state first. Instead, NtReleaseKeyedEvent -// (unlike WakeByAddressSingle) *blocks* until it woke up a thread waiting for -// it by NtWaitForKeyedEvent. This way, we can be sure no events are missed, -// but we need to be careful not to block unpark() if park_timeout() was woken -// up by a timeout instead of unpark(). -// -// Unlike WaitOnAddress, NtWaitForKeyedEvent/NtReleaseKeyedEvent operate on a -// HANDLE (created with NtCreateKeyedEvent). This means that we can be sure -// a successfully awoken park() was awoken by unpark() and not a -// NtReleaseKeyedEvent call from some other code, as these events are not only -// matched by the key (address of the parker (state)), but also by this HANDLE. -// We lazily allocate this handle the first time it is needed. -// -// The fast path (calling park() after unpark() was already called) and the -// possible states are the same for both implementations. This is used here to -// make sure the fast path does not even check which API to use, but can return -// right away, independent of the used API. Only the slow paths (which will -// actually block/wake a thread) check which API is available and have -// different implementations. -// -// Unfortunately, NT Keyed Events are an undocumented Windows API. However: -// - This API is relatively simple with obvious behaviour, and there are -// several (unofficial) articles documenting the details. [1] -// - `parking_lot` has been using this API for years (on Windows versions -// before Windows 8). [2] Many big projects extensively use parking_lot, -// such as servo and the Rust compiler itself. -// - It is the underlying API used by Windows SRW locks and Windows critical -// sections. [3] [4] -// - The source code of the implementations of Wine, ReactOs, and Windows XP -// are available and match the expected behaviour. -// - The main risk with an undocumented API is that it might change in the -// future. But since we only use it for older versions of Windows, that's not -// a problem. -// - Even if these functions do not block or wake as we expect (which is -// unlikely, see all previous points), this implementation would still be -// memory safe. The NT Keyed Events API is only used to sleep/block in the -// right place. -// -// [1]: http://www.locklessinc.com/articles/keyed_events/ -// [2]: https://github.com/Amanieu/parking_lot/commit/43abbc964e -// [3]: https://docs.microsoft.com/en-us/archive/msdn-magazine/2012/november/windows-with-c-the-evolution-of-synchronization-in-windows-and-c -// [4]: Windows Internals, Part 1, ISBN 9780735671300 - -use crate::pin::Pin; -use crate::ptr; -use crate::sync::atomic::{ - AtomicI8, AtomicPtr, - Ordering::{Acquire, Relaxed, Release}, -}; -use crate::sys::{c, dur2timeout}; -use crate::time::Duration; - -pub struct Parker { - state: AtomicI8, -} - -const PARKED: i8 = -1; -const EMPTY: i8 = 0; -const NOTIFIED: i8 = 1; - -// Notes about memory ordering: -// -// Memory ordering is only relevant for the relative ordering of operations -// between different variables. Even Ordering::Relaxed guarantees a -// monotonic/consistent order when looking at just a single atomic variable. -// -// So, since this parker is just a single atomic variable, we only need to look -// at the ordering guarantees we need to provide to the 'outside world'. -// -// The only memory ordering guarantee that parking and unparking provide, is -// that things which happened before unpark() are visible on the thread -// returning from park() afterwards. Otherwise, it was effectively unparked -// before unpark() was called while still consuming the 'token'. -// -// In other words, unpark() needs to synchronize with the part of park() that -// consumes the token and returns. -// -// This is done with a release-acquire synchronization, by using -// Ordering::Release when writing NOTIFIED (the 'token') in unpark(), and using -// Ordering::Acquire when reading this state in park() after waking up. -impl Parker { - /// Construct the Windows parker. The UNIX parker implementation - /// requires this to happen in-place. - pub unsafe fn new(parker: *mut Parker) { - parker.write(Self { state: AtomicI8::new(EMPTY) }); - } - - // Assumes this is only called by the thread that owns the Parker, - // which means that `self.state != PARKED`. This implementation doesn't require `Pin`, - // but other implementations do. - pub unsafe fn park(self: Pin<&Self>) { - // Change NOTIFIED=>EMPTY or EMPTY=>PARKED, and directly return in the - // first case. - if self.state.fetch_sub(1, Acquire) == NOTIFIED { - return; - } - - if let Some(wait_on_address) = c::WaitOnAddress::option() { - loop { - // Wait for something to happen, assuming it's still set to PARKED. - wait_on_address(self.ptr(), &PARKED as *const _ as c::LPVOID, 1, c::INFINITE); - // Change NOTIFIED=>EMPTY but leave PARKED alone. - if self.state.compare_exchange(NOTIFIED, EMPTY, Acquire, Acquire).is_ok() { - // Actually woken up by unpark(). - return; - } else { - // Spurious wake up. We loop to try again. - } - } - } else { - // Wait for unpark() to produce this event. - c::NtWaitForKeyedEvent(keyed_event_handle(), self.ptr(), 0, ptr::null_mut()); - // Set the state back to EMPTY (from either PARKED or NOTIFIED). - // Note that we don't just write EMPTY, but use swap() to also - // include an acquire-ordered read to synchronize with unpark()'s - // release-ordered write. - self.state.swap(EMPTY, Acquire); - } - } - - // Assumes this is only called by the thread that owns the Parker, - // which means that `self.state != PARKED`. This implementation doesn't require `Pin`, - // but other implementations do. - pub unsafe fn park_timeout(self: Pin<&Self>, timeout: Duration) { - // Change NOTIFIED=>EMPTY or EMPTY=>PARKED, and directly return in the - // first case. - if self.state.fetch_sub(1, Acquire) == NOTIFIED { - return; - } - - if let Some(wait_on_address) = c::WaitOnAddress::option() { - // Wait for something to happen, assuming it's still set to PARKED. - wait_on_address(self.ptr(), &PARKED as *const _ as c::LPVOID, 1, dur2timeout(timeout)); - // Set the state back to EMPTY (from either PARKED or NOTIFIED). - // Note that we don't just write EMPTY, but use swap() to also - // include an acquire-ordered read to synchronize with unpark()'s - // release-ordered write. - if self.state.swap(EMPTY, Acquire) == NOTIFIED { - // Actually woken up by unpark(). - } else { - // Timeout or spurious wake up. - // We return either way, because we can't easily tell if it was the - // timeout or not. - } - } else { - // Need to wait for unpark() using NtWaitForKeyedEvent. - let handle = keyed_event_handle(); - - // NtWaitForKeyedEvent uses a unit of 100ns, and uses negative - // values to indicate a relative time on the monotonic clock. - // This is documented here for the underlying KeWaitForSingleObject function: - // https://docs.microsoft.com/en-us/windows-hardware/drivers/ddi/wdm/nf-wdm-kewaitforsingleobject - let mut timeout = match i64::try_from((timeout.as_nanos() + 99) / 100) { - Ok(t) => -t, - Err(_) => i64::MIN, - }; - - // Wait for unpark() to produce this event. - let unparked = - c::NtWaitForKeyedEvent(handle, self.ptr(), 0, &mut timeout) == c::STATUS_SUCCESS; - - // Set the state back to EMPTY (from either PARKED or NOTIFIED). - let prev_state = self.state.swap(EMPTY, Acquire); - - if !unparked && prev_state == NOTIFIED { - // We were awoken by a timeout, not by unpark(), but the state - // was set to NOTIFIED, which means we *just* missed an - // unpark(), which is now blocked on us to wait for it. - // Wait for it to consume the event and unblock that thread. - c::NtWaitForKeyedEvent(handle, self.ptr(), 0, ptr::null_mut()); - } - } - } - - // This implementation doesn't require `Pin`, but other implementations do. - pub fn unpark(self: Pin<&Self>) { - // Change PARKED=>NOTIFIED, EMPTY=>NOTIFIED, or NOTIFIED=>NOTIFIED, and - // wake the thread in the first case. - // - // Note that even NOTIFIED=>NOTIFIED results in a write. This is on - // purpose, to make sure every unpark() has a release-acquire ordering - // with park(). - if self.state.swap(NOTIFIED, Release) == PARKED { - unsafe { - if let Some(wake_by_address_single) = c::WakeByAddressSingle::option() { - wake_by_address_single(self.ptr()); - } else { - // If we run NtReleaseKeyedEvent before the waiting thread runs - // NtWaitForKeyedEvent, this (shortly) blocks until we can wake it up. - // If the waiting thread wakes up before we run NtReleaseKeyedEvent - // (e.g. due to a timeout), this blocks until we do wake up a thread. - // To prevent this thread from blocking indefinitely in that case, - // park_impl() will, after seeing the state set to NOTIFIED after - // waking up, call NtWaitForKeyedEvent again to unblock us. - c::NtReleaseKeyedEvent(keyed_event_handle(), self.ptr(), 0, ptr::null_mut()); - } - } - } - } - - fn ptr(&self) -> c::LPVOID { - &self.state as *const _ as c::LPVOID - } -} - -fn keyed_event_handle() -> c::HANDLE { - const INVALID: c::HANDLE = ptr::invalid_mut(!0); - static HANDLE: AtomicPtr = AtomicPtr::new(INVALID); - match HANDLE.load(Relaxed) { - INVALID => { - let mut handle = c::INVALID_HANDLE_VALUE; - unsafe { - match c::NtCreateKeyedEvent( - &mut handle, - c::GENERIC_READ | c::GENERIC_WRITE, - ptr::null_mut(), - 0, - ) { - c::STATUS_SUCCESS => {} - r => panic!("Unable to create keyed event handle: error {r}"), - } - } - match HANDLE.compare_exchange(INVALID, handle, Relaxed, Relaxed) { - Ok(_) => handle, - Err(h) => { - // Lost the race to another thread initializing HANDLE before we did. - // Closing our handle and using theirs instead. - unsafe { - c::CloseHandle(handle); - } - h - } - } - } - handle => handle, - } -} diff --git a/library/std/src/sys/windows/thread_parking.rs b/library/std/src/sys/windows/thread_parking.rs new file mode 100644 index 000000000..5d43676ad --- /dev/null +++ b/library/std/src/sys/windows/thread_parking.rs @@ -0,0 +1,253 @@ +// Thread parker implementation for Windows. +// +// This uses WaitOnAddress and WakeByAddressSingle if available (Windows 8+). +// This modern API is exactly the same as the futex syscalls the Linux thread +// parker uses. When These APIs are available, the implementation of this +// thread parker matches the Linux thread parker exactly. +// +// However, when the modern API is not available, this implementation falls +// back to NT Keyed Events, which are similar, but have some important +// differences. These are available since Windows XP. +// +// WaitOnAddress first checks the state of the thread parker to make sure it no +// WakeByAddressSingle calls can be missed between updating the parker state +// and calling the function. +// +// NtWaitForKeyedEvent does not have this option, and unconditionally blocks +// without checking the parker state first. Instead, NtReleaseKeyedEvent +// (unlike WakeByAddressSingle) *blocks* until it woke up a thread waiting for +// it by NtWaitForKeyedEvent. This way, we can be sure no events are missed, +// but we need to be careful not to block unpark() if park_timeout() was woken +// up by a timeout instead of unpark(). +// +// Unlike WaitOnAddress, NtWaitForKeyedEvent/NtReleaseKeyedEvent operate on a +// HANDLE (created with NtCreateKeyedEvent). This means that we can be sure +// a successfully awoken park() was awoken by unpark() and not a +// NtReleaseKeyedEvent call from some other code, as these events are not only +// matched by the key (address of the parker (state)), but also by this HANDLE. +// We lazily allocate this handle the first time it is needed. +// +// The fast path (calling park() after unpark() was already called) and the +// possible states are the same for both implementations. This is used here to +// make sure the fast path does not even check which API to use, but can return +// right away, independent of the used API. Only the slow paths (which will +// actually block/wake a thread) check which API is available and have +// different implementations. +// +// Unfortunately, NT Keyed Events are an undocumented Windows API. However: +// - This API is relatively simple with obvious behaviour, and there are +// several (unofficial) articles documenting the details. [1] +// - `parking_lot` has been using this API for years (on Windows versions +// before Windows 8). [2] Many big projects extensively use parking_lot, +// such as servo and the Rust compiler itself. +// - It is the underlying API used by Windows SRW locks and Windows critical +// sections. [3] [4] +// - The source code of the implementations of Wine, ReactOs, and Windows XP +// are available and match the expected behaviour. +// - The main risk with an undocumented API is that it might change in the +// future. But since we only use it for older versions of Windows, that's not +// a problem. +// - Even if these functions do not block or wake as we expect (which is +// unlikely, see all previous points), this implementation would still be +// memory safe. The NT Keyed Events API is only used to sleep/block in the +// right place. +// +// [1]: http://www.locklessinc.com/articles/keyed_events/ +// [2]: https://github.com/Amanieu/parking_lot/commit/43abbc964e +// [3]: https://docs.microsoft.com/en-us/archive/msdn-magazine/2012/november/windows-with-c-the-evolution-of-synchronization-in-windows-and-c +// [4]: Windows Internals, Part 1, ISBN 9780735671300 + +use crate::pin::Pin; +use crate::ptr; +use crate::sync::atomic::{ + AtomicI8, AtomicPtr, + Ordering::{Acquire, Relaxed, Release}, +}; +use crate::sys::{c, dur2timeout}; +use crate::time::Duration; + +pub struct Parker { + state: AtomicI8, +} + +const PARKED: i8 = -1; +const EMPTY: i8 = 0; +const NOTIFIED: i8 = 1; + +// Notes about memory ordering: +// +// Memory ordering is only relevant for the relative ordering of operations +// between different variables. Even Ordering::Relaxed guarantees a +// monotonic/consistent order when looking at just a single atomic variable. +// +// So, since this parker is just a single atomic variable, we only need to look +// at the ordering guarantees we need to provide to the 'outside world'. +// +// The only memory ordering guarantee that parking and unparking provide, is +// that things which happened before unpark() are visible on the thread +// returning from park() afterwards. Otherwise, it was effectively unparked +// before unpark() was called while still consuming the 'token'. +// +// In other words, unpark() needs to synchronize with the part of park() that +// consumes the token and returns. +// +// This is done with a release-acquire synchronization, by using +// Ordering::Release when writing NOTIFIED (the 'token') in unpark(), and using +// Ordering::Acquire when reading this state in park() after waking up. +impl Parker { + /// Construct the Windows parker. The UNIX parker implementation + /// requires this to happen in-place. + pub unsafe fn new_in_place(parker: *mut Parker) { + parker.write(Self { state: AtomicI8::new(EMPTY) }); + } + + // Assumes this is only called by the thread that owns the Parker, + // which means that `self.state != PARKED`. This implementation doesn't require `Pin`, + // but other implementations do. + pub unsafe fn park(self: Pin<&Self>) { + // Change NOTIFIED=>EMPTY or EMPTY=>PARKED, and directly return in the + // first case. + if self.state.fetch_sub(1, Acquire) == NOTIFIED { + return; + } + + if let Some(wait_on_address) = c::WaitOnAddress::option() { + loop { + // Wait for something to happen, assuming it's still set to PARKED. + wait_on_address(self.ptr(), &PARKED as *const _ as c::LPVOID, 1, c::INFINITE); + // Change NOTIFIED=>EMPTY but leave PARKED alone. + if self.state.compare_exchange(NOTIFIED, EMPTY, Acquire, Acquire).is_ok() { + // Actually woken up by unpark(). + return; + } else { + // Spurious wake up. We loop to try again. + } + } + } else { + // Wait for unpark() to produce this event. + c::NtWaitForKeyedEvent(keyed_event_handle(), self.ptr(), 0, ptr::null_mut()); + // Set the state back to EMPTY (from either PARKED or NOTIFIED). + // Note that we don't just write EMPTY, but use swap() to also + // include an acquire-ordered read to synchronize with unpark()'s + // release-ordered write. + self.state.swap(EMPTY, Acquire); + } + } + + // Assumes this is only called by the thread that owns the Parker, + // which means that `self.state != PARKED`. This implementation doesn't require `Pin`, + // but other implementations do. + pub unsafe fn park_timeout(self: Pin<&Self>, timeout: Duration) { + // Change NOTIFIED=>EMPTY or EMPTY=>PARKED, and directly return in the + // first case. + if self.state.fetch_sub(1, Acquire) == NOTIFIED { + return; + } + + if let Some(wait_on_address) = c::WaitOnAddress::option() { + // Wait for something to happen, assuming it's still set to PARKED. + wait_on_address(self.ptr(), &PARKED as *const _ as c::LPVOID, 1, dur2timeout(timeout)); + // Set the state back to EMPTY (from either PARKED or NOTIFIED). + // Note that we don't just write EMPTY, but use swap() to also + // include an acquire-ordered read to synchronize with unpark()'s + // release-ordered write. + if self.state.swap(EMPTY, Acquire) == NOTIFIED { + // Actually woken up by unpark(). + } else { + // Timeout or spurious wake up. + // We return either way, because we can't easily tell if it was the + // timeout or not. + } + } else { + // Need to wait for unpark() using NtWaitForKeyedEvent. + let handle = keyed_event_handle(); + + // NtWaitForKeyedEvent uses a unit of 100ns, and uses negative + // values to indicate a relative time on the monotonic clock. + // This is documented here for the underlying KeWaitForSingleObject function: + // https://docs.microsoft.com/en-us/windows-hardware/drivers/ddi/wdm/nf-wdm-kewaitforsingleobject + let mut timeout = match i64::try_from((timeout.as_nanos() + 99) / 100) { + Ok(t) => -t, + Err(_) => i64::MIN, + }; + + // Wait for unpark() to produce this event. + let unparked = + c::NtWaitForKeyedEvent(handle, self.ptr(), 0, &mut timeout) == c::STATUS_SUCCESS; + + // Set the state back to EMPTY (from either PARKED or NOTIFIED). + let prev_state = self.state.swap(EMPTY, Acquire); + + if !unparked && prev_state == NOTIFIED { + // We were awoken by a timeout, not by unpark(), but the state + // was set to NOTIFIED, which means we *just* missed an + // unpark(), which is now blocked on us to wait for it. + // Wait for it to consume the event and unblock that thread. + c::NtWaitForKeyedEvent(handle, self.ptr(), 0, ptr::null_mut()); + } + } + } + + // This implementation doesn't require `Pin`, but other implementations do. + pub fn unpark(self: Pin<&Self>) { + // Change PARKED=>NOTIFIED, EMPTY=>NOTIFIED, or NOTIFIED=>NOTIFIED, and + // wake the thread in the first case. + // + // Note that even NOTIFIED=>NOTIFIED results in a write. This is on + // purpose, to make sure every unpark() has a release-acquire ordering + // with park(). + if self.state.swap(NOTIFIED, Release) == PARKED { + unsafe { + if let Some(wake_by_address_single) = c::WakeByAddressSingle::option() { + wake_by_address_single(self.ptr()); + } else { + // If we run NtReleaseKeyedEvent before the waiting thread runs + // NtWaitForKeyedEvent, this (shortly) blocks until we can wake it up. + // If the waiting thread wakes up before we run NtReleaseKeyedEvent + // (e.g. due to a timeout), this blocks until we do wake up a thread. + // To prevent this thread from blocking indefinitely in that case, + // park_impl() will, after seeing the state set to NOTIFIED after + // waking up, call NtWaitForKeyedEvent again to unblock us. + c::NtReleaseKeyedEvent(keyed_event_handle(), self.ptr(), 0, ptr::null_mut()); + } + } + } + } + + fn ptr(&self) -> c::LPVOID { + &self.state as *const _ as c::LPVOID + } +} + +fn keyed_event_handle() -> c::HANDLE { + const INVALID: c::HANDLE = ptr::invalid_mut(!0); + static HANDLE: AtomicPtr = AtomicPtr::new(INVALID); + match HANDLE.load(Relaxed) { + INVALID => { + let mut handle = c::INVALID_HANDLE_VALUE; + unsafe { + match c::NtCreateKeyedEvent( + &mut handle, + c::GENERIC_READ | c::GENERIC_WRITE, + ptr::null_mut(), + 0, + ) { + c::STATUS_SUCCESS => {} + r => panic!("Unable to create keyed event handle: error {r}"), + } + } + match HANDLE.compare_exchange(INVALID, handle, Relaxed, Relaxed) { + Ok(_) => handle, + Err(h) => { + // Lost the race to another thread initializing HANDLE before we did. + // Closing our handle and using theirs instead. + unsafe { + c::CloseHandle(handle); + } + h + } + } + } + handle => handle, + } +} -- cgit v1.2.3