9 files changed, 297 insertions, 360 deletions
diff --git a/library/std/src/sys/windows/c.rs b/library/std/src/sys/windows/c.rs
index 81461de4f..f58dcf128 100644
--- a/library/std/src/sys/windows/c.rs
+++ b/library/std/src/sys/windows/c.rs
@@ -295,8 +295,6 @@ pub fn nt_success(status: NTSTATUS) -> bool {
     status >= 0
 }
 
-// "RNG\0"
-pub const BCRYPT_RNG_ALGORITHM: &[u16] = &[b'R' as u16, b'N' as u16, b'G' as u16, 0];
 pub const BCRYPT_USE_SYSTEM_PREFERRED_RNG: DWORD = 0x00000002;
 
 #[repr(C)]
@@ -834,6 +832,10 @@ if #[cfg(not(target_vendor = "uwp"))] {
 
     #[link(name = "advapi32")]
     extern "system" {
+        // Forbidden when targeting UWP
+        #[link_name = "SystemFunction036"]
+        pub fn RtlGenRandom(RandomBuffer: *mut u8, RandomBufferLength: ULONG) -> BOOLEAN;
+
         // Allowed but unused by UWP
         pub fn OpenProcessToken(
             ProcessHandle: HANDLE,
@@ -1258,13 +1260,6 @@ extern "system" {
         cbBuffer: ULONG,
         dwFlags: ULONG,
     ) -> NTSTATUS;
-    pub fn BCryptOpenAlgorithmProvider(
-        phalgorithm: *mut BCRYPT_ALG_HANDLE,
-        pszAlgId: LPCWSTR,
-        pszimplementation: LPCWSTR,
-        dwflags: ULONG,
-    ) -> NTSTATUS;
-    pub fn BCryptCloseAlgorithmProvider(hAlgorithm: BCRYPT_ALG_HANDLE, dwFlags: ULONG) -> NTSTATUS;
 }
 
 // Functions that aren't available on every version of Windows that we support,
diff --git a/library/std/src/sys/windows/mod.rs b/library/std/src/sys/windows/mod.rs
index e67411e16..77359abe4 100644
--- a/library/std/src/sys/windows/mod.rs
+++ b/library/std/src/sys/windows/mod.rs
@@ -33,7 +33,7 @@ pub mod stdio;
 pub mod thread;
 pub mod thread_local_dtor;
 pub mod thread_local_key;
-pub mod thread_parker;
+pub mod thread_parking;
 pub mod time;
 cfg_if::cfg_if! {
     if #[cfg(not(target_vendor = "uwp"))] {
diff --git a/library/std/src/sys/windows/os.rs b/library/std/src/sys/windows/os.rs
index 352337ba3..d7adeb266 100644
--- a/library/std/src/sys/windows/os.rs
+++ b/library/std/src/sys/windows/os.rs
@@ -157,7 +157,7 @@ impl<'a> Iterator for SplitPaths<'a> {
         // Double quotes are used as a way of introducing literal semicolons
         // (since c:\some;dir is a valid Windows path). Double quotes are not
         // themselves permitted in path names, so there is no way to escape a
-        // double quote.  Quoted regions can appear in arbitrary locations, so
+        // double quote. Quoted regions can appear in arbitrary locations, so
         //
         //   c:\foo;c:\som"e;di"r;c:\bar
         //
diff --git a/library/std/src/sys/windows/pipe.rs b/library/std/src/sys/windows/pipe.rs
index 9f26acc45..7b25edaa5 100644
--- a/library/std/src/sys/windows/pipe.rs
+++ b/library/std/src/sys/windows/pipe.rs
@@ -1,7 +1,7 @@
 use crate::os::windows::prelude::*;
 
 use crate::ffi::OsStr;
-use crate::io::{self, IoSlice, IoSliceMut};
+use crate::io::{self, IoSlice, IoSliceMut, Read};
 use crate::mem;
 use crate::path::Path;
 use crate::ptr;
@@ -261,6 +261,10 @@ impl AnonPipe {
         self.inner.is_read_vectored()
     }
 
+    pub fn read_to_end(&self, buf: &mut Vec<u8>) -> io::Result<usize> {
+        self.handle().read_to_end(buf)
+    }
+
     pub fn write(&self, buf: &[u8]) -> io::Result<usize> {
         unsafe {
             let len = crate::cmp::min(buf.len(), c::DWORD::MAX as usize) as c::DWORD;
diff --git a/library/std/src/sys/windows/process.rs b/library/std/src/sys/windows/process.rs
index 31e9b34fb..10bc949e1 100644
--- a/library/std/src/sys/windows/process.rs
+++ b/library/std/src/sys/windows/process.rs
@@ -351,6 +351,11 @@ impl Command {
             ))
         }
     }
+
+    pub fn output(&mut self) -> io::Result<(ExitStatus, Vec<u8>, Vec<u8>)> {
+        let (proc, pipes) = self.spawn(Stdio::MakePipe, false)?;
+        crate::sys_common::process::wait_with_output(proc, pipes)
+    }
 }
 
 impl fmt::Debug for Command {
diff --git a/library/std/src/sys/windows/rand.rs b/library/std/src/sys/windows/rand.rs
index b5a49489d..cdf37cfe9 100644
--- a/library/std/src/sys/windows/rand.rs
+++ b/library/std/src/sys/windows/rand.rs
@@ -1,106 +1,39 @@
-//! # Random key generation
-//!
-//! This module wraps the RNG provided by the OS. There are a few different
-//! ways to interface with the OS RNG so it's worth exploring each of the options.
-//! Note that at the time of writing these all go through the (undocumented)
-//! `bcryptPrimitives.dll` but they use different route to get there.
-//!
-//! Originally we were using [`RtlGenRandom`], however that function is
-//! deprecated and warns it "may be altered or unavailable in subsequent versions".
-//!
-//! So we switched to [`BCryptGenRandom`] with the `BCRYPT_USE_SYSTEM_PREFERRED_RNG`
-//! flag to query and find the system configured RNG. However, this change caused a small
-//! but significant number of users to experience panics caused by a failure of
-//! this function. See [#94098].
-//!
-//! The current version falls back to using `BCryptOpenAlgorithmProvider` if
-//! `BCRYPT_USE_SYSTEM_PREFERRED_RNG` fails for any reason.
-//!
-//! [#94098]: https://github.com/rust-lang/rust/issues/94098
-//! [`RtlGenRandom`]: https://docs.microsoft.com/en-us/windows/win32/api/ntsecapi/nf-ntsecapi-rtlgenrandom
-//! [`BCryptGenRandom`]: https://docs.microsoft.com/en-us/windows/win32/api/bcrypt/nf-bcrypt-bcryptgenrandom
+use crate::io;
 use crate::mem;
 use crate::ptr;
 use crate::sys::c;
 
-/// Generates high quality secure random keys for use by [`HashMap`].
-///
-/// This is used to seed the default [`RandomState`].
-///
-/// [`HashMap`]: crate::collections::HashMap
-/// [`RandomState`]: crate::collections::hash_map::RandomState
 pub fn hashmap_random_keys() -> (u64, u64) {
-    Rng::SYSTEM.gen_random_keys().unwrap_or_else(fallback_rng)
+    let mut v = (0, 0);
+    let ret = unsafe {
+        c::BCryptGenRandom(
+            ptr::null_mut(),
+            &mut v as *mut _ as *mut u8,
+            mem::size_of_val(&v) as c::ULONG,
+            c::BCRYPT_USE_SYSTEM_PREFERRED_RNG,
+        )
+    };
+    if c::nt_success(ret) { v } else { fallback_rng() }
 }
 
-struct Rng {
-    algorithm: c::BCRYPT_ALG_HANDLE,
-    flags: u32,
-}
-impl Rng {
-    const SYSTEM: Self = unsafe { Self::new(ptr::null_mut(), c::BCRYPT_USE_SYSTEM_PREFERRED_RNG) };
-
-    /// Create the RNG from an existing algorithm handle.
-    ///
-    /// # Safety
-    ///
-    /// The handle must either be null or a valid algorithm handle.
-    const unsafe fn new(algorithm: c::BCRYPT_ALG_HANDLE, flags: u32) -> Self {
-        Self { algorithm, flags }
-    }
-
-    /// Open a handle to the RNG algorithm.
-    fn open() -> Result<Self, c::NTSTATUS> {
-        use crate::sync::atomic::AtomicPtr;
-        use crate::sync::atomic::Ordering::{Acquire, Release};
-
-        // An atomic is used so we don't need to reopen the handle every time.
-        static HANDLE: AtomicPtr<crate::ffi::c_void> = AtomicPtr::new(ptr::null_mut());
-
-        let mut handle = HANDLE.load(Acquire);
-        if handle.is_null() {
-            let status = unsafe {
-                c::BCryptOpenAlgorithmProvider(
-                    &mut handle,
-                    c::BCRYPT_RNG_ALGORITHM.as_ptr(),
-                    ptr::null(),
-                    0,
-                )
-            };
-            if c::nt_success(status) {
-                // If another thread opens a handle first then use that handle instead.
-                let result = HANDLE.compare_exchange(ptr::null_mut(), handle, Release, Acquire);
-                if let Err(previous_handle) = result {
-                    // Close our handle and return the previous one.
-                    unsafe { c::BCryptCloseAlgorithmProvider(handle, 0) };
-                    handle = previous_handle;
-                }
-                Ok(unsafe { Self::new(handle, 0) })
-            } else {
-                Err(status)
-            }
-        } else {
-            Ok(unsafe { Self::new(handle, 0) })
-        }
-    }
+/// Generate random numbers using the fallback RNG function (RtlGenRandom)
+///
+/// This is necessary because of a failure to load the SysWOW64 variant of the
+/// bcryptprimitives.dll library from code that lives in bcrypt.dll.
+/// See <https://bugzilla.mozilla.org/show_bug.cgi?id=1788004#c9>
+#[cfg(not(target_vendor = "uwp"))]
+#[inline(never)]
+fn fallback_rng() -> (u64, u64) {
+    let mut v = (0, 0);
+    let ret =
+        unsafe { c::RtlGenRandom(&mut v as *mut _ as *mut u8, mem::size_of_val(&v) as c::ULONG) };
 
-    fn gen_random_keys(self) -> Result<(u64, u64), c::NTSTATUS> {
-        let mut v = (0, 0);
-        let status = unsafe {
-            let size = mem::size_of_val(&v).try_into().unwrap();
-            c::BCryptGenRandom(self.algorithm, ptr::addr_of_mut!(v).cast(), size, self.flags)
-        };
-        if c::nt_success(status) { Ok(v) } else { Err(status) }
-    }
+    if ret != 0 { v } else { panic!("fallback RNG broken: {}", io::Error::last_os_error()) }
 }
 
-/// Generate random numbers using the fallback RNG function
+/// We can't use RtlGenRandom with UWP, so there is no fallback
+#[cfg(target_vendor = "uwp")]
 #[inline(never)]
-fn fallback_rng(rng_status: c::NTSTATUS) -> (u64, u64) {
-    match Rng::open().and_then(|rng| rng.gen_random_keys()) {
-        Ok(keys) => keys,
-        Err(status) => {
-            panic!("RNG broken: {rng_status:#x}, fallback RNG broken: {status:#x}")
-        }
-    }
+fn fallback_rng() -> (u64, u64) {
+    panic!("fallback RNG broken: RtlGenRandom() not supported on UWP");
 }
diff --git a/library/std/src/sys/windows/thread.rs b/library/std/src/sys/windows/thread.rs
index c5c9e97e6..1cb576c95 100644
--- a/library/std/src/sys/windows/thread.rs
+++ b/library/std/src/sys/windows/thread.rs
@@ -26,7 +26,7 @@ impl Thread {
 
         // FIXME On UNIX, we guard against stack sizes that are too small but
         // that's because pthreads enforces that stacks are at least
-        // PTHREAD_STACK_MIN bytes big.  Windows has no such lower limit, it's
+        // PTHREAD_STACK_MIN bytes big. Windows has no such lower limit, it's
         // just that below a certain threshold you can't do anything useful.
         // That threshold is application and architecture-specific, however.
         let ret = c::CreateThread(
diff --git a/library/std/src/sys/windows/thread_parker.rs b/library/std/src/sys/windows/thread_parker.rs
deleted file mode 100644
index 2f7ae863b..000000000
--- a/library/std/src/sys/windows/thread_parker.rs
+++ /dev/null
@@ -1,253 +0,0 @@
-// Thread parker implementation for Windows.
-//
-// This uses WaitOnAddress and WakeByAddressSingle if available (Windows 8+).
-// This modern API is exactly the same as the futex syscalls the Linux thread
-// parker uses. When These APIs are available, the implementation of this
-// thread parker matches the Linux thread parker exactly.
-//
-// However, when the modern API is not available, this implementation falls
-// back to NT Keyed Events, which are similar, but have some important
-// differences. These are available since Windows XP.
-//
-// WaitOnAddress first checks the state of the thread parker to make sure it no
-// WakeByAddressSingle calls can be missed between updating the parker state
-// and calling the function.
-//
-// NtWaitForKeyedEvent does not have this option, and unconditionally blocks
-// without checking the parker state first. Instead, NtReleaseKeyedEvent
-// (unlike WakeByAddressSingle) *blocks* until it woke up a thread waiting for
-// it by NtWaitForKeyedEvent. This way, we can be sure no events are missed,
-// but we need to be careful not to block unpark() if park_timeout() was woken
-// up by a timeout instead of unpark().
-//
-// Unlike WaitOnAddress, NtWaitForKeyedEvent/NtReleaseKeyedEvent operate on a
-// HANDLE (created with NtCreateKeyedEvent). This means that we can be sure
-// a successfully awoken park() was awoken by unpark() and not a
-// NtReleaseKeyedEvent call from some other code, as these events are not only
-// matched by the key (address of the parker (state)), but also by this HANDLE.
-// We lazily allocate this handle the first time it is needed.
-//
-// The fast path (calling park() after unpark() was already called) and the
-// possible states are the same for both implementations. This is used here to
-// make sure the fast path does not even check which API to use, but can return
-// right away, independent of the used API. Only the slow paths (which will
-// actually block/wake a thread) check which API is available and have
-// different implementations.
-//
-// Unfortunately, NT Keyed Events are an undocumented Windows API. However:
-// - This API is relatively simple with obvious behaviour, and there are
-//   several (unofficial) articles documenting the details. [1]
-// - `parking_lot` has been using this API for years (on Windows versions
-//   before Windows 8). [2] Many big projects extensively use parking_lot,
-//   such as servo and the Rust compiler itself.
-// - It is the underlying API used by Windows SRW locks and Windows critical
-//   sections. [3] [4]
-// - The source code of the implementations of Wine, ReactOs, and Windows XP
-//   are available and match the expected behaviour.
-// - The main risk with an undocumented API is that it might change in the
-//   future. But since we only use it for older versions of Windows, that's not
-//   a problem.
-// - Even if these functions do not block or wake as we expect (which is
-//   unlikely, see all previous points), this implementation would still be
-//   memory safe. The NT Keyed Events API is only used to sleep/block in the
-//   right place.
-//
-// [1]: http://www.locklessinc.com/articles/keyed_events/
-// [2]: https://github.com/Amanieu/parking_lot/commit/43abbc964e
-// [3]: https://docs.microsoft.com/en-us/archive/msdn-magazine/2012/november/windows-with-c-the-evolution-of-synchronization-in-windows-and-c
-// [4]: Windows Internals, Part 1, ISBN 9780735671300
-
-use crate::pin::Pin;
-use crate::ptr;
-use crate::sync::atomic::{
-    AtomicI8, AtomicPtr,
-    Ordering::{Acquire, Relaxed, Release},
-};
-use crate::sys::{c, dur2timeout};
-use crate::time::Duration;
-
-pub struct Parker {
-    state: AtomicI8,
-}
-
-const PARKED: i8 = -1;
-const EMPTY: i8 = 0;
-const NOTIFIED: i8 = 1;
-
-// Notes about memory ordering:
-//
-// Memory ordering is only relevant for the relative ordering of operations
-// between different variables. Even Ordering::Relaxed guarantees a
-// monotonic/consistent order when looking at just a single atomic variable.
-//
-// So, since this parker is just a single atomic variable, we only need to look
-// at the ordering guarantees we need to provide to the 'outside world'.
-//
-// The only memory ordering guarantee that parking and unparking provide, is
-// that things which happened before unpark() are visible on the thread
-// returning from park() afterwards. Otherwise, it was effectively unparked
-// before unpark() was called while still consuming the 'token'.
-//
-// In other words, unpark() needs to synchronize with the part of park() that
-// consumes the token and returns.
-//
-// This is done with a release-acquire synchronization, by using
-// Ordering::Release when writing NOTIFIED (the 'token') in unpark(), and using
-// Ordering::Acquire when reading this state in park() after waking up.
-impl Parker {
-    /// Construct the Windows parker. The UNIX parker implementation
-    /// requires this to happen in-place.
-    pub unsafe fn new(parker: *mut Parker) {
-        parker.write(Self { state: AtomicI8::new(EMPTY) });
-    }
-
-    // Assumes this is only called by the thread that owns the Parker,
-    // which means that `self.state != PARKED`. This implementation doesn't require `Pin`,
-    // but other implementations do.
-    pub unsafe fn park(self: Pin<&Self>) {
-        // Change NOTIFIED=>EMPTY or EMPTY=>PARKED, and directly return in the
-        // first case.
-        if self.state.fetch_sub(1, Acquire) == NOTIFIED {
-            return;
-        }
-
-        if let Some(wait_on_address) = c::WaitOnAddress::option() {
-            loop {
-                // Wait for something to happen, assuming it's still set to PARKED.
-                wait_on_address(self.ptr(), &PARKED as *const _ as c::LPVOID, 1, c::INFINITE);
-                // Change NOTIFIED=>EMPTY but leave PARKED alone.
-                if self.state.compare_exchange(NOTIFIED, EMPTY, Acquire, Acquire).is_ok() {
-                    // Actually woken up by unpark().
-                    return;
-                } else {
-                    // Spurious wake up. We loop to try again.
-                }
-            }
-        } else {
-            // Wait for unpark() to produce this event.
-            c::NtWaitForKeyedEvent(keyed_event_handle(), self.ptr(), 0, ptr::null_mut());
-            // Set the state back to EMPTY (from either PARKED or NOTIFIED).
-            // Note that we don't just write EMPTY, but use swap() to also
-            // include an acquire-ordered read to synchronize with unpark()'s
-            // release-ordered write.
-            self.state.swap(EMPTY, Acquire);
-        }
-    }
-
-    // Assumes this is only called by the thread that owns the Parker,
-    // which means that `self.state != PARKED`. This implementation doesn't require `Pin`,
-    // but other implementations do.
-    pub unsafe fn park_timeout(self: Pin<&Self>, timeout: Duration) {
-        // Change NOTIFIED=>EMPTY or EMPTY=>PARKED, and directly return in the
-        // first case.
-        if self.state.fetch_sub(1, Acquire) == NOTIFIED {
-            return;
-        }
-
-        if let Some(wait_on_address) = c::WaitOnAddress::option() {
-            // Wait for something to happen, assuming it's still set to PARKED.
-            wait_on_address(self.ptr(), &PARKED as *const _ as c::LPVOID, 1, dur2timeout(timeout));
-            // Set the state back to EMPTY (from either PARKED or NOTIFIED).
-            // Note that we don't just write EMPTY, but use swap() to also
-            // include an acquire-ordered read to synchronize with unpark()'s
-            // release-ordered write.
-            if self.state.swap(EMPTY, Acquire) == NOTIFIED {
-                // Actually woken up by unpark().
-            } else {
-                // Timeout or spurious wake up.
-                // We return either way, because we can't easily tell if it was the
-                // timeout or not.
-            }
-        } else {
-            // Need to wait for unpark() using NtWaitForKeyedEvent.
-            let handle = keyed_event_handle();
-
-            // NtWaitForKeyedEvent uses a unit of 100ns, and uses negative
-            // values to indicate a relative time on the monotonic clock.
-            // This is documented here for the underlying KeWaitForSingleObject function:
-            // https://docs.microsoft.com/en-us/windows-hardware/drivers/ddi/wdm/nf-wdm-kewaitforsingleobject
-            let mut timeout = match i64::try_from((timeout.as_nanos() + 99) / 100) {
-                Ok(t) => -t,
-                Err(_) => i64::MIN,
-            };
-
-            // Wait for unpark() to produce this event.
-            let unparked =
-                c::NtWaitForKeyedEvent(handle, self.ptr(), 0, &mut timeout) == c::STATUS_SUCCESS;
-
-            // Set the state back to EMPTY (from either PARKED or NOTIFIED).
-            let prev_state = self.state.swap(EMPTY, Acquire);
-
-            if !unparked && prev_state == NOTIFIED {
-                // We were awoken by a timeout, not by unpark(), but the state
-                // was set to NOTIFIED, which means we *just* missed an
-                // unpark(), which is now blocked on us to wait for it.
-                // Wait for it to consume the event and unblock that thread.
-                c::NtWaitForKeyedEvent(handle, self.ptr(), 0, ptr::null_mut());
-            }
-        }
-    }
-
-    // This implementation doesn't require `Pin`, but other implementations do.
-    pub fn unpark(self: Pin<&Self>) {
-        // Change PARKED=>NOTIFIED, EMPTY=>NOTIFIED, or NOTIFIED=>NOTIFIED, and
-        // wake the thread in the first case.
-        //
-        // Note that even NOTIFIED=>NOTIFIED results in a write. This is on
-        // purpose, to make sure every unpark() has a release-acquire ordering
-        // with park().
-        if self.state.swap(NOTIFIED, Release) == PARKED {
-            unsafe {
-                if let Some(wake_by_address_single) = c::WakeByAddressSingle::option() {
-                    wake_by_address_single(self.ptr());
-                } else {
-                    // If we run NtReleaseKeyedEvent before the waiting thread runs
-                    // NtWaitForKeyedEvent, this (shortly) blocks until we can wake it up.
-                    // If the waiting thread wakes up before we run NtReleaseKeyedEvent
-                    // (e.g. due to a timeout), this blocks until we do wake up a thread.
-                    // To prevent this thread from blocking indefinitely in that case,
-                    // park_impl() will, after seeing the state set to NOTIFIED after
-                    // waking up, call NtWaitForKeyedEvent again to unblock us.
-                    c::NtReleaseKeyedEvent(keyed_event_handle(), self.ptr(), 0, ptr::null_mut());
-                }
-            }
-        }
-    }
-
-    fn ptr(&self) -> c::LPVOID {
-        &self.state as *const _ as c::LPVOID
-    }
-}
-
-fn keyed_event_handle() -> c::HANDLE {
-    const INVALID: c::HANDLE = ptr::invalid_mut(!0);
-    static HANDLE: AtomicPtr<libc::c_void> = AtomicPtr::new(INVALID);
-    match HANDLE.load(Relaxed) {
-        INVALID => {
-            let mut handle = c::INVALID_HANDLE_VALUE;
-            unsafe {
-                match c::NtCreateKeyedEvent(
-                    &mut handle,
-                    c::GENERIC_READ | c::GENERIC_WRITE,
-                    ptr::null_mut(),
-                    0,
-                ) {
-                    c::STATUS_SUCCESS => {}
-                    r => panic!("Unable to create keyed event handle: error {r}"),
-                }
-            }
-            match HANDLE.compare_exchange(INVALID, handle, Relaxed, Relaxed) {
-                Ok(_) => handle,
-                Err(h) => {
-                    // Lost the race to another thread initializing HANDLE before we did.
-                    // Closing our handle and using theirs instead.
-                    unsafe {
-                        c::CloseHandle(handle);
-                    }
-                    h
-                }
-            }
-        }
-        handle => handle,
-    }
-}
diff --git a/library/std/src/sys/windows/thread_parking.rs b/library/std/src/sys/windows/thread_parking.rs
new file mode 100644
index 000000000..5d43676ad
--- /dev/null
+++ b/library/std/src/sys/windows/thread_parking.rs
@@ -0,0 +1,253 @@
+// Thread parker implementation for Windows.
+//
+// This uses WaitOnAddress and WakeByAddressSingle if available (Windows 8+).
+// This modern API is exactly the same as the futex syscalls the Linux thread
+// parker uses. When these APIs are available, the implementation of this
+// thread parker matches the Linux thread parker exactly.
+//
+// However, when the modern API is not available, this implementation falls
+// back to NT Keyed Events, which are similar, but have some important
+// differences. These are available since Windows XP.
+//
+// WaitOnAddress first checks the state of the thread parker to make sure no
+// WakeByAddressSingle calls can be missed between updating the parker state
+// and calling the function.
+//
+// NtWaitForKeyedEvent does not have this option, and unconditionally blocks
+// without checking the parker state first. Instead, NtReleaseKeyedEvent
+// (unlike WakeByAddressSingle) *blocks* until it woke up a thread waiting for
+// it by NtWaitForKeyedEvent. This way, we can be sure no events are missed,
+// but we need to be careful not to block unpark() if park_timeout() was woken
+// up by a timeout instead of unpark().
+//
+// Unlike WaitOnAddress, NtWaitForKeyedEvent/NtReleaseKeyedEvent operate on a
+// HANDLE (created with NtCreateKeyedEvent). This means that we can be sure
+// a successfully awoken park() was awoken by unpark() and not a
+// NtReleaseKeyedEvent call from some other code, as these events are not only
+// matched by the key (address of the parker (state)), but also by this HANDLE.
+// We lazily allocate this handle the first time it is needed.
+//
+// The fast path (calling park() after unpark() was already called) and the
+// possible states are the same for both implementations. This is used here to
+// make sure the fast path does not even check which API to use, but can return
+// right away, independent of the used API. Only the slow paths (which will
+// actually block/wake a thread) check which API is available and have
+// different implementations.
+//
+// Unfortunately, NT Keyed Events are an undocumented Windows API. However:
+// - This API is relatively simple with obvious behaviour, and there are
+//   several (unofficial) articles documenting the details. [1]
+// - `parking_lot` has been using this API for years (on Windows versions
+//   before Windows 8). [2] Many big projects extensively use parking_lot,
+//   such as servo and the Rust compiler itself.
+// - It is the underlying API used by Windows SRW locks and Windows critical
+//   sections. [3] [4]
+// - The source code of the implementations of Wine, ReactOS, and Windows XP
+//   is available and matches the expected behaviour.
+// - The main risk with an undocumented API is that it might change in the
+//   future. But since we only use it for older versions of Windows, that's not
+//   a problem.
+// - Even if these functions do not block or wake as we expect (which is
+//   unlikely, see all previous points), this implementation would still be
+//   memory safe. The NT Keyed Events API is only used to sleep/block in the
+//   right place.
+//
+// [1]: http://www.locklessinc.com/articles/keyed_events/
+// [2]: https://github.com/Amanieu/parking_lot/commit/43abbc964e
+// [3]: https://docs.microsoft.com/en-us/archive/msdn-magazine/2012/november/windows-with-c-the-evolution-of-synchronization-in-windows-and-c
+// [4]: Windows Internals, Part 1, ISBN 9780735671300
+
+use crate::pin::Pin;
+use crate::ptr;
+use crate::sync::atomic::{
+    AtomicI8, AtomicPtr,
+    Ordering::{Acquire, Relaxed, Release},
+};
+use crate::sys::{c, dur2timeout};
+use crate::time::Duration;
+
+pub struct Parker {
+    state: AtomicI8,
+}
+
+const PARKED: i8 = -1;
+const EMPTY: i8 = 0;
+const NOTIFIED: i8 = 1;
+
+// Notes about memory ordering:
+//
+// Memory ordering is only relevant for the relative ordering of operations
+// between different variables. Even Ordering::Relaxed guarantees a
+// monotonic/consistent order when looking at just a single atomic variable.
+//
+// So, since this parker is just a single atomic variable, we only need to look
+// at the ordering guarantees we need to provide to the 'outside world'.
+//
+// The only memory ordering guarantee that parking and unparking provide, is
+// that things which happened before unpark() are visible on the thread
+// returning from park() afterwards. Otherwise, it was effectively unparked
+// before unpark() was called while still consuming the 'token'.
+//
+// In other words, unpark() needs to synchronize with the part of park() that
+// consumes the token and returns.
+//
+// This is done with a release-acquire synchronization, by using
+// Ordering::Release when writing NOTIFIED (the 'token') in unpark(), and using
+// Ordering::Acquire when reading this state in park() after waking up.
+impl Parker {
+    /// Construct the Windows parker. The UNIX parker implementation
+    /// requires this to happen in-place.
+    pub unsafe fn new_in_place(parker: *mut Parker) {
+        parker.write(Self { state: AtomicI8::new(EMPTY) });
+    }
+
+    // Assumes this is only called by the thread that owns the Parker,
+    // which means that `self.state != PARKED`. This implementation doesn't require `Pin`,
+    // but other implementations do.
+    pub unsafe fn park(self: Pin<&Self>) {
+        // Change NOTIFIED=>EMPTY or EMPTY=>PARKED, and directly return in the
+        // first case.
+        if self.state.fetch_sub(1, Acquire) == NOTIFIED {
+            return;
+        }
+
+        if let Some(wait_on_address) = c::WaitOnAddress::option() {
+            loop {
+                // Wait for something to happen, assuming it's still set to PARKED.
+                wait_on_address(self.ptr(), &PARKED as *const _ as c::LPVOID, 1, c::INFINITE);
+                // Change NOTIFIED=>EMPTY but leave PARKED alone.
+                if self.state.compare_exchange(NOTIFIED, EMPTY, Acquire, Acquire).is_ok() {
+                    // Actually woken up by unpark().
+                    return;
+                } else {
+                    // Spurious wake up. We loop to try again.
+                }
+            }
+        } else {
+            // Wait for unpark() to produce this event.
+            c::NtWaitForKeyedEvent(keyed_event_handle(), self.ptr(), 0, ptr::null_mut());
+            // Set the state back to EMPTY (from either PARKED or NOTIFIED).
+            // Note that we don't just write EMPTY, but use swap() to also
+            // include an acquire-ordered read to synchronize with unpark()'s
+            // release-ordered write.
+            self.state.swap(EMPTY, Acquire);
+        }
+    }
+
+    // Assumes this is only called by the thread that owns the Parker,
+    // which means that `self.state != PARKED`. This implementation doesn't require `Pin`,
+    // but other implementations do.
+    pub unsafe fn park_timeout(self: Pin<&Self>, timeout: Duration) {
+        // Change NOTIFIED=>EMPTY or EMPTY=>PARKED, and directly return in the
+        // first case.
+        if self.state.fetch_sub(1, Acquire) == NOTIFIED {
+            return;
+        }
+
+        if let Some(wait_on_address) = c::WaitOnAddress::option() {
+            // Wait for something to happen, assuming it's still set to PARKED.
+            wait_on_address(self.ptr(), &PARKED as *const _ as c::LPVOID, 1, dur2timeout(timeout));
+            // Set the state back to EMPTY (from either PARKED or NOTIFIED).
+            // Note that we don't just write EMPTY, but use swap() to also
+            // include an acquire-ordered read to synchronize with unpark()'s
+            // release-ordered write.
+            if self.state.swap(EMPTY, Acquire) == NOTIFIED {
+                // Actually woken up by unpark().
+            } else {
+                // Timeout or spurious wake up.
+                // We return either way, because we can't easily tell if it was the
+                // timeout or not.
+            }
+        } else {
+            // Need to wait for unpark() using NtWaitForKeyedEvent.
+            let handle = keyed_event_handle();
+
+            // NtWaitForKeyedEvent uses a unit of 100ns, and uses negative
+            // values to indicate a relative time on the monotonic clock.
+            // This is documented here for the underlying KeWaitForSingleObject function:
+            // https://docs.microsoft.com/en-us/windows-hardware/drivers/ddi/wdm/nf-wdm-kewaitforsingleobject
+            let mut timeout = match i64::try_from((timeout.as_nanos() + 99) / 100) {
+                Ok(t) => -t,
+                Err(_) => i64::MIN,
+            };
+
+            // Wait for unpark() to produce this event.
+            let unparked =
+                c::NtWaitForKeyedEvent(handle, self.ptr(), 0, &mut timeout) == c::STATUS_SUCCESS;
+
+            // Set the state back to EMPTY (from either PARKED or NOTIFIED).
+            let prev_state = self.state.swap(EMPTY, Acquire);
+
+            if !unparked && prev_state == NOTIFIED {
+                // We were awoken by a timeout, not by unpark(), but the state
+                // was set to NOTIFIED, which means we *just* missed an
+                // unpark(), which is now blocked on us to wait for it.
+                // Wait for it to consume the event and unblock that thread.
+                c::NtWaitForKeyedEvent(handle, self.ptr(), 0, ptr::null_mut());
+            }
+        }
+    }
+
+    // This implementation doesn't require `Pin`, but other implementations do.
+    pub fn unpark(self: Pin<&Self>) {
+        // Change PARKED=>NOTIFIED, EMPTY=>NOTIFIED, or NOTIFIED=>NOTIFIED, and
+        // wake the thread in the first case.
+        //
+        // Note that even NOTIFIED=>NOTIFIED results in a write. This is on
+        // purpose, to make sure every unpark() has a release-acquire ordering
+        // with park().
+        if self.state.swap(NOTIFIED, Release) == PARKED {
+            unsafe {
+                if let Some(wake_by_address_single) = c::WakeByAddressSingle::option() {
+                    wake_by_address_single(self.ptr());
+                } else {
+                    // If we run NtReleaseKeyedEvent before the waiting thread runs
+                    // NtWaitForKeyedEvent, this (shortly) blocks until we can wake it up.
+                    // If the waiting thread wakes up before we run NtReleaseKeyedEvent
+                    // (e.g. due to a timeout), this blocks until we do wake up a thread.
+                    // To prevent this thread from blocking indefinitely in that case,
+                    // park_timeout() will, after seeing the state set to NOTIFIED after
+                    // waking up, call NtWaitForKeyedEvent again to unblock us.
+                    c::NtReleaseKeyedEvent(keyed_event_handle(), self.ptr(), 0, ptr::null_mut());
+                }
+            }
+        }
+    }
+
+    fn ptr(&self) -> c::LPVOID {
+        &self.state as *const _ as c::LPVOID
+    }
+}
+
+fn keyed_event_handle() -> c::HANDLE {
+    const INVALID: c::HANDLE = ptr::invalid_mut(!0);
+    static HANDLE: AtomicPtr<libc::c_void> = AtomicPtr::new(INVALID);
+    match HANDLE.load(Relaxed) {
+        INVALID => {
+            let mut handle = c::INVALID_HANDLE_VALUE;
+            unsafe {
+                match c::NtCreateKeyedEvent(
+                    &mut handle,
+                    c::GENERIC_READ | c::GENERIC_WRITE,
+                    ptr::null_mut(),
+                    0,
+                ) {
+                    c::STATUS_SUCCESS => {}
+                    r => panic!("Unable to create keyed event handle: error {r}"),
+                }
+            }
+            match HANDLE.compare_exchange(INVALID, handle, Relaxed, Relaxed) {
+                Ok(_) => handle,
+                Err(h) => {
+                    // Lost the race to another thread initializing HANDLE before we did.
+                    // Closing our handle and using theirs instead.
+                    unsafe {
+                        c::CloseHandle(handle);
+                    }
+                    h
+                }
+            }
+        }
+        handle => handle,
+    }
+}