summary | refs | log | tree | commit | diff | stats
path: root/library/std/src/sys_common
diff options
context:
space:
mode:
Diffstat (limited to '')
-rw-r--r-- library/std/src/sys_common/backtrace.rs 9
-rw-r--r-- library/std/src/sys_common/condvar.rs 1
-rw-r--r-- library/std/src/sys_common/condvar/check.rs 1
-rw-r--r-- library/std/src/sys_common/mod.rs 10
-rw-r--r-- library/std/src/sys_common/mutex.rs 45
-rw-r--r-- library/std/src/sys_common/net.rs 28
-rw-r--r-- library/std/src/sys_common/once/futex.rs 134
-rw-r--r-- library/std/src/sys_common/once/generic.rs 282
-rw-r--r-- library/std/src/sys_common/once/mod.rs 43
-rw-r--r-- library/std/src/sys_common/remutex.rs 46
-rw-r--r-- library/std/src/sys_common/remutex/tests.rs 37
-rw-r--r-- library/std/src/sys_common/rwlock.rs 61
-rw-r--r-- library/std/src/sys_common/thread_local_key.rs 26
-rw-r--r-- library/std/src/sys_common/thread_local_key/tests.rs 9
-rw-r--r-- library/std/src/sys_common/thread_parker/mod.rs 1
-rw-r--r-- library/std/src/sys_common/wtf8.rs 95
-rw-r--r-- library/std/src/sys_common/wtf8/tests.rs 295
17 files changed, 878 insertions, 245 deletions
diff --git a/library/std/src/sys_common/backtrace.rs b/library/std/src/sys_common/backtrace.rs
index 31164afdc..8807077cb 100644
--- a/library/std/src/sys_common/backtrace.rs
+++ b/library/std/src/sys_common/backtrace.rs
@@ -7,15 +7,14 @@ use crate::fmt;
use crate::io;
use crate::io::prelude::*;
use crate::path::{self, Path, PathBuf};
-use crate::sys_common::mutex::StaticMutex;
+use crate::sync::{Mutex, PoisonError};
/// Max number of frames to print.
const MAX_NB_FRAMES: usize = 100;
-// SAFETY: Don't attempt to lock this reentrantly.
-pub unsafe fn lock() -> impl Drop {
- static LOCK: StaticMutex = StaticMutex::new();
- LOCK.lock()
+pub fn lock() -> impl Drop {
+ static LOCK: Mutex<()> = Mutex::new(());
+ LOCK.lock().unwrap_or_else(PoisonError::into_inner)
}
/// Prints the current backtrace.
diff --git a/library/std/src/sys_common/condvar.rs b/library/std/src/sys_common/condvar.rs
index f3ac1061b..8bc5b2411 100644
--- a/library/std/src/sys_common/condvar.rs
+++ b/library/std/src/sys_common/condvar.rs
@@ -15,6 +15,7 @@ pub struct Condvar {
impl Condvar {
/// Creates a new condition variable for use.
#[inline]
+ #[rustc_const_stable(feature = "const_locks", since = "1.63.0")]
pub const fn new() -> Self {
Self { inner: imp::MovableCondvar::new(), check: CondvarCheck::new() }
}
diff --git a/library/std/src/sys_common/condvar/check.rs b/library/std/src/sys_common/condvar/check.rs
index ce8f36704..4ac9e62bf 100644
--- a/library/std/src/sys_common/condvar/check.rs
+++ b/library/std/src/sys_common/condvar/check.rs
@@ -50,6 +50,7 @@ pub struct NoCheck;
#[allow(dead_code)]
impl NoCheck {
+ #[rustc_const_stable(feature = "const_locks", since = "1.63.0")]
pub const fn new() -> Self {
Self
}
diff --git a/library/std/src/sys_common/mod.rs b/library/std/src/sys_common/mod.rs
index 80f56bf75..8c19f9332 100644
--- a/library/std/src/sys_common/mod.rs
+++ b/library/std/src/sys_common/mod.rs
@@ -27,17 +27,25 @@ pub mod io;
pub mod lazy_box;
pub mod memchr;
pub mod mutex;
+pub mod once;
pub mod process;
pub mod remutex;
pub mod rwlock;
pub mod thread;
pub mod thread_info;
pub mod thread_local_dtor;
-pub mod thread_local_key;
pub mod thread_parker;
pub mod wtf8;
cfg_if::cfg_if! {
+ if #[cfg(target_os = "windows")] {
+ pub use crate::sys::thread_local_key;
+ } else {
+ pub mod thread_local_key;
+ }
+}
+
+cfg_if::cfg_if! {
if #[cfg(any(target_os = "l4re",
target_os = "hermit",
feature = "restricted-std",
diff --git a/library/std/src/sys_common/mutex.rs b/library/std/src/sys_common/mutex.rs
index 48479f5bd..98046f20f 100644
--- a/library/std/src/sys_common/mutex.rs
+++ b/library/std/src/sys_common/mutex.rs
@@ -1,49 +1,5 @@
use crate::sys::locks as imp;
-/// An OS-based mutual exclusion lock, meant for use in static variables.
-///
-/// This mutex has a const constructor ([`StaticMutex::new`]), does not
-/// implement `Drop` to cleanup resources, and causes UB when used reentrantly.
-///
-/// This mutex does not implement poisoning.
-///
-/// This is a wrapper around `imp::Mutex` that does *not* call `init()` and
-/// `destroy()`.
-pub struct StaticMutex(imp::Mutex);
-
-unsafe impl Sync for StaticMutex {}
-
-impl StaticMutex {
- /// Creates a new mutex for use.
- #[inline]
- pub const fn new() -> Self {
- Self(imp::Mutex::new())
- }
-
- /// Calls raw_lock() and then returns an RAII guard to guarantee the mutex
- /// will be unlocked.
- ///
- /// It is undefined behaviour to call this function while locked by the
- /// same thread.
- #[inline]
- pub unsafe fn lock(&'static self) -> StaticMutexGuard {
- self.0.lock();
- StaticMutexGuard(&self.0)
- }
-}
-
-#[must_use]
-pub struct StaticMutexGuard(&'static imp::Mutex);
-
-impl Drop for StaticMutexGuard {
- #[inline]
- fn drop(&mut self) {
- unsafe {
- self.0.unlock();
- }
- }
-}
-
/// An OS-based mutual exclusion lock.
///
/// This mutex cleans up its resources in its `Drop` implementation, may safely
@@ -61,6 +17,7 @@ unsafe impl Sync for MovableMutex {}
impl MovableMutex {
/// Creates a new mutex.
#[inline]
+ #[rustc_const_stable(feature = "const_locks", since = "1.63.0")]
pub const fn new() -> Self {
Self(imp::MovableMutex::new())
}
diff --git a/library/std/src/sys_common/net.rs b/library/std/src/sys_common/net.rs
index 33d336c43..fad4a6333 100644
--- a/library/std/src/sys_common/net.rs
+++ b/library/std/src/sys_common/net.rs
@@ -2,15 +2,16 @@
mod tests;
use crate::cmp;
-use crate::ffi::CString;
+use crate::convert::{TryFrom, TryInto};
use crate::fmt;
use crate::io::{self, ErrorKind, IoSlice, IoSliceMut};
use crate::mem;
use crate::net::{Ipv4Addr, Ipv6Addr, Shutdown, SocketAddr};
use crate::ptr;
+use crate::sys::common::small_c_string::run_with_cstr;
use crate::sys::net::netc as c;
use crate::sys::net::{cvt, cvt_gai, cvt_r, init, wrlen_t, Socket};
-use crate::sys_common::{FromInner, IntoInner};
+use crate::sys_common::{AsInner, FromInner, IntoInner};
use crate::time::Duration;
use libc::{c_int, c_void};
@@ -197,14 +198,15 @@ impl<'a> TryFrom<(&'a str, u16)> for LookupHost {
fn try_from((host, port): (&'a str, u16)) -> io::Result<LookupHost> {
init();
- let c_host = CString::new(host)?;
- let mut hints: c::addrinfo = unsafe { mem::zeroed() };
- hints.ai_socktype = c::SOCK_STREAM;
- let mut res = ptr::null_mut();
- unsafe {
- cvt_gai(c::getaddrinfo(c_host.as_ptr(), ptr::null(), &hints, &mut res))
- .map(|_| LookupHost { original: res, cur: res, port })
- }
+ run_with_cstr(host.as_bytes(), |c_host| {
+ let mut hints: c::addrinfo = unsafe { mem::zeroed() };
+ hints.ai_socktype = c::SOCK_STREAM;
+ let mut res = ptr::null_mut();
+ unsafe {
+ cvt_gai(c::getaddrinfo(c_host.as_ptr(), ptr::null(), &hints, &mut res))
+ .map(|_| LookupHost { original: res, cur: res, port })
+ }
+ })
}
}
@@ -345,6 +347,12 @@ impl TcpStream {
}
}
+impl AsInner<Socket> for TcpStream {
+ fn as_inner(&self) -> &Socket {
+ &self.inner
+ }
+}
+
impl FromInner<Socket> for TcpStream {
fn from_inner(socket: Socket) -> TcpStream {
TcpStream { inner: socket }
diff --git a/library/std/src/sys_common/once/futex.rs b/library/std/src/sys_common/once/futex.rs
new file mode 100644
index 000000000..5c7e6c013
--- /dev/null
+++ b/library/std/src/sys_common/once/futex.rs
@@ -0,0 +1,134 @@
+use crate::cell::Cell;
+use crate::sync as public;
+use crate::sync::atomic::{
+ AtomicU32,
+ Ordering::{Acquire, Relaxed, Release},
+};
+use crate::sys::futex::{futex_wait, futex_wake_all};
+
+// On some platforms, the OS is very nice and handles the waiter queue for us.
+// This means we only need one atomic value with 5 states:
+
+/// No initialization has run yet, and no thread is currently using the Once.
+const INCOMPLETE: u32 = 0;
+/// Some thread has previously attempted to initialize the Once, but it panicked,
+/// so the Once is now poisoned. There are no other threads currently accessing
+/// this Once.
+const POISONED: u32 = 1;
+/// Some thread is currently attempting to run initialization. It may succeed,
+/// so all future threads need to wait for it to finish.
+const RUNNING: u32 = 2;
+/// Some thread is currently attempting to run initialization and there are threads
+/// waiting for it to finish.
+const QUEUED: u32 = 3;
+/// Initialization has completed and all future calls should finish immediately.
+const COMPLETE: u32 = 4;
+
+// Threads wait by setting the state to QUEUED and calling `futex_wait` on the state
+// variable. When the running thread finishes, it will wake all waiting threads using
+// `futex_wake_all`.
+
+pub struct OnceState {
+ poisoned: bool,
+ set_state_to: Cell<u32>,
+}
+
+impl OnceState {
+ #[inline]
+ pub fn is_poisoned(&self) -> bool {
+ self.poisoned
+ }
+
+ #[inline]
+ pub fn poison(&self) {
+ self.set_state_to.set(POISONED);
+ }
+}
+
+struct CompletionGuard<'a> {
+ state: &'a AtomicU32,
+ set_state_on_drop_to: u32,
+}
+
+impl<'a> Drop for CompletionGuard<'a> {
+ fn drop(&mut self) {
+ // Use release ordering to propagate changes to all threads checking
+ // up on the Once. `futex_wake_all` does its own synchronization, hence
+ // we do not need `AcqRel`.
+ if self.state.swap(self.set_state_on_drop_to, Release) == QUEUED {
+ futex_wake_all(&self.state);
+ }
+ }
+}
+
+pub struct Once {
+ state: AtomicU32,
+}
+
+impl Once {
+ #[inline]
+ pub const fn new() -> Once {
+ Once { state: AtomicU32::new(INCOMPLETE) }
+ }
+
+ #[inline]
+ pub fn is_completed(&self) -> bool {
+ // Use acquire ordering to make all initialization changes visible to the
+ // current thread.
+ self.state.load(Acquire) == COMPLETE
+ }
+
+ // This uses FnMut to match the API of the generic implementation. As this
+ // implementation is quite light-weight, it is generic over the closure and
+ // so avoids the cost of dynamic dispatch.
+ #[cold]
+ #[track_caller]
+ pub fn call(&self, ignore_poisoning: bool, f: &mut impl FnMut(&public::OnceState)) {
+ let mut state = self.state.load(Acquire);
+ loop {
+ match state {
+ POISONED if !ignore_poisoning => {
+ // Panic to propagate the poison.
+ panic!("Once instance has previously been poisoned");
+ }
+ INCOMPLETE | POISONED => {
+ // Try to register the current thread as the one running.
+ if let Err(new) =
+ self.state.compare_exchange_weak(state, RUNNING, Acquire, Acquire)
+ {
+ state = new;
+ continue;
+ }
+ // `waiter_queue` will manage other waiting threads, and
+ // wake them up on drop.
+ let mut waiter_queue =
+ CompletionGuard { state: &self.state, set_state_on_drop_to: POISONED };
+ // Run the function, letting it know if we're poisoned or not.
+ let f_state = public::OnceState {
+ inner: OnceState {
+ poisoned: state == POISONED,
+ set_state_to: Cell::new(COMPLETE),
+ },
+ };
+ f(&f_state);
+ waiter_queue.set_state_on_drop_to = f_state.inner.set_state_to.get();
+ return;
+ }
+ RUNNING | QUEUED => {
+ // Set the state to QUEUED if it is not already.
+ if state == RUNNING
+ && let Err(new) = self.state.compare_exchange_weak(RUNNING, QUEUED, Relaxed, Acquire)
+ {
+ state = new;
+ continue;
+ }
+
+ futex_wait(&self.state, QUEUED, None);
+ state = self.state.load(Acquire);
+ }
+ COMPLETE => return,
+ _ => unreachable!("state is never set to invalid values"),
+ }
+ }
+ }
+}
diff --git a/library/std/src/sys_common/once/generic.rs b/library/std/src/sys_common/once/generic.rs
new file mode 100644
index 000000000..acf5f2471
--- /dev/null
+++ b/library/std/src/sys_common/once/generic.rs
@@ -0,0 +1,282 @@
+// Each `Once` has one word of atomic state, and this state is CAS'd on to
+// determine what to do. There are four possible states of a `Once`:
+//
+// * Incomplete - no initialization has run yet, and no thread is currently
+// using the Once.
+// * Poisoned - some thread has previously attempted to initialize the Once, but
+// it panicked, so the Once is now poisoned. There are no other
+// threads currently accessing this Once.
+// * Running - some thread is currently attempting to run initialization. It may
+// succeed, so all future threads need to wait for it to finish.
+// Note that this state is accompanied with a payload, described
+// below.
+// * Complete - initialization has completed and all future calls should finish
+// immediately.
+//
+// With 4 states we need 2 bits to encode this, and we use the remaining bits
+// in the word we have allocated as a queue of threads waiting for the thread
+// responsible for entering the RUNNING state. This queue is just a linked list
+// of Waiter nodes which is monotonically increasing in size. Each node is
+// allocated on the stack, and whenever the running closure finishes it will
+// consume the entire queue and notify all waiters they should try again.
+//
+// You'll find a few more details in the implementation, but that's the gist of
+// it!
+//
+// Atomic orderings:
+// When running `Once` we deal with multiple atomics:
+// `Once.state_and_queue` and an unknown number of `Waiter.signaled`.
+// * `state_and_queue` is used (1) as a state flag, (2) for synchronizing the
+// result of the `Once`, and (3) for synchronizing `Waiter` nodes.
+// - At the end of the `call` function we have to make sure the result
+// of the `Once` is acquired. So every load which can be the only one to
+// load COMPLETE must have at least acquire ordering, which means all
+// three of them.
+// - `WaiterQueue::drop` is the only place that may store COMPLETE, and
+// must do so with release ordering to make the result available.
+// - `wait` inserts `Waiter` nodes as a pointer in `state_and_queue`, and
+// needs to make the nodes available with release ordering. The load in
+// its `compare_exchange` can be relaxed because it only has to compare
+// the atomic, not to read other data.
+// - `WaiterQueue::drop` must see the `Waiter` nodes, so it must load
+// `state_and_queue` with acquire ordering.
+// - There is just one store where `state_and_queue` is used only as a
+// state flag, without having to synchronize data: switching the state
+// from INCOMPLETE to RUNNING in `call`. This store can be Relaxed,
+// but the read has to be Acquire because of the requirements mentioned
+// above.
+// * `Waiter.signaled` is both used as a flag, and to protect a field with
+// interior mutability in `Waiter`. `Waiter.thread` is changed in
+// `WaiterQueue::drop` which then sets `signaled` with release ordering.
+// After `wait` loads `signaled` with acquire ordering and sees it is true,
+// it needs to see the changes to drop the `Waiter` struct correctly.
+// * There is one place where the two atomics `Once.state_and_queue` and
+// `Waiter.signaled` come together, and might be reordered by the compiler or
+// processor. Because both use acquire ordering such a reordering is not
+// allowed, so no need for `SeqCst`.
+
+use crate::cell::Cell;
+use crate::fmt;
+use crate::ptr;
+use crate::sync as public;
+use crate::sync::atomic::{AtomicBool, AtomicPtr, Ordering};
+use crate::thread::{self, Thread};
+
+type Masked = ();
+
+pub struct Once {
+ state_and_queue: AtomicPtr<Masked>,
+}
+
+pub struct OnceState {
+ poisoned: bool,
+ set_state_on_drop_to: Cell<*mut Masked>,
+}
+
+// Four states that a Once can be in, encoded into the lower bits of
+// `state_and_queue` in the Once structure.
+const INCOMPLETE: usize = 0x0;
+const POISONED: usize = 0x1;
+const RUNNING: usize = 0x2;
+const COMPLETE: usize = 0x3;
+
+// Mask to learn about the state. All other bits are the queue of waiters if
+// this is in the RUNNING state.
+const STATE_MASK: usize = 0x3;
+
+// Representation of a node in the linked list of waiters, used while in the
+// RUNNING state.
+// Note: `Waiter` can't hold a mutable pointer to the next thread, because then
+// `wait` would both hand out a mutable reference to its `Waiter` node, and keep
+// a shared reference to check `signaled`. Instead we hold shared references and
+// use interior mutability.
+#[repr(align(4))] // Ensure the two lower bits are free to use as state bits.
+struct Waiter {
+ thread: Cell<Option<Thread>>,
+ signaled: AtomicBool,
+ next: *const Waiter,
+}
+
+// Head of a linked list of waiters.
+// Every node is a struct on the stack of a waiting thread.
+// Will wake up the waiters when it gets dropped, i.e. also on panic.
+struct WaiterQueue<'a> {
+ state_and_queue: &'a AtomicPtr<Masked>,
+ set_state_on_drop_to: *mut Masked,
+}
+
+impl Once {
+ #[inline]
+ pub const fn new() -> Once {
+ Once { state_and_queue: AtomicPtr::new(ptr::invalid_mut(INCOMPLETE)) }
+ }
+
+ #[inline]
+ pub fn is_completed(&self) -> bool {
+ // An `Acquire` load is enough because that makes all the initialization
+ // operations visible to us, and, this being a fast path, weaker
+ // ordering helps with performance. This `Acquire` synchronizes with
+ // `Release` operations on the slow path.
+ self.state_and_queue.load(Ordering::Acquire).addr() == COMPLETE
+ }
+
+ // This is a non-generic function to reduce the monomorphization cost of
+ // using `call_once` (this isn't exactly a trivial or small implementation).
+ //
+ // Additionally, this is tagged with `#[cold]` as it should indeed be cold
+ // and it helps let LLVM know that calls to this function should be off the
+ // fast path. Essentially, this should help generate more straight line code
+ // in LLVM.
+ //
+ // Finally, this takes an `FnMut` instead of a `FnOnce` because there's
+ // currently no way to take an `FnOnce` and call it via virtual dispatch
+ // without some allocation overhead.
+ #[cold]
+ #[track_caller]
+ pub fn call(&self, ignore_poisoning: bool, init: &mut dyn FnMut(&public::OnceState)) {
+ let mut state_and_queue = self.state_and_queue.load(Ordering::Acquire);
+ loop {
+ match state_and_queue.addr() {
+ COMPLETE => break,
+ POISONED if !ignore_poisoning => {
+ // Panic to propagate the poison.
+ panic!("Once instance has previously been poisoned");
+ }
+ POISONED | INCOMPLETE => {
+ // Try to register this thread as the one RUNNING.
+ let exchange_result = self.state_and_queue.compare_exchange(
+ state_and_queue,
+ ptr::invalid_mut(RUNNING),
+ Ordering::Acquire,
+ Ordering::Acquire,
+ );
+ if let Err(old) = exchange_result {
+ state_and_queue = old;
+ continue;
+ }
+ // `waiter_queue` will manage other waiting threads, and
+ // wake them up on drop.
+ let mut waiter_queue = WaiterQueue {
+ state_and_queue: &self.state_and_queue,
+ set_state_on_drop_to: ptr::invalid_mut(POISONED),
+ };
+ // Run the initialization function, letting it know if we're
+ // poisoned or not.
+ let init_state = public::OnceState {
+ inner: OnceState {
+ poisoned: state_and_queue.addr() == POISONED,
+ set_state_on_drop_to: Cell::new(ptr::invalid_mut(COMPLETE)),
+ },
+ };
+ init(&init_state);
+ waiter_queue.set_state_on_drop_to = init_state.inner.set_state_on_drop_to.get();
+ break;
+ }
+ _ => {
+ // All other values must be RUNNING with possibly a
+ // pointer to the waiter queue in the more significant bits.
+ assert!(state_and_queue.addr() & STATE_MASK == RUNNING);
+ wait(&self.state_and_queue, state_and_queue);
+ state_and_queue = self.state_and_queue.load(Ordering::Acquire);
+ }
+ }
+ }
+ }
+}
+
+fn wait(state_and_queue: &AtomicPtr<Masked>, mut current_state: *mut Masked) {
+ // Note: the following code was carefully written to avoid creating a
+ // mutable reference to `node` that gets aliased.
+ loop {
+ // Don't queue this thread if the status is no longer running,
+ // otherwise we will not be woken up.
+ if current_state.addr() & STATE_MASK != RUNNING {
+ return;
+ }
+
+ // Create the node for our current thread.
+ let node = Waiter {
+ thread: Cell::new(Some(thread::current())),
+ signaled: AtomicBool::new(false),
+ next: current_state.with_addr(current_state.addr() & !STATE_MASK) as *const Waiter,
+ };
+ let me = &node as *const Waiter as *const Masked as *mut Masked;
+
+ // Try to slide in the node at the head of the linked list, making sure
+ // that another thread didn't just replace the head of the linked list.
+ let exchange_result = state_and_queue.compare_exchange(
+ current_state,
+ me.with_addr(me.addr() | RUNNING),
+ Ordering::Release,
+ Ordering::Relaxed,
+ );
+ if let Err(old) = exchange_result {
+ current_state = old;
+ continue;
+ }
+
+ // We have enqueued ourselves, now let's wait.
+ // It is important not to return before being signaled, otherwise we
+ // would drop our `Waiter` node and leave a hole in the linked list
+ // (and a dangling reference). Guard against spurious wakeups by
+ // reparking ourselves until we are signaled.
+ while !node.signaled.load(Ordering::Acquire) {
+ // If the managing thread happens to signal and unpark us before we
+ // can park ourselves, the result could be this thread never gets
+ // unparked. Luckily `park` comes with the guarantee that if it got
+ // an `unpark` just before on an unparked thread it does not park.
+ thread::park();
+ }
+ break;
+ }
+}
+
+#[stable(feature = "std_debug", since = "1.16.0")]
+impl fmt::Debug for Once {
+ fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
+ f.debug_struct("Once").finish_non_exhaustive()
+ }
+}
+
+impl Drop for WaiterQueue<'_> {
+ fn drop(&mut self) {
+ // Swap out our state with however we finished.
+ let state_and_queue =
+ self.state_and_queue.swap(self.set_state_on_drop_to, Ordering::AcqRel);
+
+ // We should only ever see an old state which was RUNNING.
+ assert_eq!(state_and_queue.addr() & STATE_MASK, RUNNING);
+
+ // Walk the entire linked list of waiters and wake them up (in lifo
+ // order, last to register is first to wake up).
+ unsafe {
+ // Right after setting `node.signaled = true` the other thread may
+ // free `node` if there happens to be a spurious wakeup.
+ // So we have to take out the `thread` field and copy the pointer to
+ // `next` first.
+ let mut queue =
+ state_and_queue.with_addr(state_and_queue.addr() & !STATE_MASK) as *const Waiter;
+ while !queue.is_null() {
+ let next = (*queue).next;
+ let thread = (*queue).thread.take().unwrap();
+ (*queue).signaled.store(true, Ordering::Release);
+ // ^- FIXME (maybe): This is another case of issue #55005
+ // `store()` has a potentially dangling ref to `signaled`.
+ queue = next;
+ thread.unpark();
+ }
+ }
+ }
+}
+
+impl OnceState {
+ #[inline]
+ pub fn is_poisoned(&self) -> bool {
+ self.poisoned
+ }
+
+ #[inline]
+ pub fn poison(&self) {
+ self.set_state_on_drop_to.set(ptr::invalid_mut(POISONED));
+ }
+}
diff --git a/library/std/src/sys_common/once/mod.rs b/library/std/src/sys_common/once/mod.rs
new file mode 100644
index 000000000..8742e68cc
--- /dev/null
+++ b/library/std/src/sys_common/once/mod.rs
@@ -0,0 +1,43 @@
+// A "once" is a relatively simple primitive, and it's also typically provided
+// by the OS as well (see `pthread_once` or `InitOnceExecuteOnce`). The OS
+// primitives, however, tend to have surprising restrictions; for example, the
+// Unix one doesn't allow an argument to be passed to the function.
+//
+// As a result, we end up implementing it ourselves in the standard library.
+// This also gives us the opportunity to optimize the implementation a bit which
+// should help the fast path on call sites.
+//
+// So to recap, the guarantees of a Once are that it will call the
+// initialization closure at most once, and it will never return until the one
+// that's running has finished running. This means that we need some form of
+// blocking here while the custom callback is running at the very least.
+// Additionally, we add on the restriction of **poisoning**. Whenever an
+// initialization closure panics, the Once enters a "poisoned" state which means
+// that all future calls will immediately panic as well.
+//
+// So to implement this, one might first reach for a `Mutex`, but those cannot
+// be put into a `static`. It also gets a lot harder with poisoning to figure
+// out when the mutex needs to be deallocated because it's not after the closure
+// finishes, but after the first successful closure finishes.
+//
+// All in all, this is instead implemented with atomics and lock-free
+// operations! Whee!
+
+cfg_if::cfg_if! {
+ if #[cfg(any(
+ target_os = "linux",
+ target_os = "android",
+ all(target_arch = "wasm32", target_feature = "atomics"),
+ target_os = "freebsd",
+ target_os = "openbsd",
+ target_os = "dragonfly",
+ target_os = "fuchsia",
+ target_os = "hermit",
+ ))] {
+ mod futex;
+ pub use futex::{Once, OnceState};
+ } else {
+ mod generic;
+ pub use generic::{Once, OnceState};
+ }
+}
diff --git a/library/std/src/sys_common/remutex.rs b/library/std/src/sys_common/remutex.rs
index 8921af311..b448ae3a9 100644
--- a/library/std/src/sys_common/remutex.rs
+++ b/library/std/src/sys_common/remutex.rs
@@ -1,13 +1,11 @@
#[cfg(all(test, not(target_os = "emscripten")))]
mod tests;
+use super::mutex as sys;
use crate::cell::UnsafeCell;
-use crate::marker::PhantomPinned;
use crate::ops::Deref;
use crate::panic::{RefUnwindSafe, UnwindSafe};
-use crate::pin::Pin;
use crate::sync::atomic::{AtomicUsize, Ordering::Relaxed};
-use crate::sys::locks as sys;
/// A re-entrant mutual exclusion
///
@@ -41,11 +39,10 @@ use crate::sys::locks as sys;
/// synchronization is left to the mutex, making relaxed memory ordering for
/// the `owner` field fine in all cases.
pub struct ReentrantMutex<T> {
- mutex: sys::Mutex,
+ mutex: sys::MovableMutex,
owner: AtomicUsize,
lock_count: UnsafeCell<u32>,
data: T,
- _pinned: PhantomPinned,
}
unsafe impl<T: Send> Send for ReentrantMutex<T> {}
@@ -68,39 +65,22 @@ impl<T> RefUnwindSafe for ReentrantMutex<T> {}
/// guarded data.
#[must_use = "if unused the ReentrantMutex will immediately unlock"]
pub struct ReentrantMutexGuard<'a, T: 'a> {
- lock: Pin<&'a ReentrantMutex<T>>,
+ lock: &'a ReentrantMutex<T>,
}
impl<T> !Send for ReentrantMutexGuard<'_, T> {}
impl<T> ReentrantMutex<T> {
/// Creates a new reentrant mutex in an unlocked state.
- ///
- /// # Unsafety
- ///
- /// This function is unsafe because it is required that `init` is called
- /// once this mutex is in its final resting place, and only then are the
- /// lock/unlock methods safe.
- pub const unsafe fn new(t: T) -> ReentrantMutex<T> {
+ pub const fn new(t: T) -> ReentrantMutex<T> {
ReentrantMutex {
- mutex: sys::Mutex::new(),
+ mutex: sys::MovableMutex::new(),
owner: AtomicUsize::new(0),
lock_count: UnsafeCell::new(0),
data: t,
- _pinned: PhantomPinned,
}
}
- /// Initializes this mutex so it's ready for use.
- ///
- /// # Unsafety
- ///
- /// Unsafe to call more than once, and must be called after this will no
- /// longer move in memory.
- pub unsafe fn init(self: Pin<&mut Self>) {
- self.get_unchecked_mut().mutex.init()
- }
-
/// Acquires a mutex, blocking the current thread until it is able to do so.
///
/// This function will block the caller until it is available to acquire the mutex.
@@ -113,15 +93,14 @@ impl<T> ReentrantMutex<T> {
/// If another user of this mutex panicked while holding the mutex, then
/// this call will return failure if the mutex would otherwise be
/// acquired.
- pub fn lock(self: Pin<&Self>) -> ReentrantMutexGuard<'_, T> {
+ pub fn lock(&self) -> ReentrantMutexGuard<'_, T> {
let this_thread = current_thread_unique_ptr();
- // Safety: We only touch lock_count when we own the lock,
- // and since self is pinned we can safely call the lock() on the mutex.
+ // Safety: We only touch lock_count when we own the lock.
unsafe {
if self.owner.load(Relaxed) == this_thread {
self.increment_lock_count();
} else {
- self.mutex.lock();
+ self.mutex.raw_lock();
self.owner.store(this_thread, Relaxed);
debug_assert_eq!(*self.lock_count.get(), 0);
*self.lock_count.get() = 1;
@@ -142,10 +121,9 @@ impl<T> ReentrantMutex<T> {
/// If another user of this mutex panicked while holding the mutex, then
/// this call will return failure if the mutex would otherwise be
/// acquired.
- pub fn try_lock(self: Pin<&Self>) -> Option<ReentrantMutexGuard<'_, T>> {
+ pub fn try_lock(&self) -> Option<ReentrantMutexGuard<'_, T>> {
let this_thread = current_thread_unique_ptr();
- // Safety: We only touch lock_count when we own the lock,
- // and since self is pinned we can safely call the try_lock on the mutex.
+ // Safety: We only touch lock_count when we own the lock.
unsafe {
if self.owner.load(Relaxed) == this_thread {
self.increment_lock_count();
@@ -179,12 +157,12 @@ impl<T> Deref for ReentrantMutexGuard<'_, T> {
impl<T> Drop for ReentrantMutexGuard<'_, T> {
#[inline]
fn drop(&mut self) {
- // Safety: We own the lock, and the lock is pinned.
+ // Safety: We own the lock.
unsafe {
*self.lock.lock_count.get() -= 1;
if *self.lock.lock_count.get() == 0 {
self.lock.owner.store(0, Relaxed);
- self.lock.mutex.unlock();
+ self.lock.mutex.raw_unlock();
}
}
}
diff --git a/library/std/src/sys_common/remutex/tests.rs b/library/std/src/sys_common/remutex/tests.rs
index 64873b850..8e97ce11c 100644
--- a/library/std/src/sys_common/remutex/tests.rs
+++ b/library/std/src/sys_common/remutex/tests.rs
@@ -1,18 +1,11 @@
-use crate::boxed::Box;
use crate::cell::RefCell;
-use crate::pin::Pin;
use crate::sync::Arc;
use crate::sys_common::remutex::{ReentrantMutex, ReentrantMutexGuard};
use crate::thread;
#[test]
fn smoke() {
- let m = unsafe {
- let mut m = Box::pin(ReentrantMutex::new(()));
- m.as_mut().init();
- m
- };
- let m = m.as_ref();
+ let m = ReentrantMutex::new(());
{
let a = m.lock();
{
@@ -29,20 +22,15 @@ fn smoke() {
#[test]
fn is_mutex() {
- let m = unsafe {
- // FIXME: Simplify this if Arc gets an Arc::get_pin_mut.
- let mut m = Arc::new(ReentrantMutex::new(RefCell::new(0)));
- Pin::new_unchecked(Arc::get_mut_unchecked(&mut m)).init();
- Pin::new_unchecked(m)
- };
+ let m = Arc::new(ReentrantMutex::new(RefCell::new(0)));
let m2 = m.clone();
- let lock = m.as_ref().lock();
+ let lock = m.lock();
let child = thread::spawn(move || {
- let lock = m2.as_ref().lock();
+ let lock = m2.lock();
assert_eq!(*lock.borrow(), 4950);
});
for i in 0..100 {
- let lock = m.as_ref().lock();
+ let lock = m.lock();
*lock.borrow_mut() += i;
}
drop(lock);
@@ -51,22 +39,17 @@ fn is_mutex() {
#[test]
fn trylock_works() {
- let m = unsafe {
- // FIXME: Simplify this if Arc gets an Arc::get_pin_mut.
- let mut m = Arc::new(ReentrantMutex::new(()));
- Pin::new_unchecked(Arc::get_mut_unchecked(&mut m)).init();
- Pin::new_unchecked(m)
- };
+ let m = Arc::new(ReentrantMutex::new(()));
let m2 = m.clone();
- let _lock = m.as_ref().try_lock();
- let _lock2 = m.as_ref().try_lock();
+ let _lock = m.try_lock();
+ let _lock2 = m.try_lock();
thread::spawn(move || {
- let lock = m2.as_ref().try_lock();
+ let lock = m2.try_lock();
assert!(lock.is_none());
})
.join()
.unwrap();
- let _lock3 = m.as_ref().try_lock();
+ let _lock3 = m.try_lock();
}
pub struct Answer<'a>(pub ReentrantMutexGuard<'a, RefCell<u32>>);
diff --git a/library/std/src/sys_common/rwlock.rs b/library/std/src/sys_common/rwlock.rs
index ba56f3a8f..042981dac 100644
--- a/library/std/src/sys_common/rwlock.rs
+++ b/library/std/src/sys_common/rwlock.rs
@@ -1,65 +1,5 @@
use crate::sys::locks as imp;
-/// An OS-based reader-writer lock, meant for use in static variables.
-///
-/// This rwlock does not implement poisoning.
-///
-/// This rwlock has a const constructor ([`StaticRwLock::new`]), does not
-/// implement `Drop` to cleanup resources.
-pub struct StaticRwLock(imp::RwLock);
-
-impl StaticRwLock {
- /// Creates a new rwlock for use.
- #[inline]
- pub const fn new() -> Self {
- Self(imp::RwLock::new())
- }
-
- /// Acquires shared access to the underlying lock, blocking the current
- /// thread to do so.
- ///
- /// The lock is automatically unlocked when the returned guard is dropped.
- #[inline]
- pub fn read(&'static self) -> StaticRwLockReadGuard {
- unsafe { self.0.read() };
- StaticRwLockReadGuard(&self.0)
- }
-
- /// Acquires write access to the underlying lock, blocking the current thread
- /// to do so.
- ///
- /// The lock is automatically unlocked when the returned guard is dropped.
- #[inline]
- pub fn write(&'static self) -> StaticRwLockWriteGuard {
- unsafe { self.0.write() };
- StaticRwLockWriteGuard(&self.0)
- }
-}
-
-#[must_use]
-pub struct StaticRwLockReadGuard(&'static imp::RwLock);
-
-impl Drop for StaticRwLockReadGuard {
- #[inline]
- fn drop(&mut self) {
- unsafe {
- self.0.read_unlock();
- }
- }
-}
-
-#[must_use]
-pub struct StaticRwLockWriteGuard(&'static imp::RwLock);
-
-impl Drop for StaticRwLockWriteGuard {
- #[inline]
- fn drop(&mut self) {
- unsafe {
- self.0.write_unlock();
- }
- }
-}
-
/// An OS-based reader-writer lock.
///
/// This rwlock cleans up its resources in its `Drop` implementation and may
@@ -75,6 +15,7 @@ pub struct MovableRwLock(imp::MovableRwLock);
impl MovableRwLock {
/// Creates a new reader-writer lock for use.
#[inline]
+ #[rustc_const_stable(feature = "const_locks", since = "1.63.0")]
pub const fn new() -> Self {
Self(imp::MovableRwLock::new())
}
diff --git a/library/std/src/sys_common/thread_local_key.rs b/library/std/src/sys_common/thread_local_key.rs
index 70beebe86..747579f17 100644
--- a/library/std/src/sys_common/thread_local_key.rs
+++ b/library/std/src/sys_common/thread_local_key.rs
@@ -53,7 +53,6 @@ mod tests;
use crate::sync::atomic::{self, AtomicUsize, Ordering};
use crate::sys::thread_local_key as imp;
-use crate::sys_common::mutex::StaticMutex;
/// A type for TLS keys that are statically allocated.
///
@@ -69,8 +68,10 @@ use crate::sys_common::mutex::StaticMutex;
/// ```ignore (cannot-doctest-private-modules)
/// use tls::os::{StaticKey, INIT};
///
+/// // Use a regular global static to store the key.
/// static KEY: StaticKey = INIT;
///
+/// // The state provided via `get` and `set` is thread-local.
/// unsafe {
/// assert!(KEY.get().is_null());
/// KEY.set(1 as *mut u8);
@@ -149,25 +150,6 @@ impl StaticKey {
}
unsafe fn lazy_init(&self) -> usize {
- // Currently the Windows implementation of TLS is pretty hairy, and
- // it greatly simplifies creation if we just synchronize everything.
- //
- // Additionally a 0-index of a tls key hasn't been seen on windows, so
- // we just simplify the whole branch.
- if imp::requires_synchronized_create() {
- // We never call `INIT_LOCK.init()`, so it is UB to attempt to
- // acquire this mutex reentrantly!
- static INIT_LOCK: StaticMutex = StaticMutex::new();
- let _guard = INIT_LOCK.lock();
- let mut key = self.key.load(Ordering::SeqCst);
- if key == 0 {
- key = imp::create(self.dtor) as usize;
- self.key.store(key, Ordering::SeqCst);
- }
- rtassert!(key != 0);
- return key;
- }
-
// POSIX allows the key created here to be 0, but the compare_exchange
// below relies on using 0 as a sentinel value to check who won the
// race to set the shared TLS key. As far as I know, there is no
@@ -230,8 +212,6 @@ impl Key {
impl Drop for Key {
fn drop(&mut self) {
- // Right now Windows doesn't support TLS key destruction, but this also
- // isn't used anywhere other than tests, so just leak the TLS key.
- // unsafe { imp::destroy(self.key) }
+ unsafe { imp::destroy(self.key) }
}
}
diff --git a/library/std/src/sys_common/thread_local_key/tests.rs b/library/std/src/sys_common/thread_local_key/tests.rs
index 968738a41..6f32b858f 100644
--- a/library/std/src/sys_common/thread_local_key/tests.rs
+++ b/library/std/src/sys_common/thread_local_key/tests.rs
@@ -1,4 +1,5 @@
use super::{Key, StaticKey};
+use core::ptr;
fn assert_sync<T: Sync>() {}
fn assert_send<T: Send>() {}
@@ -12,8 +13,8 @@ fn smoke() {
let k2 = Key::new(None);
assert!(k1.get().is_null());
assert!(k2.get().is_null());
- k1.set(1 as *mut _);
- k2.set(2 as *mut _);
+ k1.set(ptr::invalid_mut(1));
+ k2.set(ptr::invalid_mut(2));
assert_eq!(k1.get() as usize, 1);
assert_eq!(k2.get() as usize, 2);
}
@@ -26,8 +27,8 @@ fn statik() {
unsafe {
assert!(K1.get().is_null());
assert!(K2.get().is_null());
- K1.set(1 as *mut _);
- K2.set(2 as *mut _);
+ K1.set(ptr::invalid_mut(1));
+ K2.set(ptr::invalid_mut(2));
assert_eq!(K1.get() as usize, 1);
assert_eq!(K2.get() as usize, 2);
}
diff --git a/library/std/src/sys_common/thread_parker/mod.rs b/library/std/src/sys_common/thread_parker/mod.rs
index cbd7832eb..f86a9a555 100644
--- a/library/std/src/sys_common/thread_parker/mod.rs
+++ b/library/std/src/sys_common/thread_parker/mod.rs
@@ -7,6 +7,7 @@ cfg_if::cfg_if! {
target_os = "openbsd",
target_os = "dragonfly",
target_os = "fuchsia",
+ target_os = "hermit",
))] {
mod futex;
pub use futex::Parker;
diff --git a/library/std/src/sys_common/wtf8.rs b/library/std/src/sys_common/wtf8.rs
index 57fa49893..dd53767d4 100644
--- a/library/std/src/sys_common/wtf8.rs
+++ b/library/std/src/sys_common/wtf8.rs
@@ -89,6 +89,24 @@ impl CodePoint {
self.value
}
+ /// Returns the numeric value of the code point if it is a leading surrogate.
+ #[inline]
+ pub fn to_lead_surrogate(&self) -> Option<u16> {
+ match self.value {
+ lead @ 0xD800..=0xDBFF => Some(lead as u16),
+ _ => None,
+ }
+ }
+
+ /// Returns the numeric value of the code point if it is a trailing surrogate.
+ #[inline]
+ pub fn to_trail_surrogate(&self) -> Option<u16> {
+ match self.value {
+ trail @ 0xDC00..=0xDFFF => Some(trail as u16),
+ _ => None,
+ }
+ }
+
/// Optionally returns a Unicode scalar value for the code point.
///
/// Returns `None` if the code point is a surrogate (from U+D800 to U+DFFF).
@@ -117,6 +135,14 @@ impl CodePoint {
#[derive(Eq, PartialEq, Ord, PartialOrd, Clone)]
pub struct Wtf8Buf {
bytes: Vec<u8>,
+
+ /// Do we know that `bytes` holds a valid UTF-8 encoding? We can easily
+ /// know this if we're constructed from a `String` or `&str`.
+ ///
+ /// It is possible for `bytes` to have valid UTF-8 without this being
+ /// set, such as when we're concatenating `&Wtf8`'s and surrogates become
+ /// paired, as we don't bother to rescan the entire string.
+ is_known_utf8: bool,
}
impl ops::Deref for Wtf8Buf {
@@ -147,13 +173,13 @@ impl Wtf8Buf {
/// Creates a new, empty WTF-8 string.
#[inline]
pub fn new() -> Wtf8Buf {
- Wtf8Buf { bytes: Vec::new() }
+ Wtf8Buf { bytes: Vec::new(), is_known_utf8: true }
}
/// Creates a new, empty WTF-8 string with pre-allocated capacity for `capacity` bytes.
#[inline]
pub fn with_capacity(capacity: usize) -> Wtf8Buf {
- Wtf8Buf { bytes: Vec::with_capacity(capacity) }
+ Wtf8Buf { bytes: Vec::with_capacity(capacity), is_known_utf8: true }
}
/// Creates a WTF-8 string from a UTF-8 `String`.
@@ -163,7 +189,7 @@ impl Wtf8Buf {
/// Since WTF-8 is a superset of UTF-8, this always succeeds.
#[inline]
pub fn from_string(string: String) -> Wtf8Buf {
- Wtf8Buf { bytes: string.into_bytes() }
+ Wtf8Buf { bytes: string.into_bytes(), is_known_utf8: true }
}
/// Creates a WTF-8 string from a UTF-8 `&str` slice.
@@ -173,11 +199,12 @@ impl Wtf8Buf {
/// Since WTF-8 is a superset of UTF-8, this always succeeds.
#[inline]
pub fn from_str(str: &str) -> Wtf8Buf {
- Wtf8Buf { bytes: <[_]>::to_vec(str.as_bytes()) }
+ Wtf8Buf { bytes: <[_]>::to_vec(str.as_bytes()), is_known_utf8: true }
}
pub fn clear(&mut self) {
- self.bytes.clear()
+ self.bytes.clear();
+ self.is_known_utf8 = true;
}
/// Creates a WTF-8 string from a potentially ill-formed UTF-16 slice of 16-bit code units.
@@ -193,9 +220,11 @@ impl Wtf8Buf {
let surrogate = surrogate.unpaired_surrogate();
// Surrogates are known to be in the code point range.
let code_point = unsafe { CodePoint::from_u32_unchecked(surrogate as u32) };
+ // The string will now contain an unpaired surrogate.
+ string.is_known_utf8 = false;
// Skip the WTF-8 concatenation check,
// surrogate pairs are already decoded by decode_utf16
- string.push_code_point_unchecked(code_point)
+ string.push_code_point_unchecked(code_point);
}
}
}
@@ -203,7 +232,7 @@ impl Wtf8Buf {
}
/// Copied from String::push
- /// This does **not** include the WTF-8 concatenation check.
+ /// This does **not** include the WTF-8 concatenation check or `is_known_utf8` check.
fn push_code_point_unchecked(&mut self, code_point: CodePoint) {
let mut bytes = [0; 4];
let bytes = char::encode_utf8_raw(code_point.value, &mut bytes);
@@ -217,6 +246,9 @@ impl Wtf8Buf {
#[inline]
pub fn as_mut_slice(&mut self) -> &mut Wtf8 {
+ // Safety: `Wtf8` doesn't expose any way to mutate the bytes that would
+ // cause them to change from well-formed UTF-8 to ill-formed UTF-8,
+ // which would break the assumptions of the `is_known_utf8` field.
unsafe { Wtf8::from_mut_bytes_unchecked(&mut self.bytes) }
}
@@ -236,7 +268,8 @@ impl Wtf8Buf {
/// in the given `Wtf8Buf`. The `Wtf8Buf` may reserve more space to avoid
/// frequent reallocations. After calling `try_reserve`, capacity will be
/// greater than or equal to `self.len() + additional`. Does nothing if
- /// capacity is already sufficient.
+ /// capacity is already sufficient. This method preserves the contents even
+ /// if an error occurs.
///
/// # Errors
///
@@ -313,7 +346,15 @@ impl Wtf8Buf {
self.push_char(decode_surrogate_pair(lead, trail));
self.bytes.extend_from_slice(other_without_trail_surrogate);
}
- _ => self.bytes.extend_from_slice(&other.bytes),
+ _ => {
+ // If we'll be pushing a string containing a surrogate, we may
+ // no longer have UTF-8.
+ if other.next_surrogate(0).is_some() {
+ self.is_known_utf8 = false;
+ }
+
+ self.bytes.extend_from_slice(&other.bytes);
+ }
}
}
@@ -330,13 +371,19 @@ impl Wtf8Buf {
/// like concatenating ill-formed UTF-16 strings effectively would.
#[inline]
pub fn push(&mut self, code_point: CodePoint) {
- if let trail @ 0xDC00..=0xDFFF = code_point.to_u32() {
+ if let Some(trail) = code_point.to_trail_surrogate() {
if let Some(lead) = (&*self).final_lead_surrogate() {
let len_without_lead_surrogate = self.len() - 3;
self.bytes.truncate(len_without_lead_surrogate);
- self.push_char(decode_surrogate_pair(lead, trail as u16));
+ self.push_char(decode_surrogate_pair(lead, trail));
return;
}
+
+ // We're pushing a trailing surrogate.
+ self.is_known_utf8 = false;
+ } else if code_point.to_lead_surrogate().is_some() {
+ // We're pushing a leading surrogate.
+ self.is_known_utf8 = false;
}
// No newly paired surrogates at the boundary.
@@ -363,9 +410,10 @@ impl Wtf8Buf {
/// (that is, if the string contains surrogates),
/// the original WTF-8 string is returned instead.
pub fn into_string(self) -> Result<String, Wtf8Buf> {
- match self.next_surrogate(0) {
- None => Ok(unsafe { String::from_utf8_unchecked(self.bytes) }),
- Some(_) => Err(self),
+ if self.is_known_utf8 || self.next_surrogate(0).is_none() {
+ Ok(unsafe { String::from_utf8_unchecked(self.bytes) })
+ } else {
+ Err(self)
}
}
@@ -375,6 +423,11 @@ impl Wtf8Buf {
///
/// Surrogates are replaced with `"\u{FFFD}"` (the replacement character ā€œļæ½ā€)
pub fn into_string_lossy(mut self) -> String {
+ // Fast path: If we already have UTF-8, we can return it immediately.
+ if self.is_known_utf8 {
+ return unsafe { String::from_utf8_unchecked(self.bytes) };
+ }
+
let mut pos = 0;
loop {
match self.next_surrogate(pos) {
@@ -397,7 +450,7 @@ impl Wtf8Buf {
/// Converts a `Box<Wtf8>` into a `Wtf8Buf`.
pub fn from_box(boxed: Box<Wtf8>) -> Wtf8Buf {
let bytes: Box<[u8]> = unsafe { mem::transmute(boxed) };
- Wtf8Buf { bytes: bytes.into_vec() }
+ Wtf8Buf { bytes: bytes.into_vec(), is_known_utf8: false }
}
}
@@ -575,6 +628,11 @@ impl Wtf8 {
}
}
+ /// Creates an owned `Wtf8Buf` from a borrowed `Wtf8`.
+ pub fn to_owned(&self) -> Wtf8Buf {
+ Wtf8Buf { bytes: self.bytes.to_vec(), is_known_utf8: false }
+ }
+
/// Lossily converts the string to UTF-8.
/// Returns a UTF-8 `&str` slice if the contents are well-formed in UTF-8.
///
@@ -664,7 +722,8 @@ impl Wtf8 {
}
pub fn clone_into(&self, buf: &mut Wtf8Buf) {
- self.bytes.clone_into(&mut buf.bytes)
+ buf.is_known_utf8 = false;
+ self.bytes.clone_into(&mut buf.bytes);
}
/// Boxes this `Wtf8`.
@@ -704,12 +763,12 @@ impl Wtf8 {
#[inline]
pub fn to_ascii_lowercase(&self) -> Wtf8Buf {
- Wtf8Buf { bytes: self.bytes.to_ascii_lowercase() }
+ Wtf8Buf { bytes: self.bytes.to_ascii_lowercase(), is_known_utf8: false }
}
#[inline]
pub fn to_ascii_uppercase(&self) -> Wtf8Buf {
- Wtf8Buf { bytes: self.bytes.to_ascii_uppercase() }
+ Wtf8Buf { bytes: self.bytes.to_ascii_uppercase(), is_known_utf8: false }
}
#[inline]
diff --git a/library/std/src/sys_common/wtf8/tests.rs b/library/std/src/sys_common/wtf8/tests.rs
index 931996791..1a302d646 100644
--- a/library/std/src/sys_common/wtf8/tests.rs
+++ b/library/std/src/sys_common/wtf8/tests.rs
@@ -20,6 +20,36 @@ fn code_point_to_u32() {
}
#[test]
+fn code_point_to_lead_surrogate() {
+ fn c(value: u32) -> CodePoint {
+ CodePoint::from_u32(value).unwrap()
+ }
+ assert_eq!(c(0).to_lead_surrogate(), None);
+ assert_eq!(c(0xE9).to_lead_surrogate(), None);
+ assert_eq!(c(0xD800).to_lead_surrogate(), Some(0xD800));
+ assert_eq!(c(0xDBFF).to_lead_surrogate(), Some(0xDBFF));
+ assert_eq!(c(0xDC00).to_lead_surrogate(), None);
+ assert_eq!(c(0xDFFF).to_lead_surrogate(), None);
+ assert_eq!(c(0x1F4A9).to_lead_surrogate(), None);
+ assert_eq!(c(0x10FFFF).to_lead_surrogate(), None);
+}
+
+#[test]
+fn code_point_to_trail_surrogate() {
+ fn c(value: u32) -> CodePoint {
+ CodePoint::from_u32(value).unwrap()
+ }
+ assert_eq!(c(0).to_trail_surrogate(), None);
+ assert_eq!(c(0xE9).to_trail_surrogate(), None);
+ assert_eq!(c(0xD800).to_trail_surrogate(), None);
+ assert_eq!(c(0xDBFF).to_trail_surrogate(), None);
+ assert_eq!(c(0xDC00).to_trail_surrogate(), Some(0xDC00));
+ assert_eq!(c(0xDFFF).to_trail_surrogate(), Some(0xDFFF));
+ assert_eq!(c(0x1F4A9).to_trail_surrogate(), None);
+ assert_eq!(c(0x10FFFF).to_trail_surrogate(), None);
+}
+
+#[test]
fn code_point_from_char() {
assert_eq!(CodePoint::from_char('a').to_u32(), 0x61);
assert_eq!(CodePoint::from_char('💩').to_u32(), 0x1F4A9);
@@ -70,35 +100,66 @@ fn wtf8buf_from_string() {
#[test]
fn wtf8buf_from_wide() {
- assert_eq!(Wtf8Buf::from_wide(&[]).bytes, b"");
- assert_eq!(
- Wtf8Buf::from_wide(&[0x61, 0xE9, 0x20, 0xD83D, 0xD83D, 0xDCA9]).bytes,
- b"a\xC3\xA9 \xED\xA0\xBD\xF0\x9F\x92\xA9"
- );
+ let buf = Wtf8Buf::from_wide(&[]);
+ assert_eq!(buf.bytes, b"");
+ assert!(buf.is_known_utf8);
+
+ let buf = Wtf8Buf::from_wide(&[0x61, 0xE9, 0x20, 0xD83D, 0xDCA9]);
+ assert_eq!(buf.bytes, b"a\xC3\xA9 \xF0\x9F\x92\xA9");
+ assert!(buf.is_known_utf8);
+
+ let buf = Wtf8Buf::from_wide(&[0x61, 0xE9, 0x20, 0xD83D, 0xD83D, 0xDCA9]);
+ assert_eq!(buf.bytes, b"a\xC3\xA9 \xED\xA0\xBD\xF0\x9F\x92\xA9");
+ assert!(!buf.is_known_utf8);
+
+ let buf = Wtf8Buf::from_wide(&[0xD800]);
+ assert_eq!(buf.bytes, b"\xED\xA0\x80");
+ assert!(!buf.is_known_utf8);
+
+ let buf = Wtf8Buf::from_wide(&[0xDBFF]);
+ assert_eq!(buf.bytes, b"\xED\xAF\xBF");
+ assert!(!buf.is_known_utf8);
+
+ let buf = Wtf8Buf::from_wide(&[0xDC00]);
+ assert_eq!(buf.bytes, b"\xED\xB0\x80");
+ assert!(!buf.is_known_utf8);
+
+ let buf = Wtf8Buf::from_wide(&[0xDFFF]);
+ assert_eq!(buf.bytes, b"\xED\xBF\xBF");
+ assert!(!buf.is_known_utf8);
}
#[test]
fn wtf8buf_push_str() {
let mut string = Wtf8Buf::new();
assert_eq!(string.bytes, b"");
+ assert!(string.is_known_utf8);
+
string.push_str("aé 💩");
assert_eq!(string.bytes, b"a\xC3\xA9 \xF0\x9F\x92\xA9");
+ assert!(string.is_known_utf8);
}
#[test]
fn wtf8buf_push_char() {
let mut string = Wtf8Buf::from_str("aé ");
assert_eq!(string.bytes, b"a\xC3\xA9 ");
+ assert!(string.is_known_utf8);
+
string.push_char('💩');
assert_eq!(string.bytes, b"a\xC3\xA9 \xF0\x9F\x92\xA9");
+ assert!(string.is_known_utf8);
}
#[test]
fn wtf8buf_push() {
let mut string = Wtf8Buf::from_str("aé ");
assert_eq!(string.bytes, b"a\xC3\xA9 ");
+ assert!(string.is_known_utf8);
+
string.push(CodePoint::from_char('💩'));
assert_eq!(string.bytes, b"a\xC3\xA9 \xF0\x9F\x92\xA9");
+ assert!(string.is_known_utf8);
fn c(value: u32) -> CodePoint {
CodePoint::from_u32(value).unwrap()
@@ -106,37 +167,46 @@ fn wtf8buf_push() {
let mut string = Wtf8Buf::new();
string.push(c(0xD83D)); // lead
+ assert!(!string.is_known_utf8);
string.push(c(0xDCA9)); // trail
assert_eq!(string.bytes, b"\xF0\x9F\x92\xA9"); // Magic!
let mut string = Wtf8Buf::new();
string.push(c(0xD83D)); // lead
+ assert!(!string.is_known_utf8);
string.push(c(0x20)); // not surrogate
string.push(c(0xDCA9)); // trail
assert_eq!(string.bytes, b"\xED\xA0\xBD \xED\xB2\xA9");
let mut string = Wtf8Buf::new();
string.push(c(0xD800)); // lead
+ assert!(!string.is_known_utf8);
string.push(c(0xDBFF)); // lead
assert_eq!(string.bytes, b"\xED\xA0\x80\xED\xAF\xBF");
let mut string = Wtf8Buf::new();
string.push(c(0xD800)); // lead
+ assert!(!string.is_known_utf8);
string.push(c(0xE000)); // not surrogate
assert_eq!(string.bytes, b"\xED\xA0\x80\xEE\x80\x80");
let mut string = Wtf8Buf::new();
string.push(c(0xD7FF)); // not surrogate
+ assert!(string.is_known_utf8);
string.push(c(0xDC00)); // trail
+ assert!(!string.is_known_utf8);
assert_eq!(string.bytes, b"\xED\x9F\xBF\xED\xB0\x80");
let mut string = Wtf8Buf::new();
string.push(c(0x61)); // not surrogate, < 3 bytes
+ assert!(string.is_known_utf8);
string.push(c(0xDC00)); // trail
+ assert!(!string.is_known_utf8);
assert_eq!(string.bytes, b"\x61\xED\xB0\x80");
let mut string = Wtf8Buf::new();
string.push(c(0xDC00)); // trail
+ assert!(!string.is_known_utf8);
assert_eq!(string.bytes, b"\xED\xB0\x80");
}
@@ -146,6 +216,7 @@ fn wtf8buf_push_wtf8() {
assert_eq!(string.bytes, b"a\xC3\xA9");
string.push_wtf8(Wtf8::from_str(" 💩"));
assert_eq!(string.bytes, b"a\xC3\xA9 \xF0\x9F\x92\xA9");
+ assert!(string.is_known_utf8);
fn w(v: &[u8]) -> &Wtf8 {
unsafe { Wtf8::from_bytes_unchecked(v) }
@@ -161,37 +232,68 @@ fn wtf8buf_push_wtf8() {
string.push_wtf8(w(b" ")); // not surrogate
string.push_wtf8(w(b"\xED\xB2\xA9")); // trail
assert_eq!(string.bytes, b"\xED\xA0\xBD \xED\xB2\xA9");
+ assert!(!string.is_known_utf8);
let mut string = Wtf8Buf::new();
string.push_wtf8(w(b"\xED\xA0\x80")); // lead
string.push_wtf8(w(b"\xED\xAF\xBF")); // lead
assert_eq!(string.bytes, b"\xED\xA0\x80\xED\xAF\xBF");
+ assert!(!string.is_known_utf8);
let mut string = Wtf8Buf::new();
string.push_wtf8(w(b"\xED\xA0\x80")); // lead
string.push_wtf8(w(b"\xEE\x80\x80")); // not surrogate
assert_eq!(string.bytes, b"\xED\xA0\x80\xEE\x80\x80");
+ assert!(!string.is_known_utf8);
let mut string = Wtf8Buf::new();
string.push_wtf8(w(b"\xED\x9F\xBF")); // not surrogate
string.push_wtf8(w(b"\xED\xB0\x80")); // trail
assert_eq!(string.bytes, b"\xED\x9F\xBF\xED\xB0\x80");
+ assert!(!string.is_known_utf8);
let mut string = Wtf8Buf::new();
string.push_wtf8(w(b"a")); // not surrogate, < 3 bytes
string.push_wtf8(w(b"\xED\xB0\x80")); // trail
assert_eq!(string.bytes, b"\x61\xED\xB0\x80");
+ assert!(!string.is_known_utf8);
let mut string = Wtf8Buf::new();
string.push_wtf8(w(b"\xED\xB0\x80")); // trail
assert_eq!(string.bytes, b"\xED\xB0\x80");
+ assert!(!string.is_known_utf8);
}
#[test]
fn wtf8buf_truncate() {
let mut string = Wtf8Buf::from_str("aé");
+ assert!(string.is_known_utf8);
+
+ string.truncate(3);
+ assert_eq!(string.bytes, b"a\xC3\xA9");
+ assert!(string.is_known_utf8);
+
string.truncate(1);
assert_eq!(string.bytes, b"a");
+ assert!(string.is_known_utf8);
+
+ string.truncate(0);
+ assert_eq!(string.bytes, b"");
+ assert!(string.is_known_utf8);
+}
+
+#[test]
+fn wtf8buf_truncate_around_non_bmp() {
+ let mut string = Wtf8Buf::from_str("💩");
+ assert!(string.is_known_utf8);
+
+ string.truncate(4);
+ assert_eq!(string.bytes, b"\xF0\x9F\x92\xA9");
+ assert!(string.is_known_utf8);
+
+ string.truncate(0);
+ assert_eq!(string.bytes, b"");
+ assert!(string.is_known_utf8);
}
#[test]
@@ -209,10 +311,36 @@ fn wtf8buf_truncate_fail_longer() {
}
#[test]
+#[should_panic]
+fn wtf8buf_truncate_splitting_non_bmp3() {
+ let mut string = Wtf8Buf::from_str("💩");
+ assert!(string.is_known_utf8);
+ string.truncate(3);
+}
+
+#[test]
+#[should_panic]
+fn wtf8buf_truncate_splitting_non_bmp2() {
+ let mut string = Wtf8Buf::from_str("💩");
+ assert!(string.is_known_utf8);
+ string.truncate(2);
+}
+
+#[test]
+#[should_panic]
+fn wtf8buf_truncate_splitting_non_bmp1() {
+ let mut string = Wtf8Buf::from_str("💩");
+ assert!(string.is_known_utf8);
+ string.truncate(1);
+}
+
+#[test]
fn wtf8buf_into_string() {
let mut string = Wtf8Buf::from_str("aé 💩");
+ assert!(string.is_known_utf8);
assert_eq!(string.clone().into_string(), Ok(String::from("aé 💩")));
string.push(CodePoint::from_u32(0xD800).unwrap());
+ assert!(!string.is_known_utf8);
assert_eq!(string.clone().into_string(), Err(string));
}
@@ -229,15 +357,33 @@ fn wtf8buf_from_iterator() {
fn f(values: &[u32]) -> Wtf8Buf {
values.iter().map(|&c| CodePoint::from_u32(c).unwrap()).collect::<Wtf8Buf>()
}
- assert_eq!(f(&[0x61, 0xE9, 0x20, 0x1F4A9]).bytes, b"a\xC3\xA9 \xF0\x9F\x92\xA9");
+ assert_eq!(
+ f(&[0x61, 0xE9, 0x20, 0x1F4A9]),
+ Wtf8Buf { bytes: b"a\xC3\xA9 \xF0\x9F\x92\xA9".to_vec(), is_known_utf8: true }
+ );
assert_eq!(f(&[0xD83D, 0xDCA9]).bytes, b"\xF0\x9F\x92\xA9"); // Magic!
- assert_eq!(f(&[0xD83D, 0x20, 0xDCA9]).bytes, b"\xED\xA0\xBD \xED\xB2\xA9");
- assert_eq!(f(&[0xD800, 0xDBFF]).bytes, b"\xED\xA0\x80\xED\xAF\xBF");
- assert_eq!(f(&[0xD800, 0xE000]).bytes, b"\xED\xA0\x80\xEE\x80\x80");
- assert_eq!(f(&[0xD7FF, 0xDC00]).bytes, b"\xED\x9F\xBF\xED\xB0\x80");
- assert_eq!(f(&[0x61, 0xDC00]).bytes, b"\x61\xED\xB0\x80");
- assert_eq!(f(&[0xDC00]).bytes, b"\xED\xB0\x80");
+ assert_eq!(
+ f(&[0xD83D, 0x20, 0xDCA9]),
+ Wtf8Buf { bytes: b"\xED\xA0\xBD \xED\xB2\xA9".to_vec(), is_known_utf8: false }
+ );
+ assert_eq!(
+ f(&[0xD800, 0xDBFF]),
+ Wtf8Buf { bytes: b"\xED\xA0\x80\xED\xAF\xBF".to_vec(), is_known_utf8: false }
+ );
+ assert_eq!(
+ f(&[0xD800, 0xE000]),
+ Wtf8Buf { bytes: b"\xED\xA0\x80\xEE\x80\x80".to_vec(), is_known_utf8: false }
+ );
+ assert_eq!(
+ f(&[0xD7FF, 0xDC00]),
+ Wtf8Buf { bytes: b"\xED\x9F\xBF\xED\xB0\x80".to_vec(), is_known_utf8: false }
+ );
+ assert_eq!(
+ f(&[0x61, 0xDC00]),
+ Wtf8Buf { bytes: b"\x61\xED\xB0\x80".to_vec(), is_known_utf8: false }
+ );
+ assert_eq!(f(&[0xDC00]), Wtf8Buf { bytes: b"\xED\xB0\x80".to_vec(), is_known_utf8: false });
}
#[test]
@@ -251,15 +397,36 @@ fn wtf8buf_extend() {
string
}
- assert_eq!(e(&[0x61, 0xE9], &[0x20, 0x1F4A9]).bytes, b"a\xC3\xA9 \xF0\x9F\x92\xA9");
+ assert_eq!(
+ e(&[0x61, 0xE9], &[0x20, 0x1F4A9]),
+ Wtf8Buf { bytes: b"a\xC3\xA9 \xF0\x9F\x92\xA9".to_vec(), is_known_utf8: true }
+ );
assert_eq!(e(&[0xD83D], &[0xDCA9]).bytes, b"\xF0\x9F\x92\xA9"); // Magic!
- assert_eq!(e(&[0xD83D, 0x20], &[0xDCA9]).bytes, b"\xED\xA0\xBD \xED\xB2\xA9");
- assert_eq!(e(&[0xD800], &[0xDBFF]).bytes, b"\xED\xA0\x80\xED\xAF\xBF");
- assert_eq!(e(&[0xD800], &[0xE000]).bytes, b"\xED\xA0\x80\xEE\x80\x80");
- assert_eq!(e(&[0xD7FF], &[0xDC00]).bytes, b"\xED\x9F\xBF\xED\xB0\x80");
- assert_eq!(e(&[0x61], &[0xDC00]).bytes, b"\x61\xED\xB0\x80");
- assert_eq!(e(&[], &[0xDC00]).bytes, b"\xED\xB0\x80");
+ assert_eq!(
+ e(&[0xD83D, 0x20], &[0xDCA9]),
+ Wtf8Buf { bytes: b"\xED\xA0\xBD \xED\xB2\xA9".to_vec(), is_known_utf8: false }
+ );
+ assert_eq!(
+ e(&[0xD800], &[0xDBFF]),
+ Wtf8Buf { bytes: b"\xED\xA0\x80\xED\xAF\xBF".to_vec(), is_known_utf8: false }
+ );
+ assert_eq!(
+ e(&[0xD800], &[0xE000]),
+ Wtf8Buf { bytes: b"\xED\xA0\x80\xEE\x80\x80".to_vec(), is_known_utf8: false }
+ );
+ assert_eq!(
+ e(&[0xD7FF], &[0xDC00]),
+ Wtf8Buf { bytes: b"\xED\x9F\xBF\xED\xB0\x80".to_vec(), is_known_utf8: false }
+ );
+ assert_eq!(
+ e(&[0x61], &[0xDC00]),
+ Wtf8Buf { bytes: b"\x61\xED\xB0\x80".to_vec(), is_known_utf8: false }
+ );
+ assert_eq!(
+ e(&[], &[0xDC00]),
+ Wtf8Buf { bytes: b"\xED\xB0\x80".to_vec(), is_known_utf8: false }
+ );
}
#[test]
@@ -407,3 +574,93 @@ fn wtf8_encode_wide_size_hint() {
assert_eq!((0, Some(0)), iter.size_hint());
assert!(iter.next().is_none());
}
+
+#[test]
+fn wtf8_clone_into() {
+ let mut string = Wtf8Buf::new();
+ Wtf8::from_str("green").clone_into(&mut string);
+ assert_eq!(string.bytes, b"green");
+
+ let mut string = Wtf8Buf::from_str("green");
+ Wtf8::from_str("").clone_into(&mut string);
+ assert_eq!(string.bytes, b"");
+
+ let mut string = Wtf8Buf::from_str("red");
+ Wtf8::from_str("green").clone_into(&mut string);
+ assert_eq!(string.bytes, b"green");
+
+ let mut string = Wtf8Buf::from_str("green");
+ Wtf8::from_str("red").clone_into(&mut string);
+ assert_eq!(string.bytes, b"red");
+
+ let mut string = Wtf8Buf::from_str("green");
+ assert!(string.is_known_utf8);
+ unsafe { Wtf8::from_bytes_unchecked(b"\xED\xA0\x80").clone_into(&mut string) };
+ assert_eq!(string.bytes, b"\xED\xA0\x80");
+ assert!(!string.is_known_utf8);
+}
+
+#[test]
+fn wtf8_to_ascii_lowercase() {
+ let lowercase = Wtf8::from_str("").to_ascii_lowercase();
+ assert_eq!(lowercase.bytes, b"");
+
+ let lowercase = Wtf8::from_str("GrEeN gRaPeS! 🍇").to_ascii_lowercase();
+ assert_eq!(lowercase.bytes, b"green grapes! \xf0\x9f\x8d\x87");
+
+ let lowercase = unsafe { Wtf8::from_bytes_unchecked(b"\xED\xA0\x80").to_ascii_lowercase() };
+ assert_eq!(lowercase.bytes, b"\xED\xA0\x80");
+ assert!(!lowercase.is_known_utf8);
+}
+
+#[test]
+fn wtf8_to_ascii_uppercase() {
+ let uppercase = Wtf8::from_str("").to_ascii_uppercase();
+ assert_eq!(uppercase.bytes, b"");
+
+ let uppercase = Wtf8::from_str("GrEeN gRaPeS! 🍇").to_ascii_uppercase();
+ assert_eq!(uppercase.bytes, b"GREEN GRAPES! \xf0\x9f\x8d\x87");
+
+ let uppercase = unsafe { Wtf8::from_bytes_unchecked(b"\xED\xA0\x80").to_ascii_uppercase() };
+ assert_eq!(uppercase.bytes, b"\xED\xA0\x80");
+ assert!(!uppercase.is_known_utf8);
+}
+
+#[test]
+fn wtf8_make_ascii_lowercase() {
+ let mut lowercase = Wtf8Buf::from_str("");
+ lowercase.make_ascii_lowercase();
+ assert_eq!(lowercase.bytes, b"");
+
+ let mut lowercase = Wtf8Buf::from_str("GrEeN gRaPeS! 🍇");
+ lowercase.make_ascii_lowercase();
+ assert_eq!(lowercase.bytes, b"green grapes! \xf0\x9f\x8d\x87");
+
+ let mut lowercase = unsafe { Wtf8::from_bytes_unchecked(b"\xED\xA0\x80").to_owned() };
+ lowercase.make_ascii_lowercase();
+ assert_eq!(lowercase.bytes, b"\xED\xA0\x80");
+ assert!(!lowercase.is_known_utf8);
+}
+
+#[test]
+fn wtf8_make_ascii_uppercase() {
+ let mut uppercase = Wtf8Buf::from_str("");
+ uppercase.make_ascii_uppercase();
+ assert_eq!(uppercase.bytes, b"");
+
+ let mut uppercase = Wtf8Buf::from_str("GrEeN gRaPeS! 🍇");
+ uppercase.make_ascii_uppercase();
+ assert_eq!(uppercase.bytes, b"GREEN GRAPES! \xf0\x9f\x8d\x87");
+
+ let mut uppercase = unsafe { Wtf8::from_bytes_unchecked(b"\xED\xA0\x80").to_owned() };
+ uppercase.make_ascii_uppercase();
+ assert_eq!(uppercase.bytes, b"\xED\xA0\x80");
+ assert!(!uppercase.is_known_utf8);
+}
+
+#[test]
+fn wtf8_to_owned() {
+ let string = unsafe { Wtf8::from_bytes_unchecked(b"\xED\xA0\x80").to_owned() };
+ assert_eq!(string.bytes, b"\xED\xA0\x80");
+ assert!(!string.is_known_utf8);
+}