diff options
Diffstat (limited to '')
-rw-r--r-- | library/std/src/sys_common/backtrace.rs | 9 | ||||
-rw-r--r-- | library/std/src/sys_common/condvar.rs | 1 | ||||
-rw-r--r-- | library/std/src/sys_common/condvar/check.rs | 1 | ||||
-rw-r--r-- | library/std/src/sys_common/mod.rs | 10 | ||||
-rw-r--r-- | library/std/src/sys_common/mutex.rs | 45 | ||||
-rw-r--r-- | library/std/src/sys_common/net.rs | 28 | ||||
-rw-r--r-- | library/std/src/sys_common/once/futex.rs | 134 | ||||
-rw-r--r-- | library/std/src/sys_common/once/generic.rs | 282 | ||||
-rw-r--r-- | library/std/src/sys_common/once/mod.rs | 43 | ||||
-rw-r--r-- | library/std/src/sys_common/remutex.rs | 46 | ||||
-rw-r--r-- | library/std/src/sys_common/remutex/tests.rs | 37 | ||||
-rw-r--r-- | library/std/src/sys_common/rwlock.rs | 61 | ||||
-rw-r--r-- | library/std/src/sys_common/thread_local_key.rs | 26 | ||||
-rw-r--r-- | library/std/src/sys_common/thread_local_key/tests.rs | 9 | ||||
-rw-r--r-- | library/std/src/sys_common/thread_parker/mod.rs | 1 | ||||
-rw-r--r-- | library/std/src/sys_common/wtf8.rs | 95 | ||||
-rw-r--r-- | library/std/src/sys_common/wtf8/tests.rs | 295 |
17 files changed, 878 insertions, 245 deletions
diff --git a/library/std/src/sys_common/backtrace.rs b/library/std/src/sys_common/backtrace.rs index 31164afdc..8807077cb 100644 --- a/library/std/src/sys_common/backtrace.rs +++ b/library/std/src/sys_common/backtrace.rs @@ -7,15 +7,14 @@ use crate::fmt; use crate::io; use crate::io::prelude::*; use crate::path::{self, Path, PathBuf}; -use crate::sys_common::mutex::StaticMutex; +use crate::sync::{Mutex, PoisonError}; /// Max number of frames to print. const MAX_NB_FRAMES: usize = 100; -// SAFETY: Don't attempt to lock this reentrantly. -pub unsafe fn lock() -> impl Drop { - static LOCK: StaticMutex = StaticMutex::new(); - LOCK.lock() +pub fn lock() -> impl Drop { + static LOCK: Mutex<()> = Mutex::new(()); + LOCK.lock().unwrap_or_else(PoisonError::into_inner) } /// Prints the current backtrace. diff --git a/library/std/src/sys_common/condvar.rs b/library/std/src/sys_common/condvar.rs index f3ac1061b..8bc5b2411 100644 --- a/library/std/src/sys_common/condvar.rs +++ b/library/std/src/sys_common/condvar.rs @@ -15,6 +15,7 @@ pub struct Condvar { impl Condvar { /// Creates a new condition variable for use. #[inline] + #[rustc_const_stable(feature = "const_locks", since = "1.63.0")] pub const fn new() -> Self { Self { inner: imp::MovableCondvar::new(), check: CondvarCheck::new() } } diff --git a/library/std/src/sys_common/condvar/check.rs b/library/std/src/sys_common/condvar/check.rs index ce8f36704..4ac9e62bf 100644 --- a/library/std/src/sys_common/condvar/check.rs +++ b/library/std/src/sys_common/condvar/check.rs @@ -50,6 +50,7 @@ pub struct NoCheck; #[allow(dead_code)] impl NoCheck { + #[rustc_const_stable(feature = "const_locks", since = "1.63.0")] pub const fn new() -> Self { Self } diff --git a/library/std/src/sys_common/mod.rs b/library/std/src/sys_common/mod.rs index 80f56bf75..8c19f9332 100644 --- a/library/std/src/sys_common/mod.rs +++ b/library/std/src/sys_common/mod.rs @@ -27,17 +27,25 @@ pub mod io; pub mod lazy_box; pub mod memchr; pub mod mutex; +pub mod once; pub mod process; pub mod remutex; pub mod rwlock; pub mod thread; pub mod thread_info; pub mod thread_local_dtor; -pub mod thread_local_key; pub mod thread_parker; pub mod wtf8; cfg_if::cfg_if! { + if #[cfg(target_os = "windows")] { + pub use crate::sys::thread_local_key; + } else { + pub mod thread_local_key; + } +} + +cfg_if::cfg_if! { if #[cfg(any(target_os = "l4re", target_os = "hermit", feature = "restricted-std", diff --git a/library/std/src/sys_common/mutex.rs b/library/std/src/sys_common/mutex.rs index 48479f5bd..98046f20f 100644 --- a/library/std/src/sys_common/mutex.rs +++ b/library/std/src/sys_common/mutex.rs @@ -1,49 +1,5 @@ use crate::sys::locks as imp; -/// An OS-based mutual exclusion lock, meant for use in static variables. -/// -/// This mutex has a const constructor ([`StaticMutex::new`]), does not -/// implement `Drop` to cleanup resources, and causes UB when used reentrantly. -/// -/// This mutex does not implement poisoning. -/// -/// This is a wrapper around `imp::Mutex` that does *not* call `init()` and -/// `destroy()`. -pub struct StaticMutex(imp::Mutex); - -unsafe impl Sync for StaticMutex {} - -impl StaticMutex { - /// Creates a new mutex for use. - #[inline] - pub const fn new() -> Self { - Self(imp::Mutex::new()) - } - - /// Calls raw_lock() and then returns an RAII guard to guarantee the mutex - /// will be unlocked. - /// - /// It is undefined behaviour to call this function while locked by the - /// same thread. - #[inline] - pub unsafe fn lock(&'static self) -> StaticMutexGuard { - self.0.lock(); - StaticMutexGuard(&self.0) - } -} - -#[must_use] -pub struct StaticMutexGuard(&'static imp::Mutex); - -impl Drop for StaticMutexGuard { - #[inline] - fn drop(&mut self) { - unsafe { - self.0.unlock(); - } - } -} - /// An OS-based mutual exclusion lock. /// /// This mutex cleans up its resources in its `Drop` implementation, may safely @@ -61,6 +17,7 @@ unsafe impl Sync for MovableMutex {} impl MovableMutex { /// Creates a new mutex. #[inline] + #[rustc_const_stable(feature = "const_locks", since = "1.63.0")] pub const fn new() -> Self { Self(imp::MovableMutex::new()) } diff --git a/library/std/src/sys_common/net.rs b/library/std/src/sys_common/net.rs index 33d336c43..fad4a6333 100644 --- a/library/std/src/sys_common/net.rs +++ b/library/std/src/sys_common/net.rs @@ -2,15 +2,16 @@ mod tests; use crate::cmp; -use crate::ffi::CString; +use crate::convert::{TryFrom, TryInto}; use crate::fmt; use crate::io::{self, ErrorKind, IoSlice, IoSliceMut}; use crate::mem; use crate::net::{Ipv4Addr, Ipv6Addr, Shutdown, SocketAddr}; use crate::ptr; +use crate::sys::common::small_c_string::run_with_cstr; use crate::sys::net::netc as c; use crate::sys::net::{cvt, cvt_gai, cvt_r, init, wrlen_t, Socket}; -use crate::sys_common::{FromInner, IntoInner}; +use crate::sys_common::{AsInner, FromInner, IntoInner}; use crate::time::Duration; use libc::{c_int, c_void}; @@ -197,14 +198,15 @@ impl<'a> TryFrom<(&'a str, u16)> for LookupHost { fn try_from((host, port): (&'a str, u16)) -> io::Result<LookupHost> { init(); - let c_host = CString::new(host)?; - let mut hints: c::addrinfo = unsafe { mem::zeroed() }; - hints.ai_socktype = c::SOCK_STREAM; - let mut res = ptr::null_mut(); - unsafe { - cvt_gai(c::getaddrinfo(c_host.as_ptr(), ptr::null(), &hints, &mut res)) - .map(|_| LookupHost { original: res, cur: res, port }) - } + run_with_cstr(host.as_bytes(), |c_host| { + let mut hints: c::addrinfo = unsafe { mem::zeroed() }; + hints.ai_socktype = c::SOCK_STREAM; + let mut res = ptr::null_mut(); + unsafe { + cvt_gai(c::getaddrinfo(c_host.as_ptr(), ptr::null(), &hints, &mut res)) + .map(|_| LookupHost { original: res, cur: res, port }) + } + }) } } @@ -345,6 +347,12 @@ impl TcpStream { } } +impl AsInner<Socket> for TcpStream { + fn as_inner(&self) -> &Socket { + &self.inner + } +} + impl FromInner<Socket> for TcpStream { fn from_inner(socket: Socket) -> TcpStream { TcpStream { inner: socket } diff --git a/library/std/src/sys_common/once/futex.rs b/library/std/src/sys_common/once/futex.rs new file mode 100644 index 000000000..5c7e6c013 --- /dev/null +++ b/library/std/src/sys_common/once/futex.rs @@ -0,0 +1,134 @@ +use crate::cell::Cell; +use crate::sync as public; +use crate::sync::atomic::{ + AtomicU32, + Ordering::{Acquire, Relaxed, Release}, +}; +use crate::sys::futex::{futex_wait, futex_wake_all}; + +// On some platforms, the OS is very nice and handles the waiter queue for us. +// This means we only need one atomic value with 5 states: + +/// No initialization has run yet, and no thread is currently using the Once. +const INCOMPLETE: u32 = 0; +/// Some thread has previously attempted to initialize the Once, but it panicked, +/// so the Once is now poisoned. There are no other threads currently accessing +/// this Once. +const POISONED: u32 = 1; +/// Some thread is currently attempting to run initialization. It may succeed, +/// so all future threads need to wait for it to finish. +const RUNNING: u32 = 2; +/// Some thread is currently attempting to run initialization and there are threads +/// waiting for it to finish. +const QUEUED: u32 = 3; +/// Initialization has completed and all future calls should finish immediately. +const COMPLETE: u32 = 4; + +// Threads wait by setting the state to QUEUED and calling `futex_wait` on the state +// variable. When the running thread finishes, it will wake all waiting threads using +// `futex_wake_all`. + +pub struct OnceState { + poisoned: bool, + set_state_to: Cell<u32>, +} + +impl OnceState { + #[inline] + pub fn is_poisoned(&self) -> bool { + self.poisoned + } + + #[inline] + pub fn poison(&self) { + self.set_state_to.set(POISONED); + } +} + +struct CompletionGuard<'a> { + state: &'a AtomicU32, + set_state_on_drop_to: u32, +} + +impl<'a> Drop for CompletionGuard<'a> { + fn drop(&mut self) { + // Use release ordering to propagate changes to all threads checking + // up on the Once. `futex_wake_all` does its own synchronization, hence + // we do not need `AcqRel`. + if self.state.swap(self.set_state_on_drop_to, Release) == QUEUED { + futex_wake_all(&self.state); + } + } +} + +pub struct Once { + state: AtomicU32, +} + +impl Once { + #[inline] + pub const fn new() -> Once { + Once { state: AtomicU32::new(INCOMPLETE) } + } + + #[inline] + pub fn is_completed(&self) -> bool { + // Use acquire ordering to make all initialization changes visible to the + // current thread. + self.state.load(Acquire) == COMPLETE + } + + // This uses FnMut to match the API of the generic implementation. As this + // implementation is quite light-weight, it is generic over the closure and + // so avoids the cost of dynamic dispatch. + #[cold] + #[track_caller] + pub fn call(&self, ignore_poisoning: bool, f: &mut impl FnMut(&public::OnceState)) { + let mut state = self.state.load(Acquire); + loop { + match state { + POISONED if !ignore_poisoning => { + // Panic to propagate the poison. + panic!("Once instance has previously been poisoned"); + } + INCOMPLETE | POISONED => { + // Try to register the current thread as the one running. + if let Err(new) = + self.state.compare_exchange_weak(state, RUNNING, Acquire, Acquire) + { + state = new; + continue; + } + // `waiter_queue` will manage other waiting threads, and + // wake them up on drop. + let mut waiter_queue = + CompletionGuard { state: &self.state, set_state_on_drop_to: POISONED }; + // Run the function, letting it know if we're poisoned or not. + let f_state = public::OnceState { + inner: OnceState { + poisoned: state == POISONED, + set_state_to: Cell::new(COMPLETE), + }, + }; + f(&f_state); + waiter_queue.set_state_on_drop_to = f_state.inner.set_state_to.get(); + return; + } + RUNNING | QUEUED => { + // Set the state to QUEUED if it is not already. + if state == RUNNING + && let Err(new) = self.state.compare_exchange_weak(RUNNING, QUEUED, Relaxed, Acquire) + { + state = new; + continue; + } + + futex_wait(&self.state, QUEUED, None); + state = self.state.load(Acquire); + } + COMPLETE => return, + _ => unreachable!("state is never set to invalid values"), + } + } + } +} diff --git a/library/std/src/sys_common/once/generic.rs b/library/std/src/sys_common/once/generic.rs new file mode 100644 index 000000000..acf5f2471 --- /dev/null +++ b/library/std/src/sys_common/once/generic.rs @@ -0,0 +1,282 @@ +// Each `Once` has one word of atomic state, and this state is CAS'd on to +// determine what to do. There are four possible state of a `Once`: +// +// * Incomplete - no initialization has run yet, and no thread is currently +// using the Once. +// * Poisoned - some thread has previously attempted to initialize the Once, but +// it panicked, so the Once is now poisoned. There are no other +// threads currently accessing this Once. +// * Running - some thread is currently attempting to run initialization. It may +// succeed, so all future threads need to wait for it to finish. +// Note that this state is accompanied with a payload, described +// below. +// * Complete - initialization has completed and all future calls should finish +// immediately. +// +// With 4 states we need 2 bits to encode this, and we use the remaining bits +// in the word we have allocated as a queue of threads waiting for the thread +// responsible for entering the RUNNING state. This queue is just a linked list +// of Waiter nodes which is monotonically increasing in size. Each node is +// allocated on the stack, and whenever the running closure finishes it will +// consume the entire queue and notify all waiters they should try again. +// +// You'll find a few more details in the implementation, but that's the gist of +// it! +// +// Atomic orderings: +// When running `Once` we deal with multiple atomics: +// `Once.state_and_queue` and an unknown number of `Waiter.signaled`. +// * `state_and_queue` is used (1) as a state flag, (2) for synchronizing the +// result of the `Once`, and (3) for synchronizing `Waiter` nodes. +// - At the end of the `call` function we have to make sure the result +// of the `Once` is acquired. So every load which can be the only one to +// load COMPLETED must have at least acquire ordering, which means all +// three of them. +// - `WaiterQueue::drop` is the only place that may store COMPLETED, and +// must do so with release ordering to make the result available. +// - `wait` inserts `Waiter` nodes as a pointer in `state_and_queue`, and +// needs to make the nodes available with release ordering. The load in +// its `compare_exchange` can be relaxed because it only has to compare +// the atomic, not to read other data. +// - `WaiterQueue::drop` must see the `Waiter` nodes, so it must load +// `state_and_queue` with acquire ordering. +// - There is just one store where `state_and_queue` is used only as a +// state flag, without having to synchronize data: switching the state +// from INCOMPLETE to RUNNING in `call`. This store can be Relaxed, +// but the read has to be Acquire because of the requirements mentioned +// above. +// * `Waiter.signaled` is both used as a flag, and to protect a field with +// interior mutability in `Waiter`. `Waiter.thread` is changed in +// `WaiterQueue::drop` which then sets `signaled` with release ordering. +// After `wait` loads `signaled` with acquire ordering and sees it is true, +// it needs to see the changes to drop the `Waiter` struct correctly. +// * There is one place where the two atomics `Once.state_and_queue` and +// `Waiter.signaled` come together, and might be reordered by the compiler or +// processor. Because both use acquire ordering such a reordering is not +// allowed, so no need for `SeqCst`. + +use crate::cell::Cell; +use crate::fmt; +use crate::ptr; +use crate::sync as public; +use crate::sync::atomic::{AtomicBool, AtomicPtr, Ordering}; +use crate::thread::{self, Thread}; + +type Masked = (); + +pub struct Once { + state_and_queue: AtomicPtr<Masked>, +} + +pub struct OnceState { + poisoned: bool, + set_state_on_drop_to: Cell<*mut Masked>, +} + +// Four states that a Once can be in, encoded into the lower bits of +// `state_and_queue` in the Once structure. +const INCOMPLETE: usize = 0x0; +const POISONED: usize = 0x1; +const RUNNING: usize = 0x2; +const COMPLETE: usize = 0x3; + +// Mask to learn about the state. All other bits are the queue of waiters if +// this is in the RUNNING state. +const STATE_MASK: usize = 0x3; + +// Representation of a node in the linked list of waiters, used while in the +// RUNNING state. +// Note: `Waiter` can't hold a mutable pointer to the next thread, because then +// `wait` would both hand out a mutable reference to its `Waiter` node, and keep +// a shared reference to check `signaled`. Instead we hold shared references and +// use interior mutability. +#[repr(align(4))] // Ensure the two lower bits are free to use as state bits. +struct Waiter { + thread: Cell<Option<Thread>>, + signaled: AtomicBool, + next: *const Waiter, +} + +// Head of a linked list of waiters. +// Every node is a struct on the stack of a waiting thread. +// Will wake up the waiters when it gets dropped, i.e. also on panic. +struct WaiterQueue<'a> { + state_and_queue: &'a AtomicPtr<Masked>, + set_state_on_drop_to: *mut Masked, +} + +impl Once { + #[inline] + pub const fn new() -> Once { + Once { state_and_queue: AtomicPtr::new(ptr::invalid_mut(INCOMPLETE)) } + } + + #[inline] + pub fn is_completed(&self) -> bool { + // An `Acquire` load is enough because that makes all the initialization + // operations visible to us, and, this being a fast path, weaker + // ordering helps with performance. This `Acquire` synchronizes with + // `Release` operations on the slow path. + self.state_and_queue.load(Ordering::Acquire).addr() == COMPLETE + } + + // This is a non-generic function to reduce the monomorphization cost of + // using `call_once` (this isn't exactly a trivial or small implementation). + // + // Additionally, this is tagged with `#[cold]` as it should indeed be cold + // and it helps let LLVM know that calls to this function should be off the + // fast path. Essentially, this should help generate more straight line code + // in LLVM. + // + // Finally, this takes an `FnMut` instead of a `FnOnce` because there's + // currently no way to take an `FnOnce` and call it via virtual dispatch + // without some allocation overhead. + #[cold] + #[track_caller] + pub fn call(&self, ignore_poisoning: bool, init: &mut dyn FnMut(&public::OnceState)) { + let mut state_and_queue = self.state_and_queue.load(Ordering::Acquire); + loop { + match state_and_queue.addr() { + COMPLETE => break, + POISONED if !ignore_poisoning => { + // Panic to propagate the poison. + panic!("Once instance has previously been poisoned"); + } + POISONED | INCOMPLETE => { + // Try to register this thread as the one RUNNING. + let exchange_result = self.state_and_queue.compare_exchange( + state_and_queue, + ptr::invalid_mut(RUNNING), + Ordering::Acquire, + Ordering::Acquire, + ); + if let Err(old) = exchange_result { + state_and_queue = old; + continue; + } + // `waiter_queue` will manage other waiting threads, and + // wake them up on drop. + let mut waiter_queue = WaiterQueue { + state_and_queue: &self.state_and_queue, + set_state_on_drop_to: ptr::invalid_mut(POISONED), + }; + // Run the initialization function, letting it know if we're + // poisoned or not. + let init_state = public::OnceState { + inner: OnceState { + poisoned: state_and_queue.addr() == POISONED, + set_state_on_drop_to: Cell::new(ptr::invalid_mut(COMPLETE)), + }, + }; + init(&init_state); + waiter_queue.set_state_on_drop_to = init_state.inner.set_state_on_drop_to.get(); + break; + } + _ => { + // All other values must be RUNNING with possibly a + // pointer to the waiter queue in the more significant bits. + assert!(state_and_queue.addr() & STATE_MASK == RUNNING); + wait(&self.state_and_queue, state_and_queue); + state_and_queue = self.state_and_queue.load(Ordering::Acquire); + } + } + } + } +} + +fn wait(state_and_queue: &AtomicPtr<Masked>, mut current_state: *mut Masked) { + // Note: the following code was carefully written to avoid creating a + // mutable reference to `node` that gets aliased. + loop { + // Don't queue this thread if the status is no longer running, + // otherwise we will not be woken up. + if current_state.addr() & STATE_MASK != RUNNING { + return; + } + + // Create the node for our current thread. + let node = Waiter { + thread: Cell::new(Some(thread::current())), + signaled: AtomicBool::new(false), + next: current_state.with_addr(current_state.addr() & !STATE_MASK) as *const Waiter, + }; + let me = &node as *const Waiter as *const Masked as *mut Masked; + + // Try to slide in the node at the head of the linked list, making sure + // that another thread didn't just replace the head of the linked list. + let exchange_result = state_and_queue.compare_exchange( + current_state, + me.with_addr(me.addr() | RUNNING), + Ordering::Release, + Ordering::Relaxed, + ); + if let Err(old) = exchange_result { + current_state = old; + continue; + } + + // We have enqueued ourselves, now lets wait. + // It is important not to return before being signaled, otherwise we + // would drop our `Waiter` node and leave a hole in the linked list + // (and a dangling reference). Guard against spurious wakeups by + // reparking ourselves until we are signaled. + while !node.signaled.load(Ordering::Acquire) { + // If the managing thread happens to signal and unpark us before we + // can park ourselves, the result could be this thread never gets + // unparked. Luckily `park` comes with the guarantee that if it got + // an `unpark` just before on an unparked thread it does not park. + thread::park(); + } + break; + } +} + +#[stable(feature = "std_debug", since = "1.16.0")] +impl fmt::Debug for Once { + fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result { + f.debug_struct("Once").finish_non_exhaustive() + } +} + +impl Drop for WaiterQueue<'_> { + fn drop(&mut self) { + // Swap out our state with however we finished. + let state_and_queue = + self.state_and_queue.swap(self.set_state_on_drop_to, Ordering::AcqRel); + + // We should only ever see an old state which was RUNNING. + assert_eq!(state_and_queue.addr() & STATE_MASK, RUNNING); + + // Walk the entire linked list of waiters and wake them up (in lifo + // order, last to register is first to wake up). + unsafe { + // Right after setting `node.signaled = true` the other thread may + // free `node` if there happens to be has a spurious wakeup. + // So we have to take out the `thread` field and copy the pointer to + // `next` first. + let mut queue = + state_and_queue.with_addr(state_and_queue.addr() & !STATE_MASK) as *const Waiter; + while !queue.is_null() { + let next = (*queue).next; + let thread = (*queue).thread.take().unwrap(); + (*queue).signaled.store(true, Ordering::Release); + // ^- FIXME (maybe): This is another case of issue #55005 + // `store()` has a potentially dangling ref to `signaled`. + queue = next; + thread.unpark(); + } + } + } +} + +impl OnceState { + #[inline] + pub fn is_poisoned(&self) -> bool { + self.poisoned + } + + #[inline] + pub fn poison(&self) { + self.set_state_on_drop_to.set(ptr::invalid_mut(POISONED)); + } +} diff --git a/library/std/src/sys_common/once/mod.rs b/library/std/src/sys_common/once/mod.rs new file mode 100644 index 000000000..8742e68cc --- /dev/null +++ b/library/std/src/sys_common/once/mod.rs @@ -0,0 +1,43 @@ +// A "once" is a relatively simple primitive, and it's also typically provided +// by the OS as well (see `pthread_once` or `InitOnceExecuteOnce`). The OS +// primitives, however, tend to have surprising restrictions, such as the Unix +// one doesn't allow an argument to be passed to the function. +// +// As a result, we end up implementing it ourselves in the standard library. +// This also gives us the opportunity to optimize the implementation a bit which +// should help the fast path on call sites. +// +// So to recap, the guarantees of a Once are that it will call the +// initialization closure at most once, and it will never return until the one +// that's running has finished running. This means that we need some form of +// blocking here while the custom callback is running at the very least. +// Additionally, we add on the restriction of **poisoning**. Whenever an +// initialization closure panics, the Once enters a "poisoned" state which means +// that all future calls will immediately panic as well. +// +// So to implement this, one might first reach for a `Mutex`, but those cannot +// be put into a `static`. It also gets a lot harder with poisoning to figure +// out when the mutex needs to be deallocated because it's not after the closure +// finishes, but after the first successful closure finishes. +// +// All in all, this is instead implemented with atomics and lock-free +// operations! Whee! + +cfg_if::cfg_if! { + if #[cfg(any( + target_os = "linux", + target_os = "android", + all(target_arch = "wasm32", target_feature = "atomics"), + target_os = "freebsd", + target_os = "openbsd", + target_os = "dragonfly", + target_os = "fuchsia", + target_os = "hermit", + ))] { + mod futex; + pub use futex::{Once, OnceState}; + } else { + mod generic; + pub use generic::{Once, OnceState}; + } +} diff --git a/library/std/src/sys_common/remutex.rs b/library/std/src/sys_common/remutex.rs index 8921af311..b448ae3a9 100644 --- a/library/std/src/sys_common/remutex.rs +++ b/library/std/src/sys_common/remutex.rs @@ -1,13 +1,11 @@ #[cfg(all(test, not(target_os = "emscripten")))] mod tests; +use super::mutex as sys; use crate::cell::UnsafeCell; -use crate::marker::PhantomPinned; use crate::ops::Deref; use crate::panic::{RefUnwindSafe, UnwindSafe}; -use crate::pin::Pin; use crate::sync::atomic::{AtomicUsize, Ordering::Relaxed}; -use crate::sys::locks as sys; /// A re-entrant mutual exclusion /// @@ -41,11 +39,10 @@ use crate::sys::locks as sys; /// synchronization is left to the mutex, making relaxed memory ordering for /// the `owner` field fine in all cases. pub struct ReentrantMutex<T> { - mutex: sys::Mutex, + mutex: sys::MovableMutex, owner: AtomicUsize, lock_count: UnsafeCell<u32>, data: T, - _pinned: PhantomPinned, } unsafe impl<T: Send> Send for ReentrantMutex<T> {} @@ -68,39 +65,22 @@ impl<T> RefUnwindSafe for ReentrantMutex<T> {} /// guarded data. #[must_use = "if unused the ReentrantMutex will immediately unlock"] pub struct ReentrantMutexGuard<'a, T: 'a> { - lock: Pin<&'a ReentrantMutex<T>>, + lock: &'a ReentrantMutex<T>, } impl<T> !Send for ReentrantMutexGuard<'_, T> {} impl<T> ReentrantMutex<T> { /// Creates a new reentrant mutex in an unlocked state. - /// - /// # Unsafety - /// - /// This function is unsafe because it is required that `init` is called - /// once this mutex is in its final resting place, and only then are the - /// lock/unlock methods safe. - pub const unsafe fn new(t: T) -> ReentrantMutex<T> { + pub const fn new(t: T) -> ReentrantMutex<T> { ReentrantMutex { - mutex: sys::Mutex::new(), + mutex: sys::MovableMutex::new(), owner: AtomicUsize::new(0), lock_count: UnsafeCell::new(0), data: t, - _pinned: PhantomPinned, } } - /// Initializes this mutex so it's ready for use. - /// - /// # Unsafety - /// - /// Unsafe to call more than once, and must be called after this will no - /// longer move in memory. - pub unsafe fn init(self: Pin<&mut Self>) { - self.get_unchecked_mut().mutex.init() - } - /// Acquires a mutex, blocking the current thread until it is able to do so. /// /// This function will block the caller until it is available to acquire the mutex. @@ -113,15 +93,14 @@ impl<T> ReentrantMutex<T> { /// If another user of this mutex panicked while holding the mutex, then /// this call will return failure if the mutex would otherwise be /// acquired. - pub fn lock(self: Pin<&Self>) -> ReentrantMutexGuard<'_, T> { + pub fn lock(&self) -> ReentrantMutexGuard<'_, T> { let this_thread = current_thread_unique_ptr(); - // Safety: We only touch lock_count when we own the lock, - // and since self is pinned we can safely call the lock() on the mutex. + // Safety: We only touch lock_count when we own the lock. unsafe { if self.owner.load(Relaxed) == this_thread { self.increment_lock_count(); } else { - self.mutex.lock(); + self.mutex.raw_lock(); self.owner.store(this_thread, Relaxed); debug_assert_eq!(*self.lock_count.get(), 0); *self.lock_count.get() = 1; @@ -142,10 +121,9 @@ impl<T> ReentrantMutex<T> { /// If another user of this mutex panicked while holding the mutex, then /// this call will return failure if the mutex would otherwise be /// acquired. - pub fn try_lock(self: Pin<&Self>) -> Option<ReentrantMutexGuard<'_, T>> { + pub fn try_lock(&self) -> Option<ReentrantMutexGuard<'_, T>> { let this_thread = current_thread_unique_ptr(); - // Safety: We only touch lock_count when we own the lock, - // and since self is pinned we can safely call the try_lock on the mutex. + // Safety: We only touch lock_count when we own the lock. unsafe { if self.owner.load(Relaxed) == this_thread { self.increment_lock_count(); @@ -179,12 +157,12 @@ impl<T> Deref for ReentrantMutexGuard<'_, T> { impl<T> Drop for ReentrantMutexGuard<'_, T> { #[inline] fn drop(&mut self) { - // Safety: We own the lock, and the lock is pinned. + // Safety: We own the lock. unsafe { *self.lock.lock_count.get() -= 1; if *self.lock.lock_count.get() == 0 { self.lock.owner.store(0, Relaxed); - self.lock.mutex.unlock(); + self.lock.mutex.raw_unlock(); } } } diff --git a/library/std/src/sys_common/remutex/tests.rs b/library/std/src/sys_common/remutex/tests.rs index 64873b850..8e97ce11c 100644 --- a/library/std/src/sys_common/remutex/tests.rs +++ b/library/std/src/sys_common/remutex/tests.rs @@ -1,18 +1,11 @@ -use crate::boxed::Box; use crate::cell::RefCell; -use crate::pin::Pin; use crate::sync::Arc; use crate::sys_common::remutex::{ReentrantMutex, ReentrantMutexGuard}; use crate::thread; #[test] fn smoke() { - let m = unsafe { - let mut m = Box::pin(ReentrantMutex::new(())); - m.as_mut().init(); - m - }; - let m = m.as_ref(); + let m = ReentrantMutex::new(()); { let a = m.lock(); { @@ -29,20 +22,15 @@ fn smoke() { #[test] fn is_mutex() { - let m = unsafe { - // FIXME: Simplify this if Arc gets an Arc::get_pin_mut. - let mut m = Arc::new(ReentrantMutex::new(RefCell::new(0))); - Pin::new_unchecked(Arc::get_mut_unchecked(&mut m)).init(); - Pin::new_unchecked(m) - }; + let m = Arc::new(ReentrantMutex::new(RefCell::new(0))); let m2 = m.clone(); - let lock = m.as_ref().lock(); + let lock = m.lock(); let child = thread::spawn(move || { - let lock = m2.as_ref().lock(); + let lock = m2.lock(); assert_eq!(*lock.borrow(), 4950); }); for i in 0..100 { - let lock = m.as_ref().lock(); + let lock = m.lock(); *lock.borrow_mut() += i; } drop(lock); @@ -51,22 +39,17 @@ fn is_mutex() { #[test] fn trylock_works() { - let m = unsafe { - // FIXME: Simplify this if Arc gets an Arc::get_pin_mut. - let mut m = Arc::new(ReentrantMutex::new(())); - Pin::new_unchecked(Arc::get_mut_unchecked(&mut m)).init(); - Pin::new_unchecked(m) - }; + let m = Arc::new(ReentrantMutex::new(())); let m2 = m.clone(); - let _lock = m.as_ref().try_lock(); - let _lock2 = m.as_ref().try_lock(); + let _lock = m.try_lock(); + let _lock2 = m.try_lock(); thread::spawn(move || { - let lock = m2.as_ref().try_lock(); + let lock = m2.try_lock(); assert!(lock.is_none()); }) .join() .unwrap(); - let _lock3 = m.as_ref().try_lock(); + let _lock3 = m.try_lock(); } pub struct Answer<'a>(pub ReentrantMutexGuard<'a, RefCell<u32>>); diff --git a/library/std/src/sys_common/rwlock.rs b/library/std/src/sys_common/rwlock.rs index ba56f3a8f..042981dac 100644 --- a/library/std/src/sys_common/rwlock.rs +++ b/library/std/src/sys_common/rwlock.rs @@ -1,65 +1,5 @@ use crate::sys::locks as imp; -/// An OS-based reader-writer lock, meant for use in static variables. -/// -/// This rwlock does not implement poisoning. -/// -/// This rwlock has a const constructor ([`StaticRwLock::new`]), does not -/// implement `Drop` to cleanup resources. -pub struct StaticRwLock(imp::RwLock); - -impl StaticRwLock { - /// Creates a new rwlock for use. - #[inline] - pub const fn new() -> Self { - Self(imp::RwLock::new()) - } - - /// Acquires shared access to the underlying lock, blocking the current - /// thread to do so. - /// - /// The lock is automatically unlocked when the returned guard is dropped. - #[inline] - pub fn read(&'static self) -> StaticRwLockReadGuard { - unsafe { self.0.read() }; - StaticRwLockReadGuard(&self.0) - } - - /// Acquires write access to the underlying lock, blocking the current thread - /// to do so. - /// - /// The lock is automatically unlocked when the returned guard is dropped. - #[inline] - pub fn write(&'static self) -> StaticRwLockWriteGuard { - unsafe { self.0.write() }; - StaticRwLockWriteGuard(&self.0) - } -} - -#[must_use] -pub struct StaticRwLockReadGuard(&'static imp::RwLock); - -impl Drop for StaticRwLockReadGuard { - #[inline] - fn drop(&mut self) { - unsafe { - self.0.read_unlock(); - } - } -} - -#[must_use] -pub struct StaticRwLockWriteGuard(&'static imp::RwLock); - -impl Drop for StaticRwLockWriteGuard { - #[inline] - fn drop(&mut self) { - unsafe { - self.0.write_unlock(); - } - } -} - /// An OS-based reader-writer lock. /// /// This rwlock cleans up its resources in its `Drop` implementation and may @@ -75,6 +15,7 @@ pub struct MovableRwLock(imp::MovableRwLock); impl MovableRwLock { /// Creates a new reader-writer lock for use. #[inline] + #[rustc_const_stable(feature = "const_locks", since = "1.63.0")] pub const fn new() -> Self { Self(imp::MovableRwLock::new()) } diff --git a/library/std/src/sys_common/thread_local_key.rs b/library/std/src/sys_common/thread_local_key.rs index 70beebe86..747579f17 100644 --- a/library/std/src/sys_common/thread_local_key.rs +++ b/library/std/src/sys_common/thread_local_key.rs @@ -53,7 +53,6 @@ mod tests; use crate::sync::atomic::{self, AtomicUsize, Ordering}; use crate::sys::thread_local_key as imp; -use crate::sys_common::mutex::StaticMutex; /// A type for TLS keys that are statically allocated. /// @@ -69,8 +68,10 @@ use crate::sys_common::mutex::StaticMutex; /// ```ignore (cannot-doctest-private-modules) /// use tls::os::{StaticKey, INIT}; /// +/// // Use a regular global static to store the key. /// static KEY: StaticKey = INIT; /// +/// // The state provided via `get` and `set` is thread-local. /// unsafe { /// assert!(KEY.get().is_null()); /// KEY.set(1 as *mut u8); @@ -149,25 +150,6 @@ impl StaticKey { } unsafe fn lazy_init(&self) -> usize { - // Currently the Windows implementation of TLS is pretty hairy, and - // it greatly simplifies creation if we just synchronize everything. - // - // Additionally a 0-index of a tls key hasn't been seen on windows, so - // we just simplify the whole branch. - if imp::requires_synchronized_create() { - // We never call `INIT_LOCK.init()`, so it is UB to attempt to - // acquire this mutex reentrantly! - static INIT_LOCK: StaticMutex = StaticMutex::new(); - let _guard = INIT_LOCK.lock(); - let mut key = self.key.load(Ordering::SeqCst); - if key == 0 { - key = imp::create(self.dtor) as usize; - self.key.store(key, Ordering::SeqCst); - } - rtassert!(key != 0); - return key; - } - // POSIX allows the key created here to be 0, but the compare_exchange // below relies on using 0 as a sentinel value to check who won the // race to set the shared TLS key. As far as I know, there is no @@ -230,8 +212,6 @@ impl Key { impl Drop for Key { fn drop(&mut self) { - // Right now Windows doesn't support TLS key destruction, but this also - // isn't used anywhere other than tests, so just leak the TLS key. - // unsafe { imp::destroy(self.key) } + unsafe { imp::destroy(self.key) } } } diff --git a/library/std/src/sys_common/thread_local_key/tests.rs b/library/std/src/sys_common/thread_local_key/tests.rs index 968738a41..6f32b858f 100644 --- a/library/std/src/sys_common/thread_local_key/tests.rs +++ b/library/std/src/sys_common/thread_local_key/tests.rs @@ -1,4 +1,5 @@ use super::{Key, StaticKey}; +use core::ptr; fn assert_sync<T: Sync>() {} fn assert_send<T: Send>() {} @@ -12,8 +13,8 @@ fn smoke() { let k2 = Key::new(None); assert!(k1.get().is_null()); assert!(k2.get().is_null()); - k1.set(1 as *mut _); - k2.set(2 as *mut _); + k1.set(ptr::invalid_mut(1)); + k2.set(ptr::invalid_mut(2)); assert_eq!(k1.get() as usize, 1); assert_eq!(k2.get() as usize, 2); } @@ -26,8 +27,8 @@ fn statik() { unsafe { assert!(K1.get().is_null()); assert!(K2.get().is_null()); - K1.set(1 as *mut _); - K2.set(2 as *mut _); + K1.set(ptr::invalid_mut(1)); + K2.set(ptr::invalid_mut(2)); assert_eq!(K1.get() as usize, 1); assert_eq!(K2.get() as usize, 2); } diff --git a/library/std/src/sys_common/thread_parker/mod.rs b/library/std/src/sys_common/thread_parker/mod.rs index cbd7832eb..f86a9a555 100644 --- a/library/std/src/sys_common/thread_parker/mod.rs +++ b/library/std/src/sys_common/thread_parker/mod.rs @@ -7,6 +7,7 @@ cfg_if::cfg_if! { target_os = "openbsd", target_os = "dragonfly", target_os = "fuchsia", + target_os = "hermit", ))] { mod futex; pub use futex::Parker; diff --git a/library/std/src/sys_common/wtf8.rs b/library/std/src/sys_common/wtf8.rs index 57fa49893..dd53767d4 100644 --- a/library/std/src/sys_common/wtf8.rs +++ b/library/std/src/sys_common/wtf8.rs @@ -89,6 +89,24 @@ impl CodePoint { self.value } + /// Returns the numeric value of the code point if it is a leading surrogate. + #[inline] + pub fn to_lead_surrogate(&self) -> Option<u16> { + match self.value { + lead @ 0xD800..=0xDBFF => Some(lead as u16), + _ => None, + } + } + + /// Returns the numeric value of the code point if it is a trailing surrogate. + #[inline] + pub fn to_trail_surrogate(&self) -> Option<u16> { + match self.value { + trail @ 0xDC00..=0xDFFF => Some(trail as u16), + _ => None, + } + } + /// Optionally returns a Unicode scalar value for the code point. /// /// Returns `None` if the code point is a surrogate (from U+D800 to U+DFFF). @@ -117,6 +135,14 @@ impl CodePoint { #[derive(Eq, PartialEq, Ord, PartialOrd, Clone)] pub struct Wtf8Buf { bytes: Vec<u8>, + + /// Do we know that `bytes` holds a valid UTF-8 encoding? We can easily + /// know this if we're constructed from a `String` or `&str`. + /// + /// It is possible for `bytes` to have valid UTF-8 without this being + /// set, such as when we're concatenating `&Wtf8`'s and surrogates become + /// paired, as we don't bother to rescan the entire string. + is_known_utf8: bool, } impl ops::Deref for Wtf8Buf { @@ -147,13 +173,13 @@ impl Wtf8Buf { /// Creates a new, empty WTF-8 string. #[inline] pub fn new() -> Wtf8Buf { - Wtf8Buf { bytes: Vec::new() } + Wtf8Buf { bytes: Vec::new(), is_known_utf8: true } } /// Creates a new, empty WTF-8 string with pre-allocated capacity for `capacity` bytes. #[inline] pub fn with_capacity(capacity: usize) -> Wtf8Buf { - Wtf8Buf { bytes: Vec::with_capacity(capacity) } + Wtf8Buf { bytes: Vec::with_capacity(capacity), is_known_utf8: true } } /// Creates a WTF-8 string from a UTF-8 `String`. @@ -163,7 +189,7 @@ impl Wtf8Buf { /// Since WTF-8 is a superset of UTF-8, this always succeeds. #[inline] pub fn from_string(string: String) -> Wtf8Buf { - Wtf8Buf { bytes: string.into_bytes() } + Wtf8Buf { bytes: string.into_bytes(), is_known_utf8: true } } /// Creates a WTF-8 string from a UTF-8 `&str` slice. @@ -173,11 +199,12 @@ impl Wtf8Buf { /// Since WTF-8 is a superset of UTF-8, this always succeeds. #[inline] pub fn from_str(str: &str) -> Wtf8Buf { - Wtf8Buf { bytes: <[_]>::to_vec(str.as_bytes()) } + Wtf8Buf { bytes: <[_]>::to_vec(str.as_bytes()), is_known_utf8: true } } pub fn clear(&mut self) { - self.bytes.clear() + self.bytes.clear(); + self.is_known_utf8 = true; } /// Creates a WTF-8 string from a potentially ill-formed UTF-16 slice of 16-bit code units. @@ -193,9 +220,11 @@ impl Wtf8Buf { let surrogate = surrogate.unpaired_surrogate(); // Surrogates are known to be in the code point range. let code_point = unsafe { CodePoint::from_u32_unchecked(surrogate as u32) }; + // The string will now contain an unpaired surrogate. + string.is_known_utf8 = false; // Skip the WTF-8 concatenation check, // surrogate pairs are already decoded by decode_utf16 - string.push_code_point_unchecked(code_point) + string.push_code_point_unchecked(code_point); } } } @@ -203,7 +232,7 @@ impl Wtf8Buf { } /// Copied from String::push - /// This does **not** include the WTF-8 concatenation check. + /// This does **not** include the WTF-8 concatenation check or `is_known_utf8` check. fn push_code_point_unchecked(&mut self, code_point: CodePoint) { let mut bytes = [0; 4]; let bytes = char::encode_utf8_raw(code_point.value, &mut bytes); @@ -217,6 +246,9 @@ impl Wtf8Buf { #[inline] pub fn as_mut_slice(&mut self) -> &mut Wtf8 { + // Safety: `Wtf8` doesn't expose any way to mutate the bytes that would + // cause them to change from well-formed UTF-8 to ill-formed UTF-8, + // which would break the assumptions of the `is_known_utf8` field. unsafe { Wtf8::from_mut_bytes_unchecked(&mut self.bytes) } } @@ -236,7 +268,8 @@ impl Wtf8Buf { /// in the given `Wtf8Buf`. The `Wtf8Buf` may reserve more space to avoid /// frequent reallocations. After calling `try_reserve`, capacity will be /// greater than or equal to `self.len() + additional`. Does nothing if - /// capacity is already sufficient. + /// capacity is already sufficient. This method preserves the contents even + /// if an error occurs. /// /// # Errors /// @@ -313,7 +346,15 @@ impl Wtf8Buf { self.push_char(decode_surrogate_pair(lead, trail)); self.bytes.extend_from_slice(other_without_trail_surrogate); } - _ => self.bytes.extend_from_slice(&other.bytes), + _ => { + // If we'll be pushing a string containing a surrogate, we may + // no longer have UTF-8. + if other.next_surrogate(0).is_some() { + self.is_known_utf8 = false; + } + + self.bytes.extend_from_slice(&other.bytes); + } } } @@ -330,13 +371,19 @@ impl Wtf8Buf { /// like concatenating ill-formed UTF-16 strings effectively would. #[inline] pub fn push(&mut self, code_point: CodePoint) { - if let trail @ 0xDC00..=0xDFFF = code_point.to_u32() { + if let Some(trail) = code_point.to_trail_surrogate() { if let Some(lead) = (&*self).final_lead_surrogate() { let len_without_lead_surrogate = self.len() - 3; self.bytes.truncate(len_without_lead_surrogate); - self.push_char(decode_surrogate_pair(lead, trail as u16)); + self.push_char(decode_surrogate_pair(lead, trail)); return; } + + // We're pushing a trailing surrogate. + self.is_known_utf8 = false; + } else if code_point.to_lead_surrogate().is_some() { + // We're pushing a leading surrogate. + self.is_known_utf8 = false; } // No newly paired surrogates at the boundary. @@ -363,9 +410,10 @@ impl Wtf8Buf { /// (that is, if the string contains surrogates), /// the original WTF-8 string is returned instead. pub fn into_string(self) -> Result<String, Wtf8Buf> { - match self.next_surrogate(0) { - None => Ok(unsafe { String::from_utf8_unchecked(self.bytes) }), - Some(_) => Err(self), + if self.is_known_utf8 || self.next_surrogate(0).is_none() { + Ok(unsafe { String::from_utf8_unchecked(self.bytes) }) + } else { + Err(self) } } @@ -375,6 +423,11 @@ impl Wtf8Buf { /// /// Surrogates are replaced with `"\u{FFFD}"` (the replacement character āļæ½ā) pub fn into_string_lossy(mut self) -> String { + // Fast path: If we already have UTF-8, we can return it immediately. + if self.is_known_utf8 { + return unsafe { String::from_utf8_unchecked(self.bytes) }; + } + let mut pos = 0; loop { match self.next_surrogate(pos) { @@ -397,7 +450,7 @@ impl Wtf8Buf { /// Converts a `Box<Wtf8>` into a `Wtf8Buf`. pub fn from_box(boxed: Box<Wtf8>) -> Wtf8Buf { let bytes: Box<[u8]> = unsafe { mem::transmute(boxed) }; - Wtf8Buf { bytes: bytes.into_vec() } + Wtf8Buf { bytes: bytes.into_vec(), is_known_utf8: false } } } @@ -575,6 +628,11 @@ impl Wtf8 { } } + /// Creates an owned `Wtf8Buf` from a borrowed `Wtf8`. + pub fn to_owned(&self) -> Wtf8Buf { + Wtf8Buf { bytes: self.bytes.to_vec(), is_known_utf8: false } + } + /// Lossily converts the string to UTF-8. /// Returns a UTF-8 `&str` slice if the contents are well-formed in UTF-8. /// @@ -664,7 +722,8 @@ impl Wtf8 { } pub fn clone_into(&self, buf: &mut Wtf8Buf) { - self.bytes.clone_into(&mut buf.bytes) + buf.is_known_utf8 = false; + self.bytes.clone_into(&mut buf.bytes); } /// Boxes this `Wtf8`. @@ -704,12 +763,12 @@ impl Wtf8 { #[inline] pub fn to_ascii_lowercase(&self) -> Wtf8Buf { - Wtf8Buf { bytes: self.bytes.to_ascii_lowercase() } + Wtf8Buf { bytes: self.bytes.to_ascii_lowercase(), is_known_utf8: false } } #[inline] pub fn to_ascii_uppercase(&self) -> Wtf8Buf { - Wtf8Buf { bytes: self.bytes.to_ascii_uppercase() } + Wtf8Buf { bytes: self.bytes.to_ascii_uppercase(), is_known_utf8: false } } #[inline] diff --git a/library/std/src/sys_common/wtf8/tests.rs b/library/std/src/sys_common/wtf8/tests.rs index 931996791..1a302d646 100644 --- a/library/std/src/sys_common/wtf8/tests.rs +++ b/library/std/src/sys_common/wtf8/tests.rs @@ -20,6 +20,36 @@ fn code_point_to_u32() { } #[test] +fn code_point_to_lead_surrogate() { + fn c(value: u32) -> CodePoint { + CodePoint::from_u32(value).unwrap() + } + assert_eq!(c(0).to_lead_surrogate(), None); + assert_eq!(c(0xE9).to_lead_surrogate(), None); + assert_eq!(c(0xD800).to_lead_surrogate(), Some(0xD800)); + assert_eq!(c(0xDBFF).to_lead_surrogate(), Some(0xDBFF)); + assert_eq!(c(0xDC00).to_lead_surrogate(), None); + assert_eq!(c(0xDFFF).to_lead_surrogate(), None); + assert_eq!(c(0x1F4A9).to_lead_surrogate(), None); + assert_eq!(c(0x10FFFF).to_lead_surrogate(), None); +} + +#[test] +fn code_point_to_trail_surrogate() { + fn c(value: u32) -> CodePoint { + CodePoint::from_u32(value).unwrap() + } + assert_eq!(c(0).to_trail_surrogate(), None); + assert_eq!(c(0xE9).to_trail_surrogate(), None); + assert_eq!(c(0xD800).to_trail_surrogate(), None); + assert_eq!(c(0xDBFF).to_trail_surrogate(), None); + assert_eq!(c(0xDC00).to_trail_surrogate(), Some(0xDC00)); + assert_eq!(c(0xDFFF).to_trail_surrogate(), Some(0xDFFF)); + assert_eq!(c(0x1F4A9).to_trail_surrogate(), None); + assert_eq!(c(0x10FFFF).to_trail_surrogate(), None); +} + +#[test] fn code_point_from_char() { assert_eq!(CodePoint::from_char('a').to_u32(), 0x61); assert_eq!(CodePoint::from_char('š©').to_u32(), 0x1F4A9); @@ -70,35 +100,66 @@ fn wtf8buf_from_string() { #[test] fn wtf8buf_from_wide() { - assert_eq!(Wtf8Buf::from_wide(&[]).bytes, b""); - assert_eq!( - Wtf8Buf::from_wide(&[0x61, 0xE9, 0x20, 0xD83D, 0xD83D, 0xDCA9]).bytes, - b"a\xC3\xA9 \xED\xA0\xBD\xF0\x9F\x92\xA9" - ); + let buf = Wtf8Buf::from_wide(&[]); + assert_eq!(buf.bytes, b""); + assert!(buf.is_known_utf8); + + let buf = Wtf8Buf::from_wide(&[0x61, 0xE9, 0x20, 0xD83D, 0xDCA9]); + assert_eq!(buf.bytes, b"a\xC3\xA9 \xF0\x9F\x92\xA9"); + assert!(buf.is_known_utf8); + + let buf = Wtf8Buf::from_wide(&[0x61, 0xE9, 0x20, 0xD83D, 0xD83D, 0xDCA9]); + assert_eq!(buf.bytes, b"a\xC3\xA9 \xED\xA0\xBD\xF0\x9F\x92\xA9"); + assert!(!buf.is_known_utf8); + + let buf = Wtf8Buf::from_wide(&[0xD800]); + assert_eq!(buf.bytes, b"\xED\xA0\x80"); + assert!(!buf.is_known_utf8); + + let buf = Wtf8Buf::from_wide(&[0xDBFF]); + assert_eq!(buf.bytes, b"\xED\xAF\xBF"); + assert!(!buf.is_known_utf8); + + let buf = Wtf8Buf::from_wide(&[0xDC00]); + assert_eq!(buf.bytes, b"\xED\xB0\x80"); + assert!(!buf.is_known_utf8); + + let buf = Wtf8Buf::from_wide(&[0xDFFF]); + assert_eq!(buf.bytes, b"\xED\xBF\xBF"); + assert!(!buf.is_known_utf8); } #[test] fn wtf8buf_push_str() { let mut string = Wtf8Buf::new(); assert_eq!(string.bytes, b""); + assert!(string.is_known_utf8); + string.push_str("aĆ© š©"); assert_eq!(string.bytes, b"a\xC3\xA9 \xF0\x9F\x92\xA9"); + assert!(string.is_known_utf8); } #[test] fn wtf8buf_push_char() { let mut string = Wtf8Buf::from_str("aĆ© "); assert_eq!(string.bytes, b"a\xC3\xA9 "); + assert!(string.is_known_utf8); + string.push_char('š©'); assert_eq!(string.bytes, b"a\xC3\xA9 \xF0\x9F\x92\xA9"); + assert!(string.is_known_utf8); } #[test] fn wtf8buf_push() { let mut string = Wtf8Buf::from_str("aĆ© "); assert_eq!(string.bytes, b"a\xC3\xA9 "); + assert!(string.is_known_utf8); + string.push(CodePoint::from_char('š©')); assert_eq!(string.bytes, b"a\xC3\xA9 \xF0\x9F\x92\xA9"); + assert!(string.is_known_utf8); fn c(value: u32) -> CodePoint { CodePoint::from_u32(value).unwrap() @@ -106,37 +167,46 @@ fn wtf8buf_push() { let mut string = Wtf8Buf::new(); string.push(c(0xD83D)); // lead + assert!(!string.is_known_utf8); string.push(c(0xDCA9)); // trail assert_eq!(string.bytes, b"\xF0\x9F\x92\xA9"); // Magic! let mut string = Wtf8Buf::new(); string.push(c(0xD83D)); // lead + assert!(!string.is_known_utf8); string.push(c(0x20)); // not surrogate string.push(c(0xDCA9)); // trail assert_eq!(string.bytes, b"\xED\xA0\xBD \xED\xB2\xA9"); let mut string = Wtf8Buf::new(); string.push(c(0xD800)); // lead + assert!(!string.is_known_utf8); string.push(c(0xDBFF)); // lead assert_eq!(string.bytes, b"\xED\xA0\x80\xED\xAF\xBF"); let mut string = Wtf8Buf::new(); string.push(c(0xD800)); // lead + assert!(!string.is_known_utf8); string.push(c(0xE000)); // not surrogate assert_eq!(string.bytes, b"\xED\xA0\x80\xEE\x80\x80"); let mut string = Wtf8Buf::new(); string.push(c(0xD7FF)); // not surrogate + assert!(string.is_known_utf8); string.push(c(0xDC00)); // trail + assert!(!string.is_known_utf8); assert_eq!(string.bytes, b"\xED\x9F\xBF\xED\xB0\x80"); let mut string = Wtf8Buf::new(); string.push(c(0x61)); // not surrogate, < 3 bytes + assert!(string.is_known_utf8); string.push(c(0xDC00)); // trail + assert!(!string.is_known_utf8); assert_eq!(string.bytes, b"\x61\xED\xB0\x80"); let mut string = Wtf8Buf::new(); string.push(c(0xDC00)); // trail + assert!(!string.is_known_utf8); assert_eq!(string.bytes, b"\xED\xB0\x80"); } @@ -146,6 +216,7 @@ fn wtf8buf_push_wtf8() { assert_eq!(string.bytes, b"a\xC3\xA9"); string.push_wtf8(Wtf8::from_str(" š©")); assert_eq!(string.bytes, b"a\xC3\xA9 \xF0\x9F\x92\xA9"); + assert!(string.is_known_utf8); fn w(v: &[u8]) -> &Wtf8 { unsafe { Wtf8::from_bytes_unchecked(v) } @@ -161,37 +232,68 @@ fn wtf8buf_push_wtf8() { string.push_wtf8(w(b" ")); // not surrogate string.push_wtf8(w(b"\xED\xB2\xA9")); // trail assert_eq!(string.bytes, b"\xED\xA0\xBD \xED\xB2\xA9"); + assert!(!string.is_known_utf8); let mut string = Wtf8Buf::new(); string.push_wtf8(w(b"\xED\xA0\x80")); // lead string.push_wtf8(w(b"\xED\xAF\xBF")); // lead assert_eq!(string.bytes, b"\xED\xA0\x80\xED\xAF\xBF"); + assert!(!string.is_known_utf8); let mut string = Wtf8Buf::new(); string.push_wtf8(w(b"\xED\xA0\x80")); // lead string.push_wtf8(w(b"\xEE\x80\x80")); // not surrogate assert_eq!(string.bytes, b"\xED\xA0\x80\xEE\x80\x80"); + assert!(!string.is_known_utf8); let mut string = Wtf8Buf::new(); string.push_wtf8(w(b"\xED\x9F\xBF")); // not surrogate string.push_wtf8(w(b"\xED\xB0\x80")); // trail assert_eq!(string.bytes, b"\xED\x9F\xBF\xED\xB0\x80"); + assert!(!string.is_known_utf8); let mut string = Wtf8Buf::new(); string.push_wtf8(w(b"a")); // not surrogate, < 3 bytes string.push_wtf8(w(b"\xED\xB0\x80")); // trail assert_eq!(string.bytes, b"\x61\xED\xB0\x80"); + assert!(!string.is_known_utf8); let mut string = Wtf8Buf::new(); string.push_wtf8(w(b"\xED\xB0\x80")); // trail assert_eq!(string.bytes, b"\xED\xB0\x80"); + assert!(!string.is_known_utf8); } #[test] fn wtf8buf_truncate() { let mut string = Wtf8Buf::from_str("aĆ©"); + assert!(string.is_known_utf8); + + string.truncate(3); + assert_eq!(string.bytes, b"a\xC3\xA9"); + assert!(string.is_known_utf8); + string.truncate(1); assert_eq!(string.bytes, b"a"); + assert!(string.is_known_utf8); + + string.truncate(0); + assert_eq!(string.bytes, b""); + assert!(string.is_known_utf8); +} + +#[test] +fn wtf8buf_truncate_around_non_bmp() { + let mut string = Wtf8Buf::from_str("š©"); + assert!(string.is_known_utf8); + + string.truncate(4); + assert_eq!(string.bytes, b"\xF0\x9F\x92\xA9"); + assert!(string.is_known_utf8); + + string.truncate(0); + assert_eq!(string.bytes, b""); + assert!(string.is_known_utf8); } #[test] @@ -209,10 +311,36 @@ fn wtf8buf_truncate_fail_longer() { } #[test] +#[should_panic] +fn wtf8buf_truncate_splitting_non_bmp3() { + let mut string = Wtf8Buf::from_str("š©"); + assert!(string.is_known_utf8); + string.truncate(3); +} + +#[test] +#[should_panic] +fn wtf8buf_truncate_splitting_non_bmp2() { + let mut string = Wtf8Buf::from_str("š©"); + assert!(string.is_known_utf8); + string.truncate(2); +} + +#[test] +#[should_panic] +fn wtf8buf_truncate_splitting_non_bmp1() { + let mut string = Wtf8Buf::from_str("š©"); + assert!(string.is_known_utf8); + string.truncate(1); +} + +#[test] fn wtf8buf_into_string() { let mut string = Wtf8Buf::from_str("aĆ© š©"); + assert!(string.is_known_utf8); assert_eq!(string.clone().into_string(), Ok(String::from("aĆ© š©"))); string.push(CodePoint::from_u32(0xD800).unwrap()); + assert!(!string.is_known_utf8); assert_eq!(string.clone().into_string(), Err(string)); } @@ -229,15 +357,33 @@ fn wtf8buf_from_iterator() { fn f(values: &[u32]) -> Wtf8Buf { values.iter().map(|&c| CodePoint::from_u32(c).unwrap()).collect::<Wtf8Buf>() } - assert_eq!(f(&[0x61, 0xE9, 0x20, 0x1F4A9]).bytes, b"a\xC3\xA9 \xF0\x9F\x92\xA9"); + assert_eq!( + f(&[0x61, 0xE9, 0x20, 0x1F4A9]), + Wtf8Buf { bytes: b"a\xC3\xA9 \xF0\x9F\x92\xA9".to_vec(), is_known_utf8: true } + ); assert_eq!(f(&[0xD83D, 0xDCA9]).bytes, b"\xF0\x9F\x92\xA9"); // Magic! - assert_eq!(f(&[0xD83D, 0x20, 0xDCA9]).bytes, b"\xED\xA0\xBD \xED\xB2\xA9"); - assert_eq!(f(&[0xD800, 0xDBFF]).bytes, b"\xED\xA0\x80\xED\xAF\xBF"); - assert_eq!(f(&[0xD800, 0xE000]).bytes, b"\xED\xA0\x80\xEE\x80\x80"); - assert_eq!(f(&[0xD7FF, 0xDC00]).bytes, b"\xED\x9F\xBF\xED\xB0\x80"); - assert_eq!(f(&[0x61, 0xDC00]).bytes, b"\x61\xED\xB0\x80"); - assert_eq!(f(&[0xDC00]).bytes, b"\xED\xB0\x80"); + assert_eq!( + f(&[0xD83D, 0x20, 0xDCA9]), + Wtf8Buf { bytes: b"\xED\xA0\xBD \xED\xB2\xA9".to_vec(), is_known_utf8: false } + ); + assert_eq!( + f(&[0xD800, 0xDBFF]), + Wtf8Buf { bytes: b"\xED\xA0\x80\xED\xAF\xBF".to_vec(), is_known_utf8: false } + ); + assert_eq!( + f(&[0xD800, 0xE000]), + Wtf8Buf { bytes: b"\xED\xA0\x80\xEE\x80\x80".to_vec(), is_known_utf8: false } + ); + assert_eq!( + f(&[0xD7FF, 0xDC00]), + Wtf8Buf { bytes: b"\xED\x9F\xBF\xED\xB0\x80".to_vec(), is_known_utf8: false } + ); + assert_eq!( + f(&[0x61, 0xDC00]), + Wtf8Buf { bytes: b"\x61\xED\xB0\x80".to_vec(), is_known_utf8: false } + ); + assert_eq!(f(&[0xDC00]), Wtf8Buf { bytes: b"\xED\xB0\x80".to_vec(), is_known_utf8: false }); } #[test] @@ -251,15 +397,36 @@ fn wtf8buf_extend() { string } - assert_eq!(e(&[0x61, 0xE9], &[0x20, 0x1F4A9]).bytes, b"a\xC3\xA9 \xF0\x9F\x92\xA9"); + assert_eq!( + e(&[0x61, 0xE9], &[0x20, 0x1F4A9]), + Wtf8Buf { bytes: b"a\xC3\xA9 \xF0\x9F\x92\xA9".to_vec(), is_known_utf8: true } + ); assert_eq!(e(&[0xD83D], &[0xDCA9]).bytes, b"\xF0\x9F\x92\xA9"); // Magic! - assert_eq!(e(&[0xD83D, 0x20], &[0xDCA9]).bytes, b"\xED\xA0\xBD \xED\xB2\xA9"); - assert_eq!(e(&[0xD800], &[0xDBFF]).bytes, b"\xED\xA0\x80\xED\xAF\xBF"); - assert_eq!(e(&[0xD800], &[0xE000]).bytes, b"\xED\xA0\x80\xEE\x80\x80"); - assert_eq!(e(&[0xD7FF], &[0xDC00]).bytes, b"\xED\x9F\xBF\xED\xB0\x80"); - assert_eq!(e(&[0x61], &[0xDC00]).bytes, b"\x61\xED\xB0\x80"); - assert_eq!(e(&[], &[0xDC00]).bytes, b"\xED\xB0\x80"); + assert_eq!( + e(&[0xD83D, 0x20], &[0xDCA9]), + Wtf8Buf { bytes: b"\xED\xA0\xBD \xED\xB2\xA9".to_vec(), is_known_utf8: false } + ); + assert_eq!( + e(&[0xD800], &[0xDBFF]), + Wtf8Buf { bytes: b"\xED\xA0\x80\xED\xAF\xBF".to_vec(), is_known_utf8: false } + ); + assert_eq!( + e(&[0xD800], &[0xE000]), + Wtf8Buf { bytes: b"\xED\xA0\x80\xEE\x80\x80".to_vec(), is_known_utf8: false } + ); + assert_eq!( + e(&[0xD7FF], &[0xDC00]), + Wtf8Buf { bytes: b"\xED\x9F\xBF\xED\xB0\x80".to_vec(), is_known_utf8: false } + ); + assert_eq!( + e(&[0x61], &[0xDC00]), + Wtf8Buf { bytes: b"\x61\xED\xB0\x80".to_vec(), is_known_utf8: false } + ); + assert_eq!( + e(&[], &[0xDC00]), + Wtf8Buf { bytes: b"\xED\xB0\x80".to_vec(), is_known_utf8: false } + ); } #[test] @@ -407,3 +574,93 @@ fn wtf8_encode_wide_size_hint() { assert_eq!((0, Some(0)), iter.size_hint()); assert!(iter.next().is_none()); } + +#[test] +fn wtf8_clone_into() { + let mut string = Wtf8Buf::new(); + Wtf8::from_str("green").clone_into(&mut string); + assert_eq!(string.bytes, b"green"); + + let mut string = Wtf8Buf::from_str("green"); + Wtf8::from_str("").clone_into(&mut string); + assert_eq!(string.bytes, b""); + + let mut string = Wtf8Buf::from_str("red"); + Wtf8::from_str("green").clone_into(&mut string); + assert_eq!(string.bytes, b"green"); + + let mut string = Wtf8Buf::from_str("green"); + Wtf8::from_str("red").clone_into(&mut string); + assert_eq!(string.bytes, b"red"); + + let mut string = Wtf8Buf::from_str("green"); + assert!(string.is_known_utf8); + unsafe { Wtf8::from_bytes_unchecked(b"\xED\xA0\x80").clone_into(&mut string) }; + assert_eq!(string.bytes, b"\xED\xA0\x80"); + assert!(!string.is_known_utf8); +} + +#[test] +fn wtf8_to_ascii_lowercase() { + let lowercase = Wtf8::from_str("").to_ascii_lowercase(); + assert_eq!(lowercase.bytes, b""); + + let lowercase = Wtf8::from_str("GrEeN gRaPeS! š").to_ascii_lowercase(); + assert_eq!(lowercase.bytes, b"green grapes! \xf0\x9f\x8d\x87"); + + let lowercase = unsafe { Wtf8::from_bytes_unchecked(b"\xED\xA0\x80").to_ascii_lowercase() }; + assert_eq!(lowercase.bytes, b"\xED\xA0\x80"); + assert!(!lowercase.is_known_utf8); +} + +#[test] +fn wtf8_to_ascii_uppercase() { + let uppercase = Wtf8::from_str("").to_ascii_uppercase(); + assert_eq!(uppercase.bytes, b""); + + let uppercase = Wtf8::from_str("GrEeN gRaPeS! š").to_ascii_uppercase(); + assert_eq!(uppercase.bytes, b"GREEN GRAPES! \xf0\x9f\x8d\x87"); + + let uppercase = unsafe { Wtf8::from_bytes_unchecked(b"\xED\xA0\x80").to_ascii_uppercase() }; + assert_eq!(uppercase.bytes, b"\xED\xA0\x80"); + assert!(!uppercase.is_known_utf8); +} + +#[test] +fn wtf8_make_ascii_lowercase() { + let mut lowercase = Wtf8Buf::from_str(""); + lowercase.make_ascii_lowercase(); + assert_eq!(lowercase.bytes, b""); + + let mut lowercase = Wtf8Buf::from_str("GrEeN gRaPeS! š"); + lowercase.make_ascii_lowercase(); + assert_eq!(lowercase.bytes, b"green grapes! \xf0\x9f\x8d\x87"); + + let mut lowercase = unsafe { Wtf8::from_bytes_unchecked(b"\xED\xA0\x80").to_owned() }; + lowercase.make_ascii_lowercase(); + assert_eq!(lowercase.bytes, b"\xED\xA0\x80"); + assert!(!lowercase.is_known_utf8); +} + +#[test] +fn wtf8_make_ascii_uppercase() { + let mut uppercase = Wtf8Buf::from_str(""); + uppercase.make_ascii_uppercase(); + assert_eq!(uppercase.bytes, b""); + + let mut uppercase = Wtf8Buf::from_str("GrEeN gRaPeS! š"); + uppercase.make_ascii_uppercase(); + assert_eq!(uppercase.bytes, b"GREEN GRAPES! \xf0\x9f\x8d\x87"); + + let mut uppercase = unsafe { Wtf8::from_bytes_unchecked(b"\xED\xA0\x80").to_owned() }; + uppercase.make_ascii_uppercase(); + assert_eq!(uppercase.bytes, b"\xED\xA0\x80"); + assert!(!uppercase.is_known_utf8); +} + +#[test] +fn wtf8_to_owned() { + let string = unsafe { Wtf8::from_bytes_unchecked(b"\xED\xA0\x80").to_owned() }; + assert_eq!(string.bytes, b"\xED\xA0\x80"); + assert!(!string.is_known_utf8); +} |