// Licensed under the Apache License, Version 2.0 or the MIT license // , at your // option. This file may not be copied, modified, or distributed // except according to those terms. use std::borrow::Borrow; use std::cell::{Cell, UnsafeCell}; use std::cmp::Ordering; use std::default::Default; use std::fmt as strfmt; use std::iter::FromIterator; use std::marker::PhantomData; use std::num::NonZeroUsize; use std::ops::{Deref, DerefMut}; use std::sync::atomic::Ordering as AtomicOrdering; use std::sync::atomic::{self, AtomicUsize}; use std::{hash, io, mem, ptr, str, u32}; #[cfg(feature = "encoding")] use encoding::{self, DecoderTrap, EncoderTrap, EncodingRef}; use buf32::{self, Buf32}; use fmt::imp::Fixup; use fmt::{self, Slice}; use util::{copy_and_advance, copy_lifetime, copy_lifetime_mut, unsafe_slice, unsafe_slice_mut}; use OFLOW; const MAX_INLINE_LEN: usize = 8; const MAX_INLINE_TAG: usize = 0xF; const EMPTY_TAG: usize = 0xF; #[inline(always)] fn inline_tag(len: u32) -> NonZeroUsize { debug_assert!(len <= MAX_INLINE_LEN as u32); unsafe { NonZeroUsize::new_unchecked(if len == 0 { EMPTY_TAG } else { len as usize }) } } /// The multithreadedness of a tendril. /// /// Exactly two types implement this trait: /// /// - `Atomic`: use this in your tendril and you will have a `Send` tendril which works /// across threads; this is akin to `Arc`. /// /// - `NonAtomic`: use this in your tendril and you will have a tendril which is neither /// `Send` nor `Sync` but should be a tad faster; this is akin to `Rc`. /// /// The layout of this trait is also mandated to be that of a `usize`, /// for it is used for reference counting. pub unsafe trait Atomicity: 'static { #[doc(hidden)] fn new() -> Self; #[doc(hidden)] fn increment(&self) -> usize; #[doc(hidden)] fn decrement(&self) -> usize; #[doc(hidden)] fn fence_acquire(); } /// A marker of a non-atomic tendril. /// /// This is the default for the second type parameter of a `Tendril` /// and so doesn't typically need to be written. /// /// This is akin to using `Rc` for reference counting. #[repr(C)] pub struct NonAtomic(Cell); unsafe impl Atomicity for NonAtomic { #[inline] fn new() -> Self { NonAtomic(Cell::new(1)) } #[inline] fn increment(&self) -> usize { let value = self.0.get(); self.0.set(value.checked_add(1).expect(OFLOW)); value } #[inline] fn decrement(&self) -> usize { let value = self.0.get(); self.0.set(value - 1); value } #[inline] fn fence_acquire() {} } /// A marker of an atomic (and hence concurrent) tendril. /// /// This is used as the second, optional type parameter of a `Tendril`; /// `Tendril` thus implements`Send`. /// /// This is akin to using `Arc` for reference counting. pub struct Atomic(AtomicUsize); unsafe impl Atomicity for Atomic { #[inline] fn new() -> Self { Atomic(AtomicUsize::new(1)) } #[inline] fn increment(&self) -> usize { // Relaxed is OK because we have a reference already. self.0.fetch_add(1, AtomicOrdering::Relaxed) } #[inline] fn decrement(&self) -> usize { self.0.fetch_sub(1, AtomicOrdering::Release) } #[inline] fn fence_acquire() { atomic::fence(AtomicOrdering::Acquire); } } #[repr(C)] // Preserve field order for cross-atomicity transmutes struct Header { refcount: A, cap: u32, } impl Header where A: Atomicity, { #[inline(always)] unsafe fn new() -> Header { Header { refcount: A::new(), cap: 0, } } } /// Errors that can occur when slicing a `Tendril`. #[derive(Copy, Clone, Hash, Debug, PartialEq, Eq)] pub enum SubtendrilError { OutOfBounds, ValidationFailed, } /// Compact string type for zero-copy parsing. /// /// `Tendril`s have the semantics of owned strings, but are sometimes views /// into shared buffers. When you mutate a `Tendril`, an owned copy is made /// if necessary. Further mutations occur in-place until the string becomes /// shared, e.g. with `clone()` or `subtendril()`. /// /// Buffer sharing is accomplished through thread-local (non-atomic) reference /// counting, which has very low overhead. The Rust type system will prevent /// you at compile time from sending a `Tendril` between threads. We plan to /// relax this restriction in the future; see `README.md`. /// /// Whereas `String` allocates in the heap for any non-empty string, `Tendril` /// can store small strings (up to 8 bytes) in-line, without a heap allocation. /// `Tendril` is also smaller than `String` on 64-bit platforms — 16 bytes /// versus 24. /// /// The type parameter `F` specifies the format of the tendril, for example /// UTF-8 text or uninterpreted bytes. The parameter will be instantiated /// with one of the marker types from `tendril::fmt`. See the `StrTendril` /// and `ByteTendril` type aliases for two examples. /// /// The type parameter `A` indicates the atomicity of the tendril; it is by /// default `NonAtomic`, but can be specified as `Atomic` to get a tendril /// which implements `Send` (viz. a thread-safe tendril). /// /// The maximum length of a `Tendril` is 4 GB. The library will panic if /// you attempt to go over the limit. #[repr(C)] pub struct Tendril where F: fmt::Format, A: Atomicity, { ptr: Cell, buf: UnsafeCell, marker: PhantomData<*mut F>, refcount_marker: PhantomData, } #[repr(C)] union Buffer { heap: Heap, inline: [u8; 8], } #[derive(Copy, Clone)] #[repr(C)] struct Heap { len: u32, aux: u32, } unsafe impl Send for Tendril where F: fmt::Format, A: Atomicity + Sync, { } /// `Tendril` for storing native Rust strings. pub type StrTendril = Tendril; /// `Tendril` for storing binary data. pub type ByteTendril = Tendril; impl Clone for Tendril where F: fmt::Format, A: Atomicity, { #[inline] fn clone(&self) -> Tendril { unsafe { if self.ptr.get().get() > MAX_INLINE_TAG { self.make_buf_shared(); self.incref(); } ptr::read(self) } } } impl Drop for Tendril where F: fmt::Format, A: Atomicity, { #[inline] fn drop(&mut self) { unsafe { let p = self.ptr.get().get(); if p <= MAX_INLINE_TAG { return; } let (buf, shared, _) = self.assume_buf(); if shared { let header = self.header(); if (*header).refcount.decrement() == 1 { A::fence_acquire(); buf.destroy(); } } else { buf.destroy(); } } } } macro_rules! from_iter_method { ($ty:ty) => { #[inline] fn from_iter(iterable: I) -> Self where I: IntoIterator, { let mut output = Self::new(); output.extend(iterable); output } }; } impl Extend for Tendril where A: Atomicity, { #[inline] fn extend(&mut self, iterable: I) where I: IntoIterator, { let iterator = iterable.into_iter(); self.force_reserve(iterator.size_hint().0 as u32); for c in iterator { self.push_char(c); } } } impl FromIterator for Tendril where A: Atomicity, { from_iter_method!(char); } impl Extend for Tendril where A: Atomicity, { #[inline] fn extend(&mut self, iterable: I) where I: IntoIterator, { let iterator = iterable.into_iter(); self.force_reserve(iterator.size_hint().0 as u32); for b in iterator { self.push_slice(&[b]); } } } impl FromIterator for Tendril where A: Atomicity, { from_iter_method!(u8); } impl<'a, A> Extend<&'a u8> for Tendril where A: Atomicity, { #[inline] fn extend(&mut self, iterable: I) where I: IntoIterator, { let iterator = iterable.into_iter(); self.force_reserve(iterator.size_hint().0 as u32); for &b in iterator { self.push_slice(&[b]); } } } impl<'a, A> FromIterator<&'a u8> for Tendril where A: Atomicity, { from_iter_method!(&'a u8); } impl<'a, A> Extend<&'a str> for Tendril where A: Atomicity, { #[inline] fn extend(&mut self, iterable: I) where I: IntoIterator, { for s in iterable { self.push_slice(s); } } } impl<'a, A> FromIterator<&'a str> for Tendril where A: Atomicity, { from_iter_method!(&'a str); } impl<'a, A> Extend<&'a [u8]> for Tendril where A: Atomicity, { #[inline] fn extend(&mut self, iterable: I) where I: IntoIterator, { for s in iterable { self.push_slice(s); } } } impl<'a, A> FromIterator<&'a [u8]> for Tendril where A: Atomicity, { from_iter_method!(&'a [u8]); } impl<'a, F, A> Extend<&'a Tendril> for Tendril where F: fmt::Format + 'a, A: Atomicity, { #[inline] fn extend(&mut self, iterable: I) where I: IntoIterator>, { for t in iterable { self.push_tendril(t); } } } impl<'a, F, A> FromIterator<&'a Tendril> for Tendril where F: fmt::Format + 'a, A: Atomicity, { from_iter_method!(&'a Tendril); } impl Deref for Tendril where F: fmt::SliceFormat, A: Atomicity, { type Target = F::Slice; #[inline] fn deref(&self) -> &F::Slice { unsafe { F::Slice::from_bytes(self.as_byte_slice()) } } } impl DerefMut for Tendril where F: fmt::SliceFormat, A: Atomicity, { #[inline] fn deref_mut(&mut self) -> &mut F::Slice { unsafe { F::Slice::from_mut_bytes(self.as_mut_byte_slice()) } } } impl Borrow<[u8]> for Tendril where F: fmt::SliceFormat, A: Atomicity, { fn borrow(&self) -> &[u8] { self.as_byte_slice() } } // Why not impl Borrow for Tendril? str and [u8] hash differently, // and so a HashMap would silently break if we indexed by str. Ick. // https://github.com/rust-lang/rust/issues/27108 impl PartialEq for Tendril where F: fmt::Format, A: Atomicity, { #[inline] fn eq(&self, other: &Self) -> bool { self.as_byte_slice() == other.as_byte_slice() } #[inline] fn ne(&self, other: &Self) -> bool { self.as_byte_slice() != other.as_byte_slice() } } impl Eq for Tendril where F: fmt::Format, A: Atomicity, { } impl PartialOrd for Tendril where F: fmt::SliceFormat, ::Slice: PartialOrd, A: Atomicity, { #[inline] fn partial_cmp(&self, other: &Self) -> Option { PartialOrd::partial_cmp(&**self, &**other) } } impl Ord for Tendril where F: fmt::SliceFormat, ::Slice: Ord, A: Atomicity, { #[inline] fn cmp(&self, other: &Self) -> Ordering { Ord::cmp(&**self, &**other) } } impl Default for Tendril where F: fmt::Format, A: Atomicity, { #[inline(always)] fn default() -> Tendril { Tendril::new() } } impl strfmt::Debug for Tendril where F: fmt::SliceFormat + Default + strfmt::Debug, ::Slice: strfmt::Debug, A: Atomicity, { #[inline] fn fmt(&self, f: &mut strfmt::Formatter) -> strfmt::Result { let kind = match self.ptr.get().get() { p if p <= MAX_INLINE_TAG => "inline", p if p & 1 == 1 => "shared", _ => "owned", }; write!(f, "Tendril<{:?}>({}: ", ::default(), kind)?; <::Slice as strfmt::Debug>::fmt(&**self, f)?; write!(f, ")") } } impl hash::Hash for Tendril where F: fmt::Format, A: Atomicity, { #[inline] fn hash(&self, hasher: &mut H) { self.as_byte_slice().hash(hasher) } } impl Tendril where F: fmt::Format, A: Atomicity, { /// Create a new, empty `Tendril` in any format. #[inline(always)] pub fn new() -> Tendril { unsafe { Tendril::inline(&[]) } } /// Create a new, empty `Tendril` with a specified capacity. #[inline] pub fn with_capacity(capacity: u32) -> Tendril { let mut t: Tendril = Tendril::new(); if capacity > MAX_INLINE_LEN as u32 { unsafe { t.make_owned_with_capacity(capacity); } } t } /// Reserve space for additional bytes. /// /// This is only a suggestion. There are cases where `Tendril` will /// decline to allocate until the buffer is actually modified. #[inline] pub fn reserve(&mut self, additional: u32) { if !self.is_shared() { // Don't grow a shared tendril because we'd have to copy // right away. self.force_reserve(additional); } } /// Reserve space for additional bytes, even for shared buffers. #[inline] fn force_reserve(&mut self, additional: u32) { let new_len = self.len32().checked_add(additional).expect(OFLOW); if new_len > MAX_INLINE_LEN as u32 { unsafe { self.make_owned_with_capacity(new_len); } } } /// Get the length of the `Tendril`. /// /// This is named not to conflict with `len()` on the underlying /// slice, if any. #[inline(always)] pub fn len32(&self) -> u32 { match self.ptr.get().get() { EMPTY_TAG => 0, n if n <= MAX_INLINE_LEN => n as u32, _ => unsafe { self.raw_len() }, } } /// Is the backing buffer shared? #[inline] pub fn is_shared(&self) -> bool { let n = self.ptr.get().get(); (n > MAX_INLINE_TAG) && ((n & 1) == 1) } /// Is the backing buffer shared with this other `Tendril`? #[inline] pub fn is_shared_with(&self, other: &Tendril) -> bool { let n = self.ptr.get().get(); (n > MAX_INLINE_TAG) && (n == other.ptr.get().get()) } /// Truncate to length 0 without discarding any owned storage. #[inline] pub fn clear(&mut self) { if self.ptr.get().get() <= MAX_INLINE_TAG { self.ptr .set(unsafe { NonZeroUsize::new_unchecked(EMPTY_TAG) }); } else { let (_, shared, _) = unsafe { self.assume_buf() }; if shared { // No need to keep a reference alive for a 0-size slice. *self = Tendril::new(); } else { unsafe { self.set_len(0) }; } } } /// Build a `Tendril` by copying a byte slice, if it conforms to the format. #[inline] pub fn try_from_byte_slice(x: &[u8]) -> Result, ()> { match F::validate(x) { true => Ok(unsafe { Tendril::from_byte_slice_without_validating(x) }), false => Err(()), } } /// View as uninterpreted bytes. #[inline(always)] pub fn as_bytes(&self) -> &Tendril { unsafe { mem::transmute(self) } } /// Convert into uninterpreted bytes. #[inline(always)] pub fn into_bytes(self) -> Tendril { unsafe { mem::transmute(self) } } /// Convert `self` into a type which is `Send`. /// /// If the tendril is owned or inline, this is free, /// but if it's shared this will entail a copy of the contents. #[inline] pub fn into_send(mut self) -> SendTendril { self.make_owned(); SendTendril { // This changes the header.refcount from A to NonAtomic, but that's // OK because we have defined the format of A as a usize. tendril: unsafe { mem::transmute(self) }, } } /// View as a superset format, for free. #[inline(always)] pub fn as_superset(&self) -> &Tendril where F: fmt::SubsetOf, Super: fmt::Format, { unsafe { mem::transmute(self) } } /// Convert into a superset format, for free. #[inline(always)] pub fn into_superset(self) -> Tendril where F: fmt::SubsetOf, Super: fmt::Format, { unsafe { mem::transmute(self) } } /// View as a subset format, if the `Tendril` conforms to that subset. #[inline] pub fn try_as_subset(&self) -> Result<&Tendril, ()> where Sub: fmt::SubsetOf, { match Sub::revalidate_subset(self.as_byte_slice()) { true => Ok(unsafe { mem::transmute(self) }), false => Err(()), } } /// Convert into a subset format, if the `Tendril` conforms to that subset. #[inline] pub fn try_into_subset(self) -> Result, Self> where Sub: fmt::SubsetOf, { match Sub::revalidate_subset(self.as_byte_slice()) { true => Ok(unsafe { mem::transmute(self) }), false => Err(self), } } /// View as another format, if the bytes of the `Tendril` are valid for /// that format. #[inline] pub fn try_reinterpret_view(&self) -> Result<&Tendril, ()> where Other: fmt::Format, { match Other::validate(self.as_byte_slice()) { true => Ok(unsafe { mem::transmute(self) }), false => Err(()), } } /// Convert into another format, if the `Tendril` conforms to that format. /// /// This only re-validates the existing bytes under the new format. It /// will *not* change the byte content of the tendril! /// /// See the `encode` and `decode` methods for character encoding conversion. #[inline] pub fn try_reinterpret(self) -> Result, Self> where Other: fmt::Format, { match Other::validate(self.as_byte_slice()) { true => Ok(unsafe { mem::transmute(self) }), false => Err(self), } } /// Push some bytes onto the end of the `Tendril`, if they conform to the /// format. #[inline] pub fn try_push_bytes(&mut self, buf: &[u8]) -> Result<(), ()> { match F::validate(buf) { true => unsafe { self.push_bytes_without_validating(buf); Ok(()) }, false => Err(()), } } /// Push another `Tendril` onto the end of this one. #[inline] pub fn push_tendril(&mut self, other: &Tendril) { let new_len = self.len32().checked_add(other.len32()).expect(OFLOW); unsafe { if (self.ptr.get().get() > MAX_INLINE_TAG) && (other.ptr.get().get() > MAX_INLINE_TAG) { let (self_buf, self_shared, _) = self.assume_buf(); let (other_buf, other_shared, _) = other.assume_buf(); if self_shared && other_shared && (self_buf.data_ptr() == other_buf.data_ptr()) && other.aux() == self.aux() + self.raw_len() { self.set_len(new_len); return; } } self.push_bytes_without_validating(other.as_byte_slice()) } } /// Attempt to slice this `Tendril` as a new `Tendril`. /// /// This will share the buffer when possible. Mutating a shared buffer /// will copy the contents. /// /// The offset and length are in bytes. The function will return /// `Err` if these are out of bounds, or if the resulting slice /// does not conform to the format. #[inline] pub fn try_subtendril( &self, offset: u32, length: u32, ) -> Result, SubtendrilError> { let self_len = self.len32(); if offset > self_len || length > (self_len - offset) { return Err(SubtendrilError::OutOfBounds); } unsafe { let byte_slice = unsafe_slice(self.as_byte_slice(), offset as usize, length as usize); if !F::validate_subseq(byte_slice) { return Err(SubtendrilError::ValidationFailed); } Ok(self.unsafe_subtendril(offset, length)) } } /// Slice this `Tendril` as a new `Tendril`. /// /// Panics on bounds or validity check failure. #[inline] pub fn subtendril(&self, offset: u32, length: u32) -> Tendril { self.try_subtendril(offset, length).unwrap() } /// Try to drop `n` bytes from the front. /// /// Returns `Err` if the bytes are not available, or the suffix fails /// validation. #[inline] pub fn try_pop_front(&mut self, n: u32) -> Result<(), SubtendrilError> { if n == 0 { return Ok(()); } let old_len = self.len32(); if n > old_len { return Err(SubtendrilError::OutOfBounds); } let new_len = old_len - n; unsafe { if !F::validate_suffix(unsafe_slice( self.as_byte_slice(), n as usize, new_len as usize, )) { return Err(SubtendrilError::ValidationFailed); } self.unsafe_pop_front(n); Ok(()) } } /// Drop `n` bytes from the front. /// /// Panics if the bytes are not available, or the suffix fails /// validation. #[inline] pub fn pop_front(&mut self, n: u32) { self.try_pop_front(n).unwrap() } /// Drop `n` bytes from the back. /// /// Returns `Err` if the bytes are not available, or the prefix fails /// validation. #[inline] pub fn try_pop_back(&mut self, n: u32) -> Result<(), SubtendrilError> { if n == 0 { return Ok(()); } let old_len = self.len32(); if n > old_len { return Err(SubtendrilError::OutOfBounds); } let new_len = old_len - n; unsafe { if !F::validate_prefix(unsafe_slice(self.as_byte_slice(), 0, new_len as usize)) { return Err(SubtendrilError::ValidationFailed); } self.unsafe_pop_back(n); Ok(()) } } /// Drop `n` bytes from the back. /// /// Panics if the bytes are not available, or the prefix fails /// validation. #[inline] pub fn pop_back(&mut self, n: u32) { self.try_pop_back(n).unwrap() } /// View as another format, without validating. #[inline(always)] pub unsafe fn reinterpret_view_without_validating(&self) -> &Tendril where Other: fmt::Format, { mem::transmute(self) } /// Convert into another format, without validating. #[inline(always)] pub unsafe fn reinterpret_without_validating(self) -> Tendril where Other: fmt::Format, { mem::transmute(self) } /// Build a `Tendril` by copying a byte slice, without validating. #[inline] pub unsafe fn from_byte_slice_without_validating(x: &[u8]) -> Tendril { assert!(x.len() <= buf32::MAX_LEN); if x.len() <= MAX_INLINE_LEN { Tendril::inline(x) } else { Tendril::owned_copy(x) } } /// Push some bytes onto the end of the `Tendril`, without validating. #[inline] pub unsafe fn push_bytes_without_validating(&mut self, buf: &[u8]) { assert!(buf.len() <= buf32::MAX_LEN); let Fixup { drop_left, drop_right, insert_len, insert_bytes, } = F::fixup(self.as_byte_slice(), buf); // FIXME: think more about overflow let adj_len = self.len32() + insert_len - drop_left; let new_len = adj_len.checked_add(buf.len() as u32).expect(OFLOW) - drop_right; let drop_left = drop_left as usize; let drop_right = drop_right as usize; if new_len <= MAX_INLINE_LEN as u32 { let mut tmp = [0_u8; MAX_INLINE_LEN]; { let old = self.as_byte_slice(); let mut dest = tmp.as_mut_ptr(); copy_and_advance(&mut dest, unsafe_slice(old, 0, old.len() - drop_left)); copy_and_advance( &mut dest, unsafe_slice(&insert_bytes, 0, insert_len as usize), ); copy_and_advance( &mut dest, unsafe_slice(buf, drop_right, buf.len() - drop_right), ); } *self = Tendril::inline(&tmp[..new_len as usize]); } else { self.make_owned_with_capacity(new_len); let (owned, _, _) = self.assume_buf(); let mut dest = owned .data_ptr() .offset((owned.len as usize - drop_left) as isize); copy_and_advance( &mut dest, unsafe_slice(&insert_bytes, 0, insert_len as usize), ); copy_and_advance( &mut dest, unsafe_slice(buf, drop_right, buf.len() - drop_right), ); self.set_len(new_len); } } /// Slice this `Tendril` as a new `Tendril`. /// /// Does not check validity or bounds! #[inline] pub unsafe fn unsafe_subtendril(&self, offset: u32, length: u32) -> Tendril { if length <= MAX_INLINE_LEN as u32 { Tendril::inline(unsafe_slice( self.as_byte_slice(), offset as usize, length as usize, )) } else { self.make_buf_shared(); self.incref(); let (buf, _, _) = self.assume_buf(); Tendril::shared(buf, self.aux() + offset, length) } } /// Drop `n` bytes from the front. /// /// Does not check validity or bounds! #[inline] pub unsafe fn unsafe_pop_front(&mut self, n: u32) { let new_len = self.len32() - n; if new_len <= MAX_INLINE_LEN as u32 { *self = Tendril::inline(unsafe_slice( self.as_byte_slice(), n as usize, new_len as usize, )); } else { self.make_buf_shared(); self.set_aux(self.aux() + n); let len = self.raw_len(); self.set_len(len - n); } } /// Drop `n` bytes from the back. /// /// Does not check validity or bounds! #[inline] pub unsafe fn unsafe_pop_back(&mut self, n: u32) { let new_len = self.len32() - n; if new_len <= MAX_INLINE_LEN as u32 { *self = Tendril::inline(unsafe_slice(self.as_byte_slice(), 0, new_len as usize)); } else { self.make_buf_shared(); let len = self.raw_len(); self.set_len(len - n); } } #[inline] unsafe fn incref(&self) { (*self.header()).refcount.increment(); } #[inline] unsafe fn make_buf_shared(&self) { let p = self.ptr.get().get(); if p & 1 == 0 { let header = p as *mut Header; (*header).cap = self.aux(); self.ptr.set(NonZeroUsize::new_unchecked(p | 1)); self.set_aux(0); } } // This is not public as it is of no practical value to users. // By and large they shouldn't need to worry about the distinction at all, // and going out of your way to make it owned is pointless. #[inline] fn make_owned(&mut self) { unsafe { let ptr = self.ptr.get().get(); if ptr <= MAX_INLINE_TAG || (ptr & 1) == 1 { *self = Tendril::owned_copy(self.as_byte_slice()); } } } #[inline] unsafe fn make_owned_with_capacity(&mut self, cap: u32) { self.make_owned(); let mut buf = self.assume_buf().0; buf.grow(cap); self.ptr.set(NonZeroUsize::new_unchecked(buf.ptr as usize)); self.set_aux(buf.cap); } #[inline(always)] unsafe fn header(&self) -> *mut Header { (self.ptr.get().get() & !1) as *mut Header } #[inline] unsafe fn assume_buf(&self) -> (Buf32>, bool, u32) { let ptr = self.ptr.get().get(); let header = self.header(); let shared = (ptr & 1) == 1; let (cap, offset) = match shared { true => ((*header).cap, self.aux()), false => (self.aux(), 0), }; ( Buf32 { ptr: header, len: offset + self.len32(), cap: cap, }, shared, offset, ) } #[inline] unsafe fn inline(x: &[u8]) -> Tendril { let len = x.len(); let t = Tendril { ptr: Cell::new(inline_tag(len as u32)), buf: UnsafeCell::new(Buffer { inline: [0; 8] }), marker: PhantomData, refcount_marker: PhantomData, }; ptr::copy_nonoverlapping(x.as_ptr(), (*t.buf.get()).inline.as_mut_ptr(), len); t } #[inline] unsafe fn owned(x: Buf32>) -> Tendril { Tendril { ptr: Cell::new(NonZeroUsize::new_unchecked(x.ptr as usize)), buf: UnsafeCell::new(Buffer { heap: Heap { len: x.len, aux: x.cap, }, }), marker: PhantomData, refcount_marker: PhantomData, } } #[inline] unsafe fn owned_copy(x: &[u8]) -> Tendril { let len32 = x.len() as u32; let mut b = Buf32::with_capacity(len32, Header::new()); ptr::copy_nonoverlapping(x.as_ptr(), b.data_ptr(), x.len()); b.len = len32; Tendril::owned(b) } #[inline] unsafe fn shared(buf: Buf32>, off: u32, len: u32) -> Tendril { Tendril { ptr: Cell::new(NonZeroUsize::new_unchecked((buf.ptr as usize) | 1)), buf: UnsafeCell::new(Buffer { heap: Heap { len, aux: off }, }), marker: PhantomData, refcount_marker: PhantomData, } } #[inline] fn as_byte_slice<'a>(&'a self) -> &'a [u8] { unsafe { match self.ptr.get().get() { EMPTY_TAG => &[], n if n <= MAX_INLINE_LEN => (*self.buf.get()).inline.get_unchecked(..n), _ => { let (buf, _, offset) = self.assume_buf(); copy_lifetime( self, unsafe_slice(buf.data(), offset as usize, self.len32() as usize), ) } } } } // There's no need to worry about locking on an atomic Tendril, because it makes it unique as // soon as you do that. #[inline] fn as_mut_byte_slice<'a>(&'a mut self) -> &'a mut [u8] { unsafe { match self.ptr.get().get() { EMPTY_TAG => &mut [], n if n <= MAX_INLINE_LEN => (*self.buf.get()).inline.get_unchecked_mut(..n), _ => { self.make_owned(); let (mut buf, _, offset) = self.assume_buf(); let len = self.len32() as usize; copy_lifetime_mut(self, unsafe_slice_mut(buf.data_mut(), offset as usize, len)) } } } } unsafe fn raw_len(&self) -> u32 { (*self.buf.get()).heap.len } unsafe fn set_len(&mut self, len: u32) { (*self.buf.get()).heap.len = len; } unsafe fn aux(&self) -> u32 { (*self.buf.get()).heap.aux } unsafe fn set_aux(&self, aux: u32) { (*self.buf.get()).heap.aux = aux; } } impl Tendril where F: fmt::SliceFormat, A: Atomicity, { /// Build a `Tendril` by copying a slice. #[inline] pub fn from_slice(x: &F::Slice) -> Tendril { unsafe { Tendril::from_byte_slice_without_validating(x.as_bytes()) } } /// Push a slice onto the end of the `Tendril`. #[inline] pub fn push_slice(&mut self, x: &F::Slice) { unsafe { self.push_bytes_without_validating(x.as_bytes()) } } } /// A simple wrapper to make `Tendril` `Send`. /// /// Although there is a certain subset of the operations on a `Tendril` that a `SendTendril` could /// reasonably implement, in order to clearly separate concerns this type is deliberately /// minimalist, acting as a safe encapsulation around the invariants which permit `Send`ness and /// behaving as an opaque object. /// /// A `SendTendril` may be produced by `Tendril.into_send()` or `SendTendril::from(tendril)`, /// and may be returned to a `Tendril` by `Tendril::from(self)`. #[derive(Clone)] pub struct SendTendril where F: fmt::Format, { tendril: Tendril, } unsafe impl Send for SendTendril where F: fmt::Format {} impl From> for SendTendril where F: fmt::Format, A: Atomicity, { #[inline] fn from(tendril: Tendril) -> SendTendril { tendril.into_send() } } impl From> for Tendril where F: fmt::Format, A: Atomicity, { #[inline] fn from(send: SendTendril) -> Tendril { unsafe { mem::transmute(send.tendril) } // header.refcount may have been initialised as an Atomic or a NonAtomic, but the value // will be the same (1) regardless, because the layout is defined. // Thus we don't need to fiddle about resetting it or anything like that. } } /// `Tendril`-related methods for Rust slices. pub trait SliceExt: fmt::Slice where F: fmt::SliceFormat, { /// Make a `Tendril` from this slice. #[inline] fn to_tendril(&self) -> Tendril { // It should be done thusly, but at the time of writing the defaults don't help inference: //fn to_tendril(&self) -> Tendril // where A: Atomicity, //{ Tendril::from_slice(self) } } impl SliceExt for str {} impl SliceExt for [u8] {} impl Tendril where F: for<'a> fmt::CharFormat<'a>, A: Atomicity, { /// Remove and return the first character, if any. #[inline] pub fn pop_front_char<'a>(&'a mut self) -> Option { unsafe { let next_char; // first char in iterator let mut skip = 0; // number of bytes to skip, or 0 to clear { // <--+ // | Creating an iterator borrows self, so introduce a // +- scope to contain the borrow (that way we can mutate // self below, after this scope exits). let mut iter = F::char_indices(self.as_byte_slice()); match iter.next() { Some((_, c)) => { next_char = Some(c); if let Some((n, _)) = iter.next() { skip = n as u32; } } None => { next_char = None; } } } if skip != 0 { self.unsafe_pop_front(skip); } else { self.clear(); } next_char } } /// Remove and return a run of characters at the front of the `Tendril` /// which are classified the same according to the function `classify`. /// /// Returns `None` on an empty string. #[inline] pub fn pop_front_char_run<'a, C, R>(&'a mut self, mut classify: C) -> Option<(Tendril, R)> where C: FnMut(char) -> R, R: PartialEq, { let (class, first_mismatch); { let mut chars = unsafe { F::char_indices(self.as_byte_slice()) }; let (_, first) = unwrap_or_return!(chars.next(), None); class = classify(first); first_mismatch = chars.find(|&(_, ch)| &classify(ch) != &class); } match first_mismatch { Some((idx, _)) => unsafe { let t = self.unsafe_subtendril(0, idx as u32); self.unsafe_pop_front(idx as u32); Some((t, class)) }, None => { let t = self.clone(); self.clear(); Some((t, class)) } } } /// Push a character, if it can be represented in this format. #[inline] pub fn try_push_char(&mut self, c: char) -> Result<(), ()> { F::encode_char(c, |b| unsafe { self.push_bytes_without_validating(b); }) } } /// Extension trait for `io::Read`. pub trait ReadExt: io::Read { fn read_to_tendril(&mut self, buf: &mut Tendril) -> io::Result where A: Atomicity; } impl ReadExt for T where T: io::Read, { /// Read all bytes until EOF. fn read_to_tendril(&mut self, buf: &mut Tendril) -> io::Result where A: Atomicity, { // Adapted from libstd/io/mod.rs. const DEFAULT_BUF_SIZE: u32 = 64 * 1024; let start_len = buf.len(); let mut len = start_len; let mut new_write_size = 16; let ret; loop { if len == buf.len() { if new_write_size < DEFAULT_BUF_SIZE { new_write_size *= 2; } // FIXME: this exposes uninitialized bytes to a generic R type // this is fine for R=File which never reads these bytes, // but user-defined types might. // The standard library pushes zeros to `Vec` for that reason. unsafe { buf.push_uninitialized(new_write_size); } } match self.read(&mut buf[len..]) { Ok(0) => { ret = Ok(len - start_len); break; } Ok(n) => len += n, Err(ref e) if e.kind() == io::ErrorKind::Interrupted => {} Err(e) => { ret = Err(e); break; } } } let buf_len = buf.len32(); buf.pop_back(buf_len - (len as u32)); ret } } impl io::Write for Tendril where A: Atomicity, { #[inline] fn write(&mut self, buf: &[u8]) -> io::Result { self.push_slice(buf); Ok(buf.len()) } #[inline] fn write_all(&mut self, buf: &[u8]) -> io::Result<()> { self.push_slice(buf); Ok(()) } #[inline(always)] fn flush(&mut self) -> io::Result<()> { Ok(()) } } #[cfg(feature = "encoding")] impl encoding::ByteWriter for Tendril where A: Atomicity, { #[inline] fn write_byte(&mut self, b: u8) { self.push_slice(&[b]); } #[inline] fn write_bytes(&mut self, v: &[u8]) { self.push_slice(v); } #[inline] fn writer_hint(&mut self, additional: usize) { self.reserve(::std::cmp::min(u32::MAX as usize, additional) as u32); } } impl Tendril where A: Atomicity, F: fmt::SliceFormat, { /// Decode from some character encoding into UTF-8. /// /// See the [rust-encoding docs](https://lifthrasiir.github.io/rust-encoding/encoding/) /// for more information. #[inline] #[cfg(feature = "encoding")] pub fn decode( &self, encoding: EncodingRef, trap: DecoderTrap, ) -> Result, ::std::borrow::Cow<'static, str>> { let mut ret = Tendril::new(); encoding.decode_to(&*self, trap, &mut ret).map(|_| ret) } /// Push "uninitialized bytes" onto the end. /// /// Really, this grows the tendril without writing anything to the new area. /// It's only defined for byte tendrils because it's only useful if you /// plan to then mutate the buffer. #[inline] pub unsafe fn push_uninitialized(&mut self, n: u32) { let new_len = self.len32().checked_add(n).expect(OFLOW); if new_len <= MAX_INLINE_LEN as u32 && self.ptr.get().get() <= MAX_INLINE_TAG { self.ptr.set(inline_tag(new_len)) } else { self.make_owned_with_capacity(new_len); self.set_len(new_len); } } } impl strfmt::Display for Tendril where A: Atomicity, { #[inline] fn fmt(&self, f: &mut strfmt::Formatter) -> strfmt::Result { ::fmt(&**self, f) } } impl str::FromStr for Tendril where A: Atomicity, { type Err = (); #[inline] fn from_str(s: &str) -> Result { Ok(Tendril::from_slice(s)) } } impl strfmt::Write for Tendril where A: Atomicity, { #[inline] fn write_str(&mut self, s: &str) -> strfmt::Result { self.push_slice(s); Ok(()) } } #[cfg(feature = "encoding")] impl encoding::StringWriter for Tendril where A: Atomicity, { #[inline] fn write_char(&mut self, c: char) { self.push_char(c); } #[inline] fn write_str(&mut self, s: &str) { self.push_slice(s); } #[inline] fn writer_hint(&mut self, additional: usize) { self.reserve(::std::cmp::min(u32::MAX as usize, additional) as u32); } } impl Tendril where A: Atomicity, { /// Encode from UTF-8 into some other character encoding. /// /// See the [rust-encoding docs](https://lifthrasiir.github.io/rust-encoding/encoding/) /// for more information. #[inline] #[cfg(feature = "encoding")] pub fn encode( &self, encoding: EncodingRef, trap: EncoderTrap, ) -> Result, ::std::borrow::Cow<'static, str>> { let mut ret = Tendril::new(); encoding.encode_to(&*self, trap, &mut ret).map(|_| ret) } /// Push a character onto the end. #[inline] pub fn push_char(&mut self, c: char) { unsafe { self.push_bytes_without_validating(c.encode_utf8(&mut [0_u8; 4]).as_bytes()); } } /// Create a `Tendril` from a single character. #[inline] pub fn from_char(c: char) -> Tendril { let mut t: Tendril = Tendril::new(); t.push_char(c); t } /// Helper for the `format_tendril!` macro. #[inline] pub fn format(args: strfmt::Arguments) -> Tendril { use std::fmt::Write; let mut output: Tendril = Tendril::new(); let _ = write!(&mut output, "{}", args); output } } /// Create a `StrTendril` through string formatting. /// /// Works just like the standard `format!` macro. #[macro_export] macro_rules! format_tendril { ($($arg:tt)*) => ($crate::StrTendril::format(format_args!($($arg)*))) } impl<'a, F, A> From<&'a F::Slice> for Tendril where F: fmt::SliceFormat, A: Atomicity, { #[inline] fn from(input: &F::Slice) -> Tendril { Tendril::from_slice(input) } } impl From for Tendril where A: Atomicity, { #[inline] fn from(input: String) -> Tendril { Tendril::from_slice(&*input) } } impl AsRef for Tendril where F: fmt::SliceFormat, A: Atomicity, { #[inline] fn as_ref(&self) -> &F::Slice { &**self } } impl From> for String where A: Atomicity, { #[inline] fn from(input: Tendril) -> String { String::from(&*input) } } impl<'a, A> From<&'a Tendril> for String where A: Atomicity, { #[inline] fn from(input: &'a Tendril) -> String { String::from(&**input) } } #[cfg(all(test, feature = "bench"))] #[path = "bench.rs"] mod bench; #[cfg(test)] mod test { use super::{ Atomic, ByteTendril, Header, NonAtomic, ReadExt, SendTendril, SliceExt, StrTendril, Tendril, }; use fmt; use std::iter; use std::thread; fn assert_send() {} #[test] fn smoke_test() { assert_eq!("", &*"".to_tendril()); assert_eq!("abc", &*"abc".to_tendril()); assert_eq!("Hello, world!", &*"Hello, world!".to_tendril()); assert_eq!(b"", &*b"".to_tendril()); assert_eq!(b"abc", &*b"abc".to_tendril()); assert_eq!(b"Hello, world!", &*b"Hello, world!".to_tendril()); } #[test] fn assert_sizes() { use std::mem; struct EmptyWithDrop; impl Drop for EmptyWithDrop { fn drop(&mut self) {} } let compiler_uses_inline_drop_flags = mem::size_of::() > 0; let correct = mem::size_of::<*const ()>() + 8 + if compiler_uses_inline_drop_flags { 1 } else { 0 }; assert_eq!(correct, mem::size_of::()); assert_eq!(correct, mem::size_of::()); assert_eq!(correct, mem::size_of::>()); assert_eq!(correct, mem::size_of::>()); assert_eq!( mem::size_of::<*const ()>() * 2, mem::size_of::>(), ); assert_eq!( mem::size_of::>(), mem::size_of::>(), ); } #[test] fn validate_utf8() { assert!(ByteTendril::try_from_byte_slice(b"\xFF").is_ok()); assert!(StrTendril::try_from_byte_slice(b"\xFF").is_err()); assert!(StrTendril::try_from_byte_slice(b"\xEA\x99\xFF").is_err()); assert!(StrTendril::try_from_byte_slice(b"\xEA\x99").is_err()); assert!(StrTendril::try_from_byte_slice(b"\xEA\x99\xAE\xEA").is_err()); assert_eq!( "\u{a66e}", &*StrTendril::try_from_byte_slice(b"\xEA\x99\xAE").unwrap() ); let mut t = StrTendril::new(); assert!(t.try_push_bytes(b"\xEA\x99").is_err()); assert!(t.try_push_bytes(b"\xAE").is_err()); assert!(t.try_push_bytes(b"\xEA\x99\xAE").is_ok()); assert_eq!("\u{a66e}", &*t); } #[test] fn share_and_unshare() { let s = b"foobarbaz".to_tendril(); assert_eq!(b"foobarbaz", &*s); assert!(!s.is_shared()); let mut t = s.clone(); assert_eq!(s.as_ptr(), t.as_ptr()); assert!(s.is_shared()); assert!(t.is_shared()); t.push_slice(b"quux"); assert_eq!(b"foobarbaz", &*s); assert_eq!(b"foobarbazquux", &*t); assert!(s.as_ptr() != t.as_ptr()); assert!(!t.is_shared()); } #[test] fn format_display() { assert_eq!("foobar", &*format!("{}", "foobar".to_tendril())); let mut s = "foo".to_tendril(); assert_eq!("foo", &*format!("{}", s)); let t = s.clone(); assert_eq!("foo", &*format!("{}", s)); assert_eq!("foo", &*format!("{}", t)); s.push_slice("barbaz!"); assert_eq!("foobarbaz!", &*format!("{}", s)); assert_eq!("foo", &*format!("{}", t)); } #[test] fn format_debug() { assert_eq!( r#"Tendril(inline: "foobar")"#, &*format!("{:?}", "foobar".to_tendril()) ); assert_eq!( r#"Tendril(inline: [102, 111, 111, 98, 97, 114])"#, &*format!("{:?}", b"foobar".to_tendril()) ); let t = "anextralongstring".to_tendril(); assert_eq!( r#"Tendril(owned: "anextralongstring")"#, &*format!("{:?}", t) ); let _ = t.clone(); assert_eq!( r#"Tendril(shared: "anextralongstring")"#, &*format!("{:?}", t) ); } #[test] fn subtendril() { assert_eq!("foo".to_tendril(), "foo-bar".to_tendril().subtendril(0, 3)); assert_eq!("bar".to_tendril(), "foo-bar".to_tendril().subtendril(4, 3)); let mut t = "foo-bar".to_tendril(); t.pop_front(2); assert_eq!("o-bar".to_tendril(), t); t.pop_back(1); assert_eq!("o-ba".to_tendril(), t); assert_eq!( "foo".to_tendril(), "foo-a-longer-string-bar-baz".to_tendril().subtendril(0, 3) ); assert_eq!( "oo-a-".to_tendril(), "foo-a-longer-string-bar-baz".to_tendril().subtendril(1, 5) ); assert_eq!( "bar".to_tendril(), "foo-a-longer-string-bar-baz".to_tendril().subtendril(20, 3) ); let mut t = "another rather long string".to_tendril(); t.pop_front(2); assert!(t.starts_with("other rather")); t.pop_back(1); assert_eq!("other rather long strin".to_tendril(), t); assert!(t.is_shared()); } #[test] fn subtendril_invalid() { assert!("\u{a66e}".to_tendril().try_subtendril(0, 2).is_err()); assert!("\u{a66e}".to_tendril().try_subtendril(1, 2).is_err()); assert!("\u{1f4a9}".to_tendril().try_subtendril(0, 3).is_err()); assert!("\u{1f4a9}".to_tendril().try_subtendril(0, 2).is_err()); assert!("\u{1f4a9}".to_tendril().try_subtendril(0, 1).is_err()); assert!("\u{1f4a9}".to_tendril().try_subtendril(1, 3).is_err()); assert!("\u{1f4a9}".to_tendril().try_subtendril(1, 2).is_err()); assert!("\u{1f4a9}".to_tendril().try_subtendril(1, 1).is_err()); assert!("\u{1f4a9}".to_tendril().try_subtendril(2, 2).is_err()); assert!("\u{1f4a9}".to_tendril().try_subtendril(2, 1).is_err()); assert!("\u{1f4a9}".to_tendril().try_subtendril(3, 1).is_err()); let mut t = "\u{1f4a9}zzzzzz".to_tendril(); assert!(t.try_pop_front(1).is_err()); assert!(t.try_pop_front(2).is_err()); assert!(t.try_pop_front(3).is_err()); assert!(t.try_pop_front(4).is_ok()); assert_eq!("zzzzzz", &*t); let mut t = "zzzzzz\u{1f4a9}".to_tendril(); assert!(t.try_pop_back(1).is_err()); assert!(t.try_pop_back(2).is_err()); assert!(t.try_pop_back(3).is_err()); assert!(t.try_pop_back(4).is_ok()); assert_eq!("zzzzzz", &*t); } #[test] fn conversion() { assert_eq!( &[0x66, 0x6F, 0x6F].to_tendril(), "foo".to_tendril().as_bytes() ); assert_eq!( [0x66, 0x6F, 0x6F].to_tendril(), "foo".to_tendril().into_bytes() ); let ascii: Tendril = b"hello".to_tendril().try_reinterpret().unwrap(); assert_eq!(&"hello".to_tendril(), ascii.as_superset()); assert_eq!("hello".to_tendril(), ascii.clone().into_superset()); assert!(b"\xFF" .to_tendril() .try_reinterpret::() .is_err()); let t = "hello".to_tendril(); let ascii: &Tendril = t.try_as_subset().unwrap(); assert_eq!(b"hello", &**ascii.as_bytes()); assert!("ő" .to_tendril() .try_reinterpret_view::() .is_err()); assert!("ő".to_tendril().try_as_subset::().is_err()); let ascii: Tendril = "hello".to_tendril().try_into_subset().unwrap(); assert_eq!(b"hello", &**ascii.as_bytes()); assert!("ő".to_tendril().try_reinterpret::().is_err()); assert!("ő".to_tendril().try_into_subset::().is_err()); } #[test] fn clear() { let mut t = "foo-".to_tendril(); t.clear(); assert_eq!(t.len(), 0); assert_eq!(t.len32(), 0); assert_eq!(&*t, ""); let mut t = "much longer".to_tendril(); let s = t.clone(); t.clear(); assert_eq!(t.len(), 0); assert_eq!(t.len32(), 0); assert_eq!(&*t, ""); assert_eq!(&*s, "much longer"); } #[test] fn push_tendril() { let mut t = "abc".to_tendril(); t.push_tendril(&"xyz".to_tendril()); assert_eq!("abcxyz", &*t); } #[test] fn wtf8() { assert!(Tendril::::try_from_byte_slice(b"\xED\xA0\xBD").is_ok()); assert!(Tendril::::try_from_byte_slice(b"\xED\xB2\xA9").is_ok()); assert!(Tendril::::try_from_byte_slice(b"\xED\xA0\xBD\xED\xB2\xA9").is_err()); let t: Tendril = Tendril::try_from_byte_slice(b"\xED\xA0\xBD\xEA\x99\xAE").unwrap(); assert!(b"\xED\xA0\xBD".to_tendril().try_reinterpret().unwrap() == t.subtendril(0, 3)); assert!(b"\xEA\x99\xAE".to_tendril().try_reinterpret().unwrap() == t.subtendril(3, 3)); assert!(t.try_reinterpret_view::().is_err()); assert!(t.try_subtendril(0, 1).is_err()); assert!(t.try_subtendril(0, 2).is_err()); assert!(t.try_subtendril(1, 1).is_err()); assert!(t.try_subtendril(3, 1).is_err()); assert!(t.try_subtendril(3, 2).is_err()); assert!(t.try_subtendril(4, 1).is_err()); // paired surrogates let mut t: Tendril = Tendril::try_from_byte_slice(b"\xED\xA0\xBD").unwrap(); assert!(t.try_push_bytes(b"\xED\xB2\xA9").is_ok()); assert_eq!(b"\xF0\x9F\x92\xA9", t.as_byte_slice()); assert!(t.try_reinterpret_view::().is_ok()); // unpaired surrogates let mut t: Tendril = Tendril::try_from_byte_slice(b"\xED\xA0\xBB").unwrap(); assert!(t.try_push_bytes(b"\xED\xA0").is_err()); assert!(t.try_push_bytes(b"\xED").is_err()); assert!(t.try_push_bytes(b"\xA0").is_err()); assert!(t.try_push_bytes(b"\xED\xA0\xBD").is_ok()); assert_eq!(b"\xED\xA0\xBB\xED\xA0\xBD", t.as_byte_slice()); assert!(t.try_push_bytes(b"\xED\xB2\xA9").is_ok()); assert_eq!(b"\xED\xA0\xBB\xF0\x9F\x92\xA9", t.as_byte_slice()); assert!(t.try_reinterpret_view::().is_err()); } #[test] fn front_char() { let mut t = "".to_tendril(); assert_eq!(None, t.pop_front_char()); assert_eq!(None, t.pop_front_char()); let mut t = "abc".to_tendril(); assert_eq!(Some('a'), t.pop_front_char()); assert_eq!(Some('b'), t.pop_front_char()); assert_eq!(Some('c'), t.pop_front_char()); assert_eq!(None, t.pop_front_char()); assert_eq!(None, t.pop_front_char()); let mut t = "főo-a-longer-string-bar-baz".to_tendril(); assert_eq!(28, t.len()); assert_eq!(Some('f'), t.pop_front_char()); assert_eq!(Some('ő'), t.pop_front_char()); assert_eq!(Some('o'), t.pop_front_char()); assert_eq!(Some('-'), t.pop_front_char()); assert_eq!(23, t.len()); } #[test] fn char_run() { for &(s, exp) in &[ ("", None), (" ", Some((" ", true))), ("x", Some(("x", false))), (" \t \n", Some((" \t \n", true))), ("xyzzy", Some(("xyzzy", false))), (" xyzzy", Some((" ", true))), ("xyzzy ", Some(("xyzzy", false))), (" xyzzy ", Some((" ", true))), ("xyzzy hi", Some(("xyzzy", false))), ("中 ", Some(("中", false))), (" 中 ", Some((" ", true))), (" 中 ", Some((" ", true))), (" 中 ", Some((" ", true))), ] { let mut t = s.to_tendril(); let res = t.pop_front_char_run(char::is_whitespace); match exp { None => assert!(res.is_none()), Some((es, ec)) => { let (rt, rc) = res.unwrap(); assert_eq!(es, &*rt); assert_eq!(ec, rc); } } } } #[test] fn deref_mut_inline() { let mut t = "xyő".to_tendril().into_bytes(); t[3] = 0xff; assert_eq!(b"xy\xC5\xFF", &*t); assert!(t.try_reinterpret_view::().is_err()); t[3] = 0x8b; assert_eq!("xyŋ", &**t.try_reinterpret_view::().unwrap()); unsafe { t.push_uninitialized(3); t[4] = 0xEA; t[5] = 0x99; t[6] = 0xAE; assert_eq!( "xyŋ\u{a66e}", &**t.try_reinterpret_view::().unwrap() ); t.push_uninitialized(20); t.pop_back(20); assert_eq!( "xyŋ\u{a66e}", &**t.try_reinterpret_view::().unwrap() ); } } #[test] fn deref_mut() { let mut t = b"0123456789".to_tendril(); let u = t.clone(); assert!(t.is_shared()); t[9] = 0xff; assert!(!t.is_shared()); assert_eq!(b"0123456789", &*u); assert_eq!(b"012345678\xff", &*t); } #[test] fn push_char() { let mut t = "xyz".to_tendril(); t.push_char('o'); assert_eq!("xyzo", &*t); t.push_char('ő'); assert_eq!("xyzoő", &*t); t.push_char('\u{a66e}'); assert_eq!("xyzoő\u{a66e}", &*t); t.push_char('\u{1f4a9}'); assert_eq!("xyzoő\u{a66e}\u{1f4a9}", &*t); assert_eq!(t.len(), 13); } #[test] #[cfg(feature = "encoding")] fn encode() { use encoding::{all, EncoderTrap}; let t = "안녕하세요 러스트".to_tendril(); assert_eq!( b"\xbe\xc8\xb3\xe7\xc7\xcf\xbc\xbc\xbf\xe4\x20\xb7\xaf\xbd\xba\xc6\xae", &*t.encode(all::WINDOWS_949, EncoderTrap::Strict).unwrap() ); let t = "Энергия пробуждения ия-я-я! \u{a66e}".to_tendril(); assert_eq!( b"\xfc\xce\xc5\xd2\xc7\xc9\xd1 \xd0\xd2\xcf\xc2\xd5\xd6\xc4\xc5\xce\ \xc9\xd1 \xc9\xd1\x2d\xd1\x2d\xd1\x21 ?", &*t.encode(all::KOI8_U, EncoderTrap::Replace).unwrap() ); let t = "\u{1f4a9}".to_tendril(); assert!(t.encode(all::WINDOWS_1252, EncoderTrap::Strict).is_err()); } #[test] #[cfg(feature = "encoding")] fn decode() { use encoding::{all, DecoderTrap}; let t = b"\xbe\xc8\xb3\xe7\xc7\xcf\xbc\xbc\ \xbf\xe4\x20\xb7\xaf\xbd\xba\xc6\xae" .to_tendril(); assert_eq!( "안녕하세요 러스트", &*t.decode(all::WINDOWS_949, DecoderTrap::Strict).unwrap() ); let t = b"\xfc\xce\xc5\xd2\xc7\xc9\xd1 \xd0\xd2\xcf\xc2\xd5\xd6\xc4\xc5\xce\ \xc9\xd1 \xc9\xd1\x2d\xd1\x2d\xd1\x21" .to_tendril(); assert_eq!( "Энергия пробуждения ия-я-я!", &*t.decode(all::KOI8_U, DecoderTrap::Replace).unwrap() ); let t = b"x \xff y".to_tendril(); assert!(t.decode(all::UTF_8, DecoderTrap::Strict).is_err()); let t = b"x \xff y".to_tendril(); assert_eq!( "x \u{fffd} y", &*t.decode(all::UTF_8, DecoderTrap::Replace).unwrap() ); } #[test] fn ascii() { fn mk(x: &[u8]) -> Tendril { x.to_tendril().try_reinterpret().unwrap() } let mut t = mk(b"xyz"); assert_eq!(Some('x'), t.pop_front_char()); assert_eq!(Some('y'), t.pop_front_char()); assert_eq!(Some('z'), t.pop_front_char()); assert_eq!(None, t.pop_front_char()); let mut t = mk(b" \t xyz"); assert!(Some((mk(b" \t "), true)) == t.pop_front_char_run(char::is_whitespace)); assert!(Some((mk(b"xyz"), false)) == t.pop_front_char_run(char::is_whitespace)); assert!(t.pop_front_char_run(char::is_whitespace).is_none()); let mut t = Tendril::::new(); assert!(t.try_push_char('x').is_ok()); assert!(t.try_push_char('\0').is_ok()); assert!(t.try_push_char('\u{a0}').is_err()); assert_eq!(b"x\0", t.as_byte_slice()); } #[test] fn latin1() { fn mk(x: &[u8]) -> Tendril { x.to_tendril().try_reinterpret().unwrap() } let mut t = mk(b"\xd8_\xd8"); assert_eq!(Some('Ø'), t.pop_front_char()); assert_eq!(Some('_'), t.pop_front_char()); assert_eq!(Some('Ø'), t.pop_front_char()); assert_eq!(None, t.pop_front_char()); let mut t = mk(b" \t \xfe\xa7z"); assert!(Some((mk(b" \t "), true)) == t.pop_front_char_run(char::is_whitespace)); assert!(Some((mk(b"\xfe\xa7z"), false)) == t.pop_front_char_run(char::is_whitespace)); assert!(t.pop_front_char_run(char::is_whitespace).is_none()); let mut t = Tendril::::new(); assert!(t.try_push_char('x').is_ok()); assert!(t.try_push_char('\0').is_ok()); assert!(t.try_push_char('\u{a0}').is_ok()); assert!(t.try_push_char('ő').is_err()); assert!(t.try_push_char('я').is_err()); assert!(t.try_push_char('\u{a66e}').is_err()); assert!(t.try_push_char('\u{1f4a9}').is_err()); assert_eq!(b"x\0\xa0", t.as_byte_slice()); } #[test] fn format() { assert_eq!("", &*format_tendril!("")); assert_eq!( "two and two make 4", &*format_tendril!("two and two make {}", 2 + 2) ); } #[test] fn merge_shared() { let t = "012345678901234567890123456789".to_tendril(); let a = t.subtendril(10, 20); assert!(a.is_shared()); assert_eq!("01234567890123456789", &*a); let mut b = t.subtendril(0, 10); assert!(b.is_shared()); assert_eq!("0123456789", &*b); b.push_tendril(&a); assert!(b.is_shared()); assert!(a.is_shared()); assert!(a.is_shared_with(&b)); assert!(b.is_shared_with(&a)); assert_eq!("012345678901234567890123456789", &*b); assert!(t.is_shared()); assert!(t.is_shared_with(&a)); assert!(t.is_shared_with(&b)); } #[test] fn merge_cant_share() { let t = "012345678901234567890123456789".to_tendril(); let mut b = t.subtendril(0, 10); assert!(b.is_shared()); assert_eq!("0123456789", &*b); b.push_tendril(&"abcd".to_tendril()); assert!(!b.is_shared()); assert_eq!("0123456789abcd", &*b); } #[test] fn shared_doesnt_reserve() { let mut t = "012345678901234567890123456789".to_tendril(); let a = t.subtendril(1, 10); assert!(t.is_shared()); t.reserve(10); assert!(t.is_shared()); let _ = a; } #[test] fn out_of_bounds() { assert!("".to_tendril().try_subtendril(0, 1).is_err()); assert!("abc".to_tendril().try_subtendril(0, 4).is_err()); assert!("abc".to_tendril().try_subtendril(3, 1).is_err()); assert!("abc".to_tendril().try_subtendril(7, 1).is_err()); let mut t = "".to_tendril(); assert!(t.try_pop_front(1).is_err()); assert!(t.try_pop_front(5).is_err()); assert!(t.try_pop_front(500).is_err()); assert!(t.try_pop_back(1).is_err()); assert!(t.try_pop_back(5).is_err()); assert!(t.try_pop_back(500).is_err()); let mut t = "abcd".to_tendril(); assert!(t.try_pop_front(1).is_ok()); assert!(t.try_pop_front(4).is_err()); assert!(t.try_pop_front(500).is_err()); assert!(t.try_pop_back(1).is_ok()); assert!(t.try_pop_back(3).is_err()); assert!(t.try_pop_back(500).is_err()); } #[test] fn compare() { for &a in &[ "indiscretions", "validity", "hallucinogenics", "timelessness", "original", "microcosms", "boilers", "mammoth", ] { for &b in &[ "intrepidly", "frigid", "spa", "cardigans", "guileful", "evaporated", "unenthusiastic", "legitimate", ] { let ta = a.to_tendril(); let tb = b.to_tendril(); assert_eq!(a.eq(b), ta.eq(&tb)); assert_eq!(a.ne(b), ta.ne(&tb)); assert_eq!(a.lt(b), ta.lt(&tb)); assert_eq!(a.le(b), ta.le(&tb)); assert_eq!(a.gt(b), ta.gt(&tb)); assert_eq!(a.ge(b), ta.ge(&tb)); assert_eq!(a.partial_cmp(b), ta.partial_cmp(&tb)); assert_eq!(a.cmp(b), ta.cmp(&tb)); } } } #[test] fn extend_and_from_iterator() { // Testing Extend and FromIterator for the various Ts. // Tendril let mut t = "Hello".to_tendril(); t.extend(None::<&Tendril<_>>.into_iter()); assert_eq!("Hello", &*t); t.extend(&[", ".to_tendril(), "world".to_tendril(), "!".to_tendril()]); assert_eq!("Hello, world!", &*t); assert_eq!( "Hello, world!", &*[ "Hello".to_tendril(), ", ".to_tendril(), "world".to_tendril(), "!".to_tendril() ] .iter() .collect::() ); // &str let mut t = "Hello".to_tendril(); t.extend(None::<&str>.into_iter()); assert_eq!("Hello", &*t); t.extend([", ", "world", "!"].iter().map(|&s| s)); assert_eq!("Hello, world!", &*t); assert_eq!( "Hello, world!", &*["Hello", ", ", "world", "!"] .iter() .map(|&s| s) .collect::() ); // &[u8] let mut t = b"Hello".to_tendril(); t.extend(None::<&[u8]>.into_iter()); assert_eq!(b"Hello", &*t); t.extend( [b", ".as_ref(), b"world".as_ref(), b"!".as_ref()] .iter() .map(|&s| s), ); assert_eq!(b"Hello, world!", &*t); assert_eq!( b"Hello, world!", &*[ b"Hello".as_ref(), b", ".as_ref(), b"world".as_ref(), b"!".as_ref() ] .iter() .map(|&s| s) .collect::() ); let string = "the quick brown fox jumps over the lazy dog"; let string_expected = string.to_tendril(); let bytes = string.as_bytes(); let bytes_expected = bytes.to_tendril(); // char assert_eq!(string_expected, string.chars().collect()); let mut tendril = StrTendril::new(); tendril.extend(string.chars()); assert_eq!(string_expected, tendril); // &u8 assert_eq!(bytes_expected, bytes.iter().collect()); let mut tendril = ByteTendril::new(); tendril.extend(bytes); assert_eq!(bytes_expected, tendril); // u8 assert_eq!(bytes_expected, bytes.iter().map(|&b| b).collect()); let mut tendril = ByteTendril::new(); tendril.extend(bytes.iter().map(|&b| b)); assert_eq!(bytes_expected, tendril); } #[test] fn from_str() { use std::str::FromStr; let t: Tendril<_> = FromStr::from_str("foo bar baz").unwrap(); assert_eq!("foo bar baz", &*t); } #[test] fn from_char() { assert_eq!("o", &*StrTendril::from_char('o')); assert_eq!("ő", &*StrTendril::from_char('ő')); assert_eq!("\u{a66e}", &*StrTendril::from_char('\u{a66e}')); assert_eq!("\u{1f4a9}", &*StrTendril::from_char('\u{1f4a9}')); } #[test] #[cfg_attr(miri, ignore)] // slow fn read() { fn check(x: &[u8]) { use std::io::Cursor; let mut t = ByteTendril::new(); assert_eq!(x.len(), Cursor::new(x).read_to_tendril(&mut t).unwrap()); assert_eq!(x, &*t); } check(b""); check(b"abcd"); let long: Vec = iter::repeat(b'x').take(1_000_000).collect(); check(&long); } #[test] fn hash_map_key() { use std::collections::HashMap; // As noted with Borrow, indexing on HashMap is byte-based because of // https://github.com/rust-lang/rust/issues/27108. let mut map = HashMap::new(); map.insert("foo".to_tendril(), 1); assert_eq!(map.get(b"foo".as_ref()), Some(&1)); assert_eq!(map.get(b"bar".as_ref()), None); let mut map = HashMap::new(); map.insert(b"foo".to_tendril(), 1); assert_eq!(map.get(b"foo".as_ref()), Some(&1)); assert_eq!(map.get(b"bar".as_ref()), None); } #[test] fn atomic() { assert_send::>(); let s: Tendril = Tendril::from_slice("this is a string"); assert!(!s.is_shared()); let mut t = s.clone(); assert!(s.is_shared()); let sp = s.as_ptr() as usize; thread::spawn(move || { assert!(t.is_shared()); t.push_slice(" extended"); assert_eq!("this is a string extended", &*t); assert!(t.as_ptr() as usize != sp); assert!(!t.is_shared()); }) .join() .unwrap(); assert!(s.is_shared()); assert_eq!("this is a string", &*s); } #[test] fn send() { assert_send::>(); let s = "this is a string".to_tendril(); let t = s.clone(); let s2 = s.into_send(); thread::spawn(move || { let s = StrTendril::from(s2); assert!(!s.is_shared()); assert_eq!("this is a string", &*s); }) .join() .unwrap(); assert_eq!("this is a string", &*t); } /// https://github.com/servo/tendril/issues/58 #[test] fn issue_58() { let data = "

Hello!

, World!"; let s: Tendril = data.into(); assert_eq!(&*s, data); let s: Tendril = s.into_send().into(); assert_eq!(&*s, data); } #[test] fn inline_send() { let s = "x".to_tendril(); let t = s.clone(); let s2 = s.into_send(); thread::spawn(move || { let s = StrTendril::from(s2); assert!(!s.is_shared()); assert_eq!("x", &*s); }) .join() .unwrap(); assert_eq!("x", &*t); } }