#![no_std]
extern crate alloc;

use alloc::{
    borrow::Cow,
    boxed::Box,
    string::{String, ToString},
    sync::Arc,
};
use core::{
    borrow::Borrow,
    cmp::{self, Ordering},
    convert::Infallible,
    fmt, hash, iter,
    mem::transmute,
    ops::Deref,
    str::FromStr,
};

/// A `SmolStr` is a string type that has the following properties:
///
/// * `size_of::<SmolStr>() == 24` (therefore `== size_of::<String>()` on 64 bit platforms)
/// * `Clone` is `O(1)`
/// * Strings are stack-allocated if they are:
///     * Up to 23 bytes long
///     * Longer than 23 bytes, but substrings of `WS` (see below). Such strings consist
///       solely of consecutive newlines, followed by consecutive spaces
/// * If a string does not satisfy the aforementioned conditions, it is heap-allocated
///
/// Unlike `String`, however, `SmolStr` is immutable. The primary use case for
/// `SmolStr` is a good enough default storage for tokens of typical programming
/// languages. Strings consisting of a series of newlines, followed by a series of
/// spaces are a typical pattern in computer programs because of indentation.
/// Note that a specialized interner might be a better solution for some use cases.
///
/// `WS`: A string of 32 newlines followed by 128 spaces.
#[derive(Clone)]
pub struct SmolStr(Repr);

impl SmolStr {
    /// Constructs an inline `SmolStr` from the first `len` bytes of `bytes`.
    ///
    /// # Panics
    ///
    /// Panics if `len > 23`, if `bytes` holds fewer than `len` bytes, or if any
    /// of the first `len` bytes is not ASCII.
    #[deprecated = "Use `new_inline` instead"]
    pub const fn new_inline_from_ascii(len: usize, bytes: &[u8]) -> SmolStr {
        // Indexing a 24-element array with `len` panics when `len > INLINE_CAP`.
        let _len_is_short = [(); INLINE_CAP + 1][len];

        const ZEROS: &[u8] = &[0; INLINE_CAP];

        let mut buf = [0; INLINE_CAP];

        // Unrolled, branch-free copy: positions below `len` read from `bytes`,
        // the remaining positions read from the zero-filled table.
        macro_rules! s {
            ($($idx:literal),*) => ( $(s!(set $idx);)* );
            (set $idx:literal) => ({
                let src: &[u8] = [ZEROS, bytes][($idx < len) as usize];
                let byte = src[$idx];
                // Indexing a 128-element array panics for non-ASCII bytes.
                let _is_ascii = [(); 128][byte as usize];
                buf[$idx] = byte
            });
        }
        s!(0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22);

        SmolStr(Repr::Inline {
            // SAFETY: `len <= INLINE_CAP` was enforced by `_len_is_short`, and
            // `InlineSize` has a `u8` discriminant for every value in `0..=INLINE_CAP`.
            len: unsafe { transmute(len as u8) },
            buf,
        })
    }

    /// Constructs inline variant of `SmolStr`.
    ///
    /// Panics if `text.len() > 23`.
    #[inline]
    pub const fn new_inline(text: &str) -> SmolStr {
        let mut buf = [0; INLINE_CAP];
        let mut i = 0;
        while i < text.len() {
            // Out-of-bounds indexing here is what produces the documented panic.
            buf[i] = text.as_bytes()[i];
            i += 1
        }
        SmolStr(Repr::Inline {
            // SAFETY: the loop above panics unless `text.len() <= INLINE_CAP`, and
            // `InlineSize` has a `u8` discriminant for every value in `0..=INLINE_CAP`.
            len: unsafe { transmute(text.len() as u8) },
            buf,
        })
    }

    /// Constructs a `SmolStr` from anything that can be viewed as a `str`,
    /// choosing the inline, whitespace-substring or heap representation.
    pub fn new<T>(text: T) -> SmolStr
    where
        T: AsRef<str>,
    {
        SmolStr(Repr::new(text))
    }

    /// Returns the contents as a string slice.
    #[inline(always)]
    pub fn as_str(&self) -> &str {
        self.0.as_str()
    }

    /// Copies the contents into a freshly allocated `String`.
    #[inline(always)]
    pub fn to_string(&self) -> String {
        self.as_str().to_string()
    }

    /// Returns the length of the string in bytes.
    #[inline(always)]
    pub fn len(&self) -> usize {
        self.0.len()
    }

    /// Returns `true` if the string has a length of zero bytes.
    #[inline(always)]
    pub fn is_empty(&self) -> bool {
        self.0.is_empty()
    }

    /// Returns `true` if the contents live in a reference-counted heap allocation.
    #[inline(always)]
    pub fn is_heap_allocated(&self) -> bool {
        matches!(self.0, Repr::Heap(..))
    }

    /// Builds a `SmolStr` from a stream of characters, staying inline for as
    /// long as the encoded text fits and spilling to the heap otherwise.
    fn from_char_iter<I: iter::Iterator<Item = char>>(mut iter: I) -> SmolStr {
        let (min_size, _) = iter.size_hint();
        // Every char encodes to at least one byte, so more than `INLINE_CAP`
        // guaranteed chars can never fit inline.
        if min_size > INLINE_CAP {
            let heap: String = iter.collect();
            return SmolStr(Repr::Heap(heap.into_boxed_str().into()));
        }
        let mut len = 0;
        let mut buf = [0u8; INLINE_CAP];
        // `while let` rather than `for`: the iterator is moved into `extend`
        // on the overflow path.
        while let Some(ch) = iter.next() {
            let size = ch.len_utf8();
            if size + len > INLINE_CAP {
                let (min_remaining, _) = iter.size_hint();
                let mut heap = String::with_capacity(size + len + min_remaining);
                heap.push_str(
                    core::str::from_utf8(&buf[..len])
                        .expect("inline buffer only ever holds whole UTF-8 encoded chars"),
                );
                heap.push(ch);
                heap.extend(iter);
                return SmolStr(Repr::Heap(heap.into_boxed_str().into()));
            }
            ch.encode_utf8(&mut buf[len..]);
            len += size;
        }
        SmolStr(Repr::Inline {
            // SAFETY: the overflow check in the loop keeps `len <= INLINE_CAP`.
            len: unsafe { transmute(len as u8) },
            buf,
        })
    }
}

impl Default for SmolStr {
    fn default() -> SmolStr {
        SmolStr::new("")
    }
}

impl Deref for SmolStr {
    type Target = str;

    fn deref(&self) -> &str {
        self.as_str()
    }
}

impl PartialEq<SmolStr> for SmolStr {
    fn eq(&self, other: &SmolStr) -> bool {
        self.as_str() == other.as_str()
    }
}

impl Eq for SmolStr {}

impl PartialEq<str> for SmolStr {
    fn eq(&self, other: &str) -> bool {
        self.as_str() == other
    }
}

impl PartialEq<SmolStr> for str {
    fn eq(&self, other: &SmolStr) -> bool {
        other == self
    }
}

impl<'a> PartialEq<&'a str> for SmolStr {
    fn eq(&self, other: &&'a str) -> bool {
        self == *other
    }
}

impl<'a> PartialEq<SmolStr> for &'a str {
    fn eq(&self, other: &SmolStr) -> bool {
        *self == other
    }
}

impl PartialEq<String> for SmolStr {
    fn eq(&self, other: &String) -> bool {
        self.as_str() == other
    }
}

impl PartialEq<SmolStr> for String {
    fn eq(&self, other: &SmolStr) -> bool {
        other == self
    }
}

impl<'a> PartialEq<&'a String> for SmolStr {
    fn eq(&self, other: &&'a String) -> bool {
        self == *other
    }
}

impl<'a> PartialEq<SmolStr> for &'a String {
    fn eq(&self, other: &SmolStr) -> bool {
        *self == other
    }
}

impl Ord for SmolStr {
    fn cmp(&self, other: &SmolStr) -> Ordering {
        self.as_str().cmp(other.as_str())
    }
}

impl PartialOrd for SmolStr {
    fn partial_cmp(&self, other: &SmolStr) -> Option<Ordering> {
        Some(self.cmp(other))
    }
}

// Hashes exactly like `str`, which keeps `Borrow<str>` lookups consistent.
impl hash::Hash for SmolStr {
    fn hash<H: hash::Hasher>(&self, hasher: &mut H) {
        self.as_str().hash(hasher)
    }
}

impl fmt::Debug for SmolStr {
    fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result {
        fmt::Debug::fmt(self.as_str(), f)
    }
}

impl fmt::Display for SmolStr {
    fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result {
        fmt::Display::fmt(self.as_str(), f)
    }
}

impl iter::FromIterator<char> for SmolStr {
    fn from_iter<I: iter::IntoIterator<Item = char>>(iter: I) -> SmolStr {
        let iter = iter.into_iter();
        Self::from_char_iter(iter)
    }
}

/// Concatenates string-like pieces into a `SmolStr`, staying inline for as
/// long as the accumulated text fits and spilling to the heap otherwise.
fn build_from_str_iter<T>(mut iter: impl Iterator<Item = T>) -> SmolStr
where
    T: AsRef<str>,
    String: iter::Extend<T>,
{
    let mut len = 0;
    let mut buf = [0u8; INLINE_CAP];
    // `while let` rather than `for`: the iterator is moved into `extend`
    // on the overflow path.
    while let Some(slice) = iter.next() {
        let slice = slice.as_ref();
        let size = slice.len();
        if size + len > INLINE_CAP {
            let mut heap = String::with_capacity(size + len);
            heap.push_str(
                core::str::from_utf8(&buf[..len])
                    .expect("inline buffer only ever holds whole string slices"),
            );
            heap.push_str(slice);
            heap.extend(iter);
            return SmolStr(Repr::Heap(heap.into_boxed_str().into()));
        }
        buf[len..][..size].copy_from_slice(slice.as_bytes());
        len += size;
    }
    SmolStr(Repr::Inline {
        // SAFETY: the overflow check in the loop keeps `len <= INLINE_CAP`.
        len: unsafe { transmute(len as u8) },
        buf,
    })
}

impl iter::FromIterator<String> for SmolStr {
    fn from_iter<I: iter::IntoIterator<Item = String>>(iter: I) -> SmolStr {
        build_from_str_iter(iter.into_iter())
    }
}

impl<'a> iter::FromIterator<&'a String> for SmolStr {
    fn from_iter<I: iter::IntoIterator<Item = &'a String>>(iter: I) -> SmolStr {
        SmolStr::from_iter(iter.into_iter().map(|x| x.as_str()))
    }
}

impl<'a> iter::FromIterator<&'a str> for SmolStr {
    fn from_iter<I: iter::IntoIterator<Item = &'a str>>(iter: I) -> SmolStr {
        build_from_str_iter(iter.into_iter())
    }
}

impl AsRef<str> for SmolStr {
    #[inline(always)]
    fn as_ref(&self) -> &str {
        self.as_str()
    }
}

impl From<&str> for SmolStr {
    #[inline]
    fn from(s: &str) -> SmolStr {
        SmolStr::new(s)
    }
}

impl From<&mut str> for SmolStr {
    #[inline]
    fn from(s: &mut str) -> SmolStr {
        SmolStr::new(s)
    }
}

impl From<&String> for SmolStr {
    #[inline]
    fn from(s: &String) -> SmolStr {
        SmolStr::new(s)
    }
}

impl From<String> for SmolStr {
    #[inline(always)]
    fn from(text: String) -> Self {
        Self::new(text)
    }
}

impl From<Box<str>> for SmolStr {
    #[inline]
    fn from(s: Box<str>) -> SmolStr {
        SmolStr::new(s)
    }
}

impl<'a> From<Cow<'a, str>> for SmolStr {
    #[inline]
    fn from(s: Cow<'a, str>) -> SmolStr {
        SmolStr::new(s)
    }
}

impl From<SmolStr> for String {
    #[inline(always)]
    fn from(text: SmolStr) -> Self {
        text.as_str().into()
    }
}

impl Borrow<str> for SmolStr {
    #[inline(always)]
    fn borrow(&self) -> &str {
        self.as_str()
    }
}

impl FromStr for SmolStr {
    type Err = Infallible;

    #[inline]
    fn from_str(s: &str) -> Result<SmolStr, Self::Err> {
        Ok(SmolStr::from(s))
    }
}

#[cfg(feature = "arbitrary")]
impl<'a> arbitrary::Arbitrary<'a> for SmolStr {
    fn arbitrary(u: &mut arbitrary::Unstructured<'a>) -> Result<Self, arbitrary::Error> {
        let s = <&str>::arbitrary(u)?;
        Ok(SmolStr::new(s))
    }
}

/// Maximum number of bytes stored directly inside a `SmolStr`.
const INLINE_CAP: usize = 23;
/// Number of leading newlines in `WS`.
const N_NEWLINES: usize = 32;
/// Number of trailing spaces in `WS`.
const N_SPACES: usize = 128;

/// Backing bytes for `WS`, generated from the two counts above so that the
/// table can never fall out of sync with `N_NEWLINES` / `N_SPACES`.
const WS_BYTES: [u8; N_NEWLINES + N_SPACES] = {
    let mut bytes = [b' '; N_NEWLINES + N_SPACES];
    let mut i = 0;
    while i < N_NEWLINES {
        bytes[i] = b'\n';
        i += 1;
    }
    bytes
};

/// `N_NEWLINES` newlines followed by `N_SPACES` spaces; `Repr::Substring`
/// values are slices of this table.
// SAFETY: `WS_BYTES` contains only `b'\n'` and `b' '`, which are ASCII and
// therefore valid UTF-8.
const WS: &str = unsafe { core::str::from_utf8_unchecked(&WS_BYTES) };

/// Length of an inline string. Having exactly the values `0..=INLINE_CAP`
/// leaves the remaining `u8` values free for the compiler to use as a niche.
#[derive(Clone, Copy, Debug)]
#[repr(u8)]
enum InlineSize {
    _V0 = 0,
    _V1,
    _V2,
    _V3,
    _V4,
    _V5,
    _V6,
    _V7,
    _V8,
    _V9,
    _V10,
    _V11,
    _V12,
    _V13,
    _V14,
    _V15,
    _V16,
    _V17,
    _V18,
    _V19,
    _V20,
    _V21,
    _V22,
    _V23,
}

/// Internal storage strategies of a `SmolStr`.
#[derive(Clone, Debug)]
enum Repr {
    /// Shared, reference-counted heap allocation.
    Heap(Arc<str>),
    /// Up to `INLINE_CAP` bytes of UTF-8 stored in place; only `buf[..len]` is meaningful.
    Inline {
        len: InlineSize,
        buf: [u8; INLINE_CAP],
    },
    /// `newlines` newlines followed by `spaces` spaces, served out of `WS`.
    Substring {
        newlines: usize,
        spaces: usize,
    },
}

impl Repr {
    /// Picks the cheapest representation able to hold `text`.
    fn new<T>(text: T) -> Self
    where
        T: AsRef<str>,
    {
        let text = text.as_ref();
        let len = text.len();

        if len <= INLINE_CAP {
            let mut buf = [0; INLINE_CAP];
            buf[..len].copy_from_slice(text.as_bytes());
            return Repr::Inline {
                // SAFETY: `len <= INLINE_CAP` was checked just above.
                len: unsafe { transmute(len as u8) },
                buf,
            };
        }

        if len <= N_NEWLINES + N_SPACES {
            let bytes = text.as_bytes();
            // Count leading newlines, but never more than `WS` can supply.
            let possible_newline_count = cmp::min(len, N_NEWLINES);
            let newlines = bytes[..possible_newline_count]
                .iter()
                .take_while(|&&b| b == b'\n')
                .count();
            // Everything after the newlines must be spaces, and few enough of them.
            let possible_space_count = len - newlines;
            if possible_space_count <= N_SPACES && bytes[newlines..].iter().all(|&b| b == b' ') {
                let spaces = possible_space_count;
                return Repr::Substring { newlines, spaces };
            }
        }

        Repr::Heap(text.into())
    }

    /// Length of the stored string in bytes.
    #[inline(always)]
    fn len(&self) -> usize {
        match self {
            Repr::Heap(data) => data.len(),
            Repr::Inline { len, .. } => *len as usize,
            Repr::Substring { newlines, spaces } => *newlines + *spaces,
        }
    }

    /// Whether the stored string is empty.
    #[inline(always)]
    fn is_empty(&self) -> bool {
        match self {
            Repr::Heap(data) => data.is_empty(),
            Repr::Inline { len, .. } => *len as u8 == 0,
            // A substring isn't created for an empty string.
            Repr::Substring { .. } => false,
        }
    }

    /// Borrows the stored string.
    #[inline]
    fn as_str(&self) -> &str {
        match self {
            Repr::Heap(data) => &**data,
            Repr::Inline { len, buf } => {
                let len = *len as usize;
                let buf = &buf[..len];
                // SAFETY: every constructor fills `buf[..len]` either with the
                // bytes of complete `str`/`char` values or with checked ASCII.
                unsafe { ::core::str::from_utf8_unchecked(buf) }
            }
            Repr::Substring { newlines, spaces } => {
                let newlines = *newlines;
                let spaces = *spaces;
                assert!(newlines <= N_NEWLINES && spaces <= N_SPACES);
                // The newline run ends at `N_NEWLINES`, so start that many
                // bytes before it and stop `spaces` bytes after it.
                &WS[N_NEWLINES - newlines..N_NEWLINES + spaces]
            }
        }
    }
}

#[cfg(feature = "serde")]
mod serde {
    use alloc::{string::String, vec::Vec};
    use core::fmt;

    use serde::de::{Deserializer, Error, Unexpected, Visitor};

    use crate::SmolStr;

    // https://github.com/serde-rs/serde/blob/629802f2abfd1a54a6072992888fea7ca5bc209f/serde/src/private/de.rs#L56-L125
    /// Deserializes a `SmolStr` from any string-like or UTF-8 byte input.
    fn smol_str<'de: 'a, 'a, D>(deserializer: D) -> Result<SmolStr, D::Error>
    where
        D: Deserializer<'de>,
    {
        struct SmolStrVisitor;

        impl<'a> Visitor<'a> for SmolStrVisitor {
            type Value = SmolStr;

            fn expecting(&self, formatter: &mut fmt::Formatter) -> fmt::Result {
                formatter.write_str("a string")
            }

            fn visit_str<E>(self, v: &str) -> Result<Self::Value, E>
            where
                E: Error,
            {
                Ok(SmolStr::from(v))
            }

            fn visit_borrowed_str<E>(self, v: &'a str) -> Result<Self::Value, E>
            where
                E: Error,
            {
                Ok(SmolStr::from(v))
            }

            fn visit_string<E>(self, v: String) -> Result<Self::Value, E>
            where
                E: Error,
            {
                Ok(SmolStr::from(v))
            }

            fn visit_bytes<E>(self, v: &[u8]) -> Result<Self::Value, E>
            where
                E: Error,
            {
                match core::str::from_utf8(v) {
                    Ok(s) => Ok(SmolStr::from(s)),
                    Err(_) => Err(Error::invalid_value(Unexpected::Bytes(v), &self)),
                }
            }

            fn visit_borrowed_bytes<E>(self, v: &'a [u8]) -> Result<Self::Value, E>
            where
                E: Error,
            {
                match core::str::from_utf8(v) {
                    Ok(s) => Ok(SmolStr::from(s)),
                    Err(_) => Err(Error::invalid_value(Unexpected::Bytes(v), &self)),
                }
            }

            fn visit_byte_buf<E>(self, v: Vec<u8>) -> Result<Self::Value, E>
            where
                E: Error,
            {
                match String::from_utf8(v) {
                    Ok(s) => Ok(SmolStr::from(s)),
                    Err(e) => Err(Error::invalid_value(
                        Unexpected::Bytes(&e.into_bytes()),
                        &self,
                    )),
                }
            }
        }

        deserializer.deserialize_str(SmolStrVisitor)
    }

    impl serde::Serialize for SmolStr {
        fn serialize<S>(&self, serializer: S) -> Result<S::Ok, S::Error>
        where
            S: serde::Serializer,
        {
            self.as_str().serialize(serializer)
        }
    }

    impl<'de> serde::Deserialize<'de> for SmolStr {
        fn deserialize<D>(deserializer: D) -> Result<Self, D::Error>
        where
            D: serde::Deserializer<'de>,
        {
            smol_str(deserializer)
        }
    }
}