summaryrefslogtreecommitdiffstats
path: root/library/core/src/char
diff options
context:
space:
mode:
authorDaniel Baumann <daniel.baumann@progress-linux.org>2024-05-18 02:49:50 +0000
committerDaniel Baumann <daniel.baumann@progress-linux.org>2024-05-18 02:49:50 +0000
commit9835e2ae736235810b4ea1c162ca5e65c547e770 (patch)
tree3fcebf40ed70e581d776a8a4c65923e8ec20e026 /library/core/src/char
parentReleasing progress-linux version 1.70.0+dfsg2-1~progress7.99u1. (diff)
downloadrustc-9835e2ae736235810b4ea1c162ca5e65c547e770.tar.xz
rustc-9835e2ae736235810b4ea1c162ca5e65c547e770.zip
Merging upstream version 1.71.1+dfsg1.
Signed-off-by: Daniel Baumann <daniel.baumann@progress-linux.org>
Diffstat (limited to 'library/core/src/char')
-rw-r--r--library/core/src/char/convert.rs12
-rw-r--r--library/core/src/char/methods.rs76
-rw-r--r--library/core/src/char/mod.rs252
3 files changed, 156 insertions, 184 deletions
diff --git a/library/core/src/char/convert.rs b/library/core/src/char/convert.rs
index 136bbcb8b..b84e4b35b 100644
--- a/library/core/src/char/convert.rs
+++ b/library/core/src/char/convert.rs
@@ -27,8 +27,7 @@ pub(super) const unsafe fn from_u32_unchecked(i: u32) -> char {
}
#[stable(feature = "char_convert", since = "1.13.0")]
-#[rustc_const_unstable(feature = "const_convert", issue = "88674")]
-impl const From<char> for u32 {
+impl From<char> for u32 {
/// Converts a [`char`] into a [`u32`].
///
/// # Examples
@@ -47,8 +46,7 @@ impl const From<char> for u32 {
}
#[stable(feature = "more_char_conversions", since = "1.51.0")]
-#[rustc_const_unstable(feature = "const_convert", issue = "88674")]
-impl const From<char> for u64 {
+impl From<char> for u64 {
/// Converts a [`char`] into a [`u64`].
///
/// # Examples
@@ -69,8 +67,7 @@ impl const From<char> for u64 {
}
#[stable(feature = "more_char_conversions", since = "1.51.0")]
-#[rustc_const_unstable(feature = "const_convert", issue = "88674")]
-impl const From<char> for u128 {
+impl From<char> for u128 {
/// Converts a [`char`] into a [`u128`].
///
/// # Examples
@@ -123,8 +120,7 @@ impl TryFrom<char> for u8 {
/// for a superset of Windows-1252 that fills the remaining blanks with corresponding
/// C0 and C1 control codes.
#[stable(feature = "char_convert", since = "1.13.0")]
-#[rustc_const_unstable(feature = "const_convert", issue = "88674")]
-impl const From<u8> for char {
+impl From<u8> for char {
/// Converts a [`u8`] into a [`char`].
///
/// # Examples
diff --git a/library/core/src/char/methods.rs b/library/core/src/char/methods.rs
index 9bc97ea0b..515b8d20e 100644
--- a/library/core/src/char/methods.rs
+++ b/library/core/src/char/methods.rs
@@ -1,5 +1,6 @@
//! impl char {}
+use crate::ascii;
use crate::slice;
use crate::str::from_utf8_unchecked_mut;
use crate::unicode::printable::is_printable;
@@ -380,20 +381,7 @@ impl char {
#[stable(feature = "rust1", since = "1.0.0")]
#[inline]
pub fn escape_unicode(self) -> EscapeUnicode {
- let c = self as u32;
-
- // or-ing 1 ensures that for c==0 the code computes that one
- // digit should be printed and (which is the same) avoids the
- // (31 - 32) underflow
- let msb = 31 - (c | 1).leading_zeros();
-
- // the index of the most significant hex digit
- let ms_hex_digit = msb / 4;
- EscapeUnicode {
- c: self,
- state: EscapeUnicodeState::Backslash,
- hex_digit_idx: ms_hex_digit as usize,
- }
+ EscapeUnicode::new(self)
}
/// An extended version of `escape_debug` that optionally permits escaping
@@ -403,21 +391,20 @@ impl char {
/// characters, and double quotes in strings.
#[inline]
pub(crate) fn escape_debug_ext(self, args: EscapeDebugExtArgs) -> EscapeDebug {
- let init_state = match self {
- '\0' => EscapeDefaultState::Backslash('0'),
- '\t' => EscapeDefaultState::Backslash('t'),
- '\r' => EscapeDefaultState::Backslash('r'),
- '\n' => EscapeDefaultState::Backslash('n'),
- '\\' => EscapeDefaultState::Backslash(self),
- '"' if args.escape_double_quote => EscapeDefaultState::Backslash(self),
- '\'' if args.escape_single_quote => EscapeDefaultState::Backslash(self),
+ match self {
+ '\0' => EscapeDebug::backslash(ascii::Char::Digit0),
+ '\t' => EscapeDebug::backslash(ascii::Char::SmallT),
+ '\r' => EscapeDebug::backslash(ascii::Char::SmallR),
+ '\n' => EscapeDebug::backslash(ascii::Char::SmallN),
+ '\\' => EscapeDebug::backslash(ascii::Char::ReverseSolidus),
+ '\"' if args.escape_double_quote => EscapeDebug::backslash(ascii::Char::QuotationMark),
+ '\'' if args.escape_single_quote => EscapeDebug::backslash(ascii::Char::Apostrophe),
_ if args.escape_grapheme_extended && self.is_grapheme_extended() => {
- EscapeDefaultState::Unicode(self.escape_unicode())
+ EscapeDebug::from_unicode(self.escape_unicode())
}
- _ if is_printable(self) => EscapeDefaultState::Char(self),
- _ => EscapeDefaultState::Unicode(self.escape_unicode()),
- };
- EscapeDebug(EscapeDefault { state: init_state })
+ _ if is_printable(self) => EscapeDebug::printable(self),
+ _ => EscapeDebug::from_unicode(self.escape_unicode()),
+ }
}
/// Returns an iterator that yields the literal escape code of a character
@@ -515,15 +502,14 @@ impl char {
#[stable(feature = "rust1", since = "1.0.0")]
#[inline]
pub fn escape_default(self) -> EscapeDefault {
- let init_state = match self {
- '\t' => EscapeDefaultState::Backslash('t'),
- '\r' => EscapeDefaultState::Backslash('r'),
- '\n' => EscapeDefaultState::Backslash('n'),
- '\\' | '\'' | '"' => EscapeDefaultState::Backslash(self),
- '\x20'..='\x7e' => EscapeDefaultState::Char(self),
- _ => EscapeDefaultState::Unicode(self.escape_unicode()),
- };
- EscapeDefault { state: init_state }
+ match self {
+ '\t' => EscapeDefault::backslash(ascii::Char::SmallT),
+ '\r' => EscapeDefault::backslash(ascii::Char::SmallR),
+ '\n' => EscapeDefault::backslash(ascii::Char::SmallN),
+ '\\' | '\'' | '"' => EscapeDefault::backslash(self.as_ascii().unwrap()),
+ '\x20'..='\x7e' => EscapeDefault::printable(self.as_ascii().unwrap()),
+ _ => EscapeDefault::from_unicode(self.escape_unicode()),
+ }
}
/// Returns the number of bytes this `char` would need if encoded in UTF-8.
@@ -1116,6 +1102,24 @@ impl char {
*self as u32 <= 0x7F
}
+ /// Returns `Some` if the value is within the ASCII range,
+ /// or `None` if it's not.
+ ///
+ /// This is preferred to [`Self::is_ascii`] when you're passing the value
+ /// along to something else that can take [`ascii::Char`] rather than
+ /// needing to check again for itself whether the value is in ASCII.
+ #[must_use]
+ #[unstable(feature = "ascii_char", issue = "110998")]
+ #[inline]
+ pub const fn as_ascii(&self) -> Option<ascii::Char> {
+ if self.is_ascii() {
+ // SAFETY: Just checked that this is ASCII.
+ Some(unsafe { ascii::Char::from_u8_unchecked(*self as u8) })
+ } else {
+ None
+ }
+ }
+
/// Makes a copy of the value in its ASCII upper case equivalent.
///
/// ASCII letters 'a' to 'z' are mapped to 'A' to 'Z',
diff --git a/library/core/src/char/mod.rs b/library/core/src/char/mod.rs
index 8ec78e887..5c4291287 100644
--- a/library/core/src/char/mod.rs
+++ b/library/core/src/char/mod.rs
@@ -38,9 +38,12 @@ pub use self::methods::encode_utf16_raw;
#[unstable(feature = "char_internals", reason = "exposed only for libstd", issue = "none")]
pub use self::methods::encode_utf8_raw;
+use crate::ascii;
use crate::error::Error;
+use crate::escape;
use crate::fmt::{self, Write};
use crate::iter::FusedIterator;
+use crate::num::NonZeroUsize;
pub(crate) use self::methods::EscapeDebugExtArgs;
@@ -146,86 +149,44 @@ pub const fn from_digit(num: u32, radix: u32) -> Option<char> {
/// [`escape_unicode`]: char::escape_unicode
#[derive(Clone, Debug)]
#[stable(feature = "rust1", since = "1.0.0")]
-pub struct EscapeUnicode {
- c: char,
- state: EscapeUnicodeState,
-
- // The index of the next hex digit to be printed (0 if none),
- // i.e., the number of remaining hex digits to be printed;
- // increasing from the least significant digit: 0x543210
- hex_digit_idx: usize,
-}
+pub struct EscapeUnicode(escape::EscapeIterInner<10>);
-// The enum values are ordered so that their representation is the
-// same as the remaining length (besides the hexadecimal digits). This
-// likely makes `len()` a single load from memory) and inline-worth.
-#[derive(Clone, Debug)]
-enum EscapeUnicodeState {
- Done,
- RightBrace,
- Value,
- LeftBrace,
- Type,
- Backslash,
+impl EscapeUnicode {
+ fn new(chr: char) -> Self {
+ let mut data = [ascii::Char::Null; 10];
+ let range = escape::escape_unicode_into(&mut data, chr);
+ Self(escape::EscapeIterInner::new(data, range))
+ }
}
#[stable(feature = "rust1", since = "1.0.0")]
impl Iterator for EscapeUnicode {
type Item = char;
+ #[inline]
fn next(&mut self) -> Option<char> {
- match self.state {
- EscapeUnicodeState::Backslash => {
- self.state = EscapeUnicodeState::Type;
- Some('\\')
- }
- EscapeUnicodeState::Type => {
- self.state = EscapeUnicodeState::LeftBrace;
- Some('u')
- }
- EscapeUnicodeState::LeftBrace => {
- self.state = EscapeUnicodeState::Value;
- Some('{')
- }
- EscapeUnicodeState::Value => {
- let hex_digit = ((self.c as u32) >> (self.hex_digit_idx * 4)) & 0xf;
- let c = char::from_digit(hex_digit, 16).unwrap();
- if self.hex_digit_idx == 0 {
- self.state = EscapeUnicodeState::RightBrace;
- } else {
- self.hex_digit_idx -= 1;
- }
- Some(c)
- }
- EscapeUnicodeState::RightBrace => {
- self.state = EscapeUnicodeState::Done;
- Some('}')
- }
- EscapeUnicodeState::Done => None,
- }
+ self.0.next().map(char::from)
}
#[inline]
fn size_hint(&self) -> (usize, Option<usize>) {
- let n = self.len();
+ let n = self.0.len();
(n, Some(n))
}
#[inline]
fn count(self) -> usize {
- self.len()
+ self.0.len()
}
- fn last(self) -> Option<char> {
- match self.state {
- EscapeUnicodeState::Done => None,
+ #[inline]
+ fn last(mut self) -> Option<char> {
+ self.0.next_back().map(char::from)
+ }
- EscapeUnicodeState::RightBrace
- | EscapeUnicodeState::Value
- | EscapeUnicodeState::LeftBrace
- | EscapeUnicodeState::Type
- | EscapeUnicodeState::Backslash => Some('}'),
- }
+ #[inline]
+ fn advance_by(&mut self, n: usize) -> Result<(), NonZeroUsize> {
+ self.0.advance_by(n)
}
}
@@ -233,16 +194,7 @@ impl Iterator for EscapeUnicode {
impl ExactSizeIterator for EscapeUnicode {
#[inline]
fn len(&self) -> usize {
- // The match is a single memory access with no branching
- self.hex_digit_idx
- + match self.state {
- EscapeUnicodeState::Done => 0,
- EscapeUnicodeState::RightBrace => 1,
- EscapeUnicodeState::Value => 2,
- EscapeUnicodeState::LeftBrace => 3,
- EscapeUnicodeState::Type => 4,
- EscapeUnicodeState::Backslash => 5,
- }
+ self.0.len()
}
}
@@ -252,10 +204,7 @@ impl FusedIterator for EscapeUnicode {}
#[stable(feature = "char_struct_display", since = "1.16.0")]
impl fmt::Display for EscapeUnicode {
fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
- for c in self.clone() {
- f.write_char(c)?;
- }
- Ok(())
+ f.write_str(self.0.as_str())
}
}
@@ -267,90 +216,60 @@ impl fmt::Display for EscapeUnicode {
/// [`escape_default`]: char::escape_default
#[derive(Clone, Debug)]
#[stable(feature = "rust1", since = "1.0.0")]
-pub struct EscapeDefault {
- state: EscapeDefaultState,
-}
+pub struct EscapeDefault(escape::EscapeIterInner<10>);
-#[derive(Clone, Debug)]
-enum EscapeDefaultState {
- Done,
- Char(char),
- Backslash(char),
- Unicode(EscapeUnicode),
+impl EscapeDefault {
+ fn printable(chr: ascii::Char) -> Self {
+ let data = [chr];
+ Self(escape::EscapeIterInner::from_array(data))
+ }
+
+ fn backslash(chr: ascii::Char) -> Self {
+ let data = [ascii::Char::ReverseSolidus, chr];
+ Self(escape::EscapeIterInner::from_array(data))
+ }
+
+ fn from_unicode(esc: EscapeUnicode) -> Self {
+ Self(esc.0)
+ }
}
#[stable(feature = "rust1", since = "1.0.0")]
impl Iterator for EscapeDefault {
type Item = char;
+ #[inline]
fn next(&mut self) -> Option<char> {
- match self.state {
- EscapeDefaultState::Backslash(c) => {
- self.state = EscapeDefaultState::Char(c);
- Some('\\')
- }
- EscapeDefaultState::Char(c) => {
- self.state = EscapeDefaultState::Done;
- Some(c)
- }
- EscapeDefaultState::Done => None,
- EscapeDefaultState::Unicode(ref mut iter) => iter.next(),
- }
+ self.0.next().map(char::from)
}
#[inline]
fn size_hint(&self) -> (usize, Option<usize>) {
- let n = self.len();
+ let n = self.0.len();
(n, Some(n))
}
#[inline]
fn count(self) -> usize {
- self.len()
+ self.0.len()
}
- fn nth(&mut self, n: usize) -> Option<char> {
- match self.state {
- EscapeDefaultState::Backslash(c) if n == 0 => {
- self.state = EscapeDefaultState::Char(c);
- Some('\\')
- }
- EscapeDefaultState::Backslash(c) if n == 1 => {
- self.state = EscapeDefaultState::Done;
- Some(c)
- }
- EscapeDefaultState::Backslash(_) => {
- self.state = EscapeDefaultState::Done;
- None
- }
- EscapeDefaultState::Char(c) => {
- self.state = EscapeDefaultState::Done;
-
- if n == 0 { Some(c) } else { None }
- }
- EscapeDefaultState::Done => None,
- EscapeDefaultState::Unicode(ref mut i) => i.nth(n),
- }
+ #[inline]
+ fn last(mut self) -> Option<char> {
+ self.0.next_back().map(char::from)
}
- fn last(self) -> Option<char> {
- match self.state {
- EscapeDefaultState::Unicode(iter) => iter.last(),
- EscapeDefaultState::Done => None,
- EscapeDefaultState::Backslash(c) | EscapeDefaultState::Char(c) => Some(c),
- }
+ #[inline]
+ fn advance_by(&mut self, n: usize) -> Result<(), NonZeroUsize> {
+ self.0.advance_by(n)
}
}
#[stable(feature = "exact_size_escape", since = "1.11.0")]
impl ExactSizeIterator for EscapeDefault {
+ #[inline]
fn len(&self) -> usize {
- match self.state {
- EscapeDefaultState::Done => 0,
- EscapeDefaultState::Char(_) => 1,
- EscapeDefaultState::Backslash(_) => 2,
- EscapeDefaultState::Unicode(ref iter) => iter.len(),
- }
+ self.0.len()
}
}
@@ -360,10 +279,7 @@ impl FusedIterator for EscapeDefault {}
#[stable(feature = "char_struct_display", since = "1.16.0")]
impl fmt::Display for EscapeDefault {
fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
- for c in self.clone() {
- f.write_char(c)?;
- }
- Ok(())
+ f.write_str(self.0.as_str())
}
}
@@ -375,21 +291,74 @@ impl fmt::Display for EscapeDefault {
/// [`escape_debug`]: char::escape_debug
#[stable(feature = "char_escape_debug", since = "1.20.0")]
#[derive(Clone, Debug)]
-pub struct EscapeDebug(EscapeDefault);
+pub struct EscapeDebug(EscapeDebugInner);
+
+#[derive(Clone, Debug)]
+// Note: It’s possible to manually encode the EscapeDebugInner inside of
+// EscapeIterInner (e.g. with alive=254..255 indicating that data[0..4] holds
+// a char), which would likely result in more optimised code. For now we use
+// the option that is easier to implement.
+enum EscapeDebugInner {
+ Bytes(escape::EscapeIterInner<10>),
+ Char(char),
+}
+
+impl EscapeDebug {
+ fn printable(chr: char) -> Self {
+ Self(EscapeDebugInner::Char(chr))
+ }
+
+ fn backslash(chr: ascii::Char) -> Self {
+ let data = [ascii::Char::ReverseSolidus, chr];
+ let iter = escape::EscapeIterInner::from_array(data);
+ Self(EscapeDebugInner::Bytes(iter))
+ }
+
+ fn from_unicode(esc: EscapeUnicode) -> Self {
+ Self(EscapeDebugInner::Bytes(esc.0))
+ }
+
+ fn clear(&mut self) {
+ let bytes = escape::EscapeIterInner::from_array([]);
+ self.0 = EscapeDebugInner::Bytes(bytes);
+ }
+}
#[stable(feature = "char_escape_debug", since = "1.20.0")]
impl Iterator for EscapeDebug {
type Item = char;
+
+ #[inline]
fn next(&mut self) -> Option<char> {
- self.0.next()
+ match self.0 {
+ EscapeDebugInner::Bytes(ref mut bytes) => bytes.next().map(char::from),
+ EscapeDebugInner::Char(chr) => {
+ self.clear();
+ Some(chr)
+ }
+ }
}
+
fn size_hint(&self) -> (usize, Option<usize>) {
- self.0.size_hint()
+ let n = self.len();
+ (n, Some(n))
+ }
+
+ #[inline]
+ fn count(self) -> usize {
+ self.len()
}
}
#[stable(feature = "char_escape_debug", since = "1.20.0")]
-impl ExactSizeIterator for EscapeDebug {}
+impl ExactSizeIterator for EscapeDebug {
+ fn len(&self) -> usize {
+ match &self.0 {
+ EscapeDebugInner::Bytes(bytes) => bytes.len(),
+ EscapeDebugInner::Char(_) => 1,
+ }
+ }
+}
#[stable(feature = "fused", since = "1.26.0")]
impl FusedIterator for EscapeDebug {}
@@ -397,7 +366,10 @@ impl FusedIterator for EscapeDebug {}
#[stable(feature = "char_escape_debug", since = "1.20.0")]
impl fmt::Display for EscapeDebug {
fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
- fmt::Display::fmt(&self.0, f)
+ match &self.0 {
+ EscapeDebugInner::Bytes(bytes) => f.write_str(bytes.as_str()),
+ EscapeDebugInner::Char(chr) => f.write_char(*chr),
+ }
}
}