diff options
author | Daniel Baumann <daniel.baumann@progress-linux.org> | 2024-05-18 02:49:50 +0000 |
---|---|---|
committer | Daniel Baumann <daniel.baumann@progress-linux.org> | 2024-05-18 02:49:50 +0000 |
commit | 9835e2ae736235810b4ea1c162ca5e65c547e770 (patch) | |
tree | 3fcebf40ed70e581d776a8a4c65923e8ec20e026 /library/core/src/char | |
parent | Releasing progress-linux version 1.70.0+dfsg2-1~progress7.99u1. (diff) | |
download | rustc-9835e2ae736235810b4ea1c162ca5e65c547e770.tar.xz rustc-9835e2ae736235810b4ea1c162ca5e65c547e770.zip |
Merging upstream version 1.71.1+dfsg1.
Signed-off-by: Daniel Baumann <daniel.baumann@progress-linux.org>
Diffstat (limited to 'library/core/src/char')
-rw-r--r-- | library/core/src/char/convert.rs | 12 | ||||
-rw-r--r-- | library/core/src/char/methods.rs | 76 | ||||
-rw-r--r-- | library/core/src/char/mod.rs | 252 |
3 files changed, 156 insertions, 184 deletions
diff --git a/library/core/src/char/convert.rs b/library/core/src/char/convert.rs index 136bbcb8b..b84e4b35b 100644 --- a/library/core/src/char/convert.rs +++ b/library/core/src/char/convert.rs @@ -27,8 +27,7 @@ pub(super) const unsafe fn from_u32_unchecked(i: u32) -> char { } #[stable(feature = "char_convert", since = "1.13.0")] -#[rustc_const_unstable(feature = "const_convert", issue = "88674")] -impl const From<char> for u32 { +impl From<char> for u32 { /// Converts a [`char`] into a [`u32`]. /// /// # Examples @@ -47,8 +46,7 @@ impl const From<char> for u32 { } #[stable(feature = "more_char_conversions", since = "1.51.0")] -#[rustc_const_unstable(feature = "const_convert", issue = "88674")] -impl const From<char> for u64 { +impl From<char> for u64 { /// Converts a [`char`] into a [`u64`]. /// /// # Examples @@ -69,8 +67,7 @@ impl const From<char> for u64 { } #[stable(feature = "more_char_conversions", since = "1.51.0")] -#[rustc_const_unstable(feature = "const_convert", issue = "88674")] -impl const From<char> for u128 { +impl From<char> for u128 { /// Converts a [`char`] into a [`u128`]. /// /// # Examples @@ -123,8 +120,7 @@ impl TryFrom<char> for u8 { /// for a superset of Windows-1252 that fills the remaining blanks with corresponding /// C0 and C1 control codes. #[stable(feature = "char_convert", since = "1.13.0")] -#[rustc_const_unstable(feature = "const_convert", issue = "88674")] -impl const From<u8> for char { +impl From<u8> for char { /// Converts a [`u8`] into a [`char`]. /// /// # Examples diff --git a/library/core/src/char/methods.rs b/library/core/src/char/methods.rs index 9bc97ea0b..515b8d20e 100644 --- a/library/core/src/char/methods.rs +++ b/library/core/src/char/methods.rs @@ -1,5 +1,6 @@ //! impl char {} +use crate::ascii; use crate::slice; use crate::str::from_utf8_unchecked_mut; use crate::unicode::printable::is_printable; @@ -380,20 +381,7 @@ impl char { #[stable(feature = "rust1", since = "1.0.0")] #[inline] pub fn escape_unicode(self) -> EscapeUnicode { - let c = self as u32; - - // or-ing 1 ensures that for c==0 the code computes that one - // digit should be printed and (which is the same) avoids the - // (31 - 32) underflow - let msb = 31 - (c | 1).leading_zeros(); - - // the index of the most significant hex digit - let ms_hex_digit = msb / 4; - EscapeUnicode { - c: self, - state: EscapeUnicodeState::Backslash, - hex_digit_idx: ms_hex_digit as usize, - } + EscapeUnicode::new(self) } /// An extended version of `escape_debug` that optionally permits escaping @@ -403,21 +391,20 @@ impl char { /// characters, and double quotes in strings. #[inline] pub(crate) fn escape_debug_ext(self, args: EscapeDebugExtArgs) -> EscapeDebug { - let init_state = match self { - '\0' => EscapeDefaultState::Backslash('0'), - '\t' => EscapeDefaultState::Backslash('t'), - '\r' => EscapeDefaultState::Backslash('r'), - '\n' => EscapeDefaultState::Backslash('n'), - '\\' => EscapeDefaultState::Backslash(self), - '"' if args.escape_double_quote => EscapeDefaultState::Backslash(self), - '\'' if args.escape_single_quote => EscapeDefaultState::Backslash(self), + match self { + '\0' => EscapeDebug::backslash(ascii::Char::Digit0), + '\t' => EscapeDebug::backslash(ascii::Char::SmallT), + '\r' => EscapeDebug::backslash(ascii::Char::SmallR), + '\n' => EscapeDebug::backslash(ascii::Char::SmallN), + '\\' => EscapeDebug::backslash(ascii::Char::ReverseSolidus), + '\"' if args.escape_double_quote => EscapeDebug::backslash(ascii::Char::QuotationMark), + '\'' if args.escape_single_quote => EscapeDebug::backslash(ascii::Char::Apostrophe), _ if args.escape_grapheme_extended && self.is_grapheme_extended() => { - EscapeDefaultState::Unicode(self.escape_unicode()) + EscapeDebug::from_unicode(self.escape_unicode()) } - _ if is_printable(self) => EscapeDefaultState::Char(self), - _ => EscapeDefaultState::Unicode(self.escape_unicode()), - }; - EscapeDebug(EscapeDefault { state: init_state }) + _ if is_printable(self) => EscapeDebug::printable(self), + _ => EscapeDebug::from_unicode(self.escape_unicode()), + } } /// Returns an iterator that yields the literal escape code of a character @@ -515,15 +502,14 @@ impl char { #[stable(feature = "rust1", since = "1.0.0")] #[inline] pub fn escape_default(self) -> EscapeDefault { - let init_state = match self { - '\t' => EscapeDefaultState::Backslash('t'), - '\r' => EscapeDefaultState::Backslash('r'), - '\n' => EscapeDefaultState::Backslash('n'), - '\\' | '\'' | '"' => EscapeDefaultState::Backslash(self), - '\x20'..='\x7e' => EscapeDefaultState::Char(self), - _ => EscapeDefaultState::Unicode(self.escape_unicode()), - }; - EscapeDefault { state: init_state } + match self { + '\t' => EscapeDefault::backslash(ascii::Char::SmallT), + '\r' => EscapeDefault::backslash(ascii::Char::SmallR), + '\n' => EscapeDefault::backslash(ascii::Char::SmallN), + '\\' | '\'' | '"' => EscapeDefault::backslash(self.as_ascii().unwrap()), + '\x20'..='\x7e' => EscapeDefault::printable(self.as_ascii().unwrap()), + _ => EscapeDefault::from_unicode(self.escape_unicode()), + } } /// Returns the number of bytes this `char` would need if encoded in UTF-8. @@ -1116,6 +1102,24 @@ impl char { *self as u32 <= 0x7F } + /// Returns `Some` if the value is within the ASCII range, + /// or `None` if it's not. + /// + /// This is preferred to [`Self::is_ascii`] when you're passing the value + /// along to something else that can take [`ascii::Char`] rather than + /// needing to check again for itself whether the value is in ASCII. + #[must_use] + #[unstable(feature = "ascii_char", issue = "110998")] + #[inline] + pub const fn as_ascii(&self) -> Option<ascii::Char> { + if self.is_ascii() { + // SAFETY: Just checked that this is ASCII. + Some(unsafe { ascii::Char::from_u8_unchecked(*self as u8) }) + } else { + None + } + } + /// Makes a copy of the value in its ASCII upper case equivalent. /// /// ASCII letters 'a' to 'z' are mapped to 'A' to 'Z', diff --git a/library/core/src/char/mod.rs b/library/core/src/char/mod.rs index 8ec78e887..5c4291287 100644 --- a/library/core/src/char/mod.rs +++ b/library/core/src/char/mod.rs @@ -38,9 +38,12 @@ pub use self::methods::encode_utf16_raw; #[unstable(feature = "char_internals", reason = "exposed only for libstd", issue = "none")] pub use self::methods::encode_utf8_raw; +use crate::ascii; use crate::error::Error; +use crate::escape; use crate::fmt::{self, Write}; use crate::iter::FusedIterator; +use crate::num::NonZeroUsize; pub(crate) use self::methods::EscapeDebugExtArgs; @@ -146,86 +149,44 @@ pub const fn from_digit(num: u32, radix: u32) -> Option<char> { /// [`escape_unicode`]: char::escape_unicode #[derive(Clone, Debug)] #[stable(feature = "rust1", since = "1.0.0")] -pub struct EscapeUnicode { - c: char, - state: EscapeUnicodeState, - - // The index of the next hex digit to be printed (0 if none), - // i.e., the number of remaining hex digits to be printed; - // increasing from the least significant digit: 0x543210 - hex_digit_idx: usize, -} +pub struct EscapeUnicode(escape::EscapeIterInner<10>); -// The enum values are ordered so that their representation is the -// same as the remaining length (besides the hexadecimal digits). This -// likely makes `len()` a single load from memory) and inline-worth. -#[derive(Clone, Debug)] -enum EscapeUnicodeState { - Done, - RightBrace, - Value, - LeftBrace, - Type, - Backslash, +impl EscapeUnicode { + fn new(chr: char) -> Self { + let mut data = [ascii::Char::Null; 10]; + let range = escape::escape_unicode_into(&mut data, chr); + Self(escape::EscapeIterInner::new(data, range)) + } } #[stable(feature = "rust1", since = "1.0.0")] impl Iterator for EscapeUnicode { type Item = char; + #[inline] fn next(&mut self) -> Option<char> { - match self.state { - EscapeUnicodeState::Backslash => { - self.state = EscapeUnicodeState::Type; - Some('\\') - } - EscapeUnicodeState::Type => { - self.state = EscapeUnicodeState::LeftBrace; - Some('u') - } - EscapeUnicodeState::LeftBrace => { - self.state = EscapeUnicodeState::Value; - Some('{') - } - EscapeUnicodeState::Value => { - let hex_digit = ((self.c as u32) >> (self.hex_digit_idx * 4)) & 0xf; - let c = char::from_digit(hex_digit, 16).unwrap(); - if self.hex_digit_idx == 0 { - self.state = EscapeUnicodeState::RightBrace; - } else { - self.hex_digit_idx -= 1; - } - Some(c) - } - EscapeUnicodeState::RightBrace => { - self.state = EscapeUnicodeState::Done; - Some('}') - } - EscapeUnicodeState::Done => None, - } + self.0.next().map(char::from) } #[inline] fn size_hint(&self) -> (usize, Option<usize>) { - let n = self.len(); + let n = self.0.len(); (n, Some(n)) } #[inline] fn count(self) -> usize { - self.len() + self.0.len() } - fn last(self) -> Option<char> { - match self.state { - EscapeUnicodeState::Done => None, + #[inline] + fn last(mut self) -> Option<char> { + self.0.next_back().map(char::from) + } - EscapeUnicodeState::RightBrace - | EscapeUnicodeState::Value - | EscapeUnicodeState::LeftBrace - | EscapeUnicodeState::Type - | EscapeUnicodeState::Backslash => Some('}'), - } + #[inline] + fn advance_by(&mut self, n: usize) -> Result<(), NonZeroUsize> { + self.0.advance_by(n) } } @@ -233,16 +194,7 @@ impl Iterator for EscapeUnicode { impl ExactSizeIterator for EscapeUnicode { #[inline] fn len(&self) -> usize { - // The match is a single memory access with no branching - self.hex_digit_idx - + match self.state { - EscapeUnicodeState::Done => 0, - EscapeUnicodeState::RightBrace => 1, - EscapeUnicodeState::Value => 2, - EscapeUnicodeState::LeftBrace => 3, - EscapeUnicodeState::Type => 4, - EscapeUnicodeState::Backslash => 5, - } + self.0.len() } } @@ -252,10 +204,7 @@ impl FusedIterator for EscapeUnicode {} #[stable(feature = "char_struct_display", since = "1.16.0")] impl fmt::Display for EscapeUnicode { fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result { - for c in self.clone() { - f.write_char(c)?; - } - Ok(()) + f.write_str(self.0.as_str()) } } @@ -267,90 +216,60 @@ impl fmt::Display for EscapeUnicode { /// [`escape_default`]: char::escape_default #[derive(Clone, Debug)] #[stable(feature = "rust1", since = "1.0.0")] -pub struct EscapeDefault { - state: EscapeDefaultState, -} +pub struct EscapeDefault(escape::EscapeIterInner<10>); -#[derive(Clone, Debug)] -enum EscapeDefaultState { - Done, - Char(char), - Backslash(char), - Unicode(EscapeUnicode), +impl EscapeDefault { + fn printable(chr: ascii::Char) -> Self { + let data = [chr]; + Self(escape::EscapeIterInner::from_array(data)) + } + + fn backslash(chr: ascii::Char) -> Self { + let data = [ascii::Char::ReverseSolidus, chr]; + Self(escape::EscapeIterInner::from_array(data)) + } + + fn from_unicode(esc: EscapeUnicode) -> Self { + Self(esc.0) + } } #[stable(feature = "rust1", since = "1.0.0")] impl Iterator for EscapeDefault { type Item = char; + #[inline] fn next(&mut self) -> Option<char> { - match self.state { - EscapeDefaultState::Backslash(c) => { - self.state = EscapeDefaultState::Char(c); - Some('\\') - } - EscapeDefaultState::Char(c) => { - self.state = EscapeDefaultState::Done; - Some(c) - } - EscapeDefaultState::Done => None, - EscapeDefaultState::Unicode(ref mut iter) => iter.next(), - } + self.0.next().map(char::from) } #[inline] fn size_hint(&self) -> (usize, Option<usize>) { - let n = self.len(); + let n = self.0.len(); (n, Some(n)) } #[inline] fn count(self) -> usize { - self.len() + self.0.len() } - fn nth(&mut self, n: usize) -> Option<char> { - match self.state { - EscapeDefaultState::Backslash(c) if n == 0 => { - self.state = EscapeDefaultState::Char(c); - Some('\\') - } - EscapeDefaultState::Backslash(c) if n == 1 => { - self.state = EscapeDefaultState::Done; - Some(c) - } - EscapeDefaultState::Backslash(_) => { - self.state = EscapeDefaultState::Done; - None - } - EscapeDefaultState::Char(c) => { - self.state = EscapeDefaultState::Done; - - if n == 0 { Some(c) } else { None } - } - EscapeDefaultState::Done => None, - EscapeDefaultState::Unicode(ref mut i) => i.nth(n), - } + #[inline] + fn last(mut self) -> Option<char> { + self.0.next_back().map(char::from) } - fn last(self) -> Option<char> { - match self.state { - EscapeDefaultState::Unicode(iter) => iter.last(), - EscapeDefaultState::Done => None, - EscapeDefaultState::Backslash(c) | EscapeDefaultState::Char(c) => Some(c), - } + #[inline] + fn advance_by(&mut self, n: usize) -> Result<(), NonZeroUsize> { + self.0.advance_by(n) } } #[stable(feature = "exact_size_escape", since = "1.11.0")] impl ExactSizeIterator for EscapeDefault { + #[inline] fn len(&self) -> usize { - match self.state { - EscapeDefaultState::Done => 0, - EscapeDefaultState::Char(_) => 1, - EscapeDefaultState::Backslash(_) => 2, - EscapeDefaultState::Unicode(ref iter) => iter.len(), - } + self.0.len() } } @@ -360,10 +279,7 @@ impl FusedIterator for EscapeDefault {} #[stable(feature = "char_struct_display", since = "1.16.0")] impl fmt::Display for EscapeDefault { fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result { - for c in self.clone() { - f.write_char(c)?; - } - Ok(()) + f.write_str(self.0.as_str()) } } @@ -375,21 +291,74 @@ impl fmt::Display for EscapeDefault { /// [`escape_debug`]: char::escape_debug #[stable(feature = "char_escape_debug", since = "1.20.0")] #[derive(Clone, Debug)] -pub struct EscapeDebug(EscapeDefault); +pub struct EscapeDebug(EscapeDebugInner); + +#[derive(Clone, Debug)] +// Note: It’s possible to manually encode the EscapeDebugInner inside of +// EscapeIterInner (e.g. with alive=254..255 indicating that data[0..4] holds +// a char) which would likely result in a more optimised code. For now we use +// the option easier to implement. +enum EscapeDebugInner { + Bytes(escape::EscapeIterInner<10>), + Char(char), +} + +impl EscapeDebug { + fn printable(chr: char) -> Self { + Self(EscapeDebugInner::Char(chr)) + } + + fn backslash(chr: ascii::Char) -> Self { + let data = [ascii::Char::ReverseSolidus, chr]; + let iter = escape::EscapeIterInner::from_array(data); + Self(EscapeDebugInner::Bytes(iter)) + } + + fn from_unicode(esc: EscapeUnicode) -> Self { + Self(EscapeDebugInner::Bytes(esc.0)) + } + + fn clear(&mut self) { + let bytes = escape::EscapeIterInner::from_array([]); + self.0 = EscapeDebugInner::Bytes(bytes); + } +} #[stable(feature = "char_escape_debug", since = "1.20.0")] impl Iterator for EscapeDebug { type Item = char; + + #[inline] fn next(&mut self) -> Option<char> { - self.0.next() + match self.0 { + EscapeDebugInner::Bytes(ref mut bytes) => bytes.next().map(char::from), + EscapeDebugInner::Char(chr) => { + self.clear(); + Some(chr) + } + } } + fn size_hint(&self) -> (usize, Option<usize>) { - self.0.size_hint() + let n = self.len(); + (n, Some(n)) + } + + #[inline] + fn count(self) -> usize { + self.len() } } #[stable(feature = "char_escape_debug", since = "1.20.0")] -impl ExactSizeIterator for EscapeDebug {} +impl ExactSizeIterator for EscapeDebug { + fn len(&self) -> usize { + match &self.0 { + EscapeDebugInner::Bytes(bytes) => bytes.len(), + EscapeDebugInner::Char(_) => 1, + } + } +} #[stable(feature = "fused", since = "1.26.0")] impl FusedIterator for EscapeDebug {} @@ -397,7 +366,10 @@ impl FusedIterator for EscapeDebug {} #[stable(feature = "char_escape_debug", since = "1.20.0")] impl fmt::Display for EscapeDebug { fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result { - fmt::Display::fmt(&self.0, f) + match &self.0 { + EscapeDebugInner::Bytes(bytes) => f.write_str(bytes.as_str()), + EscapeDebugInner::Char(chr) => f.write_char(*chr), + } } } |