//! This uses the name `AsciiChar`, even though it's not exposed that way right now, //! because it avoids a whole bunch of "are you sure you didn't mean `char`?" //! suggestions from rustc if you get anything slightly wrong in here, and overall //! helps with clarity as we're also referring to `char` intentionally in here. use crate::fmt::{self, Write}; use crate::mem::transmute; /// One of the 128 Unicode characters from U+0000 through U+007F, /// often known as the [ASCII] subset. /// /// Officially, this is the first [block] in Unicode, _Basic Latin_. /// For details, see the [*C0 Controls and Basic Latin*][chart] code chart. /// /// This block was based on older 7-bit character code standards such as /// ANSI X3.4-1977, ISO 646-1973, and [NIST FIPS 1-2]. /// /// # When to use this /// /// The main advantage of this subset is that it's always valid UTF-8. As such, /// the `&[ascii::Char]` -> `&str` conversion function (as well as other related /// ones) are O(1): *no* runtime checks are needed. /// /// If you're consuming strings, you should usually handle Unicode and thus /// accept `str`s, not limit yourself to `ascii::Char`s. /// /// However, certain formats are intentionally designed to produce ASCII-only /// output in order to be 8-bit-clean. In those cases, it can be simpler and /// faster to generate `ascii::Char`s instead of dealing with the variable width /// properties of general UTF-8 encoded strings, while still allowing the result /// to be used freely with other Rust things that deal in general `str`s. /// /// For example, a UUID library might offer a way to produce the string /// representation of a UUID as an `[ascii::Char; 36]` to avoid memory /// allocation yet still allow it to be used as UTF-8 via `as_str` without /// paying for validation (or needing `unsafe` code) the way it would if it /// were provided as a `[u8; 36]`. /// /// # Layout /// /// This type is guaranteed to have a size and alignment of 1 byte. /// /// # Names /// /// The variants on this type are [Unicode names][NamesList] of the characters /// in upper camel case, with a few tweaks: /// - For `` characters, the primary alias name is used. /// - `LATIN` is dropped, as this block has no non-latin letters. /// - `LETTER` is dropped, as `CAPITAL`/`SMALL` suffices in this block. /// - `DIGIT`s use a single digit rather than writing out `ZERO`, `ONE`, etc. /// /// [ASCII]: https://www.unicode.org/glossary/index.html#ASCII /// [block]: https://www.unicode.org/glossary/index.html#block /// [chart]: https://www.unicode.org/charts/PDF/U0000.pdf /// [NIST FIPS 1-2]: https://nvlpubs.nist.gov/nistpubs/Legacy/FIPS/fipspub1-2-1977.pdf /// [NamesList]: https://www.unicode.org/Public/15.0.0/ucd/NamesList.txt #[derive(Copy, Clone, Eq, PartialEq, Ord, PartialOrd, Hash)] #[unstable(feature = "ascii_char", issue = "110998")] #[repr(u8)] pub enum AsciiChar { /// U+0000 #[unstable(feature = "ascii_char_variants", issue = "110998")] Null = 0, /// U+0001 #[unstable(feature = "ascii_char_variants", issue = "110998")] StartOfHeading = 1, /// U+0002 #[unstable(feature = "ascii_char_variants", issue = "110998")] StartOfText = 2, /// U+0003 #[unstable(feature = "ascii_char_variants", issue = "110998")] EndOfText = 3, /// U+0004 #[unstable(feature = "ascii_char_variants", issue = "110998")] EndOfTransmission = 4, /// U+0005 #[unstable(feature = "ascii_char_variants", issue = "110998")] Enquiry = 5, /// U+0006 #[unstable(feature = "ascii_char_variants", issue = "110998")] Acknowledge = 6, /// U+0007 #[unstable(feature = "ascii_char_variants", issue = "110998")] Bell = 7, /// U+0008 #[unstable(feature = "ascii_char_variants", issue = "110998")] Backspace = 8, /// U+0009 #[unstable(feature = "ascii_char_variants", issue = "110998")] CharacterTabulation = 9, /// U+000A #[unstable(feature = "ascii_char_variants", issue = "110998")] LineFeed = 10, /// U+000B #[unstable(feature = "ascii_char_variants", issue = "110998")] LineTabulation = 11, /// U+000C #[unstable(feature = "ascii_char_variants", issue = "110998")] FormFeed = 12, /// U+000D #[unstable(feature = "ascii_char_variants", issue = "110998")] CarriageReturn = 13, /// U+000E #[unstable(feature = "ascii_char_variants", issue = "110998")] ShiftOut = 14, /// U+000F #[unstable(feature = "ascii_char_variants", issue = "110998")] ShiftIn = 15, /// U+0010 #[unstable(feature = "ascii_char_variants", issue = "110998")] DataLinkEscape = 16, /// U+0011 #[unstable(feature = "ascii_char_variants", issue = "110998")] DeviceControlOne = 17, /// U+0012 #[unstable(feature = "ascii_char_variants", issue = "110998")] DeviceControlTwo = 18, /// U+0013 #[unstable(feature = "ascii_char_variants", issue = "110998")] DeviceControlThree = 19, /// U+0014 #[unstable(feature = "ascii_char_variants", issue = "110998")] DeviceControlFour = 20, /// U+0015 #[unstable(feature = "ascii_char_variants", issue = "110998")] NegativeAcknowledge = 21, /// U+0016 #[unstable(feature = "ascii_char_variants", issue = "110998")] SynchronousIdle = 22, /// U+0017 #[unstable(feature = "ascii_char_variants", issue = "110998")] EndOfTransmissionBlock = 23, /// U+0018 #[unstable(feature = "ascii_char_variants", issue = "110998")] Cancel = 24, /// U+0019 #[unstable(feature = "ascii_char_variants", issue = "110998")] EndOfMedium = 25, /// U+001A #[unstable(feature = "ascii_char_variants", issue = "110998")] Substitute = 26, /// U+001B #[unstable(feature = "ascii_char_variants", issue = "110998")] Escape = 27, /// U+001C #[unstable(feature = "ascii_char_variants", issue = "110998")] InformationSeparatorFour = 28, /// U+001D #[unstable(feature = "ascii_char_variants", issue = "110998")] InformationSeparatorThree = 29, /// U+001E #[unstable(feature = "ascii_char_variants", issue = "110998")] InformationSeparatorTwo = 30, /// U+001F #[unstable(feature = "ascii_char_variants", issue = "110998")] InformationSeparatorOne = 31, /// U+0020 #[unstable(feature = "ascii_char_variants", issue = "110998")] Space = 32, /// U+0021 #[unstable(feature = "ascii_char_variants", issue = "110998")] ExclamationMark = 33, /// U+0022 #[unstable(feature = "ascii_char_variants", issue = "110998")] QuotationMark = 34, /// U+0023 #[unstable(feature = "ascii_char_variants", issue = "110998")] NumberSign = 35, /// U+0024 #[unstable(feature = "ascii_char_variants", issue = "110998")] DollarSign = 36, /// U+0025 #[unstable(feature = "ascii_char_variants", issue = "110998")] PercentSign = 37, /// U+0026 #[unstable(feature = "ascii_char_variants", issue = "110998")] Ampersand = 38, /// U+0027 #[unstable(feature = "ascii_char_variants", issue = "110998")] Apostrophe = 39, /// U+0028 #[unstable(feature = "ascii_char_variants", issue = "110998")] LeftParenthesis = 40, /// U+0029 #[unstable(feature = "ascii_char_variants", issue = "110998")] RightParenthesis = 41, /// U+002A #[unstable(feature = "ascii_char_variants", issue = "110998")] Asterisk = 42, /// U+002B #[unstable(feature = "ascii_char_variants", issue = "110998")] PlusSign = 43, /// U+002C #[unstable(feature = "ascii_char_variants", issue = "110998")] Comma = 44, /// U+002D #[unstable(feature = "ascii_char_variants", issue = "110998")] HyphenMinus = 45, /// U+002E #[unstable(feature = "ascii_char_variants", issue = "110998")] FullStop = 46, /// U+002F #[unstable(feature = "ascii_char_variants", issue = "110998")] Solidus = 47, /// U+0030 #[unstable(feature = "ascii_char_variants", issue = "110998")] Digit0 = 48, /// U+0031 #[unstable(feature = "ascii_char_variants", issue = "110998")] Digit1 = 49, /// U+0032 #[unstable(feature = "ascii_char_variants", issue = "110998")] Digit2 = 50, /// U+0033 #[unstable(feature = "ascii_char_variants", issue = "110998")] Digit3 = 51, /// U+0034 #[unstable(feature = "ascii_char_variants", issue = "110998")] Digit4 = 52, /// U+0035 #[unstable(feature = "ascii_char_variants", issue = "110998")] Digit5 = 53, /// U+0036 #[unstable(feature = "ascii_char_variants", issue = "110998")] Digit6 = 54, /// U+0037 #[unstable(feature = "ascii_char_variants", issue = "110998")] Digit7 = 55, /// U+0038 #[unstable(feature = "ascii_char_variants", issue = "110998")] Digit8 = 56, /// U+0039 #[unstable(feature = "ascii_char_variants", issue = "110998")] Digit9 = 57, /// U+003A #[unstable(feature = "ascii_char_variants", issue = "110998")] Colon = 58, /// U+003B #[unstable(feature = "ascii_char_variants", issue = "110998")] Semicolon = 59, /// U+003C #[unstable(feature = "ascii_char_variants", issue = "110998")] LessThanSign = 60, /// U+003D #[unstable(feature = "ascii_char_variants", issue = "110998")] EqualsSign = 61, /// U+003E #[unstable(feature = "ascii_char_variants", issue = "110998")] GreaterThanSign = 62, /// U+003F #[unstable(feature = "ascii_char_variants", issue = "110998")] QuestionMark = 63, /// U+0040 #[unstable(feature = "ascii_char_variants", issue = "110998")] CommercialAt = 64, /// U+0041 #[unstable(feature = "ascii_char_variants", issue = "110998")] CapitalA = 65, /// U+0042 #[unstable(feature = "ascii_char_variants", issue = "110998")] CapitalB = 66, /// U+0043 #[unstable(feature = "ascii_char_variants", issue = "110998")] CapitalC = 67, /// U+0044 #[unstable(feature = "ascii_char_variants", issue = "110998")] CapitalD = 68, /// U+0045 #[unstable(feature = "ascii_char_variants", issue = "110998")] CapitalE = 69, /// U+0046 #[unstable(feature = "ascii_char_variants", issue = "110998")] CapitalF = 70, /// U+0047 #[unstable(feature = "ascii_char_variants", issue = "110998")] CapitalG = 71, /// U+0048 #[unstable(feature = "ascii_char_variants", issue = "110998")] CapitalH = 72, /// U+0049 #[unstable(feature = "ascii_char_variants", issue = "110998")] CapitalI = 73, /// U+004A #[unstable(feature = "ascii_char_variants", issue = "110998")] CapitalJ = 74, /// U+004B #[unstable(feature = "ascii_char_variants", issue = "110998")] CapitalK = 75, /// U+004C #[unstable(feature = "ascii_char_variants", issue = "110998")] CapitalL = 76, /// U+004D #[unstable(feature = "ascii_char_variants", issue = "110998")] CapitalM = 77, /// U+004E #[unstable(feature = "ascii_char_variants", issue = "110998")] CapitalN = 78, /// U+004F #[unstable(feature = "ascii_char_variants", issue = "110998")] CapitalO = 79, /// U+0050 #[unstable(feature = "ascii_char_variants", issue = "110998")] CapitalP = 80, /// U+0051 #[unstable(feature = "ascii_char_variants", issue = "110998")] CapitalQ = 81, /// U+0052 #[unstable(feature = "ascii_char_variants", issue = "110998")] CapitalR = 82, /// U+0053 #[unstable(feature = "ascii_char_variants", issue = "110998")] CapitalS = 83, /// U+0054 #[unstable(feature = "ascii_char_variants", issue = "110998")] CapitalT = 84, /// U+0055 #[unstable(feature = "ascii_char_variants", issue = "110998")] CapitalU = 85, /// U+0056 #[unstable(feature = "ascii_char_variants", issue = "110998")] CapitalV = 86, /// U+0057 #[unstable(feature = "ascii_char_variants", issue = "110998")] CapitalW = 87, /// U+0058 #[unstable(feature = "ascii_char_variants", issue = "110998")] CapitalX = 88, /// U+0059 #[unstable(feature = "ascii_char_variants", issue = "110998")] CapitalY = 89, /// U+005A #[unstable(feature = "ascii_char_variants", issue = "110998")] CapitalZ = 90, /// U+005B #[unstable(feature = "ascii_char_variants", issue = "110998")] LeftSquareBracket = 91, /// U+005C #[unstable(feature = "ascii_char_variants", issue = "110998")] ReverseSolidus = 92, /// U+005D #[unstable(feature = "ascii_char_variants", issue = "110998")] RightSquareBracket = 93, /// U+005E #[unstable(feature = "ascii_char_variants", issue = "110998")] CircumflexAccent = 94, /// U+005F #[unstable(feature = "ascii_char_variants", issue = "110998")] LowLine = 95, /// U+0060 #[unstable(feature = "ascii_char_variants", issue = "110998")] GraveAccent = 96, /// U+0061 #[unstable(feature = "ascii_char_variants", issue = "110998")] SmallA = 97, /// U+0062 #[unstable(feature = "ascii_char_variants", issue = "110998")] SmallB = 98, /// U+0063 #[unstable(feature = "ascii_char_variants", issue = "110998")] SmallC = 99, /// U+0064 #[unstable(feature = "ascii_char_variants", issue = "110998")] SmallD = 100, /// U+0065 #[unstable(feature = "ascii_char_variants", issue = "110998")] SmallE = 101, /// U+0066 #[unstable(feature = "ascii_char_variants", issue = "110998")] SmallF = 102, /// U+0067 #[unstable(feature = "ascii_char_variants", issue = "110998")] SmallG = 103, /// U+0068 #[unstable(feature = "ascii_char_variants", issue = "110998")] SmallH = 104, /// U+0069 #[unstable(feature = "ascii_char_variants", issue = "110998")] SmallI = 105, /// U+006A #[unstable(feature = "ascii_char_variants", issue = "110998")] SmallJ = 106, /// U+006B #[unstable(feature = "ascii_char_variants", issue = "110998")] SmallK = 107, /// U+006C #[unstable(feature = "ascii_char_variants", issue = "110998")] SmallL = 108, /// U+006D #[unstable(feature = "ascii_char_variants", issue = "110998")] SmallM = 109, /// U+006E #[unstable(feature = "ascii_char_variants", issue = "110998")] SmallN = 110, /// U+006F #[unstable(feature = "ascii_char_variants", issue = "110998")] SmallO = 111, /// U+0070 #[unstable(feature = "ascii_char_variants", issue = "110998")] SmallP = 112, /// U+0071 #[unstable(feature = "ascii_char_variants", issue = "110998")] SmallQ = 113, /// U+0072 #[unstable(feature = "ascii_char_variants", issue = "110998")] SmallR = 114, /// U+0073 #[unstable(feature = "ascii_char_variants", issue = "110998")] SmallS = 115, /// U+0074 #[unstable(feature = "ascii_char_variants", issue = "110998")] SmallT = 116, /// U+0075 #[unstable(feature = "ascii_char_variants", issue = "110998")] SmallU = 117, /// U+0076 #[unstable(feature = "ascii_char_variants", issue = "110998")] SmallV = 118, /// U+0077 #[unstable(feature = "ascii_char_variants", issue = "110998")] SmallW = 119, /// U+0078 #[unstable(feature = "ascii_char_variants", issue = "110998")] SmallX = 120, /// U+0079 #[unstable(feature = "ascii_char_variants", issue = "110998")] SmallY = 121, /// U+007A #[unstable(feature = "ascii_char_variants", issue = "110998")] SmallZ = 122, /// U+007B #[unstable(feature = "ascii_char_variants", issue = "110998")] LeftCurlyBracket = 123, /// U+007C #[unstable(feature = "ascii_char_variants", issue = "110998")] VerticalLine = 124, /// U+007D #[unstable(feature = "ascii_char_variants", issue = "110998")] RightCurlyBracket = 125, /// U+007E #[unstable(feature = "ascii_char_variants", issue = "110998")] Tilde = 126, /// U+007F #[unstable(feature = "ascii_char_variants", issue = "110998")] Delete = 127, } impl AsciiChar { /// Creates an ascii character from the byte `b`, /// or returns `None` if it's too large. #[unstable(feature = "ascii_char", issue = "110998")] #[inline] pub const fn from_u8(b: u8) -> Option { if b <= 127 { // SAFETY: Just checked that `b` is in-range Some(unsafe { Self::from_u8_unchecked(b) }) } else { None } } /// Creates an ASCII character from the byte `b`, /// without checking whether it's valid. /// /// # Safety /// /// `b` must be in `0..=127`, or else this is UB. #[unstable(feature = "ascii_char", issue = "110998")] #[inline] pub const unsafe fn from_u8_unchecked(b: u8) -> Self { // SAFETY: Our safety precondition is that `b` is in-range. unsafe { transmute(b) } } /// When passed the *number* `0`, `1`, …, `9`, returns the *character* /// `'0'`, `'1'`, …, `'9'` respectively. /// /// If `d >= 10`, returns `None`. #[unstable(feature = "ascii_char", issue = "110998")] #[inline] pub const fn digit(d: u8) -> Option { if d < 10 { // SAFETY: Just checked it's in-range. Some(unsafe { Self::digit_unchecked(d) }) } else { None } } /// When passed the *number* `0`, `1`, …, `9`, returns the *character* /// `'0'`, `'1'`, …, `'9'` respectively, without checking that it's in-range. /// /// # Safety /// /// This is immediate UB if called with `d > 64`. /// /// If `d >= 10` and `d <= 64`, this is allowed to return any value or panic. /// Notably, it should not be expected to return hex digits, or any other /// reasonable extension of the decimal digits. /// /// (This lose safety condition is intended to simplify soundness proofs /// when writing code using this method, since the implementation doesn't /// need something really specific, not to make those other arguments do /// something useful. It might be tightened before stabilization.) #[unstable(feature = "ascii_char", issue = "110998")] #[inline] pub const unsafe fn digit_unchecked(d: u8) -> Self { debug_assert!(d < 10); // SAFETY: `'0'` through `'9'` are U+00030 through U+0039, // so because `d` must be 64 or less the addition can return at most // 112 (0x70), which doesn't overflow and is within the ASCII range. unsafe { let byte = b'0'.unchecked_add(d); Self::from_u8_unchecked(byte) } } /// Gets this ASCII character as a byte. #[unstable(feature = "ascii_char", issue = "110998")] #[inline] pub const fn to_u8(self) -> u8 { self as u8 } /// Gets this ASCII character as a `char` Unicode Scalar Value. #[unstable(feature = "ascii_char", issue = "110998")] #[inline] pub const fn to_char(self) -> char { self as u8 as char } /// Views this ASCII character as a one-code-unit UTF-8 `str`. #[unstable(feature = "ascii_char", issue = "110998")] #[inline] pub const fn as_str(&self) -> &str { crate::slice::from_ref(self).as_str() } } impl [AsciiChar] { /// Views this slice of ASCII characters as a UTF-8 `str`. #[unstable(feature = "ascii_char", issue = "110998")] #[inline] pub const fn as_str(&self) -> &str { let ascii_ptr: *const Self = self; let str_ptr = ascii_ptr as *const str; // SAFETY: Each ASCII codepoint in UTF-8 is encoded as one single-byte // code unit having the same value as the ASCII byte. unsafe { &*str_ptr } } /// Views this slice of ASCII characters as a slice of `u8` bytes. #[unstable(feature = "ascii_char", issue = "110998")] #[inline] pub const fn as_bytes(&self) -> &[u8] { self.as_str().as_bytes() } } #[unstable(feature = "ascii_char", issue = "110998")] impl fmt::Display for AsciiChar { fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result { ::fmt(self.as_str(), f) } } #[unstable(feature = "ascii_char", issue = "110998")] impl fmt::Debug for AsciiChar { fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result { #[inline] fn backslash(a: AsciiChar) -> ([AsciiChar; 4], u8) { ([AsciiChar::ReverseSolidus, a, AsciiChar::Null, AsciiChar::Null], 2) } let (buf, len) = match self { AsciiChar::Null => backslash(AsciiChar::Digit0), AsciiChar::CharacterTabulation => backslash(AsciiChar::SmallT), AsciiChar::CarriageReturn => backslash(AsciiChar::SmallR), AsciiChar::LineFeed => backslash(AsciiChar::SmallN), AsciiChar::ReverseSolidus => backslash(AsciiChar::ReverseSolidus), AsciiChar::Apostrophe => backslash(AsciiChar::Apostrophe), _ => { let byte = self.to_u8(); if !byte.is_ascii_control() { ([*self, AsciiChar::Null, AsciiChar::Null, AsciiChar::Null], 1) } else { const HEX_DIGITS: [AsciiChar; 16] = *b"0123456789abcdef".as_ascii().unwrap(); let hi = HEX_DIGITS[usize::from(byte >> 4)]; let lo = HEX_DIGITS[usize::from(byte & 0xf)]; ([AsciiChar::ReverseSolidus, AsciiChar::SmallX, hi, lo], 4) } } }; f.write_char('\'')?; for byte in &buf[..len as usize] { f.write_str(byte.as_str())?; } f.write_char('\'') } }