diff options
Diffstat (limited to 'third_party/rust/litrs/src/byte')
-rw-r--r-- | third_party/rust/litrs/src/byte/mod.rs | 107 | ||||
-rw-r--r-- | third_party/rust/litrs/src/byte/tests.rs | 188 |
2 files changed, 295 insertions, 0 deletions
diff --git a/third_party/rust/litrs/src/byte/mod.rs b/third_party/rust/litrs/src/byte/mod.rs
new file mode 100644
index 0000000000..ffdff5d04a
--- /dev/null
+++ b/third_party/rust/litrs/src/byte/mod.rs
@@ -0,0 +1,107 @@
+use core::fmt;
+
+use crate::{
+    Buffer, ParseError,
+    err::{perr, ParseErrorKind::*},
+    escape::unescape,
+    parse::check_suffix,
+};
+
+
+/// A (single) byte literal, e.g. `b'k'` or `b'!'`.
+///
+/// See [the reference][ref] for more information.
+///
+/// [ref]: https://doc.rust-lang.org/reference/tokens.html#byte-literals
+#[derive(Debug, Clone, Copy, PartialEq, Eq)]
+pub struct ByteLit<B: Buffer> {
+    raw: B, // full literal text, returned by `raw_input`
+    /// Start index of the suffix or `raw.len()` if there is no suffix.
+    start_suffix: usize,
+    value: u8, // decoded byte, returned by `value`
+}
+
+impl<B: Buffer> ByteLit<B> {
+    /// Parses the input as a byte literal. Returns an error if the input is empty, does
+    /// not start with `b'`, is otherwise invalid or represents a different kind of literal.
+    pub fn parse(input: B) -> Result<Self, ParseError> {
+        if input.is_empty() {
+            return Err(perr(None, Empty));
+        }
+        if !input.starts_with("b'") {
+            return Err(perr(None, InvalidByteLiteralStart));
+        }
+
+        let (value, start_suffix) = parse_impl(&input)?; // `b'` prefix was checked above
+        Ok(Self { raw: input, value, start_suffix })
+    }
+
+    /// Returns the byte value that this literal represents.
+    pub fn value(&self) -> u8 {
+        self.value
+    }
+
+    /// The optional suffix. Returns `""` if the suffix is empty/does not exist.
+    pub fn suffix(&self) -> &str {
+        &(*self.raw)[self.start_suffix..]
+    }
+
+    /// Returns the raw input that was passed to `parse`.
+    pub fn raw_input(&self) -> &str {
+        &self.raw
+    }
+
+    /// Returns the raw input that was passed to `parse`, potentially owned.
+    pub fn into_raw_input(self) -> B {
+        self.raw
+    }
+
+}
+
+impl ByteLit<&str> {
+    /// Makes a copy of the underlying buffer and returns the owned version of
+    /// `Self`.
+    pub fn to_owned(&self) -> ByteLit<String> {
+        ByteLit {
+            raw: self.raw.to_owned(),
+            start_suffix: self.start_suffix,
+            value: self.value,
+        }
+    }
+}
+
+impl<B: Buffer> fmt::Display for ByteLit<B> {
+    fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
+        f.pad(&self.raw) // raw literal text, with the formatter's padding options applied
+    }
+}
+
+/// Parses a byte literal into `(value, start of suffix)`. Precondition: must start with `b'`.
+#[inline(never)]
+pub(crate) fn parse_impl(input: &str) -> Result<(u8, usize), ParseError> {
+    let input_bytes = input.as_bytes();
+    let first = input_bytes.get(2).ok_or_else(|| perr(None, UnterminatedByteLiteral))?;
+    let (c, len) = match first {
+        b'\'' if input_bytes.get(3) == Some(&b'\'') => return Err(perr(2, UnescapedSingleQuote)),
+        b'\'' => return Err(perr(None, EmptyByteLiteral)),
+        b'\n' | b'\t' | b'\r' => return Err(perr(2, UnescapedSpecialWhitespace)),
+        b'\\' => unescape::<u8>(&input[2..], 2)?, // escape sequence, yields `(c, len)`
+        other if other.is_ascii() => (*other, 1), // plain ASCII byte, one byte long
+        _ => return Err(perr(2, NonAsciiInByteLiteral)),
+    };
+
+    match input[2 + len..].find('\'') {
+        Some(0) => {} // closing quote directly follows the byte
+        Some(_) => return Err(perr(None, OverlongByteLiteral)),
+        None => return Err(perr(None, UnterminatedByteLiteral)),
+    }
+
+    let start_suffix = 2 + len + 1; // `b'` prefix (2) + byte or escape (len) + closing quote (1)
+    let suffix = &input[start_suffix..];
+    check_suffix(suffix).map_err(|kind| perr(start_suffix, kind))?;
+
+    Ok((c, start_suffix))
+}
+
+#[cfg(test)]
+mod tests;
diff --git a/third_party/rust/litrs/src/byte/tests.rs b/third_party/rust/litrs/src/byte/tests.rs
new file mode 100644
index 0000000000..3cf16b5fc2
--- /dev/null
+++ b/third_party/rust/litrs/src/byte/tests.rs
@@ -0,0 +1,188 @@
+use crate::{ByteLit, Literal, test_util::{assert_parse_ok_eq, assert_roundtrip}};
+
+// ===== Utility functions =======================================================================
+
+macro_rules! check {
+    ($lit:literal) => { check!($lit, stringify!($lit), "") }; // input is the literal's own text
+    ($lit:literal, $input:expr, $suffix:literal) => { // explicit input and expected suffix
+        let input = $input;
+        let expected = ByteLit {
+            raw: input,
+            start_suffix: input.len() - $suffix.len(),
+            value: $lit,
+        };
+
+        assert_parse_ok_eq(input, ByteLit::parse(input), expected.clone(), "ByteLit::parse");
+        assert_parse_ok_eq(input, Literal::parse(input), Literal::Byte(expected), "Literal::parse");
+        let lit = ByteLit::parse(input).unwrap();
+        assert_eq!(lit.value(), $lit);
+        assert_eq!(lit.suffix(), $suffix);
+        assert_roundtrip(expected.to_owned(), input);
+    };
+}
+
+
+// ===== Actual tests ============================================================================
+
+#[test]
+fn alphanumeric() {
+    check!(b'a');
+    check!(b'b');
+    check!(b'y');
+    check!(b'z');
+    check!(b'A');
+    check!(b'B');
+    check!(b'Y');
+    check!(b'Z');
+
+    check!(b'0');
+    check!(b'1');
+    check!(b'8');
+    check!(b'9');
+}
+
+#[test]
+fn special_chars() {
+    check!(b' ');
+    check!(b'!');
+    check!(b'"');
+    check!(b'#');
+    check!(b'$');
+    check!(b'%');
+    check!(b'&');
+    check!(b'(');
+    check!(b')');
+    check!(b'*');
+    check!(b'+');
+    check!(b',');
+    check!(b'-');
+    check!(b'.');
+    check!(b'/');
+    check!(b':');
+    check!(b';');
+    check!(b'<');
+    check!(b'=');
+    check!(b'>');
+    check!(b'?');
+    check!(b'@');
+    check!(b'[');
+    check!(b']');
+    check!(b'^');
+    check!(b'_');
+    check!(b'`');
+    check!(b'{');
+    check!(b'|');
+    check!(b'}');
+    check!(b'~');
+}
+
+#[test]
+fn quote_escapes() {
+    check!(b'\'');
+    check!(b'\"');
+}
+
+#[test]
+fn ascii_escapes() {
+    check!(b'\n');
+    check!(b'\r');
+    check!(b'\t');
+    check!(b'\\');
+    check!(b'\0');
+
+    check!(b'\x00');
+    check!(b'\x01');
+    check!(b'\x0c');
+    check!(b'\x0D');
+    check!(b'\x13');
+    check!(b'\x30');
+    check!(b'\x30'); // NOTE(review): repeats the previous case
+    check!(b'\x4B');
+    check!(b'\x6b');
+    check!(b'\x7F');
+    check!(b'\x7f');
+}
+
+#[test]
+fn byte_escapes() {
+    check!(b'\x80');
+    check!(b'\x8a');
+    check!(b'\x8C');
+    check!(b'\x99');
+    check!(b'\xa0');
+    check!(b'\xAd');
+    check!(b'\xfe');
+    check!(b'\xFe');
+    check!(b'\xfF');
+    check!(b'\xFF');
+}
+
+#[test]
+fn suffixes() {
+    check!(b'a', r##"b'a'peter"##, "peter");
+    check!(b'#', r##"b'#'peter"##, "peter");
+    check!(b'\n', r##"b'\n'peter"##, "peter");
+    check!(b'\'', r##"b'\''peter"##, "peter");
+    check!(b'\"', r##"b'\"'peter"##, "peter");
+    check!(b'\xFF', r##"b'\xFF'peter"##, "peter");
+}
+
+#[test]
+fn invalid_escapes() {
+    assert_err!(ByteLit, r"b'\a'", UnknownEscape, 2..4);
+    assert_err!(ByteLit, r"b'\y'", UnknownEscape, 2..4);
+    assert_err!(ByteLit, r"b'\", UnterminatedEscape, 2..3);
+    assert_err!(ByteLit, r"b'\x'", UnterminatedEscape, 2..5);
+    assert_err!(ByteLit, r"b'\x1'", InvalidXEscape, 2..6);
+    assert_err!(ByteLit, r"b'\xaj'", InvalidXEscape, 2..6);
+    assert_err!(ByteLit, r"b'\xjb'", InvalidXEscape, 2..6);
+}
+
+#[test]
+fn unicode_escape_not_allowed() {
+    assert_err!(ByteLit, r"b'\u{0}'", UnicodeEscapeInByteLiteral, 2..4);
+    assert_err!(ByteLit, r"b'\u{00}'", UnicodeEscapeInByteLiteral, 2..4);
+    assert_err!(ByteLit, r"b'\u{b}'", UnicodeEscapeInByteLiteral, 2..4);
+    assert_err!(ByteLit, r"b'\u{B}'", UnicodeEscapeInByteLiteral, 2..4);
+    assert_err!(ByteLit, r"b'\u{7e}'", UnicodeEscapeInByteLiteral, 2..4);
+    assert_err!(ByteLit, r"b'\u{E4}'", UnicodeEscapeInByteLiteral, 2..4);
+    assert_err!(ByteLit, r"b'\u{e4}'", UnicodeEscapeInByteLiteral, 2..4);
+    assert_err!(ByteLit, r"b'\u{fc}'", UnicodeEscapeInByteLiteral, 2..4);
+    assert_err!(ByteLit, r"b'\u{Fc}'", UnicodeEscapeInByteLiteral, 2..4);
+    assert_err!(ByteLit, r"b'\u{fC}'", UnicodeEscapeInByteLiteral, 2..4);
+    assert_err!(ByteLit, r"b'\u{FC}'", UnicodeEscapeInByteLiteral, 2..4);
+    assert_err!(ByteLit, r"b'\u{b10}'", UnicodeEscapeInByteLiteral, 2..4);
+    assert_err!(ByteLit, r"b'\u{B10}'", UnicodeEscapeInByteLiteral, 2..4);
+    assert_err!(ByteLit, r"b'\u{0b10}'", UnicodeEscapeInByteLiteral, 2..4);
+    assert_err!(ByteLit, r"b'\u{2764}'", UnicodeEscapeInByteLiteral, 2..4);
+    assert_err!(ByteLit, r"b'\u{1f602}'", UnicodeEscapeInByteLiteral, 2..4);
+    assert_err!(ByteLit, r"b'\u{1F602}'", UnicodeEscapeInByteLiteral, 2..4);
+}
+
+#[test]
+fn parse_err() {
+    assert_err!(ByteLit, r"b''", EmptyByteLiteral, None);
+    assert_err!(ByteLit, r"b' ''", UnexpectedChar, 4..5); // extra quote sits where a suffix would start
+
+    assert_err!(ByteLit, r"b'", UnterminatedByteLiteral, None);
+    assert_err!(ByteLit, r"b'a", UnterminatedByteLiteral, None);
+    assert_err!(ByteLit, r"b'\n", UnterminatedByteLiteral, None);
+    assert_err!(ByteLit, r"b'\x35", UnterminatedByteLiteral, None);
+
+    assert_err!(ByteLit, r"b'ab'", OverlongByteLiteral, None);
+    assert_err!(ByteLit, r"b'a _'", OverlongByteLiteral, None);
+    assert_err!(ByteLit, r"b'\n3'", OverlongByteLiteral, None);
+
+    assert_err!(ByteLit, r"", Empty, None);
+
+    assert_err!(ByteLit, r"b'''", UnescapedSingleQuote, 2);
+    assert_err!(ByteLit, r"b''''", UnescapedSingleQuote, 2);
+
+    assert_err!(ByteLit, "b'\n'", UnescapedSpecialWhitespace, 2);
+    assert_err!(ByteLit, "b'\t'", UnescapedSpecialWhitespace, 2);
+    assert_err!(ByteLit, "b'\r'", UnescapedSpecialWhitespace, 2);
+
+    assert_err!(ByteLit, "b'న'", NonAsciiInByteLiteral, 2);
+    assert_err!(ByteLit, "b'犬'", NonAsciiInByteLiteral, 2);
+    assert_err!(ByteLit, "b'🦊'", NonAsciiInByteLiteral, 2);
+}