summaryrefslogtreecommitdiffstats
path: root/third_party/rust/ron/src/parse.rs
diff options
context:
space:
mode:
authorDaniel Baumann <daniel.baumann@progress-linux.org>2024-04-07 19:33:14 +0000
committerDaniel Baumann <daniel.baumann@progress-linux.org>2024-04-07 19:33:14 +0000
commit36d22d82aa202bb199967e9512281e9a53db42c9 (patch)
tree105e8c98ddea1c1e4784a60a5a6410fa416be2de /third_party/rust/ron/src/parse.rs
parentInitial commit. (diff)
downloadfirefox-esr-36d22d82aa202bb199967e9512281e9a53db42c9.tar.xz
firefox-esr-36d22d82aa202bb199967e9512281e9a53db42c9.zip
Adding upstream version 115.7.0esr.upstream/115.7.0esrupstream
Signed-off-by: Daniel Baumann <daniel.baumann@progress-linux.org>
Diffstat (limited to 'third_party/rust/ron/src/parse.rs')
-rw-r--r--third_party/rust/ron/src/parse.rs978
1 files changed, 978 insertions, 0 deletions
diff --git a/third_party/rust/ron/src/parse.rs b/third_party/rust/ron/src/parse.rs
new file mode 100644
index 0000000000..b0cc632f30
--- /dev/null
+++ b/third_party/rust/ron/src/parse.rs
@@ -0,0 +1,978 @@
+#![allow(clippy::identity_op)]
+
+use std::{
+ char::from_u32 as char_from_u32,
+ str::{from_utf8, from_utf8_unchecked, FromStr},
+};
+
+use crate::{
+ error::{Error, Position, Result, SpannedError, SpannedResult},
+ extensions::Extensions,
+};
+
+// We have the following char categories. Each category occupies its own bit,
+// so one `u8` can record every category a given byte belongs to.
+const INT_CHAR: u8 = 1 << 0; // [0-9A-Fa-f_]
+const FLOAT_CHAR: u8 = 1 << 1; // [0-9\.Ee+-_]
+const IDENT_FIRST_CHAR: u8 = 1 << 2; // [A-Za-z_]
+const IDENT_OTHER_CHAR: u8 = 1 << 3; // [A-Za-z_0-9]
+const IDENT_RAW_CHAR: u8 = 1 << 4; // [A-Za-z_0-9\.+-]
+const WHITESPACE_CHAR: u8 = 1 << 5; // [\n\t\r ]
+
+// We encode each char as belonging to some number of these categories.
+// The names below are padded to five characters so that the `ENCODINGS`
+// table lines up in columns.
+const DIGIT: u8 = INT_CHAR | FLOAT_CHAR | IDENT_OTHER_CHAR | IDENT_RAW_CHAR; // [0-9]
+const ABCDF: u8 = INT_CHAR | IDENT_FIRST_CHAR | IDENT_OTHER_CHAR | IDENT_RAW_CHAR; // [ABCDFabcdf]
+const UNDER: u8 = INT_CHAR | FLOAT_CHAR | IDENT_FIRST_CHAR | IDENT_OTHER_CHAR | IDENT_RAW_CHAR; // [_]
+const E____: u8 = INT_CHAR | FLOAT_CHAR | IDENT_FIRST_CHAR | IDENT_OTHER_CHAR | IDENT_RAW_CHAR; // [Ee]
+const G2Z__: u8 = IDENT_FIRST_CHAR | IDENT_OTHER_CHAR | IDENT_RAW_CHAR; // [G-Zg-z]
+const PUNCT: u8 = FLOAT_CHAR | IDENT_RAW_CHAR; // [\.+-]
+const WS___: u8 = WHITESPACE_CHAR; // [\t\n\r ]
+const _____: u8 = 0; // everything else
+
+// Table of encodings, for fast predicates. (Non-ASCII and special chars are
+// shown with '·' in the comment.) The table is indexed by the byte value;
+// each row covers ten consecutive byte values.
+#[rustfmt::skip]
+const ENCODINGS: [u8; 256] = [
+/* 0 1 2 3 4 5 6 7 8 9 */
+/* 0+: ·········· */ _____, _____, _____, _____, _____, _____, _____, _____, _____, WS___,
+/* 10+: ·········· */ WS___, _____, _____, WS___, _____, _____, _____, _____, _____, _____,
+/* 20+: ·········· */ _____, _____, _____, _____, _____, _____, _____, _____, _____, _____,
+/* 30+: ·· !"#$%&' */ _____, _____, WS___, _____, _____, _____, _____, _____, _____, _____,
+/* 40+: ()*+,-./01 */ _____, _____, _____, PUNCT, _____, PUNCT, PUNCT, _____, DIGIT, DIGIT,
+/* 50+: 23456789:; */ DIGIT, DIGIT, DIGIT, DIGIT, DIGIT, DIGIT, DIGIT, DIGIT, _____, _____,
+/* 60+: <=>?@ABCDE */ _____, _____, _____, _____, _____, ABCDF, ABCDF, ABCDF, ABCDF, E____,
+/* 70+: FGHIJKLMNO */ ABCDF, G2Z__, G2Z__, G2Z__, G2Z__, G2Z__, G2Z__, G2Z__, G2Z__, G2Z__,
+/* 80+: PQRSTUVWXY */ G2Z__, G2Z__, G2Z__, G2Z__, G2Z__, G2Z__, G2Z__, G2Z__, G2Z__, G2Z__,
+/* 90+: Z[\]^_`abc */ G2Z__, _____, _____, _____, _____, UNDER, _____, ABCDF, ABCDF, ABCDF,
+/* 100+: defghijklm */ ABCDF, E____, ABCDF, G2Z__, G2Z__, G2Z__, G2Z__, G2Z__, G2Z__, G2Z__,
+/* 110+: nopqrstuvw */ G2Z__, G2Z__, G2Z__, G2Z__, G2Z__, G2Z__, G2Z__, G2Z__, G2Z__, G2Z__,
+/* 120+: xyz{|}~··· */ G2Z__, G2Z__, G2Z__, _____, _____, _____, _____, _____, _____, _____,
+/* 130+: ·········· */ _____, _____, _____, _____, _____, _____, _____, _____, _____, _____,
+/* 140+: ·········· */ _____, _____, _____, _____, _____, _____, _____, _____, _____, _____,
+/* 150+: ·········· */ _____, _____, _____, _____, _____, _____, _____, _____, _____, _____,
+/* 160+: ·········· */ _____, _____, _____, _____, _____, _____, _____, _____, _____, _____,
+/* 170+: ·········· */ _____, _____, _____, _____, _____, _____, _____, _____, _____, _____,
+/* 180+: ·········· */ _____, _____, _____, _____, _____, _____, _____, _____, _____, _____,
+/* 190+: ·········· */ _____, _____, _____, _____, _____, _____, _____, _____, _____, _____,
+/* 200+: ·········· */ _____, _____, _____, _____, _____, _____, _____, _____, _____, _____,
+/* 210+: ·········· */ _____, _____, _____, _____, _____, _____, _____, _____, _____, _____,
+/* 220+: ·········· */ _____, _____, _____, _____, _____, _____, _____, _____, _____, _____,
+/* 230+: ·········· */ _____, _____, _____, _____, _____, _____, _____, _____, _____, _____,
+/* 240+: ·········· */ _____, _____, _____, _____, _____, _____, _____, _____, _____, _____,
+/* 250+: ·········· */ _____, _____, _____, _____, _____, _____
+];
+
+/// Reports whether `c` carries the `INT_CHAR` flag in `ENCODINGS`.
+const fn is_int_char(c: u8) -> bool {
+    let flags = ENCODINGS[c as usize];
+    (flags & INT_CHAR) != 0
+}
+
+/// Reports whether `c` carries the `FLOAT_CHAR` flag in `ENCODINGS`.
+const fn is_float_char(c: u8) -> bool {
+    let flags = ENCODINGS[c as usize];
+    (flags & FLOAT_CHAR) != 0
+}
+
+/// Reports whether `c` carries the `IDENT_FIRST_CHAR` flag in `ENCODINGS`,
+/// i.e. whether it may start an identifier.
+pub const fn is_ident_first_char(c: u8) -> bool {
+    let flags = ENCODINGS[c as usize];
+    (flags & IDENT_FIRST_CHAR) != 0
+}
+
+/// Reports whether `c` carries the `IDENT_OTHER_CHAR` flag in `ENCODINGS`,
+/// i.e. whether it may continue an identifier.
+pub const fn is_ident_other_char(c: u8) -> bool {
+    let flags = ENCODINGS[c as usize];
+    (flags & IDENT_OTHER_CHAR) != 0
+}
+
+/// Reports whether `c` carries the `IDENT_RAW_CHAR` flag in `ENCODINGS`.
+const fn is_ident_raw_char(c: u8) -> bool {
+    let flags = ENCODINGS[c as usize];
+    (flags & IDENT_RAW_CHAR) != 0
+}
+
+/// Reports whether `c` carries the `WHITESPACE_CHAR` flag in `ENCODINGS`.
+const fn is_whitespace_char(c: u8) -> bool {
+    let flags = ENCODINGS[c as usize];
+    (flags & WHITESPACE_CHAR) != 0
+}
+
+/// A parsed number tagged with the primitive type chosen for it.
+///
+/// This is the return type of `Bytes::any_num`. The 128-bit variants only
+/// exist when the `integer128` feature is enabled.
+#[derive(Clone, Debug, PartialEq)]
+pub enum AnyNum {
+ F32(f32),
+ F64(f64),
+ I8(i8),
+ U8(u8),
+ I16(i16),
+ U16(u16),
+ I32(i32),
+ U32(u32),
+ I64(i64),
+ U64(u64),
+ #[cfg(feature = "integer128")]
+ I128(i128),
+ #[cfg(feature = "integer128")]
+ U128(u128),
+}
+
+/// Parser state over a byte slice: the input that is still unconsumed, the
+/// position of the next byte, and the extensions enabled so far.
+///
+/// The type is `Copy`, so a working copy can be made for look-ahead.
+#[derive(Clone, Copy, Debug)]
+pub struct Bytes<'a> {
+ /// Bits set according to the `Extensions` enum.
+ pub exts: Extensions,
+ // Remaining input; consumed bytes are dropped from the front.
+ bytes: &'a [u8],
+ // Line/column of the next unconsumed byte; starts at line 1, column 1.
+ cursor: Position,
+}
+
+// Widest unsigned integer type integers are parsed into: `u128` with the
+// `integer128` feature, `u64` without it.
+#[cfg(feature = "integer128")]
+pub(crate) type LargeUInt = u128;
+#[cfg(not(feature = "integer128"))]
+pub(crate) type LargeUInt = u64;
+// Widest signed integer type integers are parsed into: `i128` with the
+// `integer128` feature, `i64` without it.
+#[cfg(feature = "integer128")]
+pub(crate) type LargeSInt = i128;
+#[cfg(not(feature = "integer128"))]
+pub(crate) type LargeSInt = i64;
+
+impl<'a> Bytes<'a> {
+    /// Creates a parser over `bytes`, positioned after any leading
+    /// whitespace, comments and extension attributes.
+    ///
+    /// Every extension attribute found at the start of the input is OR-ed
+    /// into `exts`. Errors are reported together with the cursor position at
+    /// which they occurred.
+    pub fn new(bytes: &'a [u8]) -> SpannedResult<Self> {
+        let mut parser = Bytes {
+            exts: Extensions::empty(),
+            bytes,
+            cursor: Position { line: 1, col: 1 },
+        };
+
+        parser.skip_ws().map_err(|e| parser.span_error(e))?;
+
+        // Keep reading attributes until one comes back empty.
+        loop {
+            let attribute = parser.extensions().map_err(|e| parser.span_error(e))?;
+
+            if attribute.is_empty() {
+                return Ok(parser);
+            }
+
+            parser.exts |= attribute;
+            parser.skip_ws().map_err(|e| parser.span_error(e))?;
+        }
+    }
+
+ pub fn span_error(&self, code: Error) -> SpannedError {
+ SpannedError {
+ code,
+ position: self.cursor,
+ }
+ }
+
+ pub fn advance(&mut self, bytes: usize) -> Result<()> {
+ for _ in 0..bytes {
+ self.advance_single()?;
+ }
+
+ Ok(())
+ }
+
+    /// Consumes one byte and updates the cursor: a `\n` moves to column 1 of
+    /// the next line, any other byte moves one column to the right.
+    ///
+    /// Fails with `Error::Eof` if no input is left.
+    pub fn advance_single(&mut self) -> Result<()> {
+        match self.peek_or_eof()? {
+            b'\n' => {
+                self.cursor.line += 1;
+                self.cursor.col = 1;
+            }
+            _ => self.cursor.col += 1,
+        }
+
+        self.bytes = &self.bytes[1..];
+
+        Ok(())
+    }
+
+    /// Parses an integer literal (without sign) into `T`.
+    ///
+    /// A leading `0x`, `0b` or `0o` selects base 16, 2 or 8; otherwise the
+    /// base is 10. Underscores between digits are skipped, but the digit run
+    /// must not start with one. If `sign` is positive the digits are
+    /// accumulated by addition, otherwise by subtraction.
+    ///
+    /// # Errors
+    ///
+    /// * `ExpectedInteger` if there are no digits, or a digit is not valid
+    ///   for the base.
+    /// * `UnderscoreAtBeginning` if the digit run starts with `_`.
+    /// * `IntegerOutOfBounds` if the value does not fit into `T`.
+    ///
+    /// When the digit run itself is parsed (successfully or not), it is
+    /// consumed from the input.
+    fn any_integer<T: Num>(&mut self, sign: i8) -> Result<T> {
+        let base = if self.peek() == Some(b'0') {
+            match self.bytes.get(1).cloned() {
+                Some(b'x') => 16,
+                Some(b'b') => 2,
+                Some(b'o') => 8,
+                _ => 10,
+            }
+        } else {
+            10
+        };
+
+        if base != 10 {
+            // If we have `0x45A` for example,
+            // cut it to `45A`.
+            let _ = self.advance(2);
+        }
+
+        let num_bytes = self.next_bytes_contained_in(is_int_char);
+
+        if num_bytes == 0 {
+            return Err(Error::ExpectedInteger);
+        }
+
+        // The digits are only ever inspected byte by byte, so there is no
+        // need to reinterpret them as a `str` (and thus no need for `unsafe`).
+        let digits = &self.bytes[..num_bytes];
+
+        if digits[0] == b'_' {
+            return Err(Error::UnderscoreAtBeginning);
+        }
+
+        // Folds `digits` into a `T`, using `f` to add or subtract each digit.
+        fn calc_num<T: Num>(
+            bytes: &Bytes,
+            digits: &[u8],
+            base: u8,
+            mut f: impl FnMut(&mut T, u8) -> bool,
+        ) -> Result<T> {
+            let mut num_acc = T::from_u8(0);
+
+            for &byte in digits {
+                if byte == b'_' {
+                    continue;
+                }
+
+                if num_acc.checked_mul_ext(base) {
+                    return Err(Error::IntegerOutOfBounds);
+                }
+
+                let digit = bytes.decode_hex(byte)?;
+
+                if digit >= base {
+                    return Err(Error::ExpectedInteger);
+                }
+
+                if f(&mut num_acc, digit) {
+                    return Err(Error::IntegerOutOfBounds);
+                }
+            }
+
+            Ok(num_acc)
+        }
+
+        let res = if sign > 0 {
+            calc_num(self, digits, base, T::checked_add_ext)
+        } else {
+            calc_num(self, digits, base, T::checked_sub_ext)
+        };
+
+        let _ = self.advance(num_bytes);
+
+        res
+    }
+
+ /// Parses the next number and returns it in the smallest `AnyNum` variant
+ /// that can hold it.
+ ///
+ /// If the input looks like a float (see `next_bytes_is_float`) it is parsed
+ /// as `f64`. Otherwise it is parsed as an integer: signed if it starts with
+ /// `+` or `-`, unsigned if not. If integer parsing fails, the input is
+ /// rewound and parsing is retried as a float.
+ pub fn any_num(&mut self) -> Result<AnyNum> {
+ // We are not doing float comparisons here in the traditional sense.
+ // Instead, this code checks if a f64 fits inside an f32.
+ #[allow(clippy::float_cmp)]
+ fn any_float(f: f64) -> Result<AnyNum> {
+ if f == f64::from(f as f32) {
+ Ok(AnyNum::F32(f as f32))
+ } else {
+ Ok(AnyNum::F64(f))
+ }
+ }
+
+ // Saved so that a failed integer parse can be retried as a float.
+ let bytes_backup = self.bytes;
+
+ let first_byte = self.peek_or_eof()?;
+ let is_signed = first_byte == b'-' || first_byte == b'+';
+ let is_float = self.next_bytes_is_float();
+
+ if is_float {
+ let f = self.float::<f64>()?;
+
+ any_float(f)
+ } else {
+ let max_u8 = LargeUInt::from(std::u8::MAX);
+ let max_u16 = LargeUInt::from(std::u16::MAX);
+ let max_u32 = LargeUInt::from(std::u32::MAX);
+ #[cfg_attr(not(feature = "integer128"), allow(clippy::useless_conversion))]
+ let max_u64 = LargeUInt::from(std::u64::MAX);
+
+ let min_i8 = LargeSInt::from(std::i8::MIN);
+ let max_i8 = LargeSInt::from(std::i8::MAX);
+ let min_i16 = LargeSInt::from(std::i16::MIN);
+ let max_i16 = LargeSInt::from(std::i16::MAX);
+ let min_i32 = LargeSInt::from(std::i32::MIN);
+ let max_i32 = LargeSInt::from(std::i32::MAX);
+ #[cfg_attr(not(feature = "integer128"), allow(clippy::useless_conversion))]
+ let min_i64 = LargeSInt::from(std::i64::MIN);
+ #[cfg_attr(not(feature = "integer128"), allow(clippy::useless_conversion))]
+ let max_i64 = LargeSInt::from(std::i64::MAX);
+
+ if is_signed {
+ match self.signed_integer::<LargeSInt>() {
+ Ok(x) => {
+ // Pick the narrowest signed type whose range contains `x`.
+ if x >= min_i8 && x <= max_i8 {
+ Ok(AnyNum::I8(x as i8))
+ } else if x >= min_i16 && x <= max_i16 {
+ Ok(AnyNum::I16(x as i16))
+ } else if x >= min_i32 && x <= max_i32 {
+ Ok(AnyNum::I32(x as i32))
+ } else if x >= min_i64 && x <= max_i64 {
+ Ok(AnyNum::I64(x as i64))
+ } else {
+ #[cfg(feature = "integer128")]
+ {
+ Ok(AnyNum::I128(x))
+ }
+ #[cfg(not(feature = "integer128"))]
+ {
+ Ok(AnyNum::I64(x))
+ }
+ }
+ }
+ Err(_) => {
+ // NOTE(review): only `bytes` is restored here, `cursor` is not;
+ // positions reported afterwards may be off — confirm intent.
+ self.bytes = bytes_backup;
+
+ any_float(self.float::<f64>()?)
+ }
+ }
+ } else {
+ match self.unsigned_integer::<LargeUInt>() {
+ Ok(x) => {
+ // Pick the narrowest unsigned type that can hold `x`.
+ if x <= max_u8 {
+ Ok(AnyNum::U8(x as u8))
+ } else if x <= max_u16 {
+ Ok(AnyNum::U16(x as u16))
+ } else if x <= max_u32 {
+ Ok(AnyNum::U32(x as u32))
+ } else if x <= max_u64 {
+ Ok(AnyNum::U64(x as u64))
+ } else {
+ #[cfg(feature = "integer128")]
+ {
+ Ok(AnyNum::U128(x))
+ }
+ #[cfg(not(feature = "integer128"))]
+ {
+ Ok(AnyNum::U64(x))
+ }
+ }
+ }
+ Err(_) => {
+ // NOTE(review): only `bytes` is restored here, `cursor` is not;
+ // positions reported afterwards may be off — confirm intent.
+ self.bytes = bytes_backup;
+
+ any_float(self.float::<f64>()?)
+ }
+ }
+ }
+ }
+ }
+
+    /// Consumes the literal `true` or `false` and returns its value, or
+    /// fails with `Error::ExpectedBoolean` if neither is next.
+    pub fn bool(&mut self) -> Result<bool> {
+        if self.consume("true") {
+            return Ok(true);
+        }
+
+        if self.consume("false") {
+            return Ok(false);
+        }
+
+        Err(Error::ExpectedBoolean)
+    }
+
+ /// Returns the input that has not been consumed yet.
+ pub fn bytes(&self) -> &[u8] {
+ self.bytes
+ }
+
+ /// Parses a char literal enclosed in single quotes.
+ ///
+ /// The content is either a backslash escape (see `parse_escape`) or
+ /// exactly one UTF-8 encoded character. Fails with `Error::ExpectedChar`
+ /// if a quote is missing or the content is not exactly one character.
+ pub fn char(&mut self) -> Result<char> {
+ if !self.consume("'") {
+ return Err(Error::ExpectedChar);
+ }
+
+ let c = self.peek_or_eof()?;
+
+ let c = if c == b'\\' {
+ let _ = self.advance(1);
+
+ self.parse_escape()?
+ } else {
+ // Check where the end of the char (') is and try to
+ // interpret the rest as UTF-8
+
+ // The closing quote is only searched for within the next 5 bytes
+ // (a UTF-8 char is at most 4 bytes long, plus the quote itself).
+ let max = self.bytes.len().min(5);
+ let pos: usize = self.bytes[..max]
+ .iter()
+ .position(|&x| x == b'\'')
+ .ok_or(Error::ExpectedChar)?;
+ let s = from_utf8(&self.bytes[0..pos]).map_err(Error::from)?;
+ let mut chars = s.chars();
+
+ let first = chars.next().ok_or(Error::ExpectedChar)?;
+ // More than one char between the quotes is not a char literal.
+ if chars.next().is_some() {
+ return Err(Error::ExpectedChar);
+ }
+
+ let _ = self.advance(pos);
+
+ first
+ };
+
+ if !self.consume("'") {
+ return Err(Error::ExpectedChar);
+ }
+
+ Ok(c)
+ }
+
+    /// Skips whitespace, then consumes a `,` if one follows.
+    ///
+    /// Returns whether a comma was consumed; if so, whitespace after it is
+    /// skipped as well.
+    pub fn comma(&mut self) -> Result<bool> {
+        self.skip_ws()?;
+
+        let found = self.consume(",");
+        if found {
+            self.skip_ws()?;
+        }
+
+        Ok(found)
+    }
+
+    /// Checks, without consuming anything, that the input starts with
+    /// `ident` and that the byte following it cannot continue an identifier.
+    pub fn check_ident(&mut self, ident: &str) -> bool {
+        if !self.test_for(ident) {
+            return false;
+        }
+
+        !self.check_ident_other_char(ident.len())
+    }
+
+    /// Reports whether the byte at offset `index` of the remaining input
+    /// exists and may continue an identifier.
+    fn check_ident_other_char(&self, index: usize) -> bool {
+        match self.bytes.get(index) {
+            Some(&b) => is_ident_other_char(b),
+            None => false,
+        }
+    }
+
+    /// Looks ahead to decide whether the struct body has unnamed fields.
+    ///
+    /// Takes `self` by value and consumes input on it, so it should only be
+    /// used on a working copy.
+    pub fn check_tuple_struct(mut self) -> Result<bool> {
+        match self.identifier() {
+            // if there's no field ident, this is a tuple struct
+            Err(_) => Ok(true),
+            Ok(_) => {
+                self.skip_ws()?;
+
+                // if there is no colon after the ident, this can only be a unit struct
+                let next = self.eat_byte()?;
+
+                Ok(next != b':')
+            }
+        }
+    }
+
+    /// Consumes `ident` if the input starts with it and the byte after it
+    /// cannot belong to an identifier. Returns whether it was consumed.
+    pub fn consume_ident(&mut self, ident: &str) -> bool {
+        let matched = self.check_ident(ident);
+
+        if matched {
+            let _ = self.advance(ident.len());
+        }
+
+        matched
+    }
+
+ /// Consumes the struct name `ident` if it is next in the input.
+ ///
+ /// Returns `Ok(false)` if the next byte cannot be part of an identifier
+ /// (no name is present) and `Ok(true)` if `ident` was found and consumed.
+ ///
+ /// # Errors
+ ///
+ /// * `ExpectedStructLike` if a name is present but `ident` is empty.
+ /// * `ExpectedNamedStructLike` if what follows is not a valid identifier.
+ /// * `ExpectedDifferentStructName` if a different identifier was found.
+ pub fn consume_struct_name(&mut self, ident: &'static str) -> Result<bool> {
+ // `check_ident("")` holds exactly when the next byte is not an ident char.
+ if self.check_ident("") {
+ Ok(false)
+ } else if ident.is_empty() {
+ Err(Error::ExpectedStructLike)
+ } else if self.check_ident(ident) {
+ let _ = self.advance(ident.len());
+
+ Ok(true)
+ } else {
+ // If the following is not even an identifier, then a missing
+ // opening `(` seems more likely
+ let maybe_ident = self
+ .identifier()
+ .map_err(|_| Error::ExpectedNamedStructLike(ident))?;
+
+ let found = std::str::from_utf8(maybe_ident).map_err(Error::from)?;
+
+ Err(Error::ExpectedDifferentStructName {
+ expected: ident,
+ found: String::from(found),
+ })
+ }
+ }
+
+    /// Consumes `s` if the input starts with it. Returns whether it did.
+    pub fn consume(&mut self, s: &str) -> bool {
+        let matched = self.test_for(s);
+
+        if matched {
+            let _ = self.advance(s.len());
+        }
+
+        matched
+    }
+
+    /// Tries to consume each string in `all` in order, skipping whitespace
+    /// after every one that matched.
+    ///
+    /// Returns `Ok(true)` only if all of them matched. Every element is
+    /// attempted even after an earlier one failed to match or errored; the
+    /// first error encountered is the one returned.
+    fn consume_all(&mut self, all: &[&str]) -> Result<bool> {
+        let mut outcome: Result<bool> = Ok(true);
+
+        for token in all {
+            let step = if self.consume(token) {
+                self.skip_ws().map(|()| true)
+            } else {
+                Ok(false)
+            };
+
+            outcome = outcome.and_then(|so_far| step.map(|matched| matched && so_far));
+        }
+
+        outcome
+    }
+
+ pub fn eat_byte(&mut self) -> Result<u8> {
+ let peek = self.peek_or_eof()?;
+ let _ = self.advance_single();
+
+ Ok(peek)
+ }
+
+ pub fn expect_byte(&mut self, byte: u8, error: Error) -> Result<()> {
+ self.eat_byte()
+ .and_then(|b| if b == byte { Ok(()) } else { Err(error) })
+ }
+
+ /// Returns the extensions bit mask.
+ ///
+ /// Parses one attribute of the form `#![enable(name, name, ...)]`, with an
+ /// optional trailing comma. If the next byte is not `#`, nothing is
+ /// consumed and an empty mask is returned.
+ ///
+ /// # Errors
+ ///
+ /// * `ExpectedAttribute` if the `#![enable(` prefix is incomplete.
+ /// * `NoSuchExtension` if a listed name is not a known extension.
+ /// * `ExpectedComma` if two names are not separated by a comma.
+ /// * `ExpectedAttributeEnd` if the closing `)]` is missing.
+ fn extensions(&mut self) -> Result<Extensions> {
+ if self.peek() != Some(b'#') {
+ return Ok(Extensions::empty());
+ }
+
+ if !self.consume_all(&["#", "!", "[", "enable", "("])? {
+ return Err(Error::ExpectedAttribute);
+ }
+
+ self.skip_ws()?;
+ let mut extensions = Extensions::empty();
+
+ loop {
+ let ident = self.identifier()?;
+ let extension = Extensions::from_ident(ident).ok_or_else(|| {
+ Error::NoSuchExtension(String::from_utf8_lossy(ident).into_owned())
+ })?;
+
+ extensions |= extension;
+
+ let comma = self.comma()?;
+
+ // If we have no comma but another item, return an error
+ if !comma && self.check_ident_other_char(0) {
+ return Err(Error::ExpectedComma);
+ }
+
+ // If there's no comma, assume the list ended.
+ // If there is, it might be a trailing one, thus we only
+ // continue the loop if we get an ident char.
+ if !comma || !self.check_ident_other_char(0) {
+ break;
+ }
+ }
+
+ self.skip_ws()?;
+
+ if self.consume_all(&[")", "]"])? {
+ Ok(extensions)
+ } else {
+ Err(Error::ExpectedAttributeEnd)
+ }
+ }
+
+    /// Parses a floating point literal into `T`.
+    ///
+    /// The special literals `inf` and `NaN` (optionally prefixed with `+` or
+    /// `-`) are accepted when not followed by an identifier character.
+    /// Otherwise the longest run of float characters is handed to
+    /// `T::from_str`; the run is consumed whether or not that succeeds.
+    ///
+    /// # Errors
+    ///
+    /// * `FloatUnderscore` if the run contains an underscore; the input is
+    ///   then advanced up to that underscore.
+    /// * `ExpectedFloat` if `T::from_str` rejects the run.
+    pub fn float<T>(&mut self) -> Result<T>
+    where
+        T: FromStr,
+    {
+        for literal in &["inf", "+inf", "-inf", "NaN", "+NaN", "-NaN"] {
+            if self.consume_ident(literal) {
+                return FromStr::from_str(literal).map_err(|_| unreachable!()); // must not fail
+            }
+        }
+
+        let num_bytes = self.next_bytes_contained_in(is_float_char);
+        let candidate = &self.bytes[..num_bytes];
+
+        // Since `rustc` allows `1_0.0_1`, lint against underscores in floats
+        if let Some(err_bytes) = candidate.iter().position(|b| *b == b'_') {
+            let _ = self.advance(err_bytes);
+
+            return Err(Error::FloatUnderscore);
+        }
+
+        // Every byte in `candidate` is an ASCII float character, so this
+        // check cannot fail; using the checked conversion avoids `unsafe`.
+        let s = from_utf8(candidate).map_err(Error::from)?;
+        let res = FromStr::from_str(s).map_err(|_| Error::ExpectedFloat);
+
+        let _ = self.advance(num_bytes);
+
+        res
+    }
+
+ /// Parses an identifier and returns its bytes.
+ ///
+ /// Raw identifiers of the form `r#name` are supported; the returned slice
+ /// does not include the `r#` prefix. Input that starts a raw string
+ /// literal (`r"` or `r#` not followed by a raw identifier character) is
+ /// rejected with `ExpectedIdentifier` before anything is consumed.
+ pub fn identifier(&mut self) -> Result<&'a [u8]> {
+ let next = self.peek_or_eof()?;
+ if !is_ident_first_char(next) {
+ return Err(Error::ExpectedIdentifier);
+ }
+
+ // If the next two bytes signify the start of a raw string literal,
+ // return an error.
+ let length = if next == b'r' {
+ // NOTE(review): an `r` that is the very last input byte yields
+ // `Error::Eof` here instead of the identifier `r` — confirm intent.
+ match self.bytes.get(1).ok_or(Error::Eof)? {
+ b'"' => return Err(Error::ExpectedIdentifier),
+ b'#' => {
+ let after_next = self.bytes.get(2).cloned().unwrap_or_default();
+ // Note: it's important to check this before advancing forward, so that
+ // the value-type deserializer can fall back to parsing it differently.
+ if !is_ident_raw_char(after_next) {
+ return Err(Error::ExpectedIdentifier);
+ }
+ // skip "r#"
+ let _ = self.advance(2);
+ self.next_bytes_contained_in(is_ident_raw_char)
+ }
+ _ => self.next_bytes_contained_in(is_ident_other_char),
+ }
+ } else {
+ self.next_bytes_contained_in(is_ident_other_char)
+ };
+
+ let ident = &self.bytes[..length];
+ let _ = self.advance(length);
+
+ Ok(ident)
+ }
+
+    /// Returns the length of the longest prefix of the remaining input in
+    /// which every byte satisfies `allowed`.
+    pub fn next_bytes_contained_in(&self, allowed: fn(u8) -> bool) -> usize {
+        self.bytes
+            .iter()
+            .position(|&b| !allowed(b))
+            .unwrap_or(self.bytes.len())
+    }
+
+    /// Guesses whether the upcoming number is a float.
+    ///
+    /// After skipping one leading `+` or `-`, the run of float characters is
+    /// compared with the run of integer characters; the number is treated as
+    /// a float if the float run is strictly longer. Returns `false` on empty
+    /// input.
+    pub fn next_bytes_is_float(&self) -> bool {
+        let first = match self.peek() {
+            Some(byte) => byte,
+            None => return false,
+        };
+
+        let skip = if first == b'+' || first == b'-' { 1 } else { 0 };
+        let run_length = |allowed: fn(u8) -> bool| {
+            self.bytes
+                .iter()
+                .skip(skip)
+                .take_while(|&&b| allowed(b))
+                .count()
+        };
+
+        let flen = run_length(is_float_char);
+        let ilen = run_length(is_int_char);
+
+        flen > ilen
+    }
+
+    /// Skips whitespace and comments until neither is next in the input.
+    pub fn skip_ws(&mut self) -> Result<()> {
+        loop {
+            while let Some(byte) = self.peek() {
+                if !is_whitespace_char(byte) {
+                    break;
+                }
+
+                let _ = self.advance_single();
+            }
+
+            // A skipped comment may be followed by more whitespace.
+            if !self.skip_comment()? {
+                break Ok(());
+            }
+        }
+    }
+
+    /// Returns the next byte without consuming it, or `None` at the end of
+    /// the input.
+    pub fn peek(&self) -> Option<u8> {
+        self.bytes.first().copied()
+    }
+
+ pub fn peek_or_eof(&self) -> Result<u8> {
+ self.bytes.first().cloned().ok_or(Error::Eof)
+ }
+
+    /// Parses an integer with an optional leading `+` or `-` sign.
+    ///
+    /// A sign byte, if present, is consumed before the digits are parsed by
+    /// `any_integer`; without a sign the number is treated as positive.
+    pub fn signed_integer<T>(&mut self) -> Result<T>
+    where
+        T: Num,
+    {
+        let explicit_sign: Option<i8> = match self.peek_or_eof()? {
+            b'+' => Some(1),
+            b'-' => Some(-1),
+            _ => None,
+        };
+
+        if explicit_sign.is_some() {
+            let _ = self.advance_single();
+        }
+
+        self.any_integer(explicit_sign.unwrap_or(1))
+    }
+
+    /// Parses a string literal: an escaped string if it starts with `"`, a
+    /// raw string if it starts with `r`, otherwise `Error::ExpectedString`.
+    pub fn string(&mut self) -> Result<ParsedStr<'a>> {
+        if self.consume("\"") {
+            return self.escaped_string();
+        }
+
+        if self.consume("r") {
+            return self.raw_string();
+        }
+
+        Err(Error::ExpectedString)
+    }
+
+ /// Parses the remainder of a `"`-delimited string; the opening quote must
+ /// already have been consumed.
+ ///
+ /// A string without escapes is returned as a borrowed slice of the input.
+ /// A string with escapes is decoded into a newly allocated `String`.
+ /// Fails with `ExpectedStringEnd` if no closing quote is found.
+ fn escaped_string(&mut self) -> Result<ParsedStr<'a>> {
+ use std::iter::repeat;
+
+ // Find the first byte that needs attention: a backslash or the end quote.
+ let (i, end_or_escape) = self
+ .bytes
+ .iter()
+ .enumerate()
+ .find(|&(_, &b)| b == b'\\' || b == b'"')
+ .ok_or(Error::ExpectedStringEnd)?;
+
+ if *end_or_escape == b'"' {
+ let s = from_utf8(&self.bytes[..i]).map_err(Error::from)?;
+
+ // Advance by the number of bytes of the string
+ // + 1 for the `"`.
+ let _ = self.advance(i + 1);
+
+ Ok(ParsedStr::Slice(s))
+ } else {
+ let mut i = i;
+ // Everything before the first escape is copied verbatim.
+ let mut s: Vec<_> = self.bytes[..i].to_vec();
+
+ loop {
+ // Skip the literal bytes just copied plus the backslash.
+ let _ = self.advance(i + 1);
+ let character = self.parse_escape()?;
+ match character.len_utf8() {
+ 1 => s.push(character as u8),
+ len => {
+ // Reserve `len` bytes and encode the char into them.
+ let start = s.len();
+ s.extend(repeat(0).take(len));
+ character.encode_utf8(&mut s[start..]);
+ }
+ }
+
+ let (new_i, end_or_escape) = self
+ .bytes
+ .iter()
+ .enumerate()
+ .find(|&(_, &b)| b == b'\\' || b == b'"')
+ .ok_or(Error::ExpectedStringEnd)?;
+
+ i = new_i;
+ s.extend_from_slice(&self.bytes[..i]);
+
+ if *end_or_escape == b'"' {
+ let _ = self.advance(i + 1);
+
+ let s = String::from_utf8(s).map_err(Error::from)?;
+ break Ok(ParsedStr::Allocated(s));
+ }
+ }
+ }
+ }
+
+ /// Parses the remainder of a raw string (`r"..."`, `r#"..."#`, ...); the
+ /// leading `r` must already have been consumed.
+ ///
+ /// The string ends at the first `"` followed by as many `#` as preceded
+ /// the opening quote. The content is returned as a borrowed slice.
+ ///
+ /// # Errors
+ ///
+ /// * `ExpectedString` if the hashes are not followed by `"`.
+ /// * `ExpectedStringEnd` if the closing delimiter is not found.
+ fn raw_string(&mut self) -> Result<ParsedStr<'a>> {
+ let num_hashes = self.bytes.iter().take_while(|&&b| b == b'#').count();
+ let hashes = &self.bytes[..num_hashes];
+ let _ = self.advance(num_hashes);
+
+ if !self.consume("\"") {
+ return Err(Error::ExpectedString);
+ }
+
+ // The closing delimiter: a quote followed by the same hashes.
+ let ending = [&[b'"'], hashes].concat();
+ let i = self
+ .bytes
+ .windows(num_hashes + 1)
+ .position(|window| window == ending.as_slice())
+ .ok_or(Error::ExpectedStringEnd)?;
+
+ let s = from_utf8(&self.bytes[..i]).map_err(Error::from)?;
+
+ // Advance by the number of bytes of the string
+ // + `num_hashes` + 1 for the `"`.
+ let _ = self.advance(i + num_hashes + 1);
+
+ Ok(ParsedStr::Slice(s))
+ }
+
+    /// Reports whether the remaining input starts with the bytes of `s`.
+    fn test_for(&self, s: &str) -> bool {
+        self.bytes.starts_with(s.as_bytes())
+    }
+
+ /// Parses an integer without a sign prefix; the digits are accumulated as
+ /// a positive number by `any_integer`.
+ pub fn unsigned_integer<T: Num>(&mut self) -> Result<T> {
+ self.any_integer(1)
+ }
+
+    /// Consumes two hex digits and returns the byte they encode, the first
+    /// digit being the high nibble.
+    fn decode_ascii_escape(&mut self) -> Result<u8> {
+        let first = self.eat_byte()?;
+        let high = self.decode_hex(first)?;
+
+        let second = self.eat_byte()?;
+        let low = self.decode_hex(second)?;
+
+        Ok((high << 4) | low)
+    }
+
+    /// Converts one ASCII hex digit (`0-9`, `a-f`, `A-F`) to its value, or
+    /// fails with `Error::InvalidEscape` for any other byte.
+    #[inline]
+    fn decode_hex(&self, c: u8) -> Result<u8> {
+        let value = match c {
+            b'0'..=b'9' => c - b'0',
+            b'a'..=b'f' => 10 + c - b'a',
+            b'A'..=b'F' => 10 + c - b'A',
+            _ => return Err(Error::InvalidEscape("Non-hex digit found")),
+        };
+
+        Ok(value)
+    }
+
+ /// Parses the part of an escape sequence that follows the backslash and
+ /// returns the character it denotes.
+ ///
+ /// Supported are `\'`, `\"`, `\\`, `\n`, `\r`, `\t`, `\0`, `\xHH` (two hex
+ /// digits) and `\u{H...}` (one to six hex digits). Anything else fails
+ /// with `Error::InvalidEscape`.
+ fn parse_escape(&mut self) -> Result<char> {
+ let c = match self.eat_byte()? {
+ b'\'' => '\'',
+ b'"' => '"',
+ b'\\' => '\\',
+ b'n' => '\n',
+ b'r' => '\r',
+ b't' => '\t',
+ b'0' => '\0',
+ b'x' => self.decode_ascii_escape()? as char,
+ b'u' => {
+ self.expect_byte(b'{', Error::InvalidEscape("Missing { in Unicode escape"))?;
+
+ // Code point accumulated from the hex digits read so far.
+ let mut bytes: u32 = 0;
+ let mut num_digits = 0;
+
+ while num_digits < 6 {
+ let byte = self.peek_or_eof()?;
+
+ // The closing brace is left for `expect_byte` below.
+ if byte == b'}' {
+ break;
+ } else {
+ self.advance_single()?;
+ }
+
+ let byte = self.decode_hex(byte)?;
+ bytes <<= 4;
+ bytes |= u32::from(byte);
+
+ num_digits += 1;
+ }
+
+ if num_digits == 0 {
+ return Err(Error::InvalidEscape(
+ "Expected 1-6 digits, got 0 digits in Unicode escape",
+ ));
+ }
+
+ self.expect_byte(
+ b'}',
+ Error::InvalidEscape("No } at the end of Unicode escape"),
+ )?;
+ char_from_u32(bytes).ok_or(Error::InvalidEscape("Not a valid char"))?
+ }
+ _ => {
+ return Err(Error::InvalidEscape("Unknown escape character"));
+ }
+ };
+
+ Ok(c)
+ }
+
+ /// Skips one comment if the input starts with one.
+ ///
+ /// Returns `Ok(true)` if a comment was consumed and `Ok(false)` if the
+ /// input does not start with `/`. Line comments (`//`) run up to, but not
+ /// including, the next newline. Block comments (`/* */`) may be nested.
+ ///
+ /// # Errors
+ ///
+ /// * `UnclosedBlockComment` if the input ends inside a block comment.
+ /// * `UnexpectedByte` if `/` is followed by neither `/` nor `*`.
+ fn skip_comment(&mut self) -> Result<bool> {
+ if self.consume("/") {
+ match self.eat_byte()? {
+ b'/' => {
+ let bytes = self.bytes.iter().take_while(|&&b| b != b'\n').count();
+
+ let _ = self.advance(bytes);
+ }
+ b'*' => {
+ // Nesting depth of block comments currently open.
+ let mut level = 1;
+
+ while level > 0 {
+ // Distance to the next byte that could start `/*` or `*/`.
+ let bytes = self
+ .bytes
+ .iter()
+ .take_while(|&&b| b != b'/' && b != b'*')
+ .count();
+
+ if self.bytes.is_empty() {
+ return Err(Error::UnclosedBlockComment);
+ }
+
+ let _ = self.advance(bytes);
+
+ // check whether / or * and take action
+ if self.consume("/*") {
+ level += 1;
+ } else if self.consume("*/") {
+ level -= 1;
+ } else {
+ // A lone `/` or `*`: skip it and keep scanning.
+ self.eat_byte().map_err(|_| Error::UnclosedBlockComment)?;
+ }
+ }
+ }
+ b => return Err(Error::UnexpectedByte(b as char)),
+ }
+
+ Ok(true)
+ } else {
+ Ok(false)
+ }
+ }
+}
+
+/// Integer operations needed to accumulate a number digit by digit.
+///
+/// The `*_ext` methods update `self` in place when the operation succeeds and
+/// report overflow through their return value.
+pub trait Num {
+ /// Converts `x` into `Self`.
+ fn from_u8(x: u8) -> Self;
+
+ /// Returns `true` on overflow
+ fn checked_mul_ext(&mut self, x: u8) -> bool;
+
+ /// Returns `true` on overflow
+ fn checked_add_ext(&mut self, x: u8) -> bool;
+
+ /// Returns `true` on overflow
+ fn checked_sub_ext(&mut self, x: u8) -> bool;
+}
+
+// Implements `Num` for each listed primitive integer type in terms of the
+// type's own `checked_mul` / `checked_add` / `checked_sub`.
+macro_rules! impl_num {
+    ($($tys:ident)*) => {
+        $(
+            impl Num for $tys {
+                fn from_u8(x: u8) -> Self {
+                    x as $tys
+                }
+
+                fn checked_mul_ext(&mut self, x: u8) -> bool {
+                    let product = self.checked_mul(Self::from_u8(x));
+                    if let Some(n) = product {
+                        *self = n;
+                    }
+                    product.is_none()
+                }
+
+                fn checked_add_ext(&mut self, x: u8) -> bool {
+                    let sum = self.checked_add(Self::from_u8(x));
+                    if let Some(n) = sum {
+                        *self = n;
+                    }
+                    sum.is_none()
+                }
+
+                fn checked_sub_ext(&mut self, x: u8) -> bool {
+                    let difference = self.checked_sub(Self::from_u8(x));
+                    if let Some(n) = difference {
+                        *self = n;
+                    }
+                    difference.is_none()
+                }
+            }
+        )*
+    };
+}
+
+// Implement `Num` for the primitive integer types; the 128-bit types are
+// only included when the `integer128` feature is enabled.
+#[cfg(feature = "integer128")]
+impl_num!(u8 u16 u32 u64 u128 i8 i16 i32 i64 i128);
+#[cfg(not(feature = "integer128"))]
+impl_num!(u8 u16 u32 u64 i8 i16 i32 i64);
+
+/// A parsed string literal.
+#[derive(Clone, Debug)]
+pub enum ParsedStr<'a> {
+ /// The string contained escapes and was decoded into a new `String`.
+ Allocated(String),
+ /// The string is borrowed directly from the input.
+ Slice(&'a str),
+}
+
+#[cfg(test)]
+mod tests {
+ use super::*;
+
+ // The two hex digits `1` and `0` must decode to the byte 0x10.
+ #[test]
+ fn decode_x10() {
+ let mut bytes = Bytes::new(b"10").unwrap();
+ assert_eq!(bytes.decode_ascii_escape(), Ok(0x10));
+ }
+}