Adding upstream version 115.7.0esr. (refs: upstream/115.7.0esr, upstream)

Signed-off-by: Daniel Baumann <daniel.baumann@progress-linux.org>
author: Daniel Baumann <daniel.baumann@progress-linux.org> 2024-04-07 19:33:14 +0000
committer: Daniel Baumann <daniel.baumann@progress-linux.org> 2024-04-07 19:33:14 +0000
commit: 36d22d82aa202bb199967e9512281e9a53db42c9 (patch)
tree: 105e8c98ddea1c1e4784a60a5a6410fa416be2de /third_party/rust/ron/src/parse.rs
parent: Initial commit. (diff)
download: firefox-esr-36d22d82aa202bb199967e9512281e9a53db42c9.tar.xz
firefox-esr-36d22d82aa202bb199967e9512281e9a53db42c9.zip
1 files changed, 978 insertions, 0 deletions
diff --git a/third_party/rust/ron/src/parse.rs b/third_party/rust/ron/src/parse.rs
new file mode 100644
index 0000000000..b0cc632f30
--- /dev/null
+++ b/third_party/rust/ron/src/parse.rs
@@ -0,0 +1,978 @@
+#![allow(clippy::identity_op)]
+
+use std::{
+    char::from_u32 as char_from_u32,
+    str::{from_utf8, from_utf8_unchecked, FromStr},
+};
+
+use crate::{
+    error::{Error, Position, Result, SpannedError, SpannedResult},
+    extensions::Extensions,
+};
+
+// We have the following char categories.
+const INT_CHAR: u8 = 1 << 0; // [0-9A-Fa-f_]
+const FLOAT_CHAR: u8 = 1 << 1; // [0-9\.Ee+-_]
+const IDENT_FIRST_CHAR: u8 = 1 << 2; // [A-Za-z_]
+const IDENT_OTHER_CHAR: u8 = 1 << 3; // [A-Za-z_0-9]
+const IDENT_RAW_CHAR: u8 = 1 << 4; // [A-Za-z_0-9\.+-]
+const WHITESPACE_CHAR: u8 = 1 << 5; // [\n\t\r ]
+
+// We encode each char as belonging to some number of these categories.
+const DIGIT: u8 = INT_CHAR | FLOAT_CHAR | IDENT_OTHER_CHAR | IDENT_RAW_CHAR; // [0-9]
+const ABCDF: u8 = INT_CHAR | IDENT_FIRST_CHAR | IDENT_OTHER_CHAR | IDENT_RAW_CHAR; // [ABCDFabcdf]
+const UNDER: u8 = INT_CHAR | FLOAT_CHAR | IDENT_FIRST_CHAR | IDENT_OTHER_CHAR | IDENT_RAW_CHAR; // [_]
+const E____: u8 = INT_CHAR | FLOAT_CHAR | IDENT_FIRST_CHAR | IDENT_OTHER_CHAR | IDENT_RAW_CHAR; // [Ee]
+const G2Z__: u8 = IDENT_FIRST_CHAR | IDENT_OTHER_CHAR | IDENT_RAW_CHAR; // [G-Zg-z]
+const PUNCT: u8 = FLOAT_CHAR | IDENT_RAW_CHAR; // [\.+-]
+const WS___: u8 = WHITESPACE_CHAR; // [\t\n\r ]
+const _____: u8 = 0; // everything else
+
+// Table of encodings, for fast predicates. (Non-ASCII and special chars are
+// shown with '·' in the comment.)
+#[rustfmt::skip]
+const ENCODINGS: [u8; 256] = [
+/*                     0      1      2      3      4      5      6      7      8      9    */
+/*   0+: ·········· */ _____, _____, _____, _____, _____, _____, _____, _____, _____, WS___,
+/*  10+: ·········· */ WS___, _____, _____, WS___, _____, _____, _____, _____, _____, _____,
+/*  20+: ·········· */ _____, _____, _____, _____, _____, _____, _____, _____, _____, _____,
+/*  30+: ·· !"#$%&' */ _____, _____, WS___, _____, _____, _____, _____, _____, _____, _____,
+/*  40+: ()*+,-./01 */ _____, _____, _____, PUNCT, _____, PUNCT, PUNCT, _____, DIGIT, DIGIT,
+/*  50+: 23456789:; */ DIGIT, DIGIT, DIGIT, DIGIT, DIGIT, DIGIT, DIGIT, DIGIT, _____, _____,
+/*  60+: <=>?@ABCDE */ _____, _____, _____, _____, _____, ABCDF, ABCDF, ABCDF, ABCDF, E____,
+/*  70+: FGHIJKLMNO */ ABCDF, G2Z__, G2Z__, G2Z__, G2Z__, G2Z__, G2Z__, G2Z__, G2Z__, G2Z__,
+/*  80+: PQRSTUVWXY */ G2Z__, G2Z__, G2Z__, G2Z__, G2Z__, G2Z__, G2Z__, G2Z__, G2Z__, G2Z__,
+/*  90+: Z[\]^_`abc */ G2Z__, _____, _____, _____, _____, UNDER, _____, ABCDF, ABCDF, ABCDF,
+/* 100+: defghijklm */ ABCDF, E____, ABCDF, G2Z__, G2Z__, G2Z__, G2Z__, G2Z__, G2Z__, G2Z__,
+/* 110+: nopqrstuvw */ G2Z__, G2Z__, G2Z__, G2Z__, G2Z__, G2Z__, G2Z__, G2Z__, G2Z__, G2Z__,
+/* 120+: xyz{|}~··· */ G2Z__, G2Z__, G2Z__, _____, _____, _____, _____, _____, _____, _____,
+/* 130+: ·········· */ _____, _____, _____, _____, _____, _____, _____, _____, _____, _____,
+/* 140+: ·········· */ _____, _____, _____, _____, _____, _____, _____, _____, _____, _____,
+/* 150+: ·········· */ _____, _____, _____, _____, _____, _____, _____, _____, _____, _____,
+/* 160+: ·········· */ _____, _____, _____, _____, _____, _____, _____, _____, _____, _____,
+/* 170+: ·········· */ _____, _____, _____, _____, _____, _____, _____, _____, _____, _____,
+/* 180+: ·········· */ _____, _____, _____, _____, _____, _____, _____, _____, _____, _____,
+/* 190+: ·········· */ _____, _____, _____, _____, _____, _____, _____, _____, _____, _____,
+/* 200+: ·········· */ _____, _____, _____, _____, _____, _____, _____, _____, _____, _____,
+/* 210+: ·········· */ _____, _____, _____, _____, _____, _____, _____, _____, _____, _____,
+/* 220+: ·········· */ _____, _____, _____, _____, _____, _____, _____, _____, _____, _____,
+/* 230+: ·········· */ _____, _____, _____, _____, _____, _____, _____, _____, _____, _____,
+/* 240+: ·········· */ _____, _____, _____, _____, _____, _____, _____, _____, _____, _____,
+/* 250+: ·········· */ _____, _____, _____, _____, _____, _____
+];
+
+/// Returns `true` when the `ENCODINGS` entry for `c` has the `INT_CHAR` bit set.
+const fn is_int_char(c: u8) -> bool {
+    let categories = ENCODINGS[c as usize];
+    (categories & INT_CHAR) != 0
+}
+
+/// Returns `true` when the `ENCODINGS` entry for `c` has the `FLOAT_CHAR` bit set.
+const fn is_float_char(c: u8) -> bool {
+    let categories = ENCODINGS[c as usize];
+    (categories & FLOAT_CHAR) != 0
+}
+
+/// Returns `true` when the `ENCODINGS` entry for `c` has the `IDENT_FIRST_CHAR` bit set.
+pub const fn is_ident_first_char(c: u8) -> bool {
+    let categories = ENCODINGS[c as usize];
+    (categories & IDENT_FIRST_CHAR) != 0
+}
+
+/// Returns `true` when the `ENCODINGS` entry for `c` has the `IDENT_OTHER_CHAR` bit set.
+pub const fn is_ident_other_char(c: u8) -> bool {
+    let categories = ENCODINGS[c as usize];
+    (categories & IDENT_OTHER_CHAR) != 0
+}
+
+/// Returns `true` when the `ENCODINGS` entry for `c` has the `IDENT_RAW_CHAR` bit set.
+const fn is_ident_raw_char(c: u8) -> bool {
+    let categories = ENCODINGS[c as usize];
+    (categories & IDENT_RAW_CHAR) != 0
+}
+
+/// Returns `true` when the `ENCODINGS` entry for `c` has the `WHITESPACE_CHAR` bit set.
+const fn is_whitespace_char(c: u8) -> bool {
+    let categories = ENCODINGS[c as usize];
+    (categories & WHITESPACE_CHAR) != 0
+}
+
+#[derive(Clone, Debug, PartialEq)]
+pub enum AnyNum { // A parsed number, tagged with the primitive type `Bytes::any_num` selected for it.
+    F32(f32), // float that round-trips through `f32` without loss
+    F64(f64), // float that needs full `f64` precision
+    I8(i8), // signed variants are produced for literals with an explicit `+`/`-` sign
+    U8(u8), // unsigned variants are produced for literals without a sign
+    I16(i16),
+    U16(u16),
+    I32(i32),
+    U32(u32),
+    I64(i64),
+    U64(u64),
+    #[cfg(feature = "integer128")] // 128-bit variants exist only with the `integer128` feature
+    I128(i128),
+    #[cfg(feature = "integer128")]
+    U128(u128),
+}
+
+#[derive(Clone, Copy, Debug)]
+pub struct Bytes<'a> { // Cursor over the not-yet-consumed input; `Copy`, so callers can parse on a working copy.
+    /// Bits set according to the `Extensions` enum; filled in by `Bytes::new` from leading attributes.
+    pub exts: Extensions,
+    bytes: &'a [u8], // remaining (unconsumed) input; shrinks from the front as bytes are consumed
+    cursor: Position, // line/column of the next byte, both starting at 1 (see `Bytes::new`)
+}
+
+#[cfg(feature = "integer128")] // widest unsigned type `any_num` parses into: u128 with the feature...
+pub(crate) type LargeUInt = u128;
+#[cfg(not(feature = "integer128"))] // ...and u64 without it
+pub(crate) type LargeUInt = u64;
+#[cfg(feature = "integer128")] // widest signed type `any_num` parses into: i128 with the feature...
+pub(crate) type LargeSInt = i128;
+#[cfg(not(feature = "integer128"))] // ...and i64 without it
+pub(crate) type LargeSInt = i64;
+
+impl<'a> Bytes<'a> {
+    /// Creates a parser over `bytes`.
+    ///
+    /// Leading whitespace/comments are skipped and every leading extension
+    /// attribute is consumed and merged into `exts`. Any error is wrapped
+    /// with the cursor position at which it occurred.
+    pub fn new(bytes: &'a [u8]) -> SpannedResult<Self> {
+        let start = Position { line: 1, col: 1 };
+        let mut parser = Bytes {
+            exts: Extensions::empty(),
+            bytes,
+            cursor: start,
+        };
+
+        if let Err(code) = parser.skip_ws() {
+            return Err(parser.span_error(code));
+        }
+
+        // Keep reading attributes until one comes back empty.
+        loop {
+            let enabled = match parser.extensions() {
+                Ok(enabled) => enabled,
+                Err(code) => return Err(parser.span_error(code)),
+            };
+
+            if enabled.is_empty() {
+                return Ok(parser);
+            }
+
+            parser.exts |= enabled;
+
+            if let Err(code) = parser.skip_ws() {
+                return Err(parser.span_error(code));
+            }
+        }
+    }
+
+    /// Pairs `code` with the current cursor position.
+    pub fn span_error(&self, code: Error) -> SpannedError {
+        let position = self.cursor;
+
+        SpannedError { code, position }
+    }
+
+    pub fn advance(&mut self, bytes: usize) -> Result<()> {
+        for _ in 0..bytes {
+            self.advance_single()?;
+        }
+
+        Ok(())
+    }
+
+    /// Consumes exactly one byte and updates the line/column cursor.
+    ///
+    /// Returns `Error::Eof` (via `peek_or_eof`) if no input is left.
+    pub fn advance_single(&mut self) -> Result<()> {
+        match self.peek_or_eof()? {
+            // A newline starts a fresh line at column 1.
+            b'\n' => {
+                self.cursor.line += 1;
+                self.cursor.col = 1;
+            }
+            _ => self.cursor.col += 1,
+        }
+
+        self.bytes = &self.bytes[1..];
+
+        Ok(())
+    }
+
+    /// Parses an integer literal (optional `0x`/`0b`/`0o` prefix, `_`
+    /// separators allowed after the first digit) into `T`.
+    ///
+    /// A positive `sign` accumulates digits by addition, anything else by
+    /// subtraction. The digit run is consumed even when accumulation fails.
+    fn any_integer<T: Num>(&mut self, sign: i8) -> Result<T> {
+        /// Folds `digits` into a `T`, applying each digit with `apply_digit`
+        /// (which returns `true` on overflow).
+        fn accumulate<T: Num>(
+            bytes: &Bytes,
+            digits: &[u8],
+            base: u8,
+            mut apply_digit: impl FnMut(&mut T, u8) -> bool,
+        ) -> Result<T> {
+            let mut acc = T::from_u8(0);
+
+            for &byte in digits.iter().filter(|&&b| b != b'_') {
+                if acc.checked_mul_ext(base) {
+                    return Err(Error::IntegerOutOfBounds);
+                }
+
+                let digit = bytes.decode_hex(byte)?;
+
+                if digit >= base {
+                    return Err(Error::ExpectedInteger);
+                }
+
+                if apply_digit(&mut acc, digit) {
+                    return Err(Error::IntegerOutOfBounds);
+                }
+            }
+
+            Ok(acc)
+        }
+
+        let base = match (self.peek(), self.bytes.get(1).cloned()) {
+            (Some(b'0'), Some(b'x')) => 16,
+            (Some(b'0'), Some(b'b')) => 2,
+            (Some(b'0'), Some(b'o')) => 8,
+            _ => 10,
+        };
+
+        if base != 10 {
+            // Drop the two-byte radix prefix, e.g. `0x45A` becomes `45A`.
+            let _ = self.advance(2);
+        }
+
+        let num_bytes = self.next_bytes_contained_in(is_int_char);
+        let digits = &self.bytes[..num_bytes];
+
+        match digits.first() {
+            None => return Err(Error::ExpectedInteger),
+            Some(b'_') => return Err(Error::UnderscoreAtBeginning),
+            Some(_) => {}
+        }
+
+        let res = if sign > 0 {
+            accumulate(self, digits, base, T::checked_add_ext)
+        } else {
+            accumulate(self, digits, base, T::checked_sub_ext)
+        };
+
+        let _ = self.advance(num_bytes);
+
+        res
+    }
+
+    /// Parses the next number and returns it in the narrowest `AnyNum`
+    /// variant that can hold it.
+    ///
+    /// * Input that looks like a float (see `next_bytes_is_float`) is parsed
+    ///   as `f64` and reported as `F32` when it survives an `f32` round trip.
+    /// * Input with an explicit `+`/`-` sign is parsed as a signed integer,
+    ///   everything else as an unsigned integer.
+    /// * If integer parsing fails, the parser rewinds and retries the same
+    ///   input as a float.
+    ///
+    /// Returns `Error::Eof` on empty input, otherwise whatever error the
+    /// final float parse produces.
+    pub fn any_num(&mut self) -> Result<AnyNum> {
+        // We are not doing float comparisons here in the traditional sense.
+        // Instead, this code checks if a f64 fits inside an f32.
+        #[allow(clippy::float_cmp)]
+        fn any_float(f: f64) -> Result<AnyNum> {
+            if f == f64::from(f as f32) {
+                Ok(AnyNum::F32(f as f32))
+            } else {
+                Ok(AnyNum::F64(f))
+            }
+        }
+
+        // Snapshot the remaining input AND the cursor: the integer parsers
+        // advance both, so a rewind must restore both or every later error
+        // position would be shifted by the length of the failed attempt.
+        let bytes_backup = self.bytes;
+        let cursor_backup = self.cursor;
+
+        let first_byte = self.peek_or_eof()?;
+        let is_signed = first_byte == b'-' || first_byte == b'+';
+        let is_float = self.next_bytes_is_float();
+
+        if is_float {
+            let f = self.float::<f64>()?;
+
+            any_float(f)
+        } else {
+            let max_u8 = LargeUInt::from(std::u8::MAX);
+            let max_u16 = LargeUInt::from(std::u16::MAX);
+            let max_u32 = LargeUInt::from(std::u32::MAX);
+            #[cfg_attr(not(feature = "integer128"), allow(clippy::useless_conversion))]
+            let max_u64 = LargeUInt::from(std::u64::MAX);
+
+            let min_i8 = LargeSInt::from(std::i8::MIN);
+            let max_i8 = LargeSInt::from(std::i8::MAX);
+            let min_i16 = LargeSInt::from(std::i16::MIN);
+            let max_i16 = LargeSInt::from(std::i16::MAX);
+            let min_i32 = LargeSInt::from(std::i32::MIN);
+            let max_i32 = LargeSInt::from(std::i32::MAX);
+            #[cfg_attr(not(feature = "integer128"), allow(clippy::useless_conversion))]
+            let min_i64 = LargeSInt::from(std::i64::MIN);
+            #[cfg_attr(not(feature = "integer128"), allow(clippy::useless_conversion))]
+            let max_i64 = LargeSInt::from(std::i64::MAX);
+
+            if is_signed {
+                match self.signed_integer::<LargeSInt>() {
+                    Ok(x) => {
+                        if x >= min_i8 && x <= max_i8 {
+                            Ok(AnyNum::I8(x as i8))
+                        } else if x >= min_i16 && x <= max_i16 {
+                            Ok(AnyNum::I16(x as i16))
+                        } else if x >= min_i32 && x <= max_i32 {
+                            Ok(AnyNum::I32(x as i32))
+                        } else if x >= min_i64 && x <= max_i64 {
+                            Ok(AnyNum::I64(x as i64))
+                        } else {
+                            #[cfg(feature = "integer128")]
+                            {
+                                Ok(AnyNum::I128(x))
+                            }
+                            #[cfg(not(feature = "integer128"))]
+                            {
+                                Ok(AnyNum::I64(x))
+                            }
+                        }
+                    }
+                    Err(_) => {
+                        // Rewind completely, then retry the same text as a float.
+                        self.bytes = bytes_backup;
+                        self.cursor = cursor_backup;
+
+                        any_float(self.float::<f64>()?)
+                    }
+                }
+            } else {
+                match self.unsigned_integer::<LargeUInt>() {
+                    Ok(x) => {
+                        if x <= max_u8 {
+                            Ok(AnyNum::U8(x as u8))
+                        } else if x <= max_u16 {
+                            Ok(AnyNum::U16(x as u16))
+                        } else if x <= max_u32 {
+                            Ok(AnyNum::U32(x as u32))
+                        } else if x <= max_u64 {
+                            Ok(AnyNum::U64(x as u64))
+                        } else {
+                            #[cfg(feature = "integer128")]
+                            {
+                                Ok(AnyNum::U128(x))
+                            }
+                            #[cfg(not(feature = "integer128"))]
+                            {
+                                Ok(AnyNum::U64(x))
+                            }
+                        }
+                    }
+                    Err(_) => {
+                        // Rewind completely, then retry the same text as a float.
+                        self.bytes = bytes_backup;
+                        self.cursor = cursor_backup;
+
+                        any_float(self.float::<f64>()?)
+                    }
+                }
+            }
+        }
+    }
+
+    /// Consumes the literal `true` or `false` and returns its value, or
+    /// `Error::ExpectedBoolean` if neither is next.
+    pub fn bool(&mut self) -> Result<bool> {
+        if self.consume("true") {
+            return Ok(true);
+        }
+
+        if self.consume("false") {
+            return Ok(false);
+        }
+
+        Err(Error::ExpectedBoolean)
+    }
+
+    /// Returns the input that has not been consumed yet.
+    pub fn bytes(&self) -> &[u8] {
+        &self.bytes[..]
+    }
+
+    /// Parses a single-quoted character literal, either a backslash escape
+    /// or exactly one UTF-8 encoded character.
+    ///
+    /// Returns `Error::ExpectedChar` when a quote is missing or the quotes
+    /// do not enclose exactly one character.
+    pub fn char(&mut self) -> Result<char> {
+        if !self.consume("'") {
+            return Err(Error::ExpectedChar);
+        }
+
+        let parsed = match self.peek_or_eof()? {
+            b'\\' => {
+                let _ = self.advance(1);
+
+                self.parse_escape()?
+            }
+            _ => {
+                // The closing quote must appear within the next five bytes
+                // (at most four bytes of UTF-8 plus the quote itself).
+                let window = &self.bytes[..self.bytes.len().min(5)];
+                let quote_at = match window.iter().position(|&b| b == b'\'') {
+                    Some(index) => index,
+                    None => return Err(Error::ExpectedChar),
+                };
+                let text = from_utf8(&window[..quote_at]).map_err(Error::from)?;
+
+                // Exactly one character is allowed between the quotes.
+                let mut it = text.chars();
+                let only = match (it.next(), it.next()) {
+                    (Some(ch), None) => ch,
+                    _ => return Err(Error::ExpectedChar),
+                };
+
+                let _ = self.advance(quote_at);
+
+                only
+            }
+        };
+
+        if self.consume("'") {
+            Ok(parsed)
+        } else {
+            Err(Error::ExpectedChar)
+        }
+    }
+
+    /// Skips whitespace, then consumes a `,` (and the whitespace after it)
+    /// if one is present. Returns whether a comma was consumed.
+    pub fn comma(&mut self) -> Result<bool> {
+        self.skip_ws()?;
+
+        let found = self.consume(",");
+        if found {
+            self.skip_ws()?;
+        }
+
+        Ok(found)
+    }
+
+    /// Returns `true` only if the input starts with `ident` and the byte
+    /// right after it cannot continue an identifier. Consumes nothing.
+    pub fn check_ident(&mut self, ident: &str) -> bool {
+        if !self.test_for(ident) {
+            return false;
+        }
+
+        !self.check_ident_other_char(ident.len())
+    }
+
+    /// Returns `true` if there is a byte at `index` and it may appear inside
+    /// an identifier.
+    fn check_ident_other_char(&self, index: usize) -> bool {
+        match self.bytes.get(index) {
+            Some(&byte) => is_ident_other_char(byte),
+            None => false,
+        }
+    }
+
+    /// Decides whether the upcoming struct body is tuple-like.
+    ///
+    /// Takes `self` by value, so it should only be used on a working copy.
+    pub fn check_tuple_struct(mut self) -> Result<bool> {
+        let has_field_ident = self.identifier().is_ok();
+        if !has_field_ident {
+            // No field identifier up front means this is a tuple struct.
+            return Ok(true);
+        }
+
+        self.skip_ws()?;
+
+        // Without a colon after the ident, this can only be a unit struct.
+        let next = self.eat_byte()?;
+
+        Ok(next != b':')
+    }
+
+    /// Consumes `ident` if the input starts with it and the following byte
+    /// cannot belong to an identifier. Returns whether it was consumed.
+    pub fn consume_ident(&mut self, ident: &str) -> bool {
+        let matched = self.check_ident(ident);
+
+        if matched {
+            let _ = self.advance(ident.len());
+        }
+
+        matched
+    }
+
+    /// Tries to consume the struct name `ident`.
+    ///
+    /// * `Ok(false)` when the next byte cannot start or continue an
+    ///   identifier, i.e. no name is present.
+    /// * `Ok(true)` when `ident` was found and consumed.
+    /// * `Err(..)` when a name is present but `ident` is empty, the name is
+    ///   not a valid identifier, or it differs from `ident`.
+    pub fn consume_struct_name(&mut self, ident: &'static str) -> Result<bool> {
+        if self.check_ident("") {
+            return Ok(false);
+        }
+
+        if ident.is_empty() {
+            return Err(Error::ExpectedStructLike);
+        }
+
+        if self.check_ident(ident) {
+            let _ = self.advance(ident.len());
+
+            return Ok(true);
+        }
+
+        // If what follows is not even an identifier, a missing opening `(`
+        // seems more likely than a wrong name.
+        let raw_name = match self.identifier() {
+            Ok(name) => name,
+            Err(_) => return Err(Error::ExpectedNamedStructLike(ident)),
+        };
+
+        let found = from_utf8(raw_name).map_err(Error::from)?;
+
+        Err(Error::ExpectedDifferentStructName {
+            expected: ident,
+            found: found.to_owned(),
+        })
+    }
+
+    /// Consumes `s` if the input starts with it. Returns whether it was
+    /// consumed.
+    pub fn consume(&mut self, s: &str) -> bool {
+        let present = self.test_for(s);
+
+        if present {
+            let _ = self.advance(s.len());
+        }
+
+        present
+    }
+
+    /// Attempts to consume every token in `all`, in order, skipping
+    /// whitespace after each one that matched.
+    ///
+    /// Every token is attempted even after an earlier one failed to match or
+    /// errored. The result is the first error encountered, otherwise whether
+    /// all tokens matched.
+    fn consume_all(&mut self, all: &[&str]) -> Result<bool> {
+        let mut outcome: Result<bool> = Ok(true);
+
+        for elem in all {
+            let step = if self.consume(elem) {
+                self.skip_ws().map(|()| true)
+            } else {
+                Ok(false)
+            };
+
+            // Keep the first error; otherwise AND the match flags together.
+            outcome = outcome.and_then(|so_far| step.map(|matched| matched && so_far));
+        }
+
+        outcome
+    }
+
+    pub fn eat_byte(&mut self) -> Result<u8> {
+        let peek = self.peek_or_eof()?;
+        let _ = self.advance_single();
+
+        Ok(peek)
+    }
+
+    pub fn expect_byte(&mut self, byte: u8, error: Error) -> Result<()> {
+        self.eat_byte()
+            .and_then(|b| if b == byte { Ok(()) } else { Err(error) })
+    }
+
+    /// Parses one `#![enable(a, b, ...)]` attribute and returns the
+    /// extensions bit mask it enables.
+    ///
+    /// Returns an empty mask without consuming anything when the next byte
+    /// is not `#`.
+    fn extensions(&mut self) -> Result<Extensions> {
+        if self.peek() != Some(b'#') {
+            return Ok(Extensions::empty());
+        }
+
+        let opened = self.consume_all(&["#", "!", "[", "enable", "("])?;
+        if !opened {
+            return Err(Error::ExpectedAttribute);
+        }
+
+        self.skip_ws()?;
+        let mut enabled = Extensions::empty();
+
+        loop {
+            let name = self.identifier()?;
+            let flag = match Extensions::from_ident(name) {
+                Some(flag) => flag,
+                None => {
+                    let unknown = String::from_utf8_lossy(name).into_owned();
+                    return Err(Error::NoSuchExtension(unknown));
+                }
+            };
+
+            enabled |= flag;
+
+            let had_comma = self.comma()?;
+            let ident_follows = self.check_ident_other_char(0);
+
+            match (had_comma, ident_follows) {
+                // Another item without a separating comma is an error.
+                (false, true) => return Err(Error::ExpectedComma),
+                // A comma followed by an ident char: keep reading items.
+                (true, true) => continue,
+                // No comma, or only a trailing one: the list has ended.
+                _ => break,
+            }
+        }
+
+        self.skip_ws()?;
+
+        let closed = self.consume_all(&[")", "]"])?;
+        if closed {
+            Ok(enabled)
+        } else {
+            Err(Error::ExpectedAttributeEnd)
+        }
+    }
+
+    /// Parses a float: one of the `inf`/`NaN` literals (optionally signed)
+    /// or a run of float characters handed to `T::from_str`.
+    ///
+    /// Underscores inside the run are rejected with `Error::FloatUnderscore`
+    /// and anything `T` cannot parse yields `Error::ExpectedFloat`.
+    pub fn float<T>(&mut self) -> Result<T>
+    where
+        T: FromStr,
+    {
+        const SPECIAL_LITERALS: [&str; 6] = ["inf", "+inf", "-inf", "NaN", "+NaN", "-NaN"];
+
+        for &literal in SPECIAL_LITERALS.iter() {
+            if self.consume_ident(literal) {
+                return T::from_str(literal).map_err(|_| unreachable!()); // must not fail
+            }
+        }
+
+        let num_bytes = self.next_bytes_contained_in(is_float_char);
+        let candidate = &self.bytes[..num_bytes];
+
+        // Since `rustc` allows `1_0.0_1`, lint against underscores in floats
+        if let Some(underscore_at) = candidate.iter().position(|&b| b == b'_') {
+            let _ = self.advance(underscore_at);
+
+            return Err(Error::FloatUnderscore);
+        }
+
+        // SAFETY: every byte of `candidate` passed `is_float_char`, which
+        // only accepts ASCII bytes, so the slice is valid UTF-8.
+        let text = unsafe { from_utf8_unchecked(candidate) };
+        let parsed = T::from_str(text).map_err(|_| Error::ExpectedFloat);
+
+        let _ = self.advance(num_bytes);
+
+        parsed
+    }
+
+    /// Parses an identifier and returns its bytes.
+    ///
+    /// A raw identifier (`r#name`) has its `r#` prefix stripped and may
+    /// contain the additional raw-identifier characters. Input that starts
+    /// a raw string literal (`r"` or `r#` not followed by a raw-identifier
+    /// character) is rejected with `Error::ExpectedIdentifier` before
+    /// anything is consumed.
+    ///
+    /// Returns `Error::Eof` on empty input and `Error::ExpectedIdentifier`
+    /// when the first byte cannot start an identifier.
+    pub fn identifier(&mut self) -> Result<&'a [u8]> {
+        let next = self.peek_or_eof()?;
+        if !is_ident_first_char(next) {
+            return Err(Error::ExpectedIdentifier);
+        }
+
+        // If the next two bytes signify the start of a raw string literal,
+        // return an error.
+        let length = if next == b'r' {
+            match self.bytes.get(1) {
+                Some(b'"') => return Err(Error::ExpectedIdentifier),
+                Some(b'#') => {
+                    let after_next = self.bytes.get(2).cloned().unwrap_or_default();
+                    // Note: it's important to check this before advancing forward, so that
+                    // the value-type deserializer can fall back to parsing it differently.
+                    if !is_ident_raw_char(after_next) {
+                        return Err(Error::ExpectedIdentifier);
+                    }
+                    // skip "r#"
+                    let _ = self.advance(2);
+                    self.next_bytes_contained_in(is_ident_raw_char)
+                }
+                // Also covers `None`: a lone `r` at the very end of the input
+                // is the ordinary identifier `r`, not a premature EOF.
+                _ => self.next_bytes_contained_in(is_ident_other_char),
+            }
+        } else {
+            self.next_bytes_contained_in(is_ident_other_char)
+        };
+
+        let ident = &self.bytes[..length];
+        let _ = self.advance(length);
+
+        Ok(ident)
+    }
+
+    /// Returns the length of the longest input prefix whose bytes all
+    /// satisfy `allowed`.
+    pub fn next_bytes_contained_in(&self, allowed: fn(u8) -> bool) -> usize {
+        self.bytes
+            .iter()
+            .position(|&byte| !allowed(byte))
+            .unwrap_or(self.bytes.len())
+    }
+
+    /// Returns `true` if, after an optional leading sign, the run of float
+    /// characters is longer than the run of integer characters.
+    pub fn next_bytes_is_float(&self) -> bool {
+        let unsigned_part = match self.bytes.first() {
+            None => return false,
+            Some(b'+') | Some(b'-') => &self.bytes[1..],
+            Some(_) => self.bytes,
+        };
+
+        let float_len = unsigned_part
+            .iter()
+            .take_while(|&&b| is_float_char(b))
+            .count();
+        let int_len = unsigned_part
+            .iter()
+            .take_while(|&&b| is_int_char(b))
+            .count();
+
+        float_len > int_len
+    }
+
+    /// Skips any mix of whitespace and comments, propagating errors from
+    /// `skip_comment`.
+    pub fn skip_ws(&mut self) -> Result<()> {
+        loop {
+            while let Some(byte) = self.peek() {
+                if !is_whitespace_char(byte) {
+                    break;
+                }
+
+                let _ = self.advance_single();
+            }
+
+            // Stop once the whitespace run is not followed by a comment.
+            let skipped_comment = self.skip_comment()?;
+            if !skipped_comment {
+                break Ok(());
+            }
+        }
+    }
+
+    /// Returns the next byte without consuming it, or `None` at the end of
+    /// the input.
+    pub fn peek(&self) -> Option<u8> {
+        self.bytes.first().copied()
+    }
+
+    pub fn peek_or_eof(&self) -> Result<u8> {
+        self.bytes.first().cloned().ok_or(Error::Eof)
+    }
+
+    /// Parses an integer with an optional leading `+` or `-`.
+    pub fn signed_integer<T>(&mut self) -> Result<T>
+    where
+        T: Num,
+    {
+        let sign: i8 = match self.peek_or_eof()? {
+            b'+' => {
+                let _ = self.advance_single();
+                1
+            }
+            b'-' => {
+                let _ = self.advance_single();
+                -1
+            }
+            _ => 1,
+        };
+
+        self.any_integer(sign)
+    }
+
+    /// Parses a string literal: an escaped string introduced by `"` or a
+    /// raw string introduced by `r`. Anything else yields
+    /// `Error::ExpectedString`.
+    pub fn string(&mut self) -> Result<ParsedStr<'a>> {
+        if self.consume("\"") {
+            return self.escaped_string();
+        }
+
+        if self.consume("r") {
+            return self.raw_string();
+        }
+
+        Err(Error::ExpectedString)
+    }
+
+    /// Parses the remainder of a `"`-delimited string whose opening quote
+    /// has already been consumed.
+    ///
+    /// A string without escapes is returned as a borrowed slice of the
+    /// input. Otherwise the escapes are decoded into an owned `String`.
+    fn escaped_string(&mut self) -> Result<ParsedStr<'a>> {
+        // A delimiter is either the closing quote or the start of an escape.
+        let is_delimiter = |b: &u8| *b == b'\\' || *b == b'"';
+
+        let first = self
+            .bytes
+            .iter()
+            .position(is_delimiter)
+            .ok_or(Error::ExpectedStringEnd)?;
+
+        if self.bytes[first] == b'"' {
+            let s = from_utf8(&self.bytes[..first]).map_err(Error::from)?;
+
+            // Skip the string contents plus the closing `"`.
+            let _ = self.advance(first + 1);
+
+            return Ok(ParsedStr::Slice(s));
+        }
+
+        let mut buf = self.bytes[..first].to_vec();
+        let mut literal_len = first;
+
+        loop {
+            // Skip the literal run plus the backslash, then decode the escape.
+            let _ = self.advance(literal_len + 1);
+            let character = self.parse_escape()?;
+
+            let mut utf8 = [0u8; 4];
+            buf.extend_from_slice(character.encode_utf8(&mut utf8).as_bytes());
+
+            literal_len = self
+                .bytes
+                .iter()
+                .position(is_delimiter)
+                .ok_or(Error::ExpectedStringEnd)?;
+            buf.extend_from_slice(&self.bytes[..literal_len]);
+
+            if self.bytes[literal_len] == b'"' {
+                let _ = self.advance(literal_len + 1);
+
+                return String::from_utf8(buf)
+                    .map(ParsedStr::Allocated)
+                    .map_err(Error::from);
+            }
+        }
+    }
+
+    /// Parses the remainder of a raw string whose leading `r` has already
+    /// been consumed: any number of `#`, a `"`, the contents, then `"`
+    /// followed by the same number of `#`.
+    fn raw_string(&mut self) -> Result<ParsedStr<'a>> {
+        let num_hashes = self.bytes.iter().take_while(|&&b| b == b'#').count();
+        let _ = self.advance(num_hashes);
+
+        if !self.consume("\"") {
+            return Err(Error::ExpectedString);
+        }
+
+        // The terminator is a quote followed by `num_hashes` hashes.
+        let mut terminator = vec![b'#'; num_hashes + 1];
+        terminator[0] = b'"';
+
+        let end = self
+            .bytes
+            .windows(terminator.len())
+            .position(|window| window == &terminator[..])
+            .ok_or(Error::ExpectedStringEnd)?;
+
+        let s = from_utf8(&self.bytes[..end]).map_err(Error::from)?;
+
+        // Skip the contents plus the whole terminator.
+        let _ = self.advance(end + terminator.len());
+
+        Ok(ParsedStr::Slice(s))
+    }
+
+    /// Returns `true` if the remaining input starts with the bytes of `s`
+    /// (always `true` for an empty `s`).
+    fn test_for(&self, s: &str) -> bool {
+        self.bytes.starts_with(s.as_bytes())
+    }
+
+    /// Parses an integer that carries no sign prefix.
+    pub fn unsigned_integer<T: Num>(&mut self) -> Result<T> {
+        const POSITIVE: i8 = 1;
+
+        self.any_integer::<T>(POSITIVE)
+    }
+
+    /// Consumes two hex digits and combines them into one byte, high nibble
+    /// first.
+    fn decode_ascii_escape(&mut self) -> Result<u8> {
+        let high_digit = self.eat_byte()?;
+        let high = self.decode_hex(high_digit)?;
+
+        let low_digit = self.eat_byte()?;
+        let low = self.decode_hex(low_digit)?;
+
+        Ok((high << 4) | low)
+    }
+
+    /// Converts one ASCII hex digit (`0-9`, `a-f`, `A-F`) into its value,
+    /// or returns `Error::InvalidEscape` for any other byte.
+    #[inline]
+    fn decode_hex(&self, c: u8) -> Result<u8> {
+        match char::from(c).to_digit(16) {
+            // A base-16 digit is always below 16, so the cast is lossless.
+            Some(value) => Ok(value as u8),
+            None => Err(Error::InvalidEscape("Non-hex digit found")),
+        }
+    }
+
+    /// Decodes one escape sequence whose backslash has already been
+    /// consumed: a simple escape, `\xHH`, or `\u{H..}` with 1-6 hex digits.
+    fn parse_escape(&mut self) -> Result<char> {
+        let escaped = match self.eat_byte()? {
+            b'\'' => '\'',
+            b'"' => '"',
+            b'\\' => '\\',
+            b'n' => '\n',
+            b'r' => '\r',
+            b't' => '\t',
+            b'0' => '\0',
+            b'x' => char::from(self.decode_ascii_escape()?),
+            b'u' => {
+                self.expect_byte(b'{', Error::InvalidEscape("Missing { in Unicode escape"))?;
+
+                let mut code_point: u32 = 0;
+                let mut digits_read = 0;
+
+                // Read up to six hex digits, stopping early at `}`.
+                while digits_read < 6 {
+                    let byte = self.peek_or_eof()?;
+                    if byte == b'}' {
+                        break;
+                    }
+
+                    self.advance_single()?;
+
+                    let digit = self.decode_hex(byte)?;
+                    code_point = (code_point << 4) | u32::from(digit);
+                    digits_read += 1;
+                }
+
+                if digits_read == 0 {
+                    return Err(Error::InvalidEscape(
+                        "Expected 1-6 digits, got 0 digits in Unicode escape",
+                    ));
+                }
+
+                self.expect_byte(
+                    b'}',
+                    Error::InvalidEscape("No } at the end of Unicode escape"),
+                )?;
+
+                match char_from_u32(code_point) {
+                    Some(ch) => ch,
+                    None => return Err(Error::InvalidEscape("Not a valid char")),
+                }
+            }
+            _ => return Err(Error::InvalidEscape("Unknown escape character")),
+        };
+
+        Ok(escaped)
+    }
+
+    /// Skips one `//` line comment or one (possibly nested) `/* */` block
+    /// comment. Returns `Ok(false)` when the input does not start with `/`.
+    ///
+    /// A `/` followed by anything other than `/` or `*` yields
+    /// `Error::UnexpectedByte`. An unterminated block comment yields
+    /// `Error::UnclosedBlockComment`.
+    fn skip_comment(&mut self) -> Result<bool> {
+        if !self.consume("/") {
+            return Ok(false);
+        }
+
+        match self.eat_byte()? {
+            b'/' => {
+                // Everything up to (not including) the newline is comment.
+                let line_len = self
+                    .bytes
+                    .iter()
+                    .position(|&b| b == b'\n')
+                    .unwrap_or(self.bytes.len());
+
+                let _ = self.advance(line_len);
+            }
+            b'*' => {
+                let mut depth = 1;
+
+                while depth > 0 {
+                    let plain_len = self
+                        .bytes
+                        .iter()
+                        .take_while(|&&b| b != b'/' && b != b'*')
+                        .count();
+
+                    if self.bytes.is_empty() {
+                        return Err(Error::UnclosedBlockComment);
+                    }
+
+                    let _ = self.advance(plain_len);
+
+                    // We stopped at a `/` or `*`: open, close, or skip it.
+                    if self.consume("/*") {
+                        depth += 1;
+                    } else if self.consume("*/") {
+                        depth -= 1;
+                    } else {
+                        self.eat_byte().map_err(|_| Error::UnclosedBlockComment)?;
+                    }
+                }
+            }
+            other => return Err(Error::UnexpectedByte(other as char)),
+        }
+
+        Ok(true)
+    }
+}
+
+pub trait Num { // Integer operations the parser needs to accumulate digits into any integer width.
+    fn from_u8(x: u8) -> Self; // converts a small value (digit or base) into the implementing type
+
+    /// Multiplies `self` by `x` in place. Returns `true` on overflow, leaving `self` unchanged.
+    fn checked_mul_ext(&mut self, x: u8) -> bool;
+
+    /// Adds `x` to `self` in place. Returns `true` on overflow, leaving `self` unchanged.
+    fn checked_add_ext(&mut self, x: u8) -> bool;
+
+    /// Subtracts `x` from `self` in place. Returns `true` on overflow, leaving `self` unchanged.
+    fn checked_sub_ext(&mut self, x: u8) -> bool;
+}
+
+// Implements `Num` for each listed primitive integer type by delegating to
+// the type's own `checked_*` methods.
+macro_rules! impl_num {
+    ($($ty:ident)*) => {
+        $(
+            impl Num for $ty {
+                fn from_u8(x: u8) -> Self {
+                    x as $ty
+                }
+
+                fn checked_mul_ext(&mut self, x: u8) -> bool {
+                    // Store the product when it fits; `None` signals overflow.
+                    self.checked_mul(Self::from_u8(x))
+                        .map(|product| *self = product)
+                        .is_none()
+                }
+
+                fn checked_add_ext(&mut self, x: u8) -> bool {
+                    // Store the sum when it fits; `None` signals overflow.
+                    self.checked_add(Self::from_u8(x))
+                        .map(|sum| *self = sum)
+                        .is_none()
+                }
+
+                fn checked_sub_ext(&mut self, x: u8) -> bool {
+                    // Store the difference when it fits; `None` signals overflow.
+                    self.checked_sub(Self::from_u8(x))
+                        .map(|difference| *self = difference)
+                        .is_none()
+                }
+            }
+        )*
+    };
+}
+
+#[cfg(feature = "integer128")]
+impl_num!(u8 u16 u32 u64 u128 i8 i16 i32 i64 i128);
+#[cfg(not(feature = "integer128"))]
+impl_num!(u8 u16 u32 u64 i8 i16 i32 i64);
+
+#[derive(Clone, Debug)]
+pub enum ParsedStr<'a> { // Result of parsing a string literal.
+    Allocated(String), // owned text; built by `escaped_string` when escapes had to be decoded
+    Slice(&'a str), // text borrowed directly from the input (no escapes, or a raw string)
+}
+
+#[cfg(test)]
+mod tests {
+    use super::*;
+
+    /// Two hex digits `1` and `0` must decode to the single byte `0x10`.
+    #[test]
+    fn decode_x10() {
+        let mut input = Bytes::new(b"10").unwrap();
+        let decoded = input.decode_ascii_escape();
+
+        assert_eq!(decoded, Ok(0x10));
+    }
+}
author	Daniel Baumann <daniel.baumann@progress-linux.org>	2024-04-07 19:33:14 +0000
committer	Daniel Baumann <daniel.baumann@progress-linux.org>	2024-04-07 19:33:14 +0000
commit	36d22d82aa202bb199967e9512281e9a53db42c9 (patch)
tree	105e8c98ddea1c1e4784a60a5a6410fa416be2de /third_party/rust/ron/src/parse.rs
parent	Initial commit. (diff)
download	firefox-esr-36d22d82aa202bb199967e9512281e9a53db42c9.tar.xz firefox-esr-36d22d82aa202bb199967e9512281e9a53db42c9.zip