Adding upstream version 124.0.1.upstream/124.0.1

Signed-off-by: Daniel Baumann <daniel.baumann@progress-linux.org>
author: Daniel Baumann <daniel.baumann@progress-linux.org> 2024-04-19 00:47:55 +0000
committer: Daniel Baumann <daniel.baumann@progress-linux.org> 2024-04-19 00:47:55 +0000
commit: 26a029d407be480d791972afb5975cf62c9360a6 (patch)
tree: f435a8308119effd964b339f76abb83a57c29483 /third_party/rust/cexpr/src
parent: Initial commit. (diff)
download: firefox-26a029d407be480d791972afb5975cf62c9360a6.tar.xz
firefox-26a029d407be480d791972afb5975cf62c9360a6.zip
4 files changed, 1164 insertions, 0 deletions
diff --git a/third_party/rust/cexpr/src/expr.rs b/third_party/rust/cexpr/src/expr.rs
new file mode 100644
index 0000000000..7f7e458bd4
--- /dev/null
+++ b/third_party/rust/cexpr/src/expr.rs
@@ -0,0 +1,610 @@
+// (C) Copyright 2016 Jethro G. Beekman
+//
+// Licensed under the Apache License, Version 2.0 <LICENSE-APACHE or
+// http://www.apache.org/licenses/LICENSE-2.0> or the MIT license
+// <LICENSE-MIT or http://opensource.org/licenses/MIT>, at your
+// option. This file may not be copied, modified, or distributed
+// except according to those terms.
+//! Evaluating C expressions from tokens.
+//!
+//! Numerical operators are supported. All numerical values are treated as
+//! `i64` or `f64`. Type casting is not supported. `i64` are converted to
+//! `f64` when used in conjunction with a `f64`. Right shifts are always
+//! arithmetic shifts.
+//!
+//! The `sizeof` operator is not supported.
+//!
+//! String concatenation is supported, but width prefixes are ignored; all
+//! strings are treated as narrow strings.
+//!
+//! Use the `IdentifierParser` to substitute identifiers found in expressions.
+
+use std::collections::HashMap;
+use std::num::Wrapping;
+use std::ops::{
+    AddAssign, BitAndAssign, BitOrAssign, BitXorAssign, DivAssign, MulAssign, RemAssign, ShlAssign,
+    ShrAssign, SubAssign,
+};
+
+use crate::literal::{self, CChar};
+use crate::token::{Kind as TokenKind, Token};
+use crate::ToCexprResult;
+use nom::branch::alt;
+use nom::combinator::{complete, map, map_opt};
+use nom::multi::{fold_many0, many0, separated_list0};
+use nom::sequence::{delimited, pair, preceded};
+use nom::*;
+
+/// Expression parser/evaluator that supports identifiers.
+///
+/// Holds a borrowed table of known identifiers. When an identifier token's raw
+/// bytes are a key of that table, a clone of the mapped value is substituted.
+#[derive(Debug)]
+pub struct IdentifierParser<'ident> {
+    // Lookup table from raw identifier bytes to the value to substitute.
+    identifiers: &'ident HashMap<Vec<u8>, EvalResult>,
+}
+// Copyable wrapper around a borrowed `IdentifierParser`. The parsing methods
+// take it by value and use it inside several closures of one combinator chain.
+#[derive(Copy, Clone)]
+struct PRef<'a>(&'a IdentifierParser<'a>);
+
+/// A shorthand for the type of cexpr expression evaluation results.
+///
+/// This is a nom `IResult` whose input is a slice of `Token`s and whose error
+/// type is `crate::Error` over that same slice type.
+pub type CResult<'a, R> = IResult<&'a [Token], R, crate::Error<&'a [Token]>>;
+
+/// The result of parsing a literal or evaluating an expression.
+#[derive(Debug, Clone, PartialEq)]
+#[allow(missing_docs)]
+pub enum EvalResult {
+    // Integer value; stored as `Wrapping<i64>`, so integer arithmetic wraps.
+    Int(Wrapping<i64>),
+    // Floating-point value.
+    Float(f64),
+    // Character value, as produced by the `literal` module.
+    Char(CChar),
+    // Byte string.
+    Str(Vec<u8>),
+    // Produced by the operator impls in this file when an operator is applied
+    // to operand types it does not support (e.g. a bitwise operator on `Float`).
+    Invalid,
+}
+
+// Generates a consuming accessor named `$n` that returns the payload of the
+// `EvalResult::$e` variant, or `None` when `self` is any other variant.
+macro_rules! result_opt (
+    (fn $n:ident: $e:ident -> $t:ty) => (
+        #[allow(dead_code)]
+        #[allow(clippy::wrong_self_convention)]
+        fn $n(self) -> Option<$t> {
+            match self {
+                EvalResult::$e(payload) => Some(payload),
+                _ => None,
+            }
+        }
+    );
+);
+
+impl EvalResult {
+    // Per-variant accessors; each consumes `self` and yields the payload only
+    // when the variant matches.
+    result_opt!(fn as_int: Int -> Wrapping<i64>);
+    result_opt!(fn as_float: Float -> f64);
+    result_opt!(fn as_char: Char -> CChar);
+    result_opt!(fn as_str: Str -> Vec<u8>);
+
+    // Passes `Int` and `Float` values through unchanged; every other variant
+    // becomes `None`.
+    #[allow(clippy::wrong_self_convention)]
+    fn as_numeric(self) -> Option<EvalResult> {
+        match self {
+            EvalResult::Int(_) => Some(self),
+            EvalResult::Float(_) => Some(self),
+            EvalResult::Char(_) | EvalResult::Str(_) | EvalResult::Invalid => None,
+        }
+    }
+}
+
+// Wraps an owned byte vector as a string result.
+impl From<Vec<u8>> for EvalResult {
+    fn from(bytes: Vec<u8>) -> Self {
+        Self::Str(bytes)
+    }
+}
+
+// ===========================================
+// ============= Clang tokens ================
+// ===========================================
+
+// Expands to a closure that parses exactly one token of kind `TokenKind::$k`
+// whose raw bytes equal `$c`. On success the closure yields the remaining
+// tokens together with the matched token's raw bytes.
+macro_rules! exact_token (
+	($k:ident, $c:expr) => ({
+        move |input: &[Token]| {
+		if input.is_empty() {
+			// No token left: the `Needed` hint is the length of `$c`. The
+			// explicit type on `res` fixes the closure's return type.
+			let res: CResult<'_, &[u8]> = Err(crate::nom::Err::Incomplete(Needed::new($c.len())));
+			res
+		} else {
+			if input[0].kind==TokenKind::$k && &input[0].raw[..]==$c {
+				Ok((&input[1..], &input[0].raw[..]))
+			} else {
+				Err(crate::nom::Err::Error((input, crate::ErrorKind::ExactToken(TokenKind::$k,$c)).into()))
+			}
+		}
+        }
+	});
+);
+
+// Consumes one token of kind `Identifier` and yields its raw bytes.
+//
+// Empty input is reported as `Incomplete(1)`; a leading token of any other
+// kind is reported as a `TypedToken(Identifier)` error.
+fn identifier_token(input: &[Token]) -> CResult<'_, &[u8]> {
+    match input.split_first() {
+        None => Err(nom::Err::Incomplete(Needed::new(1))),
+        Some((token, rest)) if token.kind == TokenKind::Identifier => Ok((rest, &token.raw[..])),
+        Some(_) => Err(crate::nom::Err::Error(
+            (input, crate::ErrorKind::TypedToken(TokenKind::Identifier)).into(),
+        )),
+    }
+}
+
+// Builds a parser that accepts exactly one punctuation token whose raw bytes
+// equal `c`, yielding those bytes (see `exact_token!`).
+fn p(c: &'static str) -> impl Fn(&[Token]) -> CResult<'_, &[u8]> {
+    exact_token!(Punctuation, c.as_bytes())
+}
+
+// Builds a parser that accepts a single punctuation token whose raw bytes
+// match any entry of `c`, yielding those bytes.
+fn one_of_punctuation(c: &'static [&'static str]) -> impl Fn(&[Token]) -> CResult<'_, &[u8]> {
+    move |input| match input.split_first() {
+        None => {
+            // No token left: the `Needed` hint is the length of the shortest option.
+            let shortest = c
+                .iter()
+                .map(|candidate| candidate.len())
+                .min()
+                .expect("at least one option");
+            Err(crate::nom::Err::Incomplete(Needed::new(shortest)))
+        }
+        Some((token, rest))
+            if token.kind == TokenKind::Punctuation
+                && c.iter().any(|candidate| candidate.as_bytes() == &token.raw[..]) =>
+        {
+            Ok((rest, &token.raw[..]))
+        }
+        Some(_) => Err(crate::nom::Err::Error(
+            (
+                input,
+                crate::ErrorKind::ExactTokens(TokenKind::Punctuation, c),
+            )
+                .into(),
+        )),
+    }
+}
+
+// ==================================================
+// ============= Numeric expressions ================
+// ==================================================
+
+// In-place `+`. Two integers are added as `Wrapping<i64>`; when either side
+// is a float the integer side is converted to `f64`. Any other combination
+// of operands leaves `Invalid` in `self`.
+impl<'a> AddAssign<&'a EvalResult> for EvalResult {
+    fn add_assign(&mut self, rhs: &'a EvalResult) {
+        let sum = match (&*self, rhs) {
+            (EvalResult::Int(x), EvalResult::Int(y)) => EvalResult::Int(*x + *y),
+            (EvalResult::Float(x), EvalResult::Int(y)) => EvalResult::Float(*x + (y.0 as f64)),
+            (EvalResult::Int(x), EvalResult::Float(y)) => EvalResult::Float(x.0 as f64 + *y),
+            (EvalResult::Float(x), EvalResult::Float(y)) => EvalResult::Float(*x + *y),
+            _ => EvalResult::Invalid,
+        };
+        *self = sum;
+    }
+}
+// In-place `&`. Defined for two integers only; anything else leaves
+// `Invalid` in `self`.
+impl<'a> BitAndAssign<&'a EvalResult> for EvalResult {
+    fn bitand_assign(&mut self, rhs: &'a EvalResult) {
+        let masked = match (&*self, rhs) {
+            (EvalResult::Int(x), EvalResult::Int(y)) => EvalResult::Int(*x & *y),
+            _ => EvalResult::Invalid,
+        };
+        *self = masked;
+    }
+}
+// In-place `|`. Defined for two integers only; anything else leaves
+// `Invalid` in `self`.
+impl<'a> BitOrAssign<&'a EvalResult> for EvalResult {
+    fn bitor_assign(&mut self, rhs: &'a EvalResult) {
+        let merged = match (&*self, rhs) {
+            (EvalResult::Int(x), EvalResult::Int(y)) => EvalResult::Int(*x | *y),
+            _ => EvalResult::Invalid,
+        };
+        *self = merged;
+    }
+}
+// In-place `^`. Defined for two integers only; anything else leaves
+// `Invalid` in `self`.
+impl<'a> BitXorAssign<&'a EvalResult> for EvalResult {
+    fn bitxor_assign(&mut self, rhs: &'a EvalResult) {
+        let toggled = match (&*self, rhs) {
+            (EvalResult::Int(x), EvalResult::Int(y)) => EvalResult::Int(*x ^ *y),
+            _ => EvalResult::Invalid,
+        };
+        *self = toggled;
+    }
+}
+// In-place `/`. Two integers divide as `Wrapping<i64>`; when either side is
+// a float the integer side is converted to `f64`. Integer division by zero
+// and any non-numeric operand combination leave `Invalid` in `self`.
+impl<'a> DivAssign<&'a EvalResult> for EvalResult {
+    fn div_assign(&mut self, rhs: &'a EvalResult) {
+        use self::EvalResult::*;
+        *self = match (&*self, rhs) {
+            // `Wrapping<i64>` division panics on a zero divisor. The operands
+            // come from arbitrary C source (e.g. `#define X (1/0)`), so report
+            // the result as `Invalid` instead of aborting the caller.
+            (&Int(_), &Int(Wrapping(0))) => Invalid,
+            (&Int(a), &Int(b)) => Int(a / b),
+            // Float division by zero does not panic (it yields inf/NaN), so
+            // the float arms need no guard.
+            (&Float(a), &Int(b)) => Float(a / (b.0 as f64)),
+            (&Int(a), &Float(b)) => Float(a.0 as f64 / b),
+            (&Float(a), &Float(b)) => Float(a / b),
+            _ => Invalid,
+        };
+    }
+}
+// In-place `*`. Two integers multiply as `Wrapping<i64>`; when either side
+// is a float the integer side is converted to `f64`. Any other combination
+// of operands leaves `Invalid` in `self`.
+impl<'a> MulAssign<&'a EvalResult> for EvalResult {
+    fn mul_assign(&mut self, rhs: &'a EvalResult) {
+        let product = match (&*self, rhs) {
+            (EvalResult::Int(x), EvalResult::Int(y)) => EvalResult::Int(*x * *y),
+            (EvalResult::Float(x), EvalResult::Int(y)) => EvalResult::Float(*x * (y.0 as f64)),
+            (EvalResult::Int(x), EvalResult::Float(y)) => EvalResult::Float(x.0 as f64 * *y),
+            (EvalResult::Float(x), EvalResult::Float(y)) => EvalResult::Float(*x * *y),
+            _ => EvalResult::Invalid,
+        };
+        *self = product;
+    }
+}
+// In-place `%`. Two integers use `Wrapping<i64>` remainder; when either side
+// is a float the integer side is converted to `f64`. Integer remainder by
+// zero and any non-numeric operand combination leave `Invalid` in `self`.
+impl<'a> RemAssign<&'a EvalResult> for EvalResult {
+    fn rem_assign(&mut self, rhs: &'a EvalResult) {
+        use self::EvalResult::*;
+        *self = match (&*self, rhs) {
+            // `Wrapping<i64>` remainder panics on a zero divisor. The operands
+            // come from arbitrary C source (e.g. `#define X (1%0)`), so report
+            // the result as `Invalid` instead of aborting the caller.
+            (&Int(_), &Int(Wrapping(0))) => Invalid,
+            (&Int(a), &Int(b)) => Int(a % b),
+            // Float remainder by zero does not panic (it yields NaN), so the
+            // float arms need no guard.
+            (&Float(a), &Int(b)) => Float(a % (b.0 as f64)),
+            (&Int(a), &Float(b)) => Float(a.0 as f64 % b),
+            (&Float(a), &Float(b)) => Float(a % b),
+            _ => Invalid,
+        };
+    }
+}
+// In-place `<<`. Defined for two integers only, with the shift amount cast
+// to `usize`; anything else leaves `Invalid` in `self`.
+impl<'a> ShlAssign<&'a EvalResult> for EvalResult {
+    fn shl_assign(&mut self, rhs: &'a EvalResult) {
+        let shifted = match (&*self, rhs) {
+            (EvalResult::Int(value), EvalResult::Int(amount)) => {
+                EvalResult::Int(*value << (amount.0 as usize))
+            }
+            _ => EvalResult::Invalid,
+        };
+        *self = shifted;
+    }
+}
+// In-place `>>` on the signed `Wrapping<i64>` value. Defined for two
+// integers only, with the shift amount cast to `usize`; anything else leaves
+// `Invalid` in `self`.
+impl<'a> ShrAssign<&'a EvalResult> for EvalResult {
+    fn shr_assign(&mut self, rhs: &'a EvalResult) {
+        let shifted = match (&*self, rhs) {
+            (EvalResult::Int(value), EvalResult::Int(amount)) => {
+                EvalResult::Int(*value >> (amount.0 as usize))
+            }
+            _ => EvalResult::Invalid,
+        };
+        *self = shifted;
+    }
+}
+// In-place `-`. Two integers subtract as `Wrapping<i64>`; when either side
+// is a float the integer side is converted to `f64`. Any other combination
+// of operands leaves `Invalid` in `self`.
+impl<'a> SubAssign<&'a EvalResult> for EvalResult {
+    fn sub_assign(&mut self, rhs: &'a EvalResult) {
+        let difference = match (&*self, rhs) {
+            (EvalResult::Int(x), EvalResult::Int(y)) => EvalResult::Int(*x - *y),
+            (EvalResult::Float(x), EvalResult::Int(y)) => EvalResult::Float(*x - (y.0 as f64)),
+            (EvalResult::Int(x), EvalResult::Float(y)) => EvalResult::Float(x.0 as f64 - *y),
+            (EvalResult::Float(x), EvalResult::Float(y)) => EvalResult::Float(*x - *y),
+            _ => EvalResult::Invalid,
+        };
+        *self = difference;
+    }
+}
+
+// Applies a one-byte prefix operator (`+`, `-` or `~`) to an operand.
+//
+// Returns `None` only for `~` on a float. Panics if the operator is not
+// exactly one byte long, is not one of the three above, or if `-`/`~` is
+// given a non-numeric operand.
+fn unary_op(input: (&[u8], EvalResult)) -> Option<EvalResult> {
+    let (op, operand) = input;
+    assert_eq!(op.len(), 1);
+    match op[0] {
+        b'+' => Some(operand),
+        b'-' => match operand {
+            // Negate through `wrapping_neg` on the inner `i64`.
+            EvalResult::Int(i) => Some(EvalResult::Int(Wrapping(i.0.wrapping_neg()))),
+            EvalResult::Float(f) => Some(EvalResult::Float(-f)),
+            _ => unreachable!("non-numeric unary op"),
+        },
+        b'~' => match operand {
+            EvalResult::Int(i) => Some(EvalResult::Int(!i)),
+            EvalResult::Float(_) => None,
+            _ => unreachable!("non-numeric unary op"),
+        },
+        _ => unreachable!("invalid unary op"),
+    }
+}
+
+// Restricts the parser `f` to numeric results: the returned parser fails
+// unless `f` produced an `Int` or a `Float`.
+fn numeric<I: Clone, E: nom::error::ParseError<I>, F>(
+    f: F,
+) -> impl FnMut(I) -> nom::IResult<I, EvalResult, E>
+where
+    F: FnMut(I) -> nom::IResult<I, EvalResult, E>,
+{
+    map_opt(f, |value: EvalResult| value.as_numeric())
+}
+
+// Numeric expression grammar. Each method parses one precedence level and
+// delegates its operands to the next tighter level:
+// `or` -> `xor` -> `and` -> `shl_shr` -> `add_sub` -> `mul_div_rem` -> `unary`.
+impl<'a> PRef<'a> {
+    // Parses an operand: a parenthesized numeric expression, a numeric
+    // literal, an identifier with a numeric value, or a `+`/`-`/`~` prefix
+    // applied to another operand. Alternatives are tried in that order.
+    fn unary(self, input: &'_ [Token]) -> CResult<'_, EvalResult> {
+        alt((
+            delimited(p("("), |i| self.numeric_expr(i), p(")")),
+            numeric(|i| self.literal(i)),
+            numeric(|i| self.identifier(i)),
+            map_opt(
+                pair(one_of_punctuation(&["+", "-", "~"][..]), |i| self.unary(i)),
+                unary_op,
+            ),
+        ))(input)
+    }
+
+    // Parses `unary (("*" | "/" | "%") unary)*`, folding left to right.
+    fn mul_div_rem(self, input: &'_ [Token]) -> CResult<'_, EvalResult> {
+        let (input, acc) = self.unary(input)?;
+        fold_many0(
+            pair(complete(one_of_punctuation(&["*", "/", "%"][..])), |i| {
+                self.unary(i)
+            }),
+            move || acc.clone(),
+            |mut acc, (op, val): (&[u8], EvalResult)| {
+                match op[0] as char {
+                    '*' => acc *= &val,
+                    '/' => acc /= &val,
+                    '%' => acc %= &val,
+                    _ => unreachable!(),
+                };
+                acc
+            },
+        )(input)
+    }
+
+    // Parses `mul_div_rem (("+" | "-") mul_div_rem)*`, folding left to right.
+    fn add_sub(self, input: &'_ [Token]) -> CResult<'_, EvalResult> {
+        let (input, acc) = self.mul_div_rem(input)?;
+        fold_many0(
+            pair(complete(one_of_punctuation(&["+", "-"][..])), |i| {
+                self.mul_div_rem(i)
+            }),
+            move || acc.clone(),
+            |mut acc, (op, val): (&[u8], EvalResult)| {
+                match op[0] as char {
+                    '+' => acc += &val,
+                    '-' => acc -= &val,
+                    _ => unreachable!(),
+                };
+                acc
+            },
+        )(input)
+    }
+
+    // Parses `add_sub (("<<" | ">>") add_sub)*`, folding left to right. The
+    // folded value must pass `numeric`, so an `Invalid` accumulator produced
+    // by any operator up to this point turns into a parse error here.
+    fn shl_shr(self, input: &'_ [Token]) -> CResult<'_, EvalResult> {
+        let (input, acc) = self.add_sub(input)?;
+        numeric(fold_many0(
+            pair(complete(one_of_punctuation(&["<<", ">>"][..])), |i| {
+                self.add_sub(i)
+            }),
+            move || acc.clone(),
+            |mut acc, (op, val): (&[u8], EvalResult)| {
+                match op {
+                    b"<<" => acc <<= &val,
+                    b">>" => acc >>= &val,
+                    _ => unreachable!(),
+                };
+                acc
+            },
+        ))(input)
+    }
+
+    // Parses `shl_shr ("&" shl_shr)*`; the folded value must be numeric.
+    fn and(self, input: &'_ [Token]) -> CResult<'_, EvalResult> {
+        let (input, acc) = self.shl_shr(input)?;
+        numeric(fold_many0(
+            preceded(complete(p("&")), |i| self.shl_shr(i)),
+            move || acc.clone(),
+            |mut acc, val: EvalResult| {
+                acc &= &val;
+                acc
+            },
+        ))(input)
+    }
+
+    // Parses `and ("^" and)*`; the folded value must be numeric.
+    fn xor(self, input: &'_ [Token]) -> CResult<'_, EvalResult> {
+        let (input, acc) = self.and(input)?;
+        numeric(fold_many0(
+            preceded(complete(p("^")), |i| self.and(i)),
+            move || acc.clone(),
+            |mut acc, val: EvalResult| {
+                acc ^= &val;
+                acc
+            },
+        ))(input)
+    }
+
+    // Parses `xor ("|" xor)*`; the folded value must be numeric.
+    fn or(self, input: &'_ [Token]) -> CResult<'_, EvalResult> {
+        let (input, acc) = self.xor(input)?;
+        numeric(fold_many0(
+            preceded(complete(p("|")), |i| self.xor(i)),
+            move || acc.clone(),
+            |mut acc, val: EvalResult| {
+                acc |= &val;
+                acc
+            },
+        ))(input)
+    }
+
+    // Entry point of the numeric grammar: the loosest-binding level (`or`).
+    #[inline(always)]
+    fn numeric_expr(self, input: &'_ [Token]) -> CResult<'_, EvalResult> {
+        self.or(input)
+    }
+}
+
+// =======================================================
+// ============= Literals and identifiers ================
+// =======================================================
+
+impl<'a> PRef<'a> {
+    // Consumes one identifier token and yields a clone of the value registered
+    // for it. Unregistered identifiers fail with `UnknownIdentifier`; a token
+    // of another kind fails with `TypedToken(Identifier)`.
+    fn identifier(self, input: &'_ [Token]) -> CResult<'_, EvalResult> {
+        match input.split_first() {
+            None => Err(Err::Incomplete(Needed::new(1))),
+            Some((
+                &Token {
+                    kind: TokenKind::Identifier,
+                    ref raw,
+                },
+                rest,
+            )) => {
+                if let Some(r) = self.identifiers.get(&raw[..]) {
+                    Ok((rest, r.clone()))
+                } else {
+                    Err(Err::Error(
+                        (input, crate::ErrorKind::UnknownIdentifier).into(),
+                    ))
+                }
+            }
+            Some(_) => Err(Err::Error(
+                (input, crate::ErrorKind::TypedToken(TokenKind::Identifier)).into(),
+            )),
+        }
+    }
+
+    // Consumes one literal token and yields the value `literal::parse`
+    // produces from its raw bytes. A literal that fails to parse is reported
+    // as `InvalidLiteral`; a token of another kind as `TypedToken(Literal)`.
+    fn literal(self, input: &'_ [Token]) -> CResult<'_, EvalResult> {
+        match input.split_first() {
+            None => Err(Err::Incomplete(Needed::new(1))),
+            Some((
+                &Token {
+                    kind: TokenKind::Literal,
+                    ref raw,
+                },
+                rest,
+            )) => match literal::parse(raw) {
+                // Whatever `literal::parse` left unconsumed is discarded here.
+                Ok((_, result)) => Ok((rest, result)),
+                _ => Err(Err::Error((input, crate::ErrorKind::InvalidLiteral).into())),
+            },
+            Some(_) => Err(Err::Error(
+                (input, crate::ErrorKind::TypedToken(TokenKind::Literal)).into(),
+            )),
+        }
+    }
+
+    // Parses a single string: either a literal or an identifier whose value
+    // is a `Str`.
+    fn string(self, input: &'_ [Token]) -> CResult<'_, Vec<u8>> {
+        alt((
+            map_opt(|i| self.literal(i), EvalResult::as_str),
+            map_opt(|i| self.identifier(i), EvalResult::as_str),
+        ))(input)
+        .to_cexpr_result()
+    }
+
+    // "string1" "string2" etc...
+    // Requires at least one string; all following strings are appended to
+    // the first and the result is wrapped as `EvalResult::Str`.
+    fn concat_str(self, input: &'_ [Token]) -> CResult<'_, EvalResult> {
+        map(
+            pair(|i| self.string(i), many0(complete(|i| self.string(i)))),
+            |(first, v)| {
+                Vec::into_iter(v)
+                    .fold(first, |mut s, elem| {
+                        Vec::extend_from_slice(&mut s, Vec::<u8>::as_slice(&elem));
+                        s
+                    })
+                    .into()
+            },
+        )(input)
+        .to_cexpr_result()
+    }
+
+    // Parses any supported expression. Alternatives are tried in order:
+    // numeric expression, parenthesized expression, string concatenation,
+    // bare literal, bare identifier.
+    fn expr(self, input: &'_ [Token]) -> CResult<'_, EvalResult> {
+        alt((
+            |i| self.numeric_expr(i),
+            delimited(p("("), |i| self.expr(i), p(")")),
+            |i| self.concat_str(i),
+            |i| self.literal(i),
+            |i| self.identifier(i),
+        ))(input)
+        .to_cexpr_result()
+    }
+
+    // Parses an identifier token (the macro name) followed by an expression
+    // (the replacement) and yields both.
+    fn macro_definition(self, input: &'_ [Token]) -> CResult<'_, (&'_ [u8], EvalResult)> {
+        pair(identifier_token, |i| self.expr(i))(input)
+    }
+}
+
+// Lets `PRef` reach the fields of the wrapped `IdentifierParser` directly.
+impl<'a> ::std::ops::Deref for PRef<'a> {
+    type Target = IdentifierParser<'a>;
+    fn deref(&self) -> &Self::Target {
+        let PRef(parser) = *self;
+        parser
+    }
+}
+
+impl<'ident> IdentifierParser<'ident> {
+    // Wraps `self` in the copyable `PRef` handle that carries the actual
+    // parsing methods.
+    fn as_ref(&self) -> PRef<'_> {
+        PRef(self)
+    }
+
+    /// Create a new `IdentifierParser` with a set of known identifiers. When
+    /// a known identifier is encountered during parsing, it is substituted
+    /// for the value specified.
+    pub fn new(identifiers: &HashMap<Vec<u8>, EvalResult>) -> IdentifierParser<'_> {
+        IdentifierParser { identifiers }
+    }
+
+    /// Parse and evaluate an expression of a list of tokens.
+    ///
+    /// Returns an error if the input is not a valid expression or if the token
+    /// stream contains comments, keywords or unknown identifiers.
+    pub fn expr<'a>(&self, input: &'a [Token]) -> CResult<'a, EvalResult> {
+        self.as_ref().expr(input)
+    }
+
+    /// Parse and evaluate a macro definition from a list of tokens.
+    ///
+    /// Returns the identifier for the macro and its replacement evaluated as an
+    /// expression. The input should not include `#define`.
+    ///
+    /// Returns an error if the replacement is not a valid expression, if called
+    /// on most function-like macros, or if the token stream contains comments,
+    /// keywords or unknown identifiers.
+    ///
+    /// N.B. This is intended to fail on function-like macros, but if the
+    /// macro takes a single argument, the argument name is defined as an
+    /// identifier, and the macro otherwise parses as an expression, it will
+    /// return a result even on function-like macros.
+    ///
+    /// ```c
+    /// // will evaluate into IDENTIFIER
+    /// #define DELETE(IDENTIFIER)
+    /// // will evaluate into IDENTIFIER-3
+    /// #define NEGATIVE_THREE(IDENTIFIER)  -3
+    /// ```
+    pub fn macro_definition<'a>(&self, input: &'a [Token]) -> CResult<'a, (&'a [u8], EvalResult)> {
+        // Unlike `expr`, leftover tokens are rejected here.
+        crate::assert_full_parse(self.as_ref().macro_definition(input))
+    }
+}
+
+/// Parse and evaluate an expression from a list of tokens, with no known
+/// identifiers.
+///
+/// Returns an error if the input is not a valid expression or if the token
+/// stream contains comments, keywords or identifiers.
+pub fn expr(input: &[Token]) -> CResult<'_, EvalResult> {
+    let no_identifiers = HashMap::new();
+    let parser = IdentifierParser::new(&no_identifiers);
+    parser.expr(input)
+}
+
+/// Parse and evaluate a macro definition from a list of tokens, with no
+/// known identifiers.
+///
+/// Returns the identifier for the macro and its replacement evaluated as an
+/// expression. The input should not include `#define`.
+///
+/// Returns an error if the replacement is not a valid expression, if called
+/// on a function-like macro, or if the token stream contains comments,
+/// keywords or identifiers.
+pub fn macro_definition(input: &[Token]) -> CResult<'_, (&'_ [u8], EvalResult)> {
+    let no_identifiers = HashMap::new();
+    let parser = IdentifierParser::new(&no_identifiers);
+    parser.macro_definition(input)
+}
+
+/// Parse the declaration part of a functional macro from a list of tokens.
+///
+/// Yields the macro's identifier and its argument names (in order). The
+/// input should not include `#define`. The macro body is not parsed; it is
+/// the unparsed remainder returned alongside the result.
+///
+/// Returns an error if the input is not a functional macro or if the token
+/// stream contains comments.
+///
+/// # Example
+/// ```
+/// use cexpr::expr::{IdentifierParser, EvalResult, fn_macro_declaration};
+/// use cexpr::assert_full_parse;
+/// use cexpr::token::Kind::*;
+/// use cexpr::token::Token;
+///
+/// // #define SUFFIX(arg) arg "suffix"
+/// let tokens = vec![
+///     (Identifier,  &b"SUFFIX"[..]).into(),
+///     (Punctuation, &b"("[..]).into(),
+///     (Identifier,  &b"arg"[..]).into(),
+///     (Punctuation, &b")"[..]).into(),
+///     (Identifier,  &b"arg"[..]).into(),
+///     (Literal,     &br#""suffix""#[..]).into(),
+/// ];
+///
+/// // Try to parse the functional part
+/// let (expr, (ident, args)) = fn_macro_declaration(&tokens).unwrap();
+/// assert_eq!(ident, b"SUFFIX");
+///
+/// // Create dummy arguments
+/// let idents = args.into_iter().map(|arg|
+///     (arg.to_owned(), EvalResult::Str(b"test".to_vec()))
+/// ).collect();
+///
+/// // Evaluate the macro
+/// let (_, evaluated) = assert_full_parse(IdentifierParser::new(&idents).expr(expr)).unwrap();
+/// assert_eq!(evaluated, EvalResult::Str(b"testsuffix".to_vec()));
+/// ```
+pub fn fn_macro_declaration(input: &[Token]) -> CResult<'_, (&[u8], Vec<&[u8]>)> {
+    // `(` name, name, ... `)` -- the list may be empty.
+    let argument_list = delimited(
+        p("("),
+        separated_list0(p(","), identifier_token),
+        p(")"),
+    );
+    pair(identifier_token, argument_list)(input)
+}
diff --git a/third_party/rust/cexpr/src/lib.rs b/third_party/rust/cexpr/src/lib.rs
new file mode 100644
index 0000000000..5170f97d13
--- /dev/null
+++ b/third_party/rust/cexpr/src/lib.rs
@@ -0,0 +1,149 @@
+// (C) Copyright 2016 Jethro G. Beekman
+//
+// Licensed under the Apache License, Version 2.0 <LICENSE-APACHE or
+// http://www.apache.org/licenses/LICENSE-2.0> or the MIT license
+// <LICENSE-MIT or http://opensource.org/licenses/MIT>, at your
+// option. This file may not be copied, modified, or distributed
+// except according to those terms.
+//! A C expression parser and evaluator.
+//!
+//! This crate provides methods for parsing and evaluating simple C expressions. In general, the
+//! crate can handle most arithmetic expressions that would appear in macros or the definition of
+//! constants, as well as string and character constants.
+//!
+//! The main entry point is [`token::parse`], which parses a byte string and returns its
+//! evaluated value.
+#![warn(rust_2018_idioms)]
+#![warn(missing_docs)]
+#![allow(deprecated)]
+
+pub mod nom {
+    //! nom's result types, re-exported.
+    // NOTE(review): elsewhere in this file `nom::...` paths appear to resolve
+    // to this module, while `::nom::...` names the external crate -- confirm
+    // before adding new paths.
+    pub use nom::{error::ErrorKind, error::Error, Err, IResult, Needed};
+}
+pub mod expr;
+pub mod literal;
+pub mod token;
+
+/// Parsing errors specific to C parsing
+#[derive(Debug)]
+pub enum ErrorKind {
+    /// Expected the specified token
+    ///
+    /// Carries the expected token kind and the expected raw bytes.
+    ExactToken(token::Kind, &'static [u8]),
+    /// Expected one of the specified tokens
+    ///
+    /// Carries the expected token kind and the list of accepted spellings.
+    ExactTokens(token::Kind, &'static [&'static str]),
+    /// Expected a token of the specified kind
+    TypedToken(token::Kind),
+    /// An unknown identifier was encountered
+    UnknownIdentifier,
+    /// An invalid literal was encountered.
+    ///
+    /// When encountered, this generally means a bug exists in the data that
+    /// was passed in or the parsing logic.
+    InvalidLiteral,
+    /// A full parse was requested, but data was left over after parsing finished.
+    ///
+    /// This is the error produced by `assert_full_parse`.
+    Partial,
+    /// An error occurred in an underlying nom parser.
+    Parser(nom::ErrorKind),
+}
+
+// Wraps a nom error kind in the `Parser` variant.
+impl From<nom::ErrorKind> for ErrorKind {
+    fn from(k: nom::ErrorKind) -> Self {
+        Self::Parser(k)
+    }
+}
+
+// Every `u32` converts to `InvalidLiteral`; the numeric value is discarded.
+impl From<u32> for ErrorKind {
+    fn from(_code: u32) -> Self {
+        Self::InvalidLiteral
+    }
+}
+
+/// Parsing errors specific to C parsing.
+///
+/// This is a superset of `(I, nom::ErrorKind)` that includes the additional errors specified by
+/// [`ErrorKind`].
+///
+/// Values can be built with `From` out of `(input, ErrorKind)` pairs,
+/// `(input, nom::ErrorKind)` pairs, and nom's own `Error<I>`.
+#[derive(Debug)]
+pub struct Error<I> {
+    /// The remainder of the input stream at the time of the error.
+    pub input: I,
+    /// The error that occurred.
+    pub error: ErrorKind,
+}
+
+// Builds an `Error` from an input position and a nom error kind; the kind is
+// stored as `ErrorKind::Parser`.
+impl<I> From<(I, nom::ErrorKind)> for Error<I> {
+    fn from(e: (I, nom::ErrorKind)) -> Self {
+        let (input, kind) = e;
+        Self {
+            input,
+            error: ErrorKind::Parser(kind),
+        }
+    }
+}
+
+// Builds an `Error` from an input position and a cexpr error kind.
+impl<I> From<(I, ErrorKind)> for Error<I> {
+    fn from(e: (I, ErrorKind)) -> Self {
+        let (input, error) = e;
+        Self { input, error }
+    }
+}
+
+// Converts nom's own error type, keeping its input and storing its code as
+// `ErrorKind::Parser`.
+impl<I> From<::nom::error::Error<I>> for Error<I> {
+    fn from(e: ::nom::error::Error<I>) -> Self {
+        Self {
+            input: e.input,
+            error: ErrorKind::Parser(e.code),
+        }
+    }
+}
+
+// Lets nom combinators produce `Error` directly.
+impl<I> ::nom::error::ParseError<I> for Error<I> {
+    // A fresh nom error is stored as `ErrorKind::Parser`.
+    fn from_error_kind(input: I, kind: nom::ErrorKind) -> Self {
+        Self {
+            input,
+            error: ErrorKind::Parser(kind),
+        }
+    }
+
+    // Added context is dropped: the existing error is returned untouched.
+    fn append(_input: I, _kind: nom::ErrorKind, other: Self) -> Self {
+        other
+    }
+}
+
+// in lieu of https://github.com/Geal/nom/issues/1010
+// Converts the error type of a nom result into `Error<I>`, leaving the
+// success value and the `Incomplete`/`Error`/`Failure` classification as is.
+trait ToCexprResult<I, O> {
+    fn to_cexpr_result(self) -> nom::IResult<I, O, Error<I>>;
+}
+impl<I, O, E> ToCexprResult<I, O> for nom::IResult<I, O, E>
+where
+    Error<I>: From<E>,
+{
+    fn to_cexpr_result(self) -> nom::IResult<I, O, Error<I>> {
+        self.map_err(|failure| match failure {
+            nom::Err::Incomplete(needed) => nom::Err::Incomplete(needed),
+            nom::Err::Error(inner) => nom::Err::Error(Error::from(inner)),
+            nom::Err::Failure(inner) => nom::Err::Failure(Error::from(inner)),
+        })
+    }
+}
+
+/// If the input result indicates a successful parse, but there is data left,
+/// return an `ErrorKind::Partial` error instead.
+///
+/// Unsuccessful results are passed through with their error type converted
+/// to [`Error`].
+pub fn assert_full_parse<'i, I: 'i, O, E>(
+    result: nom::IResult<&'i [I], O, E>,
+) -> nom::IResult<&'i [I], O, Error<&'i [I]>>
+where
+    Error<&'i [I]>: From<E>,
+{
+    let (rem, output) = result.to_cexpr_result()?;
+    if rem.is_empty() {
+        Ok((rem, output))
+    } else {
+        Err(nom::Err::Error((rem, ErrorKind::Partial).into()))
+    }
+}
diff --git a/third_party/rust/cexpr/src/literal.rs b/third_party/rust/cexpr/src/literal.rs
new file mode 100644
index 0000000000..68e85c7dad
--- /dev/null
+++ b/third_party/rust/cexpr/src/literal.rs
@@ -0,0 +1,361 @@
+// (C) Copyright 2016 Jethro G. Beekman
+//
+// Licensed under the Apache License, Version 2.0 <LICENSE-APACHE or
+// http://www.apache.org/licenses/LICENSE-2.0> or the MIT license
+// <LICENSE-MIT or http://opensource.org/licenses/MIT>, at your
+// option. This file may not be copied, modified, or distributed
+// except according to those terms.
+//! Parsing C literals from byte slices.
+//!
+//! This will parse a representation of a C literal into a Rust type.
+//!
+//! # characters
+//! Character literals are stored into the `CChar` type, which can hold values
+//! that are not valid Unicode code points. ASCII characters are represented as
+//! `char`, literal bytes with the high bit set are converted into the raw
+//! representation. Escape sequences are supported. If hex and octal escapes
+//! map to an ASCII character, that is used, otherwise, the raw encoding is
+//! used, including for values over 255. Unicode escapes are checked for
+//! validity and mapped to `char`. Character sequences are not supported. Width
+//! prefixes are ignored.
+//!
+//! # strings
+//! Strings are interpreted as byte vectors. Escape sequences are supported. If
+//! hex and octal escapes map onto multi-byte characters, they are truncated to
+//! one 8-bit character. Unicode escapes are converted into their UTF-8
+//! encoding. Width prefixes are ignored.
+//!
+//! # integers
+//! Integers are read into `i64`. Binary, octal, decimal and hexadecimal are
+//! all supported. If the literal value is between `i64::MAX` and `u64::MAX`,
+//! it is bit-cast to `i64`. Values over `u64::MAX` cannot be parsed. Width and
+//! sign suffixes are ignored. Sign prefixes are not supported.
+//!
+//! # real numbers
+//! Reals are read into `f64`. Width suffixes are ignored. Sign prefixes are
+//! not supported in the significand. Hexadecimal floating points are not
+//! supported.
+
+use std::char;
+use std::str::{self, FromStr};
+
+use nom::branch::alt;
+use nom::bytes::complete::is_not;
+use nom::bytes::complete::tag;
+use nom::character::complete::{char, one_of};
+use nom::combinator::{complete, map, map_opt, opt, recognize};
+use nom::multi::{fold_many0, many0, many1, many_m_n};
+use nom::sequence::{delimited, pair, preceded, terminated, tuple};
+use nom::*;
+
+use crate::expr::EvalResult;
+use crate::ToCexprResult;
+
+#[derive(Debug, Copy, Clone, PartialEq, Eq)]
+/// A C character value: either a Rust `char` or a raw numeric code.
+pub enum CChar {
+    /// A character that can be represented as a `char`
+    Char(char),
+    /// Any other character (8-bit characters, unicode surrogates, etc.), kept as its numeric value
+    Raw(u64),
+}
+
+impl From<u8> for CChar {
+    /// Classifies a single byte: ASCII (`0..=0x7f`) becomes `Char`, and a
+    /// byte with the high bit set is kept as its numeric value in `Raw`.
+    fn from(i: u8) -> CChar {
+        let is_ascii = i <= 0x7f;
+        if is_ascii { CChar::Char(i as char) } else { CChar::Raw(u64::from(i)) }
+    }
+}
+
+// A non-allocating version of this would be nice...
+/// Encodes a character as the bytes it contributes to a narrow string.
+///
+/// Written as `From` (the blanket impl still provides `CChar: Into<Vec<u8>>`).
+impl From<CChar> for Vec<u8> {
+    fn from(cchar: CChar) -> Vec<u8> {
+        match cchar {
+            // A `char` contributes its UTF-8 encoding (1 to 4 bytes).
+            CChar::Char(c) => {
+                let mut utf8 = [0u8; 4];
+                c.encode_utf8(&mut utf8).as_bytes().to_vec()
+            }
+            // Raw values are truncated to a single 8-bit character.
+            CChar::Raw(i) => vec![i as u8],
+        }
+    }
+}
+
+/// Wraps parser `f` so that it must consume its whole input to succeed.
+///
+/// When `f` succeeds but leaves input behind, the wrapper instead returns a
+/// recoverable error of kind `ErrorKind::Complete` positioned at the leftover
+/// input. Every other outcome of `f` (a full parse or any error) is returned
+/// unchanged.
+pub fn full<I: Clone, O, F>(
+    f: F,
+) -> impl Fn(I) -> nom::IResult<I, O>
+where
+    I: nom::InputLength,
+    F: Fn(I) -> nom::IResult<I, O>,
+{
+    move |input| match f(input) {
+        // Checked in a guard so that complete parses fall through untouched.
+        Ok((rest, _)) if rest.input_len() != 0 => {
+            let kind = nom::error::ErrorKind::Complete;
+            Err(nom::Err::Error(nom::error::Error::new(rest, kind)))
+        }
+        outcome => outcome,
+    }
+}
+
+// =================================
+// ======== matching digits ========
+// =================================
+
+macro_rules! byte { // expands to a parser fn accepting one byte that matches the given pattern(s)
+	($($p: pat)|* ) => {{
+        fn parser(i: &[u8]) -> crate::nom::IResult<&[u8], u8> {
+            match i.split_first() {
+                $(Some((&c @ $p,rest)))|* => Ok((rest,c)), // first byte matches: consume and return it
+                Some(_) => Err(nom::Err::Error(nom::error::Error::new(i, nom::error::ErrorKind::OneOf))), // any other byte: recoverable error
+                None => Err(nom::Err::Incomplete(Needed::new(1))), // empty input: one more byte is needed
+            }
+        }
+
+        parser // the block evaluates to the fn item, so `byte!(..)(input)` calls it directly
+	}}
+}
+
+fn binary(i: &[u8]) -> nom::IResult<&[u8], u8> {
+    byte!(b'0'..=b'1')(i) // one binary digit, returned as its ASCII byte
+}
+
+fn octal(i: &[u8]) -> nom::IResult<&[u8], u8> {
+    byte!(b'0'..=b'7')(i) // one octal digit, returned as its ASCII byte
+}
+
+fn decimal(i: &[u8]) -> nom::IResult<&[u8], u8> {
+    byte!(b'0'..=b'9')(i) // one decimal digit, returned as its ASCII byte
+}
+
+fn hexadecimal(i: &[u8]) -> nom::IResult<&[u8], u8> {
+    byte!(b'0' ..= b'9' | b'a' ..= b'f' | b'A' ..= b'F')(i) // one hex digit of either case
+}
+
+// ========================================
+// ======== characters and strings ========
+// ========================================
+
+fn escape2char(c: char) -> CChar { // maps a simple-escape letter to the control character it names
+    CChar::Char(match c {
+        'a' => '\x07', // alert (BEL)
+        'b' => '\x08', // backspace
+        'f' => '\x0c', // form feed
+        'n' => '\n',
+        'r' => '\r',
+        't' => '\t',
+        'v' => '\x0b', // vertical tab
+        _ => unreachable!("invalid escape {}", c), // escaped_char only passes letters from `abfnrtv`
+    })
+}
+
+/// Decodes the digits of an octal or hex escape; `None` if they are not a valid `u64` in `radix`.
+fn c_raw_escape(n: Vec<u8>, radix: u32) -> Option<CChar> {
+    let digits = str::from_utf8(&n).ok()?;
+    let value = u64::from_str_radix(digits, radix).ok()?;
+    if value <= 0x7f {
+        return Some(CChar::Char(value as u8 as char));
+    }
+    Some(CChar::Raw(value))
+}
+
+/// Decodes the hex digits of a `\u`/`\U` escape.
+/// Returns `None` unless they name a value that `char::from_u32` accepts.
+fn c_unicode_escape(n: Vec<u8>) -> Option<CChar> {
+    let digits = str::from_utf8(&n).ok()?;
+    let code_point = u32::from_str_radix(digits, 16).ok()?;
+    char::from_u32(code_point).map(CChar::Char)
+}
+
+fn escaped_char(i: &[u8]) -> nom::IResult<&[u8], CChar> { // one backslash escape sequence
+    preceded(
+        char('\\'),
+        alt((
+            map(one_of(r#"'"?\"#), CChar::Char), // quote, double quote, question mark, backslash: kept as is
+            map(one_of("abfnrtv"), escape2char), // single-letter control escapes
+            map_opt(many_m_n(1, 3, octal), |v| c_raw_escape(v, 8)), // octal: 1 to 3 digits
+            map_opt(preceded(char('x'), many1(hexadecimal)), |v| { // hex: x plus one or more digits
+                c_raw_escape(v, 16)
+            }),
+            map_opt(
+                preceded(char('u'), many_m_n(4, 4, hexadecimal)), // u plus exactly 4 hex digits
+                c_unicode_escape,
+            ),
+            map_opt(
+                preceded(char('U'), many_m_n(8, 8, hexadecimal)), // U plus exactly 8 hex digits
+                c_unicode_escape,
+            ),
+        )),
+    )(i)
+}
+
+fn c_width_prefix(i: &[u8]) -> nom::IResult<&[u8], &[u8]> { // the prefix that may precede a literal
+    alt((tag("u8"), tag("u"), tag("U"), tag("L")))(i) // `u8` is listed before `u`, so it is tried first
+}
+
+fn c_char(i: &[u8]) -> nom::IResult<&[u8], CChar> { // one character literal, prefix optional
+    delimited(
+        terminated(opt(c_width_prefix), char('\'')), // optional width prefix, then the opening quote
+        alt((
+            escaped_char,
+            map(byte!(0 ..= 91 /* \=92 */ | 93 ..= 255), CChar::from), // any single byte but backslash
+        )),
+        char('\''), // closing quote; exactly one character sits between the quotes
+    )(i)
+}
+
+fn c_string(i: &[u8]) -> nom::IResult<&[u8], Vec<u8>> { // one string literal, as raw bytes
+    delimited(
+        alt((preceded(c_width_prefix, char('"')), char('"'))), // opening quote, prefix optional
+        fold_many0(
+            alt((
+                map(escaped_char, |c: CChar| c.into()), // an escape, converted to its bytes
+                map(is_not([b'\\', b'"']), |c: &[u8]| c.into()), // a run of ordinary bytes, copied
+            )),
+            Vec::new,
+            |mut v: Vec<u8>, res: Vec<u8>| { // append each piece to the accumulated contents
+                v.extend_from_slice(&res);
+                v
+            },
+        ),
+        char('"'), // closing quote
+    )(i)
+}
+
+// ================================
+// ======== parse integers ========
+// ================================
+
+/// Reads the digit bytes `n` as a `u64` in `radix`; `None` if they are malformed or overflow.
+fn c_int_radix(n: Vec<u8>, radix: u32) -> Option<u64> {
+    let digits = str::from_utf8(&n).ok()?;
+    u64::from_str_radix(digits, radix).ok()
+}
+
+fn take_ul(input: &[u8]) -> IResult<&[u8], &[u8]> { // splits off the leading run of u/U/l/L bytes
+    let r = input.split_at_position(|c| c != b'u' && c != b'U' && c != b'l' && c != b'L');
+    match r {
+        Err(Err::Incomplete(_)) => Ok((&input[input.len()..], input)), // treat the whole input as the run
+        res => res,
+    }
+}
+
+fn c_int(i: &[u8]) -> nom::IResult<&[u8], i64> { // an integer literal with optional u/l suffix
+    map(
+        terminated(
+            alt((
+                map_opt(preceded(tag("0x"), many1(complete(hexadecimal))), |v| { // hexadecimal
+                    c_int_radix(v, 16)
+                }),
+                map_opt(preceded(tag("0X"), many1(complete(hexadecimal))), |v| {
+                    c_int_radix(v, 16)
+                }),
+                map_opt(preceded(tag("0b"), many1(complete(binary))), |v| { // binary
+                    c_int_radix(v, 2)
+                }),
+                map_opt(preceded(tag("0B"), many1(complete(binary))), |v| {
+                    c_int_radix(v, 2)
+                }),
+                map_opt(preceded(char('0'), many1(complete(octal))), |v| { // octal: 0 plus 1+ digits
+                    c_int_radix(v, 8)
+                }),
+                map_opt(many1(complete(decimal)), |v| c_int_radix(v, 10)), // decimal, incl. a lone 0
+                |input| Err(crate::nom::Err::Error(nom::error::Error::new(input, crate::nom::ErrorKind::Fix))), // last resort: always an error
+            )),
+            opt(take_ul), // width and sign suffixes are consumed and ignored
+        ),
+        |i| i as i64, // u64 to i64 cast; values above i64::MAX wrap around
+    )(i)
+}
+
+// ==============================
+// ======== parse floats ========
+// ==============================
+
+fn float_width(i: &[u8]) -> nom::IResult<&[u8], u8> { // a single float width suffix byte
+    nom::combinator::complete(byte!(b'f' | b'l' | b'F' | b'L'))(i) // end of input is an error here
+}
+
+fn float_exp(i: &[u8]) -> nom::IResult<&[u8], (Option<u8>, Vec<u8>)> { // exponent part of a float
+    preceded(
+        byte!(b'e' | b'E'), // exponent marker
+        pair(opt(byte!(b'-' | b'+')), many1(complete(decimal))), // optional sign, then 1+ digits
+    )(i)
+}
+
+/// Parses a decimal floating-point literal, with optional width suffix, into an `f64`.
+///
+/// `alt` commits to the first alternative that succeeds, so the forms containing a decimal
+/// point also accept their exponent. If they stopped after the fraction, `1.5e3` would match
+/// as `1.5` with `e3` left over, and the literal would then be rejected by `full`, which
+/// requires all input to be consumed.
+fn c_float(i: &[u8]) -> nom::IResult<&[u8], f64> {
+    map_opt(
+        alt((
+            // digits '.' [digits] [exponent], e.g. `1.`, `1.5`, `1.5e3`
+            terminated(
+                recognize(tuple((
+                    many1(complete(decimal)),
+                    byte!(b'.'),
+                    many0(complete(decimal)),
+                    // `complete` keeps a truncated exponent from surfacing as `Incomplete`
+                    opt(complete(float_exp)),
+                ))),
+                opt(float_width),
+            ),
+            // [digits] '.' digits [exponent], e.g. `.5`, `.5e-3`
+            terminated(
+                recognize(tuple((
+                    many0(complete(decimal)),
+                    byte!(b'.'),
+                    many1(complete(decimal)),
+                    opt(complete(float_exp)),
+                ))),
+                opt(float_width),
+            ),
+            // digits exponent, e.g. `1e3`
+            terminated(
+                recognize(pair(many1(complete(decimal)), float_exp)),
+                opt(float_width),
+            ),
+            // digits followed by a mandatory width suffix, e.g. `1f`
+            terminated(recognize(many1(complete(decimal))), float_width),
+        )),
+        // The suffix is outside `recognize`, so only the numeric text reaches `f64::from_str`.
+        |v| str::from_utf8(v).ok().and_then(|i| f64::from_str(i).ok()),
+    )(i)
+}
+
+// ================================
+// ======== main interface ========
+// ================================
+
+fn one_literal(input: &[u8]) -> nom::IResult<&[u8], EvalResult, crate::Error<&[u8]>> { // tries each literal kind in turn
+    alt((
+        map(full(c_char), EvalResult::Char),
+        map(full(c_int), |i| EvalResult::Int(::std::num::Wrapping(i))),
+        map(full(c_float), EvalResult::Float), // tried after c_int, so a fully parsed integer stays Int
+        map(full(c_string), EvalResult::Str),
+    ))(input)
+    .to_cexpr_result() // re-type the error as crate::Error
+}
+
+/// Parse a single C literal: a character, integer, float or string.
+///
+/// The input must contain exactly the representation of a single literal
+/// token, and in particular no whitespace or sign prefixes.
+pub fn parse(input: &[u8]) -> IResult<&[u8], EvalResult, crate::Error<&[u8]>> {
+    crate::assert_full_parse(one_literal(input)) // also rejects a parse that leaves input behind
+}
diff --git a/third_party/rust/cexpr/src/token.rs b/third_party/rust/cexpr/src/token.rs
new file mode 100644
index 0000000000..dbc5949cd4
--- /dev/null
+++ b/third_party/rust/cexpr/src/token.rs
@@ -0,0 +1,44 @@
+// (C) Copyright 2016 Jethro G. Beekman
+//
+// Licensed under the Apache License, Version 2.0 <LICENSE-APACHE or
+// http://www.apache.org/licenses/LICENSE-2.0> or the MIT license
+// <LICENSE-MIT or http://opensource.org/licenses/MIT>, at your
+// option. This file may not be copied, modified, or distributed
+// except according to those terms.
+//! Representation of a C token
+//!
+//! This is designed to map onto a libclang CXToken.
+
+#[derive(Debug, Copy, Clone, PartialEq, Eq)]
+#[allow(missing_docs)]
+pub enum Kind { // token categories; per the module docs, designed to map onto a libclang CXToken
+    Punctuation,
+    Keyword,
+    Identifier,
+    Literal,
+    Comment, // tokens of this kind are the ones `remove_comments` discards
+}
+
+/// A single token in a C expression: a kind plus the token's raw bytes.
+#[derive(Debug, Clone, PartialEq, Eq)]
+pub struct Token {
+    /// The category of this token (see `Kind`).
+    pub kind: Kind,
+    /// The bytes that make up the token, owned by the token itself.
+    pub raw: Box<[u8]>,
+}
+
+impl<'a> From<(Kind, &'a [u8])> for Token {
+    /// Builds a token from its kind and source bytes.
+    /// The borrowed bytes are copied, so the token does not keep the borrow.
+    fn from((kind, value): (Kind, &'a [u8])) -> Token {
+        let raw: Box<[u8]> = value.into();
+        Token { kind, raw }
+    }
+}
+
+/// Drops every `Kind::Comment` token from `v` in place and returns `v` for chaining.
+pub fn remove_comments(v: &mut Vec<Token>) -> &mut Vec<Token> {
+    v.retain(|token| !matches!(token.kind, Kind::Comment));
+    v
+}
author	Daniel Baumann <daniel.baumann@progress-linux.org>	2024-04-19 00:47:55 +0000
committer	Daniel Baumann <daniel.baumann@progress-linux.org>	2024-04-19 00:47:55 +0000
commit	26a029d407be480d791972afb5975cf62c9360a6 (patch)
tree	f435a8308119effd964b339f76abb83a57c29483 /third_party/rust/cexpr/src
parent	Initial commit. (diff)
download	firefox-26a029d407be480d791972afb5975cf62c9360a6.tar.xz firefox-26a029d407be480d791972afb5975cf62c9360a6.zip