diff options
Diffstat (limited to 'third_party/rust/cexpr/src/expr.rs')
-rw-r--r-- | third_party/rust/cexpr/src/expr.rs | 610 |
1 files changed, 610 insertions, 0 deletions
diff --git a/third_party/rust/cexpr/src/expr.rs b/third_party/rust/cexpr/src/expr.rs new file mode 100644 index 0000000000..7f7e458bd4 --- /dev/null +++ b/third_party/rust/cexpr/src/expr.rs @@ -0,0 +1,610 @@ +// (C) Copyright 2016 Jethro G. Beekman +// +// Licensed under the Apache License, Version 2.0 <LICENSE-APACHE or +// http://www.apache.org/licenses/LICENSE-2.0> or the MIT license +// <LICENSE-MIT or http://opensource.org/licenses/MIT>, at your +// option. This file may not be copied, modified, or distributed +// except according to those terms. +//! Evaluating C expressions from tokens. +//! +//! Numerical operators are supported. All numerical values are treated as +//! `i64` or `f64`. Type casting is not supported. `i64` are converted to +//! `f64` when used in conjunction with a `f64`. Right shifts are always +//! arithmetic shifts. +//! +//! The `sizeof` operator is not supported. +//! +//! String concatenation is supported, but width prefixes are ignored; all +//! strings are treated as narrow strings. +//! +//! Use the `IdentifierParser` to substitute identifiers found in expressions. + +use std::collections::HashMap; +use std::num::Wrapping; +use std::ops::{ + AddAssign, BitAndAssign, BitOrAssign, BitXorAssign, DivAssign, MulAssign, RemAssign, ShlAssign, + ShrAssign, SubAssign, +}; + +use crate::literal::{self, CChar}; +use crate::token::{Kind as TokenKind, Token}; +use crate::ToCexprResult; +use nom::branch::alt; +use nom::combinator::{complete, map, map_opt}; +use nom::multi::{fold_many0, many0, separated_list0}; +use nom::sequence::{delimited, pair, preceded}; +use nom::*; + +/// Expression parser/evaluator that supports identifiers. +#[derive(Debug)] +pub struct IdentifierParser<'ident> { + identifiers: &'ident HashMap<Vec<u8>, EvalResult>, +} +#[derive(Copy, Clone)] +struct PRef<'a>(&'a IdentifierParser<'a>); + +/// A shorthand for the type of cexpr expression evaluation results. +pub type CResult<'a, R> = IResult<&'a [Token], R, crate::Error<&'a [Token]>>; + +/// The result of parsing a literal or evaluating an expression. +#[derive(Debug, Clone, PartialEq)] +#[allow(missing_docs)] +pub enum EvalResult { + Int(Wrapping<i64>), + Float(f64), + Char(CChar), + Str(Vec<u8>), + Invalid, +} + +macro_rules! result_opt ( + (fn $n:ident: $e:ident -> $t:ty) => ( + #[allow(dead_code)] + #[allow(clippy::wrong_self_convention)] + fn $n(self) -> Option<$t> { + if let EvalResult::$e(v) = self { + Some(v) + } else { + None + } + } + ); +); + +impl EvalResult { + result_opt!(fn as_int: Int -> Wrapping<i64>); + result_opt!(fn as_float: Float -> f64); + result_opt!(fn as_char: Char -> CChar); + result_opt!(fn as_str: Str -> Vec<u8>); + + #[allow(clippy::wrong_self_convention)] + fn as_numeric(self) -> Option<EvalResult> { + match self { + EvalResult::Int(_) | EvalResult::Float(_) => Some(self), + _ => None, + } + } +} + +impl From<Vec<u8>> for EvalResult { + fn from(s: Vec<u8>) -> EvalResult { + EvalResult::Str(s) + } +} + +// =========================================== +// ============= Clang tokens ================ +// =========================================== + +macro_rules! exact_token ( + ($k:ident, $c:expr) => ({ + move |input: &[Token]| { + if input.is_empty() { + let res: CResult<'_, &[u8]> = Err(crate::nom::Err::Incomplete(Needed::new($c.len()))); + res + } else { + if input[0].kind==TokenKind::$k && &input[0].raw[..]==$c { + Ok((&input[1..], &input[0].raw[..])) + } else { + Err(crate::nom::Err::Error((input, crate::ErrorKind::ExactToken(TokenKind::$k,$c)).into())) + } + } + } + }); +); + +fn identifier_token(input: &[Token]) -> CResult<'_, &[u8]> { + if input.is_empty() { + let res: CResult<'_, &[u8]> = Err(nom::Err::Incomplete(Needed::new(1))); + res + } else { + if input[0].kind == TokenKind::Identifier { + Ok((&input[1..], &input[0].raw[..])) + } else { + Err(crate::nom::Err::Error((input, crate::ErrorKind::TypedToken(TokenKind::Identifier)).into())) + } + } +} + +fn p(c: &'static str) -> impl Fn(&[Token]) -> CResult<'_, &[u8]> { + exact_token!(Punctuation, c.as_bytes()) +} + +fn one_of_punctuation(c: &'static [&'static str]) -> impl Fn(&[Token]) -> CResult<'_, &[u8]> { + move |input| { + if input.is_empty() { + let min = c + .iter() + .map(|opt| opt.len()) + .min() + .expect("at least one option"); + Err(crate::nom::Err::Incomplete(Needed::new(min))) + } else if input[0].kind == TokenKind::Punctuation + && c.iter().any(|opt| opt.as_bytes() == &input[0].raw[..]) + { + Ok((&input[1..], &input[0].raw[..])) + } else { + Err(crate::nom::Err::Error( + ( + input, + crate::ErrorKind::ExactTokens(TokenKind::Punctuation, c), + ) + .into(), + )) + } + } +} + +// ================================================== +// ============= Numeric expressions ================ +// ================================================== + +impl<'a> AddAssign<&'a EvalResult> for EvalResult { + fn add_assign(&mut self, rhs: &'a EvalResult) { + use self::EvalResult::*; + *self = match (&*self, rhs) { + (&Int(a), &Int(b)) => Int(a + b), + (&Float(a), &Int(b)) => Float(a + (b.0 as f64)), + (&Int(a), &Float(b)) => Float(a.0 as f64 + b), + (&Float(a), &Float(b)) => Float(a + b), + _ => Invalid, + }; + } +} +impl<'a> BitAndAssign<&'a EvalResult> for EvalResult { + fn bitand_assign(&mut self, rhs: &'a EvalResult) { + use self::EvalResult::*; + *self = match (&*self, rhs) { + (&Int(a), &Int(b)) => Int(a & b), + _ => Invalid, + }; + } +} +impl<'a> BitOrAssign<&'a EvalResult> for EvalResult { + fn bitor_assign(&mut self, rhs: &'a EvalResult) { + use self::EvalResult::*; + *self = match (&*self, rhs) { + (&Int(a), &Int(b)) => Int(a | b), + _ => Invalid, + }; + } +} +impl<'a> BitXorAssign<&'a EvalResult> for EvalResult { + fn bitxor_assign(&mut self, rhs: &'a EvalResult) { + use self::EvalResult::*; + *self = match (&*self, rhs) { + (&Int(a), &Int(b)) => Int(a ^ b), + _ => Invalid, + }; + } +} +impl<'a> DivAssign<&'a EvalResult> for EvalResult { + fn div_assign(&mut self, rhs: &'a EvalResult) { + use self::EvalResult::*; + *self = match (&*self, rhs) { + (&Int(a), &Int(b)) => Int(a / b), + (&Float(a), &Int(b)) => Float(a / (b.0 as f64)), + (&Int(a), &Float(b)) => Float(a.0 as f64 / b), + (&Float(a), &Float(b)) => Float(a / b), + _ => Invalid, + }; + } +} +impl<'a> MulAssign<&'a EvalResult> for EvalResult { + fn mul_assign(&mut self, rhs: &'a EvalResult) { + use self::EvalResult::*; + *self = match (&*self, rhs) { + (&Int(a), &Int(b)) => Int(a * b), + (&Float(a), &Int(b)) => Float(a * (b.0 as f64)), + (&Int(a), &Float(b)) => Float(a.0 as f64 * b), + (&Float(a), &Float(b)) => Float(a * b), + _ => Invalid, + }; + } +} +impl<'a> RemAssign<&'a EvalResult> for EvalResult { + fn rem_assign(&mut self, rhs: &'a EvalResult) { + use self::EvalResult::*; + *self = match (&*self, rhs) { + (&Int(a), &Int(b)) => Int(a % b), + (&Float(a), &Int(b)) => Float(a % (b.0 as f64)), + (&Int(a), &Float(b)) => Float(a.0 as f64 % b), + (&Float(a), &Float(b)) => Float(a % b), + _ => Invalid, + }; + } +} +impl<'a> ShlAssign<&'a EvalResult> for EvalResult { + fn shl_assign(&mut self, rhs: &'a EvalResult) { + use self::EvalResult::*; + *self = match (&*self, rhs) { + (&Int(a), &Int(b)) => Int(a << (b.0 as usize)), + _ => Invalid, + }; + } +} +impl<'a> ShrAssign<&'a EvalResult> for EvalResult { + fn shr_assign(&mut self, rhs: &'a EvalResult) { + use self::EvalResult::*; + *self = match (&*self, rhs) { + (&Int(a), &Int(b)) => Int(a >> (b.0 as usize)), + _ => Invalid, + }; + } +} +impl<'a> SubAssign<&'a EvalResult> for EvalResult { + fn sub_assign(&mut self, rhs: &'a EvalResult) { + use self::EvalResult::*; + *self = match (&*self, rhs) { + (&Int(a), &Int(b)) => Int(a - b), + (&Float(a), &Int(b)) => Float(a - (b.0 as f64)), + (&Int(a), &Float(b)) => Float(a.0 as f64 - b), + (&Float(a), &Float(b)) => Float(a - b), + _ => Invalid, + }; + } +} + +fn unary_op(input: (&[u8], EvalResult)) -> Option<EvalResult> { + use self::EvalResult::*; + assert_eq!(input.0.len(), 1); + match (input.0[0], input.1) { + (b'+', i) => Some(i), + (b'-', Int(i)) => Some(Int(Wrapping(i.0.wrapping_neg()))), // impl Neg for Wrapping not until rust 1.10... + (b'-', Float(i)) => Some(Float(-i)), + (b'-', _) => unreachable!("non-numeric unary op"), + (b'~', Int(i)) => Some(Int(!i)), + (b'~', Float(_)) => None, + (b'~', _) => unreachable!("non-numeric unary op"), + _ => unreachable!("invalid unary op"), + } +} + +fn numeric<I: Clone, E: nom::error::ParseError<I>, F>( + f: F, +) -> impl FnMut(I) -> nom::IResult<I, EvalResult, E> +where + F: FnMut(I) -> nom::IResult<I, EvalResult, E>, +{ + nom::combinator::map_opt(f, EvalResult::as_numeric) +} + +impl<'a> PRef<'a> { + fn unary(self, input: &'_ [Token]) -> CResult<'_, EvalResult> { + alt(( + delimited(p("("), |i| self.numeric_expr(i), p(")")), + numeric(|i| self.literal(i)), + numeric(|i| self.identifier(i)), + map_opt( + pair(one_of_punctuation(&["+", "-", "~"][..]), |i| self.unary(i)), + unary_op, + ), + ))(input) + } + + fn mul_div_rem(self, input: &'_ [Token]) -> CResult<'_, EvalResult> { + let (input, acc) = self.unary(input)?; + fold_many0( + pair(complete(one_of_punctuation(&["*", "/", "%"][..])), |i| { + self.unary(i) + }), + move || acc.clone(), + |mut acc, (op, val): (&[u8], EvalResult)| { + match op[0] as char { + '*' => acc *= &val, + '/' => acc /= &val, + '%' => acc %= &val, + _ => unreachable!(), + }; + acc + }, + )(input) + } + + fn add_sub(self, input: &'_ [Token]) -> CResult<'_, EvalResult> { + let (input, acc) = self.mul_div_rem(input)?; + fold_many0( + pair(complete(one_of_punctuation(&["+", "-"][..])), |i| { + self.mul_div_rem(i) + }), + move || acc.clone(), + |mut acc, (op, val): (&[u8], EvalResult)| { + match op[0] as char { + '+' => acc += &val, + '-' => acc -= &val, + _ => unreachable!(), + }; + acc + }, + )(input) + } + + fn shl_shr(self, input: &'_ [Token]) -> CResult<'_, EvalResult> { + let (input, acc) = self.add_sub(input)?; + numeric(fold_many0( + pair(complete(one_of_punctuation(&["<<", ">>"][..])), |i| { + self.add_sub(i) + }), + move || acc.clone(), + |mut acc, (op, val): (&[u8], EvalResult)| { + match op { + b"<<" => acc <<= &val, + b">>" => acc >>= &val, + _ => unreachable!(), + }; + acc + }, + ))(input) + } + + fn and(self, input: &'_ [Token]) -> CResult<'_, EvalResult> { + let (input, acc) = self.shl_shr(input)?; + numeric(fold_many0( + preceded(complete(p("&")), |i| self.shl_shr(i)), + move || acc.clone(), + |mut acc, val: EvalResult| { + acc &= &val; + acc + }, + ))(input) + } + + fn xor(self, input: &'_ [Token]) -> CResult<'_, EvalResult> { + let (input, acc) = self.and(input)?; + numeric(fold_many0( + preceded(complete(p("^")), |i| self.and(i)), + move || acc.clone(), + |mut acc, val: EvalResult| { + acc ^= &val; + acc + }, + ))(input) + } + + fn or(self, input: &'_ [Token]) -> CResult<'_, EvalResult> { + let (input, acc) = self.xor(input)?; + numeric(fold_many0( + preceded(complete(p("|")), |i| self.xor(i)), + move || acc.clone(), + |mut acc, val: EvalResult| { + acc |= &val; + acc + }, + ))(input) + } + + #[inline(always)] + fn numeric_expr(self, input: &'_ [Token]) -> CResult<'_, EvalResult> { + self.or(input) + } +} + +// ======================================================= +// ============= Literals and identifiers ================ +// ======================================================= + +impl<'a> PRef<'a> { + fn identifier(self, input: &'_ [Token]) -> CResult<'_, EvalResult> { + match input.split_first() { + None => Err(Err::Incomplete(Needed::new(1))), + Some(( + &Token { + kind: TokenKind::Identifier, + ref raw, + }, + rest, + )) => { + if let Some(r) = self.identifiers.get(&raw[..]) { + Ok((rest, r.clone())) + } else { + Err(Err::Error( + (input, crate::ErrorKind::UnknownIdentifier).into(), + )) + } + } + Some(_) => Err(Err::Error( + (input, crate::ErrorKind::TypedToken(TokenKind::Identifier)).into(), + )), + } + } + + fn literal(self, input: &'_ [Token]) -> CResult<'_, EvalResult> { + match input.split_first() { + None => Err(Err::Incomplete(Needed::new(1))), + Some(( + &Token { + kind: TokenKind::Literal, + ref raw, + }, + rest, + )) => match literal::parse(raw) { + Ok((_, result)) => Ok((rest, result)), + _ => Err(Err::Error((input, crate::ErrorKind::InvalidLiteral).into())), + }, + Some(_) => Err(Err::Error( + (input, crate::ErrorKind::TypedToken(TokenKind::Literal)).into(), + )), + } + } + + fn string(self, input: &'_ [Token]) -> CResult<'_, Vec<u8>> { + alt(( + map_opt(|i| self.literal(i), EvalResult::as_str), + map_opt(|i| self.identifier(i), EvalResult::as_str), + ))(input) + .to_cexpr_result() + } + + // "string1" "string2" etc... + fn concat_str(self, input: &'_ [Token]) -> CResult<'_, EvalResult> { + map( + pair(|i| self.string(i), many0(complete(|i| self.string(i)))), + |(first, v)| { + Vec::into_iter(v) + .fold(first, |mut s, elem| { + Vec::extend_from_slice(&mut s, Vec::<u8>::as_slice(&elem)); + s + }) + .into() + }, + )(input) + .to_cexpr_result() + } + + fn expr(self, input: &'_ [Token]) -> CResult<'_, EvalResult> { + alt(( + |i| self.numeric_expr(i), + delimited(p("("), |i| self.expr(i), p(")")), + |i| self.concat_str(i), + |i| self.literal(i), + |i| self.identifier(i), + ))(input) + .to_cexpr_result() + } + + fn macro_definition(self, input: &'_ [Token]) -> CResult<'_, (&'_ [u8], EvalResult)> { + pair(identifier_token, |i| self.expr(i))(input) + } +} + +impl<'a> ::std::ops::Deref for PRef<'a> { + type Target = IdentifierParser<'a>; + fn deref(&self) -> &IdentifierParser<'a> { + self.0 + } +} + +impl<'ident> IdentifierParser<'ident> { + fn as_ref(&self) -> PRef<'_> { + PRef(self) + } + + /// Create a new `IdentifierParser` with a set of known identifiers. When + /// a known identifier is encountered during parsing, it is substituted + /// for the value specified. + pub fn new(identifiers: &HashMap<Vec<u8>, EvalResult>) -> IdentifierParser<'_> { + IdentifierParser { identifiers } + } + + /// Parse and evaluate an expression of a list of tokens. + /// + /// Returns an error if the input is not a valid expression or if the token + /// stream contains comments, keywords or unknown identifiers. + pub fn expr<'a>(&self, input: &'a [Token]) -> CResult<'a, EvalResult> { + self.as_ref().expr(input) + } + + /// Parse and evaluate a macro definition from a list of tokens. + /// + /// Returns the identifier for the macro and its replacement evaluated as an + /// expression. The input should not include `#define`. + /// + /// Returns an error if the replacement is not a valid expression, if called + /// on most function-like macros, or if the token stream contains comments, + /// keywords or unknown identifiers. + /// + /// N.B. This is intended to fail on function-like macros, but if it the + /// macro takes a single argument, the argument name is defined as an + /// identifier, and the macro otherwise parses as an expression, it will + /// return a result even on function-like macros. + /// + /// ```c + /// // will evaluate into IDENTIFIER + /// #define DELETE(IDENTIFIER) + /// // will evaluate into IDENTIFIER-3 + /// #define NEGATIVE_THREE(IDENTIFIER) -3 + /// ``` + pub fn macro_definition<'a>(&self, input: &'a [Token]) -> CResult<'a, (&'a [u8], EvalResult)> { + crate::assert_full_parse(self.as_ref().macro_definition(input)) + } +} + +/// Parse and evaluate an expression of a list of tokens. +/// +/// Returns an error if the input is not a valid expression or if the token +/// stream contains comments, keywords or identifiers. +pub fn expr(input: &[Token]) -> CResult<'_, EvalResult> { + IdentifierParser::new(&HashMap::new()).expr(input) +} + +/// Parse and evaluate a macro definition from a list of tokens. +/// +/// Returns the identifier for the macro and its replacement evaluated as an +/// expression. The input should not include `#define`. +/// +/// Returns an error if the replacement is not a valid expression, if called +/// on a function-like macro, or if the token stream contains comments, +/// keywords or identifiers. +pub fn macro_definition(input: &[Token]) -> CResult<'_, (&'_ [u8], EvalResult)> { + IdentifierParser::new(&HashMap::new()).macro_definition(input) +} + +/// Parse a functional macro declaration from a list of tokens. +/// +/// Returns the identifier for the macro and the argument list (in order). The +/// input should not include `#define`. The actual definition is not parsed and +/// may be obtained from the unparsed data returned. +/// +/// Returns an error if the input is not a functional macro or if the token +/// stream contains comments. +/// +/// # Example +/// ``` +/// use cexpr::expr::{IdentifierParser, EvalResult, fn_macro_declaration}; +/// use cexpr::assert_full_parse; +/// use cexpr::token::Kind::*; +/// use cexpr::token::Token; +/// +/// // #define SUFFIX(arg) arg "suffix" +/// let tokens = vec![ +/// (Identifier, &b"SUFFIX"[..]).into(), +/// (Punctuation, &b"("[..]).into(), +/// (Identifier, &b"arg"[..]).into(), +/// (Punctuation, &b")"[..]).into(), +/// (Identifier, &b"arg"[..]).into(), +/// (Literal, &br#""suffix""#[..]).into(), +/// ]; +/// +/// // Try to parse the functional part +/// let (expr, (ident, args)) = fn_macro_declaration(&tokens).unwrap(); +/// assert_eq!(ident, b"SUFFIX"); +/// +/// // Create dummy arguments +/// let idents = args.into_iter().map(|arg| +/// (arg.to_owned(), EvalResult::Str(b"test".to_vec())) +/// ).collect(); +/// +/// // Evaluate the macro +/// let (_, evaluated) = assert_full_parse(IdentifierParser::new(&idents).expr(expr)).unwrap(); +/// assert_eq!(evaluated, EvalResult::Str(b"testsuffix".to_vec())); +/// ``` +pub fn fn_macro_declaration(input: &[Token]) -> CResult<'_, (&[u8], Vec<&[u8]>)> { + pair( + identifier_token, + delimited( + p("("), + separated_list0(p(","), identifier_token), + p(")"), + ), + )(input) +} |