summaryrefslogtreecommitdiffstats
path: root/vendor/winnow-0.4.7/examples/s_expression/parser.rs
diff options
context:
space:
mode:
Diffstat (limited to 'vendor/winnow-0.4.7/examples/s_expression/parser.rs')
-rw-r--r--vendor/winnow-0.4.7/examples/s_expression/parser.rs362
1 files changed, 362 insertions, 0 deletions
diff --git a/vendor/winnow-0.4.7/examples/s_expression/parser.rs b/vendor/winnow-0.4.7/examples/s_expression/parser.rs
new file mode 100644
index 000000000..9a1686e1c
--- /dev/null
+++ b/vendor/winnow-0.4.7/examples/s_expression/parser.rs
@@ -0,0 +1,362 @@
+//! In this example we build an [S-expression](https://en.wikipedia.org/wiki/S-expression)
+//! parser and tiny [lisp](https://en.wikipedia.org/wiki/Lisp_(programming_language)) interpreter.
+//! Lisp is a simple type of language made up of Atoms and Lists, forming easily parsable trees.
+
+use winnow::{
+ ascii::{alpha1, digit1, multispace0, multispace1},
+ combinator::alt,
+ combinator::repeat,
+ combinator::{cut_err, opt},
+ combinator::{delimited, preceded, terminated},
+ error::VerboseError,
+ token::one_of,
+ IResult, Parser,
+};
+
+/// We start with a top-level function to tie everything together, letting
+/// us call eval on a string directly
+pub fn eval_from_str(src: &str) -> Result<Expr, String> {
+ parse_expr(src)
+ .map_err(|e: winnow::error::ErrMode<VerboseError<&str>>| format!("{:#?}", e))
+ .and_then(|(_, exp)| eval_expression(exp).ok_or_else(|| "Eval failed".to_string()))
+}
+
+/// For parsing, we start by defining the types that define the shape of data that we want.
+/// In this case, we want something tree-like
+
+/// The remaining half is Lists. We implement these as recursive Expressions.
+/// For a list of numbers, we have `'(1 2 3)`, which we'll parse to:
+/// ```
+/// Expr::Quote(vec![Expr::Constant(Atom::Num(1)),
+/// Expr::Constant(Atom::Num(2)),
+/// Expr::Constant(Atom::Num(3))])
+/// Quote takes an S-expression and prevents evaluation of it, making it a data
+/// structure that we can deal with programmatically. Thus any valid expression
+/// is also a valid data structure in Lisp itself.
+#[derive(Debug, Eq, PartialEq, Clone)]
+pub enum Expr {
+ Constant(Atom),
+ /// (func-name arg1 arg2)
+ Application(Box<Expr>, Vec<Expr>),
+ /// (if predicate do-this)
+ If(Box<Expr>, Box<Expr>),
+ /// (if predicate do-this otherwise-do-this)
+ IfElse(Box<Expr>, Box<Expr>, Box<Expr>),
+ /// '(3 (if (+ 3 3) 4 5) 7)
+ Quote(Vec<Expr>),
+}
+
+/// We now wrap this type and a few other primitives into our Atom type.
+/// Remember from before that Atoms form one half of our language.
+#[derive(Debug, Eq, PartialEq, Clone)]
+pub enum Atom {
+ Num(i32),
+ Keyword(String),
+ Boolean(bool),
+ BuiltIn(BuiltIn),
+}
+
+/// Now, the most basic type. We define some built-in functions that our lisp has
+#[derive(Debug, Eq, PartialEq, Clone, Copy)]
+pub enum BuiltIn {
+ Plus,
+ Minus,
+ Times,
+ Divide,
+ Equal,
+ Not,
+}
+
+/// With types defined, we move onto the top-level expression parser!
+fn parse_expr(i: &str) -> IResult<&str, Expr, VerboseError<&str>> {
+ preceded(
+ multispace0,
+ alt((parse_constant, parse_application, parse_if, parse_quote)),
+ )
+ .parse_next(i)
+}
+
+/// We then add the Expr layer on top
+fn parse_constant(i: &str) -> IResult<&str, Expr, VerboseError<&str>> {
+ parse_atom.map(Expr::Constant).parse_next(i)
+}
+
+/// Now we take all these simple parsers and connect them.
+/// We can now parse half of our language!
+fn parse_atom(i: &str) -> IResult<&str, Atom, VerboseError<&str>> {
+ alt((
+ parse_num,
+ parse_bool,
+ parse_builtin.map(Atom::BuiltIn),
+ parse_keyword,
+ ))
+ .parse_next(i)
+}
+
+/// Next up is number parsing. We're keeping it simple here by accepting any number (> 1)
+/// of digits but ending the program if it doesn't fit into an i32.
+fn parse_num(i: &str) -> IResult<&str, Atom, VerboseError<&str>> {
+ alt((
+ digit1.try_map(|digit_str: &str| digit_str.parse::<i32>().map(Atom::Num)),
+ preceded("-", digit1).map(|digit_str: &str| Atom::Num(-digit_str.parse::<i32>().unwrap())),
+ ))
+ .parse_next(i)
+}
+
+/// Our boolean values are also constant, so we can do it the same way
+fn parse_bool(i: &str) -> IResult<&str, Atom, VerboseError<&str>> {
+ alt((
+ "#t".map(|_| Atom::Boolean(true)),
+ "#f".map(|_| Atom::Boolean(false)),
+ ))
+ .parse_next(i)
+}
+
+fn parse_builtin(i: &str) -> IResult<&str, BuiltIn, VerboseError<&str>> {
+ // alt gives us the result of first parser that succeeds, of the series of
+ // parsers we give it
+ alt((
+ parse_builtin_op,
+ // map lets us process the parsed output, in this case we know what we parsed,
+ // so we ignore the input and return the BuiltIn directly
+ "not".map(|_| BuiltIn::Not),
+ ))
+ .parse_next(i)
+}
+
+/// Continuing the trend of starting from the simplest piece and building up,
+/// we start by creating a parser for the built-in operator functions.
+fn parse_builtin_op(i: &str) -> IResult<&str, BuiltIn, VerboseError<&str>> {
+ // one_of matches one of the characters we give it
+ let (i, t) = one_of("+-*/=").parse_next(i)?;
+
+ // because we are matching single character tokens, we can do the matching logic
+ // on the returned value
+ Ok((
+ i,
+ match t {
+ '+' => BuiltIn::Plus,
+ '-' => BuiltIn::Minus,
+ '*' => BuiltIn::Times,
+ '/' => BuiltIn::Divide,
+ '=' => BuiltIn::Equal,
+ _ => unreachable!(),
+ },
+ ))
+}
+
+/// The next easiest thing to parse are keywords.
+/// We introduce some error handling combinators: `context` for human readable errors
+/// and `cut_err` to prevent back-tracking.
+///
+/// Put plainly: `preceded(":", cut_err(alpha1))` means that once we see the `:`
+/// character, we have to see one or more alphabetic characters or the input is invalid.
+fn parse_keyword(i: &str) -> IResult<&str, Atom, VerboseError<&str>> {
+ preceded(":", cut_err(alpha1))
+ .context("keyword")
+ .map(|sym_str: &str| Atom::Keyword(sym_str.to_string()))
+ .parse_next(i)
+}
+
+/// We can now use our new combinator to define the rest of the `Expr`s.
+///
+/// Starting with function application, we can see how the parser mirrors our data
+/// definitions: our definition is `Application(Box<Expr>, Vec<Expr>)`, so we know
+/// that we need to parse an expression and then parse 0 or more expressions, all
+/// wrapped in an S-expression.
+///
+/// tuples are themselves a parser, used to sequence parsers together, so we can translate this
+/// directly and then map over it to transform the output into an `Expr::Application`
+fn parse_application(i: &str) -> IResult<&str, Expr, VerboseError<&str>> {
+ let application_inner = (parse_expr, repeat(0.., parse_expr))
+ .map(|(head, tail)| Expr::Application(Box::new(head), tail));
+ // finally, we wrap it in an s-expression
+ s_exp(application_inner).parse_next(i)
+}
+
+/// Because `Expr::If` and `Expr::IfElse` are so similar (we easily could have
+/// defined `Expr::If` to have an `Option` for the else block), we parse both
+/// in a single function.
+///
+/// In fact, we define our parser as if `Expr::If` was defined with an Option in it,
+/// we have the `opt` combinator which fits very nicely here.
+fn parse_if(i: &str) -> IResult<&str, Expr, VerboseError<&str>> {
+ let if_inner = preceded(
+ // here to avoid ambiguity with other names starting with `if`, if we added
+ // variables to our language, we say that if must be terminated by at least
+ // one whitespace character
+ terminated("if", multispace1),
+ cut_err((parse_expr, parse_expr, opt(parse_expr))),
+ )
+ .map(|(pred, true_branch, maybe_false_branch)| {
+ if let Some(false_branch) = maybe_false_branch {
+ Expr::IfElse(
+ Box::new(pred),
+ Box::new(true_branch),
+ Box::new(false_branch),
+ )
+ } else {
+ Expr::If(Box::new(pred), Box::new(true_branch))
+ }
+ })
+ .context("if expression");
+ s_exp(if_inner).parse_next(i)
+}
+
+/// A quoted S-expression is list data structure.
+///
+/// This example doesn't have the symbol atom, but by adding variables and changing
+/// the definition of quote to not always be around an S-expression, we'd get them
+/// naturally.
+fn parse_quote(i: &str) -> IResult<&str, Expr, VerboseError<&str>> {
+ // this should look very straight-forward after all we've done:
+ // we find the `'` (quote) character, use cut_err to say that we're unambiguously
+ // looking for an s-expression of 0 or more expressions, and then parse them
+ preceded("'", cut_err(s_exp(repeat(0.., parse_expr))))
+ .context("quote")
+ .map(Expr::Quote)
+ .parse_next(i)
+}
+
+/// Before continuing, we need a helper function to parse lists.
+/// A list starts with `(` and ends with a matching `)`.
+/// By putting whitespace and newline parsing here, we can avoid having to worry about it
+/// in much of the rest of the parser.
+//.parse_next/
+/// Unlike the previous functions, this function doesn't take or consume input, instead it
+/// takes a parsing function and returns a new parsing function.
+fn s_exp<'a, O1, F>(inner: F) -> impl Parser<&'a str, O1, VerboseError<&'a str>>
+where
+ F: Parser<&'a str, O1, VerboseError<&'a str>>,
+{
+ delimited(
+ '(',
+ preceded(multispace0, inner),
+ cut_err(preceded(multispace0, ')')).context("closing paren"),
+ )
+}
+
+/// And that's it!
+/// We can now parse our entire lisp language.
+///
+/// But in order to make it a little more interesting, we can hack together
+/// a little interpreter to take an Expr, which is really an
+/// [Abstract Syntax Tree](https://en.wikipedia.org/wiki/Abstract_syntax_tree) (AST),
+/// and give us something back
+
+/// This function tries to reduce the AST.
+/// This has to return an Expression rather than an Atom because quoted `s_expressions`
+/// can't be reduced
+fn eval_expression(e: Expr) -> Option<Expr> {
+ match e {
+ // Constants and quoted s-expressions are our base-case
+ Expr::Constant(_) | Expr::Quote(_) => Some(e),
+ // we then recursively `eval_expression` in the context of our special forms
+ // and built-in operators
+ Expr::If(pred, true_branch) => {
+ let reduce_pred = eval_expression(*pred)?;
+ if get_bool_from_expr(reduce_pred)? {
+ eval_expression(*true_branch)
+ } else {
+ None
+ }
+ }
+ Expr::IfElse(pred, true_branch, false_branch) => {
+ let reduce_pred = eval_expression(*pred)?;
+ if get_bool_from_expr(reduce_pred)? {
+ eval_expression(*true_branch)
+ } else {
+ eval_expression(*false_branch)
+ }
+ }
+ Expr::Application(head, tail) => {
+ let reduced_head = eval_expression(*head)?;
+ let reduced_tail = tail
+ .into_iter()
+ .map(eval_expression)
+ .collect::<Option<Vec<Expr>>>()?;
+ if let Expr::Constant(Atom::BuiltIn(bi)) = reduced_head {
+ Some(Expr::Constant(match bi {
+ BuiltIn::Plus => Atom::Num(
+ reduced_tail
+ .into_iter()
+ .map(get_num_from_expr)
+ .collect::<Option<Vec<i32>>>()?
+ .into_iter()
+ .sum(),
+ ),
+ BuiltIn::Times => Atom::Num(
+ reduced_tail
+ .into_iter()
+ .map(get_num_from_expr)
+ .collect::<Option<Vec<i32>>>()?
+ .into_iter()
+ .product(),
+ ),
+ BuiltIn::Equal => Atom::Boolean(
+ reduced_tail
+ .iter()
+ .zip(reduced_tail.iter().skip(1))
+ .all(|(a, b)| a == b),
+ ),
+ BuiltIn::Not => {
+ if reduced_tail.len() != 1 {
+ return None;
+ } else {
+ Atom::Boolean(!get_bool_from_expr(
+ reduced_tail.first().cloned().unwrap(),
+ )?)
+ }
+ }
+ BuiltIn::Minus => {
+ Atom::Num(if let Some(first_elem) = reduced_tail.first().cloned() {
+ let fe = get_num_from_expr(first_elem)?;
+ reduced_tail
+ .into_iter()
+ .map(get_num_from_expr)
+ .collect::<Option<Vec<i32>>>()?
+ .into_iter()
+ .skip(1)
+ .fold(fe, |a, b| a - b)
+ } else {
+ Default::default()
+ })
+ }
+ BuiltIn::Divide => {
+ Atom::Num(if let Some(first_elem) = reduced_tail.first().cloned() {
+ let fe = get_num_from_expr(first_elem)?;
+ reduced_tail
+ .into_iter()
+ .map(get_num_from_expr)
+ .collect::<Option<Vec<i32>>>()?
+ .into_iter()
+ .skip(1)
+ .fold(fe, |a, b| a / b)
+ } else {
+ Default::default()
+ })
+ }
+ }))
+ } else {
+ None
+ }
+ }
+ }
+}
+
+/// To start we define a couple of helper functions
+fn get_num_from_expr(e: Expr) -> Option<i32> {
+ if let Expr::Constant(Atom::Num(n)) = e {
+ Some(n)
+ } else {
+ None
+ }
+}
+
+fn get_bool_from_expr(e: Expr) -> Option<bool> {
+ if let Expr::Constant(Atom::Boolean(b)) = e {
+ Some(b)
+ } else {
+ None
+ }
+}