diff options
author | Daniel Baumann <daniel.baumann@progress-linux.org> | 2024-05-18 02:49:50 +0000 |
---|---|---|
committer | Daniel Baumann <daniel.baumann@progress-linux.org> | 2024-05-18 02:49:50 +0000 |
commit | 9835e2ae736235810b4ea1c162ca5e65c547e770 (patch) | |
tree | 3fcebf40ed70e581d776a8a4c65923e8ec20e026 /vendor/winnow/examples/s_expression | |
parent | Releasing progress-linux version 1.70.0+dfsg2-1~progress7.99u1. (diff) | |
download | rustc-9835e2ae736235810b4ea1c162ca5e65c547e770.tar.xz rustc-9835e2ae736235810b4ea1c162ca5e65c547e770.zip |
Merging upstream version 1.71.1+dfsg1.
Signed-off-by: Daniel Baumann <daniel.baumann@progress-linux.org>
Diffstat (limited to 'vendor/winnow/examples/s_expression')
-rw-r--r-- | vendor/winnow/examples/s_expression/main.rs | 20 | ||||
-rw-r--r-- | vendor/winnow/examples/s_expression/parser.rs | 362 |
2 files changed, 382 insertions, 0 deletions
diff --git a/vendor/winnow/examples/s_expression/main.rs b/vendor/winnow/examples/s_expression/main.rs new file mode 100644 index 000000000..da055bed8 --- /dev/null +++ b/vendor/winnow/examples/s_expression/main.rs @@ -0,0 +1,20 @@ +//! In this example we build an [S-expression](https://en.wikipedia.org/wiki/S-expression) +//! parser and tiny [lisp](https://en.wikipedia.org/wiki/Lisp_(programming_language)) interpreter. +//! Lisp is a simple type of language made up of Atoms and Lists, forming easily parsable trees. + +#![cfg(feature = "alloc")] + +mod parser; + +fn main() { + let expression_1 = "((if (= (+ 3 (/ 9 3)) + (* 2 3)) + * + /) + 456 123)"; + println!( + "\"{}\"\nevaled gives us: {:?}", + expression_1, + parser::eval_from_str(expression_1) + ); +} diff --git a/vendor/winnow/examples/s_expression/parser.rs b/vendor/winnow/examples/s_expression/parser.rs new file mode 100644 index 000000000..9712c02fe --- /dev/null +++ b/vendor/winnow/examples/s_expression/parser.rs @@ -0,0 +1,362 @@ +//! In this example we build an [S-expression](https://en.wikipedia.org/wiki/S-expression) +//! parser and tiny [lisp](https://en.wikipedia.org/wiki/Lisp_(programming_language)) interpreter. +//! Lisp is a simple type of language made up of Atoms and Lists, forming easily parsable trees. + +use winnow::{ + branch::alt, + bytes::one_of, + character::{alpha1, digit1, multispace0, multispace1}, + combinator::{cut_err, opt}, + error::VerboseError, + multi::many0, + sequence::{delimited, preceded, terminated}, + IResult, Parser, +}; + +/// We start with a top-level function to tie everything together, letting +/// us call eval on a string directly +pub fn eval_from_str(src: &str) -> Result<Expr, String> { + parse_expr(src) + .map_err(|e: winnow::error::ErrMode<VerboseError<&str>>| format!("{:#?}", e)) + .and_then(|(_, exp)| eval_expression(exp).ok_or_else(|| "Eval failed".to_string())) +} + +/// For parsing, we start by defining the types that define the shape of data that we want. +/// In this case, we want something tree-like + +/// The remaining half is Lists. We implement these as recursive Expressions. +/// For a list of numbers, we have `'(1 2 3)`, which we'll parse to: +/// ``` +/// Expr::Quote(vec![Expr::Constant(Atom::Num(1)), +/// Expr::Constant(Atom::Num(2)), +/// Expr::Constant(Atom::Num(3))]) +/// Quote takes an S-expression and prevents evaluation of it, making it a data +/// structure that we can deal with programmatically. Thus any valid expression +/// is also a valid data structure in Lisp itself. +#[derive(Debug, Eq, PartialEq, Clone)] +pub enum Expr { + Constant(Atom), + /// (func-name arg1 arg2) + Application(Box<Expr>, Vec<Expr>), + /// (if predicate do-this) + If(Box<Expr>, Box<Expr>), + /// (if predicate do-this otherwise-do-this) + IfElse(Box<Expr>, Box<Expr>, Box<Expr>), + /// '(3 (if (+ 3 3) 4 5) 7) + Quote(Vec<Expr>), +} + +/// We now wrap this type and a few other primitives into our Atom type. +/// Remember from before that Atoms form one half of our language. +#[derive(Debug, Eq, PartialEq, Clone)] +pub enum Atom { + Num(i32), + Keyword(String), + Boolean(bool), + BuiltIn(BuiltIn), +} + +/// Now, the most basic type. We define some built-in functions that our lisp has +#[derive(Debug, Eq, PartialEq, Clone, Copy)] +pub enum BuiltIn { + Plus, + Minus, + Times, + Divide, + Equal, + Not, +} + +/// With types defined, we move onto the top-level expression parser! +fn parse_expr(i: &str) -> IResult<&str, Expr, VerboseError<&str>> { + preceded( + multispace0, + alt((parse_constant, parse_application, parse_if, parse_quote)), + ) + .parse_next(i) +} + +/// We then add the Expr layer on top +fn parse_constant(i: &str) -> IResult<&str, Expr, VerboseError<&str>> { + parse_atom.map(Expr::Constant).parse_next(i) +} + +/// Now we take all these simple parsers and connect them. +/// We can now parse half of our language! +fn parse_atom(i: &str) -> IResult<&str, Atom, VerboseError<&str>> { + alt(( + parse_num, + parse_bool, + parse_builtin.map(Atom::BuiltIn), + parse_keyword, + )) + .parse_next(i) +} + +/// Next up is number parsing. We're keeping it simple here by accepting any number (> 1) +/// of digits but ending the program if it doesn't fit into an i32. +fn parse_num(i: &str) -> IResult<&str, Atom, VerboseError<&str>> { + alt(( + digit1.map_res(|digit_str: &str| digit_str.parse::<i32>().map(Atom::Num)), + preceded("-", digit1).map(|digit_str: &str| Atom::Num(-digit_str.parse::<i32>().unwrap())), + )) + .parse_next(i) +} + +/// Our boolean values are also constant, so we can do it the same way +fn parse_bool(i: &str) -> IResult<&str, Atom, VerboseError<&str>> { + alt(( + "#t".map(|_| Atom::Boolean(true)), + "#f".map(|_| Atom::Boolean(false)), + )) + .parse_next(i) +} + +fn parse_builtin(i: &str) -> IResult<&str, BuiltIn, VerboseError<&str>> { + // alt gives us the result of first parser that succeeds, of the series of + // parsers we give it + alt(( + parse_builtin_op, + // map lets us process the parsed output, in this case we know what we parsed, + // so we ignore the input and return the BuiltIn directly + "not".map(|_| BuiltIn::Not), + )) + .parse_next(i) +} + +/// Continuing the trend of starting from the simplest piece and building up, +/// we start by creating a parser for the built-in operator functions. +fn parse_builtin_op(i: &str) -> IResult<&str, BuiltIn, VerboseError<&str>> { + // one_of matches one of the characters we give it + let (i, t) = one_of("+-*/=").parse_next(i)?; + + // because we are matching single character tokens, we can do the matching logic + // on the returned value + Ok(( + i, + match t { + '+' => BuiltIn::Plus, + '-' => BuiltIn::Minus, + '*' => BuiltIn::Times, + '/' => BuiltIn::Divide, + '=' => BuiltIn::Equal, + _ => unreachable!(), + }, + )) +} + +/// The next easiest thing to parse are keywords. +/// We introduce some error handling combinators: `context` for human readable errors +/// and `cut_err` to prevent back-tracking. +/// +/// Put plainly: `preceded(":", cut_err(alpha1))` means that once we see the `:` +/// character, we have to see one or more alphabetic chararcters or the input is invalid. +fn parse_keyword(i: &str) -> IResult<&str, Atom, VerboseError<&str>> { + preceded(":", cut_err(alpha1)) + .context("keyword") + .map(|sym_str: &str| Atom::Keyword(sym_str.to_string())) + .parse_next(i) +} + +/// We can now use our new combinator to define the rest of the `Expr`s. +/// +/// Starting with function application, we can see how the parser mirrors our data +/// definitions: our definition is `Application(Box<Expr>, Vec<Expr>)`, so we know +/// that we need to parse an expression and then parse 0 or more expressions, all +/// wrapped in an S-expression. +/// +/// tuples are themselves a parser, used to sequence parsers together, so we can translate this +/// directly and then map over it to transform the output into an `Expr::Application` +fn parse_application(i: &str) -> IResult<&str, Expr, VerboseError<&str>> { + let application_inner = + (parse_expr, many0(parse_expr)).map(|(head, tail)| Expr::Application(Box::new(head), tail)); + // finally, we wrap it in an s-expression + s_exp(application_inner).parse_next(i) +} + +/// Because `Expr::If` and `Expr::IfElse` are so similar (we easily could have +/// defined `Expr::If` to have an `Option` for the else block), we parse both +/// in a single function. +/// +/// In fact, we define our parser as if `Expr::If` was defined with an Option in it, +/// we have the `opt` combinator which fits very nicely here. +fn parse_if(i: &str) -> IResult<&str, Expr, VerboseError<&str>> { + let if_inner = preceded( + // here to avoid ambiguity with other names starting with `if`, if we added + // variables to our language, we say that if must be terminated by at least + // one whitespace character + terminated("if", multispace1), + cut_err((parse_expr, parse_expr, opt(parse_expr))), + ) + .map(|(pred, true_branch, maybe_false_branch)| { + if let Some(false_branch) = maybe_false_branch { + Expr::IfElse( + Box::new(pred), + Box::new(true_branch), + Box::new(false_branch), + ) + } else { + Expr::If(Box::new(pred), Box::new(true_branch)) + } + }) + .context("if expression"); + s_exp(if_inner).parse_next(i) +} + +/// A quoted S-expression is list data structure. +/// +/// This example doesn't have the symbol atom, but by adding variables and changing +/// the definition of quote to not always be around an S-expression, we'd get them +/// naturally. +fn parse_quote(i: &str) -> IResult<&str, Expr, VerboseError<&str>> { + // this should look very straight-forward after all we've done: + // we find the `'` (quote) character, use cut_err to say that we're unambiguously + // looking for an s-expression of 0 or more expressions, and then parse them + preceded("'", cut_err(s_exp(many0(parse_expr)))) + .context("quote") + .map(Expr::Quote) + .parse_next(i) +} + +/// Before continuing, we need a helper function to parse lists. +/// A list starts with `(` and ends with a matching `)`. +/// By putting whitespace and newline parsing here, we can avoid having to worry about it +/// in much of the rest of the parser. +//.parse_next/ +/// Unlike the previous functions, this function doesn't take or consume input, instead it +/// takes a parsing function and returns a new parsing function. +fn s_exp<'a, O1, F>(inner: F) -> impl Parser<&'a str, O1, VerboseError<&'a str>> +where + F: Parser<&'a str, O1, VerboseError<&'a str>>, +{ + delimited( + '(', + preceded(multispace0, inner), + cut_err(preceded(multispace0, ')')).context("closing paren"), + ) +} + +/// And that's it! +/// We can now parse our entire lisp language. +/// +/// But in order to make it a little more interesting, we can hack together +/// a little interpreter to take an Expr, which is really an +/// [Abstract Syntax Tree](https://en.wikipedia.org/wiki/Abstract_syntax_tree) (AST), +/// and give us something back + +/// This function tries to reduce the AST. +/// This has to return an Expression rather than an Atom because quoted `s_expressions` +/// can't be reduced +fn eval_expression(e: Expr) -> Option<Expr> { + match e { + // Constants and quoted s-expressions are our base-case + Expr::Constant(_) | Expr::Quote(_) => Some(e), + // we then recursively `eval_expression` in the context of our special forms + // and built-in operators + Expr::If(pred, true_branch) => { + let reduce_pred = eval_expression(*pred)?; + if get_bool_from_expr(reduce_pred)? { + eval_expression(*true_branch) + } else { + None + } + } + Expr::IfElse(pred, true_branch, false_branch) => { + let reduce_pred = eval_expression(*pred)?; + if get_bool_from_expr(reduce_pred)? { + eval_expression(*true_branch) + } else { + eval_expression(*false_branch) + } + } + Expr::Application(head, tail) => { + let reduced_head = eval_expression(*head)?; + let reduced_tail = tail + .into_iter() + .map(eval_expression) + .collect::<Option<Vec<Expr>>>()?; + if let Expr::Constant(Atom::BuiltIn(bi)) = reduced_head { + Some(Expr::Constant(match bi { + BuiltIn::Plus => Atom::Num( + reduced_tail + .into_iter() + .map(get_num_from_expr) + .collect::<Option<Vec<i32>>>()? + .into_iter() + .sum(), + ), + BuiltIn::Times => Atom::Num( + reduced_tail + .into_iter() + .map(get_num_from_expr) + .collect::<Option<Vec<i32>>>()? + .into_iter() + .product(), + ), + BuiltIn::Equal => Atom::Boolean( + reduced_tail + .iter() + .zip(reduced_tail.iter().skip(1)) + .all(|(a, b)| a == b), + ), + BuiltIn::Not => { + if reduced_tail.len() != 1 { + return None; + } else { + Atom::Boolean(!get_bool_from_expr( + reduced_tail.first().cloned().unwrap(), + )?) + } + } + BuiltIn::Minus => { + Atom::Num(if let Some(first_elem) = reduced_tail.first().cloned() { + let fe = get_num_from_expr(first_elem)?; + reduced_tail + .into_iter() + .map(get_num_from_expr) + .collect::<Option<Vec<i32>>>()? + .into_iter() + .skip(1) + .fold(fe, |a, b| a - b) + } else { + Default::default() + }) + } + BuiltIn::Divide => { + Atom::Num(if let Some(first_elem) = reduced_tail.first().cloned() { + let fe = get_num_from_expr(first_elem)?; + reduced_tail + .into_iter() + .map(get_num_from_expr) + .collect::<Option<Vec<i32>>>()? + .into_iter() + .skip(1) + .fold(fe, |a, b| a / b) + } else { + Default::default() + }) + } + })) + } else { + None + } + } + } +} + +/// To start we define a couple of helper functions +fn get_num_from_expr(e: Expr) -> Option<i32> { + if let Expr::Constant(Atom::Num(n)) = e { + Some(n) + } else { + None + } +} + +fn get_bool_from_expr(e: Expr) -> Option<bool> { + if let Expr::Constant(Atom::Boolean(b)) = e { + Some(b) + } else { + None + } +} |