diff options
Diffstat (limited to 'pigeonhole/src/lib-sieve/sieve-parser.c')
-rw-r--r-- | pigeonhole/src/lib-sieve/sieve-parser.c | 670 |
1 files changed, 670 insertions, 0 deletions
diff --git a/pigeonhole/src/lib-sieve/sieve-parser.c b/pigeonhole/src/lib-sieve/sieve-parser.c new file mode 100644 index 0000000..ee54994 --- /dev/null +++ b/pigeonhole/src/lib-sieve/sieve-parser.c @@ -0,0 +1,670 @@ +/* Copyright (c) 2002-2018 Pigeonhole authors, see the included COPYING file + */ + +#include "lib.h" +#include "istream.h" +#include "failures.h" + +#include "sieve-common.h" +#include "sieve-limits.h" +#include "sieve-script.h" +#include "sieve-lexer.h" +#include "sieve-parser.h" +#include "sieve-error.h" +#include "sieve-ast.h" + +/* + * Forward declarations + */ + +static int +sieve_parser_recover(struct sieve_parser *parser, + enum sieve_token_type end_token); + +/* + * Parser object + */ + +struct sieve_parser { + pool_t pool; + + bool valid; + + struct sieve_script *script; + + struct sieve_error_handler *ehandler; + + const struct sieve_lexer *lexer; + struct sieve_ast *ast; +}; + +struct sieve_parser * +sieve_parser_create(struct sieve_script *script, + struct sieve_error_handler *ehandler, + enum sieve_error *error_r) +{ + struct sieve_parser *parser; + const struct sieve_lexer *lexer; + + lexer = sieve_lexer_create(script, ehandler, error_r); + if (lexer != NULL) { + pool_t pool = pool_alloconly_create("sieve_parser", 4096); + + parser = p_new(pool, struct sieve_parser, 1); + parser->pool = pool; + parser->valid = TRUE; + + parser->ehandler = ehandler; + sieve_error_handler_ref(ehandler); + + parser->script = script; + sieve_script_ref(script); + + parser->lexer = lexer; + parser->ast = NULL; + + return parser; + } + + return NULL; +} + +void sieve_parser_free(struct sieve_parser **parser) +{ + if ((*parser)->ast != NULL) + sieve_ast_unref(&(*parser)->ast); + + sieve_lexer_free(&(*parser)->lexer); + sieve_script_unref(&(*parser)->script); + + sieve_error_handler_unref(&(*parser)->ehandler); + + pool_unref(&(*parser)->pool); + + *parser = NULL; +} + +/* + * Internal error handling + */ + +inline static void ATTR_FORMAT(4, 5) +sieve_parser_error(struct sieve_parser *parser, + const char *csrc_filename, unsigned int csrc_linenum, + const char *fmt, ...) +{ + struct sieve_error_params params = { + .log_type = LOG_TYPE_ERROR, + .csrc = { + .filename = csrc_filename, + .linenum = csrc_linenum, + }, + }; + va_list args; + + va_start(args, fmt); + + /* Don't report a parse error if the lexer complained already */ + if (sieve_lexer_token_type(parser->lexer) != STT_ERROR) { + T_BEGIN { + params.location = sieve_error_script_location( + parser->script, + sieve_lexer_token_line(parser->lexer)); + sieve_logv(parser->ehandler, ¶ms, fmt, args); + } T_END; + } + + parser->valid = FALSE; + + va_end(args); +} +#define sieve_parser_error(parser, ...) \ + sieve_parser_error(parser, __FILE__, __LINE__, __VA_ARGS__) + +/* + * Sieve grammar parsing + */ + +/* sieve_parse_arguments(): + + Parses both command arguments and sub-tests: + arguments = *argument [test / test-list] + argument = string-list / number / tag + string = quoted-string / multi-line [[implicitly handled in lexer]] + string-list = "[" string *("," string) "]" / string ;; if + there is only a single string, the brackets are optional + test-list = "(" test *("," test) ")" + test = identifier arguments + */ +static int +sieve_parse_arguments(struct sieve_parser *parser, struct sieve_ast_node *node, + unsigned int depth) +{ + const struct sieve_lexer *lexer = parser->lexer; + struct sieve_ast_node *test = NULL; + bool test_present = TRUE; + bool arg_present = TRUE; + int result = 1; /* Indicates whether the parser is in a defined, not + necessarily error-free state */ + + /* Parse arguments */ + while (arg_present && result > 0) { + struct sieve_ast_argument *arg; + + if (!parser->valid && + !sieve_errors_more_allowed(parser->ehandler)) { + result = 0; + break; + } + + switch (sieve_lexer_token_type(lexer)) { + /* String list */ + case STT_LSQUARE: + /* Create stinglist object */ + arg = sieve_ast_argument_stringlist_create( + node, sieve_lexer_token_line(parser->lexer)); + if (arg == NULL) break; + sieve_lexer_skip_token(lexer); + + if (sieve_lexer_token_type(lexer) == STT_STRING) { + bool add_failed = FALSE; + + /* Add the string to the list */ + if (!sieve_ast_stringlist_add( + arg, sieve_lexer_token_str(lexer), + sieve_lexer_token_line(parser->lexer))) + add_failed = TRUE; + sieve_lexer_skip_token(lexer); + + while (!add_failed && + sieve_lexer_token_type(lexer) == STT_COMMA) { + sieve_lexer_skip_token(lexer); + + /* Check parser status */ + if (!parser->valid && + !sieve_errors_more_allowed(parser->ehandler)) { + result = sieve_parser_recover(parser, STT_RSQUARE); + break; + } + + if (sieve_lexer_token_type(lexer) == STT_STRING) { + /* Add the string to the list */ + if (!sieve_ast_stringlist_add( + arg, sieve_lexer_token_str(lexer), + sieve_lexer_token_line(parser->lexer))) + add_failed = TRUE; + + sieve_lexer_skip_token(lexer); + } else { + sieve_parser_error(parser, + "expecting string after ',' in string list, " + "but found %s", + sieve_lexer_token_description(lexer)); + result = sieve_parser_recover(parser, STT_RSQUARE); + break; + } + } + + if (add_failed) { + sieve_parser_error(parser, + "failed to accept more items in string list"); + return -1; + } + } else { + sieve_parser_error(parser, + "expecting string after '[' in string list, " + "but found %s", + sieve_lexer_token_description(lexer)); + result = sieve_parser_recover(parser, STT_RSQUARE); + } + + /* Finish the string list */ + if (sieve_lexer_token_type(lexer) == STT_RSQUARE) { + sieve_lexer_skip_token(lexer); + } else { + sieve_parser_error(parser, + "expecting ',' or end of string list ']', " + "but found %s", + sieve_lexer_token_description(lexer)); + + if ((result = sieve_parser_recover(parser, STT_RSQUARE)) > 0) + sieve_lexer_skip_token(lexer); + } + break; + /* Single string */ + case STT_STRING: + arg = sieve_ast_argument_string_create( + node, sieve_lexer_token_str(lexer), + sieve_lexer_token_line(parser->lexer)); + sieve_lexer_skip_token(lexer); + break; + /* Number */ + case STT_NUMBER: + arg = sieve_ast_argument_number_create( + node, sieve_lexer_token_int(lexer), + sieve_lexer_token_line(parser->lexer)); + sieve_lexer_skip_token(lexer); + break; + /* Tag */ + case STT_TAG: + arg = sieve_ast_argument_tag_create( + node, sieve_lexer_token_ident(lexer), + sieve_lexer_token_line(parser->lexer)); + sieve_lexer_skip_token(lexer); + break; + /* End of argument list, continue with tests */ + default: + arg_present = FALSE; + break; + } + + if (arg_present && arg == NULL) { + sieve_parser_error(parser, + "failed to accept more arguments for command '%s'", + node->identifier); + return -1; + } + + if (sieve_ast_argument_count(node) > SIEVE_MAX_COMMAND_ARGUMENTS) { + sieve_parser_error(parser, + "too many arguments for command '%s'", + node->identifier); + return 0; + } + } + + if (result <= 0) + return result; /* Defer recovery to caller */ + + /* --> [ test / test-list ] + test-list = "(" test *("," test) ")" + test = identifier arguments + */ + switch (sieve_lexer_token_type(lexer)) { + /* Single test */ + case STT_IDENTIFIER: + if ((depth + 1) > SIEVE_MAX_TEST_NESTING) { + sieve_parser_error(parser, + "cannot nest tests deeper than %u levels", + SIEVE_MAX_TEST_NESTING); + return 0; + } + + test = sieve_ast_test_create( + node, sieve_lexer_token_ident(lexer), + sieve_lexer_token_line(parser->lexer)); + sieve_lexer_skip_token(lexer); + + /* Theoretically, test can be NULL */ + if (test == NULL) + break; + + /* Parse test arguments, which may include more tests (recurse) */ + if (sieve_parse_arguments(parser, test, depth + 1) <= 0) { + return 0; /* Defer recovery to caller */ + } + + break; + + /* Test list */ + case STT_LBRACKET: + sieve_lexer_skip_token(lexer); + + if (depth+1 > SIEVE_MAX_TEST_NESTING) { + sieve_parser_error(parser, + "cannot nest tests deeper than %u levels", + SIEVE_MAX_TEST_NESTING); + result = sieve_parser_recover(parser, STT_RBRACKET); + + if (result > 0) + sieve_lexer_skip_token(lexer); + return result; + } + + node->test_list = TRUE; + + /* Test starts with identifier */ + if (sieve_lexer_token_type(lexer) == STT_IDENTIFIER) { + test = sieve_ast_test_create( + node, sieve_lexer_token_ident(lexer), + sieve_lexer_token_line(parser->lexer)); + sieve_lexer_skip_token(lexer); + + if (test == NULL) + break; + + /* Parse test arguments, which may include more tests (recurse) */ + if ((result = sieve_parse_arguments(parser, test, depth+1)) > 0) { + + /* More tests ? */ + while (sieve_lexer_token_type(lexer) == STT_COMMA) { + sieve_lexer_skip_token(lexer); + + /* Check parser status */ + if (!parser->valid && + !sieve_errors_more_allowed(parser->ehandler)) { + result = sieve_parser_recover(parser, STT_RBRACKET); + break; + } + + /* Test starts with identifier */ + if (sieve_lexer_token_type(lexer) == STT_IDENTIFIER) { + test = sieve_ast_test_create( + node, sieve_lexer_token_ident(lexer), + sieve_lexer_token_line(parser->lexer)); + sieve_lexer_skip_token(lexer); + + if (test == NULL) + break; + + /* Parse test arguments, which may include more tests (recurse) */ + if ((result = sieve_parse_arguments(parser, test, depth+1)) <= 0) { + if (result < 0) + return result; + result = sieve_parser_recover(parser, STT_RBRACKET); + break; + } + } else { + sieve_parser_error(parser, + "expecting test identifier after ',' in test list, " + "but found %s", + sieve_lexer_token_description(lexer)); + result = sieve_parser_recover(parser, STT_RBRACKET); + break; + } + } + if (test == NULL) + break; + } else { + if (result < 0) + return result; + + result = sieve_parser_recover(parser, STT_RBRACKET); + } + } else { + sieve_parser_error(parser, + "expecting test identifier after '(' in test list, " + "but found %s", + sieve_lexer_token_description(lexer)); + + result = sieve_parser_recover(parser, STT_RBRACKET); + } + + /* The next token should be a ')', indicating the end of the + test list + --> previous sieve_parser_recover calls try to restore this + situation after parse errors. + */ + if (sieve_lexer_token_type(lexer) == STT_RBRACKET) { + sieve_lexer_skip_token(lexer); + } else { + sieve_parser_error(parser, + "expecting ',' or end of test list ')', " + "but found %s", + sieve_lexer_token_description(lexer)); + + /* Recover function tries to make next token equal to + ')'. If it succeeds we need to skip it. + */ + if ((result = sieve_parser_recover(parser, STT_RBRACKET)) > 0) + sieve_lexer_skip_token(lexer); + } + break; + + default: + /* Not an error: test / test-list is optional + --> any errors are detected by the caller + */ + test_present = FALSE; + break; + } + + if (test_present && test == NULL) { + sieve_parser_error(parser, + "failed to accept more tests for command '%s'", + node->identifier); + return -1; + } + + return result; +} + +/* commands = *command + command = identifier arguments ( ";" / block ) + block = "{" commands "}" + */ +static int +sieve_parse_commands(struct sieve_parser *parser, struct sieve_ast_node *block, + unsigned int depth) +{ + const struct sieve_lexer *lexer = parser->lexer; + int result = 1; + + while (result > 0 && + sieve_lexer_token_type(lexer) == STT_IDENTIFIER) { + struct sieve_ast_node *command; + + /* Check parser status */ + if (!parser->valid && + !sieve_errors_more_allowed(parser->ehandler)) { + result = sieve_parser_recover(parser, STT_SEMICOLON); + break; + } + + /* Create command node */ + command = sieve_ast_command_create( + block, sieve_lexer_token_ident(lexer), + sieve_lexer_token_line(parser->lexer)); + sieve_lexer_skip_token(lexer); + + if (command == NULL) { + sieve_parser_error(parser, + "failed to accept more commands inside the block of command '%s'", + block->identifier); + return -1; + } + + result = sieve_parse_arguments(parser, command, 1); + + /* Check whether the command is properly terminated + (i.e. with ; or a new block) + */ + if (result > 0 && + sieve_lexer_token_type(lexer) != STT_SEMICOLON && + sieve_lexer_token_type(lexer) != STT_LCURLY) { + + sieve_parser_error(parser, + "expected end of command ';' or the beginning of a compound block '{', " + "but found %s", + sieve_lexer_token_description(lexer)); + result = 0; + } + + /* Try to recover from parse errors to reacquire a defined state + */ + if (result == 0) + result = sieve_parser_recover(parser, STT_SEMICOLON); + + /* Don't bother to continue if we are not in a defined state */ + if (result <= 0) + return result; + + switch (sieve_lexer_token_type(lexer)) { + /* End of the command */ + case STT_SEMICOLON: + sieve_lexer_skip_token(lexer); + break; + /* Command has a block {...} */ + case STT_LCURLY: + sieve_lexer_skip_token(lexer); + + /* Check current depth first */ + if ((depth + 1) > SIEVE_MAX_BLOCK_NESTING) { + sieve_parser_error(parser, + "cannot nest command blocks deeper than %u levels", + SIEVE_MAX_BLOCK_NESTING); + result = sieve_parser_recover(parser, STT_RCURLY); + + if (result > 0) + sieve_lexer_skip_token(lexer); + break; + } + + command->block = TRUE; + + if ((result = sieve_parse_commands(parser, command, depth + 1)) > 0) { + if (sieve_lexer_token_type(lexer) != STT_RCURLY) { + sieve_parser_error(parser, + "expected end of compound block '}', " + "but found %s", + sieve_lexer_token_description(lexer)); + result = sieve_parser_recover(parser, STT_RCURLY); + } else { + sieve_lexer_skip_token(lexer); + } + } else { + if (result < 0) + return result; + + if ((result = sieve_parser_recover(parser, STT_RCURLY)) > 0) + sieve_lexer_skip_token(lexer); + } + + break; + + default: + /* Recovered previously, so this cannot happen */ + i_unreached(); + } + } + + return result; +} + +bool sieve_parser_run(struct sieve_parser *parser, struct sieve_ast **ast) +{ + if (parser->ast != NULL) + sieve_ast_unref(&parser->ast); + + /* Create AST object if none is provided */ + if (*ast == NULL) + *ast = sieve_ast_create(parser->script); + else + sieve_ast_ref(*ast); + + parser->ast = *ast; + + /* Scan first token */ + sieve_lexer_skip_token(parser->lexer); + + /* Parse */ + if (sieve_parse_commands(parser, sieve_ast_root(parser->ast), 1) > 0 && + parser->valid) { + /* Parsed right to EOF ? */ + if (sieve_lexer_token_type(parser->lexer) != STT_EOF) { + sieve_parser_error(parser, + "unexpected %s found at (the presumed) end of file", + sieve_lexer_token_description(parser->lexer)); + parser->valid = FALSE; + } + } else { + parser->valid = FALSE; + } + + /* Clean up AST if parse failed */ + if (!parser->valid) { + parser->ast = NULL; + sieve_ast_unref(ast); + } + + return parser->valid; +} + +/* Error recovery: + To continue parsing after an error it is important to find the next + parsible item in the stream. The recover function skips over the remaining + garbage after an error. It tries to find the end of the failed syntax + structure and takes nesting of structures into account. + */ + +/* Assign useful names to priorities for readability */ +enum sieve_grammatical_prio { + SGP_BLOCK = 3, + SGP_COMMAND = 2, + SGP_TEST_LIST = 1, + SGP_STRING_LIST = 0, + + SGP_OTHER = -1 +}; + +static inline enum sieve_grammatical_prio +__get_token_priority(enum sieve_token_type token) +{ + switch (token) { + case STT_LCURLY: + case STT_RCURLY: + return SGP_BLOCK; + case STT_SEMICOLON: + return SGP_COMMAND; + case STT_LBRACKET: + case STT_RBRACKET: + return SGP_TEST_LIST; + case STT_LSQUARE: + case STT_RSQUARE: + return SGP_STRING_LIST; + default: + break; + } + + return SGP_OTHER; +} + +static int +sieve_parser_recover(struct sieve_parser *parser, + enum sieve_token_type end_token) +{ + /* The tokens that begin/end a specific block/command/list in order + of ascending grammatical priority. + */ + static const enum sieve_token_type begin_tokens[4] = { + STT_LSQUARE, STT_LBRACKET, STT_NONE, STT_LCURLY }; + static const enum sieve_token_type end_tokens[4] = { + STT_RSQUARE, STT_RBRACKET, STT_SEMICOLON, STT_RCURLY}; + const struct sieve_lexer *lexer = parser->lexer; + int nesting = 1; + enum sieve_grammatical_prio end_priority = + __get_token_priority(end_token); + + i_assert(end_priority != SGP_OTHER); + + while (sieve_lexer_token_type(lexer) != STT_EOF && + __get_token_priority(sieve_lexer_token_type(lexer)) + <= end_priority) { + if (sieve_lexer_token_type(lexer) == + begin_tokens[end_priority]) { + nesting++; + sieve_lexer_skip_token(lexer); + continue; + } + if (sieve_lexer_token_type(lexer) == + end_tokens[end_priority]) { + nesting--; + + if (nesting == 0) { + /* Next character is the end */ + return 1; + } + } + sieve_lexer_skip_token(lexer); + } + + /* Special case: COMMAND */ + if (end_token == STT_SEMICOLON && + sieve_lexer_token_type(lexer) == STT_LCURLY) { + return 1; + } + + /* End not found before eof or end of surrounding grammatical structure + */ + return 0; +} |