summaryrefslogtreecommitdiffstats
path: root/pigeonhole/src/lib-sieve/sieve-parser.c
diff options
context:
space:
mode:
Diffstat (limited to 'pigeonhole/src/lib-sieve/sieve-parser.c')
-rw-r--r--pigeonhole/src/lib-sieve/sieve-parser.c670
1 files changed, 670 insertions, 0 deletions
diff --git a/pigeonhole/src/lib-sieve/sieve-parser.c b/pigeonhole/src/lib-sieve/sieve-parser.c
new file mode 100644
index 0000000..ee54994
--- /dev/null
+++ b/pigeonhole/src/lib-sieve/sieve-parser.c
@@ -0,0 +1,670 @@
+/* Copyright (c) 2002-2018 Pigeonhole authors, see the included COPYING file
+ */
+
+#include "lib.h"
+#include "istream.h"
+#include "failures.h"
+
+#include "sieve-common.h"
+#include "sieve-limits.h"
+#include "sieve-script.h"
+#include "sieve-lexer.h"
+#include "sieve-parser.h"
+#include "sieve-error.h"
+#include "sieve-ast.h"
+
+/*
+ * Forward declarations
+ */
+
+static int
+sieve_parser_recover(struct sieve_parser *parser,
+ enum sieve_token_type end_token);
+
+/*
+ * Parser object
+ */
+
+struct sieve_parser {
+ pool_t pool;
+
+ bool valid;
+
+ struct sieve_script *script;
+
+ struct sieve_error_handler *ehandler;
+
+ const struct sieve_lexer *lexer;
+ struct sieve_ast *ast;
+};
+
+struct sieve_parser *
+sieve_parser_create(struct sieve_script *script,
+ struct sieve_error_handler *ehandler,
+ enum sieve_error *error_r)
+{
+ struct sieve_parser *parser;
+ const struct sieve_lexer *lexer;
+
+ lexer = sieve_lexer_create(script, ehandler, error_r);
+ if (lexer != NULL) {
+ pool_t pool = pool_alloconly_create("sieve_parser", 4096);
+
+ parser = p_new(pool, struct sieve_parser, 1);
+ parser->pool = pool;
+ parser->valid = TRUE;
+
+ parser->ehandler = ehandler;
+ sieve_error_handler_ref(ehandler);
+
+ parser->script = script;
+ sieve_script_ref(script);
+
+ parser->lexer = lexer;
+ parser->ast = NULL;
+
+ return parser;
+ }
+
+ return NULL;
+}
+
+void sieve_parser_free(struct sieve_parser **parser)
+{
+ if ((*parser)->ast != NULL)
+ sieve_ast_unref(&(*parser)->ast);
+
+ sieve_lexer_free(&(*parser)->lexer);
+ sieve_script_unref(&(*parser)->script);
+
+ sieve_error_handler_unref(&(*parser)->ehandler);
+
+ pool_unref(&(*parser)->pool);
+
+ *parser = NULL;
+}
+
+/*
+ * Internal error handling
+ */
+
+inline static void ATTR_FORMAT(4, 5)
+sieve_parser_error(struct sieve_parser *parser,
+ const char *csrc_filename, unsigned int csrc_linenum,
+ const char *fmt, ...)
+{
+ struct sieve_error_params params = {
+ .log_type = LOG_TYPE_ERROR,
+ .csrc = {
+ .filename = csrc_filename,
+ .linenum = csrc_linenum,
+ },
+ };
+ va_list args;
+
+ va_start(args, fmt);
+
+ /* Don't report a parse error if the lexer complained already */
+ if (sieve_lexer_token_type(parser->lexer) != STT_ERROR) {
+ T_BEGIN {
+ params.location = sieve_error_script_location(
+ parser->script,
+ sieve_lexer_token_line(parser->lexer));
+ sieve_logv(parser->ehandler, &params, fmt, args);
+ } T_END;
+ }
+
+ parser->valid = FALSE;
+
+ va_end(args);
+}
+#define sieve_parser_error(parser, ...) \
+ sieve_parser_error(parser, __FILE__, __LINE__, __VA_ARGS__)
+
+/*
+ * Sieve grammar parsing
+ */
+
+/* sieve_parse_arguments():
+
+ Parses both command arguments and sub-tests:
+ arguments = *argument [test / test-list]
+ argument = string-list / number / tag
+ string = quoted-string / multi-line [[implicitly handled in lexer]]
+ string-list = "[" string *("," string) "]" / string ;; if
+ there is only a single string, the brackets are optional
+ test-list = "(" test *("," test) ")"
+ test = identifier arguments
+ */
+static int
+sieve_parse_arguments(struct sieve_parser *parser, struct sieve_ast_node *node,
+ unsigned int depth)
+{
+ const struct sieve_lexer *lexer = parser->lexer;
+ struct sieve_ast_node *test = NULL;
+ bool test_present = TRUE;
+ bool arg_present = TRUE;
+ int result = 1; /* Indicates whether the parser is in a defined, not
+ necessarily error-free state */
+
+ /* Parse arguments */
+ while (arg_present && result > 0) {
+ struct sieve_ast_argument *arg;
+
+ if (!parser->valid &&
+ !sieve_errors_more_allowed(parser->ehandler)) {
+ result = 0;
+ break;
+ }
+
+ switch (sieve_lexer_token_type(lexer)) {
+ /* String list */
+ case STT_LSQUARE:
+ /* Create stinglist object */
+ arg = sieve_ast_argument_stringlist_create(
+ node, sieve_lexer_token_line(parser->lexer));
+ if (arg == NULL) break;
+ sieve_lexer_skip_token(lexer);
+
+ if (sieve_lexer_token_type(lexer) == STT_STRING) {
+ bool add_failed = FALSE;
+
+ /* Add the string to the list */
+ if (!sieve_ast_stringlist_add(
+ arg, sieve_lexer_token_str(lexer),
+ sieve_lexer_token_line(parser->lexer)))
+ add_failed = TRUE;
+ sieve_lexer_skip_token(lexer);
+
+ while (!add_failed &&
+ sieve_lexer_token_type(lexer) == STT_COMMA) {
+ sieve_lexer_skip_token(lexer);
+
+ /* Check parser status */
+ if (!parser->valid &&
+ !sieve_errors_more_allowed(parser->ehandler)) {
+ result = sieve_parser_recover(parser, STT_RSQUARE);
+ break;
+ }
+
+ if (sieve_lexer_token_type(lexer) == STT_STRING) {
+ /* Add the string to the list */
+ if (!sieve_ast_stringlist_add(
+ arg, sieve_lexer_token_str(lexer),
+ sieve_lexer_token_line(parser->lexer)))
+ add_failed = TRUE;
+
+ sieve_lexer_skip_token(lexer);
+ } else {
+ sieve_parser_error(parser,
+ "expecting string after ',' in string list, "
+ "but found %s",
+ sieve_lexer_token_description(lexer));
+ result = sieve_parser_recover(parser, STT_RSQUARE);
+ break;
+ }
+ }
+
+ if (add_failed) {
+ sieve_parser_error(parser,
+ "failed to accept more items in string list");
+ return -1;
+ }
+ } else {
+ sieve_parser_error(parser,
+ "expecting string after '[' in string list, "
+ "but found %s",
+ sieve_lexer_token_description(lexer));
+ result = sieve_parser_recover(parser, STT_RSQUARE);
+ }
+
+ /* Finish the string list */
+ if (sieve_lexer_token_type(lexer) == STT_RSQUARE) {
+ sieve_lexer_skip_token(lexer);
+ } else {
+ sieve_parser_error(parser,
+ "expecting ',' or end of string list ']', "
+ "but found %s",
+ sieve_lexer_token_description(lexer));
+
+ if ((result = sieve_parser_recover(parser, STT_RSQUARE)) > 0)
+ sieve_lexer_skip_token(lexer);
+ }
+ break;
+ /* Single string */
+ case STT_STRING:
+ arg = sieve_ast_argument_string_create(
+ node, sieve_lexer_token_str(lexer),
+ sieve_lexer_token_line(parser->lexer));
+ sieve_lexer_skip_token(lexer);
+ break;
+ /* Number */
+ case STT_NUMBER:
+ arg = sieve_ast_argument_number_create(
+ node, sieve_lexer_token_int(lexer),
+ sieve_lexer_token_line(parser->lexer));
+ sieve_lexer_skip_token(lexer);
+ break;
+ /* Tag */
+ case STT_TAG:
+ arg = sieve_ast_argument_tag_create(
+ node, sieve_lexer_token_ident(lexer),
+ sieve_lexer_token_line(parser->lexer));
+ sieve_lexer_skip_token(lexer);
+ break;
+ /* End of argument list, continue with tests */
+ default:
+ arg_present = FALSE;
+ break;
+ }
+
+ if (arg_present && arg == NULL) {
+ sieve_parser_error(parser,
+ "failed to accept more arguments for command '%s'",
+ node->identifier);
+ return -1;
+ }
+
+ if (sieve_ast_argument_count(node) > SIEVE_MAX_COMMAND_ARGUMENTS) {
+ sieve_parser_error(parser,
+ "too many arguments for command '%s'",
+ node->identifier);
+ return 0;
+ }
+ }
+
+ if (result <= 0)
+ return result; /* Defer recovery to caller */
+
+ /* --> [ test / test-list ]
+ test-list = "(" test *("," test) ")"
+ test = identifier arguments
+ */
+ switch (sieve_lexer_token_type(lexer)) {
+ /* Single test */
+ case STT_IDENTIFIER:
+ if ((depth + 1) > SIEVE_MAX_TEST_NESTING) {
+ sieve_parser_error(parser,
+ "cannot nest tests deeper than %u levels",
+ SIEVE_MAX_TEST_NESTING);
+ return 0;
+ }
+
+ test = sieve_ast_test_create(
+ node, sieve_lexer_token_ident(lexer),
+ sieve_lexer_token_line(parser->lexer));
+ sieve_lexer_skip_token(lexer);
+
+ /* Theoretically, test can be NULL */
+ if (test == NULL)
+ break;
+
+ /* Parse test arguments, which may include more tests (recurse) */
+ if (sieve_parse_arguments(parser, test, depth + 1) <= 0) {
+ return 0; /* Defer recovery to caller */
+ }
+
+ break;
+
+ /* Test list */
+ case STT_LBRACKET:
+ sieve_lexer_skip_token(lexer);
+
+ if (depth+1 > SIEVE_MAX_TEST_NESTING) {
+ sieve_parser_error(parser,
+ "cannot nest tests deeper than %u levels",
+ SIEVE_MAX_TEST_NESTING);
+ result = sieve_parser_recover(parser, STT_RBRACKET);
+
+ if (result > 0)
+ sieve_lexer_skip_token(lexer);
+ return result;
+ }
+
+ node->test_list = TRUE;
+
+ /* Test starts with identifier */
+ if (sieve_lexer_token_type(lexer) == STT_IDENTIFIER) {
+ test = sieve_ast_test_create(
+ node, sieve_lexer_token_ident(lexer),
+ sieve_lexer_token_line(parser->lexer));
+ sieve_lexer_skip_token(lexer);
+
+ if (test == NULL)
+ break;
+
+ /* Parse test arguments, which may include more tests (recurse) */
+ if ((result = sieve_parse_arguments(parser, test, depth+1)) > 0) {
+
+ /* More tests ? */
+ while (sieve_lexer_token_type(lexer) == STT_COMMA) {
+ sieve_lexer_skip_token(lexer);
+
+ /* Check parser status */
+ if (!parser->valid &&
+ !sieve_errors_more_allowed(parser->ehandler)) {
+ result = sieve_parser_recover(parser, STT_RBRACKET);
+ break;
+ }
+
+ /* Test starts with identifier */
+ if (sieve_lexer_token_type(lexer) == STT_IDENTIFIER) {
+ test = sieve_ast_test_create(
+ node, sieve_lexer_token_ident(lexer),
+ sieve_lexer_token_line(parser->lexer));
+ sieve_lexer_skip_token(lexer);
+
+ if (test == NULL)
+ break;
+
+ /* Parse test arguments, which may include more tests (recurse) */
+ if ((result = sieve_parse_arguments(parser, test, depth+1)) <= 0) {
+ if (result < 0)
+ return result;
+ result = sieve_parser_recover(parser, STT_RBRACKET);
+ break;
+ }
+ } else {
+ sieve_parser_error(parser,
+ "expecting test identifier after ',' in test list, "
+ "but found %s",
+ sieve_lexer_token_description(lexer));
+ result = sieve_parser_recover(parser, STT_RBRACKET);
+ break;
+ }
+ }
+ if (test == NULL)
+ break;
+ } else {
+ if (result < 0)
+ return result;
+
+ result = sieve_parser_recover(parser, STT_RBRACKET);
+ }
+ } else {
+ sieve_parser_error(parser,
+ "expecting test identifier after '(' in test list, "
+ "but found %s",
+ sieve_lexer_token_description(lexer));
+
+ result = sieve_parser_recover(parser, STT_RBRACKET);
+ }
+
+ /* The next token should be a ')', indicating the end of the
+ test list
+ --> previous sieve_parser_recover calls try to restore this
+ situation after parse errors.
+ */
+ if (sieve_lexer_token_type(lexer) == STT_RBRACKET) {
+ sieve_lexer_skip_token(lexer);
+ } else {
+ sieve_parser_error(parser,
+ "expecting ',' or end of test list ')', "
+ "but found %s",
+ sieve_lexer_token_description(lexer));
+
+ /* Recover function tries to make next token equal to
+ ')'. If it succeeds we need to skip it.
+ */
+ if ((result = sieve_parser_recover(parser, STT_RBRACKET)) > 0)
+ sieve_lexer_skip_token(lexer);
+ }
+ break;
+
+ default:
+ /* Not an error: test / test-list is optional
+ --> any errors are detected by the caller
+ */
+ test_present = FALSE;
+ break;
+ }
+
+ if (test_present && test == NULL) {
+ sieve_parser_error(parser,
+ "failed to accept more tests for command '%s'",
+ node->identifier);
+ return -1;
+ }
+
+ return result;
+}
+
+/* commands = *command
+ command = identifier arguments ( ";" / block )
+ block = "{" commands "}"
+ */
+static int
+sieve_parse_commands(struct sieve_parser *parser, struct sieve_ast_node *block,
+ unsigned int depth)
+{
+ const struct sieve_lexer *lexer = parser->lexer;
+ int result = 1;
+
+ while (result > 0 &&
+ sieve_lexer_token_type(lexer) == STT_IDENTIFIER) {
+ struct sieve_ast_node *command;
+
+ /* Check parser status */
+ if (!parser->valid &&
+ !sieve_errors_more_allowed(parser->ehandler)) {
+ result = sieve_parser_recover(parser, STT_SEMICOLON);
+ break;
+ }
+
+ /* Create command node */
+ command = sieve_ast_command_create(
+ block, sieve_lexer_token_ident(lexer),
+ sieve_lexer_token_line(parser->lexer));
+ sieve_lexer_skip_token(lexer);
+
+ if (command == NULL) {
+ sieve_parser_error(parser,
+ "failed to accept more commands inside the block of command '%s'",
+ block->identifier);
+ return -1;
+ }
+
+ result = sieve_parse_arguments(parser, command, 1);
+
+ /* Check whether the command is properly terminated
+ (i.e. with ; or a new block)
+ */
+ if (result > 0 &&
+ sieve_lexer_token_type(lexer) != STT_SEMICOLON &&
+ sieve_lexer_token_type(lexer) != STT_LCURLY) {
+
+ sieve_parser_error(parser,
+ "expected end of command ';' or the beginning of a compound block '{', "
+ "but found %s",
+ sieve_lexer_token_description(lexer));
+ result = 0;
+ }
+
+ /* Try to recover from parse errors to reacquire a defined state
+ */
+ if (result == 0)
+ result = sieve_parser_recover(parser, STT_SEMICOLON);
+
+ /* Don't bother to continue if we are not in a defined state */
+ if (result <= 0)
+ return result;
+
+ switch (sieve_lexer_token_type(lexer)) {
+ /* End of the command */
+ case STT_SEMICOLON:
+ sieve_lexer_skip_token(lexer);
+ break;
+ /* Command has a block {...} */
+ case STT_LCURLY:
+ sieve_lexer_skip_token(lexer);
+
+ /* Check current depth first */
+ if ((depth + 1) > SIEVE_MAX_BLOCK_NESTING) {
+ sieve_parser_error(parser,
+ "cannot nest command blocks deeper than %u levels",
+ SIEVE_MAX_BLOCK_NESTING);
+ result = sieve_parser_recover(parser, STT_RCURLY);
+
+ if (result > 0)
+ sieve_lexer_skip_token(lexer);
+ break;
+ }
+
+ command->block = TRUE;
+
+ if ((result = sieve_parse_commands(parser, command, depth + 1)) > 0) {
+ if (sieve_lexer_token_type(lexer) != STT_RCURLY) {
+ sieve_parser_error(parser,
+ "expected end of compound block '}', "
+ "but found %s",
+ sieve_lexer_token_description(lexer));
+ result = sieve_parser_recover(parser, STT_RCURLY);
+ } else {
+ sieve_lexer_skip_token(lexer);
+ }
+ } else {
+ if (result < 0)
+ return result;
+
+ if ((result = sieve_parser_recover(parser, STT_RCURLY)) > 0)
+ sieve_lexer_skip_token(lexer);
+ }
+
+ break;
+
+ default:
+ /* Recovered previously, so this cannot happen */
+ i_unreached();
+ }
+ }
+
+ return result;
+}
+
+bool sieve_parser_run(struct sieve_parser *parser, struct sieve_ast **ast)
+{
+ if (parser->ast != NULL)
+ sieve_ast_unref(&parser->ast);
+
+ /* Create AST object if none is provided */
+ if (*ast == NULL)
+ *ast = sieve_ast_create(parser->script);
+ else
+ sieve_ast_ref(*ast);
+
+ parser->ast = *ast;
+
+ /* Scan first token */
+ sieve_lexer_skip_token(parser->lexer);
+
+ /* Parse */
+ if (sieve_parse_commands(parser, sieve_ast_root(parser->ast), 1) > 0 &&
+ parser->valid) {
+ /* Parsed right to EOF ? */
+ if (sieve_lexer_token_type(parser->lexer) != STT_EOF) {
+ sieve_parser_error(parser,
+ "unexpected %s found at (the presumed) end of file",
+ sieve_lexer_token_description(parser->lexer));
+ parser->valid = FALSE;
+ }
+ } else {
+ parser->valid = FALSE;
+ }
+
+ /* Clean up AST if parse failed */
+ if (!parser->valid) {
+ parser->ast = NULL;
+ sieve_ast_unref(ast);
+ }
+
+ return parser->valid;
+}
+
+/* Error recovery:
+ To continue parsing after an error it is important to find the next
+ parsible item in the stream. The recover function skips over the remaining
+ garbage after an error. It tries to find the end of the failed syntax
+ structure and takes nesting of structures into account.
+ */
+
+/* Assign useful names to priorities for readability */
+enum sieve_grammatical_prio {
+ SGP_BLOCK = 3,
+ SGP_COMMAND = 2,
+ SGP_TEST_LIST = 1,
+ SGP_STRING_LIST = 0,
+
+ SGP_OTHER = -1
+};
+
+static inline enum sieve_grammatical_prio
+__get_token_priority(enum sieve_token_type token)
+{
+ switch (token) {
+ case STT_LCURLY:
+ case STT_RCURLY:
+ return SGP_BLOCK;
+ case STT_SEMICOLON:
+ return SGP_COMMAND;
+ case STT_LBRACKET:
+ case STT_RBRACKET:
+ return SGP_TEST_LIST;
+ case STT_LSQUARE:
+ case STT_RSQUARE:
+ return SGP_STRING_LIST;
+ default:
+ break;
+ }
+
+ return SGP_OTHER;
+}
+
+static int
+sieve_parser_recover(struct sieve_parser *parser,
+ enum sieve_token_type end_token)
+{
+ /* The tokens that begin/end a specific block/command/list in order
+ of ascending grammatical priority.
+ */
+ static const enum sieve_token_type begin_tokens[4] = {
+ STT_LSQUARE, STT_LBRACKET, STT_NONE, STT_LCURLY };
+ static const enum sieve_token_type end_tokens[4] = {
+ STT_RSQUARE, STT_RBRACKET, STT_SEMICOLON, STT_RCURLY};
+ const struct sieve_lexer *lexer = parser->lexer;
+ int nesting = 1;
+ enum sieve_grammatical_prio end_priority =
+ __get_token_priority(end_token);
+
+ i_assert(end_priority != SGP_OTHER);
+
+ while (sieve_lexer_token_type(lexer) != STT_EOF &&
+ __get_token_priority(sieve_lexer_token_type(lexer))
+ <= end_priority) {
+ if (sieve_lexer_token_type(lexer) ==
+ begin_tokens[end_priority]) {
+ nesting++;
+ sieve_lexer_skip_token(lexer);
+ continue;
+ }
+ if (sieve_lexer_token_type(lexer) ==
+ end_tokens[end_priority]) {
+ nesting--;
+
+ if (nesting == 0) {
+ /* Next character is the end */
+ return 1;
+ }
+ }
+ sieve_lexer_skip_token(lexer);
+ }
+
+ /* Special case: COMMAND */
+ if (end_token == STT_SEMICOLON &&
+ sieve_lexer_token_type(lexer) == STT_LCURLY) {
+ return 1;
+ }
+
+ /* End not found before eof or end of surrounding grammatical structure
+ */
+ return 0;
+}