diff options
author | Daniel Baumann <daniel.baumann@progress-linux.org> | 2024-04-28 09:51:24 +0000 |
---|---|---|
committer | Daniel Baumann <daniel.baumann@progress-linux.org> | 2024-04-28 09:51:24 +0000 |
commit | f7548d6d28c313cf80e6f3ef89aed16a19815df1 (patch) | |
tree | a3f6f2a3f247293bee59ecd28e8cd8ceb6ca064a /pigeonhole/src/lib-sieve/sieve-lexer.c | |
parent | Initial commit. (diff) | |
download | dovecot-f7548d6d28c313cf80e6f3ef89aed16a19815df1.tar.xz dovecot-f7548d6d28c313cf80e6f3ef89aed16a19815df1.zip |
Adding upstream version 1:2.3.19.1+dfsg1.upstream/1%2.3.19.1+dfsg1upstream
Signed-off-by: Daniel Baumann <daniel.baumann@progress-linux.org>
Diffstat (limited to '')
-rw-r--r-- | pigeonhole/src/lib-sieve/sieve-lexer.c | 930 |
1 files changed, 930 insertions, 0 deletions
diff --git a/pigeonhole/src/lib-sieve/sieve-lexer.c b/pigeonhole/src/lib-sieve/sieve-lexer.c new file mode 100644 index 0000000..a6968a9 --- /dev/null +++ b/pigeonhole/src/lib-sieve/sieve-lexer.c @@ -0,0 +1,930 @@ +/* Copyright (c) 2002-2018 Pigeonhole authors, see the included COPYING file + */ + +#include "lib.h" +#include "compat.h" +#include "str.h" +#include "str-sanitize.h" +#include "istream.h" + +#include "sieve-common.h" +#include "sieve-limits.h" +#include "sieve-error.h" +#include "sieve-script.h" + +#include "sieve-lexer.h" + +#include <stdio.h> +#include <sys/types.h> +#include <sys/stat.h> +#include <fcntl.h> +#include <unistd.h> +#include <ctype.h> + +/* + * Useful macros + */ + +#define DIGIT_VAL(c) (c - '0') + +/* + * Lexer object + */ + +struct sieve_lexical_scanner { + pool_t pool; + struct sieve_instance *svinst; + + struct sieve_script *script; + struct istream *input; + + struct sieve_error_handler *ehandler; + + /* Currently scanned data */ + const unsigned char *buffer; + size_t buffer_size; + size_t buffer_pos; + + struct sieve_lexer lexer; + + int current_line; +}; + +const struct sieve_lexer * +sieve_lexer_create(struct sieve_script *script, + struct sieve_error_handler *ehandler, + enum sieve_error *error_r) +{ + struct sieve_lexical_scanner *scanner; + struct sieve_instance *svinst = sieve_script_svinst(script); + struct istream *stream; + const struct stat *st; + + /* Open script as stream */ + if (sieve_script_get_stream(script, &stream, error_r) < 0) + return NULL; + + /* Check script size */ + if (i_stream_stat(stream, TRUE, &st) >= 0 && st->st_size > 0 && + svinst->max_script_size > 0 && + (uoff_t)st->st_size > svinst->max_script_size) { + sieve_error(ehandler, sieve_script_name(script), + "sieve script is too large (max %zu bytes)", + svinst->max_script_size); + if (error_r != NULL) + *error_r = SIEVE_ERROR_NOT_POSSIBLE; + return NULL; + } + + scanner = i_new(struct sieve_lexical_scanner, 1); + scanner->lexer.scanner = scanner; + + scanner->ehandler = ehandler; + sieve_error_handler_ref(ehandler); + + scanner->input = stream; + i_stream_ref(scanner->input); + + scanner->script = script; + sieve_script_ref(script); + + scanner->buffer = NULL; + scanner->buffer_size = 0; + scanner->buffer_pos = 0; + + scanner->lexer.token_type = STT_NONE; + scanner->lexer.token_str_value = str_new(default_pool, 256); + scanner->lexer.token_int_value = 0; + scanner->lexer.token_line = 1; + + scanner->current_line = 1; + + return &scanner->lexer; +} + +void sieve_lexer_free(const struct sieve_lexer **_lexer) +{ + const struct sieve_lexer *lexer = *_lexer; + struct sieve_lexical_scanner *scanner = lexer->scanner; + + i_stream_unref(&scanner->input); + sieve_script_unref(&scanner->script); + sieve_error_handler_unref(&scanner->ehandler); + str_free(&scanner->lexer.token_str_value); + + i_free(scanner); + *_lexer = NULL; +} + +/* + * Internal error handling + */ + +inline static void ATTR_FORMAT(4, 5) +sieve_lexer_error(const struct sieve_lexer *lexer, + const char *csrc_filename, unsigned int csrc_linenum, + const char *fmt, ...) +{ + struct sieve_lexical_scanner *scanner = lexer->scanner; + struct sieve_error_params params = { + .log_type = LOG_TYPE_ERROR, + .csrc = { + .filename = csrc_filename, + .linenum = csrc_linenum, + }, + }; + va_list args; + + va_start(args, fmt); + + T_BEGIN { + params.location = + sieve_error_script_location(scanner->script, + scanner->current_line); + sieve_logv(scanner->ehandler, ¶ms, fmt, args); + } T_END; + + va_end(args); +} +#define sieve_lexer_error(lexer, ...) \ + sieve_lexer_error(lexer, __FILE__, __LINE__, __VA_ARGS__) + +inline static void ATTR_FORMAT(4, 5) +sieve_lexer_warning(const struct sieve_lexer *lexer, + const char *csrc_filename, unsigned int csrc_linenum, + const char *fmt, ...) +{ + struct sieve_lexical_scanner *scanner = lexer->scanner; + struct sieve_error_params params = { + .log_type = LOG_TYPE_WARNING, + .csrc = { + .filename = csrc_filename, + .linenum = csrc_linenum, + }, + }; + va_list args; + + va_start(args, fmt); + + T_BEGIN { + params.location = + sieve_error_script_location(scanner->script, + scanner->current_line); + sieve_logv(scanner->ehandler, ¶ms, fmt, args); + } T_END; + + va_end(args); +} +#define sieve_lexer_warning(lexer, ...) \ + sieve_lexer_warning(lexer, __FILE__, __LINE__, __VA_ARGS__) + +const char *sieve_lexer_token_description(const struct sieve_lexer *lexer) +{ + switch (lexer->token_type) { + case STT_NONE: + return "no token (bug)"; + case STT_WHITESPACE: + return "whitespace (bug)"; + case STT_EOF: + return "end of file"; + + case STT_NUMBER: + return "number"; + case STT_IDENTIFIER: + return "identifier"; + case STT_TAG: + return "tag"; + case STT_STRING: + return "string"; + + case STT_RBRACKET: + return "')'"; + case STT_LBRACKET: + return "'('"; + case STT_RCURLY: + return "'}'"; + case STT_LCURLY: + return "'{'"; + case STT_RSQUARE: + return "']'"; + case STT_LSQUARE: + return "'['"; + case STT_SEMICOLON: + return "';'"; + case STT_COMMA: + return "','"; + + case STT_SLASH: + return "'/'"; + case STT_COLON: + return "':'"; + + case STT_GARBAGE: + return "unknown characters"; + case STT_ERROR: + return "error token (bug)"; + } + + return "unknown token (bug)"; +} + +/* + * Debug + */ + +void sieve_lexer_token_print(const struct sieve_lexer *lexer) +{ + switch (lexer->token_type) { + case STT_NONE: + printf("??NONE?? "); + break; + case STT_WHITESPACE: + printf("??WHITESPACE?? "); + break; + case STT_EOF: + printf("EOF\n"); + break; + + case STT_NUMBER: + printf("NUMBER "); + break; + case STT_IDENTIFIER: + printf("IDENTIFIER "); + break; + case STT_TAG: + printf("TAG "); + break; + case STT_STRING: + printf("STRING "); + break; + + case STT_RBRACKET: + printf(") "); + break; + case STT_LBRACKET: + printf("( "); + break; + case STT_RCURLY: + printf("}\n"); + break; + case STT_LCURLY: + printf("{\n"); + break; + case STT_RSQUARE: + printf("] "); + break; + case STT_LSQUARE: + printf("[ "); + break; + case STT_SEMICOLON: + printf(";\n"); + break; + case STT_COMMA: + printf(", "); + break; + + case STT_SLASH: + printf("/ "); + break; + case STT_COLON: + printf(": "); + break; + + case STT_GARBAGE: + printf(">>GARBAGE<<"); + break; + case STT_ERROR: + printf(">>ERROR<<"); + break; + default: + printf("UNKNOWN "); + break; + } +} + +/* + * Lexical scanning + */ + +static void sieve_lexer_shift(struct sieve_lexical_scanner *scanner) +{ + if (scanner->buffer_size > 0 && + scanner->buffer[scanner->buffer_pos] == '\n') + scanner->current_line++; + + if (scanner->buffer_size > 0 && + scanner->buffer_pos + 1 < scanner->buffer_size) + scanner->buffer_pos++; + else { + if (scanner->buffer_size > 0) + i_stream_skip(scanner->input, scanner->buffer_size); + + scanner->buffer = i_stream_get_data(scanner->input, + &scanner->buffer_size); + + if (scanner->buffer_size == 0 && + i_stream_read(scanner->input) > 0) { + scanner->buffer = i_stream_get_data( + scanner->input, &scanner->buffer_size); + } + + scanner->buffer_pos = 0; + } +} + +static inline int sieve_lexer_curchar(struct sieve_lexical_scanner *scanner) +{ + if (scanner->buffer_size == 0) + return -1; + + return scanner->buffer[scanner->buffer_pos]; +} + +static inline const char *_char_sanitize(int ch) +{ + if (ch > 31 && ch < 127) + return t_strdup_printf("'%c'", ch); + + return t_strdup_printf("0x%02x", ch); +} + +static bool sieve_lexer_scan_number(struct sieve_lexical_scanner *scanner) +{ + struct sieve_lexer *lexer = &scanner->lexer; + uintmax_t value; + string_t *str; + bool overflow = FALSE; + + str_truncate(lexer->token_str_value,0); + str = lexer->token_str_value; + + while (i_isdigit(sieve_lexer_curchar(scanner))) { + str_append_c(str, sieve_lexer_curchar(scanner)); + sieve_lexer_shift(scanner); + } + + if (str_to_uintmax(str_c(str), &value) < 0 || + value > (sieve_number_t)-1) { + overflow = TRUE; + } else { + switch (sieve_lexer_curchar(scanner)) { + case 'k': + case 'K': /* Kilo */ + if (value > (SIEVE_MAX_NUMBER >> 10)) + overflow = TRUE; + else + value = value << 10; + sieve_lexer_shift(scanner); + break; + case 'm': + case 'M': /* Mega */ + if (value > (SIEVE_MAX_NUMBER >> 20)) + overflow = TRUE; + else + value = value << 20; + sieve_lexer_shift(scanner); + break; + case 'g': + case 'G': /* Giga */ + if (value > (SIEVE_MAX_NUMBER >> 30)) + overflow = TRUE; + else + value = value << 30; + sieve_lexer_shift(scanner); + break; + default: + /* Next token */ + break; + } + } + + /* Check for integer overflow */ + if (overflow) { + sieve_lexer_error(lexer, + "number exceeds integer limits (max %llu)", + (long long) SIEVE_MAX_NUMBER); + lexer->token_type = STT_ERROR; + return FALSE; + } + + lexer->token_type = STT_NUMBER; + lexer->token_int_value = (sieve_number_t)value; + return TRUE; + +} + +static bool +sieve_lexer_scan_hash_comment(struct sieve_lexical_scanner *scanner) +{ + struct sieve_lexer *lexer = &scanner->lexer; + + while (sieve_lexer_curchar(scanner) != '\n') { + switch(sieve_lexer_curchar(scanner)) { + case -1: + if (!scanner->input->eof) { + lexer->token_type = STT_ERROR; + return FALSE; + } + sieve_lexer_warning(lexer, + "no newline (CRLF) at end of hash comment at end of file"); + lexer->token_type = STT_WHITESPACE; + return TRUE; + case '\0': + sieve_lexer_error(lexer, + "encountered NUL character in hash comment"); + lexer->token_type = STT_ERROR; + return FALSE; + default: + break; + } + + /* Stray CR is ignored */ + sieve_lexer_shift(scanner); + } + + sieve_lexer_shift(scanner); + + lexer->token_type = STT_WHITESPACE; + return TRUE; +} + +/* sieve_lexer_scan_raw_token: + * Scans valid tokens and whitespace + */ +static bool +sieve_lexer_scan_raw_token(struct sieve_lexical_scanner *scanner) +{ + struct sieve_lexer *lexer = &scanner->lexer; + string_t *str; + int ret; + + /* Read first character */ + if (lexer->token_type == STT_NONE) { + if ((ret = i_stream_read(scanner->input)) < 0) { + i_assert(ret != -2); + if (!scanner->input->eof) { + lexer->token_type = STT_ERROR; + return FALSE; + } + } + sieve_lexer_shift(scanner); + } + + lexer->token_line = scanner->current_line; + + switch (sieve_lexer_curchar(scanner)) { + + /* whitespace */ + + // hash-comment = ( "#" *CHAR-NOT-CRLF CRLF ) + case '#': + sieve_lexer_shift(scanner); + return sieve_lexer_scan_hash_comment(scanner); + + // bracket-comment = "/*" *(CHAR-NOT-STAR / ("*" CHAR-NOT-SLASH)) "*/" + // ;; No */ allowed inside a comment. + // ;; (No * is allowed unless it is the last character, + // ;; or unless it is followed by a character that isn't a + // ;; slash.) + case '/': + sieve_lexer_shift(scanner); + + if (sieve_lexer_curchar(scanner) == '*') { + sieve_lexer_shift(scanner); + + while (TRUE) { + switch (sieve_lexer_curchar(scanner)) { + case -1: + if (scanner->input->eof) { + sieve_lexer_error(lexer, + "end of file before end of bracket comment " + "('/* ... */') " + "started at line %d", + lexer->token_line); + } + lexer->token_type = STT_ERROR; + return FALSE; + case '*': + sieve_lexer_shift(scanner); + + if (sieve_lexer_curchar(scanner) == '/') { + sieve_lexer_shift(scanner); + + lexer->token_type = STT_WHITESPACE; + return TRUE; + + } else if (sieve_lexer_curchar(scanner) == -1) { + sieve_lexer_error(lexer, + "end of file before end of bracket comment " + "('/* ... */') " + "started at line %d", + lexer->token_line); + lexer->token_type = STT_ERROR; + return FALSE; + } + break; + case '\0': + sieve_lexer_error(lexer, + "encountered NUL character in bracket comment"); + lexer->token_type = STT_ERROR; + return FALSE; + default: + sieve_lexer_shift(scanner); + } + } + + i_unreached(); + return FALSE; + } + + lexer->token_type = STT_SLASH; + return TRUE; + + // comment = bracket-comment / hash-comment + // white-space = 1*(SP / CRLF / HTAB) / comment + case '\t': + case '\r': + case '\n': + case ' ': + sieve_lexer_shift(scanner); + + while (sieve_lexer_curchar(scanner) == '\t' || + sieve_lexer_curchar(scanner) == '\r' || + sieve_lexer_curchar(scanner) == '\n' || + sieve_lexer_curchar(scanner) == ' ') { + + sieve_lexer_shift(scanner); + } + + lexer->token_type = STT_WHITESPACE; + return TRUE; + + /* quoted-string */ + case '"': + sieve_lexer_shift(scanner); + + str_truncate(lexer->token_str_value, 0); + str = lexer->token_str_value; + + while (sieve_lexer_curchar(scanner) != '"') { + if (sieve_lexer_curchar(scanner) == '\\') + sieve_lexer_shift(scanner); + + switch (sieve_lexer_curchar(scanner)) { + + /* End of file */ + case -1: + if (scanner->input->eof) { + sieve_lexer_error(lexer, + "end of file before end of quoted string " + "started at line %d", lexer->token_line); + } + lexer->token_type = STT_ERROR; + return FALSE; + + /* NUL character */ + case '\0': + sieve_lexer_error(lexer, + "encountered NUL character in quoted string " + "started at line %d", lexer->token_line); + lexer->token_type = STT_ERROR; + return FALSE; + + /* CR .. check for LF */ + case '\r': + sieve_lexer_shift(scanner); + + if (sieve_lexer_curchar(scanner) != '\n') { + sieve_lexer_error(lexer, + "found stray carriage-return (CR) character " + "in quoted string started at line %d", + lexer->token_line); + lexer->token_type = STT_ERROR; + return FALSE; + } + + if (str_len(str) <= SIEVE_MAX_STRING_LEN) + str_append(str, "\r\n"); + break; + + /* Loose LF is allowed (non-standard) and converted to CRLF */ + case '\n': + if (str_len(str) <= SIEVE_MAX_STRING_LEN) + str_append(str, "\r\n"); + break; + + /* Other characters */ + default: + if (str_len(str) <= SIEVE_MAX_STRING_LEN) + str_append_c(str, sieve_lexer_curchar(scanner)); + } + + sieve_lexer_shift(scanner); + } + + sieve_lexer_shift(scanner); + + if (str_len(str) > SIEVE_MAX_STRING_LEN) { + sieve_lexer_error(lexer, + "quoted string started at line %d is too long " + "(longer than %llu bytes)", lexer->token_line, + (long long) SIEVE_MAX_STRING_LEN); + lexer->token_type = STT_ERROR; + return FALSE; + } + + lexer->token_type = STT_STRING; + return TRUE; + + /* single character tokens */ + case ']': + sieve_lexer_shift(scanner); + lexer->token_type = STT_RSQUARE; + return TRUE; + case '[': + sieve_lexer_shift(scanner); + lexer->token_type = STT_LSQUARE; + return TRUE; + case '}': + sieve_lexer_shift(scanner); + lexer->token_type = STT_RCURLY; + return TRUE; + case '{': + sieve_lexer_shift(scanner); + lexer->token_type = STT_LCURLY; + return TRUE; + case ')': + sieve_lexer_shift(scanner); + lexer->token_type = STT_RBRACKET; + return TRUE; + case '(': + sieve_lexer_shift(scanner); + lexer->token_type = STT_LBRACKET; + return TRUE; + case ';': + sieve_lexer_shift(scanner); + lexer->token_type = STT_SEMICOLON; + return TRUE; + case ',': + sieve_lexer_shift(scanner); + lexer->token_type = STT_COMMA; + return TRUE; + + /* EOF */ + case -1: + if (!scanner->input->eof) { + lexer->token_type = STT_ERROR; + return FALSE; + } + lexer->token_type = STT_EOF; + return TRUE; + + default: + /* number */ + if (i_isdigit(sieve_lexer_curchar(scanner))) { + return sieve_lexer_scan_number(scanner); + + /* identifier / tag */ + } else if (i_isalpha(sieve_lexer_curchar(scanner)) || + sieve_lexer_curchar(scanner) == '_' || + sieve_lexer_curchar(scanner) == ':') { + + enum sieve_token_type type = STT_IDENTIFIER; + str_truncate(lexer->token_str_value,0); + str = lexer->token_str_value; + + /* If it starts with a ':' it is a tag and not an + identifier */ + if (sieve_lexer_curchar(scanner) == ':') { + sieve_lexer_shift(scanner); // discard colon + type = STT_TAG; + + /* First character still can't be a DIGIT */ + if (i_isalpha(sieve_lexer_curchar(scanner)) || + sieve_lexer_curchar(scanner) == '_') { + str_append_c(str, sieve_lexer_curchar(scanner)); + sieve_lexer_shift(scanner); + } else { + /* Hmm, otherwise it is just a spurious + colon */ + lexer->token_type = STT_COLON; + return TRUE; + } + } else { + str_append_c(str, sieve_lexer_curchar(scanner)); + sieve_lexer_shift(scanner); + } + + /* Scan the rest of the identifier */ + while (i_isalnum(sieve_lexer_curchar(scanner)) || + sieve_lexer_curchar(scanner) == '_') { + + if (str_len(str) <= SIEVE_MAX_IDENTIFIER_LEN) { + str_append_c(str, sieve_lexer_curchar(scanner)); + } + sieve_lexer_shift(scanner); + } + + /* Is this in fact a multiline text string ? */ + if (sieve_lexer_curchar(scanner) == ':' && + type == STT_IDENTIFIER && str_len(str) == 4 && + strncasecmp(str_c(str), "text", 4) == 0) { + sieve_lexer_shift(scanner); // discard colon + + /* Discard SP and HTAB whitespace */ + while (sieve_lexer_curchar(scanner) == ' ' || + sieve_lexer_curchar(scanner) == '\t') + sieve_lexer_shift(scanner); + + /* Discard hash comment or handle single CRLF */ + if (sieve_lexer_curchar(scanner) == '\r') + sieve_lexer_shift(scanner); + switch (sieve_lexer_curchar(scanner)) { + case '#': + if (!sieve_lexer_scan_hash_comment(scanner)) + return FALSE; + if (scanner->input->eof) { + sieve_lexer_error(lexer, + "end of file before end of multi-line string"); + lexer->token_type = STT_ERROR; + return FALSE; + } else if (scanner->input->stream_errno != 0) { + lexer->token_type = STT_ERROR; + return FALSE; + } + break; + case '\n': + sieve_lexer_shift(scanner); + break; + case -1: + if (scanner->input->eof) { + sieve_lexer_error(lexer, + "end of file before end of multi-line string"); + } + lexer->token_type = STT_ERROR; + return FALSE; + default: + sieve_lexer_error(lexer, + "invalid character %s after 'text:' in multiline string", + _char_sanitize(sieve_lexer_curchar(scanner))); + lexer->token_type = STT_ERROR; + return FALSE; + } + + /* Start over */ + str_truncate(str, 0); + + /* Parse literal lines */ + while (TRUE) { + bool cr_shifted = FALSE; + + /* Remove dot-stuffing or detect end of text */ + if (sieve_lexer_curchar(scanner) == '.') { + sieve_lexer_shift(scanner); + + /* Check for CR.. */ + if (sieve_lexer_curchar(scanner) == '\r') { + sieve_lexer_shift(scanner); + cr_shifted = TRUE; + } + + /* ..LF */ + if (sieve_lexer_curchar(scanner) == '\n') { + sieve_lexer_shift(scanner); + + /* End of multi-line string */ + + /* Check whether length limit was violated */ + if (str_len(str) > SIEVE_MAX_STRING_LEN) { + sieve_lexer_error(lexer, + "multi-line string started at line %d is too long " + "(longer than %llu bytes)", lexer->token_line, + (long long) SIEVE_MAX_STRING_LEN); + lexer->token_type = STT_ERROR; + return FALSE; + } + + lexer->token_type = STT_STRING; + return TRUE; + } else if (cr_shifted) { + /* Seen CR, but no LF */ + if (sieve_lexer_curchar(scanner) != -1 || + !scanner->input->eof) { + sieve_lexer_error(lexer, + "found stray carriage-return (CR) character " + "in multi-line string started at line %d", + lexer->token_line); + } + lexer->token_type = STT_ERROR; + return FALSE; + } + + /* Handle dot-stuffing */ + if (str_len(str) <= SIEVE_MAX_STRING_LEN) + str_append_c(str, '.'); + if (sieve_lexer_curchar(scanner) == '.') + sieve_lexer_shift(scanner); + } + + /* Scan the rest of the line */ + while (sieve_lexer_curchar(scanner) != '\n' && + sieve_lexer_curchar(scanner) != '\r') { + + switch (sieve_lexer_curchar(scanner)) { + case -1: + if (scanner->input->eof) { + sieve_lexer_error(lexer, + "end of file before end of multi-line string"); + } + lexer->token_type = STT_ERROR; + return FALSE; + case '\0': + sieve_lexer_error(lexer, + "encountered NUL character in quoted string " + "started at line %d", lexer->token_line); + lexer->token_type = STT_ERROR; + return FALSE; + default: + if (str_len(str) <= SIEVE_MAX_STRING_LEN) + str_append_c(str, sieve_lexer_curchar(scanner)); + } + + sieve_lexer_shift(scanner); + } + + /* If exited loop due to CR, skip it */ + if (sieve_lexer_curchar(scanner) == '\r') + sieve_lexer_shift(scanner); + + /* Now we must see an LF */ + if (sieve_lexer_curchar(scanner) != '\n') { + if (sieve_lexer_curchar(scanner) != -1 || + !scanner->input->eof) { + sieve_lexer_error(lexer, + "found stray carriage-return (CR) character " + "in multi-line string started at line %d", + lexer->token_line); + } + lexer->token_type = STT_ERROR; + return FALSE; + } + + if (str_len(str) <= SIEVE_MAX_STRING_LEN) + str_append(str, "\r\n"); + + sieve_lexer_shift(scanner); + } + + i_unreached(); + lexer->token_type = STT_ERROR; + return FALSE; + } + + if (str_len(str) > SIEVE_MAX_IDENTIFIER_LEN) { + sieve_lexer_error(lexer, + "encountered impossibly long %s%s'", + (type == STT_TAG ? "tag identifier ':" : + "identifier '"), + str_sanitize(str_c(str), + SIEVE_MAX_IDENTIFIER_LEN)); + lexer->token_type = STT_ERROR; + return FALSE; + } + + lexer->token_type = type; + return TRUE; + } + + /* Error (unknown character and EOF handled already) */ + if (lexer->token_type != STT_GARBAGE) { + sieve_lexer_error(lexer, + "unexpected character(s) starting with %s", + _char_sanitize(sieve_lexer_curchar(scanner))); + } + sieve_lexer_shift(scanner); + lexer->token_type = STT_GARBAGE; + return FALSE; + } +} + +void sieve_lexer_skip_token(const struct sieve_lexer *lexer) +{ + /* Scan token while skipping whitespace */ + do { + struct sieve_lexical_scanner *scanner = lexer->scanner; + + if (!sieve_lexer_scan_raw_token(scanner)) { + if (!scanner->input->eof && + scanner->input->stream_errno != 0) { + sieve_critical(scanner->svinst, scanner->ehandler, + sieve_error_script_location(scanner->script, + scanner->current_line), + "error reading script", + "error reading script during lexical analysis: %s", + i_stream_get_error(scanner->input)); + } + return; + } + } while (lexer->token_type == STT_WHITESPACE); +} + |