Adding upstream version 1:2.3.19.1+dfsg1. (refs: upstream/1%2.3.19.1+dfsg1, upstream)

Signed-off-by: Daniel Baumann <daniel.baumann@progress-linux.org>
author: Daniel Baumann <daniel.baumann@progress-linux.org> 2024-04-28 09:51:24 +0000
committer: Daniel Baumann <daniel.baumann@progress-linux.org> 2024-04-28 09:51:24 +0000
commit: f7548d6d28c313cf80e6f3ef89aed16a19815df1 (patch)
tree: a3f6f2a3f247293bee59ecd28e8cd8ceb6ca064a /pigeonhole/src/lib-sieve/sieve-lexer.c
parent: Initial commit. (diff)
download: dovecot-f7548d6d28c313cf80e6f3ef89aed16a19815df1.tar.xz
dovecot-f7548d6d28c313cf80e6f3ef89aed16a19815df1.zip
1 files changed, 930 insertions, 0 deletions
diff --git a/pigeonhole/src/lib-sieve/sieve-lexer.c b/pigeonhole/src/lib-sieve/sieve-lexer.c
new file mode 100644
index 0000000..a6968a9
--- /dev/null
+++ b/pigeonhole/src/lib-sieve/sieve-lexer.c
@@ -0,0 +1,930 @@
+/* Copyright (c) 2002-2018 Pigeonhole authors, see the included COPYING file
+ */
+
+#include "lib.h"
+#include "compat.h"
+#include "str.h"
+#include "str-sanitize.h"
+#include "istream.h"
+
+#include "sieve-common.h"
+#include "sieve-limits.h"
+#include "sieve-error.h"
+#include "sieve-script.h"
+
+#include "sieve-lexer.h"
+
+#include <stdio.h>
+#include <sys/types.h>
+#include <sys/stat.h>
+#include <fcntl.h>
+#include <unistd.h>
+#include <ctype.h>
+
+/*
+ * Useful macros
+ */
+
+/* Numeric value of the decimal digit character c ('0'..'9'). The argument is
+   parenthesized so that compound expressions such as a conditional expand
+   correctly. */
+#define DIGIT_VAL(c) ((c) - '0')
+
+/*
+ * Lexer object
+ *
+ * Private scanner state behind the public struct sieve_lexer. The public
+ * lexer is embedded as the 'lexer' member; sieve_lexer_create() returns its
+ * address and points lexer.scanner back at this structure.
+ */
+
+struct sieve_lexical_scanner {
+	pool_t pool; /* NOTE(review): not referenced by any function visible in this file */
+	struct sieve_instance *svinst; /* handed to sieve_critical() when a stream read error is reported */
+
+	struct sieve_script *script; /* script being scanned; referenced for the scanner's lifetime */
+	struct istream *input; /* stream the script is read from; referenced for the scanner's lifetime */
+
+	struct sieve_error_handler *ehandler; /* receives lexer errors and warnings */
+
+	/* Currently scanned data: the window last obtained from
+	   i_stream_get_data(), its size, and the index of the current
+	   character within it. buffer_size == 0 means no character is
+	   available. */
+	const unsigned char *buffer;
+	size_t buffer_size;
+	size_t buffer_pos;
+
+	struct sieve_lexer lexer; /* public lexer state (current token type, value and line) */
+
+	int current_line; /* starts at 1; incremented each time an LF is shifted past */
+};
+
+/*
+ * Create a lexer for the given script.
+ *
+ * Opens the script as an input stream (error_r is passed through to
+ * sieve_script_get_stream()) and rejects the script when the stream reports a
+ * size larger than svinst->max_script_size. The size check is skipped when
+ * the stream cannot be stat'ed, when it reports a non-positive size, or when
+ * the limit is 0.
+ *
+ * On success the returned lexer holds its own references to the stream, the
+ * script and the error handler; release it with sieve_lexer_free().
+ *
+ * Returns NULL on failure. For an oversized script an error is logged through
+ * ehandler and, when error_r is not NULL, *error_r is set to
+ * SIEVE_ERROR_NOT_POSSIBLE.
+ */
+const struct sieve_lexer *
+sieve_lexer_create(struct sieve_script *script,
+		   struct sieve_error_handler *ehandler,
+		   enum sieve_error *error_r)
+{
+	struct sieve_lexical_scanner *scanner;
+	struct sieve_instance *svinst = sieve_script_svinst(script);
+	struct istream *stream;
+	const struct stat *st;
+
+	/* Open script as stream */
+	if (sieve_script_get_stream(script, &stream, error_r) < 0)
+		return NULL;
+
+	/* Check script size */
+	if (i_stream_stat(stream, TRUE, &st) >= 0 && st->st_size > 0 &&
+	    svinst->max_script_size > 0 &&
+	    (uoff_t)st->st_size > svinst->max_script_size) {
+		sieve_error(ehandler, sieve_script_name(script),
+			"sieve script is too large (max %zu bytes)",
+			svinst->max_script_size);
+		if (error_r != NULL)
+			*error_r = SIEVE_ERROR_NOT_POSSIBLE;
+		return NULL;
+	}
+
+	scanner = i_new(struct sieve_lexical_scanner, 1);
+	scanner->lexer.scanner = scanner;
+
+	/* Remember the instance: sieve_lexer_skip_token() passes
+	   scanner->svinst to sieve_critical() when reading the script fails,
+	   so it must not be left NULL. */
+	scanner->svinst = svinst;
+
+	scanner->ehandler = ehandler;
+	sieve_error_handler_ref(ehandler);
+
+	scanner->input = stream;
+	i_stream_ref(scanner->input);
+
+	scanner->script = script;
+	sieve_script_ref(script);
+
+	/* No data has been fetched from the stream yet */
+	scanner->buffer = NULL;
+	scanner->buffer_size = 0;
+	scanner->buffer_pos = 0;
+
+	/* STT_NONE makes the first raw-token scan prime the stream */
+	scanner->lexer.token_type = STT_NONE;
+	scanner->lexer.token_str_value = str_new(default_pool, 256);
+	scanner->lexer.token_int_value = 0;
+	scanner->lexer.token_line = 1;
+
+	scanner->current_line = 1;
+
+	return &scanner->lexer;
+}
+
+/* Destroy a lexer obtained from sieve_lexer_create(): drops the references
+   held on the input stream, the script and the error handler, frees the token
+   string buffer and the scanner itself, and finally sets *_lexer to NULL. */
+void sieve_lexer_free(const struct sieve_lexer **_lexer)
+{
+	struct sieve_lexical_scanner *sc = (*_lexer)->scanner;
+
+	i_stream_unref(&sc->input);
+	sieve_script_unref(&sc->script);
+	sieve_error_handler_unref(&sc->ehandler);
+	str_free(&sc->lexer.token_str_value);
+
+	i_free(sc);
+	*_lexer = NULL;
+}
+
+/*
+ * Internal error handling
+ */
+
+inline static void ATTR_FORMAT(4, 5)
+sieve_lexer_error(const struct sieve_lexer *lexer,
+		  const char *csrc_filename, unsigned int csrc_linenum,
+		  const char *fmt, ...)
+{
+	struct sieve_lexical_scanner *scanner = lexer->scanner;
+	struct sieve_error_params params = {
+		.log_type = LOG_TYPE_ERROR,
+		.csrc = {
+			.filename = csrc_filename,
+			.linenum = csrc_linenum,
+		},
+	};
+	va_list args;
+
+	va_start(args, fmt);
+
+	T_BEGIN {
+		params.location =
+			sieve_error_script_location(scanner->script,
+						    scanner->current_line);
+		sieve_logv(scanner->ehandler, &params, fmt, args);
+	} T_END;
+
+	va_end(args);
+}
+#define sieve_lexer_error(lexer, ...) \
+	sieve_lexer_error(lexer, __FILE__, __LINE__, __VA_ARGS__)
+
+/* Log a warning located at the script line currently being scanned.
+   csrc_filename and csrc_linenum identify the C source location of the
+   caller; the wrapper macro defined after the function supplies them as
+   __FILE__ and __LINE__. */
+inline static void ATTR_FORMAT(4, 5)
+sieve_lexer_warning(const struct sieve_lexer *lexer,
+		    const char *csrc_filename, unsigned int csrc_linenum,
+		    const char *fmt, ...)
+{
+	struct sieve_lexical_scanner *sc = lexer->scanner;
+	struct sieve_error_params warn_params = {
+		.log_type = LOG_TYPE_WARNING,
+		.csrc.filename = csrc_filename,
+		.csrc.linenum = csrc_linenum,
+	};
+	va_list va;
+
+	va_start(va, fmt);
+	T_BEGIN {
+		warn_params.location = sieve_error_script_location(
+			sc->script, sc->current_line);
+		sieve_logv(sc->ehandler, &warn_params, fmt, va);
+	} T_END;
+	va_end(va);
+}
+#define sieve_lexer_warning(lexer, ...) \
+	sieve_lexer_warning(lexer, __FILE__, __LINE__, __VA_ARGS__)
+
+/* Return a constant, human-readable description of the lexer's current token
+   type. Token types that should never be visible to a caller are marked
+   "(bug)"; a value outside the enumeration yields "unknown token (bug)". */
+const char *sieve_lexer_token_description(const struct sieve_lexer *lexer)
+{
+	const char *desc = "unknown token (bug)";
+
+	switch (lexer->token_type) {
+	/* Tokens without a value */
+	case STT_NONE:
+		desc = "no token (bug)";
+		break;
+	case STT_WHITESPACE:
+		desc = "whitespace (bug)";
+		break;
+	case STT_EOF:
+		desc = "end of file";
+		break;
+
+	/* Tokens carrying a value */
+	case STT_NUMBER:
+		desc = "number";
+		break;
+	case STT_IDENTIFIER:
+		desc = "identifier";
+		break;
+	case STT_TAG:
+		desc = "tag";
+		break;
+	case STT_STRING:
+		desc = "string";
+		break;
+
+	/* Punctuation */
+	case STT_RBRACKET:
+		desc = "')'";
+		break;
+	case STT_LBRACKET:
+		desc = "'('";
+		break;
+	case STT_RCURLY:
+		desc = "'}'";
+		break;
+	case STT_LCURLY:
+		desc = "'{'";
+		break;
+	case STT_RSQUARE:
+		desc = "']'";
+		break;
+	case STT_LSQUARE:
+		desc = "'['";
+		break;
+	case STT_SEMICOLON:
+		desc = "';'";
+		break;
+	case STT_COMMA:
+		desc = "','";
+		break;
+
+	case STT_SLASH:
+		desc = "'/'";
+		break;
+	case STT_COLON:
+		desc = "':'";
+		break;
+
+	/* Error conditions */
+	case STT_GARBAGE:
+		desc = "unknown characters";
+		break;
+	case STT_ERROR:
+		desc = "error token (bug)";
+		break;
+	}
+
+	return desc;
+}
+
+/*
+ * Debug
+ */
+
+/* Debug helper: write a short textual tag for the lexer's current token type
+   to standard output. Only the token type is printed, not its value. */
+void sieve_lexer_token_print(const struct sieve_lexer *lexer)
+{
+	const char *text;
+
+	switch (lexer->token_type) {
+	case STT_NONE:
+		text = "??NONE?? ";
+		break;
+	case STT_WHITESPACE:
+		text = "??WHITESPACE?? ";
+		break;
+	case STT_EOF:
+		text = "EOF\n";
+		break;
+
+	case STT_NUMBER:
+		text = "NUMBER ";
+		break;
+	case STT_IDENTIFIER:
+		text = "IDENTIFIER ";
+		break;
+	case STT_TAG:
+		text = "TAG ";
+		break;
+	case STT_STRING:
+		text = "STRING ";
+		break;
+
+	case STT_RBRACKET:
+		text = ") ";
+		break;
+	case STT_LBRACKET:
+		text = "( ";
+		break;
+	case STT_RCURLY:
+		text = "}\n";
+		break;
+	case STT_LCURLY:
+		text = "{\n";
+		break;
+	case STT_RSQUARE:
+		text = "] ";
+		break;
+	case STT_LSQUARE:
+		text = "[ ";
+		break;
+	case STT_SEMICOLON:
+		text = ";\n";
+		break;
+	case STT_COMMA:
+		text = ", ";
+		break;
+
+	case STT_SLASH:
+		text = "/ ";
+		break;
+	case STT_COLON:
+		text = ": ";
+		break;
+
+	case STT_GARBAGE:
+		text = ">>GARBAGE<<";
+		break;
+	case STT_ERROR:
+		text = ">>ERROR<<";
+		break;
+	default:
+		text = "UNKNOWN ";
+		break;
+	}
+
+	printf("%s", text);
+}
+
+/*
+ * Lexical scanning
+ */
+
+/* Advance the scanner by one character.
+
+   If the character being left behind is an LF, the line counter is bumped.
+   While the current buffer still has characters, only buffer_pos moves.
+   Otherwise the consumed buffer is skipped in the stream and a new data
+   window is fetched; the stream is read only when it has nothing buffered.
+   When no data can be obtained, buffer_size stays 0 and
+   sieve_lexer_curchar() reports -1. */
+static void sieve_lexer_shift(struct sieve_lexical_scanner *scanner)
+{
+	const bool have_data = (scanner->buffer_size > 0);
+
+	/* Count the line break we are about to move past */
+	if (have_data && scanner->buffer[scanner->buffer_pos] == '\n')
+		scanner->current_line++;
+
+	/* Common case: next character is still inside the current buffer */
+	if (have_data && scanner->buffer_pos + 1 < scanner->buffer_size) {
+		scanner->buffer_pos++;
+		return;
+	}
+
+	/* Buffer exhausted: hand it back to the stream and fetch more */
+	if (have_data)
+		i_stream_skip(scanner->input, scanner->buffer_size);
+
+	scanner->buffer = i_stream_get_data(scanner->input,
+					    &scanner->buffer_size);
+	if (scanner->buffer_size == 0 && i_stream_read(scanner->input) > 0) {
+		scanner->buffer = i_stream_get_data(scanner->input,
+						    &scanner->buffer_size);
+	}
+
+	scanner->buffer_pos = 0;
+}
+
+/* Return the character at the current scan position as a non-negative value,
+   or -1 when the scanner holds no data. */
+static inline int sieve_lexer_curchar(struct sieve_lexical_scanner *scanner)
+{
+	return (scanner->buffer_size != 0 ?
+		scanner->buffer[scanner->buffer_pos] : -1);
+}
+
+static inline const char *_char_sanitize(int ch)
+{
+	if (ch > 31 && ch < 127)
+		return t_strdup_printf("'%c'", ch);
+
+	return t_strdup_printf("0x%02x", ch);
+}
+
+/*
+ * Scan a number token starting at the current character.
+ *
+ * Consumes the run of decimal digits (collected in lexer->token_str_value)
+ * and, when the digits fit, one optional size suffix: 'K'/'k', 'M'/'m' or
+ * 'G'/'g' shift the value left by 10, 20 or 30 bits respectively.
+ *
+ * Returns TRUE with token_type STT_NUMBER and token_int_value set. Returns
+ * FALSE with token_type STT_ERROR, after logging an error, when the digits
+ * cannot be converted, exceed the range of sieve_number_t, or the scaled
+ * value would exceed SIEVE_MAX_NUMBER.
+ */
+static bool sieve_lexer_scan_number(struct sieve_lexical_scanner *scanner)
+{
+	struct sieve_lexer *lexer = &scanner->lexer;
+	uintmax_t value;
+	string_t *str;
+	bool overflow = FALSE;
+
+	str_truncate(lexer->token_str_value,0);
+	str = lexer->token_str_value;
+
+	while (i_isdigit(sieve_lexer_curchar(scanner))) {
+		str_append_c(str, sieve_lexer_curchar(scanner));
+		sieve_lexer_shift(scanner);
+	}
+
+	if (str_to_uintmax(str_c(str), &value) < 0 ||
+	    value > (sieve_number_t)-1) {
+		overflow = TRUE;
+	} else {
+		/* The limit is checked before shifting so the shift itself
+		   can never lose bits. The suffix character is consumed even
+		   when the scaled value overflows. */
+		switch (sieve_lexer_curchar(scanner)) {
+		case 'k':
+		case 'K': /* Kilo */
+			if (value > (SIEVE_MAX_NUMBER >> 10))
+				overflow = TRUE;
+			else
+				value = value << 10;
+			sieve_lexer_shift(scanner);
+			break;
+		case 'm':
+		case 'M': /* Mega */
+			if (value > (SIEVE_MAX_NUMBER >> 20))
+				overflow = TRUE;
+			else
+				value = value << 20;
+			sieve_lexer_shift(scanner);
+			break;
+		case 'g':
+		case 'G': /* Giga */
+			if (value > (SIEVE_MAX_NUMBER >> 30))
+				overflow = TRUE;
+			else
+				value = value << 30;
+			sieve_lexer_shift(scanner);
+			break;
+		default:
+			/* Next token */
+			break;
+		}
+	}
+
+	/* Check for integer overflow */
+	if (overflow) {
+		/* %llu takes an unsigned long long argument */
+		sieve_lexer_error(lexer,
+				  "number exceeds integer limits (max %llu)",
+				  (unsigned long long)SIEVE_MAX_NUMBER);
+		lexer->token_type = STT_ERROR;
+		return FALSE;
+	}
+
+	lexer->token_type = STT_NUMBER;
+	lexer->token_int_value = (sieve_number_t)value;
+	return TRUE;
+}
+
+/* Skip the remainder of a hash comment, up to and including the LF that ends
+   it, and report it as whitespace.
+
+   Returns TRUE with token_type STT_WHITESPACE when the comment is terminated
+   by an LF, or (after logging a warning) when the input ends at end of file
+   without one. Returns FALSE with token_type STT_ERROR when input runs out
+   without the stream being at EOF, or when a NUL character is found (which
+   is logged as an error). */
+static bool
+sieve_lexer_scan_hash_comment(struct sieve_lexical_scanner *scanner)
+{
+	struct sieve_lexer *lexer = &scanner->lexer;
+
+	for (;;) {
+		int ch = sieve_lexer_curchar(scanner);
+
+		if (ch == '\n')
+			break;
+
+		if (ch == -1) {
+			if (!scanner->input->eof) {
+				lexer->token_type = STT_ERROR;
+				return FALSE;
+			}
+			sieve_lexer_warning(lexer,
+				"no newline (CRLF) at end of hash comment at end of file");
+			lexer->token_type = STT_WHITESPACE;
+			return TRUE;
+		}
+
+		if (ch == '\0') {
+			sieve_lexer_error(lexer,
+					  "encountered NUL character in hash comment");
+			lexer->token_type = STT_ERROR;
+			return FALSE;
+		}
+
+		/* Any other character, including a stray CR, is ignored */
+		sieve_lexer_shift(scanner);
+	}
+
+	/* Consume the terminating LF */
+	sieve_lexer_shift(scanner);
+
+	lexer->token_type = STT_WHITESPACE;
+	return TRUE;
+}
+
+/* sieve_lexer_scan_raw_token:
+ *   Scans valid tokens and whitespace
+ *
+ *   Scans exactly one raw token at the current position and stores its type
+ *   in lexer->token_type. On the very first call (token type STT_NONE) the
+ *   stream is read once and the first character is loaded. lexer->token_line
+ *   is set to the line on which the token starts.
+ *
+ *   String, identifier and tag values are left in lexer->token_str_value
+ *   (for a tag, without the leading ':'); numbers are handled by
+ *   sieve_lexer_scan_number(). Over-long strings and identifiers are still
+ *   scanned to their end, but their content stops being collected once the
+ *   limit is passed, and an error is reported afterwards.
+ *
+ *   Returns TRUE when a token was scanned, including STT_WHITESPACE and
+ *   STT_EOF. Returns FALSE with token type STT_ERROR on a scan or read
+ *   error, or STT_GARBAGE for an unexpected character. Running out of input
+ *   while the stream is not at EOF yields STT_ERROR without a message in most
+ *   places; sieve_lexer_skip_token() reports the stream error itself.
+ */
+static bool
+sieve_lexer_scan_raw_token(struct sieve_lexical_scanner *scanner)
+{
+	struct sieve_lexer *lexer = &scanner->lexer;
+	string_t *str;
+	int ret;
+
+	/* Read first character */
+	if (lexer->token_type == STT_NONE) {
+		if ((ret = i_stream_read(scanner->input)) < 0) {
+			i_assert(ret != -2);
+			if (!scanner->input->eof) {
+				lexer->token_type = STT_ERROR;
+				return FALSE;
+			}
+		}
+		sieve_lexer_shift(scanner);
+	}
+
+	lexer->token_line = scanner->current_line;
+
+	switch (sieve_lexer_curchar(scanner)) {
+
+	/* whitespace */
+
+	// hash-comment = ( "#" *CHAR-NOT-CRLF CRLF )
+	case '#':
+		sieve_lexer_shift(scanner);
+		return sieve_lexer_scan_hash_comment(scanner);
+
+	// bracket-comment = "/*" *(CHAR-NOT-STAR / ("*" CHAR-NOT-SLASH)) "*/"
+	//        ;; No */ allowed inside a comment.
+	//        ;; (No * is allowed unless it is the last character,
+	//        ;; or unless it is followed by a character that isn't a
+	//        ;; slash.)
+	case '/':
+		sieve_lexer_shift(scanner);
+
+		if (sieve_lexer_curchar(scanner) == '*') {
+			sieve_lexer_shift(scanner);
+
+			while (TRUE) {
+				switch (sieve_lexer_curchar(scanner)) {
+				case -1:
+					if (scanner->input->eof) {
+						sieve_lexer_error(lexer,
+							"end of file before end of bracket comment "
+							"('/* ... */') "
+							"started at line %d",
+							lexer->token_line);
+					}
+					lexer->token_type = STT_ERROR;
+					return FALSE;
+				case '*':
+					sieve_lexer_shift(scanner);
+
+					if (sieve_lexer_curchar(scanner) == '/') {
+						sieve_lexer_shift(scanner);
+
+						lexer->token_type = STT_WHITESPACE;
+						return TRUE;
+
+					} else if (sieve_lexer_curchar(scanner) == -1) {
+						sieve_lexer_error(lexer,
+							"end of file before end of bracket comment "
+							"('/* ... */') "
+							"started at line %d",
+							lexer->token_line);
+						lexer->token_type = STT_ERROR;
+						return FALSE;
+					}
+					/* The character after '*' is not consumed
+					   here, so a run like "**" followed by '/'
+					   is still recognized as the end */
+					break;
+				case '\0':
+					sieve_lexer_error(lexer,
+						"encountered NUL character in bracket comment");
+					lexer->token_type = STT_ERROR;
+					return FALSE;
+				default:
+					sieve_lexer_shift(scanner);
+				}
+			}
+
+			i_unreached();
+			return FALSE;
+		}
+
+		/* A '/' not followed by '*' is a token of its own */
+		lexer->token_type = STT_SLASH;
+		return TRUE;
+
+	// comment = bracket-comment / hash-comment
+	// white-space = 1*(SP / CRLF / HTAB) / comment
+	case '\t':
+	case '\r':
+	case '\n':
+	case ' ':
+		sieve_lexer_shift(scanner);
+
+		while (sieve_lexer_curchar(scanner) == '\t' ||
+		       sieve_lexer_curchar(scanner) == '\r' ||
+		       sieve_lexer_curchar(scanner) == '\n' ||
+		       sieve_lexer_curchar(scanner) == ' ') {
+
+			sieve_lexer_shift(scanner);
+		}
+
+		lexer->token_type = STT_WHITESPACE;
+		return TRUE;
+
+	/* quoted-string */
+	case '"':
+		sieve_lexer_shift(scanner);
+
+		str_truncate(lexer->token_str_value, 0);
+		str = lexer->token_str_value;
+
+		while (sieve_lexer_curchar(scanner) != '"') {
+			/* Drop the backslash; the character after it is then
+			   handled by the switch below like any other */
+			if (sieve_lexer_curchar(scanner) == '\\')
+				sieve_lexer_shift(scanner);
+
+			switch (sieve_lexer_curchar(scanner)) {
+
+			/* End of file */
+			case -1:
+				if (scanner->input->eof) {
+					sieve_lexer_error(lexer,
+						"end of file before end of quoted string "
+						"started at line %d", lexer->token_line);
+				}
+				lexer->token_type = STT_ERROR;
+				return FALSE;
+
+			/* NUL character */
+			case '\0':
+				sieve_lexer_error(lexer,
+					"encountered NUL character in quoted string "
+					"started at line %d", lexer->token_line);
+				lexer->token_type = STT_ERROR;
+				return FALSE;
+
+			/* CR .. check for LF */
+			case '\r':
+				sieve_lexer_shift(scanner);
+
+				if (sieve_lexer_curchar(scanner) != '\n') {
+					sieve_lexer_error(lexer,
+						"found stray carriage-return (CR) character "
+						"in quoted string started at line %d",
+						lexer->token_line);
+					lexer->token_type = STT_ERROR;
+					return FALSE;
+				}
+
+				if (str_len(str) <= SIEVE_MAX_STRING_LEN)
+					str_append(str, "\r\n");
+				break;
+
+			/* Loose LF is allowed (non-standard) and converted to CRLF */
+			case '\n':
+				if (str_len(str) <= SIEVE_MAX_STRING_LEN)
+					str_append(str, "\r\n");
+				break;
+
+			/* Other characters */
+			default:
+				if (str_len(str) <= SIEVE_MAX_STRING_LEN)
+					str_append_c(str, sieve_lexer_curchar(scanner));
+			}
+
+			sieve_lexer_shift(scanner);
+		}
+
+		/* Consume the closing quote */
+		sieve_lexer_shift(scanner);
+
+		if (str_len(str) > SIEVE_MAX_STRING_LEN) {
+			/* %llu takes an unsigned long long argument */
+			sieve_lexer_error(lexer,
+				"quoted string started at line %d is too long "
+				"(longer than %llu bytes)", lexer->token_line,
+				(unsigned long long)SIEVE_MAX_STRING_LEN);
+			lexer->token_type = STT_ERROR;
+			return FALSE;
+		}
+
+		lexer->token_type = STT_STRING;
+		return TRUE;
+
+	/* single character tokens */
+	case ']':
+		sieve_lexer_shift(scanner);
+		lexer->token_type = STT_RSQUARE;
+		return TRUE;
+	case '[':
+		sieve_lexer_shift(scanner);
+		lexer->token_type = STT_LSQUARE;
+		return TRUE;
+	case '}':
+		sieve_lexer_shift(scanner);
+		lexer->token_type = STT_RCURLY;
+		return TRUE;
+	case '{':
+		sieve_lexer_shift(scanner);
+		lexer->token_type = STT_LCURLY;
+		return TRUE;
+	case ')':
+		sieve_lexer_shift(scanner);
+		lexer->token_type = STT_RBRACKET;
+		return TRUE;
+	case '(':
+		sieve_lexer_shift(scanner);
+		lexer->token_type = STT_LBRACKET;
+		return TRUE;
+	case ';':
+		sieve_lexer_shift(scanner);
+		lexer->token_type = STT_SEMICOLON;
+		return TRUE;
+	case ',':
+		sieve_lexer_shift(scanner);
+		lexer->token_type = STT_COMMA;
+		return TRUE;
+
+	/* EOF */
+	case -1:
+		if (!scanner->input->eof) {
+			lexer->token_type = STT_ERROR;
+			return FALSE;
+		}
+		lexer->token_type = STT_EOF;
+		return TRUE;
+
+	default:
+		/* number */
+		if (i_isdigit(sieve_lexer_curchar(scanner))) {
+			return sieve_lexer_scan_number(scanner);
+
+		/* identifier / tag */
+		} else if (i_isalpha(sieve_lexer_curchar(scanner)) ||
+			   sieve_lexer_curchar(scanner) == '_' ||
+			   sieve_lexer_curchar(scanner) == ':') {
+
+			enum sieve_token_type type = STT_IDENTIFIER;
+			str_truncate(lexer->token_str_value,0);
+			str = lexer->token_str_value;
+
+			/* If it starts with a ':' it is a tag and not an
+			   identifier */
+			if (sieve_lexer_curchar(scanner) == ':') {
+				sieve_lexer_shift(scanner); // discard colon
+				type = STT_TAG;
+
+				/* First character still can't be a DIGIT */
+				if (i_isalpha(sieve_lexer_curchar(scanner)) ||
+				    sieve_lexer_curchar(scanner) == '_') {
+					str_append_c(str, sieve_lexer_curchar(scanner));
+					sieve_lexer_shift(scanner);
+				} else {
+					/* Hmm, otherwise it is just a spurious
+					   colon */
+					lexer->token_type = STT_COLON;
+					return TRUE;
+				}
+			} else {
+				str_append_c(str, sieve_lexer_curchar(scanner));
+				sieve_lexer_shift(scanner);
+			}
+
+			/* Scan the rest of the identifier */
+			while (i_isalnum(sieve_lexer_curchar(scanner)) ||
+			       sieve_lexer_curchar(scanner) == '_') {
+
+				if (str_len(str) <= SIEVE_MAX_IDENTIFIER_LEN) {
+					str_append_c(str, sieve_lexer_curchar(scanner));
+				}
+				sieve_lexer_shift(scanner);
+			}
+
+			/* Is this in fact a multiline text string ? */
+			if (sieve_lexer_curchar(scanner) == ':' &&
+			    type == STT_IDENTIFIER && str_len(str) == 4 &&
+			    strncasecmp(str_c(str), "text", 4) == 0) {
+				sieve_lexer_shift(scanner); // discard colon
+
+				/* Discard SP and HTAB whitespace */
+				while (sieve_lexer_curchar(scanner) == ' ' ||
+				       sieve_lexer_curchar(scanner) == '\t')
+					sieve_lexer_shift(scanner);
+
+				/* Discard hash comment or handle single CRLF */
+				if (sieve_lexer_curchar(scanner) == '\r')
+					sieve_lexer_shift(scanner);
+				switch (sieve_lexer_curchar(scanner)) {
+				case '#':
+					if (!sieve_lexer_scan_hash_comment(scanner))
+						return FALSE;
+					if (scanner->input->eof) {
+						sieve_lexer_error(lexer,
+							"end of file before end of multi-line string");
+						lexer->token_type = STT_ERROR;
+						return FALSE;
+					} else if (scanner->input->stream_errno != 0) {
+						lexer->token_type = STT_ERROR;
+						return FALSE;
+					}
+					break;
+				case '\n':
+					sieve_lexer_shift(scanner);
+					break;
+				case -1:
+					if (scanner->input->eof) {
+						sieve_lexer_error(lexer,
+							"end of file before end of multi-line string");
+					}
+					lexer->token_type = STT_ERROR;
+					return FALSE;
+				default:
+					sieve_lexer_error(lexer,
+						"invalid character %s after 'text:' in multiline string",
+						_char_sanitize(sieve_lexer_curchar(scanner)));
+					lexer->token_type = STT_ERROR;
+					return FALSE;
+				}
+
+				/* Start over */
+				str_truncate(str, 0);
+
+				/* Parse literal lines */
+				while (TRUE) {
+					bool cr_shifted = FALSE;
+
+					/* Remove dot-stuffing or detect end of text */
+					if (sieve_lexer_curchar(scanner) == '.') {
+						sieve_lexer_shift(scanner);
+
+						/* Check for CR.. */
+						if (sieve_lexer_curchar(scanner) == '\r') {
+							sieve_lexer_shift(scanner);
+							cr_shifted = TRUE;
+						}
+
+						/* ..LF */
+						if (sieve_lexer_curchar(scanner) == '\n') {
+							sieve_lexer_shift(scanner);
+
+							/* End of multi-line string */
+
+							/* Check whether length limit was violated */
+							if (str_len(str) > SIEVE_MAX_STRING_LEN) {
+								/* %llu takes an unsigned long long
+								   argument */
+								sieve_lexer_error(lexer,
+									"multi-line string started at line %d is too long "
+									"(longer than %llu bytes)", lexer->token_line,
+									(unsigned long long)SIEVE_MAX_STRING_LEN);
+								lexer->token_type = STT_ERROR;
+								return FALSE;
+							}
+
+							lexer->token_type = STT_STRING;
+							return TRUE;
+						} else if (cr_shifted) {
+							/* Seen CR, but no LF */
+							if (sieve_lexer_curchar(scanner) != -1 ||
+							    !scanner->input->eof) {
+								sieve_lexer_error(lexer,
+									"found stray carriage-return (CR) character "
+									"in multi-line string started at line %d",
+									lexer->token_line);
+							}
+							lexer->token_type = STT_ERROR;
+							return FALSE;
+						}
+
+						/* Handle dot-stuffing */
+						if (str_len(str) <= SIEVE_MAX_STRING_LEN)
+							str_append_c(str, '.');
+						if (sieve_lexer_curchar(scanner) == '.')
+							sieve_lexer_shift(scanner);
+					}
+
+					/* Scan the rest of the line */
+					while (sieve_lexer_curchar(scanner) != '\n' &&
+					       sieve_lexer_curchar(scanner) != '\r') {
+
+						switch (sieve_lexer_curchar(scanner)) {
+						case -1:
+							if (scanner->input->eof) {
+								sieve_lexer_error(lexer,
+									"end of file before end of multi-line string");
+							}
+							lexer->token_type = STT_ERROR;
+							return FALSE;
+						case '\0':
+							/* Name the construct actually being
+							   scanned: a multi-line string, not a
+							   quoted string */
+							sieve_lexer_error(lexer,
+								"encountered NUL character in multi-line string "
+								"started at line %d", lexer->token_line);
+							lexer->token_type = STT_ERROR;
+							return FALSE;
+						default:
+							if (str_len(str) <= SIEVE_MAX_STRING_LEN)
+								str_append_c(str, sieve_lexer_curchar(scanner));
+						}
+
+						sieve_lexer_shift(scanner);
+					}
+
+					/* If exited loop due to CR, skip it */
+					if (sieve_lexer_curchar(scanner) == '\r')
+						sieve_lexer_shift(scanner);
+
+					/* Now we must see an LF */
+					if (sieve_lexer_curchar(scanner) != '\n') {
+						if (sieve_lexer_curchar(scanner) != -1 ||
+						    !scanner->input->eof) {
+							sieve_lexer_error(lexer,
+								"found stray carriage-return (CR) character "
+								"in multi-line string started at line %d",
+								lexer->token_line);
+						}
+						lexer->token_type = STT_ERROR;
+						return FALSE;
+					}
+
+					/* Line endings are always stored as CRLF */
+					if (str_len(str) <= SIEVE_MAX_STRING_LEN)
+						str_append(str, "\r\n");
+
+					sieve_lexer_shift(scanner);
+				}
+
+				i_unreached();
+				lexer->token_type = STT_ERROR;
+				return FALSE;
+			}
+
+			if (str_len(str) > SIEVE_MAX_IDENTIFIER_LEN) {
+				sieve_lexer_error(lexer,
+					"encountered impossibly long %s%s'",
+					(type == STT_TAG ? "tag identifier ':" :
+					 "identifier '"),
+					str_sanitize(str_c(str),
+						     SIEVE_MAX_IDENTIFIER_LEN));
+				lexer->token_type = STT_ERROR;
+				return FALSE;
+			}
+
+			lexer->token_type = type;
+			return TRUE;
+		}
+
+		/* Error (unknown character and EOF handled already).
+		   The message is logged only for the first character of a
+		   run of garbage. */
+		if (lexer->token_type != STT_GARBAGE) {
+			sieve_lexer_error(lexer,
+				"unexpected character(s) starting with %s",
+				_char_sanitize(sieve_lexer_curchar(scanner)));
+		}
+		sieve_lexer_shift(scanner);
+		lexer->token_type = STT_GARBAGE;
+		return FALSE;
+	}
+}
+
+/* Advance the lexer to the next non-whitespace token.
+
+   Raw tokens are scanned until one is found whose type is not
+   STT_WHITESPACE. Scanning also stops as soon as a raw scan fails; if at that
+   point the stream is not at EOF and has an error set, a critical error is
+   logged for the current script location. The outcome is left in
+   lexer->token_type. */
+void sieve_lexer_skip_token(const struct sieve_lexer *lexer)
+{
+	struct sieve_lexical_scanner *scanner = lexer->scanner;
+
+	for (;;) {
+		if (!sieve_lexer_scan_raw_token(scanner)) {
+			bool read_failed = (!scanner->input->eof &&
+					    scanner->input->stream_errno != 0);
+
+			if (read_failed) {
+				sieve_critical(scanner->svinst, scanner->ehandler,
+					sieve_error_script_location(scanner->script,
+								    scanner->current_line),
+					"error reading script",
+					"error reading script during lexical analysis: %s",
+					i_stream_get_error(scanner->input));
+			}
+			return;
+		}
+
+		/* Whitespace is never handed to the caller */
+		if (lexer->token_type != STT_WHITESPACE)
+			return;
+	}
+}
+
author	Daniel Baumann <daniel.baumann@progress-linux.org>	2024-04-28 09:51:24 +0000
committer	Daniel Baumann <daniel.baumann@progress-linux.org>	2024-04-28 09:51:24 +0000
commit	f7548d6d28c313cf80e6f3ef89aed16a19815df1 (patch)
tree	a3f6f2a3f247293bee59ecd28e8cd8ceb6ca064a /pigeonhole/src/lib-sieve/sieve-lexer.c
parent	Initial commit. (diff)
download	dovecot-f7548d6d28c313cf80e6f3ef89aed16a19815df1.tar.xz dovecot-f7548d6d28c313cf80e6f3ef89aed16a19815df1.zip