/* Copyright (c) 2002-2018 Pigeonhole authors, see the included COPYING file
 */

#include "lib.h"
#include "compat.h"
#include "str.h"
#include "str-sanitize.h"
#include "istream.h"

#include "sieve-common.h"
#include "sieve-limits.h"
#include "sieve-error.h"
#include "sieve-script.h"

#include "sieve-lexer.h"

#include <stdio.h>
#include <sys/types.h>
#include <sys/stat.h>
#include <fcntl.h>
#include <unistd.h>
#include <ctype.h>

/*
 * Useful macros
 */

#define DIGIT_VAL(c) (c - '0')

/*
 * Lexer object
 */

struct sieve_lexical_scanner {
	pool_t pool;
	struct sieve_instance *svinst;

	struct sieve_script *script;
	struct istream *input;

	struct sieve_error_handler *ehandler;

	/* Currently scanned data */
	const unsigned char *buffer;
	size_t buffer_size;
	size_t buffer_pos;

	struct sieve_lexer lexer;

	int current_line;
};

const struct sieve_lexer *
sieve_lexer_create(struct sieve_script *script,
		   struct sieve_error_handler *ehandler,
		   enum sieve_error *error_r)
{
	struct sieve_lexical_scanner *scanner;
	struct sieve_instance *svinst = sieve_script_svinst(script);
	struct istream *stream;
	const struct stat *st;

	/* Open script as stream */
	if (sieve_script_get_stream(script, &stream, error_r) < 0)
		return NULL;

	/* Check script size */
	if (i_stream_stat(stream, TRUE, &st) >= 0 && st->st_size > 0 &&
	    svinst->max_script_size > 0 &&
	    (uoff_t)st->st_size > svinst->max_script_size) {
		sieve_error(ehandler, sieve_script_name(script),
			"sieve script is too large (max %zu bytes)",
			svinst->max_script_size);
		if (error_r != NULL)
			*error_r = SIEVE_ERROR_NOT_POSSIBLE;
		return NULL;
	}

	scanner = i_new(struct sieve_lexical_scanner, 1);
	scanner->lexer.scanner = scanner;

	scanner->ehandler = ehandler;
	sieve_error_handler_ref(ehandler);

	scanner->input = stream;
	i_stream_ref(scanner->input);

	scanner->script = script;
	sieve_script_ref(script);

	scanner->buffer = NULL;
	scanner->buffer_size = 0;
	scanner->buffer_pos = 0;

	scanner->lexer.token_type = STT_NONE;
	scanner->lexer.token_str_value = str_new(default_pool, 256);
	scanner->lexer.token_int_value = 0;
	scanner->lexer.token_line = 1;

	scanner->current_line = 1;

	return &scanner->lexer;
}

void sieve_lexer_free(const struct sieve_lexer **_lexer)
{
	const struct sieve_lexer *lexer = *_lexer;
	struct sieve_lexical_scanner *scanner = lexer->scanner;

	i_stream_unref(&scanner->input);
	sieve_script_unref(&scanner->script);
	sieve_error_handler_unref(&scanner->ehandler);
	str_free(&scanner->lexer.token_str_value);

	i_free(scanner);
	*_lexer = NULL;
}

/*
 * Internal error handling
 */

inline static void ATTR_FORMAT(4, 5)
sieve_lexer_error(const struct sieve_lexer *lexer,
		  const char *csrc_filename, unsigned int csrc_linenum,
		  const char *fmt, ...)
{
	struct sieve_lexical_scanner *scanner = lexer->scanner;
	struct sieve_error_params params = {
		.log_type = LOG_TYPE_ERROR,
		.csrc = {
			.filename = csrc_filename,
			.linenum = csrc_linenum,
		},
	};
	va_list args;

	va_start(args, fmt);

	T_BEGIN {
		params.location =
			sieve_error_script_location(scanner->script,
						    scanner->current_line);
		sieve_logv(scanner->ehandler, &params, fmt, args);
	} T_END;

	va_end(args);
}
#define sieve_lexer_error(lexer, ...) \
	sieve_lexer_error(lexer, __FILE__, __LINE__, __VA_ARGS__)

inline static void ATTR_FORMAT(4, 5)
sieve_lexer_warning(const struct sieve_lexer *lexer,
		    const char *csrc_filename, unsigned int csrc_linenum,
		    const char *fmt, ...)
{
	struct sieve_lexical_scanner *scanner = lexer->scanner;
	struct sieve_error_params params = {
		.log_type = LOG_TYPE_WARNING,
		.csrc = {
			.filename = csrc_filename,
			.linenum = csrc_linenum,
		},
	};
	va_list args;

	va_start(args, fmt);

	T_BEGIN {
		params.location =
			sieve_error_script_location(scanner->script,
						    scanner->current_line);
		sieve_logv(scanner->ehandler, &params, fmt, args);
	} T_END;

	va_end(args);
}
#define sieve_lexer_warning(lexer, ...) \
	sieve_lexer_warning(lexer, __FILE__, __LINE__, __VA_ARGS__)

const char *sieve_lexer_token_description(const struct sieve_lexer *lexer)
{
	switch (lexer->token_type) {
	case STT_NONE:
		return "no token (bug)";
	case STT_WHITESPACE:
		return "whitespace (bug)";
	case STT_EOF:
		return "end of file";

	case STT_NUMBER:
		return "number";
	case STT_IDENTIFIER:
		return "identifier";
	case STT_TAG:
		return "tag";
	case STT_STRING:
		return "string";

	case STT_RBRACKET:
		return "')'";
	case STT_LBRACKET:
		return "'('";
	case STT_RCURLY:
		return "'}'";
	case STT_LCURLY:
		return "'{'";
	case STT_RSQUARE:
		return "']'";
	case STT_LSQUARE:
		return "'['";
	case STT_SEMICOLON:
		return "';'";
	case STT_COMMA:
		return "','";

	case STT_SLASH:
		return "'/'";
	case STT_COLON:
		return "':'";

	case STT_GARBAGE:
		return "unknown characters";
	case STT_ERROR:
		return "error token (bug)";
	}

	return "unknown token (bug)";
}

/*
 * Debug
 */

void sieve_lexer_token_print(const struct sieve_lexer *lexer)
{
	switch (lexer->token_type) {
	case STT_NONE:
		printf("??NONE?? ");
		break;
	case STT_WHITESPACE:
		printf("??WHITESPACE?? ");
		break;
	case STT_EOF:
		printf("EOF\n");
		break;

	case STT_NUMBER:
		printf("NUMBER ");
		break;
	case STT_IDENTIFIER:
		printf("IDENTIFIER ");
		break;
	case STT_TAG:
		printf("TAG ");
		break;
	case STT_STRING:
		printf("STRING ");
		break;

	case STT_RBRACKET:
		printf(") ");
		break;
	case STT_LBRACKET:
		printf("( ");
		break;
	case STT_RCURLY:
		printf("}\n");
		break;
	case STT_LCURLY:
		printf("{\n");
		break;
	case STT_RSQUARE:
		printf("] ");
		break;
	case STT_LSQUARE:
		printf("[ ");
		break;
	case STT_SEMICOLON:
		printf(";\n");
		break;
	case STT_COMMA:
		printf(", ");
		break;

	case STT_SLASH:
		printf("/ ");
		break;
	case STT_COLON:
		printf(": ");
		break;

	case STT_GARBAGE:
		printf(">>GARBAGE<<");
		break;
	case STT_ERROR:
		printf(">>ERROR<<");
		break;
	default:
		printf("UNKNOWN ");
		break;
	}
}

/*
 * Lexical scanning
 */

static void sieve_lexer_shift(struct sieve_lexical_scanner *scanner)
{
	if (scanner->buffer_size > 0 &&
	    scanner->buffer[scanner->buffer_pos] == '\n')
		scanner->current_line++;

	if (scanner->buffer_size > 0 &&
	    scanner->buffer_pos + 1 < scanner->buffer_size)
		scanner->buffer_pos++;
	else {
		if (scanner->buffer_size > 0)
			i_stream_skip(scanner->input, scanner->buffer_size);

		scanner->buffer = i_stream_get_data(scanner->input,
						    &scanner->buffer_size);

		if (scanner->buffer_size == 0 &&
		    i_stream_read(scanner->input) > 0) {
	  		scanner->buffer = i_stream_get_data(
				scanner->input, &scanner->buffer_size);
		}

		scanner->buffer_pos = 0;
	}
}

static inline int sieve_lexer_curchar(struct sieve_lexical_scanner *scanner)
{
	if (scanner->buffer_size == 0)
		return -1;

	return scanner->buffer[scanner->buffer_pos];
}

static inline const char *_char_sanitize(int ch)
{
	if (ch > 31 && ch < 127)
		return t_strdup_printf("'%c'", ch);

	return t_strdup_printf("0x%02x", ch);
}

static bool sieve_lexer_scan_number(struct sieve_lexical_scanner *scanner)
{
	struct sieve_lexer *lexer = &scanner->lexer;
	uintmax_t value;
	string_t *str;
	bool overflow = FALSE;

	str_truncate(lexer->token_str_value,0);
	str = lexer->token_str_value;

	while (i_isdigit(sieve_lexer_curchar(scanner))) {
		str_append_c(str, sieve_lexer_curchar(scanner));
		sieve_lexer_shift(scanner);
	}

	if (str_to_uintmax(str_c(str), &value) < 0 ||
	    value > (sieve_number_t)-1) {
		overflow = TRUE;
	} else {
		switch (sieve_lexer_curchar(scanner)) {
		case 'k':
		case 'K': /* Kilo */
			if (value > (SIEVE_MAX_NUMBER >> 10))
				overflow = TRUE;
			else
				value = value << 10;
			sieve_lexer_shift(scanner);
			break;
		case 'm':
		case 'M': /* Mega */
			if (value > (SIEVE_MAX_NUMBER >> 20))
				overflow = TRUE;
			else
				value = value << 20;
			sieve_lexer_shift(scanner);
			break;
		case 'g':
		case 'G': /* Giga */
			if (value > (SIEVE_MAX_NUMBER >> 30))
				overflow = TRUE;
			else
				value = value << 30;
			sieve_lexer_shift(scanner);
			break;
		default:
			/* Next token */
			break;
		}
	}

	/* Check for integer overflow */
	if (overflow) {
		sieve_lexer_error(lexer,
				  "number exceeds integer limits (max %llu)",
				  (long long) SIEVE_MAX_NUMBER);
		lexer->token_type = STT_ERROR;
		return FALSE;
	}

	lexer->token_type = STT_NUMBER;
	lexer->token_int_value = (sieve_number_t)value;
	return TRUE;

}

static bool
sieve_lexer_scan_hash_comment(struct sieve_lexical_scanner *scanner)
{
	struct sieve_lexer *lexer = &scanner->lexer;

	while (sieve_lexer_curchar(scanner) != '\n') {
		switch(sieve_lexer_curchar(scanner)) {
		case -1:
			if (!scanner->input->eof) {
				lexer->token_type = STT_ERROR;
				return FALSE;
			}
			sieve_lexer_warning(lexer,
				"no newline (CRLF) at end of hash comment at end of file");
			lexer->token_type = STT_WHITESPACE;
			return TRUE;
		case '\0':
			sieve_lexer_error(lexer,
					  "encountered NUL character in hash comment");
			lexer->token_type = STT_ERROR;
			return FALSE;
		default:
			break;
		}

		/* Stray CR is ignored */
		sieve_lexer_shift(scanner);
	}

	sieve_lexer_shift(scanner);

	lexer->token_type = STT_WHITESPACE;
	return TRUE;
}

/* sieve_lexer_scan_raw_token:
 *   Scans valid tokens and whitespace
 */
static bool
sieve_lexer_scan_raw_token(struct sieve_lexical_scanner *scanner)
{
	struct sieve_lexer *lexer = &scanner->lexer;
	string_t *str;
	int ret;

	/* Read first character */
	if (lexer->token_type == STT_NONE) {
		if ((ret = i_stream_read(scanner->input)) < 0) {
			i_assert(ret != -2);
			if (!scanner->input->eof) {
				lexer->token_type = STT_ERROR;
				return FALSE;
			}
		}
		sieve_lexer_shift(scanner);
	}

	lexer->token_line = scanner->current_line;

	switch (sieve_lexer_curchar(scanner)) {

	/* whitespace */

	// hash-comment = ( "#" *CHAR-NOT-CRLF CRLF )
	case '#':
		sieve_lexer_shift(scanner);
		return sieve_lexer_scan_hash_comment(scanner);

	// bracket-comment = "/*" *(CHAR-NOT-STAR / ("*" CHAR-NOT-SLASH)) "*/"
	//        ;; No */ allowed inside a comment.
	//        ;; (No * is allowed unless it is the last character,
	//        ;; or unless it is followed by a character that isn't a
	//        ;; slash.)
	case '/':
		sieve_lexer_shift(scanner);

		if (sieve_lexer_curchar(scanner) == '*') {
			sieve_lexer_shift(scanner);

			while (TRUE) {
				switch (sieve_lexer_curchar(scanner)) {
				case -1:
					if (scanner->input->eof) {
						sieve_lexer_error(lexer,
							"end of file before end of bracket comment "
							"('/* ... */') "
							"started at line %d",
							lexer->token_line);
					}
					lexer->token_type = STT_ERROR;
					return FALSE;
				case '*':
					sieve_lexer_shift(scanner);

					if (sieve_lexer_curchar(scanner) == '/') {
						sieve_lexer_shift(scanner);

						lexer->token_type = STT_WHITESPACE;
						return TRUE;

					} else if (sieve_lexer_curchar(scanner) == -1) {
						sieve_lexer_error(lexer,
							"end of file before end of bracket comment "
							"('/* ... */') "
							"started at line %d",
							lexer->token_line);
						lexer->token_type = STT_ERROR;
						return FALSE;
					}
					break;
				case '\0':
					sieve_lexer_error(lexer,
						"encountered NUL character in bracket comment");
					lexer->token_type = STT_ERROR;
					return FALSE;
				default:
					sieve_lexer_shift(scanner);
				}
			}

			i_unreached();
			return FALSE;
		}

		lexer->token_type = STT_SLASH;
		return TRUE;

	// comment = bracket-comment / hash-comment
	// white-space = 1*(SP / CRLF / HTAB) / comment
	case '\t':
	case '\r':
	case '\n':
	case ' ':
		sieve_lexer_shift(scanner);

		while (sieve_lexer_curchar(scanner) == '\t' ||
		       sieve_lexer_curchar(scanner) == '\r' ||
		       sieve_lexer_curchar(scanner) == '\n' ||
		       sieve_lexer_curchar(scanner) == ' ') {

			sieve_lexer_shift(scanner);
		}

		lexer->token_type = STT_WHITESPACE;
		return TRUE;

	/* quoted-string */
	case '"':
		sieve_lexer_shift(scanner);

		str_truncate(lexer->token_str_value, 0);
		str = lexer->token_str_value;

		while (sieve_lexer_curchar(scanner) != '"') {
			if (sieve_lexer_curchar(scanner) == '\\')
				sieve_lexer_shift(scanner);

			switch (sieve_lexer_curchar(scanner)) {

			/* End of file */
			case -1:
				if (scanner->input->eof) {
					sieve_lexer_error(lexer,
						"end of file before end of quoted string "
						"started at line %d", lexer->token_line);
				}
				lexer->token_type = STT_ERROR;
				return FALSE;

			/* NUL character */
			case '\0':
				sieve_lexer_error(lexer,
					"encountered NUL character in quoted string "
					"started at line %d", lexer->token_line);
				lexer->token_type = STT_ERROR;
				return FALSE;

			/* CR .. check for LF */
			case '\r':
				sieve_lexer_shift(scanner);

				if (sieve_lexer_curchar(scanner) != '\n') {
					sieve_lexer_error(lexer,
						"found stray carriage-return (CR) character "
						"in quoted string started at line %d",
						lexer->token_line);
					lexer->token_type = STT_ERROR;
					return FALSE;
				}

				if (str_len(str) <= SIEVE_MAX_STRING_LEN)
					str_append(str, "\r\n");
				break;

			/* Loose LF is allowed (non-standard) and converted to CRLF */
			case '\n':
				if (str_len(str) <= SIEVE_MAX_STRING_LEN)
					str_append(str, "\r\n");
				break;

			/* Other characters */
			default:
				if (str_len(str) <= SIEVE_MAX_STRING_LEN)
					str_append_c(str, sieve_lexer_curchar(scanner));
			}

			sieve_lexer_shift(scanner);
		}

		sieve_lexer_shift(scanner);

		if (str_len(str) > SIEVE_MAX_STRING_LEN) {
			sieve_lexer_error(lexer,
				"quoted string started at line %d is too long "
				"(longer than %llu bytes)", lexer->token_line,
				(long long) SIEVE_MAX_STRING_LEN);
			lexer->token_type = STT_ERROR;
			return FALSE;
		}

		lexer->token_type = STT_STRING;
		return TRUE;

	/* single character tokens */
	case ']':
		sieve_lexer_shift(scanner);
		lexer->token_type = STT_RSQUARE;
		return TRUE;
	case '[':
		sieve_lexer_shift(scanner);
		lexer->token_type = STT_LSQUARE;
		return TRUE;
	case '}':
		sieve_lexer_shift(scanner);
		lexer->token_type = STT_RCURLY;
		return TRUE;
	case '{':
		sieve_lexer_shift(scanner);
		lexer->token_type = STT_LCURLY;
		return TRUE;
	case ')':
		sieve_lexer_shift(scanner);
		lexer->token_type = STT_RBRACKET;
		return TRUE;
	case '(':
		sieve_lexer_shift(scanner);
		lexer->token_type = STT_LBRACKET;
		return TRUE;
	case ';':
		sieve_lexer_shift(scanner);
		lexer->token_type = STT_SEMICOLON;
		return TRUE;
	case ',':
		sieve_lexer_shift(scanner);
		lexer->token_type = STT_COMMA;
		return TRUE;

	/* EOF */
	case -1:
		if (!scanner->input->eof) {
			lexer->token_type = STT_ERROR;
			return FALSE;
		}
		lexer->token_type = STT_EOF;
		return TRUE;

	default:
		/* number */
		if (i_isdigit(sieve_lexer_curchar(scanner))) {
			return sieve_lexer_scan_number(scanner);

		/* identifier / tag */
		} else if (i_isalpha(sieve_lexer_curchar(scanner)) ||
			   sieve_lexer_curchar(scanner) == '_' ||
			   sieve_lexer_curchar(scanner) == ':') {

			enum sieve_token_type type = STT_IDENTIFIER;
			str_truncate(lexer->token_str_value,0);
			str = lexer->token_str_value;

			/* If it starts with a ':' it is a tag and not an
			   identifier */
 			if (sieve_lexer_curchar(scanner) == ':') {
				sieve_lexer_shift(scanner); // discard colon
				type = STT_TAG;

				/* First character still can't be a DIGIT */
 				if (i_isalpha(sieve_lexer_curchar(scanner)) ||
				    sieve_lexer_curchar(scanner) == '_') {
					str_append_c(str, sieve_lexer_curchar(scanner));
					sieve_lexer_shift(scanner);
				} else {
					/* Hmm, otherwise it is just a spurious
					   colon */
					lexer->token_type = STT_COLON;
					return TRUE;
				}
			} else {
				str_append_c(str, sieve_lexer_curchar(scanner));
				sieve_lexer_shift(scanner);
			}

			/* Scan the rest of the identifier */
			while (i_isalnum(sieve_lexer_curchar(scanner)) ||
			       sieve_lexer_curchar(scanner) == '_') {

				if (str_len(str) <= SIEVE_MAX_IDENTIFIER_LEN) {
	 				str_append_c(str, sieve_lexer_curchar(scanner));
				}
				sieve_lexer_shift(scanner);
			}

			/* Is this in fact a multiline text string ? */
			if (sieve_lexer_curchar(scanner) == ':' &&
			    type == STT_IDENTIFIER && str_len(str) == 4 &&
			    strncasecmp(str_c(str), "text", 4) == 0) {
				sieve_lexer_shift(scanner); // discard colon

				/* Discard SP and HTAB whitespace */
				while (sieve_lexer_curchar(scanner) == ' ' ||
				       sieve_lexer_curchar(scanner) == '\t')
 					sieve_lexer_shift(scanner);

				/* Discard hash comment or handle single CRLF */
				if (sieve_lexer_curchar(scanner) == '\r')
					sieve_lexer_shift(scanner);
 				switch (sieve_lexer_curchar(scanner)) {
				case '#':
					if (!sieve_lexer_scan_hash_comment(scanner))
						return FALSE;
					if (scanner->input->eof) {
						sieve_lexer_error(lexer,
							"end of file before end of multi-line string");
						lexer->token_type = STT_ERROR;
						return FALSE;
					} else if (scanner->input->stream_errno != 0) {
						lexer->token_type = STT_ERROR;
						return FALSE;
					}
					break;
				case '\n':
					sieve_lexer_shift(scanner);
					break;
				case -1:
					if (scanner->input->eof) {
						sieve_lexer_error(lexer,
							"end of file before end of multi-line string");
					}
					lexer->token_type = STT_ERROR;
					return FALSE;
				default:
 					sieve_lexer_error(lexer,
 						"invalid character %s after 'text:' in multiline string",
						_char_sanitize(sieve_lexer_curchar(scanner)));
					lexer->token_type = STT_ERROR;
					return FALSE;
				}

				/* Start over */
				str_truncate(str, 0);

 				/* Parse literal lines */
				while (TRUE) {
					bool cr_shifted = FALSE;

					/* Remove dot-stuffing or detect end of text */
					if (sieve_lexer_curchar(scanner) == '.') {
						sieve_lexer_shift(scanner);

						/* Check for CR.. */
						if (sieve_lexer_curchar(scanner) == '\r') {
							sieve_lexer_shift(scanner);
							cr_shifted = TRUE;
						}

						/* ..LF */
						if (sieve_lexer_curchar(scanner) == '\n') {
							sieve_lexer_shift(scanner);

							/* End of multi-line string */

							/* Check whether length limit was violated */
							if (str_len(str) > SIEVE_MAX_STRING_LEN) {
								sieve_lexer_error(lexer,
									"multi-line string started at line %d is too long "
									"(longer than %llu bytes)", lexer->token_line,
									(long long) SIEVE_MAX_STRING_LEN);
									lexer->token_type = STT_ERROR;
									return FALSE;
							}

							lexer->token_type = STT_STRING;
							return TRUE;
						} else if (cr_shifted) {
							/* Seen CR, but no LF */
							if (sieve_lexer_curchar(scanner) != -1 ||
							    !scanner->input->eof) {
								sieve_lexer_error(lexer,
									"found stray carriage-return (CR) character "
									"in multi-line string started at line %d",
									lexer->token_line);
							}
							lexer->token_type = STT_ERROR;
							return FALSE;
						}

						/* Handle dot-stuffing */
						if (str_len(str) <= SIEVE_MAX_STRING_LEN)
							str_append_c(str, '.');
						if (sieve_lexer_curchar(scanner) == '.')
							sieve_lexer_shift(scanner);
					}

					/* Scan the rest of the line */
					while (sieve_lexer_curchar(scanner) != '\n' &&
					       sieve_lexer_curchar(scanner) != '\r') {

						switch (sieve_lexer_curchar(scanner)) {
						case -1:
							if (scanner->input->eof) {
								sieve_lexer_error(lexer,
									"end of file before end of multi-line string");
							}
 							lexer->token_type = STT_ERROR;
 							return FALSE;
						case '\0':
							sieve_lexer_error(lexer,
								"encountered NUL character in quoted string "
								"started at line %d", lexer->token_line);
							lexer->token_type = STT_ERROR;
							return FALSE;
						default:
							if (str_len(str) <= SIEVE_MAX_STRING_LEN)
  								str_append_c(str, sieve_lexer_curchar(scanner));
						}

						sieve_lexer_shift(scanner);
					}

					/* If exited loop due to CR, skip it */
					if (sieve_lexer_curchar(scanner) == '\r')
						sieve_lexer_shift(scanner);

					/* Now we must see an LF */
					if (sieve_lexer_curchar(scanner) != '\n') {
						if (sieve_lexer_curchar(scanner) != -1 ||
						    !scanner->input->eof) {
							sieve_lexer_error(lexer,
								"found stray carriage-return (CR) character "
								"in multi-line string started at line %d",
								lexer->token_line);
						}
 						lexer->token_type = STT_ERROR;
 						return FALSE;
					}

					if (str_len(str) <= SIEVE_MAX_STRING_LEN)
						str_append(str, "\r\n");

					sieve_lexer_shift(scanner);
				}

 				i_unreached();
				lexer->token_type = STT_ERROR;
				return FALSE;
			}

			if (str_len(str) > SIEVE_MAX_IDENTIFIER_LEN) {
				sieve_lexer_error(lexer,
					"encountered impossibly long %s%s'",
					(type == STT_TAG ? "tag identifier ':" :
					 "identifier '"),
					str_sanitize(str_c(str),
						     SIEVE_MAX_IDENTIFIER_LEN));
				lexer->token_type = STT_ERROR;
				return FALSE;
			}

			lexer->token_type = type;
			return TRUE;
		}

		/* Error (unknown character and EOF handled already) */
		if (lexer->token_type != STT_GARBAGE) {
			sieve_lexer_error(lexer,
				"unexpected character(s) starting with %s",
				_char_sanitize(sieve_lexer_curchar(scanner)));
		}
		sieve_lexer_shift(scanner);
		lexer->token_type = STT_GARBAGE;
		return FALSE;
	}
}

void sieve_lexer_skip_token(const struct sieve_lexer *lexer)
{
	/* Scan token while skipping whitespace */
	do {
		struct sieve_lexical_scanner *scanner = lexer->scanner;

		if (!sieve_lexer_scan_raw_token(scanner)) {
			if (!scanner->input->eof &&
			    scanner->input->stream_errno != 0) {
				sieve_critical(scanner->svinst, scanner->ehandler,
					sieve_error_script_location(scanner->script,
								    scanner->current_line),
					"error reading script",
					"error reading script during lexical analysis: %s",
					i_stream_get_error(scanner->input));
			}
			return;
		}
	} while (lexer->token_type == STT_WHITESPACE);
}