summaryrefslogtreecommitdiffstats
path: root/pigeonhole/src/lib-managesieve/managesieve-parser.c
diff options
context:
space:
mode:
Diffstat (limited to 'pigeonhole/src/lib-managesieve/managesieve-parser.c')
-rw-r--r--pigeonhole/src/lib-managesieve/managesieve-parser.c757
1 files changed, 757 insertions, 0 deletions
diff --git a/pigeonhole/src/lib-managesieve/managesieve-parser.c b/pigeonhole/src/lib-managesieve/managesieve-parser.c
new file mode 100644
index 0000000..546e431
--- /dev/null
+++ b/pigeonhole/src/lib-managesieve/managesieve-parser.c
@@ -0,0 +1,757 @@
+/* Copyright (c) 2002-2018 Pigeonhole authors, see the included COPYING file
+ */
+
+#include "lib.h"
+#include "unichar.h"
+#include "istream-private.h"
+#include "ostream.h"
+#include "strescape.h"
+#include "managesieve-parser.h"
+
+#define is_linebreak(c) \
+ ((c) == '\r' || (c) == '\n')
+
+#define LIST_INIT_COUNT 7
+
+enum arg_parse_type {
+ ARG_PARSE_NONE = 0,
+ ARG_PARSE_ATOM,
+ ARG_PARSE_STRING,
+ ARG_PARSE_LITERAL,
+ ARG_PARSE_LITERAL_DATA
+};
+
+struct managesieve_parser {
+ /* permanent */
+ pool_t pool;
+ struct istream *input;
+ size_t max_line_size;
+ enum managesieve_parser_flags flags;
+
+ /* reset by managesieve_parser_reset(): */
+ size_t line_size;
+ ARRAY_TYPE(managesieve_arg_list) root_list;
+ ARRAY_TYPE(managesieve_arg_list) *cur_list;
+ struct managesieve_arg *list_arg;
+
+ enum arg_parse_type cur_type;
+ size_t cur_pos; /* parser position in input buffer */
+
+ int str_first_escape; /* ARG_PARSE_STRING: index to first '\' */
+ uoff_t literal_size; /* ARG_PARSE_LITERAL: string size */
+
+ struct istream *str_stream;
+
+ const char *error;
+
+ bool literal_skip_crlf:1;
+ bool literal_nonsync:1;
+ bool eol:1;
+ bool fatal_error:1;
+};
+
+static struct istream *quoted_string_istream_create
+ (struct managesieve_parser *parser);
+
+struct managesieve_parser *
+managesieve_parser_create(struct istream *input, size_t max_line_size)
+{
+ struct managesieve_parser *parser;
+
+ parser = i_new(struct managesieve_parser, 1);
+ parser->pool = pool_alloconly_create("MANAGESIEVE parser", 8192);
+ parser->input = input;
+ parser->max_line_size = max_line_size;
+
+ p_array_init(&parser->root_list, parser->pool, LIST_INIT_COUNT);
+ parser->cur_list = &parser->root_list;
+ return parser;
+}
+
+void managesieve_parser_destroy(struct managesieve_parser **parser)
+{
+ i_stream_unref(&(*parser)->str_stream);
+
+ pool_unref(&(*parser)->pool);
+ i_free(*parser);
+ *parser = NULL;
+}
+
+void managesieve_parser_reset(struct managesieve_parser *parser)
+{
+ p_clear(parser->pool);
+
+ parser->line_size = 0;
+
+ p_array_init(&parser->root_list, parser->pool, LIST_INIT_COUNT);
+ parser->cur_list = &parser->root_list;
+ parser->list_arg = NULL;
+
+ parser->cur_type = ARG_PARSE_NONE;
+ parser->cur_pos = 0;
+
+ parser->str_first_escape = 0;
+ parser->literal_size = 0;
+
+ parser->error = NULL;
+
+ parser->literal_skip_crlf = FALSE;
+ parser->eol = FALSE;
+
+ i_stream_unref(&parser->str_stream);
+}
+
+const char *
+managesieve_parser_get_error(struct managesieve_parser *parser, bool *fatal)
+{
+ *fatal = parser->fatal_error;
+ return parser->error;
+}
+
+/* Skip over everything parsed so far, plus the following whitespace */
+static bool
+managesieve_parser_skip_to_next(struct managesieve_parser *parser,
+ const unsigned char **data, size_t *data_size)
+{
+ size_t i;
+
+ for (i = parser->cur_pos; i < *data_size; i++) {
+ if ((*data)[i] != ' ')
+ break;
+ }
+
+ parser->line_size += i;
+ i_stream_skip(parser->input, i);
+ parser->cur_pos = 0;
+
+ *data += i;
+ *data_size -= i;
+ return *data_size > 0;
+}
+
+static struct managesieve_arg *
+managesieve_arg_create(struct managesieve_parser *parser)
+{
+ struct managesieve_arg *arg;
+
+ arg = array_append_space(parser->cur_list);
+ arg->parent = parser->list_arg;
+
+ return arg;
+}
+
+static void
+managesieve_parser_save_arg(struct managesieve_parser *parser,
+ const unsigned char *data, size_t size)
+{
+ struct managesieve_arg *arg;
+ char *str;
+
+ arg = managesieve_arg_create(parser);
+
+ switch (parser->cur_type) {
+ case ARG_PARSE_ATOM:
+ /* Simply save the string */
+ arg->type = MANAGESIEVE_ARG_ATOM;
+ arg->_data.str = p_strndup(parser->pool, data, size);
+ arg->str_len = size;
+ break;
+ case ARG_PARSE_STRING:
+ /* Data is quoted and may contain escapes. */
+ if ((parser->flags &
+ MANAGESIEVE_PARSE_FLAG_STRING_STREAM) != 0) {
+ arg->type = MANAGESIEVE_ARG_STRING_STREAM;
+ arg->_data.str_stream = parser->str_stream;
+ } else {
+ i_assert(size > 0);
+
+ arg->type = MANAGESIEVE_ARG_STRING;
+ str = p_strndup(parser->pool, data+1, size-1);
+
+ /* remove the escapes */
+ if (parser->str_first_escape >= 0 &&
+ (parser->flags &
+ MANAGESIEVE_PARSE_FLAG_NO_UNESCAPE) == 0)
+ (void)str_unescape(str);
+
+ arg->_data.str = str;
+ arg->str_len = strlen(str);
+ }
+ break;
+ case ARG_PARSE_LITERAL_DATA:
+ if ((parser->flags &
+ MANAGESIEVE_PARSE_FLAG_STRING_STREAM) != 0) {
+ arg->type = MANAGESIEVE_ARG_STRING_STREAM;
+ arg->_data.str_stream = parser->str_stream;
+ } else if ((parser->flags &
+ MANAGESIEVE_PARSE_FLAG_LITERAL_TYPE) != 0) {
+ arg->type = MANAGESIEVE_ARG_LITERAL;
+ arg->_data.str = p_strndup(parser->pool, data, size);
+ arg->str_len = size;
+ } else {
+ arg->type = MANAGESIEVE_ARG_STRING;
+ arg->_data.str = p_strndup(parser->pool, data, size);
+ arg->str_len = size;
+ }
+ break;
+ default:
+ i_unreached();
+ }
+
+ parser->cur_type = ARG_PARSE_NONE;
+}
+
+static bool is_valid_atom_char(struct managesieve_parser *parser, char chr)
+{
+ if (IS_ATOM_SPECIAL((unsigned char)chr)) {
+ parser->error = "Invalid characters in atom";
+ return FALSE;
+ } else if ((((unsigned char)chr) & 0x80) != 0) {
+ parser->error = "8bit data in atom";
+ return FALSE;
+ }
+
+ return TRUE;
+}
+
+static bool
+managesieve_parser_read_atom(struct managesieve_parser *parser,
+ const unsigned char *data, size_t data_size)
+{
+ size_t i;
+
+ /* Read until we've found space, CR or LF. */
+ for (i = parser->cur_pos; i < data_size; i++) {
+ if (data[i] == ' ' || data[i] == ')' ||
+ is_linebreak(data[i])) {
+ managesieve_parser_save_arg(parser, data, i);
+ break;
+ } else if (!is_valid_atom_char(parser, data[i]))
+ return FALSE;
+ }
+
+ parser->cur_pos = i;
+ return (parser->cur_type == ARG_PARSE_NONE);
+}
+
+static bool
+managesieve_parser_read_string(struct managesieve_parser *parser,
+ const unsigned char *data, size_t data_size)
+{
+ size_t i;
+
+ /* QUOTED-CHAR = SAFE-UTF8-CHAR / "\" QUOTED-SPECIALS
+ quoted = <"> *QUOTED-CHAR <">
+ ;; limited to 1024 octets between the <">s
+ */
+
+ /* Read until we've found non-escaped ", CR or LF */
+ for (i = parser->cur_pos; i < data_size; i++) {
+ if (data[i] == '"') {
+ if (!uni_utf8_data_is_valid(data+1, i-1)) {
+ parser->error =
+ "Invalid UTF-8 character in quoted-string.";
+ return FALSE;
+ }
+
+ managesieve_parser_save_arg(parser, data, i);
+ i++; /* skip the trailing '"' too */
+ break;
+ }
+
+ if (data[i] == '\0') {
+ parser->error = "NULs not allowed in strings";
+ return FALSE;
+ }
+
+ if (data[i] == '\\') {
+ if (i+1 == data_size) {
+ /* Known data ends with '\' - leave it to next
+ time as well if it happens to be \" */
+ break;
+ }
+
+ /* Save the first escaped char */
+ if (parser->str_first_escape < 0)
+ parser->str_first_escape = i;
+
+ /* Skip the escaped char */
+ i++;
+
+ if (!IS_QUOTED_SPECIAL(data[i])) {
+ parser->error =
+ "Escaped quoted-string character is not a QUOTED-SPECIAL.";
+ return FALSE;
+ }
+ continue;
+ }
+
+ if ((data[i] & 0x80) == 0 && !IS_SAFE_CHAR(data[i])) {
+ parser->error = "String contains invalid character.";
+ return FALSE;
+ }
+ }
+
+ parser->cur_pos = i;
+ return (parser->cur_type == ARG_PARSE_NONE);
+}
+
+static bool managesieve_parser_literal_end(struct managesieve_parser *parser)
+{
+ if ((parser->flags & MANAGESIEVE_PARSE_FLAG_STRING_STREAM) == 0) {
+ if (parser->line_size >= parser->max_line_size ||
+ (parser->literal_size >
+ (parser->max_line_size - parser->line_size))) {
+ /* Too long string, abort. */
+ parser->error = "Literal size too large";
+ parser->fatal_error = TRUE;
+ return FALSE;
+ }
+ }
+
+ parser->cur_type = ARG_PARSE_LITERAL_DATA;
+ parser->literal_skip_crlf = TRUE;
+
+ parser->cur_pos = 0;
+ return TRUE;
+}
+
+static bool
+managesieve_parser_read_literal(struct managesieve_parser *parser,
+ const unsigned char *data, size_t data_size)
+{
+ size_t i;
+
+ /* Expecting digits + "}" */
+ for (i = parser->cur_pos; i < data_size; i++) {
+ if (data[i] == '}') {
+ parser->line_size += i+1;
+ i_stream_skip(parser->input, i+1);
+
+ return managesieve_parser_literal_end(parser);
+ }
+
+ if (parser->literal_nonsync) {
+ parser->error = "Expecting '}' after '+'";
+ return FALSE;
+ }
+
+ if (data[i] == '+') {
+ parser->literal_nonsync = TRUE;
+ continue;
+ }
+
+ if (data[i] < '0' || data[i] > '9') {
+ parser->error = "Invalid literal size";
+ return FALSE;
+ }
+
+ if (parser->literal_size >= ((uoff_t)-1 / 10)) {
+ if (parser->literal_size > ((uoff_t)-1 / 10) ||
+ (uoff_t)(data[i] - '0') > ((uoff_t)-1 % 10)) {
+ parser->error = "Literal size too large";
+ return FALSE;
+ }
+ }
+ parser->literal_size = parser->literal_size * 10 +
+ (data[i] - '0');
+ }
+
+ parser->cur_pos = i;
+ return FALSE;
+}
+
+static bool
+managesieve_parser_read_literal_data(struct managesieve_parser *parser,
+ const unsigned char *data,
+ size_t data_size)
+{
+ if (parser->literal_skip_crlf) {
+ /* Skip \r\n or \n, anything else gives an error */
+ if (data_size == 0)
+ return FALSE;
+
+ if (*data == '\r') {
+ parser->line_size++;
+ data++; data_size--;
+ i_stream_skip(parser->input, 1);
+
+ if (data_size == 0)
+ return FALSE;
+ }
+
+ if (*data != '\n') {
+ parser->error = "Missing LF after literal size";
+ return FALSE;
+ }
+
+ parser->line_size++;
+ data++; data_size--;
+ i_stream_skip(parser->input, 1);
+
+ parser->literal_skip_crlf = FALSE;
+
+ i_assert(parser->cur_pos == 0);
+ }
+
+ if ((parser->flags & MANAGESIEVE_PARSE_FLAG_STRING_STREAM) == 0) {
+ /* Now we just wait until we've read enough data */
+ if (data_size < parser->literal_size) {
+ return FALSE;
+ } else {
+ if (!uni_utf8_data_is_valid(
+ data, (size_t)parser->literal_size)) {
+ parser->error =
+ "Invalid UTF-8 character in literal string.";
+ return FALSE;
+ }
+
+ managesieve_parser_save_arg(
+ parser, data, (size_t)parser->literal_size);
+ parser->cur_pos = (size_t)parser->literal_size;
+ return TRUE;
+ }
+ } else {
+ /* We don't read the data; we just create a stream for the
+ literal */
+ parser->eol = TRUE;
+ parser->str_stream = i_stream_create_limit(
+ parser->input, parser->literal_size);
+ managesieve_parser_save_arg(parser, NULL, 0);
+ return TRUE;
+ }
+}
+
+/* Returns TRUE if argument was fully processed. Also returns TRUE if an
+ argument inside a list was processed. */
+static bool managesieve_parser_read_arg(struct managesieve_parser *parser)
+{
+ const unsigned char *data;
+ size_t data_size;
+
+ data = i_stream_get_data(parser->input, &data_size);
+ if (data_size == 0)
+ return FALSE;
+
+ while (parser->cur_type == ARG_PARSE_NONE) {
+ /* We haven't started parsing yet */
+ if (!managesieve_parser_skip_to_next(parser, &data, &data_size))
+ return FALSE;
+ i_assert(parser->cur_pos == 0);
+
+ switch (data[0]) {
+ case '\r':
+ case '\n':
+ /* Unexpected end of line */
+ parser->eol = TRUE;
+ return FALSE;
+ case '"':
+ parser->cur_type = ARG_PARSE_STRING;
+ parser->str_first_escape = -1;
+ break;
+ case '{':
+ parser->cur_type = ARG_PARSE_LITERAL;
+ parser->literal_size = 0;
+ parser->literal_nonsync = FALSE;
+ break;
+ default:
+ if (!is_valid_atom_char(parser, data[0]))
+ return FALSE;
+ parser->cur_type = ARG_PARSE_ATOM;
+ break;
+ }
+ parser->cur_pos++;
+ }
+
+ i_assert(data_size > 0);
+
+ switch (parser->cur_type) {
+ case ARG_PARSE_ATOM:
+ if (!managesieve_parser_read_atom(parser, data, data_size))
+ return FALSE;
+ break;
+ case ARG_PARSE_STRING:
+ if ((parser->flags &
+ MANAGESIEVE_PARSE_FLAG_STRING_STREAM) != 0) {
+ parser->eol = TRUE;
+ parser->line_size += parser->cur_pos;
+ i_stream_skip(parser->input, parser->cur_pos);
+ parser->cur_pos = 0;
+ parser->str_stream =
+ quoted_string_istream_create(parser);
+ managesieve_parser_save_arg(parser, NULL, 0);
+
+ } else if (!managesieve_parser_read_string(parser,
+ data, data_size)) {
+ return FALSE;
+ }
+ break;
+ case ARG_PARSE_LITERAL:
+ if (!managesieve_parser_read_literal(parser, data, data_size))
+ return FALSE;
+
+ /* Pass through to parsing data. since input->skip was
+ modified, we need to get the data start position again. */
+ data = i_stream_get_data(parser->input, &data_size);
+
+ /* fall through */
+ case ARG_PARSE_LITERAL_DATA:
+ if (!managesieve_parser_read_literal_data(parser,
+ data, data_size))
+ return FALSE;
+ break;
+ default:
+ i_unreached();
+ }
+
+ i_assert(parser->cur_type == ARG_PARSE_NONE);
+ return TRUE;
+}
+
+/* ARG_PARSE_NONE checks that last argument isn't only partially parsed. */
+#define IS_UNFINISHED(parser) \
+ ((parser)->cur_type != ARG_PARSE_NONE || \
+ (parser)->cur_list != &parser->root_list)
+
+static int
+finish_line(struct managesieve_parser *parser, unsigned int count,
+ const struct managesieve_arg **args_r)
+{
+ struct managesieve_arg *arg;
+ int ret = array_count(&parser->root_list);
+
+ parser->line_size += parser->cur_pos;
+ i_stream_skip(parser->input, parser->cur_pos);
+ parser->cur_pos = 0;
+
+ /* Fill the missing parameters with NILs */
+ while (count > array_count(&parser->root_list)) {
+ arg = array_append_space(&parser->root_list);
+ arg->type = MANAGESIEVE_ARG_NONE;
+ }
+ arg = array_append_space(&parser->root_list);
+ arg->type = MANAGESIEVE_ARG_EOL;
+
+ *args_r = array_get(&parser->root_list, &count);
+ return ret;
+}
+
+int managesieve_parser_read_args(struct managesieve_parser *parser,
+ unsigned int count,
+ enum managesieve_parser_flags flags,
+ const struct managesieve_arg **args_r)
+{
+ parser->flags = flags;
+
+ while (!parser->eol && (count == 0 || IS_UNFINISHED(parser) ||
+ array_count(&parser->root_list) < count)) {
+ if (!managesieve_parser_read_arg(parser))
+ break;
+
+ if (parser->line_size > parser->max_line_size) {
+ parser->error = "MANAGESIEVE command line too large";
+ break;
+ }
+ }
+
+ if (parser->error != NULL) {
+ /* Error, abort */
+ parser->line_size += parser->cur_pos;
+ i_stream_skip(parser->input, parser->cur_pos);
+ parser->cur_pos = 0;
+ *args_r = NULL;
+ return -1;
+ } else if ((!IS_UNFINISHED(parser) && count > 0 &&
+ array_count(&parser->root_list) >= count) || parser->eol) {
+ /* All arguments read / end of line. */
+ return finish_line(parser, count, args_r);
+ } else {
+ /* Need more data */
+ *args_r = NULL;
+ return -2;
+ }
+}
+
+int managesieve_parser_finish_line(struct managesieve_parser *parser,
+ unsigned int count,
+ enum managesieve_parser_flags flags,
+ const struct managesieve_arg **args_r)
+{
+ const unsigned char *data;
+ size_t data_size;
+ int ret;
+
+ ret = managesieve_parser_read_args(parser, count, flags, args_r);
+ if (ret == -2) {
+ /* We should have noticed end of everything except atom */
+ if (parser->cur_type == ARG_PARSE_ATOM) {
+ data = i_stream_get_data(parser->input, &data_size);
+ managesieve_parser_save_arg(parser, data, data_size);
+ }
+ }
+ return finish_line(parser, count, args_r);
+}
+
+const char *managesieve_parser_read_word(struct managesieve_parser *parser)
+{
+ const unsigned char *data;
+ size_t i, data_size;
+
+ data = i_stream_get_data(parser->input, &data_size);
+
+ for (i = 0; i < data_size; i++) {
+ if (data[i] == ' ' || data[i] == '\r' || data[i] == '\n')
+ break;
+ }
+
+ if (i < data_size) {
+ data_size = i + (data[i] == ' ' ? 1 : 0);
+ parser->line_size += data_size;
+ i_stream_skip(parser->input, data_size);
+ return p_strndup(parser->pool, data, i);
+ } else {
+ return NULL;
+ }
+}
+
+/*
+ * Quoted string stream
+ */
+
+struct quoted_string_istream {
+ struct istream_private istream;
+
+ /* The end '"' was found */
+ bool str_end:1;
+};
+
+static int
+quoted_string_istream_read_parent(struct quoted_string_istream *qsstream,
+ unsigned int min_bytes)
+{
+ struct istream_private *stream = &qsstream->istream;
+ size_t size, avail;
+ ssize_t ret;
+
+ size = i_stream_get_data_size(stream->parent);
+ while (size < min_bytes) {
+ ret = i_stream_read_memarea(stream->parent);
+ if (ret <= 0) {
+ if (ret == -2) {
+ /* Tiny parent buffer size - shouldn't happen */
+ return -2;
+ }
+ stream->istream.stream_errno =
+ stream->parent->stream_errno;
+ stream->istream.eof = stream->parent->eof;
+ if (ret == -1 && stream->istream.stream_errno == 0) {
+ io_stream_set_error(&stream->iostream,
+ "Quoted string ends without closing quotes");
+ stream->istream.stream_errno = EPIPE;
+ }
+ return ret;
+ }
+ size = i_stream_get_data_size(stream->parent);
+ }
+
+ if (!i_stream_try_alloc(stream, size, &avail))
+ return -2;
+ return 1;
+}
+
+static ssize_t quoted_string_istream_read(struct istream_private *stream)
+{
+ struct quoted_string_istream *qsstream =
+ (struct quoted_string_istream *)stream;
+ const unsigned char *data;
+ unsigned int extra;
+ size_t i, dest, size;
+ ssize_t ret;
+
+ if (qsstream->str_end) {
+ stream->istream.eof = TRUE;
+ return -1;
+ }
+
+ ret = quoted_string_istream_read_parent(qsstream, 1);
+ if (ret <= 0)
+ return ret;
+
+ /* @UNSAFE */
+ dest = stream->pos;
+ extra = 0;
+
+ data = i_stream_get_data(stream->parent, &size);
+ for (i = 0; i < size && dest < stream->buffer_size; ) {
+ if (data[i] == '"') {
+ i++;
+ qsstream->str_end = TRUE;
+ if (dest == stream->pos) {
+ i_stream_skip(stream->parent, i);
+ stream->istream.eof = TRUE;
+ return -1;
+ }
+ break;
+ } else if (data[i] == '\\') {
+ if (i+1 == size) {
+ /* Not enough input for \x */
+ extra = 1;
+ break;
+ }
+ i++;
+
+ if (!IS_QUOTED_SPECIAL(data[i])) {
+ /* Invalid string */
+ io_stream_set_error(&stream->iostream,
+ "Escaped quoted-string character is not a QUOTED-SPECIAL");
+ stream->istream.stream_errno = EINVAL;
+ return -1;
+ }
+ stream->w_buffer[dest++] = data[i];
+ i++;
+ } else {
+ if (data[i] == '\r' || data[i] == '\n') {
+ io_stream_set_error(&stream->iostream,
+ "Quoted string contains an invalid character");
+ stream->istream.stream_errno = EINVAL;
+ return -1;
+ }
+
+ stream->w_buffer[dest++] = data[i];
+ i++;
+ }
+ i_assert(dest <= stream->buffer_size);
+ }
+ i_stream_skip(stream->parent, i);
+
+ ret = dest - stream->pos;
+ if (ret == 0) {
+ /* Not enough input */
+ i_assert(i == 0);
+ i_assert(extra > 0);
+ ret = quoted_string_istream_read_parent(qsstream, extra+1);
+ if (ret <= 0)
+ return ret;
+ return quoted_string_istream_read(stream);
+ }
+ i_assert(ret > 0);
+ stream->pos = dest;
+ return ret;
+}
+
+static struct
+istream *quoted_string_istream_create(struct managesieve_parser *parser)
+{
+ struct quoted_string_istream *qsstream;
+
+ qsstream = i_new(struct quoted_string_istream, 1);
+ qsstream->istream.max_buffer_size =
+ parser->input->real_stream->max_buffer_size;
+ qsstream->istream.read = quoted_string_istream_read;
+
+ qsstream->istream.istream.readable_fd = FALSE;
+ qsstream->istream.istream.blocking = parser->input->blocking;
+ qsstream->istream.istream.seekable = FALSE;
+ return i_stream_create(&qsstream->istream, parser->input,
+ i_stream_get_fd(parser->input), 0);
+}