diff options
Diffstat (limited to 'src/lib-mail/qp-decoder.c')
-rw-r--r-- | src/lib-mail/qp-decoder.c | 285 |
1 files changed, 285 insertions, 0 deletions
diff --git a/src/lib-mail/qp-decoder.c b/src/lib-mail/qp-decoder.c new file mode 100644 index 0000000..7684803 --- /dev/null +++ b/src/lib-mail/qp-decoder.c @@ -0,0 +1,285 @@ +/* Copyright (c) 2002-2018 Dovecot authors, see the included COPYING file */ + +#include "lib.h" +#include "buffer.h" +#include "hex-binary.h" +#include "qp-decoder.h" + +/* quoted-printable lines can be max 76 characters. if we've seen more than + that much whitespace, it means there really shouldn't be anything else left + in the line except trailing whitespace. */ +#define QP_MAX_WHITESPACE_LEN 76 + +#define QP_IS_TRAILING_WHITESPACE(c) \ + ((c) == ' ' || (c) == '\t') + +enum qp_state { + STATE_TEXT = 0, + STATE_WHITESPACE, + STATE_EQUALS, + STATE_EQUALS_WHITESPACE, + STATE_HEX2, + STATE_CR, + STATE_SOFTCR +}; + +struct qp_decoder { + buffer_t *dest; + buffer_t *whitespace; + enum qp_state state; + char hexchar; +}; + +struct qp_decoder *qp_decoder_init(buffer_t *dest) +{ + struct qp_decoder *qp; + + qp = i_new(struct qp_decoder, 1); + qp->dest = dest; + qp->whitespace = buffer_create_dynamic(default_pool, 80); + return qp; +} + +void qp_decoder_deinit(struct qp_decoder **_qp) +{ + struct qp_decoder *qp = *_qp; + + buffer_free(&qp->whitespace); + i_free(qp); +} + +static size_t +qp_decoder_more_text(struct qp_decoder *qp, const unsigned char *src, + size_t src_size) +{ + size_t i, start = 0, ret = src_size; + + for (i = 0; i < src_size; i++) { + if (src[i] > '=') { + /* fast path */ + continue; + } + switch (src[i]) { + case '=': + qp->state = STATE_EQUALS; + break; + case '\r': + qp->state = STATE_CR; + break; + case '\n': + /* LF without preceding CR */ + buffer_append(qp->dest, src+start, i-start); + buffer_append(qp->dest, "\r\n", 2); + start = i+1; + continue; + case ' ': + case '\t': + i_assert(qp->whitespace->used == 0); + qp->state = STATE_WHITESPACE; + buffer_append_c(qp->whitespace, src[i]); + break; + default: + continue; + } + ret = i+1; + break; + } + buffer_append(qp->dest, src+start, i-start); + return ret; +} + +static void qp_decoder_invalid(struct qp_decoder *qp, const char **error_r) +{ + switch (qp->state) { + case STATE_EQUALS: + buffer_append_c(qp->dest, '='); + *error_r = "'=' not followed by two hex digits"; + break; + case STATE_HEX2: + buffer_append_c(qp->dest, '='); + buffer_append_c(qp->dest, qp->hexchar); + *error_r = "'=<hex>' not followed by a hex digit"; + break; + case STATE_EQUALS_WHITESPACE: + buffer_append_c(qp->dest, '='); + buffer_append_buf(qp->dest, qp->whitespace, 0, SIZE_MAX); + buffer_set_used_size(qp->whitespace, 0); + *error_r = "'=<whitespace>' not followed by newline"; + break; + case STATE_CR: + buffer_append_buf(qp->dest, qp->whitespace, 0, SIZE_MAX); + buffer_set_used_size(qp->whitespace, 0); + buffer_append_c(qp->dest, '\r'); + *error_r = "CR not followed by LF"; + break; + case STATE_SOFTCR: + buffer_append_c(qp->dest, '='); + buffer_append_buf(qp->dest, qp->whitespace, 0, SIZE_MAX); + buffer_set_used_size(qp->whitespace, 0); + buffer_append_c(qp->dest, '\r'); + *error_r = "CR not followed by LF"; + break; + case STATE_TEXT: + case STATE_WHITESPACE: + i_unreached(); + } + qp->state = STATE_TEXT; + i_assert(*error_r != NULL); +} + +int qp_decoder_more(struct qp_decoder *qp, const unsigned char *src, + size_t src_size, size_t *invalid_src_pos_r, + const char **error_r) +{ + const char *error; + size_t i; + + *invalid_src_pos_r = SIZE_MAX; + *error_r = NULL; + + for (i = 0; i < src_size; ) { + switch (qp->state) { + case STATE_TEXT: + i += qp_decoder_more_text(qp, src+i, src_size-i); + /* don't increment i any more than we already did, + so continue instead of break */ + continue; + case STATE_WHITESPACE: + if (QP_IS_TRAILING_WHITESPACE(src[i])) { + /* more whitespace */ + if (qp->whitespace->used <= QP_MAX_WHITESPACE_LEN) + buffer_append_c(qp->whitespace, src[i]); + } else if (src[i] == '\r') { + qp->state = STATE_CR; + } else if (src[i] == '\n') { + /* drop the trailing whitespace */ + buffer_append(qp->dest, "\r\n", 2); + buffer_set_used_size(qp->whitespace, 0); + } else { + /* this wasn't trailing whitespace. + put it back. */ + buffer_append_buf(qp->dest, qp->whitespace, + 0, SIZE_MAX); + if (qp->whitespace->used > QP_MAX_WHITESPACE_LEN) { + /* we already truncated some of the + whitespace away, because the line + is too long */ + if (*invalid_src_pos_r == SIZE_MAX) { + *invalid_src_pos_r = i; + *error_r = "Too much whitespace"; + } + } + buffer_set_used_size(qp->whitespace, 0); + qp->state = STATE_TEXT; + continue; /* don't increment i */ + } + break; + case STATE_EQUALS: + if ((src[i] >= '0' && src[i] <= '9') || + (src[i] >= 'A' && src[i] <= 'F') || + /* lowercase hex isn't strictly valid, but allow */ + (src[i] >= 'a' && src[i] <= 'f')) { + qp->hexchar = src[i]; + qp->state = STATE_HEX2; + } else if (QP_IS_TRAILING_WHITESPACE(src[i])) { + i_assert(qp->whitespace->used == 0); + buffer_append_c(qp->whitespace, src[i]); + qp->state = STATE_EQUALS_WHITESPACE; + } else if (src[i] == '\r') + qp->state = STATE_SOFTCR; + else if (src[i] == '\n') { + qp->state = STATE_TEXT; + } else { + /* invalid input */ + qp_decoder_invalid(qp, &error); + if (*invalid_src_pos_r == SIZE_MAX) { + *invalid_src_pos_r = i; + *error_r = error; + } + continue; /* don't increment i */ + } + break; + case STATE_HEX2: + if ((src[i] >= '0' && src[i] <= '9') || + (src[i] >= 'A' && src[i] <= 'F') || + (src[i] >= 'a' && src[i] <= 'f')) { + char data[3]; + + data[0] = qp->hexchar; + data[1] = src[i]; + data[2] = '\0'; + if (hex_to_binary(data, qp->dest) < 0) + i_unreached(); + qp->state = STATE_TEXT; + } else { + /* invalid input */ + qp_decoder_invalid(qp, &error); + if (*invalid_src_pos_r == SIZE_MAX) { + *invalid_src_pos_r = i; + *error_r = error; + } + continue; /* don't increment i */ + } + break; + case STATE_EQUALS_WHITESPACE: + if (QP_IS_TRAILING_WHITESPACE(src[i])) { + if (qp->whitespace->used <= QP_MAX_WHITESPACE_LEN) + buffer_append_c(qp->whitespace, src[i]); + else { + /* if this isn't going to get truncated + anyway, it's going to be an error */ + } + } else if (src[i] == '\r') + qp->state = STATE_SOFTCR; + else if (src[i] == '\n') { + buffer_set_used_size(qp->whitespace, 0); + qp->state = STATE_TEXT; + } else { + /* =<whitespace> not followed by [CR]LF + is invalid. */ + qp_decoder_invalid(qp, &error); + if (*invalid_src_pos_r == SIZE_MAX) { + *invalid_src_pos_r = i; + *error_r = error; + } + continue; /* don't increment i */ + } + break; + case STATE_CR: + case STATE_SOFTCR: + if (src[i] == '\n') { + buffer_set_used_size(qp->whitespace, 0); + if (qp->state != STATE_SOFTCR) + buffer_append(qp->dest, "\r\n", 2); + qp->state = STATE_TEXT; + } else { + qp_decoder_invalid(qp, &error); + if (*invalid_src_pos_r == SIZE_MAX) { + *invalid_src_pos_r = i; + *error_r = error; + } + continue; /* don't increment i */ + } + break; + } + i++; + } + i_assert((*invalid_src_pos_r == SIZE_MAX) == (*error_r == NULL)); + return *invalid_src_pos_r == SIZE_MAX ? 0 : -1; +} + +int qp_decoder_finish(struct qp_decoder *qp, const char **error_r) +{ + int ret; + + if (qp->state == STATE_TEXT || qp->state == STATE_WHITESPACE) { + ret = 0; + *error_r = NULL; + } else { + qp_decoder_invalid(qp, error_r); + ret = -1; + } + qp->state = STATE_TEXT; + buffer_set_used_size(qp->whitespace, 0); + return ret; +} |