summaryrefslogtreecommitdiffstats
path: root/src/lib-mail/qp-decoder.c
diff options
context:
space:
mode:
author Daniel Baumann <daniel.baumann@progress-linux.org> 2024-04-28 09:51:24 +0000
committer Daniel Baumann <daniel.baumann@progress-linux.org> 2024-04-28 09:51:24 +0000
commit f7548d6d28c313cf80e6f3ef89aed16a19815df1 (patch)
tree a3f6f2a3f247293bee59ecd28e8cd8ceb6ca064a /src/lib-mail/qp-decoder.c
parent Initial commit. (diff)
download dovecot-f7548d6d28c313cf80e6f3ef89aed16a19815df1.tar.xz
dovecot-f7548d6d28c313cf80e6f3ef89aed16a19815df1.zip
Adding upstream version 1:2.3.19.1+dfsg1.upstream/1%2.3.19.1+dfsg1upstream
Signed-off-by: Daniel Baumann <daniel.baumann@progress-linux.org>
Diffstat (limited to 'src/lib-mail/qp-decoder.c')
-rw-r--r-- src/lib-mail/qp-decoder.c 285
1 files changed, 285 insertions, 0 deletions
diff --git a/src/lib-mail/qp-decoder.c b/src/lib-mail/qp-decoder.c
new file mode 100644
index 0000000..7684803
--- /dev/null
+++ b/src/lib-mail/qp-decoder.c
@@ -0,0 +1,285 @@
+/* Copyright (c) 2002-2018 Dovecot authors, see the included COPYING file */
+
+#include "lib.h"
+#include "buffer.h"
+#include "hex-binary.h"
+#include "qp-decoder.h"
+
+/* Quoted-printable lines can be at most 76 characters long. If we've seen
+ more than that much whitespace, there really shouldn't be anything else
+ left in the line except trailing whitespace, so the decoder stops
+ buffering additional whitespace once this limit has been exceeded. */
+#define QP_MAX_WHITESPACE_LEN 76
+
+/* Nonzero if c is a space or a horizontal tab. The argument may be
+ evaluated twice, so it must not have side effects. */
+#define QP_IS_TRAILING_WHITESPACE(c) \
+ ((c) == ' ' || (c) == '\t')
+
+/* Decoder state. It is kept in struct qp_decoder, so it carries over from
+ one qp_decoder_more() call to the next. */
+enum qp_state {
+ /* plain text; bytes are copied through by qp_decoder_more_text() */
+ STATE_TEXT = 0,
+ /* inside a run of spaces/tabs that is being held back in the
+ whitespace buffer until we know whether it is trailing */
+ STATE_WHITESPACE,
+ /* '=' was seen; expecting a hex digit, whitespace, CR or LF */
+ STATE_EQUALS,
+ /* '=' followed by whitespace was seen; expecting more whitespace or
+ [CR]LF */
+ STATE_EQUALS_WHITESPACE,
+ /* '=' and the first hex digit were seen; expecting the second digit */
+ STATE_HEX2,
+ /* CR was seen outside of a '=' sequence; expecting LF */
+ STATE_CR,
+ /* CR was seen after '=' (and optional whitespace); expecting LF */
+ STATE_SOFTCR
+};
+
+/* Quoted-printable decoder context. */
+struct qp_decoder {
+ /* output buffer given to qp_decoder_init(); decoded data is appended
+ here */
+ buffer_t *dest;
+ /* spaces/tabs that have been read but not yet written to dest; they
+ are either dropped (followed by a newline) or copied to dest */
+ buffer_t *whitespace;
+ /* current parser state */
+ enum qp_state state;
+ /* first hex digit after '=', saved while in STATE_HEX2 */
+ char hexchar;
+};
+
+/* Creates a quoted-printable decoder that appends its decoded output to
+   dest. Only the dest pointer is stored here. The returned decoder is
+   released with qp_decoder_deinit(). */
+struct qp_decoder *qp_decoder_init(buffer_t *dest)
+{
+	/* NOTE(review): presumably i_new() returns zero-filled memory, so the
+	   decoder starts in STATE_TEXT (== 0) - confirm against lib.h */
+	struct qp_decoder *decoder = i_new(struct qp_decoder, 1);
+
+	/* scratch buffer for whitespace that may turn out to be trailing */
+	decoder->whitespace = buffer_create_dynamic(default_pool, 80);
+	decoder->dest = dest;
+	return decoder;
+}
+
+/* Frees the decoder and its internal whitespace buffer, and sets the
+   caller's pointer to NULL so it can't be used or freed again by accident.
+   Does nothing if *_qp is already NULL. The destination buffer that was
+   given to qp_decoder_init() is not freed here. */
+void qp_decoder_deinit(struct qp_decoder **_qp)
+{
+	struct qp_decoder *qp = *_qp;
+
+	if (qp == NULL)
+		return;
+	/* clear the caller's reference before freeing anything */
+	*_qp = NULL;
+
+	buffer_free(&qp->whitespace);
+	i_free(qp);
+}
+
+/* Handles input while in STATE_TEXT: copies plain bytes from src to
+   qp->dest until a byte that needs special handling is found.
+
+   A bare LF is written out as CRLF without leaving the text state. On '=',
+   CR, space or tab the matching state is entered and scanning stops; a
+   space/tab is stored in qp->whitespace instead of being written to dest.
+
+   Returns the number of src bytes consumed, which includes the byte that
+   caused the state change (or src_size if the state didn't change). */
+static size_t
+qp_decoder_more_text(struct qp_decoder *qp, const unsigned char *src,
+		     size_t src_size)
+{
+	/* src[unflushed..pos) is text that hasn't been copied to dest yet */
+	size_t pos, unflushed = 0;
+
+	for (pos = 0; pos < src_size; pos++) {
+		const unsigned char c = src[pos];
+
+		if (c > '=') {
+			/* nothing above '=' is special - the common case */
+			continue;
+		}
+		if (c == '\n') {
+			/* LF without a preceding CR: flush the pending text
+			   and emit a full CRLF in place of the LF */
+			buffer_append(qp->dest, src + unflushed,
+				      pos - unflushed);
+			buffer_append(qp->dest, "\r\n", 2);
+			unflushed = pos + 1;
+			continue;
+		}
+
+		if (c == '=') {
+			qp->state = STATE_EQUALS;
+		} else if (c == '\r') {
+			qp->state = STATE_CR;
+		} else if (c == ' ' || c == '\t') {
+			i_assert(qp->whitespace->used == 0);
+			qp->state = STATE_WHITESPACE;
+			buffer_append_c(qp->whitespace, c);
+		} else {
+			/* ordinary byte below '=' */
+			continue;
+		}
+
+		/* the state changed: flush the text before this byte and
+		   report the byte itself as consumed */
+		buffer_append(qp->dest, src + unflushed, pos - unflushed);
+		return pos + 1;
+	}
+
+	/* reached the end of input while still in the text state */
+	buffer_append(qp->dest, src + unflushed, src_size - unflushed);
+	return src_size;
+}
+
+/* Copies the held-back whitespace to the output and empties the whitespace
+   buffer. */
+static void qp_decoder_flush_whitespace(struct qp_decoder *qp)
+{
+	buffer_append_buf(qp->dest, qp->whitespace, 0, SIZE_MAX);
+	buffer_set_used_size(qp->whitespace, 0);
+}
+
+/* Called when the input doesn't continue the sequence that the current
+   state was expecting. The bytes consumed so far for that sequence are
+   written to qp->dest as they were read, *error_r is set to a description of
+   the problem and the decoder goes back to STATE_TEXT.
+
+   Must not be called in STATE_TEXT or STATE_WHITESPACE. */
+static void qp_decoder_invalid(struct qp_decoder *qp, const char **error_r)
+{
+	switch (qp->state) {
+	case STATE_TEXT:
+	case STATE_WHITESPACE:
+		/* these states have no pending sequence to fail */
+		i_unreached();
+	case STATE_CR:
+		/* [whitespace] CR */
+		qp_decoder_flush_whitespace(qp);
+		buffer_append_c(qp->dest, '\r');
+		*error_r = "CR not followed by LF";
+		break;
+	case STATE_SOFTCR:
+		/* '=' [whitespace] CR */
+		buffer_append_c(qp->dest, '=');
+		qp_decoder_flush_whitespace(qp);
+		buffer_append_c(qp->dest, '\r');
+		*error_r = "CR not followed by LF";
+		break;
+	case STATE_EQUALS_WHITESPACE:
+		/* '=' whitespace */
+		buffer_append_c(qp->dest, '=');
+		qp_decoder_flush_whitespace(qp);
+		*error_r = "'=<whitespace>' not followed by newline";
+		break;
+	case STATE_HEX2:
+		/* '=' hexdigit */
+		buffer_append_c(qp->dest, '=');
+		buffer_append_c(qp->dest, qp->hexchar);
+		*error_r = "'=<hex>' not followed by a hex digit";
+		break;
+	case STATE_EQUALS:
+		/* '=' alone */
+		buffer_append_c(qp->dest, '=');
+		*error_r = "'=' not followed by two hex digits";
+		break;
+	}
+	qp->state = STATE_TEXT;
+	i_assert(*error_r != NULL);
+}
+
+/* Decodes src_size bytes of quoted-printable input from src and appends the
+ result to qp->dest. The parser state is kept in qp, so the input may be
+ fed in arbitrary pieces over several calls.
+
+ Returns 0 if all of the input was valid. Otherwise returns -1, sets
+ *invalid_src_pos_r to the src index where the first problem was noticed
+ and *error_r to a description of it. Decoding doesn't stop at invalid
+ input: the offending bytes are passed through to qp->dest and the rest of
+ src is still processed. Only the first error of the call is reported.
+
+ On success *invalid_src_pos_r is SIZE_MAX and *error_r is NULL. */
+int qp_decoder_more(struct qp_decoder *qp, const unsigned char *src,
+ size_t src_size, size_t *invalid_src_pos_r,
+ const char **error_r)
+{
+ const char *error;
+ size_t i;
+
+ *invalid_src_pos_r = SIZE_MAX;
+ *error_r = NULL;
+
+ for (i = 0; i < src_size; ) {
+ switch (qp->state) {
+ case STATE_TEXT:
+ i += qp_decoder_more_text(qp, src+i, src_size-i);
+ /* don't increment i any more than we already did,
+ so continue instead of break */
+ continue;
+ case STATE_WHITESPACE:
+ if (QP_IS_TRAILING_WHITESPACE(src[i])) {
+ /* more whitespace */
+ /* stop buffering once the limit has been exceeded;
+ the extra whitespace is silently discarded */
+ if (qp->whitespace->used <= QP_MAX_WHITESPACE_LEN)
+ buffer_append_c(qp->whitespace, src[i]);
+ } else if (src[i] == '\r') {
+ qp->state = STATE_CR;
+ } else if (src[i] == '\n') {
+ /* drop the trailing whitespace */
+ /* the state is left as STATE_WHITESPACE with an
+ empty buffer; the next non-whitespace byte takes
+ the branch below, which switches to STATE_TEXT */
+ buffer_append(qp->dest, "\r\n", 2);
+ buffer_set_used_size(qp->whitespace, 0);
+ } else {
+ /* this wasn't trailing whitespace.
+ put it back. */
+ buffer_append_buf(qp->dest, qp->whitespace,
+ 0, SIZE_MAX);
+ if (qp->whitespace->used > QP_MAX_WHITESPACE_LEN) {
+ /* we already truncated some of the
+ whitespace away, because the line
+ is too long */
+ if (*invalid_src_pos_r == SIZE_MAX) {
+ *invalid_src_pos_r = i;
+ *error_r = "Too much whitespace";
+ }
+ }
+ buffer_set_used_size(qp->whitespace, 0);
+ qp->state = STATE_TEXT;
+ continue; /* don't increment i */
+ }
+ break;
+ case STATE_EQUALS:
+ if ((src[i] >= '0' && src[i] <= '9') ||
+ (src[i] >= 'A' && src[i] <= 'F') ||
+ /* lowercase hex isn't strictly valid, but allow */
+ (src[i] >= 'a' && src[i] <= 'f')) {
+ qp->hexchar = src[i];
+ qp->state = STATE_HEX2;
+ } else if (QP_IS_TRAILING_WHITESPACE(src[i])) {
+ i_assert(qp->whitespace->used == 0);
+ buffer_append_c(qp->whitespace, src[i]);
+ qp->state = STATE_EQUALS_WHITESPACE;
+ } else if (src[i] == '\r')
+ qp->state = STATE_SOFTCR;
+ else if (src[i] == '\n') {
+ /* "=" LF is a soft line break: nothing is written */
+ qp->state = STATE_TEXT;
+ } else {
+ /* invalid input */
+ qp_decoder_invalid(qp, &error);
+ if (*invalid_src_pos_r == SIZE_MAX) {
+ *invalid_src_pos_r = i;
+ *error_r = error;
+ }
+ continue; /* don't increment i */
+ }
+ break;
+ case STATE_HEX2:
+ if ((src[i] >= '0' && src[i] <= '9') ||
+ (src[i] >= 'A' && src[i] <= 'F') ||
+ (src[i] >= 'a' && src[i] <= 'f')) {
+ char data[3];
+
+ /* both characters were already checked to be hex
+ digits, so the conversion isn't expected to fail */
+ data[0] = qp->hexchar;
+ data[1] = src[i];
+ data[2] = '\0';
+ if (hex_to_binary(data, qp->dest) < 0)
+ i_unreached();
+ qp->state = STATE_TEXT;
+ } else {
+ /* invalid input */
+ qp_decoder_invalid(qp, &error);
+ if (*invalid_src_pos_r == SIZE_MAX) {
+ *invalid_src_pos_r = i;
+ *error_r = error;
+ }
+ continue; /* don't increment i */
+ }
+ break;
+ case STATE_EQUALS_WHITESPACE:
+ if (QP_IS_TRAILING_WHITESPACE(src[i])) {
+ if (qp->whitespace->used <= QP_MAX_WHITESPACE_LEN)
+ buffer_append_c(qp->whitespace, src[i]);
+ else {
+ /* if this isn't going to get truncated
+ anyway, it's going to be an error */
+ }
+ } else if (src[i] == '\r')
+ qp->state = STATE_SOFTCR;
+ else if (src[i] == '\n') {
+ /* "=" whitespace LF is a soft line break: the
+ whitespace is dropped and nothing is written */
+ buffer_set_used_size(qp->whitespace, 0);
+ qp->state = STATE_TEXT;
+ } else {
+ /* =<whitespace> not followed by [CR]LF
+ is invalid. */
+ qp_decoder_invalid(qp, &error);
+ if (*invalid_src_pos_r == SIZE_MAX) {
+ *invalid_src_pos_r = i;
+ *error_r = error;
+ }
+ continue; /* don't increment i */
+ }
+ break;
+ case STATE_CR:
+ case STATE_SOFTCR:
+ if (src[i] == '\n') {
+ /* any whitespace held before the CR is dropped. a
+ hard line break is written as CRLF, a soft one
+ produces no output. */
+ buffer_set_used_size(qp->whitespace, 0);
+ if (qp->state != STATE_SOFTCR)
+ buffer_append(qp->dest, "\r\n", 2);
+ qp->state = STATE_TEXT;
+ } else {
+ qp_decoder_invalid(qp, &error);
+ if (*invalid_src_pos_r == SIZE_MAX) {
+ *invalid_src_pos_r = i;
+ *error_r = error;
+ }
+ continue; /* don't increment i */
+ }
+ break;
+ }
+ i++;
+ }
+ /* the error position and the error message are always set together */
+ i_assert((*invalid_src_pos_r == SIZE_MAX) == (*error_r == NULL));
+ return *invalid_src_pos_r == SIZE_MAX ? 0 : -1;
+}
+
+/* Tells the decoder that there is no more input. If the input ended in the
+   middle of a '=' sequence or after a CR, the pending bytes are written to
+   qp->dest, *error_r is set and -1 is returned. Otherwise *error_r is set
+   to NULL and 0 is returned; whitespace still being held back at that point
+   is discarded. In both cases the decoder is reset to its initial state. */
+int qp_decoder_finish(struct qp_decoder *qp, const char **error_r)
+{
+	int ret = -1;
+
+	switch (qp->state) {
+	case STATE_TEXT:
+	case STATE_WHITESPACE:
+		/* nothing was left unfinished */
+		*error_r = NULL;
+		ret = 0;
+		break;
+	default:
+		/* input ended in the middle of a sequence */
+		qp_decoder_invalid(qp, error_r);
+		break;
+	}
+
+	buffer_set_used_size(qp->whitespace, 0);
+	qp->state = STATE_TEXT;
+	return ret;
+}