src/lib-mail/message-id.c


1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129

/* Copyright (c) 2006-2018 Dovecot authors, see the included COPYING file */

#include "lib.h"
#include "str.h"
#include "rfc822-parser.h"
#include "message-id.h"

/* Parse one message ID with the RFC822 tokenizer. *msgid_p must point just
   past the opening '<'. The unquoted "id-left@id-right" text is appended to
   msgid.

   Returns TRUE if a complete ID terminated by '>' was parsed, in which case
   *msgid_p is moved to the character following that '>'. Returns FALSE
   otherwise; *msgid_p is then left unchanged, but msgid may already have
   had partially parsed data appended to it. */
static bool get_untokenized_msgid(const char **msgid_p, string_t *msgid)
{
	struct rfc822_parser_context parser;
	int left_ret;
	bool parsed_ok = FALSE;

	rfc822_parser_init(&parser, (const unsigned char *)*msgid_p,
			   strlen(*msgid_p), NULL);

	/*
	   msg-id          = [CFWS] "<" id-left "@" id-right ">" [CFWS]
	   id-left         = dot-atom-text / no-fold-quote / obs-id-left
	   id-right        = dot-atom-text / no-fold-literal / obs-id-right
	   no-fold-quote   = DQUOTE *(qtext / quoted-pair) DQUOTE
	   no-fold-literal = "[" *(dtext / quoted-pair) "]"
	*/

	rfc822_skip_lwsp(&parser);

	/* id-left: either a quoted string or a dot-atom */
	left_ret = (*parser.data == '"') ?
		rfc822_parse_quoted_string(&parser, msgid) :
		rfc822_parse_dot_atom(&parser, msgid);
	if (left_ret <= 0 || *parser.data != '@')
		goto done;

	str_append_c(msgid, '@');
	parser.data++;
	rfc822_skip_lwsp(&parser);

	/* id-right: only the dot-atom form is accepted here */
	if (rfc822_parse_dot_atom(&parser, msgid) <= 0)
		goto done;
	if (*parser.data != '>')
		goto done;

	/* continue after the closing '>' on the next call */
	*msgid_p = (const char *)parser.data + 1;
	parsed_ok = TRUE;
done:
	rfc822_parser_deinit(&parser);
	return parsed_ok;
}

/* Remove every space, tab, CR and LF from the NUL-terminated string, in
   place. The string can only get shorter, so no extra memory is needed. */
static void strip_lwsp(char *str)
{
	/* @UNSAFE */
	char *rd, *wr;

	/* Walk over the leading part that has no whitespace at all. Nothing
	   has to be moved there, and if the end is reached the string is
	   already clean. */
	for (;;) {
		char c = *str;

		if (c == '\0')
			return;
		if (c == ' ' || c == '\t' || c == '\r' || c == '\n')
			break;
		str++;
	}

	/* Compact the rest: copy the non-whitespace characters down over
	   the whitespace. */
	wr = str;
	for (rd = str; *rd != '\0'; rd++) {
		switch (*rd) {
		case ' ':
		case '\t':
		case '\r':
		case '\n':
			/* dropped */
			break;
		default:
			*wr++ = *rd;
			break;
		}
	}
	*wr = '\0';
}

/* Return the next valid message ID found in *msgid_p, without the
   surrounding '<' and '>', or NULL if there are no more valid IDs.

   *msgid_p is advanced past the returned ID, so the function can be called
   repeatedly to walk through all the IDs in a header value. When the input
   runs out, *msgid_p is left pointing at the terminating NUL and NULL is
   returned. A NULL *msgid_p is accepted and also returns NULL.

   Candidates that don't parse as a message ID are skipped. The returned
   string is allocated with p_strdup_until(unsafe_data_stack_pool, ...) or
   t_str_new(). */
const char *message_id_get_next(const char **msgid_p)
{
	const char *msgid = *msgid_p;
	const char *p;
	string_t *str = NULL;
	bool found_at;

	if (*msgid_p == NULL)
		return NULL;

	for (;;) {
		/* skip until '<' */
		while (*msgid != '<') {
			if (*msgid == '\0') {
				*msgid_p = msgid;
				return NULL;
			}
			msgid++;
		}
		msgid++;

		/* check it through quickly to see if it's already normalized */
		p = msgid; found_at = FALSE;
		for (;; p++) {
			if ((unsigned char)*p >= 'A') /* matches most */
				continue;

			if (*p == '@')
				found_at = TRUE;
			/* NOTE: '[' is above 'A' in ASCII, so it is already
			   skipped by the check above and never reaches this
			   comparison. IDs with a domain-literal therefore stay
			   on the fast path. */
			if (*p == '>' || *p == '"' || *p == '(' || *p == '[')
				break;

			if (*p == '\0') {
				*msgid_p = p;
				return NULL;
			}
		}

		if (*p == '>') {
			/* no quoting or comments: the ID can be copied as is,
			   with only the whitespace removed */
			*msgid_p = p+1;
			if (found_at) {
				char *s;

				s = p_strdup_until(unsafe_data_stack_pool,
						   msgid, p);
				strip_lwsp(s);
				return s;
			}
		} else {
			/* ok, do it the slow way */
			*msgid_p = msgid;

			if (str == NULL) {
				/* allocate only once, so we don't leak
				   with multiple invalid message IDs */
				str = t_str_new(256);
			} else {
				/* A previous candidate that failed to parse
				   may have left partial data in the buffer.
				   Drop it so that it doesn't end up as a
				   prefix of this ID. */
				str_truncate(str, 0);
			}
			if (get_untokenized_msgid(msgid_p, str))
				return str_c(str);
		}

		/* invalid message id, see if there's another valid one */
		msgid = *msgid_p;
	}
}