summaryrefslogtreecommitdiffstats
path: root/src/libmime/email_addr.c
diff options
context:
space:
mode:
Diffstat (limited to '')
-rw-r--r--src/libmime/email_addr.c563
1 files changed, 563 insertions, 0 deletions
diff --git a/src/libmime/email_addr.c b/src/libmime/email_addr.c
new file mode 100644
index 0000000..0af7388
--- /dev/null
+++ b/src/libmime/email_addr.c
@@ -0,0 +1,563 @@
+/*-
+ * Copyright 2016 Vsevolod Stakhov
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "config.h"
+#include "email_addr.h"
+#include "message.h"
+#include "printf.h"
+#include "smtp_parsers.h"
+
+static void
+rspamd_email_address_unescape(struct rspamd_email_address *addr)
+{
+ const char *h, *end;
+ char *t, *d;
+
+ if (addr->user_len == 0) {
+ return;
+ }
+
+ d = g_malloc(addr->user_len);
+ t = d;
+ h = addr->user;
+ end = h + addr->user_len;
+
+ while (h < end) {
+ if (*h != '\\') {
+ *t++ = *h;
+ }
+ h++;
+ }
+
+ addr->user = d;
+ addr->user_len = t - d;
+ addr->flags |= RSPAMD_EMAIL_ADDR_USER_ALLOCATED;
+}
+
+struct rspamd_email_address *
+rspamd_email_address_from_smtp(const gchar *str, guint len)
+{
+ struct rspamd_email_address addr, *ret;
+ gsize nlen;
+
+ if (str == NULL || len == 0) {
+ return NULL;
+ }
+
+ rspamd_smtp_addr_parse(str, len, &addr);
+
+ if (addr.flags & RSPAMD_EMAIL_ADDR_VALID) {
+ ret = g_malloc(sizeof(*ret));
+ memcpy(ret, &addr, sizeof(addr));
+
+ if ((ret->flags & RSPAMD_EMAIL_ADDR_QUOTED) && ret->addr[0] == '"') {
+ if (ret->flags & RSPAMD_EMAIL_ADDR_HAS_BACKSLASH) {
+ /* We also need to unquote user */
+ rspamd_email_address_unescape(ret);
+ }
+
+ /* We need to unquote addr */
+ nlen = ret->domain_len + ret->user_len + 2;
+ ret->addr = g_malloc(nlen + 1);
+ ret->addr_len = rspamd_snprintf((char *) ret->addr, nlen, "%*s@%*s",
+ (gint) ret->user_len, ret->user,
+ (gint) ret->domain_len, ret->domain);
+ ret->flags |= RSPAMD_EMAIL_ADDR_ADDR_ALLOCATED;
+ }
+
+ return ret;
+ }
+
+ return NULL;
+}
+
+void rspamd_email_address_free(struct rspamd_email_address *addr)
+{
+ if (addr) {
+ if (addr->flags & RSPAMD_EMAIL_ADDR_ADDR_ALLOCATED) {
+ g_free((void *) addr->addr);
+ }
+
+ if (addr->flags & RSPAMD_EMAIL_ADDR_USER_ALLOCATED) {
+ g_free((void *) addr->user);
+ }
+
+ g_free(addr);
+ }
+}
+
+static inline void
+rspamd_email_address_add(rspamd_mempool_t *pool,
+ GPtrArray *ar,
+ struct rspamd_email_address *addr,
+ GString *name)
+{
+ struct rspamd_email_address *elt;
+ guint nlen;
+
+ elt = g_malloc0(sizeof(*elt));
+ rspamd_mempool_notify_alloc(pool, sizeof(*elt));
+
+ if (addr != NULL) {
+ memcpy(elt, addr, sizeof(*addr));
+ }
+ else {
+ elt->addr = "";
+ elt->domain = "";
+ elt->raw = "<>";
+ elt->raw_len = 2;
+ elt->user = "";
+ elt->flags |= RSPAMD_EMAIL_ADDR_EMPTY;
+ }
+
+ if ((elt->flags & RSPAMD_EMAIL_ADDR_QUOTED) && elt->addr[0] == '"') {
+ if (elt->flags & RSPAMD_EMAIL_ADDR_HAS_BACKSLASH) {
+ /* We also need to unquote user */
+ rspamd_email_address_unescape(elt);
+ }
+
+ /* We need to unquote addr */
+ nlen = elt->domain_len + elt->user_len + 2;
+ elt->addr = g_malloc(nlen + 1);
+ rspamd_mempool_notify_alloc(pool, nlen + 1);
+ elt->addr_len = rspamd_snprintf((char *) elt->addr, nlen, "%*s@%*s",
+ (gint) elt->user_len, elt->user,
+ (gint) elt->domain_len, elt->domain);
+ elt->flags |= RSPAMD_EMAIL_ADDR_ADDR_ALLOCATED;
+ }
+
+ if (name->len > 0) {
+ rspamd_gstring_strip(name, " \t\v");
+ elt->name = rspamd_mime_header_decode(pool, name->str, name->len, NULL);
+ }
+
+ rspamd_mempool_notify_alloc(pool, name->len);
+ g_ptr_array_add(ar, elt);
+}
+
+/*
+ * Tries to parse an email address that doesn't conform RFC
+ */
+static gboolean
+rspamd_email_address_parse_heuristic(const char *data, size_t len,
+ struct rspamd_email_address *addr)
+{
+ const gchar *p = data, *at = NULL, *end = data + len;
+ gboolean ret = FALSE;
+
+ memset(addr, 0, sizeof(*addr));
+
+ if (*p == '<' && len > 1) {
+ /* Angled address */
+ addr->addr_len = rspamd_memcspn(p + 1, ">", len - 1);
+ addr->addr = p + 1;
+ addr->raw = p;
+ addr->raw_len = len;
+ ret = TRUE;
+
+ p = p + 1;
+ len = addr->addr_len;
+ end = p + len;
+ }
+ else if (len > 0) {
+ addr->addr = p;
+ addr->addr_len = len;
+ addr->raw = p;
+ addr->raw_len = len;
+ ret = TRUE;
+ }
+
+ if (ret) {
+ at = rspamd_memrchr(p, '@', len);
+
+ if (at != NULL && at + 1 < end) {
+ addr->domain = at + 1;
+ addr->domain_len = end - (at + 1);
+ addr->user = p;
+ addr->user_len = at - p;
+ }
+
+ if (rspamd_str_has_8bit(p, len)) {
+ addr->flags |= RSPAMD_EMAIL_ADDR_HAS_8BIT;
+ }
+ }
+
+ return ret;
+}
+
+static inline int
+rspamd_email_address_check_and_add(const gchar *start, gsize len,
+ GPtrArray *res,
+ rspamd_mempool_t *pool,
+ GString *ns,
+ gint max_elements)
+{
+ struct rspamd_email_address addr;
+
+ g_assert(res != NULL);
+
+ if (max_elements > 0 && res->len >= max_elements) {
+ msg_info_pool_check("reached maximum number of elements %d when adding %v",
+ max_elements,
+ ns);
+
+ return -1;
+ }
+
+ /* The whole email is likely address */
+ memset(&addr, 0, sizeof(addr));
+ rspamd_smtp_addr_parse(start, len, &addr);
+
+ if (addr.flags & RSPAMD_EMAIL_ADDR_VALID) {
+ rspamd_email_address_add(pool, res, &addr, ns);
+ }
+ else {
+ /* Try heuristic */
+ if (rspamd_email_address_parse_heuristic(start,
+ len, &addr)) {
+ rspamd_email_address_add(pool, res, &addr, ns);
+
+ return 1;
+ }
+ else {
+ return 0;
+ }
+ }
+
+ return 1;
+}
+
+GPtrArray *
+rspamd_email_address_from_mime(rspamd_mempool_t *pool, const gchar *hdr,
+ guint len,
+ GPtrArray *src,
+ gint max_elements)
+{
+ GPtrArray *res = src;
+ gboolean seen_at = FALSE, seen_obrace = FALSE;
+
+ const gchar *p = hdr, *end = hdr + len, *c = hdr, *t;
+ GString *ns, *cpy;
+ gint obraces, ebraces;
+ enum {
+ parse_name = 0,
+ parse_quoted,
+ parse_addr,
+ skip_spaces
+ } state = parse_name,
+ next_state = parse_name;
+
+ if (res == NULL) {
+ res = g_ptr_array_sized_new(2);
+ rspamd_mempool_add_destructor(pool, rspamd_email_address_list_destroy,
+ res);
+ }
+ else if (max_elements > 0 && res->len >= max_elements) {
+ msg_info_pool_check("reached maximum number of elements %d", max_elements);
+
+ return res;
+ }
+
+ ns = g_string_sized_new(len);
+ cpy = g_string_sized_new(len);
+
+ rspamd_mempool_add_destructor(pool, rspamd_gstring_free_hard, cpy);
+
+ /* First, we need to remove all comments as they are terrible */
+ obraces = 0;
+ ebraces = 0;
+
+ while (p < end) {
+ if (state == parse_name) {
+ if (*p == '\\') {
+ if (obraces == 0) {
+ g_string_append_c(cpy, *p);
+ }
+
+ p++;
+ }
+ else {
+ if (*p == '"') {
+ state = parse_quoted;
+ }
+ else if (*p == '(') {
+ obraces++; /* To avoid ) itself being copied */
+ }
+ else if (*p == ')') {
+ ebraces++;
+ p++;
+ }
+
+ if (obraces == ebraces) {
+ obraces = 0;
+ ebraces = 0;
+ }
+ }
+
+ if (p < end && obraces == 0) {
+ g_string_append_c(cpy, *p);
+ }
+ }
+ else {
+ /* Quoted elt */
+ if (*p == '\\') {
+ g_string_append_c(cpy, *p);
+ p++;
+ }
+ else {
+ if (*p == '"') {
+ state = parse_name;
+ }
+ }
+
+ if (p < end) {
+ g_string_append_c(cpy, *p);
+ }
+ }
+
+ p++;
+ }
+
+ state = parse_name;
+
+ p = cpy->str;
+ c = p;
+ end = p + cpy->len;
+
+ while (p < end) {
+ switch (state) {
+ case parse_name:
+ if (*p == '"') {
+ /* We need to strip last spaces and update `ns` */
+ if (p > c) {
+ guint nspaces = 0;
+
+ t = p - 1;
+
+ while (t > c && g_ascii_isspace(*t)) {
+ t--;
+ nspaces++;
+ }
+
+ g_string_append_len(ns, c, t - c + 1);
+
+ if (nspaces > 0) {
+ g_string_append_c(ns, ' ');
+ }
+ }
+
+ state = parse_quoted;
+ c = p + 1;
+ }
+ else if (*p == '<') {
+ if (p > c) {
+ t = p - 1;
+
+ while (t > c && g_ascii_isspace(*t)) {
+ t--;
+ }
+
+ g_string_append_len(ns, c, t - c + 1);
+ }
+
+ c = p;
+ state = parse_addr;
+ }
+ else if (*p == ',') {
+ if (p > c && seen_at) {
+ /*
+ * Last token must be the address:
+ * e.g. Some name name@domain.com
+ */
+ t = p - 1;
+
+ while (t > c && g_ascii_isspace(*t)) {
+ t--;
+ }
+
+ int check = rspamd_email_address_check_and_add(c, t - c + 1,
+ res, pool, ns, max_elements);
+
+ if (check == 0 && res->len == 0) {
+ /* Insert fake address */
+ rspamd_email_address_add(pool, res, NULL, ns);
+ }
+ else if (check != 1) {
+ goto end;
+ }
+
+ /* Cleanup for the next use */
+ g_string_set_size(ns, 0);
+ seen_at = FALSE;
+ }
+
+ state = skip_spaces;
+ next_state = parse_name;
+ }
+ else if (*p == '@') {
+ seen_at = TRUE;
+ }
+
+ p++;
+ break;
+ case parse_quoted:
+ if (*p == '\\') {
+ if (p > c) {
+ g_string_append_len(ns, c, p - c);
+ }
+
+ p++;
+ c = p;
+ }
+ else if (*p == '"') {
+ if (p > c) {
+ g_string_append_len(ns, c, p - c);
+ }
+
+ if (p + 1 < end && g_ascii_isspace(p[1])) {
+ g_string_append_c(ns, ' ');
+ }
+
+ state = skip_spaces;
+ next_state = parse_name;
+ }
+ else if (*p == '@' && seen_obrace) {
+ seen_at = TRUE;
+ }
+ else if (*p == '<') {
+ seen_obrace = TRUE;
+ }
+ p++;
+ break;
+ case parse_addr:
+ if (*p == '>') {
+ int check = rspamd_email_address_check_and_add(c, p - c + 1,
+ res, pool, ns, max_elements);
+ if (check == 0 && res->len == 0) {
+ /* Insert a fake address */
+ rspamd_email_address_add(pool, res, NULL, ns);
+ }
+ else if (check != 1) {
+ goto end;
+ }
+
+ /* Cleanup for the next use */
+ g_string_set_size(ns, 0);
+ seen_at = FALSE;
+ state = skip_spaces;
+ next_state = parse_name;
+ }
+ else if (*p == '@') {
+ seen_at = TRUE;
+ }
+ p++;
+ break;
+ case skip_spaces:
+ if (!g_ascii_isspace(*p)) {
+ c = p;
+ state = next_state;
+ }
+ else {
+ p++;
+ }
+ break;
+ }
+ }
+
+ /* Handle leftover */
+ switch (state) {
+ case parse_name:
+ /* Assume the whole header as name (bad thing) */
+ if (p > c) {
+ while (p > c && g_ascii_isspace(*p)) {
+ p--;
+ }
+
+ if (p > c) {
+ if (seen_at) {
+ /* The whole email is likely address */
+ int check = rspamd_email_address_check_and_add(c, p - c,
+ res, pool, ns, max_elements);
+ if (check == 0 && res->len == 0) {
+ /* Insert a fake address */
+ rspamd_email_address_add(pool, res, NULL, ns);
+ }
+ else if (check != 1) {
+ goto end;
+ }
+ }
+ else {
+ /* No @ seen */
+ g_string_append_len(ns, c, p - c);
+
+ if (res->len == 0) {
+ rspamd_email_address_add(pool, res, NULL, ns);
+ }
+ }
+ }
+ else if (res->len == 0) {
+ rspamd_email_address_add(pool, res, NULL, ns);
+ }
+ }
+ break;
+ case parse_addr:
+ if (p > c) {
+ if (rspamd_email_address_check_and_add(c, p - c,
+ res, pool, ns, max_elements) == 0) {
+ if (res->len == 0) {
+ rspamd_email_address_add(pool, res, NULL, ns);
+ }
+ }
+ }
+ break;
+ case parse_quoted:
+ /* Unfinished quoted string or a comment */
+ /* If we have seen obrace + at, then we still can try to resolve address */
+ if (seen_at && seen_obrace) {
+ p = rspamd_memrchr(cpy->str, '<', cpy->len);
+ g_assert(p != NULL);
+ if (rspamd_email_address_check_and_add(p, end - p,
+ res, pool, ns, max_elements) == 0) {
+ if (res->len == 0) {
+ rspamd_email_address_add(pool, res, NULL, ns);
+ }
+ }
+ }
+ break;
+ default:
+ /* Do nothing */
+ break;
+ }
+end:
+ rspamd_mempool_notify_alloc(pool, cpy->len);
+ g_string_free(ns, TRUE);
+
+ return res;
+}
+
+void rspamd_email_address_list_destroy(gpointer ptr)
+{
+ GPtrArray *ar = ptr;
+ guint i;
+ struct rspamd_email_address *addr;
+
+ PTR_ARRAY_FOREACH(ar, i, addr)
+ {
+ rspamd_email_address_free(addr);
+ }
+
+ g_ptr_array_free(ar, TRUE);
+} \ No newline at end of file