summaryrefslogtreecommitdiffstats
path: root/tools/rfc822parse.c
diff options
context:
space:
mode:
Diffstat (limited to 'tools/rfc822parse.c')
-rw-r--r--tools/rfc822parse.c1331
1 files changed, 1331 insertions, 0 deletions
diff --git a/tools/rfc822parse.c b/tools/rfc822parse.c
new file mode 100644
index 0000000..0a4e2bc
--- /dev/null
+++ b/tools/rfc822parse.c
@@ -0,0 +1,1331 @@
+/* rfc822parse.c - Simple mail and MIME parser
+ * Copyright (C) 1999, 2000 Werner Koch, Duesseldorf
+ * Copyright (C) 2003, 2004 g10 Code GmbH
+ *
+ * This file is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU Lesser General Public License as
+ * published by the Free Software Foundation; either version 2.1 of
+ * the License, or (at your option) any later version.
+ *
+ * This file is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU Lesser General Public License for more details.
+ *
+ * You should have received a copy of the GNU Lesser General Public License
+ * along with this program; if not, see <https://www.gnu.org/licenses/>.
+ */
+
+
+/* According to RFC822 binary zeroes are allowed at many places. We do
+ * not handle this correct especially in the field parsing code. It
+ * should be easy to fix and the API provides a interfaces which
+ * returns the length but in addition makes sure that returned strings
+ * are always ended by a \0.
+ *
+ * Furthermore, the case of field names is changed and thus it is not
+ * always a good idea to use these modified header
+ * lines (e.g. signatures may break).
+ */
+
+#ifdef HAVE_CONFIG_H
+#include <config.h>
+#endif
+
+#include <stdlib.h>
+#include <stdio.h>
+#include <string.h>
+#include <errno.h>
+#include <stdarg.h>
+#include <assert.h>
+
+#include "rfc822parse.h"
+
+/* All valid characters in a header name. */
+#define HEADER_NAME_CHARS ("abcdefghijklmnopqrstuvwxyz" \
+ "ABCDEFGHIJKLMNOPQRSTUVWXYZ" \
+ "-01234567890")
+
+
+enum token_type
+ {
+ tSPACE,
+ tATOM,
+ tQUOTED,
+ tDOMAINLIT,
+ tSPECIAL
+ };
+
+/* For now we directly use our TOKEN as the parse context */
+typedef struct rfc822parse_field_context *TOKEN;
+struct rfc822parse_field_context
+{
+ TOKEN next;
+ enum token_type type;
+ struct {
+ unsigned int cont:1;
+ unsigned int lowered:1;
+ } flags;
+ /*TOKEN owner_pantry; */
+ char data[1];
+};
+
+struct hdr_line
+{
+ struct hdr_line *next;
+ int cont; /* This is a continuation of the previous line. */
+ unsigned char line[1];
+};
+
+typedef struct hdr_line *HDR_LINE;
+
+
+struct part
+{
+ struct part *right; /* The next part. */
+ struct part *down; /* A contained part. */
+ HDR_LINE hdr_lines; /* Header lines os that part. */
+ HDR_LINE *hdr_lines_tail; /* Helper for adding lines. */
+ char *boundary; /* Only used in the first part. */
+};
+typedef struct part *part_t;
+
+struct rfc822parse_context
+{
+ rfc822parse_cb_t callback;
+ void *callback_value;
+ int callback_error;
+ int in_body;
+ int in_preamble; /* Wether we are before the first boundary. */
+ part_t parts; /* The tree of parts. */
+ part_t current_part; /* Whom we are processing (points into parts). */
+ const char *boundary; /* Current boundary. */
+};
+
+static HDR_LINE find_header (rfc822parse_t msg, const char *name,
+ int which, HDR_LINE * rprev);
+
+
+static size_t
+length_sans_trailing_ws (const unsigned char *line, size_t len)
+{
+ const unsigned char *p, *mark;
+ size_t n;
+
+ for (mark=NULL, p=line, n=0; n < len; n++, p++)
+ {
+ if (strchr (" \t\r\n", *p ))
+ {
+ if( !mark )
+ mark = p;
+ }
+ else
+ mark = NULL;
+ }
+
+ if (mark)
+ return mark - line;
+ return len;
+}
+
+
+static void
+lowercase_string (unsigned char *string)
+{
+ for (; *string; string++)
+ if (*string >= 'A' && *string <= 'Z')
+ *string = *string - 'A' + 'a';
+}
+
+
+static int
+my_toupper (int c)
+{
+ if (c >= 'a' && c <= 'z')
+ c &= ~0x20;
+ return c;
+}
+
+/* This is the same as ascii_strcasecmp. */
+static int
+my_strcasecmp (const char *a, const char *b)
+{
+ if (a == b)
+ return 0;
+
+ for (; *a && *b; a++, b++)
+ {
+ if (*a != *b && my_toupper(*a) != my_toupper(*b))
+ break;
+ }
+ return *a == *b? 0 : (my_toupper (*a) - my_toupper (*b));
+}
+
+
+#ifndef HAVE_STPCPY
+static char *
+my_stpcpy (char *a,const char *b)
+{
+ while (*b)
+ *a++ = *b++;
+ *a = 0;
+
+ return (char*)a;
+}
+#define stpcpy my_stpcpy
+#endif
+
+
+/* If a callback has been registerd, call it for the event of type
+ EVENT. */
+static int
+do_callback (rfc822parse_t msg, rfc822parse_event_t event)
+{
+ int rc;
+
+ if (!msg->callback || msg->callback_error)
+ return 0;
+ rc = msg->callback (msg->callback_value, event, msg);
+ if (rc)
+ msg->callback_error = rc;
+ return rc;
+}
+
+static part_t
+new_part (void)
+{
+ part_t part;
+
+ part = calloc (1, sizeof *part);
+ if (part)
+ {
+ part->hdr_lines_tail = &part->hdr_lines;
+ }
+ return part;
+}
+
+
+static void
+release_part (part_t part)
+{
+ part_t tmp;
+ HDR_LINE hdr, hdr2;
+
+ for (; part; part = tmp)
+ {
+ tmp = part->right;
+ if (part->down)
+ release_part (part->down);
+ for (hdr = part->hdr_lines; hdr; hdr = hdr2)
+ {
+ hdr2 = hdr->next;
+ free (hdr);
+ }
+ free (part->boundary);
+ free (part);
+ }
+}
+
+
+static void
+release_handle_data (rfc822parse_t msg)
+{
+ release_part (msg->parts);
+ msg->parts = NULL;
+ msg->current_part = NULL;
+ msg->boundary = NULL;
+}
+
+
+/* Check that the header name is valid. We allow all lower and
+ * uppercase letters and, except for the first character, digits and
+ * the dash. The check stops at the first colon or at string end.
+ * Returns true if the name is valid. */
+int
+rfc822_valid_header_name_p (const char *name)
+{
+ const char *s;
+ size_t namelen;
+
+ if ((s=strchr (name, ':')))
+ namelen = s - name;
+ else
+ namelen = strlen (name);
+
+ if (!namelen
+ || strspn (name, HEADER_NAME_CHARS) != namelen
+ || strchr ("-0123456789", *name))
+ return 0;
+ return 1;
+}
+
+
+/* Transform a header NAME into a standard capitalized format.
+ * Conversion stops at the colon. */
+void
+rfc822_capitalize_header_name (char *name)
+{
+ unsigned char *p = name;
+ int first = 1;
+
+ /* Special cases first. */
+ if (!my_strcasecmp (name, "MIME-Version"))
+ {
+ strcpy (name, "MIME-Version");
+ return;
+ }
+
+ /* Regular cases. */
+ for (; *p && *p != ':'; p++)
+ {
+ if (*p == '-')
+ first = 1;
+ else if (first)
+ {
+ if (*p >= 'a' && *p <= 'z')
+ *p = *p - 'a' + 'A';
+ first = 0;
+ }
+ else if (*p >= 'A' && *p <= 'Z')
+ *p = *p - 'A' + 'a';
+ }
+}
+
+
+
+/* Create a new parsing context for an entire rfc822 message and
+ return it. CB and CB_VALUE may be given to callback for certain
+ events. NULL is returned on error with errno set appropriately. */
+rfc822parse_t
+rfc822parse_open (rfc822parse_cb_t cb, void *cb_value)
+{
+ rfc822parse_t msg = calloc (1, sizeof *msg);
+ if (msg)
+ {
+ msg->parts = msg->current_part = new_part ();
+ if (!msg->parts)
+ {
+ free (msg);
+ msg = NULL;
+ }
+ else
+ {
+ msg->callback = cb;
+ msg->callback_value = cb_value;
+ if (do_callback (msg, RFC822PARSE_OPEN))
+ {
+ release_handle_data (msg);
+ free (msg);
+ msg = NULL;
+ }
+ }
+ }
+ return msg;
+}
+
+
+void
+rfc822parse_cancel (rfc822parse_t msg)
+{
+ if (msg)
+ {
+ do_callback (msg, RFC822PARSE_CANCEL);
+ release_handle_data (msg);
+ free (msg);
+ }
+}
+
+
+void
+rfc822parse_close (rfc822parse_t msg)
+{
+ if (msg)
+ {
+ do_callback (msg, RFC822PARSE_CLOSE);
+ release_handle_data (msg);
+ free (msg);
+ }
+}
+
+static part_t
+find_parent (part_t tree, part_t target)
+{
+ part_t part;
+
+ for (part = tree->down; part; part = part->right)
+ {
+ if (part == target)
+ return tree; /* Found. */
+ if (part->down)
+ {
+ part_t tmp = find_parent (part, target);
+ if (tmp)
+ return tmp;
+ }
+ }
+ return NULL;
+}
+
+static void
+set_current_part_to_parent (rfc822parse_t msg)
+{
+ part_t parent;
+
+ assert (msg->current_part);
+ parent = find_parent (msg->parts, msg->current_part);
+ if (!parent)
+ return; /* Already at the top. */
+
+#ifndef NDEBUG
+ {
+ part_t part;
+ for (part = parent->down; part; part = part->right)
+ if (part == msg->current_part)
+ break;
+ assert (part);
+ }
+#endif
+ msg->current_part = parent;
+
+ parent = find_parent (msg->parts, parent);
+ msg->boundary = parent? parent->boundary: NULL;
+}
+
+
+
+/****************
+ * We have read in all header lines and are about to receive the body
+ * part. The delimiter line has already been processed.
+ *
+ * FIXME: we's better return an error in case of memory failures.
+ */
+static int
+transition_to_body (rfc822parse_t msg)
+{
+ rfc822parse_field_t ctx;
+ int rc;
+
+ rc = do_callback (msg, RFC822PARSE_T2BODY);
+ if (!rc)
+ {
+ /* Store the boundary if we have multipart type. */
+ ctx = rfc822parse_parse_field (msg, "Content-Type", -1);
+ if (ctx)
+ {
+ const char *s;
+
+ s = rfc822parse_query_media_type (ctx, NULL);
+ if (s && !strcmp (s,"multipart"))
+ {
+ s = rfc822parse_query_parameter (ctx, "boundary", 0);
+ if (s)
+ {
+ assert (!msg->current_part->boundary);
+ msg->current_part->boundary = malloc (strlen (s) + 1);
+ if (msg->current_part->boundary)
+ {
+ part_t part;
+
+ strcpy (msg->current_part->boundary, s);
+ msg->boundary = msg->current_part->boundary;
+ part = new_part ();
+ if (!part)
+ {
+ int save_errno = errno;
+ rfc822parse_release_field (ctx);
+ errno = save_errno;
+ return -1;
+ }
+ rc = do_callback (msg, RFC822PARSE_LEVEL_DOWN);
+ assert (!msg->current_part->down);
+ msg->current_part->down = part;
+ msg->current_part = part;
+ msg->in_preamble = 1;
+ }
+ }
+ }
+ rfc822parse_release_field (ctx);
+ }
+ }
+
+ return rc;
+}
+
+/* We have just passed a MIME boundary and need to prepare for new part.
+ headers. */
+static int
+transition_to_header (rfc822parse_t msg)
+{
+ part_t part;
+
+ assert (msg->current_part);
+ assert (!msg->current_part->right);
+
+ part = new_part ();
+ if (!part)
+ return -1;
+
+ msg->current_part->right = part;
+ msg->current_part = part;
+ return 0;
+}
+
+
+static int
+insert_header (rfc822parse_t msg, const unsigned char *line, size_t length)
+{
+ HDR_LINE hdr;
+
+ assert (msg->current_part);
+ if (!length)
+ {
+ msg->in_body = 1;
+ return transition_to_body (msg);
+ }
+
+ if (!msg->current_part->hdr_lines)
+ do_callback (msg, RFC822PARSE_BEGIN_HEADER);
+
+ length = length_sans_trailing_ws (line, length);
+ hdr = malloc (sizeof (*hdr) + length);
+ if (!hdr)
+ return -1;
+ hdr->next = NULL;
+ hdr->cont = (*line == ' ' || *line == '\t');
+ memcpy (hdr->line, line, length);
+ hdr->line[length] = 0; /* Make it a string. */
+
+ /* Transform a field name into canonical format. */
+ if (!hdr->cont && strchr (line, ':'))
+ rfc822_capitalize_header_name (hdr->line);
+
+ *msg->current_part->hdr_lines_tail = hdr;
+ msg->current_part->hdr_lines_tail = &hdr->next;
+
+ /* Lets help the caller to prevent mail loops and issue an event for
+ * every Received header. */
+ if (length >= 9 && !memcmp (line, "Received:", 9))
+ do_callback (msg, RFC822PARSE_RCVD_SEEN);
+ return 0;
+}
+
+
+/****************
+ * Note: We handle the body transparent to allow binary zeroes in it.
+ */
+static int
+insert_body (rfc822parse_t msg, const unsigned char *line, size_t length)
+{
+ int rc = 0;
+
+ if (length > 2 && *line == '-' && line[1] == '-' && msg->boundary)
+ {
+ size_t blen = strlen (msg->boundary);
+
+ if (length == blen + 2
+ && !memcmp (line+2, msg->boundary, blen))
+ {
+ rc = do_callback (msg, RFC822PARSE_BOUNDARY);
+ msg->in_body = 0;
+ if (!rc && !msg->in_preamble)
+ rc = transition_to_header (msg);
+ msg->in_preamble = 0;
+ }
+ else if (length == blen + 4
+ && line[length-2] =='-' && line[length-1] == '-'
+ && !memcmp (line+2, msg->boundary, blen))
+ {
+ rc = do_callback (msg, RFC822PARSE_LAST_BOUNDARY);
+ msg->boundary = NULL; /* No current boundary anymore. */
+ set_current_part_to_parent (msg);
+
+ /* Fixme: The next should actually be send right before the
+ next boundary, so that we can mark the epilogue. */
+ if (!rc)
+ rc = do_callback (msg, RFC822PARSE_LEVEL_UP);
+ }
+ }
+ if (msg->in_preamble && !rc)
+ rc = do_callback (msg, RFC822PARSE_PREAMBLE);
+
+ return rc;
+}
+
+/* Insert the next line into the parser. Return 0 on success or true
+ on error with errno set appropriately. */
+int
+rfc822parse_insert (rfc822parse_t msg, const unsigned char *line, size_t length)
+{
+ return (msg->in_body
+ ? insert_body (msg, line, length)
+ : insert_header (msg, line, length));
+}
+
+
+/* Tell the parser that we have finished the message. */
+int
+rfc822parse_finish (rfc822parse_t msg)
+{
+ return do_callback (msg, RFC822PARSE_FINISH);
+}
+
+
+
+/****************
+ * Get a copy of a header line. The line is returned as one long
+ * string with LF to separate the continuation line. Caller must free
+ * the return buffer. WHICH may be used to enumerate over all lines.
+ * Wildcards are allowed. This function works on the current headers;
+ * i.e. the regular mail headers or the MIME headers of the current
+ * part.
+ *
+ * WHICH gives the mode:
+ * -1 := Take the last occurrence
+ * n := Take the n-th one.
+ *
+ * Returns a newly allocated buffer or NULL on error. errno is set in
+ * case of a memory failure or set to 0 if the requested field is not
+ * available.
+ *
+ * If VALUEOFF is not NULL it will receive the offset of the first non
+ * space character in the value part of the line (i.e. after the first
+ * colon).
+ */
+char *
+rfc822parse_get_field (rfc822parse_t msg, const char *name, int which,
+ size_t *valueoff)
+{
+ HDR_LINE h, h2;
+ char *buf, *p;
+ size_t n;
+
+ h = find_header (msg, name, which, NULL);
+ if (!h)
+ {
+ errno = 0;
+ return NULL; /* no such field */
+ }
+
+ n = strlen (h->line) + 1;
+ for (h2 = h->next; h2 && h2->cont; h2 = h2->next)
+ n += strlen (h2->line) + 1;
+
+ buf = p = malloc (n);
+ if (buf)
+ {
+ p = stpcpy (p, h->line);
+ *p++ = '\n';
+ for (h2 = h->next; h2 && h2->cont; h2 = h2->next)
+ {
+ p = stpcpy (p, h2->line);
+ *p++ = '\n';
+ }
+ p[-1] = 0;
+ }
+
+ if (valueoff)
+ {
+ p = strchr (buf, ':');
+ if (!p)
+ *valueoff = 0; /* Oops: should never happen. */
+ else
+ {
+ p++;
+ while (*p == ' ' || *p == '\t' || *p == '\r' || *p == '\n')
+ p++;
+ *valueoff = p - buf;
+ }
+ }
+
+ return buf;
+}
+
+
+/****************
+ * Enumerate all header. Caller has to provide the address of a pointer
+ * which has to be initialzed to NULL, the caller should then never change this
+ * pointer until he has closed the enumeration by passing again the address
+ * of the pointer but with msg set to NULL.
+ * The function returns pointers to all the header lines or NULL when
+ * all lines have been enumerated or no headers are available.
+ */
+const char *
+rfc822parse_enum_header_lines (rfc822parse_t msg, void **context)
+{
+ HDR_LINE l;
+
+ if (!msg) /* Close. */
+ return NULL;
+
+ if (*context == msg || !msg->current_part)
+ return NULL;
+
+ l = *context ? (HDR_LINE) *context : msg->current_part->hdr_lines;
+
+ if (l)
+ {
+ *context = l->next ? (void *) (l->next) : (void *) msg;
+ return l->line;
+ }
+ *context = msg; /* Mark end of list. */
+ return NULL;
+}
+
+
+
+/****************
+ * Find a header field. If the Name does end in an asterisk this is meant
+ * to be a wildcard.
+ *
+ * which -1 : Retrieve the last field
+ * >0 : Retrieve the n-th field
+
+ * RPREV may be used to return the predecessor of the returned field;
+ * which may be NULL for the very first one. It has to be initialzed
+ * to either NULL in which case the search start at the first header line,
+ * or it may point to a headerline, where the search should start
+ */
+static HDR_LINE
+find_header (rfc822parse_t msg, const char *name, int which, HDR_LINE *rprev)
+{
+ HDR_LINE hdr, prev = NULL, mark = NULL;
+ unsigned char *p;
+ size_t namelen, n;
+ int found = 0;
+ int glob = 0;
+
+ if (!msg->current_part)
+ return NULL;
+
+ namelen = strlen (name);
+ if (namelen && name[namelen - 1] == '*')
+ {
+ namelen--;
+ glob = 1;
+ }
+
+ hdr = msg->current_part->hdr_lines;
+ if (rprev && *rprev)
+ {
+ /* spool forward to the requested starting place.
+ * we cannot simply set this as we have to return
+ * the previous list element too */
+ for (; hdr && hdr != *rprev; prev = hdr, hdr = hdr->next)
+ ;
+ }
+
+ for (; hdr; prev = hdr, hdr = hdr->next)
+ {
+ if (hdr->cont)
+ continue;
+ if (!(p = strchr (hdr->line, ':')))
+ continue; /* invalid header, just skip it. */
+ n = p - hdr->line;
+ if (!n)
+ continue; /* invalid name */
+ if ((glob ? (namelen <= n) : (namelen == n))
+ && !memcmp (hdr->line, name, namelen))
+ {
+ found++;
+ if (which == -1)
+ mark = hdr;
+ else if (found == which)
+ {
+ if (rprev)
+ *rprev = prev;
+ return hdr;
+ }
+ }
+ }
+ if (mark && rprev)
+ *rprev = prev;
+ return mark;
+}
+
+
+
+static const char *
+skip_ws (const char *s)
+{
+ while (*s == ' ' || *s == '\t' || *s == '\r' || *s == '\n')
+ s++;
+ return s;
+}
+
+
+static void
+release_token_list (TOKEN t)
+{
+ while (t)
+ {
+ TOKEN t2 = t->next;
+ /* fixme: If we have owner_pantry, put the token back to
+ * this pantry so that it can be reused later */
+ free (t);
+ t = t2;
+ }
+}
+
+
+static TOKEN
+new_token (enum token_type type, const char *buf, size_t length)
+{
+ TOKEN t;
+
+ /* fixme: look through our pantries to find a suitable
+ * token for reuse */
+ t = malloc (sizeof *t + length);
+ if (t)
+ {
+ t->next = NULL;
+ t->type = type;
+ memset (&t->flags, 0, sizeof (t->flags));
+ t->data[0] = 0;
+ if (buf)
+ {
+ memcpy (t->data, buf, length);
+ t->data[length] = 0; /* Make sure it is a C string. */
+ }
+ else
+ t->data[0] = 0;
+ }
+ return t;
+}
+
+static TOKEN
+append_to_token (TOKEN old, const char *buf, size_t length)
+{
+ size_t n = strlen (old->data);
+ TOKEN t;
+
+ t = malloc (sizeof *t + n + length);
+ if (t)
+ {
+ t->next = old->next;
+ t->type = old->type;
+ t->flags = old->flags;
+ memcpy (t->data, old->data, n);
+ memcpy (t->data + n, buf, length);
+ t->data[n + length] = 0;
+ old->next = NULL;
+ release_token_list (old);
+ }
+ return t;
+}
+
+
+
+/*
+ Parse a field into tokens as defined by rfc822.
+ */
+static TOKEN
+parse_field (HDR_LINE hdr)
+{
+ static const char specials[] = "<>@.,;:\\[]\"()";
+ static const char specials2[] = "<>@.,;:";
+ static const char tspecials[] = "/?=<>@,;:\\[]\"()";
+ static const char tspecials2[] = "/?=<>@.,;:"; /* FIXME: really
+ include '.'?*/
+ static struct
+ {
+ const unsigned char *name;
+ size_t namelen;
+ } tspecial_header[] = {
+ { "Content-Type", 12},
+ { "Content-Transfer-Encoding", 25},
+ { "Content-Disposition", 19},
+ { NULL, 0}
+ };
+ const char *delimiters;
+ const char *delimiters2;
+ const unsigned char *line, *s, *s2;
+ size_t n;
+ int i, invalid = 0;
+ TOKEN t, tok, *tok_tail;
+
+ errno = 0;
+ if (!hdr)
+ return NULL;
+
+ tok = NULL;
+ tok_tail = &tok;
+
+ line = hdr->line;
+ if (!(s = strchr (line, ':')))
+ return NULL; /* oops */
+
+ n = s - line;
+ if (!n)
+ return NULL; /* oops: invalid name */
+
+ delimiters = specials;
+ delimiters2 = specials2;
+ for (i = 0; tspecial_header[i].name; i++)
+ {
+ if (n == tspecial_header[i].namelen
+ && !memcmp (line, tspecial_header[i].name, n))
+ {
+ delimiters = tspecials;
+ delimiters2 = tspecials2;
+ break;
+ }
+ }
+
+ s++; /* Move over the colon. */
+ for (;;)
+ {
+ while (!*s)
+ {
+ if (!hdr->next || !hdr->next->cont)
+ return tok; /* Ready. */
+
+ /* Next item is a header continuation line. */
+ hdr = hdr->next;
+ s = hdr->line;
+ }
+
+ if (*s == '(')
+ {
+ int level = 1;
+ int in_quote = 0;
+
+ invalid = 0;
+ for (s++;; s++)
+ {
+ while (!*s)
+ {
+ if (!hdr->next || !hdr->next->cont)
+ goto oparen_out;
+ /* Next item is a header continuation line. */
+ hdr = hdr->next;
+ s = hdr->line;
+ }
+
+ if (in_quote)
+ {
+ if (*s == '\"')
+ in_quote = 0;
+ else if (*s == '\\' && s[1]) /* what about continuation? */
+ s++;
+ }
+ else if (*s == ')')
+ {
+ if (!--level)
+ break;
+ }
+ else if (*s == '(')
+ level++;
+ else if (*s == '\"')
+ in_quote = 1;
+ }
+ oparen_out:
+ if (!*s)
+ ; /* Actually this is an error, but we don't care about it. */
+ else
+ s++;
+ }
+ else if (*s == '\"' || *s == '[')
+ {
+ /* We do not check for non-allowed nesting of domainliterals */
+ int term = *s == '\"' ? '\"' : ']';
+ invalid = 0;
+ s++;
+ t = NULL;
+
+ for (;;)
+ {
+ for (s2 = s; *s2; s2++)
+ {
+ if (*s2 == term)
+ break;
+ else if (*s2 == '\\' && s2[1]) /* what about continuation? */
+ s2++;
+ }
+
+ t = (t
+ ? append_to_token (t, s, s2 - s)
+ : new_token (term == '\"'? tQUOTED : tDOMAINLIT, s, s2 - s));
+ if (!t)
+ goto failure;
+
+ if (*s2 || !hdr->next || !hdr->next->cont)
+ break;
+ /* Next item is a header continuation line. */
+ hdr = hdr->next;
+ s = hdr->line;
+ }
+ *tok_tail = t;
+ tok_tail = &t->next;
+ s = s2;
+ if (*s)
+ s++; /* skip the delimiter */
+ }
+ else if ((s2 = strchr (delimiters2, *s)))
+ { /* Special characters which are not handled above. */
+ invalid = 0;
+ t = new_token (tSPECIAL, s, 1);
+ if (!t)
+ goto failure;
+ *tok_tail = t;
+ tok_tail = &t->next;
+ s++;
+ }
+ else if (*s == ' ' || *s == '\t' || *s == '\r' || *s == '\n')
+ {
+ invalid = 0;
+ s = skip_ws (s + 1);
+ }
+ else if (*s > 0x20 && !(*s & 128))
+ { /* Atom. */
+ invalid = 0;
+ for (s2 = s + 1; *s2 > 0x20
+ && !(*s2 & 128) && !strchr (delimiters, *s2); s2++)
+ ;
+ t = new_token (tATOM, s, s2 - s);
+ if (!t)
+ goto failure;
+ *tok_tail = t;
+ tok_tail = &t->next;
+ s = s2;
+ }
+ else
+ { /* Invalid character. */
+ if (!invalid)
+ { /* For parsing we assume only one space. */
+ t = new_token (tSPACE, NULL, 0);
+ if (!t)
+ goto failure;
+ *tok_tail = t;
+ tok_tail = &t->next;
+ invalid = 1;
+ }
+ s++;
+ }
+ }
+ /*NOTREACHED*/
+
+ failure:
+ {
+ int save = errno;
+ release_token_list (tok);
+ errno = save;
+ }
+ return NULL;
+}
+
+
+
+
+/****************
+ * Find and parse a header field.
+ * WHICH indicates what to do if there are multiple instance of the same
+ * field (like "Received"); the following value are defined:
+ * -1 := Take the last occurrence
+ * 0 := Reserved
+ * n := Take the n-th one.
+ * Returns a handle for further operations on the parse context of the field
+ * or NULL if the field was not found.
+ */
+rfc822parse_field_t
+rfc822parse_parse_field (rfc822parse_t msg, const char *name, int which)
+{
+ HDR_LINE hdr;
+
+ if (!which)
+ return NULL;
+
+ hdr = find_header (msg, name, which, NULL);
+ if (!hdr)
+ return NULL;
+ return parse_field (hdr);
+}
+
+void
+rfc822parse_release_field (rfc822parse_field_t ctx)
+{
+ if (ctx)
+ release_token_list (ctx);
+}
+
+
+
+/****************
+ * Check whether T points to a parameter.
+ * A parameter starts with a semicolon and it is assumed that t
+ * points to exactly this one.
+ */
+static int
+is_parameter (TOKEN t)
+{
+ t = t->next;
+ if (!t || t->type != tATOM)
+ return 0;
+ t = t->next;
+ if (!t || !(t->type == tSPECIAL && t->data[0] == '='))
+ return 0;
+ t = t->next;
+ if (!t)
+ return 1; /* We assume that an non existing value is an empty one. */
+ return t->type == tQUOTED || t->type == tATOM;
+}
+
+/*
+ Some header (Content-type) have a special syntax where attribute=value
+ pairs are used after a leading semicolon. The parse_field code
+ knows about these fields and changes the parsing to the one defined
+ in RFC2045.
+ Returns a pointer to the value which is valid as long as the
+ parse context is valid; NULL is returned in case that attr is not
+ defined in the header, a missing value is reppresented by an empty string.
+
+ With LOWER_VALUE set to true, a matching field valuebe be
+ lowercased.
+
+ Note, that ATTR should be lowercase.
+ */
+const char *
+rfc822parse_query_parameter (rfc822parse_field_t ctx, const char *attr,
+ int lower_value)
+{
+ TOKEN t, a;
+
+ for (t = ctx; t; t = t->next)
+ {
+ /* skip to the next semicolon */
+ for (; t && !(t->type == tSPECIAL && t->data[0] == ';'); t = t->next)
+ ;
+ if (!t)
+ return NULL;
+ if (is_parameter (t))
+ { /* Look closer. */
+ a = t->next; /* We know that this is an atom */
+ if ( !a->flags.lowered )
+ {
+ lowercase_string (a->data);
+ a->flags.lowered = 1;
+ }
+ if (!strcmp (a->data, attr))
+ { /* found */
+ t = a->next->next;
+ /* Either T is now an atom, a quoted string or NULL in
+ * which case we return an empty string. */
+
+ if ( lower_value && t && !t->flags.lowered )
+ {
+ lowercase_string (t->data);
+ t->flags.lowered = 1;
+ }
+ return t ? t->data : "";
+ }
+ }
+ }
+ return NULL;
+}
+
+/****************
+ * This function may be used for the Content-Type header to figure out
+ * the media type and subtype. Note, that the returned strings are
+ * guaranteed to be lowercase as required by MIME.
+ *
+ * Returns: a pointer to the media type and if subtype is not NULL,
+ * a pointer to the subtype.
+ */
+const char *
+rfc822parse_query_media_type (rfc822parse_field_t ctx, const char **subtype)
+{
+ TOKEN t = ctx;
+ const char *type;
+
+ if (t->type != tATOM)
+ return NULL;
+ if (!t->flags.lowered)
+ {
+ lowercase_string (t->data);
+ t->flags.lowered = 1;
+ }
+ type = t->data;
+ t = t->next;
+ if (!t || t->type != tSPECIAL || t->data[0] != '/')
+ return NULL;
+ t = t->next;
+ if (!t || t->type != tATOM)
+ return NULL;
+
+ if (subtype)
+ {
+ if (!t->flags.lowered)
+ {
+ lowercase_string (t->data);
+ t->flags.lowered = 1;
+ }
+ *subtype = t->data;
+ }
+ return type;
+}
+
+
+
+
+
+#ifdef TESTING
+
+/* Internal debug function to print the structure of the message. */
+static void
+dump_structure (rfc822parse_t msg, part_t part, int indent)
+{
+ if (!part)
+ {
+ printf ("*** Structure of this message:\n");
+ part = msg->parts;
+ }
+
+ for (; part; part = part->right)
+ {
+ rfc822parse_field_t ctx;
+ part_t save_part; /* ugly hack - we should have a function to
+ get part information. */
+ const char *s;
+
+ save_part = msg->current_part;
+ msg->current_part = part;
+ ctx = rfc822parse_parse_field (msg, "Content-Type", -1);
+ msg->current_part = save_part;
+ if (ctx)
+ {
+ const char *s1, *s2;
+ s1 = rfc822parse_query_media_type (ctx, &s2);
+ if (s1)
+ printf ("*** %*s %s/%s", indent*2, "", s1, s2);
+ else
+ printf ("*** %*s [not found]", indent*2, "");
+
+ s = rfc822parse_query_parameter (ctx, "boundary", 0);
+ if (s)
+ printf (" (boundary=\"%s\")", s);
+ rfc822parse_release_field (ctx);
+ }
+ else
+ printf ("*** %*s text/plain [assumed]", indent*2, "");
+ putchar('\n');
+
+ if (part->down)
+ dump_structure (msg, part->down, indent + 1);
+ }
+
+}
+
+
+
+static void
+show_param (rfc822parse_field_t ctx, const char *name)
+{
+ const char *s;
+
+ if (!ctx)
+ return;
+ s = rfc822parse_query_parameter (ctx, name, 0);
+ if (s)
+ printf ("*** %s: '%s'\n", name, s);
+}
+
+
+
+static void
+show_event (rfc822parse_event_t event)
+{
+ const char *s;
+
+ switch (event)
+ {
+ case RFC822PARSE_OPEN: s= "Open"; break;
+ case RFC822PARSE_CLOSE: s= "Close"; break;
+ case RFC822PARSE_CANCEL: s= "Cancel"; break;
+ case RFC822PARSE_T2BODY: s= "T2Body"; break;
+ case RFC822PARSE_FINISH: s= "Finish"; break;
+ case RFC822PARSE_RCVD_SEEN: s= "Rcvd_Seen"; break;
+ case RFC822PARSE_LEVEL_DOWN: s= "Level_Down"; break;
+ case RFC822PARSE_LEVEL_UP: s= "Level_Up"; break;
+ case RFC822PARSE_BOUNDARY: s= "Boundary"; break;
+ case RFC822PARSE_LAST_BOUNDARY: s= "Last_Boundary"; break;
+ case RFC822PARSE_BEGIN_HEADER: s= "Begin_Header"; break;
+ case RFC822PARSE_PREAMBLE: s= "Preamble"; break;
+ case RFC822PARSE_EPILOGUE: s= "Epilogue"; break;
+ default: s= "***invalid event***"; break;
+ }
+ printf ("*** got RFC822 event %s\n", s);
+}
+
+static int
+msg_cb (void *dummy_arg, rfc822parse_event_t event, rfc822parse_t msg)
+{
+ show_event (event);
+ if (event == RFC822PARSE_T2BODY)
+ {
+ rfc822parse_field_t ctx;
+ void *ectx;
+ const char *line;
+
+ for (ectx=NULL; (line = rfc822parse_enum_header_lines (msg, &ectx)); )
+ {
+ printf ("*** HDR: %s\n", line);
+ }
+ rfc822parse_enum_header_lines (NULL, &ectx); /* Close enumerator. */
+
+ ctx = rfc822parse_parse_field (msg, "Content-Type", -1);
+ if (ctx)
+ {
+ const char *s1, *s2;
+ s1 = rfc822parse_query_media_type (ctx, &s2);
+ if (s1)
+ printf ("*** media: '%s/%s'\n", s1, s2);
+ else
+ printf ("*** media: [not found]\n");
+ show_param (ctx, "boundary");
+ show_param (ctx, "protocol");
+ rfc822parse_release_field (ctx);
+ }
+ else
+ printf ("*** media: text/plain [assumed]\n");
+
+ }
+
+
+ return 0;
+}
+
+
+
+int
+main (int argc, char **argv)
+{
+ char line[5000];
+ size_t length;
+ rfc822parse_t msg;
+
+ msg = rfc822parse_open (msg_cb, NULL);
+ if (!msg)
+ abort ();
+
+ while (fgets (line, sizeof (line), stdin))
+ {
+ length = strlen (line);
+ if (length && line[length - 1] == '\n')
+ line[--length] = 0;
+ if (length && line[length - 1] == '\r')
+ line[--length] = 0;
+ if (rfc822parse_insert (msg, line, length))
+ abort ();
+ }
+
+ dump_structure (msg, NULL, 0);
+
+ rfc822parse_close (msg);
+ return 0;
+}
+#endif
+
+/*
+Local Variables:
+compile-command: "gcc -Wall -Wno-pointer-sign -g -DTESTING -o rfc822parse rfc822parse.c"
+End:
+*/