diff options
Diffstat (limited to 'tools/rfc822parse.c')
-rw-r--r-- | tools/rfc822parse.c | 1331 |
1 files changed, 1331 insertions, 0 deletions
diff --git a/tools/rfc822parse.c b/tools/rfc822parse.c new file mode 100644 index 0000000..0a4e2bc --- /dev/null +++ b/tools/rfc822parse.c @@ -0,0 +1,1331 @@ +/* rfc822parse.c - Simple mail and MIME parser + * Copyright (C) 1999, 2000 Werner Koch, Duesseldorf + * Copyright (C) 2003, 2004 g10 Code GmbH + * + * This file is free software; you can redistribute it and/or modify + * it under the terms of the GNU Lesser General Public License as + * published by the Free Software Foundation; either version 2.1 of + * the License, or (at your option) any later version. + * + * This file is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU Lesser General Public License for more details. + * + * You should have received a copy of the GNU Lesser General Public License + * along with this program; if not, see <https://www.gnu.org/licenses/>. + */ + + +/* According to RFC822 binary zeroes are allowed at many places. We do + * not handle this correct especially in the field parsing code. It + * should be easy to fix and the API provides a interfaces which + * returns the length but in addition makes sure that returned strings + * are always ended by a \0. + * + * Furthermore, the case of field names is changed and thus it is not + * always a good idea to use these modified header + * lines (e.g. signatures may break). + */ + +#ifdef HAVE_CONFIG_H +#include <config.h> +#endif + +#include <stdlib.h> +#include <stdio.h> +#include <string.h> +#include <errno.h> +#include <stdarg.h> +#include <assert.h> + +#include "rfc822parse.h" + +/* All valid characters in a header name. */ +#define HEADER_NAME_CHARS ("abcdefghijklmnopqrstuvwxyz" \ + "ABCDEFGHIJKLMNOPQRSTUVWXYZ" \ + "-01234567890") + + +enum token_type + { + tSPACE, + tATOM, + tQUOTED, + tDOMAINLIT, + tSPECIAL + }; + +/* For now we directly use our TOKEN as the parse context */ +typedef struct rfc822parse_field_context *TOKEN; +struct rfc822parse_field_context +{ + TOKEN next; + enum token_type type; + struct { + unsigned int cont:1; + unsigned int lowered:1; + } flags; + /*TOKEN owner_pantry; */ + char data[1]; +}; + +struct hdr_line +{ + struct hdr_line *next; + int cont; /* This is a continuation of the previous line. */ + unsigned char line[1]; +}; + +typedef struct hdr_line *HDR_LINE; + + +struct part +{ + struct part *right; /* The next part. */ + struct part *down; /* A contained part. */ + HDR_LINE hdr_lines; /* Header lines os that part. */ + HDR_LINE *hdr_lines_tail; /* Helper for adding lines. */ + char *boundary; /* Only used in the first part. */ +}; +typedef struct part *part_t; + +struct rfc822parse_context +{ + rfc822parse_cb_t callback; + void *callback_value; + int callback_error; + int in_body; + int in_preamble; /* Wether we are before the first boundary. */ + part_t parts; /* The tree of parts. */ + part_t current_part; /* Whom we are processing (points into parts). */ + const char *boundary; /* Current boundary. */ +}; + +static HDR_LINE find_header (rfc822parse_t msg, const char *name, + int which, HDR_LINE * rprev); + + +static size_t +length_sans_trailing_ws (const unsigned char *line, size_t len) +{ + const unsigned char *p, *mark; + size_t n; + + for (mark=NULL, p=line, n=0; n < len; n++, p++) + { + if (strchr (" \t\r\n", *p )) + { + if( !mark ) + mark = p; + } + else + mark = NULL; + } + + if (mark) + return mark - line; + return len; +} + + +static void +lowercase_string (unsigned char *string) +{ + for (; *string; string++) + if (*string >= 'A' && *string <= 'Z') + *string = *string - 'A' + 'a'; +} + + +static int +my_toupper (int c) +{ + if (c >= 'a' && c <= 'z') + c &= ~0x20; + return c; +} + +/* This is the same as ascii_strcasecmp. */ +static int +my_strcasecmp (const char *a, const char *b) +{ + if (a == b) + return 0; + + for (; *a && *b; a++, b++) + { + if (*a != *b && my_toupper(*a) != my_toupper(*b)) + break; + } + return *a == *b? 0 : (my_toupper (*a) - my_toupper (*b)); +} + + +#ifndef HAVE_STPCPY +static char * +my_stpcpy (char *a,const char *b) +{ + while (*b) + *a++ = *b++; + *a = 0; + + return (char*)a; +} +#define stpcpy my_stpcpy +#endif + + +/* If a callback has been registerd, call it for the event of type + EVENT. */ +static int +do_callback (rfc822parse_t msg, rfc822parse_event_t event) +{ + int rc; + + if (!msg->callback || msg->callback_error) + return 0; + rc = msg->callback (msg->callback_value, event, msg); + if (rc) + msg->callback_error = rc; + return rc; +} + +static part_t +new_part (void) +{ + part_t part; + + part = calloc (1, sizeof *part); + if (part) + { + part->hdr_lines_tail = &part->hdr_lines; + } + return part; +} + + +static void +release_part (part_t part) +{ + part_t tmp; + HDR_LINE hdr, hdr2; + + for (; part; part = tmp) + { + tmp = part->right; + if (part->down) + release_part (part->down); + for (hdr = part->hdr_lines; hdr; hdr = hdr2) + { + hdr2 = hdr->next; + free (hdr); + } + free (part->boundary); + free (part); + } +} + + +static void +release_handle_data (rfc822parse_t msg) +{ + release_part (msg->parts); + msg->parts = NULL; + msg->current_part = NULL; + msg->boundary = NULL; +} + + +/* Check that the header name is valid. We allow all lower and + * uppercase letters and, except for the first character, digits and + * the dash. The check stops at the first colon or at string end. + * Returns true if the name is valid. */ +int +rfc822_valid_header_name_p (const char *name) +{ + const char *s; + size_t namelen; + + if ((s=strchr (name, ':'))) + namelen = s - name; + else + namelen = strlen (name); + + if (!namelen + || strspn (name, HEADER_NAME_CHARS) != namelen + || strchr ("-0123456789", *name)) + return 0; + return 1; +} + + +/* Transform a header NAME into a standard capitalized format. + * Conversion stops at the colon. */ +void +rfc822_capitalize_header_name (char *name) +{ + unsigned char *p = name; + int first = 1; + + /* Special cases first. */ + if (!my_strcasecmp (name, "MIME-Version")) + { + strcpy (name, "MIME-Version"); + return; + } + + /* Regular cases. */ + for (; *p && *p != ':'; p++) + { + if (*p == '-') + first = 1; + else if (first) + { + if (*p >= 'a' && *p <= 'z') + *p = *p - 'a' + 'A'; + first = 0; + } + else if (*p >= 'A' && *p <= 'Z') + *p = *p - 'A' + 'a'; + } +} + + + +/* Create a new parsing context for an entire rfc822 message and + return it. CB and CB_VALUE may be given to callback for certain + events. NULL is returned on error with errno set appropriately. */ +rfc822parse_t +rfc822parse_open (rfc822parse_cb_t cb, void *cb_value) +{ + rfc822parse_t msg = calloc (1, sizeof *msg); + if (msg) + { + msg->parts = msg->current_part = new_part (); + if (!msg->parts) + { + free (msg); + msg = NULL; + } + else + { + msg->callback = cb; + msg->callback_value = cb_value; + if (do_callback (msg, RFC822PARSE_OPEN)) + { + release_handle_data (msg); + free (msg); + msg = NULL; + } + } + } + return msg; +} + + +void +rfc822parse_cancel (rfc822parse_t msg) +{ + if (msg) + { + do_callback (msg, RFC822PARSE_CANCEL); + release_handle_data (msg); + free (msg); + } +} + + +void +rfc822parse_close (rfc822parse_t msg) +{ + if (msg) + { + do_callback (msg, RFC822PARSE_CLOSE); + release_handle_data (msg); + free (msg); + } +} + +static part_t +find_parent (part_t tree, part_t target) +{ + part_t part; + + for (part = tree->down; part; part = part->right) + { + if (part == target) + return tree; /* Found. */ + if (part->down) + { + part_t tmp = find_parent (part, target); + if (tmp) + return tmp; + } + } + return NULL; +} + +static void +set_current_part_to_parent (rfc822parse_t msg) +{ + part_t parent; + + assert (msg->current_part); + parent = find_parent (msg->parts, msg->current_part); + if (!parent) + return; /* Already at the top. */ + +#ifndef NDEBUG + { + part_t part; + for (part = parent->down; part; part = part->right) + if (part == msg->current_part) + break; + assert (part); + } +#endif + msg->current_part = parent; + + parent = find_parent (msg->parts, parent); + msg->boundary = parent? parent->boundary: NULL; +} + + + +/**************** + * We have read in all header lines and are about to receive the body + * part. The delimiter line has already been processed. + * + * FIXME: we's better return an error in case of memory failures. + */ +static int +transition_to_body (rfc822parse_t msg) +{ + rfc822parse_field_t ctx; + int rc; + + rc = do_callback (msg, RFC822PARSE_T2BODY); + if (!rc) + { + /* Store the boundary if we have multipart type. */ + ctx = rfc822parse_parse_field (msg, "Content-Type", -1); + if (ctx) + { + const char *s; + + s = rfc822parse_query_media_type (ctx, NULL); + if (s && !strcmp (s,"multipart")) + { + s = rfc822parse_query_parameter (ctx, "boundary", 0); + if (s) + { + assert (!msg->current_part->boundary); + msg->current_part->boundary = malloc (strlen (s) + 1); + if (msg->current_part->boundary) + { + part_t part; + + strcpy (msg->current_part->boundary, s); + msg->boundary = msg->current_part->boundary; + part = new_part (); + if (!part) + { + int save_errno = errno; + rfc822parse_release_field (ctx); + errno = save_errno; + return -1; + } + rc = do_callback (msg, RFC822PARSE_LEVEL_DOWN); + assert (!msg->current_part->down); + msg->current_part->down = part; + msg->current_part = part; + msg->in_preamble = 1; + } + } + } + rfc822parse_release_field (ctx); + } + } + + return rc; +} + +/* We have just passed a MIME boundary and need to prepare for new part. + headers. */ +static int +transition_to_header (rfc822parse_t msg) +{ + part_t part; + + assert (msg->current_part); + assert (!msg->current_part->right); + + part = new_part (); + if (!part) + return -1; + + msg->current_part->right = part; + msg->current_part = part; + return 0; +} + + +static int +insert_header (rfc822parse_t msg, const unsigned char *line, size_t length) +{ + HDR_LINE hdr; + + assert (msg->current_part); + if (!length) + { + msg->in_body = 1; + return transition_to_body (msg); + } + + if (!msg->current_part->hdr_lines) + do_callback (msg, RFC822PARSE_BEGIN_HEADER); + + length = length_sans_trailing_ws (line, length); + hdr = malloc (sizeof (*hdr) + length); + if (!hdr) + return -1; + hdr->next = NULL; + hdr->cont = (*line == ' ' || *line == '\t'); + memcpy (hdr->line, line, length); + hdr->line[length] = 0; /* Make it a string. */ + + /* Transform a field name into canonical format. */ + if (!hdr->cont && strchr (line, ':')) + rfc822_capitalize_header_name (hdr->line); + + *msg->current_part->hdr_lines_tail = hdr; + msg->current_part->hdr_lines_tail = &hdr->next; + + /* Lets help the caller to prevent mail loops and issue an event for + * every Received header. */ + if (length >= 9 && !memcmp (line, "Received:", 9)) + do_callback (msg, RFC822PARSE_RCVD_SEEN); + return 0; +} + + +/**************** + * Note: We handle the body transparent to allow binary zeroes in it. + */ +static int +insert_body (rfc822parse_t msg, const unsigned char *line, size_t length) +{ + int rc = 0; + + if (length > 2 && *line == '-' && line[1] == '-' && msg->boundary) + { + size_t blen = strlen (msg->boundary); + + if (length == blen + 2 + && !memcmp (line+2, msg->boundary, blen)) + { + rc = do_callback (msg, RFC822PARSE_BOUNDARY); + msg->in_body = 0; + if (!rc && !msg->in_preamble) + rc = transition_to_header (msg); + msg->in_preamble = 0; + } + else if (length == blen + 4 + && line[length-2] =='-' && line[length-1] == '-' + && !memcmp (line+2, msg->boundary, blen)) + { + rc = do_callback (msg, RFC822PARSE_LAST_BOUNDARY); + msg->boundary = NULL; /* No current boundary anymore. */ + set_current_part_to_parent (msg); + + /* Fixme: The next should actually be send right before the + next boundary, so that we can mark the epilogue. */ + if (!rc) + rc = do_callback (msg, RFC822PARSE_LEVEL_UP); + } + } + if (msg->in_preamble && !rc) + rc = do_callback (msg, RFC822PARSE_PREAMBLE); + + return rc; +} + +/* Insert the next line into the parser. Return 0 on success or true + on error with errno set appropriately. */ +int +rfc822parse_insert (rfc822parse_t msg, const unsigned char *line, size_t length) +{ + return (msg->in_body + ? insert_body (msg, line, length) + : insert_header (msg, line, length)); +} + + +/* Tell the parser that we have finished the message. */ +int +rfc822parse_finish (rfc822parse_t msg) +{ + return do_callback (msg, RFC822PARSE_FINISH); +} + + + +/**************** + * Get a copy of a header line. The line is returned as one long + * string with LF to separate the continuation line. Caller must free + * the return buffer. WHICH may be used to enumerate over all lines. + * Wildcards are allowed. This function works on the current headers; + * i.e. the regular mail headers or the MIME headers of the current + * part. + * + * WHICH gives the mode: + * -1 := Take the last occurrence + * n := Take the n-th one. + * + * Returns a newly allocated buffer or NULL on error. errno is set in + * case of a memory failure or set to 0 if the requested field is not + * available. + * + * If VALUEOFF is not NULL it will receive the offset of the first non + * space character in the value part of the line (i.e. after the first + * colon). + */ +char * +rfc822parse_get_field (rfc822parse_t msg, const char *name, int which, + size_t *valueoff) +{ + HDR_LINE h, h2; + char *buf, *p; + size_t n; + + h = find_header (msg, name, which, NULL); + if (!h) + { + errno = 0; + return NULL; /* no such field */ + } + + n = strlen (h->line) + 1; + for (h2 = h->next; h2 && h2->cont; h2 = h2->next) + n += strlen (h2->line) + 1; + + buf = p = malloc (n); + if (buf) + { + p = stpcpy (p, h->line); + *p++ = '\n'; + for (h2 = h->next; h2 && h2->cont; h2 = h2->next) + { + p = stpcpy (p, h2->line); + *p++ = '\n'; + } + p[-1] = 0; + } + + if (valueoff) + { + p = strchr (buf, ':'); + if (!p) + *valueoff = 0; /* Oops: should never happen. */ + else + { + p++; + while (*p == ' ' || *p == '\t' || *p == '\r' || *p == '\n') + p++; + *valueoff = p - buf; + } + } + + return buf; +} + + +/**************** + * Enumerate all header. Caller has to provide the address of a pointer + * which has to be initialzed to NULL, the caller should then never change this + * pointer until he has closed the enumeration by passing again the address + * of the pointer but with msg set to NULL. + * The function returns pointers to all the header lines or NULL when + * all lines have been enumerated or no headers are available. + */ +const char * +rfc822parse_enum_header_lines (rfc822parse_t msg, void **context) +{ + HDR_LINE l; + + if (!msg) /* Close. */ + return NULL; + + if (*context == msg || !msg->current_part) + return NULL; + + l = *context ? (HDR_LINE) *context : msg->current_part->hdr_lines; + + if (l) + { + *context = l->next ? (void *) (l->next) : (void *) msg; + return l->line; + } + *context = msg; /* Mark end of list. */ + return NULL; +} + + + +/**************** + * Find a header field. If the Name does end in an asterisk this is meant + * to be a wildcard. + * + * which -1 : Retrieve the last field + * >0 : Retrieve the n-th field + + * RPREV may be used to return the predecessor of the returned field; + * which may be NULL for the very first one. It has to be initialzed + * to either NULL in which case the search start at the first header line, + * or it may point to a headerline, where the search should start + */ +static HDR_LINE +find_header (rfc822parse_t msg, const char *name, int which, HDR_LINE *rprev) +{ + HDR_LINE hdr, prev = NULL, mark = NULL; + unsigned char *p; + size_t namelen, n; + int found = 0; + int glob = 0; + + if (!msg->current_part) + return NULL; + + namelen = strlen (name); + if (namelen && name[namelen - 1] == '*') + { + namelen--; + glob = 1; + } + + hdr = msg->current_part->hdr_lines; + if (rprev && *rprev) + { + /* spool forward to the requested starting place. + * we cannot simply set this as we have to return + * the previous list element too */ + for (; hdr && hdr != *rprev; prev = hdr, hdr = hdr->next) + ; + } + + for (; hdr; prev = hdr, hdr = hdr->next) + { + if (hdr->cont) + continue; + if (!(p = strchr (hdr->line, ':'))) + continue; /* invalid header, just skip it. */ + n = p - hdr->line; + if (!n) + continue; /* invalid name */ + if ((glob ? (namelen <= n) : (namelen == n)) + && !memcmp (hdr->line, name, namelen)) + { + found++; + if (which == -1) + mark = hdr; + else if (found == which) + { + if (rprev) + *rprev = prev; + return hdr; + } + } + } + if (mark && rprev) + *rprev = prev; + return mark; +} + + + +static const char * +skip_ws (const char *s) +{ + while (*s == ' ' || *s == '\t' || *s == '\r' || *s == '\n') + s++; + return s; +} + + +static void +release_token_list (TOKEN t) +{ + while (t) + { + TOKEN t2 = t->next; + /* fixme: If we have owner_pantry, put the token back to + * this pantry so that it can be reused later */ + free (t); + t = t2; + } +} + + +static TOKEN +new_token (enum token_type type, const char *buf, size_t length) +{ + TOKEN t; + + /* fixme: look through our pantries to find a suitable + * token for reuse */ + t = malloc (sizeof *t + length); + if (t) + { + t->next = NULL; + t->type = type; + memset (&t->flags, 0, sizeof (t->flags)); + t->data[0] = 0; + if (buf) + { + memcpy (t->data, buf, length); + t->data[length] = 0; /* Make sure it is a C string. */ + } + else + t->data[0] = 0; + } + return t; +} + +static TOKEN +append_to_token (TOKEN old, const char *buf, size_t length) +{ + size_t n = strlen (old->data); + TOKEN t; + + t = malloc (sizeof *t + n + length); + if (t) + { + t->next = old->next; + t->type = old->type; + t->flags = old->flags; + memcpy (t->data, old->data, n); + memcpy (t->data + n, buf, length); + t->data[n + length] = 0; + old->next = NULL; + release_token_list (old); + } + return t; +} + + + +/* + Parse a field into tokens as defined by rfc822. + */ +static TOKEN +parse_field (HDR_LINE hdr) +{ + static const char specials[] = "<>@.,;:\\[]\"()"; + static const char specials2[] = "<>@.,;:"; + static const char tspecials[] = "/?=<>@,;:\\[]\"()"; + static const char tspecials2[] = "/?=<>@.,;:"; /* FIXME: really + include '.'?*/ + static struct + { + const unsigned char *name; + size_t namelen; + } tspecial_header[] = { + { "Content-Type", 12}, + { "Content-Transfer-Encoding", 25}, + { "Content-Disposition", 19}, + { NULL, 0} + }; + const char *delimiters; + const char *delimiters2; + const unsigned char *line, *s, *s2; + size_t n; + int i, invalid = 0; + TOKEN t, tok, *tok_tail; + + errno = 0; + if (!hdr) + return NULL; + + tok = NULL; + tok_tail = &tok; + + line = hdr->line; + if (!(s = strchr (line, ':'))) + return NULL; /* oops */ + + n = s - line; + if (!n) + return NULL; /* oops: invalid name */ + + delimiters = specials; + delimiters2 = specials2; + for (i = 0; tspecial_header[i].name; i++) + { + if (n == tspecial_header[i].namelen + && !memcmp (line, tspecial_header[i].name, n)) + { + delimiters = tspecials; + delimiters2 = tspecials2; + break; + } + } + + s++; /* Move over the colon. */ + for (;;) + { + while (!*s) + { + if (!hdr->next || !hdr->next->cont) + return tok; /* Ready. */ + + /* Next item is a header continuation line. */ + hdr = hdr->next; + s = hdr->line; + } + + if (*s == '(') + { + int level = 1; + int in_quote = 0; + + invalid = 0; + for (s++;; s++) + { + while (!*s) + { + if (!hdr->next || !hdr->next->cont) + goto oparen_out; + /* Next item is a header continuation line. */ + hdr = hdr->next; + s = hdr->line; + } + + if (in_quote) + { + if (*s == '\"') + in_quote = 0; + else if (*s == '\\' && s[1]) /* what about continuation? */ + s++; + } + else if (*s == ')') + { + if (!--level) + break; + } + else if (*s == '(') + level++; + else if (*s == '\"') + in_quote = 1; + } + oparen_out: + if (!*s) + ; /* Actually this is an error, but we don't care about it. */ + else + s++; + } + else if (*s == '\"' || *s == '[') + { + /* We do not check for non-allowed nesting of domainliterals */ + int term = *s == '\"' ? '\"' : ']'; + invalid = 0; + s++; + t = NULL; + + for (;;) + { + for (s2 = s; *s2; s2++) + { + if (*s2 == term) + break; + else if (*s2 == '\\' && s2[1]) /* what about continuation? */ + s2++; + } + + t = (t + ? append_to_token (t, s, s2 - s) + : new_token (term == '\"'? tQUOTED : tDOMAINLIT, s, s2 - s)); + if (!t) + goto failure; + + if (*s2 || !hdr->next || !hdr->next->cont) + break; + /* Next item is a header continuation line. */ + hdr = hdr->next; + s = hdr->line; + } + *tok_tail = t; + tok_tail = &t->next; + s = s2; + if (*s) + s++; /* skip the delimiter */ + } + else if ((s2 = strchr (delimiters2, *s))) + { /* Special characters which are not handled above. */ + invalid = 0; + t = new_token (tSPECIAL, s, 1); + if (!t) + goto failure; + *tok_tail = t; + tok_tail = &t->next; + s++; + } + else if (*s == ' ' || *s == '\t' || *s == '\r' || *s == '\n') + { + invalid = 0; + s = skip_ws (s + 1); + } + else if (*s > 0x20 && !(*s & 128)) + { /* Atom. */ + invalid = 0; + for (s2 = s + 1; *s2 > 0x20 + && !(*s2 & 128) && !strchr (delimiters, *s2); s2++) + ; + t = new_token (tATOM, s, s2 - s); + if (!t) + goto failure; + *tok_tail = t; + tok_tail = &t->next; + s = s2; + } + else + { /* Invalid character. */ + if (!invalid) + { /* For parsing we assume only one space. */ + t = new_token (tSPACE, NULL, 0); + if (!t) + goto failure; + *tok_tail = t; + tok_tail = &t->next; + invalid = 1; + } + s++; + } + } + /*NOTREACHED*/ + + failure: + { + int save = errno; + release_token_list (tok); + errno = save; + } + return NULL; +} + + + + +/**************** + * Find and parse a header field. + * WHICH indicates what to do if there are multiple instance of the same + * field (like "Received"); the following value are defined: + * -1 := Take the last occurrence + * 0 := Reserved + * n := Take the n-th one. + * Returns a handle for further operations on the parse context of the field + * or NULL if the field was not found. + */ +rfc822parse_field_t +rfc822parse_parse_field (rfc822parse_t msg, const char *name, int which) +{ + HDR_LINE hdr; + + if (!which) + return NULL; + + hdr = find_header (msg, name, which, NULL); + if (!hdr) + return NULL; + return parse_field (hdr); +} + +void +rfc822parse_release_field (rfc822parse_field_t ctx) +{ + if (ctx) + release_token_list (ctx); +} + + + +/**************** + * Check whether T points to a parameter. + * A parameter starts with a semicolon and it is assumed that t + * points to exactly this one. + */ +static int +is_parameter (TOKEN t) +{ + t = t->next; + if (!t || t->type != tATOM) + return 0; + t = t->next; + if (!t || !(t->type == tSPECIAL && t->data[0] == '=')) + return 0; + t = t->next; + if (!t) + return 1; /* We assume that an non existing value is an empty one. */ + return t->type == tQUOTED || t->type == tATOM; +} + +/* + Some header (Content-type) have a special syntax where attribute=value + pairs are used after a leading semicolon. The parse_field code + knows about these fields and changes the parsing to the one defined + in RFC2045. + Returns a pointer to the value which is valid as long as the + parse context is valid; NULL is returned in case that attr is not + defined in the header, a missing value is reppresented by an empty string. + + With LOWER_VALUE set to true, a matching field valuebe be + lowercased. + + Note, that ATTR should be lowercase. + */ +const char * +rfc822parse_query_parameter (rfc822parse_field_t ctx, const char *attr, + int lower_value) +{ + TOKEN t, a; + + for (t = ctx; t; t = t->next) + { + /* skip to the next semicolon */ + for (; t && !(t->type == tSPECIAL && t->data[0] == ';'); t = t->next) + ; + if (!t) + return NULL; + if (is_parameter (t)) + { /* Look closer. */ + a = t->next; /* We know that this is an atom */ + if ( !a->flags.lowered ) + { + lowercase_string (a->data); + a->flags.lowered = 1; + } + if (!strcmp (a->data, attr)) + { /* found */ + t = a->next->next; + /* Either T is now an atom, a quoted string or NULL in + * which case we return an empty string. */ + + if ( lower_value && t && !t->flags.lowered ) + { + lowercase_string (t->data); + t->flags.lowered = 1; + } + return t ? t->data : ""; + } + } + } + return NULL; +} + +/**************** + * This function may be used for the Content-Type header to figure out + * the media type and subtype. Note, that the returned strings are + * guaranteed to be lowercase as required by MIME. + * + * Returns: a pointer to the media type and if subtype is not NULL, + * a pointer to the subtype. + */ +const char * +rfc822parse_query_media_type (rfc822parse_field_t ctx, const char **subtype) +{ + TOKEN t = ctx; + const char *type; + + if (t->type != tATOM) + return NULL; + if (!t->flags.lowered) + { + lowercase_string (t->data); + t->flags.lowered = 1; + } + type = t->data; + t = t->next; + if (!t || t->type != tSPECIAL || t->data[0] != '/') + return NULL; + t = t->next; + if (!t || t->type != tATOM) + return NULL; + + if (subtype) + { + if (!t->flags.lowered) + { + lowercase_string (t->data); + t->flags.lowered = 1; + } + *subtype = t->data; + } + return type; +} + + + + + +#ifdef TESTING + +/* Internal debug function to print the structure of the message. */ +static void +dump_structure (rfc822parse_t msg, part_t part, int indent) +{ + if (!part) + { + printf ("*** Structure of this message:\n"); + part = msg->parts; + } + + for (; part; part = part->right) + { + rfc822parse_field_t ctx; + part_t save_part; /* ugly hack - we should have a function to + get part information. */ + const char *s; + + save_part = msg->current_part; + msg->current_part = part; + ctx = rfc822parse_parse_field (msg, "Content-Type", -1); + msg->current_part = save_part; + if (ctx) + { + const char *s1, *s2; + s1 = rfc822parse_query_media_type (ctx, &s2); + if (s1) + printf ("*** %*s %s/%s", indent*2, "", s1, s2); + else + printf ("*** %*s [not found]", indent*2, ""); + + s = rfc822parse_query_parameter (ctx, "boundary", 0); + if (s) + printf (" (boundary=\"%s\")", s); + rfc822parse_release_field (ctx); + } + else + printf ("*** %*s text/plain [assumed]", indent*2, ""); + putchar('\n'); + + if (part->down) + dump_structure (msg, part->down, indent + 1); + } + +} + + + +static void +show_param (rfc822parse_field_t ctx, const char *name) +{ + const char *s; + + if (!ctx) + return; + s = rfc822parse_query_parameter (ctx, name, 0); + if (s) + printf ("*** %s: '%s'\n", name, s); +} + + + +static void +show_event (rfc822parse_event_t event) +{ + const char *s; + + switch (event) + { + case RFC822PARSE_OPEN: s= "Open"; break; + case RFC822PARSE_CLOSE: s= "Close"; break; + case RFC822PARSE_CANCEL: s= "Cancel"; break; + case RFC822PARSE_T2BODY: s= "T2Body"; break; + case RFC822PARSE_FINISH: s= "Finish"; break; + case RFC822PARSE_RCVD_SEEN: s= "Rcvd_Seen"; break; + case RFC822PARSE_LEVEL_DOWN: s= "Level_Down"; break; + case RFC822PARSE_LEVEL_UP: s= "Level_Up"; break; + case RFC822PARSE_BOUNDARY: s= "Boundary"; break; + case RFC822PARSE_LAST_BOUNDARY: s= "Last_Boundary"; break; + case RFC822PARSE_BEGIN_HEADER: s= "Begin_Header"; break; + case RFC822PARSE_PREAMBLE: s= "Preamble"; break; + case RFC822PARSE_EPILOGUE: s= "Epilogue"; break; + default: s= "***invalid event***"; break; + } + printf ("*** got RFC822 event %s\n", s); +} + +static int +msg_cb (void *dummy_arg, rfc822parse_event_t event, rfc822parse_t msg) +{ + show_event (event); + if (event == RFC822PARSE_T2BODY) + { + rfc822parse_field_t ctx; + void *ectx; + const char *line; + + for (ectx=NULL; (line = rfc822parse_enum_header_lines (msg, &ectx)); ) + { + printf ("*** HDR: %s\n", line); + } + rfc822parse_enum_header_lines (NULL, &ectx); /* Close enumerator. */ + + ctx = rfc822parse_parse_field (msg, "Content-Type", -1); + if (ctx) + { + const char *s1, *s2; + s1 = rfc822parse_query_media_type (ctx, &s2); + if (s1) + printf ("*** media: '%s/%s'\n", s1, s2); + else + printf ("*** media: [not found]\n"); + show_param (ctx, "boundary"); + show_param (ctx, "protocol"); + rfc822parse_release_field (ctx); + } + else + printf ("*** media: text/plain [assumed]\n"); + + } + + + return 0; +} + + + +int +main (int argc, char **argv) +{ + char line[5000]; + size_t length; + rfc822parse_t msg; + + msg = rfc822parse_open (msg_cb, NULL); + if (!msg) + abort (); + + while (fgets (line, sizeof (line), stdin)) + { + length = strlen (line); + if (length && line[length - 1] == '\n') + line[--length] = 0; + if (length && line[length - 1] == '\r') + line[--length] = 0; + if (rfc822parse_insert (msg, line, length)) + abort (); + } + + dump_structure (msg, NULL, 0); + + rfc822parse_close (msg); + return 0; +} +#endif + +/* +Local Variables: +compile-command: "gcc -Wall -Wno-pointer-sign -g -DTESTING -o rfc822parse rfc822parse.c" +End: +*/ |