diff options
Diffstat (limited to 'src/lib-smtp/smtp-parser.c')
-rw-r--r-- | src/lib-smtp/smtp-parser.c | 587 |
1 files changed, 587 insertions, 0 deletions
diff --git a/src/lib-smtp/smtp-parser.c b/src/lib-smtp/smtp-parser.c new file mode 100644 index 0000000..5672ff0 --- /dev/null +++ b/src/lib-smtp/smtp-parser.c @@ -0,0 +1,587 @@ +/* Copyright (c) 2013-2018 Dovecot authors, see the included COPYING file */ + +#include "lib.h" +#include "net.h" +#include "str.h" +#include "strescape.h" + +#include "smtp-parser.h" + +#include <ctype.h> + +/* Character definitions from RFC 5321/5322: + + textstring = 1*(%d09 / %d32-126) ; HT, SP, Printable US-ASCII + = 1*(%x09 / %x20-7e) + ehlo-param = 1*(%d33-126) + = 1*(%x21-7e) + ehlo-greet = 1*(%d0-9 / %d11-12 / %d14-127) + = 1*(%x00-09 / %x0b-0c / %x0e-7f) + qtext = %d32-33 / %d35-91 / %d93-126 + = %x20-21 / %x23-5B / %x5d-7e + quoted-pair = %d92 %d32-126 + = %x5c %x20-7e + atext = ALPHA / DIGIT / ; Printable US-ASCII + "!" / "#" / ; characters not including + "$" / "%" / ; specials. Used for atoms. + "&" / "'" / + "*" / "+" / + "-" / "/" / + "=" / "?" / + "^" / "_" / + "`" / "{" / + "|" / "}" / + "~" + = %x21 / %x23-27 / %x2a-2b / %x2d / %x2f-39 / %x3d / + %d3f / %x41-5a / %x5e-7e / + esmtp-value = 1*(%d33-60 / %d62-126) + = 1*(%x21-3c / %x3e-7e) + dcontent = %d33-90 / ; Printable US-ASCII + %d94-126 ; excl. "[", "\", "]" + = %x21-5a / %x5e-7e + xchar = any ASCII CHAR between "!" (33) and "~" (126) inclusive, + except for "+" and "=". [RFC 3461] + = %x21-2a / %2c-3c / %x3e-7e + + Bit mappings (FIXME: rearrange): + + (1<<0) => %x21-2a / %2c-3c / %x3e-7e (xtext) + (1<<1) => %x21 / %x23-27 / %x2a-2b / %x2d / %x2f-39 / %x3d / + %d3f / %x41-5a / %x5e-7e / + (1<<2) => %x28-29 / %x2c / %x2e / %x3a-3c / %x3e / %x40 + (1<<8) => %x00-09 / %x0b-0c / %x0e-20 / %x7f + (1<<5) => %x09 / %5b-5d + (1<<4) => %x5b / %x5d + (1<<3) => %x20 + (1<<9) => %x22 + (1<<6) => %x2b + (1<<7) => %x3d + */ + +/* xtext */ +const uint16_t smtp_xtext_char_mask = (1<<0); +/* atext */ +const uint16_t smtp_atext_char_mask = (1<<1); +/* dcontent */ +const uint16_t smtp_dcontent_char_mask = (1<<1)|(1<<2)|(1<<9); +/* qtext */ +const uint16_t smtp_qtext_char_mask = (1<<1)|(1<<2)|(1<<3)|(1<<4); +/* textstring */ +const uint16_t smtp_textstr_char_mask = (1<<1)|(1<<2)|(1<<9)|(1<<3)|(1<<5); +/* esmtp-value */ +const uint16_t smtp_esmtp_value_char_mask = (1<<0)|(1<<6); +/* ehlo-param */ +const uint16_t smtp_ehlo_param_char_mask = (1<<0)|(1<<6)|(1<<7); +/* ehlo-greet */ +const uint16_t smtp_ehlo_greet_char_mask = (1<<0)|(1<<6)|(1<<7)|(1<<8); +/* quoted-pair */ +const uint16_t smtp_qpair_char_mask = (1<<0)|(1<<3)|(1<<6)|(1<<7); + +const uint16_t smtp_char_lookup[256] = { + 0x100, 0x100, 0x100, 0x100, 0x100, 0x100, 0x100, 0x100, // 00 + 0x100, 0x120, 0x000, 0x100, 0x100, 0x000, 0x100, 0x100, // 08 + 0x100, 0x100, 0x100, 0x100, 0x100, 0x100, 0x100, 0x100, // 10 + 0x100, 0x100, 0x100, 0x100, 0x100, 0x100, 0x100, 0x100, // 18 + 0x108, 0x003, 0x201, 0x003, 0x003, 0x003, 0x003, 0x003, // 20 + 0x005, 0x005, 0x003, 0x042, 0x005, 0x003, 0x005, 0x003, // 28 + 0x003, 0x003, 0x003, 0x003, 0x003, 0x003, 0x003, 0x003, // 30 + 0x003, 0x003, 0x005, 0x005, 0x005, 0x082, 0x005, 0x003, // 38 + 0x005, 0x003, 0x003, 0x003, 0x003, 0x003, 0x003, 0x003, // 40 + 0x003, 0x003, 0x003, 0x003, 0x003, 0x003, 0x003, 0x003, // 48 + 0x003, 0x003, 0x003, 0x003, 0x003, 0x003, 0x003, 0x003, // 50 + 0x003, 0x003, 0x003, 0x031, 0x021, 0x031, 0x003, 0x003, // 58 + 0x003, 0x003, 0x003, 0x003, 0x003, 0x003, 0x003, 0x003, // 60 + 0x003, 0x003, 0x003, 0x003, 0x003, 0x003, 0x003, 0x003, // 68 + 0x003, 0x003, 0x003, 0x003, 0x003, 0x003, 0x003, 0x003, // 70 + 0x003, 0x003, 0x003, 0x003, 0x003, 0x003, 0x003, 0x100, // 78 + + 0x000, 0x000, 0x000, 0x000, 0x000, 0x000, 0x000, 0x000, // 80 + 0x000, 0x000, 0x000, 0x000, 0x000, 0x000, 0x000, 0x000, // 88 + 0x000, 0x000, 0x000, 0x000, 0x000, 0x000, 0x000, 0x000, // 90 + 0x000, 0x000, 0x000, 0x000, 0x000, 0x000, 0x000, 0x000, // 98 + 0x000, 0x000, 0x000, 0x000, 0x000, 0x000, 0x000, 0x000, // a0 + 0x000, 0x000, 0x000, 0x000, 0x000, 0x000, 0x000, 0x000, // a8 + 0x000, 0x000, 0x000, 0x000, 0x000, 0x000, 0x000, 0x000, // b0 + 0x000, 0x000, 0x000, 0x000, 0x000, 0x000, 0x000, 0x000, // b8 + 0x000, 0x000, 0x000, 0x000, 0x000, 0x000, 0x000, 0x000, // c0 + 0x000, 0x000, 0x000, 0x000, 0x000, 0x000, 0x000, 0x000, // c8 + 0x000, 0x000, 0x000, 0x000, 0x000, 0x000, 0x000, 0x000, // d0 + 0x000, 0x000, 0x000, 0x000, 0x000, 0x000, 0x000, 0x000, // d8 + 0x000, 0x000, 0x000, 0x000, 0x000, 0x000, 0x000, 0x000, // e0 + 0x000, 0x000, 0x000, 0x000, 0x000, 0x000, 0x000, 0x000, // e8 + 0x000, 0x000, 0x000, 0x000, 0x000, 0x000, 0x000, 0x000, // f0 + 0x000, 0x000, 0x000, 0x000, 0x000, 0x000, 0x000, 0x000, // f8 +}; + +/* + * Parser + */ + +void smtp_parser_init(struct smtp_parser *parser, + pool_t pool, const char *data) +{ + parser->pool = pool; + parser->begin = parser->cur = (const unsigned char *)data; + parser->end = parser->begin + strlen(data); + parser->error = NULL; +} + +/* + * Common syntax + */ + +static int +smtp_parser_parse_ldh_str(struct smtp_parser *parser, + string_t *out) +{ + const unsigned char *pbegin = parser->cur, *palnum; + + /* Ldh-str = *( ALPHA / DIGIT / "-" ) Let-dig + Let-dig = ALPHA / DIGIT + */ + + /* Ldh-str */ + palnum = NULL; + while (parser->cur < parser->end) { + if (i_isalnum(*parser->cur)) + palnum = parser->cur; + else if (*parser->cur != '-') + break; + parser->cur++; + } + if (parser->cur == pbegin || palnum == NULL) { + parser->cur = pbegin; + return 0; + } + + parser->cur = palnum+1; + if (out != NULL) + str_append_data(out, pbegin, parser->cur - pbegin); + return 1; +} + +int smtp_parser_parse_domain(struct smtp_parser *parser, + const char **value_r) +{ + string_t *value = NULL; + + /* Domain = sub-domain *("." sub-domain) + sub-domain = Let-dig [Ldh-str] + Let-dig = ALPHA / DIGIT + Ldh-str = *( ALPHA / DIGIT / "-" ) Let-dig + + NOTE: A more generic syntax is accepted to be lenient towards + systems that don't adhere to the standards. It allows + '-' and '_' to occur anywhere in a sub-domain. + */ + + /* Let-dig (first) (nope) */ + if (parser->cur >= parser->end || + (!i_isalnum(*parser->cur) && *parser->cur != '-' && + *parser->cur != '_')) + return 0; + + if (value_r != NULL) + value = t_str_new(256); + + for (;;) { + /* Let-dig (nope) */ + if (parser->cur >= parser->end || *parser->cur == '.') { + parser->error = "Empty sub-domain"; + return -1; + } + if (!i_isalnum(*parser->cur) && *parser->cur != '-' && + *parser->cur != '_') { + parser->error = "Invalid character in domain"; + return -1; + } + if (value_r != NULL) + str_append_c(value, *parser->cur); + parser->cur++; + + /* Ldh-str (nope) */ + while (parser->cur < parser->end) { + if (!i_isalnum(*parser->cur) && *parser->cur != '-' && + *parser->cur != '_') + break; + + if (value_r != NULL) + str_append_c(value, *parser->cur); + parser->cur++; + } + + /* *("." sub-domain) */ + if (parser->cur >= parser->end || *parser->cur != '.') + break; + + if (value_r != NULL) + str_append_c(value, '.'); + parser->cur++; + } + + if (value_r != NULL) + *value_r = str_c(value); + return 1; +} + +static int +smtp_parser_parse_snum(struct smtp_parser *parser, string_t *literal, + uint8_t *octet_r) +{ + const unsigned char *pbegin = parser->cur; + uint8_t octet = 0; + + /* Snum = 1*3DIGIT + ; representing a decimal integer + ; value in the range 0 through 255 + */ + + if (*parser->cur < '0' || *parser->cur > '9') + return 0; + do { + if (octet >= ((uint8_t)-1 / 10)) { + if (octet > (uint8_t)-1 / 10) + return -1; + if ((uint8_t)(*parser->cur - '0') > ((uint8_t)-1 % 10)) + return -1; + } + octet = octet * 10 + (*parser->cur - '0'); + parser->cur++; + } while (*parser->cur >= '0' && *parser->cur <= '9'); + + if (literal != NULL) + str_append_data(literal, pbegin, parser->cur - pbegin); + *octet_r = octet; + return 1; +} + +static int +smtp_parser_parse_ipv4_address(struct smtp_parser *parser, + string_t *literal, struct in_addr *ip4_r) +{ + uint8_t octet; + uint32_t ip = 0; + int ret; + int i; + + /* IPv4-address-literal = Snum 3("." Snum) */ + if ((ret = smtp_parser_parse_snum(parser, literal, &octet)) <= 0) + return ret; + ip = octet; + + for (i = 0; i < 3 && parser->cur < parser->end; i++) { + if (*parser->cur != '.') + return -1; + + if (literal != NULL) + str_append_c(literal, '.'); + parser->cur++; + + if (smtp_parser_parse_snum(parser, literal, &octet) <= 0) + return -1; + ip = (ip << 8) + octet; + } + + if (ip4_r != NULL) + ip4_r->s_addr = htonl(ip); + return 1; +} + +int smtp_parser_parse_address_literal(struct smtp_parser *parser, + const char **value_r, struct ip_addr *ip_r) +{ + const unsigned char *pblock; + struct in_addr ip4; + struct in6_addr ip6; + bool ipv6 = FALSE; + string_t *value = NULL, *tagbuf; + int ret; + + /* address-literal = "[" ( IPv4-address-literal / + IPv6-address-literal / + General-address-literal ) "]" + ; See Section 4.1.3 + + IPv6-address-literal = "IPv6:" IPv6-addr + General-address-literal = Standardized-tag ":" 1*dcontent + Standardized-tag = Ldh-str + ; Standardized-tag MUST be specified in a + ; Standards-Track RFC and registered with + ; IANA + dcontent = %d33-90 / ; Printable US-ASCII + %d94-126 ; excl. "[", "\", "]" + */ + + /* "[" */ + if (parser->cur >= parser->end || *parser->cur != '[') + return 0; + parser->cur++; + + if (value_r != NULL) { + value = t_str_new(128); + str_append_c(value, '['); + } + if (ip_r != NULL) + i_zero(ip_r); + + /* IPv4-address-literal / ... */ + i_zero(&ip4); + if ((ret=smtp_parser_parse_ipv4_address(parser, value, &ip4)) != 0) { + if (ret < 0) { + parser->error = "Invalid IPv4 address literal"; + return -1; + } + if (ip_r != NULL) { + ip_r->family = AF_INET; + ip_r->u.ip4 = ip4; + } + + /* ... / IPv6-address-literal / General-address-literal */ + } else { + /* IPv6-address-literal = "IPv6:" IPv6-addr + General-address-literal = Standardized-tag ":" 1*dcontent + Standardized-tag = Ldh-str + */ + if (value_r != NULL) { + tagbuf = value; + } else { + tagbuf = t_str_new(16); + str_append_c(tagbuf, '['); + } + if (smtp_parser_parse_ldh_str(parser, tagbuf) <= 0 || + parser->cur >= parser->end || *parser->cur != ':') { + parser->error = "Invalid address literal"; + return -1; + } + if (strcasecmp(str_c(tagbuf)+1, "IPv6") == 0) + ipv6 = TRUE; + else if (value_r == NULL) { + parser->error = t_strdup_printf( + "Unsupported %s address literal", + str_c(tagbuf)+1); + return -1; + } + parser->cur++; + if (value_r != NULL) + str_append_c(value, ':'); + + /* 1*dcontent */ + pblock = parser->cur; + while (parser->cur < parser->end && + smtp_char_is_dcontent(*parser->cur)) + parser->cur++; + + if (parser->cur == pblock) { + parser->error = "Empty address literal"; + return -1; + } + if (value_r != NULL) + str_append_data(value, pblock, parser->cur - pblock); + + if (ipv6) { + i_zero(&ip6); + if (inet_pton(AF_INET6, t_strndup(pblock, + parser->cur - pblock), &ip6) <= 0) { + parser->error = "Invalid IPv6 address literal"; + return -1; + } + if (ip_r != NULL) { + ip_r->family = AF_INET6; + ip_r->u.ip6 = ip6; + } + } + } + + /* ']' */ + if (parser->cur >= parser->end) { + parser->error = "Missing ']' at end of address literal"; + return -1; + } else if (*parser->cur != ']') { + parser->error = "Invalid character in address literal"; + return -1; + } + + parser->cur++; + if (value_r != NULL) { + str_append_c(value, ']'); + *value_r = str_c(value); + } + return 1; +} + +int smtp_parser_parse_quoted_string(struct smtp_parser *parser, + const char **value_r) +{ + string_t *value = NULL; + const unsigned char *pbegin; + + /* Quoted-string = DQUOTE *QcontentSMTP DQUOTE + QcontentSMTP = qtextSMTP / quoted-pairSMTP + quoted-pairSMTP = %d92 %d32-126 + ; i.e., backslash followed by any ASCII + ; graphic (including itself) or SPace + qtextSMTP = %d32-33 / %d35-91 / %d93-126 + ; i.e., within a quoted string, any + ; ASCII graphic or space is permitted + ; without blackslash-quoting except + ; double-quote and the backslash itself. + */ + + /* DQUOTE */ + if (parser->cur >= parser->end || *parser->cur != '"') + return 0; + parser->cur++; + + if (value_r != NULL) + value = t_str_new(256); + + /* *QcontentSMTP */ + while (parser->cur < parser->end) { + pbegin = parser->cur; + while (parser->cur < parser->end && + smtp_char_is_qtext(*parser->cur)) { + /* qtextSMTP */ + parser->cur++; + } + + if (value_r != NULL) + str_append_data(value, pbegin, parser->cur - pbegin); + + if (parser->cur >= parser->end || *parser->cur != '\\') + break; + parser->cur++; + + /* quoted-pairSMTP */ + if (parser->cur >= parser->end || + !smtp_char_is_qpair(*parser->cur)) { + parser->error = + "Invalid character after '\\' in quoted string"; + return -1; + } + + if (value_r != NULL) + str_append_c(value, *parser->cur); + parser->cur++; + } + + /* DQUOTE */ + if (parser->cur >= parser->end) { + parser->error = "Premature end of quoted string"; + return -1; + } + if (*parser->cur != '"') { + parser->error = "Invalid character in quoted string"; + return -1; + } + parser->cur++; + if (value_r != NULL) + *value_r = str_c(value); + return 1; +} + +static int +smtp_parser_skip_atom(struct smtp_parser *parser) +{ + /* Atom = 1*atext */ + + if (parser->cur >= parser->end || !smtp_char_is_atext(*parser->cur)) + return 0; + parser->cur++; + + while (parser->cur < parser->end && smtp_char_is_atext(*parser->cur)) + parser->cur++; + return 1; +} + +int smtp_parser_parse_atom(struct smtp_parser *parser, + const char **value_r) +{ + const unsigned char *pbegin = parser->cur; + int ret; + + if ((ret=smtp_parser_skip_atom(parser)) <= 0) + return ret; + + if (value_r != NULL) + *value_r = t_strndup(pbegin, parser->cur - pbegin); + return 1; +} + +int smtp_parser_parse_string(struct smtp_parser *parser, + const char **value_r) +{ + int ret; + + /* String = Atom / Quoted-string */ + + if ((ret=smtp_parser_parse_quoted_string(parser, value_r)) != 0) + return ret; + return smtp_parser_parse_atom(parser, value_r); +} + +static bool +smtp_parse_xtext_hexdigit(const unsigned char digit, + unsigned char *hexvalue) +{ + switch (digit) { + case '0': case '1': case '2': case '3': case '4': + case '5': case '6': case '7': case '8': case '9': + *hexvalue = (*hexvalue) << 4; + *hexvalue += digit - '0'; + break; + case 'A': case 'B': case 'C': case 'D': case 'E': case 'F': + *hexvalue = (*hexvalue) << 4; + *hexvalue += digit - 'A' + 10; + break; + default: + return FALSE; + } + return TRUE; +} + +int smtp_parser_parse_xtext(struct smtp_parser *parser, + string_t *out) +{ + unsigned char hexchar; + + /* xtext = *( xchar / hexchar ) + xchar = any ASCII CHAR between "!" (33) and "~" (126) inclusive, + except for "+" and "=". + hexchar = ASCII "+" immediately followed by two upper case + hexadecimal digits + */ + if (parser->cur >= parser->end || + (!smtp_char_is_xtext(*parser->cur) && *parser->cur != '+')) + return 0; + + while (parser->cur < parser->end) { + const unsigned char *pbegin = parser->cur; + + while (parser->cur < parser->end && + smtp_char_is_xtext(*parser->cur)) + parser->cur++; + + if (out != NULL) + str_append_data(out, pbegin, parser->cur - pbegin); + + if (parser->cur >= parser->end || *parser->cur != '+') + break; + parser->cur++; + + hexchar = 0; + if (smtp_parse_xtext_hexdigit(*parser->cur, &hexchar)) { + parser->cur++; + if (smtp_parse_xtext_hexdigit(*parser->cur, &hexchar)) { + parser->cur++; + if (out != NULL) + str_append_c(out, hexchar); + continue; + } + } + + parser->error = "Invalid hexchar after '+' in xtext"; + return -1; + } + + return 1; +} |