diff options
Diffstat (limited to 'src/lib-http/http-parser.c')
-rw-r--r-- | src/lib-http/http-parser.c | 208 |
1 files changed, 208 insertions, 0 deletions
diff --git a/src/lib-http/http-parser.c b/src/lib-http/http-parser.c new file mode 100644 index 0000000..7cd6c2a --- /dev/null +++ b/src/lib-http/http-parser.c @@ -0,0 +1,208 @@ +/* Copyright (c) 2013-2018 Dovecot authors, see the included COPYING file */ + +#include "lib.h" +#include "net.h" +#include "str.h" +#include "strescape.h" +#include "http-url.h" + +#include "http-parser.h" + +/* + Character definitions: + + tchar = "!" / "#" / "$" / "%" / "&" / "'" / "*" + / "+" / "-" / "." / "^" / "_" / "`" / "|" / "~" + / DIGIT / ALPHA + ; any VCHAR, except special + special = "(" / ")" / "<" / ">" / "@" / "," + / ";" / ":" / "\" / DQUOTE / "/" / "[" + / "]" / "?" / "=" / "{" / "}" + qdtext = OWS / %x21 / %x23-5B / %x5D-7E / obs-text + qdtext-nf = HTAB / SP / %x21 / %x23-5B / %x5D-7E / obs-text + ctext = OWS / %x21-27 / %x2A-5B / %x5D-7E / obs-text + obs-text = %x80-FF + OWS = *( SP / HTAB ) + VCHAR = %x21-7E + 't68char' = ALPHA / DIGIT / "-" / "." / "_" / "~" / "+" / "/" + + 'text' = ( HTAB / SP / VCHAR / obs-text ) + + Character bit mappings: + + (1<<0) => ALPHA / DIGIT / "-" / "." / "_" / "~" / "+" + (1<<1) => "!" / "#" / "$" / "%" / "&" / "'" / "*" / "^" / "`" / "|" + (1<<2) => special + (1<<3) => %x21 / %x2A-5B / %x5D-7E + (1<<4) => %x23-29 + (1<<5) => %x22-27 + (1<<6) => HTAB / SP / obs-text + (1<<7) => "/" + */ + +const unsigned char _http_token_char_mask = (1<<0)|(1<<1); +const unsigned char _http_value_char_mask = (1<<0)|(1<<1)|(1<<2); +const unsigned char _http_text_char_mask = (1<<0)|(1<<1)|(1<<2)|(1<<6); +const unsigned char _http_qdtext_char_mask = (1<<3)|(1<<4)|(1<<6); +const unsigned char _http_ctext_char_mask = (1<<3)|(1<<5)|(1<<6); +const unsigned char _http_token68_char_mask = (1<<0)|(1<<7); + +const unsigned char _http_char_lookup[256] = { + 0, 0, 0, 0, 0, 0, 0, 0, 0, 64, 0, 0, 0, 0, 0, 0, // 00 + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, // 10 + 64, 10, 36, 50, 50, 50, 50, 50, 20, 20, 10, 9, 12, 9, 9, 140, // 20 + 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 12, 12, 12, 12, 12, 12, // 30 + 12, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, // 40 + 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 12, 4, 12, 10, 9, // 50 + 10, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, // 60 + 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 12, 10, 12, 9, 0, // 70 + + 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, // 80 + 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, // 90 + 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, // A0 + 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, // B0 + 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, // C0 + 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, // D0 + 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, // E0 + 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, // F0 +}; + +/* + * HTTP value parsing + */ + +void http_parser_init(struct http_parser *parser, + const unsigned char *data, size_t size) +{ + i_zero(parser); + parser->begin = data; + parser->cur = data; + parser->end = data + size; +} + +void http_parse_ows(struct http_parser *parser) +{ + /* OWS = *( SP / HTAB ) */ + if (parser->cur >= parser->end) + return; + while (parser->cur < parser->end && + (parser->cur[0] == ' ' || parser->cur[0] == '\t')) { + parser->cur++; + } +} + +int http_parser_skip_token(struct http_parser *parser) +{ + /* token = 1*tchar */ + + if (parser->cur >= parser->end || !http_char_is_token(*parser->cur)) + return 0; + parser->cur++; + + while (parser->cur < parser->end && http_char_is_token(*parser->cur)) + parser->cur++; + return 1; +} + +int http_parse_token(struct http_parser *parser, const char **token_r) +{ + const unsigned char *first = parser->cur; + int ret; + + if ((ret=http_parser_skip_token(parser)) <= 0) + return ret; + *token_r = t_strndup(first, parser->cur - first); + return 1; +} + +int http_parse_token_list_next(struct http_parser *parser, + const char **token_r) +{ + /* http://tools.ietf.org/html/draft-ietf-httpbis-p1-messaging-21; + Appendix B: + + For compatibility with legacy list rules, recipients SHOULD accept + empty list elements. In other words, consumers would follow the list + productions: + + #element => [ ( "," / element ) *( OWS "," [ OWS element ] ) ] + 1#element => *( "," OWS ) element *( OWS "," [ OWS element ] ) + */ + + for (;;) { + if (http_parse_token(parser, token_r) > 0) + break; + http_parse_ows(parser); + if (parser->cur >= parser->end || parser->cur[0] != ',') + return 0; + parser->cur++; + http_parse_ows(parser); + } + + return 1; +} + +int http_parse_quoted_string(struct http_parser *parser, const char **str_r) +{ + string_t *str; + + /* quoted-string = DQUOTE *( qdtext / quoted-pair ) DQUOTE + qdtext = HTAB / SP / "!" / %x23-5B ; '#'-'[' + / %x5D-7E ; ']'-'~' + / obs-text + quoted-pair = "\" ( HTAB / SP / VCHAR / obs-text ) + obs-text = %x80-FF + */ + + /* DQUOTE */ + if (parser->cur >= parser->end || parser->cur[0] != '"') + return 0; + parser->cur++; + + /* *( qdtext / quoted-pair ) */ + str = t_str_new(256); + for (;;) { + const unsigned char *first; + + /* *qdtext */ + first = parser->cur; + while (parser->cur < parser->end && http_char_is_qdtext(*parser->cur)) + parser->cur++; + + if (parser->cur >= parser->end) + return -1; + + str_append_data(str, first, parser->cur - first); + + /* DQUOTE */ + if (*parser->cur == '"') { + parser->cur++; + break; + + /* "\" */ + } else if (*parser->cur == '\\') { + parser->cur++; + + if (parser->cur >= parser->end || !http_char_is_text(*parser->cur)) + return -1; + str_append_c(str, *parser->cur); + parser->cur++; + + /* ERROR */ + } else { + return -1; + } + } + *str_r = str_c(str); + return 1; +} + +int http_parse_token_or_qstring(struct http_parser *parser, + const char **word_r) +{ + if (parser->cur >= parser->end) + return 0; + if (parser->cur[0] == '"') + return http_parse_quoted_string(parser, word_r); + return http_parse_token(parser, word_r); +} |