diff options
Diffstat (limited to 'src/lib-http/http-url.c')
-rw-r--r-- | src/lib-http/http-url.c | 678 |
1 files changed, 678 insertions, 0 deletions
diff --git a/src/lib-http/http-url.c b/src/lib-http/http-url.c new file mode 100644 index 0000000..229a58a --- /dev/null +++ b/src/lib-http/http-url.c @@ -0,0 +1,678 @@ +/* Copyright (c) 2013-2018 Dovecot authors, see the included COPYING file */ + +#include "lib.h" +#include "str.h" +#include "strfuncs.h" +#include "net.h" +#include "uri-util.h" + +#include "http-url.h" +#include "http-request.h" + +/* + * HTTP URL parser + */ + +struct http_url_parser { + struct uri_parser parser; + + enum http_url_parse_flags flags; + + struct http_url *url; + struct http_url *base; + + enum http_request_target_format req_format; + + bool relative:1; + bool request_target:1; +}; + +static bool http_url_parse_authority_form(struct http_url_parser *url_parser); + +static bool +http_url_parse_scheme(struct http_url_parser *url_parser, const char **scheme_r) +{ + struct uri_parser *parser = &url_parser->parser; + + *scheme_r = NULL; + if ((url_parser->flags & HTTP_URL_PARSE_SCHEME_EXTERNAL) != 0) + return TRUE; + + if (uri_parse_scheme(parser, scheme_r) <= 0) { + parser->cur = parser->begin; + return FALSE; + } + + return TRUE; +} + +static bool http_url_parse_unknown_scheme(struct http_url_parser *url_parser) +{ + struct uri_parser *parser = &url_parser->parser; + + if (url_parser->request_target) { + /* Valid as non-HTTP scheme, but also try to parse as authority + */ + parser->cur = parser->begin; + if (!http_url_parse_authority_form(url_parser)) { + /* indicate non-http-url */ + url_parser->url = NULL; + url_parser->req_format = + HTTP_REQUEST_TARGET_FORMAT_ABSOLUTE; + } + return TRUE; + } + parser->error = "Not an HTTP URL"; + return FALSE; +} + +static bool +http_url_parse_userinfo(struct http_url_parser *url_parser, + struct uri_authority *auth, + const char **user_r, const char **password_r) +{ + struct uri_parser *parser = &url_parser->parser; + const char *p; + + *user_r = *password_r = NULL; + + if (auth->enc_userinfo == NULL) + return TRUE; + + if ((url_parser->flags & HTTP_URL_ALLOW_USERINFO_PART) == 0) { + /* RFC 7230, Section 2.7.1: http URI Scheme + + A sender MUST NOT generate the userinfo subcomponent (and its + "@" delimiter) when an "http" URI reference is generated + within a message as a request target or header field value. + Before making use of an "http" URI reference received from an + untrusted source, a recipient SHOULD parse for userinfo and + treat its presence as an error; it is likely being used to + obscure the authority for the sake of phishing attacks. + */ + parser->error = "HTTP URL does not allow `userinfo@' part"; + return FALSE; + } + + p = strchr(auth->enc_userinfo, ':'); + if (p == NULL) { + if (!uri_data_decode(parser, auth->enc_userinfo, NULL, user_r)) + return FALSE; + } else { + if (!uri_data_decode(parser, auth->enc_userinfo, p, user_r)) + return FALSE; + if (!uri_data_decode(parser, p + 1, NULL, password_r)) + return FALSE; + } + return TRUE; +} + +static bool http_url_parse_authority(struct http_url_parser *url_parser) +{ + struct uri_parser *parser = &url_parser->parser; + struct http_url *url = url_parser->url; + struct uri_authority auth; + const char *user = NULL, *password = NULL; + int ret; + + if ((ret = uri_parse_host_authority(parser, &auth)) < 0) + return FALSE; + if (auth.host.name == NULL || *auth.host.name == '\0') { + /* RFC 7230, Section 2.7.1: http URI Scheme + + A sender MUST NOT generate an "http" URI with an empty host + identifier. A recipient that processes such a URI reference + MUST reject it as invalid. + */ + parser->error = "HTTP URL does not allow empty host identifier"; + return FALSE; + } + if (ret > 0) { + if (!http_url_parse_userinfo(url_parser, &auth, + &user, &password)) + return FALSE; + } + if (url != NULL) { + uri_host_copy(parser->pool, &url->host, &auth.host); + url->port = auth.port; + url->user = p_strdup(parser->pool, user); + url->password = p_strdup(parser->pool, password); + } + return TRUE; +} + +static bool http_url_parse_authority_form(struct http_url_parser *url_parser) +{ + struct uri_parser *parser = &url_parser->parser; + + if (!http_url_parse_authority(url_parser)) + return FALSE; + if (parser->cur != parser->end) + return FALSE; + url_parser->req_format = HTTP_REQUEST_TARGET_FORMAT_AUTHORITY; + return TRUE; +} + +static int +http_url_parse_path(struct http_url_parser *url_parser) +{ + struct uri_parser *parser = &url_parser->parser; + struct http_url *url = url_parser->url, *base = url_parser->base; + const char *const *path; + int path_relative; + string_t *fullpath = NULL; + int ret; + + /* path-abempty / path-absolute / path-noscheme / path-empty */ + if ((ret = uri_parse_path(parser, &path_relative, &path)) < 0) + return -1; + + /* Resolve path */ + if (ret == 0) { + if (url_parser->relative && url != NULL) + url->path = p_strdup(parser->pool, base->path); + return 0; + } + + if (url != NULL) + fullpath = t_str_new(256); + + if (url_parser->relative && path_relative > 0 && base->path != NULL) { + const char *pbegin = base->path; + const char *pend = base->path + strlen(base->path); + const char *p = pend - 1; + + i_assert(*pbegin == '/'); + + /* Discard trailing segments of base path based on how many + effective leading '..' segments were found in the relative + path. + */ + while (path_relative > 0 && p > pbegin) { + while (p > pbegin && *p != '/') p--; + if (p >= pbegin) { + pend = p; + path_relative--; + } + if (p > pbegin) p--; + } + + if (url != NULL && pend > pbegin) + str_append_data(fullpath, pbegin, pend - pbegin); + } + + /* Append relative path */ + while (*path != NULL) { + const char *part; + + if (!uri_data_decode(parser, *path, NULL, &part)) + return -1; + + if (url != NULL) { + str_append_c(fullpath, '/'); + str_append(fullpath, part); + } + path++; + } + + if (url != NULL) + url->path = p_strdup(parser->pool, str_c(fullpath)); + return 1; +} + +static bool +http_url_parse_query(struct http_url_parser *url_parser, bool have_path) +{ + struct uri_parser *parser = &url_parser->parser; + struct http_url *url = url_parser->url, *base = url_parser->base; + const char *query; + int ret; + + if ((ret = uri_parse_query(parser, &query)) < 0) + return FALSE; + if (url == NULL) + return TRUE; + + if (ret > 0) + url->enc_query = p_strdup(parser->pool, query); + else if (url_parser->relative && !have_path) + url->enc_query = p_strdup(parser->pool, base->enc_query); + return TRUE; +} + +static bool +http_url_parse_fragment(struct http_url_parser *url_parser, bool have_path) +{ + struct uri_parser *parser = &url_parser->parser; + struct http_url *url = url_parser->url, *base = url_parser->base; + const char *fragment; + int ret; + + if ((ret = uri_parse_fragment(parser, &fragment)) < 0) + return FALSE; + if (ret > 0 && + (url_parser->flags & HTTP_URL_ALLOW_FRAGMENT_PART) == 0) { + parser->error = + "URL fragment not allowed for HTTP URL in this context"; + return FALSE; + } + if (url == NULL) + return TRUE; + + if (ret > 0) + url->enc_fragment = p_strdup(parser->pool, fragment); + else if (url_parser->relative && !have_path) + url->enc_fragment = p_strdup(parser->pool, base->enc_fragment); + return TRUE; +} + +static bool http_url_do_parse(struct http_url_parser *url_parser) +{ + struct uri_parser *parser = &url_parser->parser; + struct http_url *url = url_parser->url, *base = url_parser->base; + bool relative = TRUE, have_scheme = FALSE, have_authority = FALSE, + have_path = FALSE; + const char *scheme; + int ret; + + /* RFC 7230, Appendix B: + + http-URI = "http://" authority path-abempty [ "?" query ] + [ "#" fragment ] + https-URI = "https://" authority path-abempty [ "?" query ] + [ "#" fragment ] + partial-URI = relative-part [ "?" query ] + + request-target = origin-form / absolute-form / authority-form / + asterisk-form + + origin-form = absolute-path [ "?" query ] + absolute-form = absolute-URI + authority-form = authority + asterisk-form = "*" + ; Not parsed here + + absolute-path = 1*( "/" segment ) + + RFC 3986, Appendix A: (implemented in uri-util.h) + + absolute-URI = scheme ":" hier-part [ "?" query ] + + hier-part = "//" authority path-abempty + / path-absolute + / path-rootless + / path-empty + + relative-part = "//" authority path-abempty + / path-absolute + / path-noscheme + / path-empty + + authority = [ userinfo "@" ] host [ ":" port ] + + path-abempty = *( "/" segment ) + path-absolute = "/" [ segment-nz *( "/" segment ) ] + path-noscheme = segment-nz-nc *( "/" segment ) + path-rootless = segment-nz *( "/" segment ) + path-empty = 0<pchar> + + segment = *pchar + segment-nz = 1*pchar + segment-nz-nc = 1*( unreserved / pct-encoded / sub-delims / "@" ) + ; non-zero-length segment without any colon ":" + + query = *( pchar / "/" / "?" ) + fragment = *( pchar / "/" / "?" ) + */ + + /* "http:" / "https:" */ + if (http_url_parse_scheme(url_parser, &scheme)) { + if (scheme == NULL) { + /* Scheme externally parsed */ + } else if (strcasecmp(scheme, "https") == 0) { + if (url != NULL) + url->have_ssl = TRUE; + } else if (strcasecmp(scheme, "http") != 0) { + return http_url_parse_unknown_scheme(url_parser); + } + + relative = FALSE; + have_scheme = TRUE; + } + + /* "//" authority ; or + * ["//"] authority ; when parsing a request target + */ + if (parser->cur < parser->end && parser->cur[0] == '/') { + if ((have_scheme || !url_parser->request_target) && + (parser->cur + 1) < parser->end && parser->cur[1] == '/') { + parser->cur += 2; + relative = FALSE; + have_authority = TRUE; + } else { + /* start of absolute-path */ + } + } else if (url_parser->request_target && !have_scheme) { + if (!http_url_parse_authority_form(url_parser)) { + /* not non-HTTP scheme and invalid as authority-form */ + parser->error = "Request target is invalid"; + return FALSE; + } + return TRUE; + } + + if (have_scheme && !have_authority) { + parser->error = "Absolute HTTP URL requires `//' after `http:'"; + return FALSE; + } + + if (have_authority) { + if (!http_url_parse_authority(url_parser)) + return FALSE; + } + + /* Relative URLs are only valid when we have a base URL */ + if (relative) { + if (base == NULL) { + parser->error = "Relative HTTP URL not allowed"; + return FALSE; + } else if (!have_authority && url != NULL) { + uri_host_copy(parser->pool, &url->host, &base->host); + url->port = base->port; + url->have_ssl = base->have_ssl; + url->user = p_strdup_empty(parser->pool, base->user); + url->password = p_strdup_empty(parser->pool, + base->password); + } + + url_parser->relative = TRUE; + } + + /* path-abempty / path-absolute / path-noscheme / path-empty */ + ret = http_url_parse_path(url_parser); + if (ret < 0) + return FALSE; + have_path = (ret > 0); + + /* [ "?" query ] */ + if (!http_url_parse_query(url_parser, have_path)) + return FALSE; + + /* [ "#" fragment ] */ + if (!http_url_parse_fragment(url_parser, have_path)) + return FALSE; + + /* must be at end of URL now */ + i_assert(parser->cur == parser->end); + + if (have_scheme) + url_parser->req_format = HTTP_REQUEST_TARGET_FORMAT_ABSOLUTE; + return TRUE; +} + +/* Public API */ + +int http_url_parse(const char *url, struct http_url *base, + enum http_url_parse_flags flags, pool_t pool, + struct http_url **url_r, const char **error_r) +{ + struct http_url_parser url_parser; + + /* base != NULL indicates whether relative URLs are allowed. However, + certain flags may also dictate whether relative URLs are + allowed/required. */ + i_assert((flags & HTTP_URL_PARSE_SCHEME_EXTERNAL) == 0 || base == NULL); + + i_zero(&url_parser); + uri_parser_init(&url_parser.parser, pool, url); + url_parser.parser.allow_pct_nul = (flags & HTTP_URL_ALLOW_PCT_NUL) != 0; + + url_parser.url = p_new(pool, struct http_url, 1); + url_parser.base = base; + url_parser.flags = flags; + + if (!http_url_do_parse(&url_parser)) { + *error_r = url_parser.parser.error; + return -1; + } + *url_r = url_parser.url; + return 0; +} + +int http_url_request_target_parse(const char *request_target, + const char *host_header, + const struct http_url *default_base, + pool_t pool, + struct http_request_target *target, + const char **error_r) +{ + struct http_url_parser url_parser; + struct uri_authority auth; + struct http_url base; + + i_zero(&base); + if (host_header != NULL && *host_header != '\0') { + struct uri_parser *parser; + + i_zero(&url_parser); + parser = &url_parser.parser; + uri_parser_init(parser, pool, host_header); + + if (uri_parse_host_authority(parser, &auth) <= 0) { + *error_r = t_strdup_printf("Invalid Host header: %s", + parser->error); + return -1; + } + + if (parser->cur != parser->end || auth.enc_userinfo != NULL) { + *error_r = "Invalid Host header: " + "Contains invalid character"; + return -1; + } + + base.host = auth.host; + base.port = auth.port; + } else if (default_base == NULL) { + *error_r = "Empty Host header"; + return -1; + } else { + i_assert(default_base != NULL); + base = *default_base; + } + + if (request_target[0] == '*' && request_target[1] == '\0') { + struct http_url *url = p_new(pool, struct http_url, 1); + + uri_host_copy(pool, &url->host, &base.host); + url->port = base.port; + target->url = url; + target->format = HTTP_REQUEST_TARGET_FORMAT_ASTERISK; + return 0; + } + + i_zero(&url_parser); + uri_parser_init(&url_parser.parser, pool, request_target); + + url_parser.url = p_new(pool, struct http_url, 1); + url_parser.request_target = TRUE; + url_parser.req_format = HTTP_REQUEST_TARGET_FORMAT_ORIGIN; + url_parser.base = &base; + url_parser.flags = 0; + + if (!http_url_do_parse(&url_parser)) { + *error_r = url_parser.parser.error; + return -1; + } + + target->url = url_parser.url; + target->format = url_parser.req_format; + return 0; +} + +/* + * HTTP URL manipulation + */ + +void http_url_init_authority_from(struct http_url *dest, + const struct http_url *src) +{ + i_zero(dest); + dest->host = src->host; + dest->port = src->port; + dest->have_ssl = src->have_ssl; +} + +void http_url_copy_authority(pool_t pool, struct http_url *dest, + const struct http_url *src) +{ + i_zero(dest); + uri_host_copy(pool, &dest->host, &src->host); + dest->port = src->port; + dest->have_ssl = src->have_ssl; +} + +struct http_url * +http_url_clone_authority(pool_t pool, const struct http_url *src) +{ + struct http_url *new_url; + + new_url = p_new(pool, struct http_url, 1); + http_url_copy_authority(pool, new_url, src); + + return new_url; +} + +void http_url_copy(pool_t pool, struct http_url *dest, + const struct http_url *src) +{ + http_url_copy_authority(pool, dest, src); + dest->path = p_strdup(pool, src->path); + dest->enc_query = p_strdup(pool, src->enc_query); + dest->enc_fragment = p_strdup(pool, src->enc_fragment); +} + +void http_url_copy_with_userinfo(pool_t pool, struct http_url *dest, + const struct http_url *src) +{ + http_url_copy(pool, dest, src); + dest->user = p_strdup(pool, src->user); + dest->password = p_strdup(pool, src->password); +} + +struct http_url *http_url_clone(pool_t pool, const struct http_url *src) +{ + struct http_url *new_url; + + new_url = p_new(pool, struct http_url, 1); + http_url_copy(pool, new_url, src); + + return new_url; +} + +struct http_url * +http_url_clone_with_userinfo(pool_t pool, const struct http_url *src) +{ + struct http_url *new_url; + + new_url = p_new(pool, struct http_url, 1); + http_url_copy_with_userinfo(pool, new_url, src); + + return new_url; +} + +/* + * HTTP URL construction + */ + +static void +http_url_add_scheme(string_t *urlstr, const struct http_url *url) +{ + /* scheme */ + if (!url->have_ssl) + uri_append_scheme(urlstr, "http"); + else + uri_append_scheme(urlstr, "https"); + str_append(urlstr, "//"); +} + +static void +http_url_add_authority(string_t *urlstr, const struct http_url *url) +{ + /* host */ + uri_append_host(urlstr, &url->host); + /* port */ + uri_append_port(urlstr, url->port); +} + +static void +http_url_add_target(string_t *urlstr, const struct http_url *url) +{ + if (url->path == NULL || *url->path == '\0') { + /* Older syntax of RFC 2616 requires this slash at all times for + an absolute URL. */ + str_append_c(urlstr, '/'); + } else { + uri_append_path_data(urlstr, "", url->path); + } + + /* query (pre-encoded) */ + if (url->enc_query != NULL) { + str_append_c(urlstr, '?'); + str_append(urlstr, url->enc_query); + } +} + +const char *http_url_create(const struct http_url *url) +{ + string_t *urlstr = t_str_new(512); + + http_url_add_scheme(urlstr, url); + http_url_add_authority(urlstr, url); + http_url_add_target(urlstr, url); + + /* fragment */ + if (url->enc_fragment != NULL) { + str_append_c(urlstr, '#'); + str_append(urlstr, url->enc_fragment); + } + + return str_c(urlstr); +} + +const char *http_url_create_host(const struct http_url *url) +{ + string_t *urlstr = t_str_new(512); + + http_url_add_scheme(urlstr, url); + http_url_add_authority(urlstr, url); + + return str_c(urlstr); +} + +const char *http_url_create_authority(const struct http_url *url) +{ + string_t *urlstr = t_str_new(256); + + http_url_add_authority(urlstr, url); + + return str_c(urlstr); +} + +const char *http_url_create_target(const struct http_url *url) +{ + string_t *urlstr = t_str_new(256); + + http_url_add_target(urlstr, url); + + return str_c(urlstr); +} + +void http_url_escape_path(string_t *out, const char *data) +{ + uri_append_query_data(out, "&;?=+", data); +} + +void http_url_escape_param(string_t *out, const char *data) +{ + uri_append_query_data(out, "&;/?=+", data); +} |