summaryrefslogtreecommitdiffstats
path: root/src/lib-http/http-url.c
diff options
context:
space:
mode:
Diffstat (limited to '')
-rw-r--r--src/lib-http/http-url.c678
1 files changed, 678 insertions, 0 deletions
diff --git a/src/lib-http/http-url.c b/src/lib-http/http-url.c
new file mode 100644
index 0000000..229a58a
--- /dev/null
+++ b/src/lib-http/http-url.c
@@ -0,0 +1,678 @@
+/* Copyright (c) 2013-2018 Dovecot authors, see the included COPYING file */
+
+#include "lib.h"
+#include "str.h"
+#include "strfuncs.h"
+#include "net.h"
+#include "uri-util.h"
+
+#include "http-url.h"
+#include "http-request.h"
+
+/*
+ * HTTP URL parser
+ */
+
+struct http_url_parser {
+ struct uri_parser parser;
+
+ enum http_url_parse_flags flags;
+
+ struct http_url *url;
+ struct http_url *base;
+
+ enum http_request_target_format req_format;
+
+ bool relative:1;
+ bool request_target:1;
+};
+
+static bool http_url_parse_authority_form(struct http_url_parser *url_parser);
+
+static bool
+http_url_parse_scheme(struct http_url_parser *url_parser, const char **scheme_r)
+{
+ struct uri_parser *parser = &url_parser->parser;
+
+ *scheme_r = NULL;
+ if ((url_parser->flags & HTTP_URL_PARSE_SCHEME_EXTERNAL) != 0)
+ return TRUE;
+
+ if (uri_parse_scheme(parser, scheme_r) <= 0) {
+ parser->cur = parser->begin;
+ return FALSE;
+ }
+
+ return TRUE;
+}
+
+static bool http_url_parse_unknown_scheme(struct http_url_parser *url_parser)
+{
+ struct uri_parser *parser = &url_parser->parser;
+
+ if (url_parser->request_target) {
+ /* Valid as non-HTTP scheme, but also try to parse as authority
+ */
+ parser->cur = parser->begin;
+ if (!http_url_parse_authority_form(url_parser)) {
+ /* indicate non-http-url */
+ url_parser->url = NULL;
+ url_parser->req_format =
+ HTTP_REQUEST_TARGET_FORMAT_ABSOLUTE;
+ }
+ return TRUE;
+ }
+ parser->error = "Not an HTTP URL";
+ return FALSE;
+}
+
+static bool
+http_url_parse_userinfo(struct http_url_parser *url_parser,
+ struct uri_authority *auth,
+ const char **user_r, const char **password_r)
+{
+ struct uri_parser *parser = &url_parser->parser;
+ const char *p;
+
+ *user_r = *password_r = NULL;
+
+ if (auth->enc_userinfo == NULL)
+ return TRUE;
+
+ if ((url_parser->flags & HTTP_URL_ALLOW_USERINFO_PART) == 0) {
+ /* RFC 7230, Section 2.7.1: http URI Scheme
+
+ A sender MUST NOT generate the userinfo subcomponent (and its
+ "@" delimiter) when an "http" URI reference is generated
+ within a message as a request target or header field value.
+ Before making use of an "http" URI reference received from an
+ untrusted source, a recipient SHOULD parse for userinfo and
+ treat its presence as an error; it is likely being used to
+ obscure the authority for the sake of phishing attacks.
+ */
+ parser->error = "HTTP URL does not allow `userinfo@' part";
+ return FALSE;
+ }
+
+ p = strchr(auth->enc_userinfo, ':');
+ if (p == NULL) {
+ if (!uri_data_decode(parser, auth->enc_userinfo, NULL, user_r))
+ return FALSE;
+ } else {
+ if (!uri_data_decode(parser, auth->enc_userinfo, p, user_r))
+ return FALSE;
+ if (!uri_data_decode(parser, p + 1, NULL, password_r))
+ return FALSE;
+ }
+ return TRUE;
+}
+
+static bool http_url_parse_authority(struct http_url_parser *url_parser)
+{
+ struct uri_parser *parser = &url_parser->parser;
+ struct http_url *url = url_parser->url;
+ struct uri_authority auth;
+ const char *user = NULL, *password = NULL;
+ int ret;
+
+ if ((ret = uri_parse_host_authority(parser, &auth)) < 0)
+ return FALSE;
+ if (auth.host.name == NULL || *auth.host.name == '\0') {
+ /* RFC 7230, Section 2.7.1: http URI Scheme
+
+ A sender MUST NOT generate an "http" URI with an empty host
+ identifier. A recipient that processes such a URI reference
+ MUST reject it as invalid.
+ */
+ parser->error = "HTTP URL does not allow empty host identifier";
+ return FALSE;
+ }
+ if (ret > 0) {
+ if (!http_url_parse_userinfo(url_parser, &auth,
+ &user, &password))
+ return FALSE;
+ }
+ if (url != NULL) {
+ uri_host_copy(parser->pool, &url->host, &auth.host);
+ url->port = auth.port;
+ url->user = p_strdup(parser->pool, user);
+ url->password = p_strdup(parser->pool, password);
+ }
+ return TRUE;
+}
+
+static bool http_url_parse_authority_form(struct http_url_parser *url_parser)
+{
+ struct uri_parser *parser = &url_parser->parser;
+
+ if (!http_url_parse_authority(url_parser))
+ return FALSE;
+ if (parser->cur != parser->end)
+ return FALSE;
+ url_parser->req_format = HTTP_REQUEST_TARGET_FORMAT_AUTHORITY;
+ return TRUE;
+}
+
+static int
+http_url_parse_path(struct http_url_parser *url_parser)
+{
+ struct uri_parser *parser = &url_parser->parser;
+ struct http_url *url = url_parser->url, *base = url_parser->base;
+ const char *const *path;
+ int path_relative;
+ string_t *fullpath = NULL;
+ int ret;
+
+ /* path-abempty / path-absolute / path-noscheme / path-empty */
+ if ((ret = uri_parse_path(parser, &path_relative, &path)) < 0)
+ return -1;
+
+ /* Resolve path */
+ if (ret == 0) {
+ if (url_parser->relative && url != NULL)
+ url->path = p_strdup(parser->pool, base->path);
+ return 0;
+ }
+
+ if (url != NULL)
+ fullpath = t_str_new(256);
+
+ if (url_parser->relative && path_relative > 0 && base->path != NULL) {
+ const char *pbegin = base->path;
+ const char *pend = base->path + strlen(base->path);
+ const char *p = pend - 1;
+
+ i_assert(*pbegin == '/');
+
+ /* Discard trailing segments of base path based on how many
+ effective leading '..' segments were found in the relative
+ path.
+ */
+ while (path_relative > 0 && p > pbegin) {
+ while (p > pbegin && *p != '/') p--;
+ if (p >= pbegin) {
+ pend = p;
+ path_relative--;
+ }
+ if (p > pbegin) p--;
+ }
+
+ if (url != NULL && pend > pbegin)
+ str_append_data(fullpath, pbegin, pend - pbegin);
+ }
+
+ /* Append relative path */
+ while (*path != NULL) {
+ const char *part;
+
+ if (!uri_data_decode(parser, *path, NULL, &part))
+ return -1;
+
+ if (url != NULL) {
+ str_append_c(fullpath, '/');
+ str_append(fullpath, part);
+ }
+ path++;
+ }
+
+ if (url != NULL)
+ url->path = p_strdup(parser->pool, str_c(fullpath));
+ return 1;
+}
+
+static bool
+http_url_parse_query(struct http_url_parser *url_parser, bool have_path)
+{
+ struct uri_parser *parser = &url_parser->parser;
+ struct http_url *url = url_parser->url, *base = url_parser->base;
+ const char *query;
+ int ret;
+
+ if ((ret = uri_parse_query(parser, &query)) < 0)
+ return FALSE;
+ if (url == NULL)
+ return TRUE;
+
+ if (ret > 0)
+ url->enc_query = p_strdup(parser->pool, query);
+ else if (url_parser->relative && !have_path)
+ url->enc_query = p_strdup(parser->pool, base->enc_query);
+ return TRUE;
+}
+
+static bool
+http_url_parse_fragment(struct http_url_parser *url_parser, bool have_path)
+{
+ struct uri_parser *parser = &url_parser->parser;
+ struct http_url *url = url_parser->url, *base = url_parser->base;
+ const char *fragment;
+ int ret;
+
+ if ((ret = uri_parse_fragment(parser, &fragment)) < 0)
+ return FALSE;
+ if (ret > 0 &&
+ (url_parser->flags & HTTP_URL_ALLOW_FRAGMENT_PART) == 0) {
+ parser->error =
+ "URL fragment not allowed for HTTP URL in this context";
+ return FALSE;
+ }
+ if (url == NULL)
+ return TRUE;
+
+ if (ret > 0)
+ url->enc_fragment = p_strdup(parser->pool, fragment);
+ else if (url_parser->relative && !have_path)
+ url->enc_fragment = p_strdup(parser->pool, base->enc_fragment);
+ return TRUE;
+}
+
+static bool http_url_do_parse(struct http_url_parser *url_parser)
+{
+ struct uri_parser *parser = &url_parser->parser;
+ struct http_url *url = url_parser->url, *base = url_parser->base;
+ bool relative = TRUE, have_scheme = FALSE, have_authority = FALSE,
+ have_path = FALSE;
+ const char *scheme;
+ int ret;
+
+ /* RFC 7230, Appendix B:
+
+ http-URI = "http://" authority path-abempty [ "?" query ]
+ [ "#" fragment ]
+ https-URI = "https://" authority path-abempty [ "?" query ]
+ [ "#" fragment ]
+ partial-URI = relative-part [ "?" query ]
+
+ request-target = origin-form / absolute-form / authority-form /
+ asterisk-form
+
+ origin-form = absolute-path [ "?" query ]
+ absolute-form = absolute-URI
+ authority-form = authority
+ asterisk-form = "*"
+ ; Not parsed here
+
+ absolute-path = 1*( "/" segment )
+
+ RFC 3986, Appendix A: (implemented in uri-util.h)
+
+ absolute-URI = scheme ":" hier-part [ "?" query ]
+
+ hier-part = "//" authority path-abempty
+ / path-absolute
+ / path-rootless
+ / path-empty
+
+ relative-part = "//" authority path-abempty
+ / path-absolute
+ / path-noscheme
+ / path-empty
+
+ authority = [ userinfo "@" ] host [ ":" port ]
+
+ path-abempty = *( "/" segment )
+ path-absolute = "/" [ segment-nz *( "/" segment ) ]
+ path-noscheme = segment-nz-nc *( "/" segment )
+ path-rootless = segment-nz *( "/" segment )
+ path-empty = 0<pchar>
+
+ segment = *pchar
+ segment-nz = 1*pchar
+ segment-nz-nc = 1*( unreserved / pct-encoded / sub-delims / "@" )
+ ; non-zero-length segment without any colon ":"
+
+ query = *( pchar / "/" / "?" )
+ fragment = *( pchar / "/" / "?" )
+ */
+
+ /* "http:" / "https:" */
+ if (http_url_parse_scheme(url_parser, &scheme)) {
+ if (scheme == NULL) {
+ /* Scheme externally parsed */
+ } else if (strcasecmp(scheme, "https") == 0) {
+ if (url != NULL)
+ url->have_ssl = TRUE;
+ } else if (strcasecmp(scheme, "http") != 0) {
+ return http_url_parse_unknown_scheme(url_parser);
+ }
+
+ relative = FALSE;
+ have_scheme = TRUE;
+ }
+
+ /* "//" authority ; or
+ * ["//"] authority ; when parsing a request target
+ */
+ if (parser->cur < parser->end && parser->cur[0] == '/') {
+ if ((have_scheme || !url_parser->request_target) &&
+ (parser->cur + 1) < parser->end && parser->cur[1] == '/') {
+ parser->cur += 2;
+ relative = FALSE;
+ have_authority = TRUE;
+ } else {
+ /* start of absolute-path */
+ }
+ } else if (url_parser->request_target && !have_scheme) {
+ if (!http_url_parse_authority_form(url_parser)) {
+ /* not non-HTTP scheme and invalid as authority-form */
+ parser->error = "Request target is invalid";
+ return FALSE;
+ }
+ return TRUE;
+ }
+
+ if (have_scheme && !have_authority) {
+ parser->error = "Absolute HTTP URL requires `//' after `http:'";
+ return FALSE;
+ }
+
+ if (have_authority) {
+ if (!http_url_parse_authority(url_parser))
+ return FALSE;
+ }
+
+ /* Relative URLs are only valid when we have a base URL */
+ if (relative) {
+ if (base == NULL) {
+ parser->error = "Relative HTTP URL not allowed";
+ return FALSE;
+ } else if (!have_authority && url != NULL) {
+ uri_host_copy(parser->pool, &url->host, &base->host);
+ url->port = base->port;
+ url->have_ssl = base->have_ssl;
+ url->user = p_strdup_empty(parser->pool, base->user);
+ url->password = p_strdup_empty(parser->pool,
+ base->password);
+ }
+
+ url_parser->relative = TRUE;
+ }
+
+ /* path-abempty / path-absolute / path-noscheme / path-empty */
+ ret = http_url_parse_path(url_parser);
+ if (ret < 0)
+ return FALSE;
+ have_path = (ret > 0);
+
+ /* [ "?" query ] */
+ if (!http_url_parse_query(url_parser, have_path))
+ return FALSE;
+
+ /* [ "#" fragment ] */
+ if (!http_url_parse_fragment(url_parser, have_path))
+ return FALSE;
+
+ /* must be at end of URL now */
+ i_assert(parser->cur == parser->end);
+
+ if (have_scheme)
+ url_parser->req_format = HTTP_REQUEST_TARGET_FORMAT_ABSOLUTE;
+ return TRUE;
+}
+
+/* Public API */
+
+int http_url_parse(const char *url, struct http_url *base,
+ enum http_url_parse_flags flags, pool_t pool,
+ struct http_url **url_r, const char **error_r)
+{
+ struct http_url_parser url_parser;
+
+ /* base != NULL indicates whether relative URLs are allowed. However,
+ certain flags may also dictate whether relative URLs are
+ allowed/required. */
+ i_assert((flags & HTTP_URL_PARSE_SCHEME_EXTERNAL) == 0 || base == NULL);
+
+ i_zero(&url_parser);
+ uri_parser_init(&url_parser.parser, pool, url);
+ url_parser.parser.allow_pct_nul = (flags & HTTP_URL_ALLOW_PCT_NUL) != 0;
+
+ url_parser.url = p_new(pool, struct http_url, 1);
+ url_parser.base = base;
+ url_parser.flags = flags;
+
+ if (!http_url_do_parse(&url_parser)) {
+ *error_r = url_parser.parser.error;
+ return -1;
+ }
+ *url_r = url_parser.url;
+ return 0;
+}
+
+int http_url_request_target_parse(const char *request_target,
+ const char *host_header,
+ const struct http_url *default_base,
+ pool_t pool,
+ struct http_request_target *target,
+ const char **error_r)
+{
+ struct http_url_parser url_parser;
+ struct uri_authority auth;
+ struct http_url base;
+
+ i_zero(&base);
+ if (host_header != NULL && *host_header != '\0') {
+ struct uri_parser *parser;
+
+ i_zero(&url_parser);
+ parser = &url_parser.parser;
+ uri_parser_init(parser, pool, host_header);
+
+ if (uri_parse_host_authority(parser, &auth) <= 0) {
+ *error_r = t_strdup_printf("Invalid Host header: %s",
+ parser->error);
+ return -1;
+ }
+
+ if (parser->cur != parser->end || auth.enc_userinfo != NULL) {
+ *error_r = "Invalid Host header: "
+ "Contains invalid character";
+ return -1;
+ }
+
+ base.host = auth.host;
+ base.port = auth.port;
+ } else if (default_base == NULL) {
+ *error_r = "Empty Host header";
+ return -1;
+ } else {
+ i_assert(default_base != NULL);
+ base = *default_base;
+ }
+
+ if (request_target[0] == '*' && request_target[1] == '\0') {
+ struct http_url *url = p_new(pool, struct http_url, 1);
+
+ uri_host_copy(pool, &url->host, &base.host);
+ url->port = base.port;
+ target->url = url;
+ target->format = HTTP_REQUEST_TARGET_FORMAT_ASTERISK;
+ return 0;
+ }
+
+ i_zero(&url_parser);
+ uri_parser_init(&url_parser.parser, pool, request_target);
+
+ url_parser.url = p_new(pool, struct http_url, 1);
+ url_parser.request_target = TRUE;
+ url_parser.req_format = HTTP_REQUEST_TARGET_FORMAT_ORIGIN;
+ url_parser.base = &base;
+ url_parser.flags = 0;
+
+ if (!http_url_do_parse(&url_parser)) {
+ *error_r = url_parser.parser.error;
+ return -1;
+ }
+
+ target->url = url_parser.url;
+ target->format = url_parser.req_format;
+ return 0;
+}
+
+/*
+ * HTTP URL manipulation
+ */
+
+void http_url_init_authority_from(struct http_url *dest,
+ const struct http_url *src)
+{
+ i_zero(dest);
+ dest->host = src->host;
+ dest->port = src->port;
+ dest->have_ssl = src->have_ssl;
+}
+
+void http_url_copy_authority(pool_t pool, struct http_url *dest,
+ const struct http_url *src)
+{
+ i_zero(dest);
+ uri_host_copy(pool, &dest->host, &src->host);
+ dest->port = src->port;
+ dest->have_ssl = src->have_ssl;
+}
+
+struct http_url *
+http_url_clone_authority(pool_t pool, const struct http_url *src)
+{
+ struct http_url *new_url;
+
+ new_url = p_new(pool, struct http_url, 1);
+ http_url_copy_authority(pool, new_url, src);
+
+ return new_url;
+}
+
+void http_url_copy(pool_t pool, struct http_url *dest,
+ const struct http_url *src)
+{
+ http_url_copy_authority(pool, dest, src);
+ dest->path = p_strdup(pool, src->path);
+ dest->enc_query = p_strdup(pool, src->enc_query);
+ dest->enc_fragment = p_strdup(pool, src->enc_fragment);
+}
+
+void http_url_copy_with_userinfo(pool_t pool, struct http_url *dest,
+ const struct http_url *src)
+{
+ http_url_copy(pool, dest, src);
+ dest->user = p_strdup(pool, src->user);
+ dest->password = p_strdup(pool, src->password);
+}
+
+struct http_url *http_url_clone(pool_t pool, const struct http_url *src)
+{
+ struct http_url *new_url;
+
+ new_url = p_new(pool, struct http_url, 1);
+ http_url_copy(pool, new_url, src);
+
+ return new_url;
+}
+
+struct http_url *
+http_url_clone_with_userinfo(pool_t pool, const struct http_url *src)
+{
+ struct http_url *new_url;
+
+ new_url = p_new(pool, struct http_url, 1);
+ http_url_copy_with_userinfo(pool, new_url, src);
+
+ return new_url;
+}
+
+/*
+ * HTTP URL construction
+ */
+
+static void
+http_url_add_scheme(string_t *urlstr, const struct http_url *url)
+{
+ /* scheme */
+ if (!url->have_ssl)
+ uri_append_scheme(urlstr, "http");
+ else
+ uri_append_scheme(urlstr, "https");
+ str_append(urlstr, "//");
+}
+
+static void
+http_url_add_authority(string_t *urlstr, const struct http_url *url)
+{
+ /* host */
+ uri_append_host(urlstr, &url->host);
+ /* port */
+ uri_append_port(urlstr, url->port);
+}
+
+static void
+http_url_add_target(string_t *urlstr, const struct http_url *url)
+{
+ if (url->path == NULL || *url->path == '\0') {
+ /* Older syntax of RFC 2616 requires this slash at all times for
+ an absolute URL. */
+ str_append_c(urlstr, '/');
+ } else {
+ uri_append_path_data(urlstr, "", url->path);
+ }
+
+ /* query (pre-encoded) */
+ if (url->enc_query != NULL) {
+ str_append_c(urlstr, '?');
+ str_append(urlstr, url->enc_query);
+ }
+}
+
+const char *http_url_create(const struct http_url *url)
+{
+ string_t *urlstr = t_str_new(512);
+
+ http_url_add_scheme(urlstr, url);
+ http_url_add_authority(urlstr, url);
+ http_url_add_target(urlstr, url);
+
+ /* fragment */
+ if (url->enc_fragment != NULL) {
+ str_append_c(urlstr, '#');
+ str_append(urlstr, url->enc_fragment);
+ }
+
+ return str_c(urlstr);
+}
+
+const char *http_url_create_host(const struct http_url *url)
+{
+ string_t *urlstr = t_str_new(512);
+
+ http_url_add_scheme(urlstr, url);
+ http_url_add_authority(urlstr, url);
+
+ return str_c(urlstr);
+}
+
+const char *http_url_create_authority(const struct http_url *url)
+{
+ string_t *urlstr = t_str_new(256);
+
+ http_url_add_authority(urlstr, url);
+
+ return str_c(urlstr);
+}
+
+const char *http_url_create_target(const struct http_url *url)
+{
+ string_t *urlstr = t_str_new(256);
+
+ http_url_add_target(urlstr, url);
+
+ return str_c(urlstr);
+}
+
+void http_url_escape_path(string_t *out, const char *data)
+{
+ uri_append_query_data(out, "&;?=+", data);
+}
+
+void http_url_escape_param(string_t *out, const char *data)
+{
+ uri_append_query_data(out, "&;/?=+", data);
+}