diff options
author | Daniel Baumann <daniel.baumann@progress-linux.org> | 2024-04-19 02:57:58 +0000 |
---|---|---|
committer | Daniel Baumann <daniel.baumann@progress-linux.org> | 2024-04-19 02:57:58 +0000 |
commit | be1c7e50e1e8809ea56f2c9d472eccd8ffd73a97 (patch) | |
tree | 9754ff1ca740f6346cf8483ec915d4054bc5da2d /web/server/h2o/libh2o/lib/common/url.c | |
parent | Initial commit. (diff) | |
download | netdata-be1c7e50e1e8809ea56f2c9d472eccd8ffd73a97.tar.xz netdata-be1c7e50e1e8809ea56f2c9d472eccd8ffd73a97.zip |
Adding upstream version 1.44.3.upstream/1.44.3upstream
Signed-off-by: Daniel Baumann <daniel.baumann@progress-linux.org>
Diffstat (limited to 'web/server/h2o/libh2o/lib/common/url.c')
-rw-r--r-- | web/server/h2o/libh2o/lib/common/url.c | 409 |
1 files changed, 409 insertions, 0 deletions
diff --git a/web/server/h2o/libh2o/lib/common/url.c b/web/server/h2o/libh2o/lib/common/url.c new file mode 100644 index 00000000..d65d18fb --- /dev/null +++ b/web/server/h2o/libh2o/lib/common/url.c @@ -0,0 +1,409 @@ +/* + * Copyright (c) 2014,2015 DeNA Co., Ltd. + * + * Permission is hereby granted, free of charge, to any person obtaining a copy + * of this software and associated documentation files (the "Software"), to + * deal in the Software without restriction, including without limitation the + * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or + * sell copies of the Software, and to permit persons to whom the Software is + * furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in + * all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE + * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING + * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS + * IN THE SOFTWARE. + */ +#include <sys/socket.h> +#include <sys/types.h> +#include <sys/un.h> +#include "h2o/memory.h" +#include "h2o/string_.h" +#include "h2o/url.h" + +const h2o_url_scheme_t H2O_URL_SCHEME_HTTP = {{H2O_STRLIT("http")}, 80}; +const h2o_url_scheme_t H2O_URL_SCHEME_HTTPS = {{H2O_STRLIT("https")}, 443}; + +static int decode_hex(int ch) +{ + if ('0' <= ch && ch <= '9') + return ch - '0'; + if ('A' <= ch && ch <= 'F') + return ch - 'A' + 0xa; + if ('a' <= ch && ch <= 'f') + return ch - 'a' + 0xa; + return -1; +} + +static size_t handle_special_paths(const char *path, size_t off, size_t last_slash) +{ + size_t orig_off = off, part_size = off - last_slash; + + if (part_size == 2 && path[off - 1] == '.') { + --off; + } else if (part_size == 3 && path[off - 2] == '.' && path[off - 1] == '.') { + off -= 2; + if (off > 1) { + for (--off; path[off - 1] != '/'; --off) + ; + } + } + return orig_off - off; +} + +/* Perform path normalization and URL decoding in one pass. + * See h2o_req_t for the purpose of @norm_indexes. */ +static h2o_iovec_t rebuild_path(h2o_mem_pool_t *pool, const char *src, size_t src_len, size_t *query_at, size_t **norm_indexes) +{ + char *dst; + size_t src_off = 0, dst_off = 0, last_slash, rewind; + + { /* locate '?', and set len to the end of input path */ + const char *q = memchr(src, '?', src_len); + if (q != NULL) { + src_len = *query_at = q - src; + } else { + *query_at = SIZE_MAX; + } + } + + /* dst can be 1 byte more than src if src is missing the prefixing '/' */ + dst = h2o_mem_alloc_pool(pool, src_len + 1); + *norm_indexes = h2o_mem_alloc_pool(pool, (src_len + 1) * sizeof(*norm_indexes[0])); + + if (src[0] == '/') + src_off++; + last_slash = dst_off; + dst[dst_off] = '/'; + (*norm_indexes)[dst_off] = src_off; + dst_off++; + + /* decode %xx */ + while (src_off < src_len) { + int hi, lo; + char decoded; + + if (src[src_off] == '%' && (src_off + 2 < src_len) && (hi = decode_hex(src[src_off + 1])) != -1 && + (lo = decode_hex(src[src_off + 2])) != -1) { + decoded = (hi << 4) | lo; + src_off += 3; + } else { + decoded = src[src_off++]; + } + if (decoded == '/') { + rewind = handle_special_paths(dst, dst_off, last_slash); + if (rewind > 0) { + dst_off -= rewind; + last_slash = dst_off - 1; + continue; + } + last_slash = dst_off; + } + dst[dst_off] = decoded; + (*norm_indexes)[dst_off] = src_off; + dst_off++; + } + rewind = handle_special_paths(dst, dst_off, last_slash); + dst_off -= rewind; + + return h2o_iovec_init(dst, dst_off); +} + +h2o_iovec_t h2o_url_normalize_path(h2o_mem_pool_t *pool, const char *path, size_t len, size_t *query_at, size_t **norm_indexes) +{ + const char *p = path, *end = path + len; + h2o_iovec_t ret; + + *query_at = SIZE_MAX; + *norm_indexes = NULL; + + if (len == 0) { + ret = h2o_iovec_init("/", 1); + return ret; + } + + if (path[0] != '/') + goto Rewrite; + + for (; p + 1 < end; ++p) { + if ((p[0] == '/' && p[1] == '.') || p[0] == '%') { + /* detect false positives as well */ + goto Rewrite; + } else if (p[0] == '?') { + *query_at = p - path; + goto Return; + } + } + for (; p < end; ++p) { + if (p[0] == '?') { + *query_at = p - path; + goto Return; + } + } + +Return: + ret.base = (char *)path; + ret.len = p - path; + return ret; + +Rewrite: + ret = rebuild_path(pool, path, len, query_at, norm_indexes); + if (ret.len == 0) + goto RewriteError; + if (ret.base[0] != '/') + goto RewriteError; + if (h2o_strstr(ret.base, ret.len, H2O_STRLIT("/../")) != SIZE_MAX) + goto RewriteError; + if (ret.len >= 3 && memcmp(ret.base + ret.len - 3, "/..", 3) == 0) + goto RewriteError; + return ret; +RewriteError: + fprintf(stderr, "failed to normalize path: `%.*s` => `%.*s`\n", (int)len, path, (int)ret.len, ret.base); + ret = h2o_iovec_init("/", 1); + return ret; +} + +static const char *parse_scheme(const char *s, const char *end, const h2o_url_scheme_t **scheme) +{ + if (end - s >= 5 && memcmp(s, "http:", 5) == 0) { + *scheme = &H2O_URL_SCHEME_HTTP; + return s + 5; + } else if (end - s >= 6 && memcmp(s, "https:", 6) == 0) { + *scheme = &H2O_URL_SCHEME_HTTPS; + return s + 6; + } + return NULL; +} + +const char *h2o_url_parse_hostport(const char *s, size_t len, h2o_iovec_t *host, uint16_t *port) +{ + const char *token_start = s, *token_end, *end = s + len; + + *port = 65535; + + if (token_start == end) + return NULL; + + if (*token_start == '[') { + /* is IPv6 address */ + ++token_start; + if ((token_end = memchr(token_start, ']', end - token_start)) == NULL) + return NULL; + *host = h2o_iovec_init(token_start, token_end - token_start); + token_start = token_end + 1; + } else { + for (token_end = token_start; !(token_end == end || *token_end == '/' || *token_end == ':'); ++token_end) + ; + *host = h2o_iovec_init(token_start, token_end - token_start); + token_start = token_end; + } + + /* disallow zero-length host */ + if (host->len == 0) + return NULL; + + /* parse port */ + if (token_start != end && *token_start == ':') { + size_t p; + ++token_start; + if ((token_end = memchr(token_start, '/', end - token_start)) == NULL) + token_end = end; + if ((p = h2o_strtosize(token_start, token_end - token_start)) >= 65535) + return NULL; + *port = (uint16_t)p; + token_start = token_end; + } + + return token_start; +} + +static int parse_authority_and_path(const char *src, const char *url_end, h2o_url_t *parsed) +{ + const char *p = h2o_url_parse_hostport(src, url_end - src, &parsed->host, &parsed->_port); + if (p == NULL) + return -1; + parsed->authority = h2o_iovec_init(src, p - src); + if (p == url_end) { + parsed->path = h2o_iovec_init(H2O_STRLIT("/")); + } else { + if (*p != '/') + return -1; + parsed->path = h2o_iovec_init(p, url_end - p); + } + return 0; +} + +int h2o_url_parse(const char *url, size_t url_len, h2o_url_t *parsed) +{ + const char *url_end, *p; + + if (url_len == SIZE_MAX) + url_len = strlen(url); + url_end = url + url_len; + + /* check and skip scheme */ + if ((p = parse_scheme(url, url_end, &parsed->scheme)) == NULL) + return -1; + + /* skip "//" */ + if (!(url_end - p >= 2 && p[0] == '/' && p[1] == '/')) + return -1; + p += 2; + + return parse_authority_and_path(p, url_end, parsed); +} + +int h2o_url_parse_relative(const char *url, size_t url_len, h2o_url_t *parsed) +{ + const char *url_end, *p; + + if (url_len == SIZE_MAX) + url_len = strlen(url); + url_end = url + url_len; + + /* obtain scheme and port number */ + if ((p = parse_scheme(url, url_end, &parsed->scheme)) == NULL) { + parsed->scheme = NULL; + p = url; + } + + /* handle "//" */ + if (url_end - p >= 2 && p[0] == '/' && p[1] == '/') + return parse_authority_and_path(p + 2, url_end, parsed); + + /* reset authority, host, port, and set path */ + parsed->authority = (h2o_iovec_t){NULL}; + parsed->host = (h2o_iovec_t){NULL}; + parsed->_port = 65535; + parsed->path = h2o_iovec_init(p, url_end - p); + + return 0; +} + +h2o_iovec_t h2o_url_resolve(h2o_mem_pool_t *pool, const h2o_url_t *base, const h2o_url_t *relative, h2o_url_t *dest) +{ + h2o_iovec_t base_path, relative_path, ret; + + assert(base->path.len != 0); + assert(base->path.base[0] == '/'); + + if (relative == NULL) { + /* build URL using base copied to dest */ + *dest = *base; + base_path = base->path; + relative_path = h2o_iovec_init(NULL, 0); + goto Build; + } + + /* scheme */ + dest->scheme = relative->scheme != NULL ? relative->scheme : base->scheme; + + /* authority (and host:port) */ + if (relative->authority.base != NULL) { + assert(relative->host.base != NULL); + dest->authority = relative->authority; + dest->host = relative->host; + dest->_port = relative->_port; + } else { + assert(relative->host.base == NULL); + assert(relative->_port == 65535); + dest->authority = base->authority; + dest->host = base->host; + dest->_port = base->_port; + } + + /* path */ + base_path = base->path; + if (relative->path.base != NULL) { + relative_path = relative->path; + h2o_url_resolve_path(&base_path, &relative_path); + } else { + assert(relative->path.len == 0); + relative_path = (h2o_iovec_t){NULL}; + } + +Build: + /* build the output */ + ret = h2o_concat(pool, dest->scheme->name, h2o_iovec_init(H2O_STRLIT("://")), dest->authority, base_path, relative_path); + /* adjust dest */ + dest->authority.base = ret.base + dest->scheme->name.len + 3; + dest->host.base = dest->authority.base; + if (dest->authority.len != 0 && dest->authority.base[0] == '[') + ++dest->host.base; + dest->path.base = dest->authority.base + dest->authority.len; + dest->path.len = ret.base + ret.len - dest->path.base; + + return ret; +} + +void h2o_url_resolve_path(h2o_iovec_t *base, h2o_iovec_t *relative) +{ + size_t base_path_len = base->len, rel_path_offset = 0; + + if (relative->len != 0 && relative->base[0] == '/') { + base_path_len = 0; + } else { + /* relative path */ + while (base->base[--base_path_len] != '/') + ; + while (rel_path_offset != relative->len) { + if (relative->base[rel_path_offset] == '.') { + if (relative->len - rel_path_offset >= 2 && relative->base[rel_path_offset + 1] == '.' && + (relative->len - rel_path_offset == 2 || relative->base[rel_path_offset + 2] == '/')) { + if (base_path_len != 0) { + while (base->base[--base_path_len] != '/') + ; + } + rel_path_offset += relative->len - rel_path_offset == 2 ? 2 : 3; + continue; + } + if (relative->len - rel_path_offset == 1) { + rel_path_offset += 1; + continue; + } else if (relative->base[rel_path_offset + 1] == '/') { + rel_path_offset += 2; + continue; + } + } + break; + } + base_path_len += 1; + } + + base->len = base_path_len; + *relative = h2o_iovec_init(relative->base + rel_path_offset, relative->len - rel_path_offset); +} + +void h2o_url_copy(h2o_mem_pool_t *pool, h2o_url_t *dest, const h2o_url_t *src) +{ + dest->scheme = src->scheme; + dest->authority = h2o_strdup(pool, src->authority.base, src->authority.len); + dest->host = h2o_strdup(pool, src->host.base, src->host.len); + dest->path = h2o_strdup(pool, src->path.base, src->path.len); + dest->_port = src->_port; +} + +const char *h2o_url_host_to_sun(h2o_iovec_t host, struct sockaddr_un *sa) +{ +#define PREFIX "unix:" + + if (host.len < sizeof(PREFIX) - 1 || memcmp(host.base, PREFIX, sizeof(PREFIX) - 1) != 0) + return h2o_url_host_to_sun_err_is_not_unix_socket; + + if (host.len - sizeof(PREFIX) - 1 >= sizeof(sa->sun_path)) + return "unix-domain socket path is too long"; + + memset(sa, 0, sizeof(*sa)); + sa->sun_family = AF_UNIX; + memcpy(sa->sun_path, host.base + sizeof(PREFIX) - 1, host.len - (sizeof(PREFIX) - 1)); + return NULL; + +#undef PREFIX +} + +const char *h2o_url_host_to_sun_err_is_not_unix_socket = "supplied name does not look like an unix-domain socket"; |