summaryrefslogtreecommitdiffstats
path: root/src/lib/uri-util.c
diff options
context:
space:
mode:
Diffstat (limited to '')
-rw-r--r--src/lib/uri-util.c1332
1 files changed, 1332 insertions, 0 deletions
diff --git a/src/lib/uri-util.c b/src/lib/uri-util.c
new file mode 100644
index 0000000..498bc88
--- /dev/null
+++ b/src/lib/uri-util.c
@@ -0,0 +1,1332 @@
+/* Copyright (c) 2010-2018 Dovecot authors, see the included COPYING file */
+
+#include "lib.h"
+#include "array.h"
+#include "str.h"
+#include "net.h"
+#include "uri-util.h"
+
+#include <ctype.h>
+
+/* [URI-GEN] RFC3986 Appendix A:
+
+ URI = scheme ":" hier-part [ "?" query ] [ "#" fragment ]
+ absolute-URI = scheme ":" hier-part [ "?" query ]
+ scheme = ALPHA *( ALPHA / DIGIT / "+" / "-" / "." )
+
+ URI-reference = URI / relative-ref
+ relative-ref = relative-part [ "?" query ] [ "#" fragment ]
+
+ relative-part = "//" authority path-abempty
+ / path-absolute
+ / path-noscheme
+ / path-empty
+ hier-part = "//" authority path-abempty
+ / path-absolute
+ / path-rootless
+ / path-empty
+
+ authority = [ userinfo "@" ] host [ ":" port ]
+ userinfo = *( unreserved / pct-encoded / sub-delims / ":" )
+ host = IP-literal / IPv4address / reg-name
+ port = *DIGIT
+
+ IP-literal = "[" ( IPv6address / IPvFuture ) "]"
+ IPvFuture = "v" 1*HEXDIG "." 1*( unreserved / sub-delims / ":" )
+ IPv6address = 6( h16 ":" ) ls32
+ / "::" 5( h16 ":" ) ls32
+ / [ h16 ] "::" 4( h16 ":" ) ls32
+ / [ *1( h16 ":" ) h16 ] "::" 3( h16 ":" ) ls32
+ / [ *2( h16 ":" ) h16 ] "::" 2( h16 ":" ) ls32
+ / [ *3( h16 ":" ) h16 ] "::" h16 ":" ls32
+ / [ *4( h16 ":" ) h16 ] "::" ls32
+ / [ *5( h16 ":" ) h16 ] "::" h16
+ / [ *6( h16 ":" ) h16 ] "::"
+ h16 = 1*4HEXDIG
+ ls32 = ( h16 ":" h16 ) / IPv4address
+ IPv4address = dec-octet "." dec-octet "." dec-octet "." dec-octet
+ dec-octet = DIGIT ; 0-9
+ / %x31-39 DIGIT ; 10-99
+ / "1" 2DIGIT ; 100-199
+ / "2" %x30-34 DIGIT ; 200-249
+ / "25" %x30-35 ; 250-255
+ reg-name = *( unreserved / pct-encoded / sub-delims )
+
+ path = path-abempty ; begins with "/" or is empty
+ / path-absolute ; begins with "/" but not "//"
+ / path-noscheme ; begins with a non-colon segment
+ / path-rootless ; begins with a segment
+ / path-empty ; zero characters
+ path-abempty = *( "/" segment )
+ path-absolute = "/" [ segment-nz *( "/" segment ) ]
+ path-noscheme = segment-nz-nc *( "/" segment )
+ path-rootless = segment-nz *( "/" segment )
+ path-empty = 0<pchar>
+
+ segment = *pchar
+ segment-nz = 1*pchar
+ segment-nz-nc = 1*( unreserved / pct-encoded / sub-delims / "@" )
+ ; non-zero-length segment without any colon ":"
+ pchar = unreserved / pct-encoded / sub-delims / ":" / "@"
+
+ query = *( pchar / "/" / "?" )
+ fragment = *( pchar / "/" / "?" )
+
+ pct-encoded = "%" HEXDIG HEXDIG
+ unreserved = ALPHA / DIGIT / "-" / "." / "_" / "~"
+ reserved = gen-delims / sub-delims
+ gen-delims = ":" / "/" / "?" / "#" / "[" / "]" / "@"
+ sub-delims = "!" / "$" / "&" / "'" / "(" / ")"
+ / "*" / "+" / "," / ";" / "="
+ */
+
+/* Upper bound used by uri_parse_scheme() while scanning a scheme name */
+#define URI_MAX_SCHEME_NAME_LEN 64
+
+/* Character classes stored as bit flags in _uri_char_lookup[]
+ *
+ *   bit0  unreserved = ALPHA / DIGIT / "-" / "." / "_" / "~"
+ *   bit1  sub-delims = "!" / "$" / "&" / "'" / "(" / ")"
+ *                    / "*" / "+" / "," / ";" / "="
+ *   bit2  gen-delims = ":" / "/" / "?" / "#" / "[" / "]" / "@"
+ *   bit3  ":" / "@"   (the two extra characters allowed in pchar)
+ *   bit4  ":"         (the extra character allowed in 'uchar')
+ *   bit5  "/"
+ *   bit6  "?"
+ *
+ * Composite classes built from those bits:
+ *
+ *   pchar    = unreserved / sub-delims / ":" / "@"
+ *   'pfchar' = pchar / "/"
+ *   'uchar'  = unreserved / sub-delims / ":"
+ *   'qchar'  = pchar / "/" / "?"
+ */
+
+#define CHAR_MASK_UNRESERVED (1<<0)
+#define CHAR_MASK_SUB_DELIMS (1<<1)
+#define CHAR_MASK_PCHAR (CHAR_MASK_UNRESERVED|CHAR_MASK_SUB_DELIMS|(1<<3))
+#define CHAR_MASK_PFCHAR (CHAR_MASK_PCHAR|(1<<5))
+#define CHAR_MASK_UCHAR (CHAR_MASK_UNRESERVED|CHAR_MASK_SUB_DELIMS|(1<<4))
+#define CHAR_MASK_QCHAR (CHAR_MASK_PFCHAR|(1<<6))
+#define CHAR_MASK_UNRESERVED_PATH (CHAR_MASK_UNRESERVED|(1<<5))
+
+/* Classification flags for the ASCII range (see the CHAR_MASK_* macros).
+   Only the first 128 entries are spelled out; the remaining entries of the
+   256-element array are implicitly zero. */
+static const unsigned char _uri_char_lookup[256] = {
+	/* 0x00 - 0x0f */
+	0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
+	/* 0x10 - 0x1f */
+	0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
+	/* 0x20 - 0x2f: SP ! " # $ % & ' ( ) * + , - . / */
+	0, 2, 0, 4, 2, 0, 2, 2, 2, 2, 2, 2, 2, 1, 1, 36,
+	/* 0x30 - 0x3f: 0-9 : ; < = > ? */
+	1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 28, 2, 0, 2, 0, 68,
+	/* 0x40 - 0x4f: @ A-O */
+	12, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
+	/* 0x50 - 0x5f: P-Z [ \ ] ^ _ */
+	1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 4, 0, 4, 0, 1,
+	/* 0x60 - 0x6f: ` a-o */
+	0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
+	/* 0x70 - 0x7f: p-z { | } ~ DEL */
+	1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0, 0, 0, 1, 0,
+};
+
+/* Map one ASCII hexadecimal digit (either case) to its value 0..15.
+   Returns -1 for any other byte. */
+static inline int _decode_hex_digit(const unsigned char digit)
+{
+	if (digit >= '0' && digit <= '9')
+		return digit - '0';
+	if (digit >= 'a' && digit <= 'f')
+		return digit - 'a' + 0x0a;
+	if (digit >= 'A' && digit <= 'F')
+		return digit - 'A' + 0x0A;
+	return -1;
+}
+
+/* Decode a single pct-encoded triplet ("%" HEXDIG HEXDIG) located at *p.
+ *
+ * pend is an optional exclusive upper bound for the data; when it is NULL
+ * only a '\0' byte ends the data.
+ *
+ * Returns 0 when *p does not point at '%' (nothing is consumed), 1 when a
+ * triplet was decoded into *ch_r (with *p advanced past it), and -1 with
+ * parser->error set when the triplet is truncated, contains a non-hex
+ * digit, or encodes NUL while parser->allow_pct_nul is not set.
+ */
+static int
+uri_parse_pct_encoded_data(struct uri_parser *parser,
+	const unsigned char **p, const unsigned char *pend,
+	unsigned char *ch_r) ATTR_NULL(3)
+{
+	int value;
+
+	/* test the boundary first, so that nothing at or beyond pend
+	   is ever dereferenced */
+	if ((pend != NULL && *p >= pend) || **p != '%')
+		return 0;
+	*p += 1;
+
+	/* both hex digits must lie before pend and before any '\0' */
+	if ((pend != NULL && *p+1 >= pend) || **p == 0 || *(*p+1) == 0) {
+		parser->error = "Unexpected URI boundary after '%'";
+		return -1;
+	}
+
+	/* high nibble */
+	if ((value = _decode_hex_digit(**p)) < 0) {
+		parser->error = p_strdup_printf(parser->pool,
+			"Expecting hex digit after '%%', but found '%c'", **p);
+		return -1;
+	}
+
+	*ch_r = (value & 0x0f) << 4;
+	*p += 1;
+
+	/* low nibble */
+	if ((value = _decode_hex_digit(**p)) < 0) {
+		parser->error = p_strdup_printf(parser->pool,
+			"Expecting hex digit after '%%%c', but found '%c'", *((*p)-1), **p);
+		return -1;
+	}
+
+	*ch_r |= (value & 0x0f);
+	*p += 1;
+
+	if (!parser->allow_pct_nul && *ch_r == '\0') {
+		parser->error =
+			"Percent encoding is not allowed to encode NUL character";
+		return -1;
+	}
+	return 1;
+}
+
+/* Decode one pct-encoded triplet at the parser's current position, bounded
+   by the end of the parser's data. Return values are those of
+   uri_parse_pct_encoded_data(). */
+int uri_parse_pct_encoded(struct uri_parser *parser,
+	unsigned char *ch_r)
+{
+	const unsigned char *limit = parser->end;
+
+	return uri_parse_pct_encoded_data(parser, &parser->cur, limit, ch_r);
+}
+
+/* Consume the byte at the current position if it is an 'unreserved'
+   character. Returns 1 and stores the byte in *ch_r when it was consumed,
+   0 otherwise. The caller must make sure the position is readable. */
+static int
+uri_parse_unreserved_char(struct uri_parser *parser, unsigned char *ch_r)
+{
+	const unsigned char c = *parser->cur;
+
+	/* 8bit bytes never match */
+	if ((c & 0x80) != 0 ||
+	    (_uri_char_lookup[c] & CHAR_MASK_UNRESERVED) == 0)
+		return 0;
+
+	*ch_r = c;
+	parser->cur++;
+	return 1;
+}
+
+/* Consume a run of 'unreserved' characters, appending them to part when it
+   is not NULL. Returns 1 if at least one character was consumed, 0 if none,
+   and -1 if the character helper reports an error. */
+int uri_parse_unreserved(struct uri_parser *parser, string_t *part)
+{
+	bool consumed = FALSE;
+
+	while (parser->cur < parser->end) {
+		unsigned char ch = 0;
+		int ret = uri_parse_unreserved_char(parser, &ch);
+
+		if (ret < 0)
+			return -1;
+		if (ret == 0)
+			break;
+
+		if (part != NULL)
+			str_append_c(part, ch);
+		consumed = TRUE;
+	}
+
+	return consumed ? 1 : 0;
+}
+
+/* Consume a run of 'unreserved' characters and pct-encoded triplets. The
+   decoded bytes are appended to part when it is not NULL. Returns 1 if
+   anything was consumed, 0 if nothing, -1 on a pct-encoding error. */
+int uri_parse_unreserved_pct(struct uri_parser *parser, string_t *part)
+{
+	bool consumed = FALSE;
+
+	while (parser->cur < parser->end) {
+		unsigned char ch = 0;
+		int ret;
+
+		/* try a pct-encoded triplet first, then a plain character */
+		ret = uri_parse_pct_encoded(parser, &ch);
+		if (ret == 0)
+			ret = uri_parse_unreserved_char(parser, &ch);
+		if (ret < 0)
+			return -1;
+		if (ret == 0)
+			break;
+
+		if (part != NULL)
+			str_append_c(part, ch);
+		consumed = TRUE;
+	}
+
+	return consumed ? 1 : 0;
+}
+
+/* Percent-decode the string at data.
+ *
+ * until is an optional exclusive end pointer; when it is NULL, decoding runs
+ * up to the terminating '\0'. On success TRUE is returned and, if decoded_r
+ * is not NULL, *decoded_r receives the decoded string (allocated from
+ * parser->pool, or "" for empty input). FALSE is returned when a
+ * pct-encoded triplet is invalid; parser->error is set by the decoder.
+ *
+ * The until boundary is also passed to the triplet decoder, so a triplet
+ * that would extend to or beyond until is reported as an error rather than
+ * being decoded from bytes outside the requested range.
+ */
+bool uri_data_decode(struct uri_parser *parser, const char *data,
+	const char *until, const char **decoded_r)
+{
+	const unsigned char *p = (const unsigned char *)data;
+	const unsigned char *pend = (const unsigned char *)until;
+	string_t *decoded;
+	int ret;
+
+	/* pend == NULL means unlimited; solely rely on '\0' */
+	if ((pend != NULL && p >= pend) || *p == '\0') {
+		if (decoded_r != NULL)
+			*decoded_r = "";
+		return TRUE;
+	}
+
+	decoded = uri_parser_get_tmpbuf(parser, 256);
+	while ((pend == NULL || p < pend) && *p != '\0') {
+		unsigned char ch;
+
+		if ((ret=uri_parse_pct_encoded_data
+			(parser, &p, pend, &ch)) != 0) {
+			if (ret < 0)
+				return FALSE;
+			str_append_c(decoded, ch);
+		} else {
+			/* not a triplet: copy the byte as-is */
+			str_append_c(decoded, *p);
+			p++;
+		}
+	}
+
+	if (decoded_r != NULL)
+		*decoded_r = p_strdup(parser->pool, str_c(decoded));
+	return TRUE;
+}
+
+/* Parse a scheme name followed by ':' at the current position.
+ *
+ * Returns 0 (nothing consumed) when the data does not start with a letter,
+ * -1 with parser->error set when the name is not terminated by ':', and 1
+ * on success with the cursor placed after the ':'. On success *scheme_r
+ * (if not NULL) receives a t_strndup() copy of the name without the ':'.
+ */
+int uri_parse_scheme(struct uri_parser *parser, const char **scheme_r)
+{
+	const unsigned char *const start = parser->cur;
+	size_t n;
+
+	/* RFC 3986:
+	 *   scheme = ALPHA *( ALPHA / DIGIT / "+" / "-" / "." )
+	 */
+
+	/* leading ALPHA */
+	if (parser->cur >= parser->end || !i_isalpha(*parser->cur))
+		return 0;
+	parser->cur++;
+
+	/* remaining characters, up to the length limit */
+	for (n = 1; n < URI_MAX_SCHEME_NAME_LEN &&
+	     parser->cur < parser->end; n++) {
+		const unsigned char c = *parser->cur;
+
+		if (!i_isalnum(c) && c != '+' && c != '-' && c != '.')
+			break;
+		parser->cur++;
+	}
+
+	if (parser->cur >= parser->end || *parser->cur != ':') {
+		parser->error = "Invalid URI scheme";
+		return -1;
+	}
+	if (scheme_r != NULL)
+		*scheme_r = t_strndup(start, parser->cur - start);
+	parser->cur++;
+	return 1;
+}
+
+/* Split the scheme off the string at *uri_p. On success 0 is returned,
+   *uri_p is moved past the "scheme:" prefix and the scheme is returned
+   through scheme_r as by uri_parse_scheme(). Returns -1 when no valid
+   scheme prefix is present; *uri_p is then left untouched. */
+int uri_cut_scheme(const char **uri_p, const char **scheme_r)
+{
+	struct uri_parser parser;
+	int ret;
+
+	uri_parser_init(&parser, NULL, *uri_p);
+	ret = uri_parse_scheme(&parser, scheme_r);
+	if (ret > 0) {
+		*uri_p = (const char *)parser.cur;
+		return 0;
+	}
+	return -1;
+}
+
+/* Parse a run of decimal digits as one octet of an IPv4 address.
+ *
+ * Consumed digits are appended to literal when it is not NULL. Returns 1
+ * with the value in *octet_r, 0 when there is no digit at the current
+ * position, and -1 as soon as the accumulated value exceeds 255.
+ */
+static int
+uri_parse_dec_octet(struct uri_parser *parser, string_t *literal,
+	uint8_t *octet_r) ATTR_NULL(2)
+{
+	unsigned int value = 0;
+	bool seen_digit = FALSE;
+
+	/* RFC 3986:
+	 *
+	 * dec-octet = DIGIT ; 0-9
+	 * / %x31-39 DIGIT ; 10-99
+	 * / "1" 2DIGIT ; 100-199
+	 * / "2" %x30-34 DIGIT ; 200-249
+	 * / "25" %x30-35 ; 250-255
+	 */
+
+	for (; parser->cur < parser->end && i_isdigit(*parser->cur);
+	     parser->cur++) {
+		value = value * 10 + (parser->cur[0] - '0');
+		if (value > 255)
+			return -1;
+
+		if (literal != NULL)
+			str_append_c(literal, *parser->cur);
+		seen_digit = TRUE;
+	}
+
+	if (!seen_digit)
+		return 0;
+	*octet_r = value;
+	return 1;
+}
+
+/* Parse a dotted-quad IPv4 address at the current position.
+ *
+ * The textual form is appended to literal when it is not NULL. Returns 1
+ * with the address stored in *ip4_r (network byte order, if ip4_r is not
+ * NULL) when four octets were parsed. Returns 0 when the data does not
+ * start with a digit and -1 when it starts like an address but is not one.
+ * In the failure cases the cursor and literal may have been modified.
+ *
+ * All four octets are required: running out of data after fewer octets is
+ * a failure rather than a short address.
+ */
+static int
+uri_parse_ipv4address(struct uri_parser *parser, string_t *literal,
+	struct in_addr *ip4_r) ATTR_NULL(2,3)
+{
+	uint8_t octet;
+	uint32_t ip = 0;
+	int ret;
+	int i;
+
+	/* RFC 3986:
+	 *
+	 * IPv4address = dec-octet "." dec-octet "." dec-octet "." dec-octet
+	 */
+
+	if ((ret = uri_parse_dec_octet(parser, literal, &octet)) <= 0)
+		return ret;
+	ip = octet;
+
+	for (i = 0; i < 3; i++) {
+		/* each of the remaining octets must be introduced by "." */
+		if (parser->cur >= parser->end || *parser->cur != '.')
+			return -1;
+
+		if (literal != NULL)
+			str_append_c(literal, '.');
+		parser->cur++;
+
+		if (uri_parse_dec_octet(parser, literal, &octet) <= 0)
+			return -1;
+		ip = (ip << 8) + octet;
+	}
+
+	if (ip4_r != NULL)
+		ip4_r->s_addr = htonl(ip);
+	return 1;
+}
+
+/* Consume a generic reg-name at the current position. Accepted characters
+   are appended to reg_name when it is not NULL, pct-encoded triplets in
+   decoded form. Returns -1 on a pct-encoding error and 0 otherwise; it
+   never returns a positive value, also when characters were consumed. */
+static int
+uri_do_parse_reg_name(struct uri_parser *parser,
+	string_t *reg_name) ATTR_NULL(2)
+{
+	/* RFC 3986:
+	 *
+	 * reg-name = *( unreserved / pct-encoded / sub-delims )
+	 */
+
+	while (parser->cur < parser->end) {
+		unsigned char c;
+		int ret;
+
+		/* unreserved / pct-encoded */
+		ret = uri_parse_pct_encoded(parser, &c);
+		if (ret == 0)
+			ret = uri_parse_unreserved_char(parser, &c);
+		if (ret < 0)
+			return -1;
+
+		if (ret == 0) {
+			/* sub-delims */
+			c = *parser->cur;
+			if ((c & 0x80) != 0 ||
+			    (_uri_char_lookup[c] & CHAR_MASK_SUB_DELIMS) == 0)
+				break;
+			parser->cur++;
+		}
+
+		if (reg_name != NULL)
+			str_append_c(reg_name, c);
+	}
+	return 0;
+}
+
+/* Parse a generic reg-name at the current position.
+ *
+ * Returns -1 on a pct-encoding error, 0 when nothing was consumed, and 1
+ * when a non-empty reg-name was consumed. In the last case *reg_name_r (if
+ * not NULL) points at the decoded name held in the parser's temporary
+ * buffer.
+ *
+ * The internal helper reports success as 0, so success is determined from
+ * the cursor movement rather than from a positive return value.
+ */
+int uri_parse_reg_name(struct uri_parser *parser,
+	const char **reg_name_r)
+{
+	const unsigned char *first = parser->cur;
+	string_t *reg_name = NULL;
+
+	if (reg_name_r != NULL)
+		reg_name = uri_parser_get_tmpbuf(parser, 256);
+
+	if (uri_do_parse_reg_name(parser, reg_name) < 0)
+		return -1;
+	if (parser->cur == first)
+		return 0;
+
+	if (reg_name_r != NULL)
+		*reg_name_r = str_c(reg_name);
+	return 1;
+}
+
+/* Parse a DNS-style host name (labels separated by ".") at the current
+ * position.
+ *
+ * The name is appended to host_name when it is not NULL, with pct-encoded
+ * characters in decoded form and a trailing "." removed. Returns 1 when a
+ * name was consumed, 0 when the cursor did not move, and -1 on error
+ * (parser->error is not set on every error path).
+ *
+ * 'part' marks the start of the not yet copied run of literal characters;
+ * such a run is flushed to host_name whenever a pct-encoded character
+ * interrupts it and at the end of each label.
+ */
+static int uri_do_parse_host_name(struct uri_parser *parser,
+	string_t *host_name) ATTR_NULL(2)
+{
+	const unsigned char *first, *part;
+	int ret;
+
+	/* RFC 3986, Section 3.2.2:
+
+	   A registered name intended for lookup in the DNS uses the syntax
+	   defined in Section 3.5 of [RFC1034] and Section 2.1 of [RFC1123].
+	   Such a name consists of a sequence of domain labels separated by ".",
+	   each domain label starting and ending with an alphanumeric character
+	   and possibly also containing "-" characters. The rightmost domain
+	   label of a fully qualified domain name in DNS may be followed by a
+	   single "." and should be if it is necessary to distinguish between
+	   the complete domain name and some local domain.
+
+	   RFC 2396, Section 3.2.2 (old URI specification):
+
+	   hostname = *( domainlabel "." ) toplabel [ "." ]
+	   domainlabel = alphanum | alphanum *( alphanum | "-" ) alphanum
+	   toplabel = alpha | alpha *( alphanum | "-" ) alphanum
+
+	   The description in RFC 3986 is more liberal, so:
+
+	   hostname = *( domainlabel "." ) domainlabel [ "." ]
+	   domainlabel = alphanum | alphanum *( alphanum | "-" ) alphanum
+
+	   We also support percent encoding in spirit of the generic reg-name,
+	   even though this should explicitly not be used according to the RFC.
+	   It is, however, not strictly forbidden (unlike older RFC), so we
+	   support it.
+	 */
+
+	first = part = parser->cur;
+	for (;;) {
+		const unsigned char *offset;
+		unsigned char ch, pch;
+
+		/* alphanum */
+		/* check the boundary before reading the current byte */
+		if (parser->cur >= parser->end)
+			break;
+		offset = parser->cur;
+		ch = pch = *parser->cur;
+		if ((ret=uri_parse_pct_encoded(parser, &ch)) < 0) {
+			return -1;
+		} else if (ret > 0) {
+			if (!i_isalnum(ch))
+				return -1;
+			if (host_name != NULL)
+				str_append_c(host_name, ch);
+			part = parser->cur;
+			/* the label's last character so far is the decoded
+			   one, not the raw '%' */
+			pch = ch;
+		} else {
+			if (!i_isalnum(*parser->cur))
+				break;
+			parser->cur++;
+		}
+
+		if (parser->cur < parser->end) {
+			/* *( alphanum | "-" ) alphanum */
+			do {
+				offset = parser->cur;
+
+				if ((ret=uri_parse_pct_encoded(parser, &ch)) < 0) {
+					return -1;
+				} else if (ret > 0) {
+					if (!i_isalnum(ch) && ch != '-')
+						break;
+					if (host_name != NULL) {
+						/* flush the literal run preceding
+						   the encoded character */
+						if (offset > part)
+							str_append_data(host_name, part, offset - part);
+						str_append_c(host_name, ch);
+					}
+					part = parser->cur;
+				} else {
+					ch = *parser->cur;
+					if (!i_isalnum(ch) && ch != '-')
+						break;
+					parser->cur++;
+				}
+				pch = ch;
+			} while (parser->cur < parser->end);
+
+			/* a label must not end in "-" */
+			if (!i_isalnum(pch)) {
+				parser->error = "Invalid domain label in hostname";
+				return -1;
+			}
+		}
+
+		if (host_name != NULL && parser->cur > part)
+			str_append_data(host_name, part, parser->cur - part);
+
+		/* "." */
+		if (parser->cur >= parser->end || ch != '.')
+			break;
+		if (host_name != NULL)
+			str_append_c(host_name, '.');
+		/* a literal "." is still unconsumed at this point, whereas a
+		   pct-encoded one was already skipped by the decoder */
+		if (parser->cur == offset)
+			parser->cur++;
+		part = parser->cur;
+	}
+
+	if (parser->cur == first)
+		return 0;
+
+	/* remove trailing '.' */
+	if (host_name != NULL) {
+		const char *name = str_c(host_name);
+
+		i_assert(str_len(host_name) > 0);
+		if (name[str_len(host_name)-1] == '.')
+			str_truncate(host_name, str_len(host_name)-1);
+	}
+	return 1;
+}
+
+/* Parse a DNS-style host name at the current position. Returns the result
+   of the internal parser (<= 0) on failure; on success returns 1 and, if
+   host_name_r is not NULL, points *host_name_r at the name held in the
+   parser's temporary buffer. */
+int uri_parse_host_name(struct uri_parser *parser,
+	const char **host_name_r)
+{
+	string_t *buf = NULL;
+	int ret;
+
+	if (host_name_r != NULL)
+		buf = uri_parser_get_tmpbuf(parser, 256);
+
+	ret = uri_do_parse_host_name(parser, buf);
+	if (ret > 0) {
+		if (host_name_r != NULL)
+			*host_name_r = str_c(buf);
+		return 1;
+	}
+	return ret;
+}
+
+/* Parse a bracketed IP literal; the caller has already verified that the
+ * current position holds "[".
+ *
+ * The literal including both brackets is appended to 'literal' when it is
+ * not NULL and the cursor is moved past the "]". Returns 1 with the address
+ * in *ip6_r (if not NULL) on success; -1 with parser->error set when the
+ * closing bracket is missing, the address is empty, it starts with 'v'
+ * (IPvFuture), or inet_pton() rejects it.
+ */
+static int
+uri_parse_ip_literal(struct uri_parser *parser, string_t *literal,
+	struct in6_addr *ip6_r) ATTR_NULL(2,3)
+{
+	const unsigned char *close_br;
+	const char *address;
+	struct in6_addr ip6;
+
+	/* IP-literal = "[" ( IPv6address / IPvFuture ) "]"
+	 * IPvFuture = "v" 1*HEXDIG "." 1*( unreserved / sub-delims / ":" )
+	 * IPv6address = ; Syntax not relevant: parsed using inet_pton()
+	 */
+
+	/* locate the closing bracket */
+	close_br = parser->cur + 1;
+	while (close_br < parser->end && *close_br != ']')
+		close_br++;
+
+	if (close_br >= parser->end) {
+		parser->error = "Expecting ']' at end of IP-literal";
+		return -1;
+	}
+
+	if (literal != NULL)
+		str_append_data(literal, parser->cur, close_br-parser->cur+1);
+	address = t_strdup_until(parser->cur+1, close_br);
+	parser->cur = close_br + 1;
+
+	if (*address == '\0') {
+		parser->error = "Empty IPv6 host address";
+		return -1;
+	}
+	if (*address == 'v') {
+		parser->error = p_strdup_printf(parser->pool,
+			"Future IP host address '%s' not supported", address);
+		return -1;
+	}
+	if (inet_pton(AF_INET6, address, &ip6) <= 0) {
+		parser->error = p_strdup_printf(parser->pool,
+			"Invalid IPv6 host address '%s'", address);
+		return -1;
+	}
+	if (ip6_r != NULL)
+		*ip6_r = ip6;
+	return 1;
+}
+
+/* Parse the host part of an authority.
+ *
+ * The alternatives are tried in this order: bracketed IP literal, IPv4
+ * address, then a name. The name is parsed as a DNS-style host name when
+ * host_name is TRUE and as a generic reg-name otherwise. When host is not
+ * NULL it is zeroed first and then filled in; host->name is allocated from
+ * parser->pool.
+ *
+ * Returns 1 when an IP address was parsed, 0 when the name parser was used
+ * (the name may be empty), and -1 on error.
+ */
+static int
+uri_do_parse_host(struct uri_parser *parser,
+	struct uri_host *host, bool host_name)
+	ATTR_NULL(2)
+{
+	const unsigned char *start;
+	struct in_addr ip4;
+	struct in6_addr ip6;
+	string_t *literal;
+	int ret;
+
+	/* RFC 3986:
+	 *
+	 * host = IP-literal / IPv4address / reg-name
+	 */
+
+	if (host != NULL)
+		i_zero(host);
+
+	literal = uri_parser_get_tmpbuf(parser, 256);
+
+	/* IP-literal / */
+	if (parser->cur < parser->end && *parser->cur == '[') {
+		if (uri_parse_ip_literal(parser, literal, &ip6) <= 0)
+			return -1;
+		if (host == NULL)
+			return 1;
+
+		host->name = p_strdup(parser->pool, str_c(literal));
+		host->ip.family = AF_INET6;
+		host->ip.u.ip6 = ip6;
+		return 1;
+	}
+
+	/* IPv4address /
+	 *
+	 * When this does not parse, the cursor and the buffer are rewound
+	 * and the same input is parsed as a name instead.
+	 */
+	start = parser->cur;
+	ret = uri_parse_ipv4address(parser, literal, &ip4);
+	if (ret > 0) {
+		if (host != NULL) {
+			host->name = p_strdup(parser->pool, str_c(literal));
+			host->ip.family = AF_INET;
+			host->ip.u.ip4 = ip4;
+		}
+		return ret;
+	}
+	parser->cur = start;
+	str_truncate(literal, 0);
+
+	/* reg-name */
+	ret = host_name ?
+		uri_do_parse_host_name(parser, literal) :
+		uri_do_parse_reg_name(parser, literal);
+	if (ret < 0)
+		return -1;
+	if (host != NULL)
+		host->name = p_strdup(parser->pool, str_c(literal));
+	return 0;
+}
+
+/* Parse a host at the current position, using the DNS-style host name
+   parser for names. */
+int uri_parse_host(struct uri_parser *parser,
+	struct uri_host *host)
+{
+	const bool host_name_syntax = TRUE;
+
+	return uri_do_parse_host(parser, host, host_name_syntax);
+}
+
+/* Parse a run of digits as a port number. Returns 0 when there is no digit
+   at the current position, -1 with parser->error set when net_str2port()
+   rejects the digits, and 1 on success with auth->port set (if auth is not
+   NULL). */
+static int
+uri_parse_port(struct uri_parser *parser,
+	struct uri_authority *auth) ATTR_NULL(2)
+{
+	const unsigned char *const digits = parser->cur;
+	in_port_t port;
+
+	/* RFC 3986:
+	 *
+	 * port = *DIGIT
+	 */
+
+	for (; parser->cur < parser->end; parser->cur++) {
+		if (!i_isdigit(*parser->cur))
+			break;
+	}
+
+	if (parser->cur == digits)
+		return 0;
+	if (net_str2port(t_strdup_until(digits, parser->cur), &port) < 0) {
+		parser->error = "Invalid port number";
+		return -1;
+	}
+
+	if (auth != NULL)
+		auth->port = port;
+	return 1;
+}
+
+/* Parse an authority component at the current position.
+ *
+ * auth (if not NULL) is zeroed and then filled in; the userinfo is stored
+ * in its still-encoded form. host_name selects the name parser used by
+ * uri_do_parse_host(). Returns 1 on success and a negative value on error.
+ * After a successful return the cursor is at the end of the data or at one
+ * of '/', '?' or '#'.
+ */
+static int
+uri_do_parse_authority(struct uri_parser *parser,
+	struct uri_authority *auth, bool host_name) ATTR_NULL(2)
+{
+	const unsigned char *scan;
+	int ret;
+
+	/*
+	 * authority = [ userinfo "@" ] host [ ":" port ]
+	 */
+
+	if (auth != NULL)
+		i_zero(auth);
+
+	/* Look ahead over everything that may be part of userinfo; it is
+	   only treated as userinfo if the scan stops at "@". */
+	scan = parser->cur;
+	while (scan < parser->end) {
+		const unsigned char c = *scan;
+
+		/* refuse 8bit characters */
+		if ((c & 0x80) != 0)
+			break;
+		/* break at first delimiter */
+		if (c != '%' && (_uri_char_lookup[c] & CHAR_MASK_UCHAR) == 0)
+			break;
+		scan++;
+	}
+
+	/* Extract userinfo */
+	if (scan < parser->end && *scan == '@') {
+		if (auth != NULL)
+			auth->enc_userinfo = p_strdup_until(parser->pool, parser->cur, scan);
+		parser->cur = scan+1;
+	}
+
+	/* host */
+	ret = uri_do_parse_host(parser,
+		(auth == NULL ? NULL : &auth->host), host_name);
+	if (ret < 0)
+		return -1;
+	if (parser->cur == parser->end)
+		return 1;
+	if (*parser->cur != ':' && *parser->cur != '/' &&
+	    *parser->cur != '?' && *parser->cur != '#') {
+		parser->error = "Invalid host identifier";
+		return -1;
+	}
+
+	/* [":" port] */
+	if (*parser->cur != ':')
+		return 1;
+	parser->cur++;
+
+	if ((ret = uri_parse_port(parser, auth)) < 0)
+		return ret;
+	if (parser->cur == parser->end)
+		return 1;
+	if (*parser->cur != '/' && *parser->cur != '?' &&
+	    *parser->cur != '#') {
+		parser->error = "Invalid host port";
+		return -1;
+	}
+	return 1;
+}
+
+/* Parse "//" followed by an authority. Returns 0 without consuming
+   anything when the "//" prefix is not recognized; otherwise the result of
+   uri_do_parse_authority().
+
+   NOTE(review): more than two bytes must remain, so a bare "//" at the very
+   end of the data is not treated as an (empty) authority - confirm that
+   this is intended. */
+static int
+uri_do_parse_slashslash_authority(struct uri_parser *parser,
+	struct uri_authority *auth, bool host_name)
+	ATTR_NULL(2)
+{
+	/* "//" authority */
+	const bool has_prefix =
+		(parser->end - parser->cur) > 2 &&
+		parser->cur[0] == '/' && parser->cur[1] == '/';
+
+	if (!has_prefix)
+		return 0;
+
+	parser->cur += 2;
+	return uri_do_parse_authority(parser, auth, host_name);
+}
+
+/* Parse an authority whose host name is a generic reg-name. */
+int uri_parse_authority(struct uri_parser *parser,
+	struct uri_authority *auth)
+{
+	int ret;
+
+	ret = uri_do_parse_authority(parser, auth, FALSE);
+	return ret;
+}
+
+/* Parse "//" authority, with the host name as a generic reg-name. */
+int uri_parse_slashslash_authority(struct uri_parser *parser,
+	struct uri_authority *auth)
+{
+	int ret;
+
+	ret = uri_do_parse_slashslash_authority(parser, auth, FALSE);
+	return ret;
+}
+
+/* Parse an authority whose host name uses the DNS-style host name
+   parser. */
+int uri_parse_host_authority(struct uri_parser *parser,
+	struct uri_authority *auth)
+{
+	int ret;
+
+	ret = uri_do_parse_authority(parser, auth, TRUE);
+	return ret;
+}
+
+/* Parse "//" authority, with the host name using the DNS-style host name
+   parser. */
+int uri_parse_slashslash_host_authority(struct uri_parser *parser,
+	struct uri_authority *auth)
+{
+	int ret;
+
+	ret = uri_do_parse_slashslash_authority(parser, auth, TRUE);
+	return ret;
+}
+
+/* Parse one path segment (a run of pchar and pct-encoded triplets).
+ *
+ * Returns 1 when a non-empty segment was consumed; *segment_r (if not NULL)
+ * then receives a copy from parser->pool, still in encoded form. Returns 0
+ * for an empty segment, and -1 with parser->error set on a pct-encoding
+ * error or when the segment is followed by anything other than the end of
+ * the data, '/', '?' or '#'.
+ */
+int uri_parse_path_segment(struct uri_parser *parser, const char **segment_r)
+{
+	const unsigned char *const start = parser->cur;
+
+	while (parser->cur < parser->end) {
+		const unsigned char c = *parser->cur;
+
+		if (c == '%') {
+			unsigned char decoded = 0;
+			int ret = uri_parse_pct_encoded(parser, &decoded);
+
+			if (ret < 0)
+				return -1;
+			if (ret > 0)
+				continue;
+		}
+
+		if ((c & 0x80) != 0 ||
+		    (_uri_char_lookup[c] & CHAR_MASK_PCHAR) == 0)
+			break;
+
+		parser->cur++;
+	}
+
+	/* only a path, query or fragment delimiter may follow */
+	if (parser->cur < parser->end) {
+		switch (*parser->cur) {
+		case '/': case '?': case '#':
+			break;
+		default:
+			parser->error =
+				"Path component contains invalid character";
+			return -1;
+		}
+	}
+
+	if (parser->cur == start)
+		return 0;
+
+	if (segment_r != NULL)
+		*segment_r = p_strdup_until(parser->pool, start, parser->cur);
+	return 1;
+}
+
+/* Parse a path and resolve its "." and ".." segments.
+ *
+ * *relative_r (if not NULL) is set to 0 for a path with a leading '/'.
+ * For other paths it starts at 1 and is incremented for every ".." that
+ * has no preceding segment left to remove.
+ *
+ * *path_r (if not NULL) is set to NULL for an empty path, and otherwise to
+ * a NULL-terminated array of the remaining segments, allocated from
+ * parser->pool.
+ *
+ * Returns 0 when the path is empty, 1 when a path was parsed and -1 with
+ * parser->error set on error.
+ */
+int uri_parse_path(struct uri_parser *parser,
+	int *relative_r, const char *const **path_r)
+{
+	const unsigned char *const path_start = parser->cur;
+	ARRAY_TYPE(const_string) segments;
+	const char *segment = NULL;
+	unsigned int count = 0;
+	int relative = 1;
+	int ret;
+
+	if (path_r == NULL)
+		i_zero(&segments);
+	else
+		p_array_init(&segments, parser->pool, 16);
+
+	/* a leading '/' marks an absolute path */
+	if (parser->cur < parser->end && *parser->cur == '/') {
+		parser->cur++;
+		relative = 0;
+	}
+
+	/* parse first segment */
+	ret = uri_parse_path_segment(parser, &segment);
+	if (ret < 0)
+		return -1;
+
+	for (;;) {
+		if (ret <= 0) {
+			/* empty segment */
+			segment = "";
+		} else if (strcmp(segment, "..") == 0) {
+			/* '..' -> skip and... */
+			segment = NULL;
+
+			/* ... pop last segment (if any) */
+			if (count > 0) {
+				if (path_r != NULL) {
+					i_assert(count == array_count(&segments));
+					array_delete(&segments, count-1, 1);
+				}
+				count--;
+			} else if (relative > 0) {
+				relative++;
+			}
+		} else if (strcmp(segment, ".") == 0) {
+			/* '.' -> skip */
+			segment = NULL;
+		}
+
+		if (segment != NULL) {
+			if (path_r != NULL)
+				array_push_back(&segments, &segment);
+			count++;
+		}
+
+		if (parser->cur >= parser->end || *parser->cur != '/')
+			break;
+		parser->cur++;
+
+		/* parse next path segment */
+		ret = uri_parse_path_segment(parser, &segment);
+		if (ret < 0)
+			return -1;
+	}
+
+	if (relative_r != NULL)
+		*relative_r = relative;
+	if (path_r != NULL)
+		*path_r = NULL;
+
+	if (parser->cur == path_start) {
+		/* path part of URI is empty */
+		return 0;
+	}
+
+	if (path_r != NULL) {
+		/* a trailing '..' or '.' leaves an empty final segment */
+		if (segment == NULL) {
+			segment = "";
+			array_push_back(&segments, &segment);
+		}
+		array_append_zero(&segments);
+		*path_r = array_get(&segments, &count);
+	}
+	if (parser->cur < parser->end &&
+	    *parser->cur != '?' && *parser->cur != '#') {
+		parser->error = "Path component contains invalid character";
+		return -1;
+	}
+	return 1;
+}
+
+/* Parse an optional "?" query component.
+ *
+ * Returns 0 when the current position does not hold '?'. Returns 1 when a
+ * query (possibly empty) was consumed; *query_r (if not NULL) receives a
+ * copy from parser->pool without the leading '?', still in encoded form.
+ * Returns -1 with parser->error set on a pct-encoding error or when the
+ * query is followed by anything other than the end of the data or '#'.
+ */
+int uri_parse_query(struct uri_parser *parser, const char **query_r)
+{
+	const unsigned char *query_start;
+
+	/* RFC 3986:
+	 *
+	 * URI = { ... } [ "?" query ] { ... }
+	 * query = *( pchar / "/" / "?" )
+	 * pchar = unreserved / pct-encoded / sub-delims / ":" / "@"
+	 */
+	if (parser->cur >= parser->end || *parser->cur != '?')
+		return 0;
+	query_start = ++parser->cur;
+
+	while (parser->cur < parser->end) {
+		const unsigned char c = *parser->cur;
+
+		if (c == '%') {
+			unsigned char decoded = 0;
+			int ret = uri_parse_pct_encoded(parser, &decoded);
+
+			if (ret < 0)
+				return -1;
+			if (ret > 0)
+				continue;
+		}
+
+		if ((c & 0x80) != 0 ||
+		    (_uri_char_lookup[c] & CHAR_MASK_QCHAR) == 0)
+			break;
+		parser->cur++;
+	}
+
+	if (parser->cur < parser->end && *parser->cur != '#') {
+		parser->error = "Query component contains invalid character";
+		return -1;
+	}
+
+	if (query_r != NULL)
+		*query_r = p_strdup_until(parser->pool, query_start, parser->cur);
+	return 1;
+}
+
+/* Parse an optional "#" fragment component.
+ *
+ * Returns 0 when the current position does not hold '#'. Returns 1 when a
+ * fragment (possibly empty) extending to the end of the data was consumed;
+ * *fragment_r (if not NULL) receives a copy from parser->pool without the
+ * leading '#', still in encoded form. Returns -1 with parser->error set on
+ * a pct-encoding error or when a character that is not allowed stops the
+ * scan before the end of the data.
+ */
+int uri_parse_fragment(struct uri_parser *parser, const char **fragment_r)
+{
+	const unsigned char *fragment_start;
+
+	/* RFC 3986:
+	 *
+	 * URI = { ... } [ "#" fragment ]
+	 * fragment = *( pchar / "/" / "?" )
+	 * pchar = unreserved / pct-encoded / sub-delims / ":" / "@"
+	 */
+
+	if (parser->cur >= parser->end || *parser->cur != '#')
+		return 0;
+	fragment_start = ++parser->cur;
+
+	while (parser->cur < parser->end) {
+		const unsigned char c = *parser->cur;
+
+		if (c == '%') {
+			unsigned char decoded = 0;
+			int ret = uri_parse_pct_encoded(parser, &decoded);
+
+			if (ret < 0)
+				return -1;
+			if (ret > 0)
+				continue;
+		}
+
+		if ((c & 0x80) != 0 ||
+		    (_uri_char_lookup[c] & CHAR_MASK_QCHAR) == 0)
+			break;
+		parser->cur++;
+	}
+
+	if (parser->cur < parser->end) {
+		parser->error = "Fragment component contains invalid character";
+		return -1;
+	}
+
+	if (fragment_r != NULL)
+		*fragment_r = p_strdup_until(parser->pool, fragment_start, parser->cur);
+	return 1;
+}
+
+/* Reset the parser and point it at the 'size' bytes starting at 'data'.
+   'pool' is stored in the parser for its later allocations. */
+void uri_parser_init_data(struct uri_parser *parser,
+	pool_t pool, const unsigned char *data, size_t size)
+{
+	i_zero(parser);
+
+	parser->pool = pool;
+	parser->begin = data;
+	parser->cur = data;
+	parser->end = data + size;
+}
+
+/* Reset the parser and point it at the NUL-terminated string 'uri'. */
+void uri_parser_init(struct uri_parser *parser,
+	pool_t pool, const char *uri)
+{
+	const unsigned char *data = (const unsigned char *)uri;
+	const size_t size = strlen(uri);
+
+	uri_parser_init_data(parser, pool, data, size);
+}
+
+/* Return the parser's scratch string, emptied. It is created from
+   parser->pool with the given initial size on first use; later calls
+   reuse the same buffer, so earlier contents are discarded. */
+string_t *uri_parser_get_tmpbuf(struct uri_parser *parser, size_t size)
+{
+	if (parser->tmpbuf != NULL) {
+		str_truncate(parser->tmpbuf, 0);
+		return parser->tmpbuf;
+	}
+
+	parser->tmpbuf = str_new(parser->pool, size);
+	return parser->tmpbuf;
+}
+
+/* Check the parser's data against the generic absolute URI syntax; no
+ * components are returned.
+ *
+ * With URI_PARSE_SCHEME_EXTERNAL set in flags the scheme is not parsed
+ * here. A fragment is only accepted when URI_PARSE_ALLOW_FRAGMENT_PART is
+ * set. Returns 0 when the whole input is valid and a negative value with
+ * parser->error set otherwise.
+ */
+int uri_parse_absolute_generic(struct uri_parser *parser,
+	enum uri_parse_flags flags)
+{
+	int relative, aret, ret = 0;
+
+	/*
+	   URI = scheme ":" hier-part [ "?" query ] [ "#" fragment ]
+
+	   hier-part = "//" authority path-abempty
+	   / path-absolute
+	   / path-rootless
+	   / path-empty
+	   path-abempty = *( "/" segment )
+	   path-absolute = "/" [ segment-nz *( "/" segment ) ]
+	   path-rootless = segment-nz *( "/" segment )
+	   path-empty = 0<pchar>
+
+	   segment = *pchar
+	   segment-nz = 1*pchar
+	 */
+
+	/* scheme ":" */
+	if ((flags & URI_PARSE_SCHEME_EXTERNAL) == 0) {
+		ret = uri_parse_scheme(parser, NULL);
+		if (ret == 0)
+			parser->error = "Missing scheme";
+		if (ret <= 0)
+			return -1;
+	}
+
+	/* "//" authority */
+	aret = uri_parse_slashslash_authority(parser, NULL);
+	if (aret < 0)
+		return -1;
+
+	/* Without an authority: path-absolute / path-rootless / path-empty.
+	   With an authority: path-abempty, parsed only if a '/' follows. */
+	if (aret == 0 ||
+	    (parser->cur < parser->end && *parser->cur == '/')) {
+		ret = uri_parse_path(parser, &relative, NULL);
+		if (ret < 0)
+			return -1;
+		/* a path following an authority starts with '/' */
+		i_assert(aret == 0 || ret == 0 || relative == 0);
+	}
+
+	/* [ "?" query ] */
+	if (uri_parse_query(parser, NULL) < 0)
+		return -1;
+
+	/* [ "#" fragment ] */
+	ret = uri_parse_fragment(parser, NULL);
+	if (ret < 0)
+		return ret;
+	if (ret > 0 && (flags & URI_PARSE_ALLOW_FRAGMENT_PART) == 0) {
+		parser->error = "Fragment part not allowed";
+		return -1;
+	}
+
+	i_assert(parser->cur == parser->end);
+	return 0;
+}
+
+/*
+ * Generic URI manipulation
+ */
+
+/* Copy *src to *dest, duplicating the host name into 'pool'. When src has
+   no name but does have an IP address, the address in text form is used as
+   the name of the copy. */
+void uri_host_copy(pool_t pool, struct uri_host *dest,
+	const struct uri_host *src)
+{
+	const char *name = src->name;
+
+	if (name == NULL && src->ip.family != 0) {
+		/* create host name literal if caller is lazy */
+		name = net_ip2addr(&src->ip);
+		i_assert(*name != '\0');
+	}
+
+	*dest = *src;
+	dest->name = p_strdup(pool, name);
+}
+
+/*
+ * Check generic URI
+ */
+
+/* Check 'size' bytes at 'data' against the generic absolute URI syntax.
+   Returns the result of uri_parse_absolute_generic(); *error_r receives
+   the parser's error string (whatever the parser left there, which is the
+   zero-initialized value if no error was recorded). Parser allocations
+   come from the data stack pool. */
+int uri_check_data(const unsigned char *data, size_t size,
+	enum uri_parse_flags flags, const char **error_r)
+{
+	struct uri_parser parser;
+	int ret;
+
+	uri_parser_init_data(&parser, pool_datastack_create(), data, size);
+
+	ret = uri_parse_absolute_generic(&parser, flags);
+	*error_r = parser.error;
+	return ret;
+}
+
+/* Same as uri_check_data(), for a NUL-terminated string. */
+int uri_check(const char *uri, enum uri_parse_flags flags,
+	const char **error_r)
+{
+	const unsigned char *data = (const unsigned char *)uri;
+	const size_t size = strlen(uri);
+
+	return uri_check_data(data, size, flags, error_r);
+}
+
+/*
+ * Generic URI construction
+ */
+
+/* Append 'data' to 'out', percent-encoding every byte that may not appear
+ * literally.
+ *
+ * A byte is copied literally only if it is 7-bit, its esc_table entry has a
+ * bit in common with esc_mask, and it does not occur in esc_extra (which
+ * may be NULL). Every other byte is written as "%" followed by two
+ * lowercase hex digits.
+ */
+void uri_data_encode(string_t *out,
+	const unsigned char esc_table[256],
+	unsigned char esc_mask, const char *esc_extra,
+	const char *data)
+{
+	const unsigned char *run, *p;
+
+	/* 'run' is the start of the pending stretch of literal bytes */
+	run = p = (const unsigned char *)data;
+	for (; *p != '\0'; p++) {
+		const bool literal = (*p & 0x80) == 0 &&
+			(esc_table[*p] & esc_mask) != 0 &&
+			(esc_extra == NULL ||
+			 strchr(esc_extra, (char)*p) == NULL);
+
+		if (literal)
+			continue;
+
+		/* flush the literal stretch, then emit the escape */
+		if ((p - run) > 0)
+			str_append_data(out, run, p - run);
+		str_printfa(out, "%%%02x", *p);
+		run = p + 1;
+	}
+	if ((p - run) > 0)
+		str_append_data(out, run, p - run);
+}
+
+/* Append "<scheme>:" to out; the scheme is written as-is. */
+void uri_append_scheme(string_t *out, const char *scheme)
+{
+	const char separator = ':';
+
+	str_append(out, scheme);
+	str_append_c(out, separator);
+}
+
+/* Append userinfo data to out, encoding everything that is not a 'uchar'
+   as well as any character listed in esc. */
+void uri_append_user_data(string_t *out, const char *esc,
+	const char *data)
+{
+	const unsigned char allowed = CHAR_MASK_UCHAR;
+
+	uri_data_encode(out, _uri_char_lookup, allowed, esc, data);
+}
+
+/* Append the encoded userinfo followed by "@" to out. */
+void uri_append_userinfo(string_t *out, const char *userinfo)
+{
+	const char separator = '@';
+
+	uri_append_user_data(out, NULL, userinfo);
+	str_append_c(out, separator);
+}
+
+/* Append a host name to out, encoding everything that is neither an
+   unreserved character nor a sub-delim. */
+void uri_append_host_name(string_t *out, const char *name)
+{
+	const unsigned char allowed =
+		CHAR_MASK_UNRESERVED | CHAR_MASK_SUB_DELIMS;
+
+	uri_data_encode(out, _uri_char_lookup, allowed, NULL, name);
+}
+
+/* Append an IP address to out: an IPv4 address as plain text, an IPv6
+   address enclosed in "[" and "]". The address family must be set. */
+void uri_append_host_ip(string_t *out, const struct ip_addr *host_ip)
+{
+	const char *addr = net_ip2addr(host_ip);
+	const bool is_ipv4 = host_ip->family == AF_INET;
+
+	i_assert(host_ip->family != 0);
+
+	if (!is_ipv4) {
+		i_assert(host_ip->family == AF_INET6);
+		str_append_c(out, '[');
+	}
+	str_append(out, addr);
+	if (!is_ipv4)
+		str_append_c(out, ']');
+}
+
+/* Append a host to out. The name is preferred when it is set; otherwise
+   the IP address is written. */
+void uri_append_host(string_t *out, const struct uri_host *host)
+{
+	if (host->name == NULL) {
+		uri_append_host_ip(out, &host->ip);
+		return;
+	}
+
+	/* assume IPv6 literal if starts with '['; avoid encoding */
+	if (*host->name == '[')
+		str_append(out, host->name);
+	else
+		uri_append_host_name(out, host->name);
+}
+
+/* Append ":<port>" to out; nothing is written for port 0. */
+void uri_append_port(string_t *out, in_port_t port)
+{
+	if (port == 0)
+		return;
+	str_printfa(out, ":%u", port);
+}
+
+/* Append data for a single path segment to out, encoding everything that
+   is not a pchar (which includes "/") as well as any character listed in
+   esc. */
+void uri_append_path_segment_data(string_t *out, const char *esc,
+	const char *data)
+{
+	const unsigned char allowed = CHAR_MASK_PCHAR;
+
+	uri_data_encode(out, _uri_char_lookup, allowed, esc, data);
+}
+
+/* Append "/" followed by one encoded path segment to out.
+
+   The segment is encoded with the segment encoder, so a "/" inside it is
+   percent-encoded and cannot split the segment in two. */
+void uri_append_path_segment(string_t *out, const char *segment)
+{
+	str_append_c(out, '/');
+	if (*segment != '\0')
+		uri_append_path_segment_data(out, NULL, segment);
+}
+
+/* Append path data to out. "/" is kept literally, in addition to the
+   pchar characters; everything else and any character listed in esc is
+   encoded. */
+void uri_append_path_data(string_t *out, const char *esc,
+	const char *data)
+{
+	const unsigned char allowed = CHAR_MASK_PFCHAR;
+
+	uri_data_encode(out, _uri_char_lookup, allowed, esc, data);
+}
+
+/* Append "/" followed by the encoded path to out; "/" characters inside
+   the path are kept literally. */
+void uri_append_path(string_t *out, const char *path)
+{
+	str_append_c(out, '/');
+	if (*path == '\0')
+		return;
+	uri_append_path_data(out, NULL, path);
+}
+
+/* Append query data to out, encoding everything that is not a 'qchar' as
+   well as any character listed in esc. */
+void uri_append_query_data(string_t *out, const char *esc,
+	const char *data)
+{
+	const unsigned char allowed = CHAR_MASK_QCHAR;
+
+	uri_data_encode(out, _uri_char_lookup, allowed, esc, data);
+}
+
+/* Append "?" followed by the encoded query to out. */
+void uri_append_query(string_t *out, const char *query)
+{
+	str_append_c(out, '?');
+	if (*query == '\0')
+		return;
+	uri_append_query_data(out, NULL, query);
+}
+
+/* Append fragment data to out, encoding everything that is not a 'qchar'
+   as well as any character listed in esc. */
+void uri_append_fragment_data(string_t *out, const char *esc,
+	const char *data)
+{
+	const unsigned char allowed = CHAR_MASK_QCHAR;
+
+	uri_data_encode(out, _uri_char_lookup, allowed, esc, data);
+}
+
+/* Append "#" followed by the encoded fragment to out. */
+void uri_append_fragment(string_t *out, const char *fragment)
+{
+	str_append_c(out, '#');
+	if (*fragment == '\0')
+		return;
+	uri_append_fragment_data(out, NULL, fragment);
+}
+
+/* Append data to out, encoding everything that is not an unreserved
+   character. */
+void uri_append_unreserved(string_t *out, const char *data)
+{
+	const unsigned char allowed = CHAR_MASK_UNRESERVED;
+
+	uri_data_encode(out, _uri_char_lookup, allowed, NULL, data);
+}
+
+/* Append data to out, encoding everything except unreserved characters
+   and "/". */
+void uri_append_unreserved_path(string_t *out, const char *data)
+{
+	const unsigned char allowed = CHAR_MASK_UNRESERVED_PATH;
+
+	uri_data_encode(out, _uri_char_lookup, allowed, NULL, data);
+}