summaryrefslogtreecommitdiffstats
path: root/src/lib/uri-util.h
diff options
context:
space:
mode:
Diffstat (limited to 'src/lib/uri-util.h')
-rw-r--r--src/lib/uri-util.h298
1 files changed, 298 insertions, 0 deletions
diff --git a/src/lib/uri-util.h b/src/lib/uri-util.h
new file mode 100644
index 0000000..837e54c
--- /dev/null
+++ b/src/lib/uri-util.h
@@ -0,0 +1,298 @@
+#ifndef URI_UTIL_H
+#define URI_UTIL_H
+
+#include "net.h"
+
+/*
+ * Generic URI parsing.
+ */
+
+enum uri_parse_flags {
+ /* Scheme part 'scheme:' is already parsed externally. */
+ URI_PARSE_SCHEME_EXTERNAL = BIT(0),
+ /* Allow '#fragment' part in URI */
+ URI_PARSE_ALLOW_FRAGMENT_PART = BIT(1),
+};
+
+struct uri_host {
+ const char *name; /* host name; see uri_append_host_name() */
+ struct ip_addr ip; /* host IP address; see uri_append_host_ip() */
+};
+
+struct uri_authority {
+ /* encoded userinfo part; e.g. "user:pass" */
+ const char *enc_userinfo;
+
+ struct uri_host host;
+ in_port_t port; /* 0 means no port specified */
+};
+
+struct uri_parser {
+ pool_t pool; /* the "parser pool" that parsed results are allocated from */
+ const char *error; /* NOTE(review): presumably the last parse error message - confirm */
+
+ const unsigned char *begin, *cur, *end; /* NOTE(review): presumably input bounds and current parse position - confirm */
+
+ string_t *tmpbuf; /* temporary buffer; see uri_parser_get_tmpbuf() */
+
+ bool allow_pct_nul:1; /* NOTE(review): presumably permits a percent-encoded NUL (%00) - confirm */
+};
+
+/* parse one instance of percent encoding. Returns 1 for success,
+ 0 if none is present at the current parser position, and -1 in
+ case of error. The decoded character is returned in ch_r upon
+ success */
+int uri_parse_pct_encoded(struct uri_parser *parser,
+ unsigned char *ch_r);
+
+/* parse characters as long as these comply with the 'unreserved'
+ syntax. Returns 1 if characters were found, 0 if none were found,
+ and -1 if there was an error */
+int uri_parse_unreserved(struct uri_parser *parser, string_t *part);
+/* the same as uri_parse_unreserved(), but the allowed characters are
+ extended to 'unreserved / pct-encoded', meaning that percent encoding
+ is allowed */
+int uri_parse_unreserved_pct(struct uri_parser *parser, string_t *part);
+
+/* decode percent-encoded data from the 'data' parameter, up until the
+ 'until' parameter. If the latter is NULL, data is decoded up until the
+ '\0' character. The decoded data is allocated on the parser pool and
+ returned in decoded_r. Any errors are written to the parser object. */
+bool uri_data_decode(struct uri_parser *parser, const char *data,
+ const char *until, const char **decoded_r) ATTR_NULL(3);
+
+/* cut the 'scheme ":"' part from the URI. The uri_p pointer is updated to
+ point just past the ":". Returns 0 on success and -1 on error. The
+ result is returned in the scheme_r parameter. This can be NULL to use
+ this function for merely checking the presence of a valid scheme. */
+int uri_cut_scheme(const char **uri_p, const char **scheme_r)
+ ATTR_NULL(2);
+
+/* parse the URI 'scheme ":"' part. Returns 1 if successful, 0 if the first
+ character is not valid for a scheme, and -1 in case of error. The
+ result parameter scheme_r can be NULL to use this function for merely
+ checking the presence of a valid scheme. */
+int uri_parse_scheme(struct uri_parser *parser, const char **scheme_r)
+ ATTR_NULL(2);
+
+/* parse the URI 'reg-name' syntax. Returns 1 if successful, 0 if the first
+ character is not valid for a host name, and -1 in case of error. The
+ result parameter reg_name_r can be NULL to use this function for merely
+ checking the presence of a valid host name. The result is allocated from
+ the data stack.
+ */
+int uri_parse_reg_name(struct uri_parser *parser,
+ const char **reg_name_r) ATTR_NULL(2);
+/* parse the URI 'reg-name' part as an Internet host name, which is a
+ sequence of domain name labels separated by '.', as defined in
+ Section 3.5 of RFC 1034 and Section 2.1 of RFC 1123. Returns 1 if
+ successful, 0 if the first character is not valid for a host name,
+ and -1 in case of error. The result parameter host_name_r can be NULL
+ to use this function for merely checking the presence of a valid host
+ name. The result is allocated from the data stack.
+ */
+int uri_parse_host_name(struct uri_parser *parser,
+ const char **host_name_r) ATTR_NULL(2);
+/* parse the URI 'host' syntax, which is either an IP address literal or
+ an Internet host name, as defined in Section 3.5 of RFC 1034 and
+ Section 2.1 of RFC 1123. An IP address literal is always allowed.
+ Returns 1 if successful, 0 if the first character is not valid for a
+ host name, and -1 in case of error. The provided host struct is filled
+ in with the parsed data, all allocated from the parser pool. The host
+ parameter can be NULL to use this function for merely checking for
+ valid 'host' syntax.
+ */
+int uri_parse_host(struct uri_parser *parser,
+ struct uri_host *host) ATTR_NULL(2);
+
+/* parse the URI 'authority' syntax. Returns 1 if successful, 0 if the
+ first character is not valid for the 'authority' syntax and -1 in case
+ of error. The provided uri_authority struct is filled in with the parsed
+ data, all allocated from the parser pool. The auth parameter can be
+ NULL to use this function for merely checking for valid 'authority'
+ syntax.
+ */
+int uri_parse_authority(struct uri_parser *parser,
+ struct uri_authority *auth) ATTR_NULL(2);
+/* identical to uri_parse_authority(), except that this function parses
+ '"//" authority', rather than 'authority'.
+ */
+int uri_parse_slashslash_authority(struct uri_parser *parser,
+ struct uri_authority *auth) ATTR_NULL(2);
+/* identical to uri_parse_authority(), except that this function parses
+ the registered name ('reg-name' syntax) as an Internet host name, as
+ defined in Section 3.5 of RFC 1034 and Section 2.1 of RFC 1123.
+ */
+int uri_parse_host_authority(struct uri_parser *parser,
+ struct uri_authority *auth) ATTR_NULL(2);
+/* identical to uri_parse_slashslash_authority(), except that this
+ function parses the registered name ('reg-name' syntax) as an Internet
+ host name, as defined in Section 3.5 of RFC 1034 and Section 2.1 of
+ RFC 1123.
+ */
+int uri_parse_slashslash_host_authority(struct uri_parser *parser,
+ struct uri_authority *auth) ATTR_NULL(2);
+
+/* parse the URI 'segment' syntax. Returns 1 if successful, 0 if the first
+ character is not valid for the 'segment' syntax and -1 in case of
+ error. The result is allocated from the parser pool. Percent encoding is
+ not decoded in the result. The result parameter can be NULL to use this
+ function for merely checking for valid 'segment' syntax.
+ */
+int uri_parse_path_segment(struct uri_parser *parser,
+ const char **segment_r) ATTR_NULL(2);
+/* parse the URI 'path' syntax. This also resolves '..' and '.' segments in
+ the path. If the path is relative, the relative_r parameter indicates
+ how many segments the base path must be moved towards root (as caused by
+ leading '..' segments). Returns 1 if successful, 0 if the first character
+ is not valid for the 'segment' syntax and -1 in case of error. The result
+ is a NULL-terminated string list allocated from the parser pool. Percent
+ encoding is not decoded in the result. The result parameter can be NULL
+ to use this function for merely checking for valid 'path' syntax.
+ */
+int uri_parse_path(struct uri_parser *parser, int *relative_r,
+ const char *const **path_r) ATTR_NULL(2,3);
+
+/* parse the URI 'query' syntax. Returns 1 if successful, 0 if the first
+ character is not valid for the 'query' syntax and -1 in case of
+ error. The result is allocated from the parser pool. Percent encoding is
+ not decoded in the result. The result parameter can be NULL to use this
+ function for merely checking for valid 'query' syntax.
+ */
+int uri_parse_query(struct uri_parser *parser,
+ const char **query_r) ATTR_NULL(2);
+/* parse the URI 'fragment' syntax. Returns 1 if successful, 0 if the first
+ character is not valid for the 'fragment' syntax and -1 in case of
+ error. The result is allocated from the parser pool. Percent encoding is
+ not decoded in the result. The result parameter can be NULL to use this
+ function for merely checking for valid 'fragment' syntax.
+ */
+int uri_parse_fragment(struct uri_parser *parser,
+ const char **fragment_r) ATTR_NULL(2);
+
+/* initialize the URI parser with the provided data */
+void uri_parser_init_data(struct uri_parser *parser,
+ pool_t pool, const unsigned char *data, size_t size);
+/* initialize the URI parser with the provided '\0'-terminated string */
+void uri_parser_init(struct uri_parser *parser,
+ pool_t pool, const char *uri);
+
+/* returns the temporary buffer associated with this parser. Can be used
+ for higher-level parsing activities. */
+string_t *uri_parser_get_tmpbuf(struct uri_parser *parser,
+ size_t size);
+
+/* Parse a generic (RFC3986) absolute URI for validity.
+ Returns 0 if valid and -1 otherwise. Note that some URI formats like
+ "sip", "aix" and "aaa" violate RFC3986 and will currently fail with
+ this function.
+ */
+int uri_parse_absolute_generic(struct uri_parser *parser,
+ enum uri_parse_flags flags);
+
+/*
+ * Generic URI manipulation
+ */
+
+/* copy uri_host struct from src to dest and allocate it on pool */
+void uri_host_copy(pool_t pool, struct uri_host *dest,
+ const struct uri_host *src);
+
+/*
+ * Generic URI validation
+ */
+
+/* Check whether the provided data is a valid absolute RFC3986 URI.
+ Returns 0 if valid and -1 otherwise. */
+int uri_check_data(const unsigned char *data, size_t size,
+ enum uri_parse_flags flags, const char **error_r);
+/* Check whether the provided string is a valid absolute RFC3986 URI.
+ Returns 0 if valid and -1 otherwise. */
+int uri_check(const char *uri, enum uri_parse_flags flags,
+ const char **error_r);
+
+/*
+ * Generic URI construction
+ */
+
+/* encodes the '\0'-terminated data using the percent encoding. The
+ esc_table is a 256 byte lookup table. If none of the esc_mask bits are
+ set at the character's position in the esc_table, a character needs
+ to be encoded. Also, when esc_extra contains a character, it needs to
+ be encoded. All other characters are copied verbatim to the out buffer.
+ */
+void uri_data_encode(string_t *out,
+ const unsigned char esc_table[256],
+ unsigned char esc_mask, const char *esc_extra,
+ const char *data) ATTR_NULL(4);
+
+/* append the provided scheme to the out buffer */
+void uri_append_scheme(string_t *out, const char *scheme);
+
+/* append partial user data (i.e. some part of what comes before '@') to
+ the out buffer. No '@' is produced. Characters are percent-encoded when
+ necessary. Characters in esc are always percent-encoded, even when these
+ are valid 'userinfo' characters. */
+void uri_append_user_data(string_t *out,
+ const char *esc, const char *data) ATTR_NULL(2);
+/* append userinfo and '@' to the out buffer. Characters in userinfo are
+ percent-encoded when necessary.*/
+void uri_append_userinfo(string_t *out, const char *userinfo);
+
+/* append the host name to the out buffer. Characters are percent-encoded
+ when necessary.*/
+void uri_append_host_name(string_t *out, const char *name);
+/* append the host IP address to the out buffer. */
+void uri_append_host_ip(string_t *out, const struct ip_addr *host_ip);
+/* encode the URI host struct to the out buffer. */
+void uri_append_host(string_t *out, const struct uri_host *host);
+/* append the port to the out buffer. */
+void uri_append_port(string_t *out, in_port_t port);
+
+/* append partial path segment data to the out buffer. No '/' is produced.
+ Characters are percent-encoded when necessary. Characters in esc are
+ always percent-encoded, even when these are valid 'segment' characters.
+ */
+void uri_append_path_segment_data(string_t *out,
+ const char *esc, const char *data) ATTR_NULL(2);
+/* append a full path segment to the out buffer. A leading '/' is
+ produced. Characters are percent-encoded when necessary. */
+void uri_append_path_segment(string_t *out, const char *segment);
+/* append partial path data to the out buffer. The data may include '/',
+ which is not encoded. Characters are percent-encoded when necessary.
+ Characters in esc are always percent-encoded, even when these are
+ valid 'path' characters.*/
+void uri_append_path_data(string_t *out,
+ const char *esc, const char *data) ATTR_NULL(2);
+/* append a full path to the out buffer. A leading '/' is produced. The
+ data may include more '/', which is not encoded. Characters are
+ percent-encoded when necessary.
+ */
+void uri_append_path(string_t *out, const char *path);
+
+/* append partial query data to the out buffer. No leading '?' is
+ produced. Characters are percent-encoded when necessary. Characters
+ in esc are always percent-encoded, even when these are valid 'query'
+ characters.*/
+void uri_append_query_data(string_t *out,
+ const char *esc, const char *data) ATTR_NULL(2);
+/* append a full URI query part to the out buffer. A leading '?' is
+ produced. Characters are percent-encoded when necessary. */
+void uri_append_query(string_t *out, const char *query);
+
+/* append partial fragment data to the out buffer. No leading '#' is
+ produced. Characters are percent-encoded when necessary. Characters
+ in esc are always percent-encoded, even when these are valid
+ 'fragment' characters.*/
+void uri_append_fragment_data(string_t *out,
+ const char *esc, const char *data) ATTR_NULL(2);
+/* append a full URI fragment part to the out buffer. A leading '#' is
+ produced. Characters are percent-encoded when necessary. */
+void uri_append_fragment(string_t *out, const char *fragment);
+
+/* append data to the out buffer and escape any reserved character */
+void uri_append_unreserved(string_t *out, const char *data);
+/* append data to the out buffer and escape any reserved character except '/' */
+void uri_append_unreserved_path(string_t *out, const char *data);
+
+#endif