diff options
Diffstat (limited to 'src/lib/uri-util.h')
-rw-r--r-- | src/lib/uri-util.h | 298 |
1 files changed, 298 insertions, 0 deletions
diff --git a/src/lib/uri-util.h b/src/lib/uri-util.h new file mode 100644 index 0000000..837e54c --- /dev/null +++ b/src/lib/uri-util.h @@ -0,0 +1,298 @@ +#ifndef URI_UTIL_H +#define URI_UTIL_H + +#include "net.h" + +/* + * Generic URI parsing. + */ + +enum uri_parse_flags { + /* Scheme part 'scheme:' is already parsed externally. */ + URI_PARSE_SCHEME_EXTERNAL = BIT(0), + /* Allow '#fragment' part in URI */ + URI_PARSE_ALLOW_FRAGMENT_PART = BIT(1), +}; + +struct uri_host { + const char *name; + struct ip_addr ip; +}; + +struct uri_authority { + /* encoded userinfo part; e.g. "user:pass" */ + const char *enc_userinfo; + + struct uri_host host; + in_port_t port; /* 0 means no port specified */ +}; + +struct uri_parser { + pool_t pool; + const char *error; + + const unsigned char *begin, *cur, *end; + + string_t *tmpbuf; + + bool allow_pct_nul:1; +}; + +/* parse one instance of percent encoding. Returns 1 for success, + 0 if none is preset at the current parser position, and -1 in + case of error. The decoded character is returned in ch_r upon + success */ +int uri_parse_pct_encoded(struct uri_parser *parser, + unsigned char *ch_r); + +/* parse characters as long as these comply with the the 'unreserved' + syntax. Returns 1 if characters were found, 0 if none were found, + and -1 if there was an error */ +int uri_parse_unreserved(struct uri_parser *parser, string_t *part); +/* the same as uri_parse_unreserved(), but the allowed characters are + extended to 'unreserved / pct-encoded', meaning that percent encoding + is allowed */ +int uri_parse_unreserved_pct(struct uri_parser *parser, string_t *part); + +/* decode percent-encoded data from the 'data' parameter, up until the + 'until' parameter. If the latter is NULL, data is decoded up until the + '\0' character. The decoded data is allocated on the parser pool and + returned in decoded_r. Any errors are written to the parser object. */ +bool uri_data_decode(struct uri_parser *parser, const char *data, + const char *until, const char **decoded_r) ATTR_NULL(3); + +/* cut the 'scheme ":"' part from the URI. The uri_p pointer is updated to + point just past the ":". Returns 0 on success and -1 on error. The + result is returned in the scheme_r parameter. This can be NULL to use + this function for merely checking the presence of a valid scheme. */ +int uri_cut_scheme(const char **uri_p, const char **scheme_r) + ATTR_NULL(2); + +/* parse the URI 'scheme ":"' part. Returns 1 if successful, 0 if the first + character is not valid for a scheme, and -1 in case of error. The + result parameter scheme_r can be NULL to use this function for merely + checking the presence of a valid scheme. */ +int uri_parse_scheme(struct uri_parser *parser, const char **scheme_r) + ATTR_NULL(2); + +/* parse the URI 'reg-name' syntax. Returns 1 if successful, 0 if the first + character is not valid for a host name, and -1 in case of error. The + result parameter reg_name_r can be NULL to use this function for merely + checking the presence of a valid host name. The result is allocated from + the data stack. + */ +int uri_parse_reg_name(struct uri_parser *parser, + const char **reg_name_r) ATTR_NULL(2); +/* parse the URI 'reg-name' part as an Internet host name, which is a + sequence of domain name labels separated by '.', as defined in + Section 3.5 of RFC 1034 and Section 2.1 of RFC 1123. Returns 1 if + successful, 0 if the first character is not valid for a host name, + and -1 in case of error. The result parameter host_name_r can be NULL + to use this function for merely checking the presence of a valid host + name. The result is allocated from the data stack. + */ +int uri_parse_host_name(struct uri_parser *parser, + const char **host_name_r) ATTR_NULL(2); +/* parse the URI 'host' syntax, which is either an IP address literal or + a an Internet host name, as defined in Section 3.5 of RFC 1034 and + Section 2.1 of RFC 1123. An IP address literal is always allowed. + Returns 1 if successful, 0 if the first character is not valid for a + host name, and -1 in case of error. The provided host struct is filled + in with the parsed data, all allocated from the parser pool. The host + parameter can be NULL to use this function for merely checking for + valid 'host' syntax. + */ +int uri_parse_host(struct uri_parser *parser, + struct uri_host *host) ATTR_NULL(2); + +/* parse the URI 'authority' syntax. Returns 1 if successful, 0 if the + first character is not valid for the 'authority' syntax and -1 in case + of error. The provided uri_authority struct is filled in with the parsed + data, all allocated from the parser pool. The auth parameter can be + NULL to use this function for merely checking for valid 'authority' + syntax. + */ +int uri_parse_authority(struct uri_parser *parser, + struct uri_authority *auth) ATTR_NULL(2); +/* identical to uri_parse_authority(), except that this function parses + '"//" authority', rather than 'authority'. + */ +int uri_parse_slashslash_authority(struct uri_parser *parser, + struct uri_authority *auth) ATTR_NULL(2); +/* identical to uri_parse_authority(), except that this function parses + the registered name ('reg-name' syntax) as an Internet host name, as + defined in Section 3.5 of RFC 1034 and Section 2.1 of RFC 1123. + */ +int uri_parse_host_authority(struct uri_parser *parser, + struct uri_authority *auth) ATTR_NULL(2); +/* identical to uri_parse_slashslash_authority(), except that this + function parses the registered name ('reg-name' syntax) as an Internet + host name, as defined in Section 3.5 of RFC 1034 and Section 2.1 of + RFC 1123. + */ +int uri_parse_slashslash_host_authority(struct uri_parser *parser, + struct uri_authority *auth) ATTR_NULL(2); + +/* parse the URI 'segment' syntax. Returns 1 if successful, 0 if the first + character is not valid for the 'segment' syntax and -1 in case of + error. The result is allocated from the parser pool. Percent encoding is + not decoded in the result. The result parameter can be NULL to use this + function for merely checking for valid 'segment' syntax. + */ +int uri_parse_path_segment(struct uri_parser *parser, + const char **segment_r) ATTR_NULL(2); +/* parse the URI 'path' syntax. This also resolves '..' and '.' segments in + the path. If the path is relative, the relative_r parameter indicates + how many segments the base path must be moved towards root (as caused by + leading '..' segments). Returns 1 if successful, 0 if the first character + is not valid for the 'segment' syntax and -1 in case of error. The result + is a NULL-terminated string list allocated from the parser pool. Percent + encoding is not decoded in the result. The result parameter can be NULL + to use this function for merely checking for valid 'path' syntax. + */ +int uri_parse_path(struct uri_parser *parser, int *relative_r, + const char *const **path_r) ATTR_NULL(2,3); + +/* parse the URI 'query' syntax. Returns 1 if successful, 0 if the first + character is not valid for the 'query' syntax and -1 in case of + error. The result is allocated from the parser pool. Percent encoding is + not decoded in the result. The result parameter can be NULL to use this + function for merely checking for valid 'query' syntax. + */ +int uri_parse_query(struct uri_parser *parser, + const char **query_r) ATTR_NULL(2); +/* parse the URI 'fragment' syntax. Returns 1 if successful, 0 if the first + character is not valid for the 'fragment' syntax and -1 in case of + error. The result is allocated from the parser pool. Percent encoding is + not decoded in the result. The result parameter can be NULL to use this + function for merely checking for valid 'fragment' syntax. + */ +int uri_parse_fragment(struct uri_parser *parser, + const char **fragment_r) ATTR_NULL(2); + +/* initialize the URI parser with the provided data */ +void uri_parser_init_data(struct uri_parser *parser, + pool_t pool, const unsigned char *data, size_t size); +/* initialize the URI parser with the provided '\0'-terminated string */ +void uri_parser_init(struct uri_parser *parser, + pool_t pool, const char *uri); + +/* returns the temporary buffer associated with this parser. Can be used + for higher-level parsing activities. */ +string_t *uri_parser_get_tmpbuf(struct uri_parser *parser, + size_t size); + +/* Parse a generic (RFC3986) absolute URI for validity. + Returns 0 if valid and -1 otherwise. Note that some URI formats like + "sip", "aix" and "aaa" violate RFC3986 and will currently fail with + this function. + */ +int uri_parse_absolute_generic(struct uri_parser *parser, + enum uri_parse_flags flags); + +/* + * Generic URI manipulation + */ + +/* copy uri_host struct from src to dest and allocate it on pool */ +void uri_host_copy(pool_t pool, struct uri_host *dest, + const struct uri_host *src); + +/* + * Generic URI validation + */ + +/* Check whether the provided data is a valid absolute RFC3986 URI. + Returns 0 if valid and -1 otherwise. */ +int uri_check_data(const unsigned char *data, size_t size, + enum uri_parse_flags flags, const char **error_r); +/* Check whether the provided string is a valid absolute RFC3986 URI. + Returns 0 if valid and -1 otherwise. */ +int uri_check(const char *uri, enum uri_parse_flags, + const char **error_r); + +/* + * Generic URI construction + */ + +/* encodes the '\0'-terminated data using the percent encoding. The + esc_table is a 256 byte lookup table. If none of the esc_mask bits are + set at the character's position in the esc_table, a character needs + to be encoded. Also, when esc_extra contains a character, it needs to + be encoded. All other characters are copied verbatim to the out buffer. + */ +void uri_data_encode(string_t *out, + const unsigned char esc_table[256], + unsigned char esc_mask, const char *esc_extra, + const char *data) ATTR_NULL(4); + +/* append the provided scheme to the out buffer */ +void uri_append_scheme(string_t *out, const char *scheme); + +/* append partial user data (i.e. some part of what comes before '@') to + the out buffer. No '@' is produced. Characters are percent-encoded when + necessary. Characters in esc are always percent-encoded, even when these + are valid 'userinfo' characters. */ +void uri_append_user_data(string_t *out, + const char *esc, const char *data) ATTR_NULL(2); +/* append userinfo and '@' to the out buffer. Characters in userinfo are + percent-encoded when necessary.*/ +void uri_append_userinfo(string_t *out, const char *userinfo); + +/* append the host name to the out buffer. Characters are percent-encoded + when necessary.*/ +void uri_append_host_name(string_t *out, const char *name); +/* append the host IP address to the out buffer. */ +void uri_append_host_ip(string_t *out, const struct ip_addr *host_ip); +/* encode the URI host struct to the out buffer. */ +void uri_append_host(string_t *out, const struct uri_host *host); +/* append the port to the out buffer. */ +void uri_append_port(string_t *out, in_port_t port); + +/* append partial path segment data to the out buffer. No '/' is produced. + Characters are percent-encoded when necessary. Characters in esc are + always percent-encoded, even when these are valid 'segment' characters. + */ +void uri_append_path_segment_data(string_t *out, + const char *esc, const char *data) ATTR_NULL(2); +/* append a full path segment to the out buffer. A leading '/' is + produced. Characters are percent-encoded when necessary. */ +void uri_append_path_segment(string_t *out, const char *segment); +/* append partial path data to the out buffer. The data may include '/', + which is not encoded. Characters are percent-encoded when necessary. + Characters in esc are always percent-encoded, even when these are + valid 'path' characters.*/ +void uri_append_path_data(string_t *out, + const char *esc, const char *data) ATTR_NULL(2); +/* append a full path to the out buffer. A leading '/' is produced. The + data may include more '/', which is not encoded. Characters are + percent-encoded when necessary. + */ +void uri_append_path(string_t *out, const char *path); + +/* append partial query data to the out buffer. No leading '?' is + produced. Characters are percent-encoded when necessary. Characters + in esc are always percent-encoded, even when these are valid 'query' + characters.*/ +void uri_append_query_data(string_t *out, + const char *esc, const char *data) ATTR_NULL(2); +/* append a full URI query part to the out buffer. A leading '?' is + produced. Characters are percent-encoded when necessary. */ +void uri_append_query(string_t *out, const char *query); + +/* append partial fragment data to the out buffer. No leading '#' is + produced. Characters are percent-encoded when necessary. Characters + in esc are always percent-encoded, even when these are valid + 'fragment' characters.*/ +void uri_append_fragment_data(string_t *out, + const char *esc, const char *data) ATTR_NULL(2); +/* append a full URI fragment part to the out buffer. A leading '#' is + produced. Characters are percent-encoded when necessary. */ +void uri_append_fragment(string_t *out, const char *fragment); + +/* append data to the out buffer and escape any reserved character */ +void uri_append_unreserved(string_t *out, const char *data); +/* append data to the out buffer and escape any reserved character except '/' */ +void uri_append_unreserved_path(string_t *out, const char *data); + +#endif |