diff options
Diffstat (limited to 'uri')
-rw-r--r-- | uri/apr_uri.c | 962 |
1 files changed, 962 insertions, 0 deletions
diff --git a/uri/apr_uri.c b/uri/apr_uri.c new file mode 100644 index 0000000..ca5c49d --- /dev/null +++ b/uri/apr_uri.c @@ -0,0 +1,962 @@ +/* Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +/* + * apr_uri.c: URI related utility things + * + */ + +#include <stdlib.h> + +#include "apu.h" +#include "apr.h" +#include "apr_general.h" +#include "apr_strings.h" + +#define APR_WANT_STRFUNC +#include "apr_want.h" + +#include "apr_uri.h" + +typedef struct schemes_t schemes_t; + +/** Structure to store various schemes and their default ports */ +struct schemes_t { + /** The name of the scheme */ + const char *name; + /** The default port for the scheme */ + apr_port_t default_port; +}; + +/* Some WWW schemes and their default ports; this is basically /etc/services */ +/* This will become global when the protocol abstraction comes */ +/* As the schemes are searched by a linear search, */ +/* they are sorted by their expected frequency */ +static schemes_t schemes[] = +{ + {"http", APR_URI_HTTP_DEFAULT_PORT}, + {"ftp", APR_URI_FTP_DEFAULT_PORT}, + {"https", APR_URI_HTTPS_DEFAULT_PORT}, + {"gopher", APR_URI_GOPHER_DEFAULT_PORT}, + {"ldap", APR_URI_LDAP_DEFAULT_PORT}, + {"nntp", APR_URI_NNTP_DEFAULT_PORT}, + {"snews", APR_URI_SNEWS_DEFAULT_PORT}, + {"imap", APR_URI_IMAP_DEFAULT_PORT}, + {"pop", APR_URI_POP_DEFAULT_PORT}, + {"sip", APR_URI_SIP_DEFAULT_PORT}, + {"rtsp", APR_URI_RTSP_DEFAULT_PORT}, + {"wais", APR_URI_WAIS_DEFAULT_PORT}, + {"z39.50r", APR_URI_WAIS_DEFAULT_PORT}, + {"z39.50s", APR_URI_WAIS_DEFAULT_PORT}, + {"prospero", APR_URI_PROSPERO_DEFAULT_PORT}, + {"nfs", APR_URI_NFS_DEFAULT_PORT}, + {"tip", APR_URI_TIP_DEFAULT_PORT}, + {"acap", APR_URI_ACAP_DEFAULT_PORT}, + {"telnet", APR_URI_TELNET_DEFAULT_PORT}, + {"ssh", APR_URI_SSH_DEFAULT_PORT}, + { NULL, 0xFFFF } /* unknown port */ +}; + +APU_DECLARE(apr_port_t) apr_uri_port_of_scheme(const char *scheme_str) +{ + schemes_t *scheme; + + if (scheme_str) { + for (scheme = schemes; scheme->name != NULL; ++scheme) { + if (strcasecmp(scheme_str, scheme->name) == 0) { + return scheme->default_port; + } + } + } + return 0; +} + +/* Unparse a apr_uri_t structure to an URI string. + * Optionally suppress the password for security reasons. + */ +APU_DECLARE(char *) apr_uri_unparse(apr_pool_t *p, + const apr_uri_t *uptr, + unsigned flags) +{ + char *ret = ""; + + /* If suppressing the site part, omit both user name & scheme://hostname */ + if (!(flags & APR_URI_UNP_OMITSITEPART)) { + + /* Construct a "user:password@" string, honoring the passed + * APR_URI_UNP_ flags: */ + if (uptr->user || uptr->password) { + ret = apr_pstrcat(p, + (uptr->user && !(flags & APR_URI_UNP_OMITUSER)) + ? uptr->user : "", + (uptr->password && !(flags & APR_URI_UNP_OMITPASSWORD)) + ? ":" : "", + (uptr->password && !(flags & APR_URI_UNP_OMITPASSWORD)) + ? ((flags & APR_URI_UNP_REVEALPASSWORD) + ? uptr->password : "XXXXXXXX") + : "", + ((uptr->user && !(flags & APR_URI_UNP_OMITUSER)) || + (uptr->password && !(flags & APR_URI_UNP_OMITPASSWORD))) + ? "@" : "", + NULL); + } + + /* Construct scheme://site string */ + if (uptr->hostname) { + int is_default_port; + const char *lbrk = "", *rbrk = ""; + + if (strchr(uptr->hostname, ':')) { /* v6 literal */ + lbrk = "["; + rbrk = "]"; + } + + is_default_port = + (uptr->port_str == NULL || + uptr->port == 0 || + uptr->port == apr_uri_port_of_scheme(uptr->scheme)); + + ret = apr_pstrcat(p, "//", ret, lbrk, uptr->hostname, rbrk, + is_default_port ? "" : ":", + is_default_port ? "" : uptr->port_str, + NULL); + } + if (uptr->scheme) { + ret = apr_pstrcat(p, uptr->scheme, ":", ret, NULL); + } + } + + /* Should we suppress all path info? */ + if (!(flags & APR_URI_UNP_OMITPATHINFO)) { + /* Append path, query and fragment strings: */ + ret = apr_pstrcat(p, + ret, + (uptr->path) + ? uptr->path : "", + (uptr->query && !(flags & APR_URI_UNP_OMITQUERY)) + ? "?" : "", + (uptr->query && !(flags & APR_URI_UNP_OMITQUERY)) + ? uptr->query : "", + (uptr->fragment && !(flags & APR_URI_UNP_OMITQUERY)) + ? "#" : NULL, + (uptr->fragment && !(flags & APR_URI_UNP_OMITQUERY)) + ? uptr->fragment : NULL, + NULL); + } + return ret; +} + +/* Here is the hand-optimized parse_uri_components(). There are some wild + * tricks we could pull in assembly language that we don't pull here... like we + * can do word-at-time scans for delimiter characters using the same technique + * that fast memchr()s use. But that would be way non-portable. -djg + */ + +/* We have a apr_table_t that we can index by character and it tells us if the + * character is one of the interesting delimiters. Note that we even get + * compares for NUL for free -- it's just another delimiter. + */ + +#define T_SLASH 0x01 /* '/' */ +#define T_QUESTION 0x02 /* '?' */ +#define T_HASH 0x04 /* '#' */ +#define T_ALPHA 0x08 /* 'A' ... 'Z', 'a' ... 'z' */ +#define T_SCHEME 0x10 /* '0' ... '9', '-', '+', '.' + * (allowed in scheme except first char) + */ +#define T_NUL 0x80 /* '\0' */ + +#if APR_CHARSET_EBCDIC +/* Delimiter table for the EBCDIC character set */ +static const unsigned char uri_delims[256] = { + T_NUL, /* 0x00 */ + 0, /* 0x01 */ + 0, /* 0x02 */ + 0, /* 0x03 */ + 0, /* 0x04 */ + 0, /* 0x05 */ + 0, /* 0x06 */ + 0, /* 0x07 */ + 0, /* 0x08 */ + 0, /* 0x09 */ + 0, /* 0x0a */ + 0, /* 0x0b */ + 0, /* 0x0c */ + 0, /* 0x0d */ + 0, /* 0x0e */ + 0, /* 0x0f */ + 0, /* 0x10 */ + 0, /* 0x11 */ + 0, /* 0x12 */ + 0, /* 0x13 */ + 0, /* 0x14 */ + 0, /* 0x15 */ + 0, /* 0x16 */ + 0, /* 0x17 */ + 0, /* 0x18 */ + 0, /* 0x19 */ + 0, /* 0x1a */ + 0, /* 0x1b */ + 0, /* 0x1c */ + 0, /* 0x1d */ + 0, /* 0x1e */ + 0, /* 0x1f */ + 0, /* 0x20 */ + 0, /* 0x21 */ + 0, /* 0x22 */ + 0, /* 0x23 */ + 0, /* 0x24 */ + 0, /* 0x25 */ + 0, /* 0x26 */ + 0, /* 0x27 */ + 0, /* 0x28 */ + 0, /* 0x29 */ + 0, /* 0x2a */ + 0, /* 0x2b */ + 0, /* 0x2c */ + 0, /* 0x2d */ + 0, /* 0x2e */ + 0, /* 0x2f */ + 0, /* 0x30 */ + 0, /* 0x31 */ + 0, /* 0x32 */ + 0, /* 0x33 */ + 0, /* 0x34 */ + 0, /* 0x35 */ + 0, /* 0x36 */ + 0, /* 0x37 */ + 0, /* 0x38 */ + 0, /* 0x39 */ + 0, /* 0x3a */ + 0, /* 0x3b */ + 0, /* 0x3c */ + 0, /* 0x3d */ + 0, /* 0x3e */ + 0, /* 0x3f */ + 0, /* 0x40 ' ' */ + 0, /* 0x41 */ + 0, /* 0x42 */ + 0, /* 0x43 */ + 0, /* 0x44 */ + 0, /* 0x45 */ + 0, /* 0x46 */ + 0, /* 0x47 */ + 0, /* 0x48 */ + 0, /* 0x49 */ + 0, /* 0x4a '[' */ + T_SCHEME, /* 0x4b '.' */ + 0, /* 0x4c '<' */ + 0, /* 0x4d '(' */ + T_SCHEME, /* 0x4e '+' */ + 0, /* 0x4f '!' */ + 0, /* 0x50 '&' */ + 0, /* 0x51 */ + 0, /* 0x52 */ + 0, /* 0x53 */ + 0, /* 0x54 */ + 0, /* 0x55 */ + 0, /* 0x56 */ + 0, /* 0x57 */ + 0, /* 0x58 */ + 0, /* 0x59 */ + 0, /* 0x5a ']' */ + 0, /* 0x5b '$' */ + 0, /* 0x5c '*' */ + 0, /* 0x5d ')' */ + 0, /* 0x5e ';' */ + 0, /* 0x5f '^' */ + T_SCHEME, /* 0x60 '-' */ + T_SLASH, /* 0x61 '/' */ + 0, /* 0x62 */ + 0, /* 0x63 */ + 0, /* 0x64 */ + 0, /* 0x65 */ + 0, /* 0x66 */ + 0, /* 0x67 */ + 0, /* 0x68 */ + 0, /* 0x69 */ + 0, /* 0x6a '|' */ + 0, /* 0x6b ',' */ + 0, /* 0x6c '%' */ + 0, /* 0x6d '_' */ + 0, /* 0x6e '>' */ + T_QUESTION, /* 0x6f '?' */ + 0, /* 0x70 */ + 0, /* 0x71 */ + 0, /* 0x72 */ + 0, /* 0x73 */ + 0, /* 0x74 */ + 0, /* 0x75 */ + 0, /* 0x76 */ + 0, /* 0x77 */ + 0, /* 0x78 */ + 0, /* 0x79 '`' */ + 0, /* 0x7a ':' */ + T_HASH, /* 0x7b '#' */ + 0, /* 0x7c '@' */ + 0, /* 0x7d ''' */ + 0, /* 0x7e '=' */ + 0, /* 0x7f '"' */ + 0, /* 0x80 */ + T_ALPHA, /* 0x81 'a' */ + T_ALPHA, /* 0x82 'b' */ + T_ALPHA, /* 0x83 'c' */ + T_ALPHA, /* 0x84 'd' */ + T_ALPHA, /* 0x85 'e' */ + T_ALPHA, /* 0x86 'f' */ + T_ALPHA, /* 0x87 'g' */ + T_ALPHA, /* 0x88 'h' */ + T_ALPHA, /* 0x89 'i' */ + 0, /* 0x8a */ + 0, /* 0x8b */ + 0, /* 0x8c */ + 0, /* 0x8d */ + 0, /* 0x8e */ + 0, /* 0x8f */ + 0, /* 0x90 */ + T_ALPHA, /* 0x91 'j' */ + T_ALPHA, /* 0x92 'k' */ + T_ALPHA, /* 0x93 'l' */ + T_ALPHA, /* 0x94 'm' */ + T_ALPHA, /* 0x95 'n' */ + T_ALPHA, /* 0x96 'o' */ + T_ALPHA, /* 0x97 'p' */ + T_ALPHA, /* 0x98 'q' */ + T_ALPHA, /* 0x99 'r' */ + 0, /* 0x9a */ + 0, /* 0x9b */ + 0, /* 0x9c */ + 0, /* 0x9d */ + 0, /* 0x9e */ + 0, /* 0x9f */ + 0, /* 0xa0 */ + 0, /* 0xa1 '~' */ + T_ALPHA, /* 0xa2 's' */ + T_ALPHA, /* 0xa3 't' */ + T_ALPHA, /* 0xa4 'u' */ + T_ALPHA, /* 0xa5 'v' */ + T_ALPHA, /* 0xa6 'w' */ + T_ALPHA, /* 0xa7 'x' */ + T_ALPHA, /* 0xa8 'y' */ + T_ALPHA, /* 0xa9 'z' */ + 0, /* 0xaa */ + 0, /* 0xab */ + 0, /* 0xac */ + 0, /* 0xad */ + 0, /* 0xae */ + 0, /* 0xaf */ + 0, /* 0xb0 */ + 0, /* 0xb1 */ + 0, /* 0xb2 */ + 0, /* 0xb3 */ + 0, /* 0xb4 */ + 0, /* 0xb5 */ + 0, /* 0xb6 */ + 0, /* 0xb7 */ + 0, /* 0xb8 */ + 0, /* 0xb9 */ + 0, /* 0xba */ + 0, /* 0xbb */ + 0, /* 0xbc */ + 0, /* 0xbd */ + 0, /* 0xbe */ + 0, /* 0xbf */ + 0, /* 0xc0 '{' */ + T_ALPHA, /* 0xc1 'A' */ + T_ALPHA, /* 0xc2 'B' */ + T_ALPHA, /* 0xc3 'C' */ + T_ALPHA, /* 0xc4 'D' */ + T_ALPHA, /* 0xc5 'E' */ + T_ALPHA, /* 0xc6 'F' */ + T_ALPHA, /* 0xc7 'G' */ + T_ALPHA, /* 0xc8 'H' */ + T_ALPHA, /* 0xc9 'I' */ + 0, /* 0xca */ + 0, /* 0xcb */ + 0, /* 0xcc */ + 0, /* 0xcd */ + 0, /* 0xce */ + 0, /* 0xcf */ + 0, /* 0xd0 '}' */ + T_ALPHA, /* 0xd1 'J' */ + T_ALPHA, /* 0xd2 'K' */ + T_ALPHA, /* 0xd3 'L' */ + T_ALPHA, /* 0xd4 'M' */ + T_ALPHA, /* 0xd5 'N' */ + T_ALPHA, /* 0xd6 'O' */ + T_ALPHA, /* 0xd7 'P' */ + T_ALPHA, /* 0xd8 'Q' */ + T_ALPHA, /* 0xd9 'R' */ + 0, /* 0xda */ + 0, /* 0xdb */ + 0, /* 0xdc */ + 0, /* 0xdd */ + 0, /* 0xde */ + 0, /* 0xdf */ + 0, /* 0xe0 '\' */ + 0, /* 0xe1 */ + T_ALPHA, /* 0xe2 'S' */ + T_ALPHA, /* 0xe3 'T' */ + T_ALPHA, /* 0xe4 'U' */ + T_ALPHA, /* 0xe5 'V' */ + T_ALPHA, /* 0xe6 'W' */ + T_ALPHA, /* 0xe7 'X' */ + T_ALPHA, /* 0xe8 'Y' */ + T_ALPHA, /* 0xe9 'Z' */ + 0, /* 0xea */ + 0, /* 0xeb */ + 0, /* 0xec */ + 0, /* 0xed */ + 0, /* 0xee */ + 0, /* 0xef */ + T_SCHEME, /* 0xf0 '0' */ + T_SCHEME, /* 0xf1 '1' */ + T_SCHEME, /* 0xf2 '2' */ + T_SCHEME, /* 0xf3 '3' */ + T_SCHEME, /* 0xf4 '4' */ + T_SCHEME, /* 0xf5 '5' */ + T_SCHEME, /* 0xf6 '6' */ + T_SCHEME, /* 0xf7 '7' */ + T_SCHEME, /* 0xf8 '8' */ + T_SCHEME, /* 0xf9 '9' */ + 0, /* 0xfa */ + 0, /* 0xfb */ + 0, /* 0xfc */ + 0, /* 0xfd */ + 0, /* 0xfe */ + 0 /* 0xff */ +}; +#else +/* Delimiter table for the ASCII character set */ +static const unsigned char uri_delims[256] = { + T_NUL, /* 0x00 */ + 0, /* 0x01 */ + 0, /* 0x02 */ + 0, /* 0x03 */ + 0, /* 0x04 */ + 0, /* 0x05 */ + 0, /* 0x06 */ + 0, /* 0x07 */ + 0, /* 0x08 */ + 0, /* 0x09 */ + 0, /* 0x0a */ + 0, /* 0x0b */ + 0, /* 0x0c */ + 0, /* 0x0d */ + 0, /* 0x0e */ + 0, /* 0x0f */ + 0, /* 0x10 */ + 0, /* 0x11 */ + 0, /* 0x12 */ + 0, /* 0x13 */ + 0, /* 0x14 */ + 0, /* 0x15 */ + 0, /* 0x16 */ + 0, /* 0x17 */ + 0, /* 0x18 */ + 0, /* 0x19 */ + 0, /* 0x1a */ + 0, /* 0x1b */ + 0, /* 0x1c */ + 0, /* 0x1d */ + 0, /* 0x1e */ + 0, /* 0x1f */ + 0, /* 0x20 ' ' */ + 0, /* 0x21 '!' */ + 0, /* 0x22 '"' */ + T_HASH, /* 0x23 '#' */ + 0, /* 0x24 '$' */ + 0, /* 0x25 '%' */ + 0, /* 0x26 '&' */ + 0, /* 0x27 ''' */ + 0, /* 0x28 '(' */ + 0, /* 0x29 ')' */ + 0, /* 0x2a '*' */ + T_SCHEME, /* 0x2b '+' */ + 0, /* 0x2c ',' */ + T_SCHEME, /* 0x2d '-' */ + T_SCHEME, /* 0x2e '.' */ + T_SLASH, /* 0x2f '/' */ + T_SCHEME, /* 0x30 '0' */ + T_SCHEME, /* 0x31 '1' */ + T_SCHEME, /* 0x32 '2' */ + T_SCHEME, /* 0x33 '3' */ + T_SCHEME, /* 0x34 '4' */ + T_SCHEME, /* 0x35 '5' */ + T_SCHEME, /* 0x36 '6' */ + T_SCHEME, /* 0x37 '7' */ + T_SCHEME, /* 0x38 '8' */ + T_SCHEME, /* 0x39 '9' */ + 0, /* 0x3a ':' */ + 0, /* 0x3b ';' */ + 0, /* 0x3c '<' */ + 0, /* 0x3d '=' */ + 0, /* 0x3e '>' */ + T_QUESTION, /* 0x3f '?' */ + 0, /* 0x40 '@' */ + T_ALPHA, /* 0x41 'A' */ + T_ALPHA, /* 0x42 'B' */ + T_ALPHA, /* 0x43 'C' */ + T_ALPHA, /* 0x44 'D' */ + T_ALPHA, /* 0x45 'E' */ + T_ALPHA, /* 0x46 'F' */ + T_ALPHA, /* 0x47 'G' */ + T_ALPHA, /* 0x48 'H' */ + T_ALPHA, /* 0x49 'I' */ + T_ALPHA, /* 0x4a 'J' */ + T_ALPHA, /* 0x4b 'K' */ + T_ALPHA, /* 0x4c 'L' */ + T_ALPHA, /* 0x4d 'M' */ + T_ALPHA, /* 0x4e 'N' */ + T_ALPHA, /* 0x4f 'O' */ + T_ALPHA, /* 0x50 'P' */ + T_ALPHA, /* 0x51 'Q' */ + T_ALPHA, /* 0x52 'R' */ + T_ALPHA, /* 0x53 'S' */ + T_ALPHA, /* 0x54 'T' */ + T_ALPHA, /* 0x55 'U' */ + T_ALPHA, /* 0x56 'V' */ + T_ALPHA, /* 0x57 'W' */ + T_ALPHA, /* 0x58 'X' */ + T_ALPHA, /* 0x59 'Y' */ + T_ALPHA, /* 0x5a 'Z' */ + 0, /* 0x5b '[' */ + 0, /* 0x5c '\' */ + 0, /* 0x5d ']' */ + 0, /* 0x5e '^' */ + 0, /* 0x5f '_' */ + 0, /* 0x60 '`' */ + T_ALPHA, /* 0x61 'a' */ + T_ALPHA, /* 0x62 'b' */ + T_ALPHA, /* 0x63 'c' */ + T_ALPHA, /* 0x64 'd' */ + T_ALPHA, /* 0x65 'e' */ + T_ALPHA, /* 0x66 'f' */ + T_ALPHA, /* 0x67 'g' */ + T_ALPHA, /* 0x68 'h' */ + T_ALPHA, /* 0x69 'i' */ + T_ALPHA, /* 0x6a 'j' */ + T_ALPHA, /* 0x6b 'k' */ + T_ALPHA, /* 0x6c 'l' */ + T_ALPHA, /* 0x6d 'm' */ + T_ALPHA, /* 0x6e 'n' */ + T_ALPHA, /* 0x6f 'o' */ + T_ALPHA, /* 0x70 'p' */ + T_ALPHA, /* 0x71 'q' */ + T_ALPHA, /* 0x72 'r' */ + T_ALPHA, /* 0x73 's' */ + T_ALPHA, /* 0x74 't' */ + T_ALPHA, /* 0x75 'u' */ + T_ALPHA, /* 0x76 'v' */ + T_ALPHA, /* 0x77 'w' */ + T_ALPHA, /* 0x78 'x' */ + T_ALPHA, /* 0x79 'y' */ + T_ALPHA, /* 0x7a 'z' */ + 0, /* 0x7b '{' */ + 0, /* 0x7c '|' */ + 0, /* 0x7d '}' */ + 0, /* 0x7e '~' */ + 0, /* 0x7f */ + 0, /* 0x80 */ + 0, /* 0x81 */ + 0, /* 0x82 */ + 0, /* 0x83 */ + 0, /* 0x84 */ + 0, /* 0x85 */ + 0, /* 0x86 */ + 0, /* 0x87 */ + 0, /* 0x88 */ + 0, /* 0x89 */ + 0, /* 0x8a */ + 0, /* 0x8b */ + 0, /* 0x8c */ + 0, /* 0x8d */ + 0, /* 0x8e */ + 0, /* 0x8f */ + 0, /* 0x90 */ + 0, /* 0x91 */ + 0, /* 0x92 */ + 0, /* 0x93 */ + 0, /* 0x94 */ + 0, /* 0x95 */ + 0, /* 0x96 */ + 0, /* 0x97 */ + 0, /* 0x98 */ + 0, /* 0x99 */ + 0, /* 0x9a */ + 0, /* 0x9b */ + 0, /* 0x9c */ + 0, /* 0x9d */ + 0, /* 0x9e */ + 0, /* 0x9f */ + 0, /* 0xa0 */ + 0, /* 0xa1 */ + 0, /* 0xa2 */ + 0, /* 0xa3 */ + 0, /* 0xa4 */ + 0, /* 0xa5 */ + 0, /* 0xa6 */ + 0, /* 0xa7 */ + 0, /* 0xa8 */ + 0, /* 0xa9 */ + 0, /* 0xaa */ + 0, /* 0xab */ + 0, /* 0xac */ + 0, /* 0xad */ + 0, /* 0xae */ + 0, /* 0xaf */ + 0, /* 0xb0 */ + 0, /* 0xb1 */ + 0, /* 0xb2 */ + 0, /* 0xb3 */ + 0, /* 0xb4 */ + 0, /* 0xb5 */ + 0, /* 0xb6 */ + 0, /* 0xb7 */ + 0, /* 0xb8 */ + 0, /* 0xb9 */ + 0, /* 0xba */ + 0, /* 0xbb */ + 0, /* 0xbc */ + 0, /* 0xbd */ + 0, /* 0xbe */ + 0, /* 0xbf */ + 0, /* 0xc0 */ + 0, /* 0xc1 */ + 0, /* 0xc2 */ + 0, /* 0xc3 */ + 0, /* 0xc4 */ + 0, /* 0xc5 */ + 0, /* 0xc6 */ + 0, /* 0xc7 */ + 0, /* 0xc8 */ + 0, /* 0xc9 */ + 0, /* 0xca */ + 0, /* 0xcb */ + 0, /* 0xcc */ + 0, /* 0xcd */ + 0, /* 0xce */ + 0, /* 0xcf */ + 0, /* 0xd0 */ + 0, /* 0xd1 */ + 0, /* 0xd2 */ + 0, /* 0xd3 */ + 0, /* 0xd4 */ + 0, /* 0xd5 */ + 0, /* 0xd6 */ + 0, /* 0xd7 */ + 0, /* 0xd8 */ + 0, /* 0xd9 */ + 0, /* 0xda */ + 0, /* 0xdb */ + 0, /* 0xdc */ + 0, /* 0xdd */ + 0, /* 0xde */ + 0, /* 0xdf */ + 0, /* 0xe0 */ + 0, /* 0xe1 */ + 0, /* 0xe2 */ + 0, /* 0xe3 */ + 0, /* 0xe4 */ + 0, /* 0xe5 */ + 0, /* 0xe6 */ + 0, /* 0xe7 */ + 0, /* 0xe8 */ + 0, /* 0xe9 */ + 0, /* 0xea */ + 0, /* 0xeb */ + 0, /* 0xec */ + 0, /* 0xed */ + 0, /* 0xee */ + 0, /* 0xef */ + 0, /* 0xf0 */ + 0, /* 0xf1 */ + 0, /* 0xf2 */ + 0, /* 0xf3 */ + 0, /* 0xf4 */ + 0, /* 0xf5 */ + 0, /* 0xf6 */ + 0, /* 0xf7 */ + 0, /* 0xf8 */ + 0, /* 0xf9 */ + 0, /* 0xfa */ + 0, /* 0xfb */ + 0, /* 0xfc */ + 0, /* 0xfd */ + 0, /* 0xfe */ + 0 /* 0xff */ +}; +#endif + + +/* it works like this: + if (uri_delims[ch] & NOTEND_foobar) { + then we're not at a delimiter for foobar + } +*/ + +#define NOTEND_HOSTINFO (T_SLASH | T_QUESTION | T_HASH | T_NUL) +#define NOTEND_PATH (T_QUESTION | T_HASH | T_NUL) + +/* parse_uri_components(): + * Parse a given URI, fill in all supplied fields of a uri_components + * structure. This eliminates the necessity of extracting host, port, + * path, query info repeatedly in the modules. + * Side effects: + * - fills in fields of uri_components *uptr + * - none on any of the r->* fields + */ +APU_DECLARE(apr_status_t) apr_uri_parse(apr_pool_t *p, const char *uri, + apr_uri_t *uptr) +{ + const char *s; + const char *s1; + const char *hostinfo; + char *endstr; + int port; + int v6_offset1 = 0, v6_offset2 = 0; + + /* Initialize the structure. parse_uri() and parse_uri_components() + * can be called more than once per request. + */ + memset (uptr, '\0', sizeof(*uptr)); + uptr->is_initialized = 1; + + /* We assume the processor has a branch predictor like most -- + * it assumes forward branches are untaken and backwards are taken. That's + * the reason for the gotos. -djg + */ + if (uri[0] == '/') { + /* RFC2396 #4.3 says that two leading slashes mean we have an + * authority component, not a path! Fixing this looks scary + * with the gotos here. But if the existing logic is valid, + * then presumably a goto pointing to deal_with_authority works. + * + * RFC2396 describes this as resolving an ambiguity. In the + * case of three or more slashes there would seem to be no + * ambiguity, so it is a path after all. + */ + if (uri[1] == '/' && uri[2] != '/') { + s = uri + 2 ; + goto deal_with_authority ; + } + +deal_with_path: + /* we expect uri to point to first character of path ... remember + * that the path could be empty -- http://foobar?query for example + */ + s = uri; + while ((uri_delims[*(unsigned char *)s] & NOTEND_PATH) == 0) { + ++s; + } + if (s != uri) { + uptr->path = apr_pstrmemdup(p, uri, s - uri); + } + if (*s == 0) { + return APR_SUCCESS; + } + if (*s == '?') { + ++s; + s1 = strchr(s, '#'); + if (s1) { + uptr->fragment = apr_pstrdup(p, s1 + 1); + uptr->query = apr_pstrmemdup(p, s, s1 - s); + } + else { + uptr->query = apr_pstrdup(p, s); + } + return APR_SUCCESS; + } + /* otherwise it's a fragment */ + uptr->fragment = apr_pstrdup(p, s + 1); + return APR_SUCCESS; + } + + /* find the scheme: */ + s = uri; + /* first char must be letter */ + if (uri_delims[*(unsigned char *)s] & T_ALPHA) { + ++s; + while ((uri_delims[*(unsigned char *)s] & (T_ALPHA|T_SCHEME))) + ++s; + } + /* scheme must be non-empty and followed by : */ + if (s != uri && s[0] == ':') { + uptr->scheme = apr_pstrmemdup(p, uri, s - uri); + s++; + } + else { + /* No valid scheme, restart from the beginning */ + s = uri; + } + + if (s[0] != '/' || s[1] != '/') { + if (uri == s) { + /* + * RFC 3986 3.3: If we have no scheme and no authority, + * the leading segment of a relative path must not contain a ':'. + */ + char *first_slash = strchr(uri, '/'); + if (first_slash) { + while (s < first_slash) { + if (s[0] == ':') + return APR_EGENERAL; + ++s; + } + /* no scheme but relative path, e.g. '../image.jpg' */ + } + else { + if (strchr(uri, ':') != NULL) + return APR_EGENERAL; + /* no scheme, no slash, but relative path, e.g. 'image.jpg' */ + } + goto deal_with_path; + } + /* scheme and relative path */ + uri = s; + goto deal_with_path; + } + + s += 2; + +deal_with_authority: + hostinfo = s; + while ((uri_delims[*(unsigned char *)s] & NOTEND_HOSTINFO) == 0) { + ++s; + } + uri = s; /* whatever follows hostinfo is start of uri */ + uptr->hostinfo = apr_pstrmemdup(p, hostinfo, uri - hostinfo); + + /* If there's a username:password@host:port, the @ we want is the last @... + * too bad there's no memrchr()... For the C purists, note that hostinfo + * is definitely not the first character of the original uri so therefore + * &hostinfo[-1] < &hostinfo[0] ... and this loop is valid C. + */ + do { + --s; + } while (s >= hostinfo && *s != '@'); + if (s < hostinfo) { + /* again we want the common case to be fall through */ +deal_with_host: + /* We expect hostinfo to point to the first character of + * the hostname. If there's a port it is the first colon, + * except with IPv6. + */ + if (*hostinfo == '[') { + v6_offset1 = 1; + v6_offset2 = 2; + s = memchr(hostinfo, ']', uri - hostinfo); + if (s == NULL) { + return APR_EGENERAL; + } + if (*++s != ':') { + s = NULL; /* no port */ + } + } + else { + s = memchr(hostinfo, ':', uri - hostinfo); + } + if (s == NULL) { + /* we expect the common case to have no port */ + uptr->hostname = apr_pstrmemdup(p, + hostinfo + v6_offset1, + uri - hostinfo - v6_offset2); + goto deal_with_path; + } + uptr->hostname = apr_pstrmemdup(p, + hostinfo + v6_offset1, + s - hostinfo - v6_offset2); + ++s; + uptr->port_str = apr_pstrmemdup(p, s, uri - s); + if (uri != s) { + port = strtol(uptr->port_str, &endstr, 10); + uptr->port = port; + if (*endstr == '\0') { + goto deal_with_path; + } + /* Invalid characters after ':' found */ + return APR_EGENERAL; + } + uptr->port = apr_uri_port_of_scheme(uptr->scheme); + goto deal_with_path; + } + + /* first colon delimits username:password */ + s1 = memchr(hostinfo, ':', s - hostinfo); + if (s1) { + uptr->user = apr_pstrmemdup(p, hostinfo, s1 - hostinfo); + ++s1; + uptr->password = apr_pstrmemdup(p, s1, s - s1); + } + else { + uptr->user = apr_pstrmemdup(p, hostinfo, s - hostinfo); + } + hostinfo = s + 1; + goto deal_with_host; +} + +/* Special case for CONNECT parsing: it comes with the hostinfo part only */ +/* See the INTERNET-DRAFT document "Tunneling SSL Through a WWW Proxy" + * currently at http://www.mcom.com/newsref/std/tunneling_ssl.html + * for the format of the "CONNECT host:port HTTP/1.0" request + */ +APU_DECLARE(apr_status_t) apr_uri_parse_hostinfo(apr_pool_t *p, + const char *hostinfo, + apr_uri_t *uptr) +{ + const char *s; + char *endstr; + const char *rsb; + int v6_offset1 = 0; + + /* Initialize the structure. parse_uri() and parse_uri_components() + * can be called more than once per request. + */ + memset(uptr, '\0', sizeof(*uptr)); + uptr->is_initialized = 1; + uptr->hostinfo = apr_pstrdup(p, hostinfo); + + /* We expect hostinfo to point to the first character of + * the hostname. There must be a port, separated by a colon + */ + if (*hostinfo == '[') { + if ((rsb = strchr(hostinfo, ']')) == NULL || + *(rsb + 1) != ':') { + return APR_EGENERAL; + } + /* literal IPv6 address */ + s = rsb + 1; + ++hostinfo; + v6_offset1 = 1; + } + else { + s = strchr(hostinfo, ':'); + } + if (s == NULL) { + return APR_EGENERAL; + } + uptr->hostname = apr_pstrndup(p, hostinfo, s - hostinfo - v6_offset1); + ++s; + uptr->port_str = apr_pstrdup(p, s); + if (*s != '\0') { + uptr->port = (unsigned short) strtol(uptr->port_str, &endstr, 10); + if (*endstr == '\0') { + return APR_SUCCESS; + } + /* Invalid characters after ':' found */ + } + return APR_EGENERAL; +} |