diff options
Diffstat (limited to 'ncat/http.c')
-rw-r--r-- | ncat/http.c | 1633 |
1 files changed, 1633 insertions, 0 deletions
diff --git a/ncat/http.c b/ncat/http.c new file mode 100644 index 0000000..1a25988 --- /dev/null +++ b/ncat/http.c @@ -0,0 +1,1633 @@ +/*************************************************************************** + * http.c -- HTTP network interaction, parsing, and construction. * + ***********************IMPORTANT NMAP LICENSE TERMS************************ + * + * The Nmap Security Scanner is (C) 1996-2023 Nmap Software LLC ("The Nmap + * Project"). Nmap is also a registered trademark of the Nmap Project. + * + * This program is distributed under the terms of the Nmap Public Source + * License (NPSL). The exact license text applying to a particular Nmap + * release or source code control revision is contained in the LICENSE + * file distributed with that version of Nmap or source code control + * revision. More Nmap copyright/legal information is available from + * https://nmap.org/book/man-legal.html, and further information on the + * NPSL license itself can be found at https://nmap.org/npsl/ . This + * header summarizes some key points from the Nmap license, but is no + * substitute for the actual license text. + * + * Nmap is generally free for end users to download and use themselves, + * including commercial use. It is available from https://nmap.org. + * + * The Nmap license generally prohibits companies from using and + * redistributing Nmap in commercial products, but we sell a special Nmap + * OEM Edition with a more permissive license and special features for + * this purpose. See https://nmap.org/oem/ + * + * If you have received a written Nmap license agreement or contract + * stating terms other than these (such as an Nmap OEM license), you may + * choose to use and redistribute Nmap under those terms instead. + * + * The official Nmap Windows builds include the Npcap software + * (https://npcap.com) for packet capture and transmission. It is under + * separate license terms which forbid redistribution without special + * permission. So the official Nmap Windows builds may not be redistributed + * without special permission (such as an Nmap OEM license). + * + * Source is provided to this software because we believe users have a + * right to know exactly what a program is going to do before they run it. + * This also allows you to audit the software for security holes. + * + * Source code also allows you to port Nmap to new platforms, fix bugs, and add + * new features. You are highly encouraged to submit your changes as a Github PR + * or by email to the dev@nmap.org mailing list for possible incorporation into + * the main distribution. Unless you specify otherwise, it is understood that + * you are offering us very broad rights to use your submissions as described in + * the Nmap Public Source License Contributor Agreement. This is important + * because we fund the project by selling licenses with various terms, and also + * because the inability to relicense code has caused devastating problems for + * other Free Software projects (such as KDE and NASM). + * + * The free version of Nmap is distributed in the hope that it will be + * useful, but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. Warranties, + * indemnification and commercial support are all available through the + * Npcap OEM program--see https://nmap.org/oem/ + * + ***************************************************************************/ + +/* $Id$ */ + +#include <string.h> + +#include "base64.h" +#include "ncat.h" +#include "http.h" + +/* Limit the size of in-memory data structures to avoid certain denial of + service attacks (those trying to consume all available memory). */ +static const int MAX_REQUEST_LINE_LENGTH = 1024; +static const int MAX_STATUS_LINE_LENGTH = 1024; +static const int MAX_HEADER_LENGTH = 1024 * 10; + +void socket_buffer_init(struct socket_buffer *buf, int sd) +{ + buf->fdn.fd = sd; +#ifdef HAVE_OPENSSL + buf->fdn.ssl = NULL; +#endif + buf->p = buf->buffer; + buf->end = buf->p; +} + +/* Read from a stateful socket buffer. If there is any data in the buffer it is + returned, otherwise data is read with recv. Return value is as for recv. */ +int socket_buffer_read(struct socket_buffer *buf, char *out, size_t size) +{ + int i; + + /* Refill the buffer if necessary. */ + if (buf->p >= buf->end) { + buf->p = buf->buffer; + do { + errno = 0; + i = fdinfo_recv(&buf->fdn, buf->buffer, sizeof(buf->buffer)); + } while (i == -1 && errno == EINTR); + if (i <= 0) + return i; + buf->end = buf->buffer + i; + } + i = buf->end - buf->p; + if (i > size) + i = size; + memcpy(out, buf->p, i); + buf->p += i; + + return i; +} + +/* Read a line thorough a stateful socket buffer. The line, including its '\n', + is returned in a dynamically allocated buffer. The length of the line is + returned in *n. If the length of the line exceeds maxlen, then NULL is + returned and *n is greater than or equal to maxlen. On error, NULL is + returned and *n is less than maxlen. The returned buffer is always + null-terminated if the return value is not NULL. */ +char *socket_buffer_readline(struct socket_buffer *buf, size_t *n, size_t maxlen) +{ + char *line; + char *newline; + size_t count; + + line = NULL; + *n = 0; + + do { + /* Refill the buffer if necessary. */ + if (buf->p >= buf->end) { + int i; + + buf->p = buf->buffer; + do { + errno = 0; + i = fdinfo_recv(&buf->fdn, buf->buffer, sizeof(buf->buffer)); + } while (i == -1 && errno == EINTR); + if (i <= 0) { + free(line); + return NULL; + } + buf->end = buf->buffer + i; + } + + newline = (char *) memchr(buf->p, '\n', buf->end - buf->p); + if (newline == NULL) + count = buf->end - buf->p; + else + count = newline + 1 - buf->p; + + if (*n + count >= maxlen) { + /* Line exceeds our maximum length. */ + free(line); + *n += count; + return NULL; + } + + line = (char *) safe_realloc(line, *n + count + 1); + memcpy(line + *n, buf->p, count); + *n += count; + buf->p += count; + } while (newline == NULL); + + line[*n] = '\0'; + + return line; +} + +/* This is like socket_buffer_read, except that it blocks until it can read all + size bytes. If fewer than size bytes are available, it reads them and returns + -1. */ +int socket_buffer_readcount(struct socket_buffer *buf, char *out, size_t size) +{ + size_t n = 0; + int i; + + while (n < size) { + /* Refill the buffer if necessary. */ + if (buf->p >= buf->end) { + buf->p = buf->buffer; + do { + errno = 0; + i = fdinfo_recv(&buf->fdn, buf->buffer, sizeof(buf->buffer)); + } while (i == -1 && errno == EINTR); + if (i <= 0) + return -1; + buf->end = buf->buffer + i; + } + i = buf->end - buf->p; + if (i < size - n) { + memcpy(out + n, buf->p, i); + buf->p += i; + n += i; + } else { + memcpy(out + n, buf->p, size - n); + buf->p += size - n; + n += size - n; + } + } + + return n; +} + +/* Get whatever is left in the buffer. */ +char *socket_buffer_remainder(struct socket_buffer *buf, size_t *len) +{ + if (len != NULL) + *len = buf->end - buf->p; + + return buf->p; +} + +/* The URI functions have a test program in test/test-uri.c. Run the test after + making any changes and add tests for any new functions. */ + +void uri_init(struct uri *uri) +{ + uri->scheme = NULL; + uri->host = NULL; + uri->port = -1; + uri->path = NULL; +} + +void uri_free(struct uri *uri) +{ + free(uri->scheme); + free(uri->host); + free(uri->path); +} + +static int hex_digit_value(char digit) +{ + const char *DIGITS = "0123456789abcdef"; + const char *p; + + if ((unsigned char) digit == '\0') + return -1; + p = strchr(DIGITS, tolower((int) (unsigned char) digit)); + if (p == NULL) + return -1; + + return p - DIGITS; +} + +/* Case-insensitive string comparison. */ +static int str_cmp_i(const char *a, const char *b) +{ + while (*a != '\0' && *b != '\0') { + int ca, cb; + + ca = tolower((int) (unsigned char) *a); + cb = tolower((int) (unsigned char) *b); + if (ca != cb) + return ca - cb; + a++; + b++; + } + + if (*a == '\0' && *b == '\0') + return 0; + else if (*a == '\0') + return -1; + else + return 1; +} + +static int str_equal_i(const char *a, const char *b) +{ + return str_cmp_i(a, b) == 0; +} + +static int lowercase(char *s) +{ + char *p; + + for (p = s; *p != '\0'; p++) + *p = tolower((int) (unsigned char) *p); + + return p - s; +} + +/* In-place percent decoding. */ +static int percent_decode(char *s) +{ + char *p, *q; + + /* Skip to the first '%'. If there are no percent escapes, this lets us + return without doing any copying. */ + q = s; + while (*q != '\0' && *q != '%') + q++; + + p = q; + while (*q != '\0') { + if (*q == '%') { + int c, d; + + q++; + c = hex_digit_value(*q); + if (c == -1) + return -1; + q++; + d = hex_digit_value(*q); + if (d == -1) + return -1; + + *p++ = c * 16 + d; + q++; + } else { + *p++ = *q++; + } + } + *p = '\0'; + + return p - s; +} + +/* Use these functions because isalpha and isdigit can change their meaning + based on the locale. */ +static int is_alpha_char(int c) +{ + return c != '\0' && strchr("abcdefghijklmnopqrstuvwxyzABCDEFGHIJKLMNOPQRSTUVWXYZ", c) != NULL; +} + +static int is_digit_char(int c) +{ + return c != '\0' && strchr("0123456789", c) != NULL; +} + +/* Get the default port for the given URI scheme, or -1 if unrecognized. */ +static int scheme_default_port(const char *scheme) +{ + if (str_equal_i(scheme, "http")) + return 80; + + return -1; +} + +/* Parse a URI string into a struct URI. Any parts of the URI that are absent + will become NULL entries in the structure, except for the port which will be + -1. Returns NULL on error. See RFC 3986, section 3 for syntax. */ +struct uri *uri_parse(struct uri *uri, const char *uri_s) +{ + const char *p, *q; + + uri_init(uri); + + /* Scheme, section 3.1. */ + p = uri_s; + if (!is_alpha_char(*p)) + goto fail; + for (q = p; is_alpha_char(*q) || is_digit_char(*q) || *q == '+' || *q == '-' || *q == '.'; q++) + ; + if (*q != ':') + goto fail; + uri->scheme = mkstr(p, q); + /* "An implementation should accept uppercase letters as equivalent to + lowercase in scheme names (e.g., allow "HTTP" as well as "http") for the + sake of robustness..." */ + lowercase(uri->scheme); + + /* Authority, section 3.2. */ + p = q + 1; + if (*p == '/' && *(p + 1) == '/') { + char *authority = NULL; + + p += 2; + for (q = p; !(*q == '/' || *q == '?' || *q == '#' || *q == '\0'); q++) + ; + authority = mkstr(p, q); + if (uri_parse_authority(uri, authority) == NULL) { + free(authority); + goto fail; + } + free(authority); + + p = q; + } + if (uri->port == -1) + uri->port = scheme_default_port(uri->scheme); + + /* Path, section 3.3. We include the query and fragment in the path. The + path is also not percent-decoded because we just pass it on to the origin + server. */ + q = strchr(p, '\0'); + uri->path = mkstr(p, q); + + return uri; + +fail: + uri_free(uri); + return NULL; +} + +/* Parse the authority part of a URI. userinfo (user name and password) are not + supported and will cause an error if present. See RFC 3986, section 3.2. + Returns NULL on error. */ +struct uri *uri_parse_authority(struct uri *uri, const char *authority) +{ + const char *portsep; + const char *host_start, *host_end; + const char *tail; + + /* We do not support "user:pass@" userinfo. The proxy has no use for it. */ + if (strchr(authority, '@') != NULL) + return NULL; + + /* Find the beginning and end of the host. */ + host_start = authority; + if (*host_start == '[') { + /* IPv6 address in brackets. */ + host_start++; + host_end = strchr(host_start, ']'); + if (host_end == NULL) + return NULL; + portsep = host_end + 1; + if (!(*portsep == ':' || *portsep == '\0')) + return NULL; + } else { + portsep = strrchr(authority, ':'); + if (portsep == NULL) + portsep = strchr(authority, '\0'); + host_end = portsep; + } + + /* Get the port number. */ + if (*portsep == ':' && *(portsep + 1) != '\0') { + long n; + + errno = 0; + n = parse_long(portsep + 1, &tail); + if (errno != 0 || *tail != '\0' || tail == portsep + 1 || n < 1 || n > 65535) + return NULL; + uri->port = n; + } else { + uri->port = -1; + } + + /* Get the host. */ + uri->host = mkstr(host_start, host_end); + if (percent_decode(uri->host) < 0) { + free(uri->host); + uri->host = NULL; + return NULL; + } + + return uri; +} + +static void http_header_node_free(struct http_header *node) +{ + free(node->name); + free(node->value); + free(node); +} + +void http_header_free(struct http_header *header) +{ + struct http_header *p, *next; + + for (p = header; p != NULL; p = next) { + next = p->next; + http_header_node_free(p); + } +} + +/* RFC 2616, section 2.2; see LWS. */ +static int is_space_char(int c) +{ + return c == ' ' || c == '\t'; +} + +/* RFC 2616, section 2.2. */ +static int is_ctl_char(int c) +{ + return (c >= 0 && c <= 31) || c == 127; +} + +/* RFC 2616, section 2.2. */ +static int is_sep_char(int c) +{ + return c != '\0' && strchr("()<>@,;:\\\"/[]?={} \t", c) != NULL; +} + +/* RFC 2616, section 2.2. */ +static int is_token_char(char c) +{ + return !iscntrl((int) (unsigned char) c) && !is_sep_char((int) (unsigned char) c); +} + +static int is_crlf(const char *s) +{ + return *s == '\n' || (*s == '\r' && *(s + 1) == '\n'); +} + +static const char *skip_crlf(const char *s) +{ + if (*s == '\n') + return s + 1; + else if (*s == '\r' && *(s + 1) == '\n') + return s + 2; + + ncat_assert(0); + return NULL; +} + +static int field_name_equal(const char *a, const char *b) +{ + return str_equal_i(a, b); +} + +/* Get the value of every header with the given name, separated by commas. If + you only want the first value for header fields that should not be + concatenated in this way, use http_header_get_first. The returned string + must be freed. */ +char *http_header_get(const struct http_header *header, const char *name) +{ + const struct http_header *p; + char *buf = NULL; + size_t size = 0, offset = 0; + int count; + + count = 0; + for (p = header; p != NULL; p = p->next) { + /* RFC 2616, section 4.2: "Multiple message-header fields with the same + field-name MAY be present in a message if and only if the entire + field-value for that header field is defined as a comma-separated + list [i.e., #(values)]. It MUST be possible to combine the multiple + header fields into one "field-name: field-value" pair, without + changing the semantics of the message, by appending each subsequent + field-value to the first, each separated by a comma." */ + if (field_name_equal(p->name, name)) { + if (count > 0) + strbuf_append_str(&buf, &size, &offset, ", "); + strbuf_append_str(&buf, &size, &offset, p->value); + count++; + } + } + + return buf; +} + +const struct http_header *http_header_next(const struct http_header *header, + const struct http_header *p, const char *name) +{ + if (p == NULL) + p = header; + else + p = p->next; + + for (; p != NULL; p = p->next) { + if (field_name_equal(p->name, name)) + return p; + } + + return NULL; +} + +/* Get the value of the first header with the given name. The returned string + must be freed. */ +char *http_header_get_first(const struct http_header *header, const char *name) +{ + const struct http_header *p; + + p = http_header_next(header, NULL, name); + if (p != NULL) + return Strdup(p->value); + + return NULL; +} + +struct http_header *http_header_set(struct http_header *header, const char *name, const char *value) +{ + struct http_header *node, **prev; + + header = http_header_remove(header, name); + + node = (struct http_header *) safe_malloc(sizeof(*node)); + node->name = Strdup(name); + node->value = Strdup(value); + node->next = NULL; + + /* Link it to the end of the list. */ + for (prev = &header; *prev != NULL; prev = &(*prev)->next) + ; + *prev = node; + + return header; +} + +/* Read a token from a space-separated string. This only recognizes space as a + separator, so the string must already have had LWS normalized. + http_header_parse does this normalization. */ +static const char *read_token(const char *s, char **token) +{ + const char *t; + + while (*s == ' ') + s++; + t = s; + while (is_token_char(*t)) + t++; + if (s == t) + return NULL; + + *token = mkstr(s, t); + + return t; +} + +static const char *read_quoted_string(const char *s, char **quoted_string) +{ + char *buf = NULL; + size_t size = 0, offset = 0; + const char *t; + + while (is_space_char(*s)) + s++; + if (*s != '"') + return NULL; + s++; + t = s; + while (*s != '"') { + /* Get a block of normal characters. */ + while (*t != '"' && *t != '\\') { + /* This is qdtext, which is TEXT except for CTL. */ + if (is_ctl_char(*t)) { + free(buf); + return NULL; + } + t++; + } + strbuf_append(&buf, &size, &offset, s, t - s); + /* Now possibly handle an escape. */ + if (*t == '\\') { + t++; + /* You can only escape a CHAR, octets 0-127. But we disallow 0. */ + if (*t <= 0) { + free(buf); + return NULL; + } + strbuf_append(&buf, &size, &offset, t, 1); + t++; + } + s = t; + } + s++; + + *quoted_string = buf; + return s; +} + +static const char *read_token_or_quoted_string(const char *s, char **token) +{ + while (is_space_char(*s)) + s++; + if (*s == '"') + return read_quoted_string(s, token); + else + return read_token(s, token); +} + +static const char *read_token_list(const char *s, char **tokens[], size_t *n) +{ + char *token; + + *tokens = NULL; + *n = 0; + + for (;;) { + s = read_token(s, &token); + if (s == NULL) { + int i; + + for (i = 0; i < *n; i++) + free((*tokens)[i]); + free(*tokens); + + return NULL; + } + + *tokens = (char **) safe_realloc(*tokens, (*n + 1) * sizeof((*tokens)[0])); + (*tokens)[(*n)++] = token; + if (*s != ',') + break; + s++; + } + + return s; +} + +struct http_header *http_header_remove(struct http_header *header, const char *name) +{ + struct http_header *p, *next, **prev; + + prev = &header; + for (p = header; p != NULL; p = next) { + next = p->next; + if (field_name_equal(p->name, name)) { + *prev = next; + http_header_node_free(p); + continue; + } + prev = &p->next; + } + + return header; +} + +/* Removes hop-by-hop headers listed in section 13.5.1 of RFC 2616, and + additionally removes any headers listed in the Connection header as described + in section 14.10. */ +int http_header_remove_hop_by_hop(struct http_header **header) +{ + static const char *HOP_BY_HOP_HEADERS[] = { + "Connection", + "Keep-Alive", + "Proxy-Authenticate", + "Proxy-Authorization", + "TE", + "Trailers", + "Transfer-Encoding", + "Upgrade", + }; + char *connection; + char **connection_tokens; + size_t num_connection_tokens; + unsigned int i; + + connection = http_header_get(*header, "Connection"); + if (connection != NULL) { + const char *p; + + p = read_token_list(connection, &connection_tokens, &num_connection_tokens); + if (p == NULL) { + free(connection); + return 400; + } + if (*p != '\0') { + free(connection); + for (i = 0; i < num_connection_tokens; i++) + free(connection_tokens[i]); + free(connection_tokens); + return 400; + } + free(connection); + } else { + connection_tokens = NULL; + num_connection_tokens = 0; + } + + for (i = 0; i < sizeof(HOP_BY_HOP_HEADERS) / sizeof(HOP_BY_HOP_HEADERS[0]); i++) + *header = http_header_remove(*header, HOP_BY_HOP_HEADERS[i]); + for (i = 0; i < num_connection_tokens; i++) + *header = http_header_remove(*header, connection_tokens[i]); + + for (i = 0; i < num_connection_tokens; i++) + free(connection_tokens[i]); + free(connection_tokens); + + return 0; +} + +char *http_header_to_string(const struct http_header *header, size_t *n) +{ + const struct http_header *p; + char *buf = NULL; + size_t size = 0, offset = 0; + + strbuf_append_str(&buf, &size, &offset, ""); + + for (p = header; p != NULL; p = p->next) + strbuf_sprintf(&buf, &size, &offset, "%s: %s\r\n", p->name, p->value); + + if (n != NULL) + *n = offset; + + return buf; +} + +void http_request_init(struct http_request *request) +{ + request->method = NULL; + uri_init(&request->uri); + request->version = HTTP_UNKNOWN; + request->header = NULL; + request->content_length_set = 0; + request->content_length = 0; + request->bytes_transferred = 0; +} + +void http_request_free(struct http_request *request) +{ + free(request->method); + uri_free(&request->uri); + http_header_free(request->header); +} + +char *http_request_to_string(const struct http_request *request, size_t *n) +{ + const char *path; + char *buf = NULL; + size_t size = 0, offset = 0; + + /* RFC 2616, section 5.1.2: "the absolute path cannot be empty; if none is + present in the original URI, it MUST be given as "/" (the server + root)." */ + path = request->uri.path; + if (path[0] == '\0') + path = "/"; + + if (request->version == HTTP_09) { + /* HTTP/0.9 doesn't have headers. See + http://www.w3.org/Protocols/HTTP/AsImplemented.html. */ + strbuf_sprintf(&buf, &size, &offset, "%s %s\r\n", request->method, path); + } else { + const char *version; + char *header_str; + + if (request->version == HTTP_10) + version = " HTTP/1.0"; + else + version = " HTTP/1.1"; + + header_str = http_header_to_string(request->header, NULL); + strbuf_sprintf(&buf, &size, &offset, "%s %s%s\r\n%s\r\n", + request->method, path, version, header_str); + free(header_str); + } + + if (n != NULL) + *n = offset; + + return buf; +} + +void http_response_init(struct http_response *response) +{ + response->version = HTTP_UNKNOWN; + response->code = 0; + response->phrase = NULL; + response->header = NULL; + response->content_length_set = 0; + response->content_length = 0; + response->bytes_transferred = 0; +} + +void http_response_free(struct http_response *response) +{ + free(response->phrase); + http_header_free(response->header); +} + +char *http_response_to_string(const struct http_response *response, size_t *n) +{ + char *buf = NULL; + size_t size = 0, offset = 0; + + if (response->version == HTTP_09) { + /* HTTP/0.9 doesn't have a Status-Line or headers. See + http://www.w3.org/Protocols/HTTP/AsImplemented.html. */ + return Strdup(""); + } else { + const char *version; + char *header_str; + + if (response->version == HTTP_10) + version = "HTTP/1.0"; + else + version = "HTTP/1.1"; + + header_str = http_header_to_string(response->header, NULL); + strbuf_sprintf(&buf, &size, &offset, "%s %d %s\r\n%s\r\n", + version, response->code, response->phrase, header_str); + free(header_str); + } + + if (n != NULL) + *n = offset; + + return buf; +} + +int http_read_header(struct socket_buffer *buf, char **result) +{ + char *line = NULL; + char *header; + size_t n = 0; + size_t count; + int blank; + + header = NULL; + + do { + line = socket_buffer_readline(buf, &count, MAX_HEADER_LENGTH); + if (line == NULL) { + free(header); + if (count >= MAX_HEADER_LENGTH) + /* Request Entity Too Large. */ + return 413; + else + return 400; + } + blank = is_crlf(line); + + if (n + count >= MAX_HEADER_LENGTH) { + free(line); + free(header); + /* Request Entity Too Large. */ + return 413; + } + + header = (char *) safe_realloc(header, n + count + 1); + memcpy(header + n, line, count); + n += count; + free(line); + } while (!blank); + header[n] = '\0'; + + *result = header; + + return 0; +} + +static const char *skip_lws(const char *s) +{ + for (;;) { + while (is_space_char(*s)) + s++; + + if (*s == '\n' && is_space_char(*(s + 1))) + s += 1; + else if (*s == '\r' && *(s + 1) == '\n' && is_space_char(*(s + 2))) + s += 2; + else + break; + } + + return s; +} + +/* See section 4.2 of RFC 2616 for header format. */ +int http_parse_header(struct http_header **result, const char *header) +{ + const char *p, *q; + size_t value_len, value_offset; + struct http_header *node, **prev; + + *result = NULL; + prev = result; + + p = header; + while (*p != '\0' && !is_crlf(p)) { + /* Get the field name. */ + q = p; + while (*q != '\0' && is_token_char(*q)) + q++; + if (*q != ':') { + http_header_free(*result); + return 400; + } + + node = (struct http_header *) safe_malloc(sizeof(*node)); + node->name = mkstr(p, q); + node->value = NULL; + node->next = NULL; + value_len = 0; + value_offset = 0; + + /* Copy the header field value until we hit a CRLF. */ + p = q + 1; + p = skip_lws(p); + for (;;) { + q = p; + while (*q != '\0' && !is_space_char(*q) && !is_crlf(q)) { + /* Section 2.2 of RFC 2616 disallows control characters. */ + if (iscntrl((int) (unsigned char) *q)) { + http_header_node_free(node); + return 400; + } + q++; + } + strbuf_append(&node->value, &value_len, &value_offset, p, q - p); + p = skip_lws(q); + if (is_crlf(p)) + break; + /* Replace LWS with a single space. */ + strbuf_append_str(&node->value, &value_len, &value_offset, " "); + } + *prev = node; + prev = &node->next; + + p = skip_crlf(p); + } + + return 0; +} + +static int http_header_get_content_length(const struct http_header *header, int *content_length_set, unsigned long *content_length) +{ + char *content_length_s; + const char *tail; + int code; + + content_length_s = http_header_get_first(header, "Content-Length"); + if (content_length_s == NULL) { + *content_length_set = 0; + *content_length = 0; + return 0; + } + + code = 0; + + errno = 0; + *content_length_set = 1; + *content_length = parse_long(content_length_s, &tail); + if (errno != 0 || *tail != '\0' || tail == content_length_s) + code = 400; + free(content_length_s); + + return code; +} + +/* Parse a header and fill in any relevant fields in the request structure. */ +int http_request_parse_header(struct http_request *request, const char *header) +{ + int code; + + code = http_parse_header(&request->header, header); + if (code != 0) + return code; + code = http_header_get_content_length(request->header, &request->content_length_set, &request->content_length); + if (code != 0) + return code; + + return 0; +} + +/* Parse a header and fill in any relevant fields in the response structure. */ +int http_response_parse_header(struct http_response *response, const char *header) +{ + int code; + + code = http_parse_header(&response->header, header); + if (code != 0) + return code; + code = http_header_get_content_length(response->header, &response->content_length_set, &response->content_length); + if (code != 0) + return code; + + return 0; +} + +int http_read_request_line(struct socket_buffer *buf, char **line) +{ + size_t n; + + *line = NULL; + + /* Section 4.1 of RFC 2616 says "servers SHOULD ignore any empty line(s) + received where a Request-Line is expected." */ + do { + free(*line); + *line = socket_buffer_readline(buf, &n, MAX_REQUEST_LINE_LENGTH); + if (*line == NULL) { + if (n >= MAX_REQUEST_LINE_LENGTH) + /* Request Entity Too Large. */ + return 413; + else + return 400; + } + } while (is_crlf(*line)); + + return 0; +} + +/* Returns the character pointer after the HTTP version, or s if there was a + parse error. */ +static const char *parse_http_version(const char *s, enum http_version *version) +{ + const char *PREFIX = "HTTP/"; + const char *p, *q; + long major, minor; + + *version = HTTP_UNKNOWN; + + p = s; + if (memcmp(p, PREFIX, strlen(PREFIX)) != 0) + return s; + p += strlen(PREFIX); + + /* Major version. */ + errno = 0; + major = parse_long(p, &q); + if (errno != 0 || q == p) + return s; + + p = q; + if (*p != '.') + return s; + p++; + + /* Minor version. */ + errno = 0; + minor = parse_long(p, &q); + if (errno != 0 || q == p) + return s; + + if (major == 1 && minor == 0) + *version = HTTP_10; + else if (major == 1 && minor == 1) + *version = HTTP_11; + + return q; +} + +int http_parse_request_line(const char *line, struct http_request *request) +{ + const char *p, *q; + struct uri *uri; + char *uri_s; + + http_request_init(request); + + p = line; + while (*p == ' ') + p++; + + /* Method (CONNECT, GET, etc.). */ + q = p; + while (is_token_char(*q)) + q++; + if (p == q) + goto badreq; + request->method = mkstr(p, q); + + /* URI. */ + p = q; + while (*p == ' ') + p++; + q = p; + while (*q != '\0' && *q != ' ') + q++; + if (p == q) + goto badreq; + uri_s = mkstr(p, q); + + /* RFC 2616, section 5.1.1: The method is case-sensitive. + RFC 2616, section 5.1.2: + Request-URI = "*" | absoluteURI | abs_path | authority + The absoluteURI form is REQUIRED when the request is being made to a + proxy... The authority form is only used by the CONNECT method. */ + if (strcmp(request->method, "CONNECT") == 0) { + uri = uri_parse_authority(&request->uri, uri_s); + } else { + uri = uri_parse(&request->uri, uri_s); + } + free(uri_s); + if (uri == NULL) + /* The URI parsing failed. */ + goto badreq; + + /* Version number. */ + p = q; + while (*p == ' ') + p++; + if (*p == '\0') { + /* No HTTP/X.X version number indicates version 0.9. */ + request->version = HTTP_09; + } else { + q = parse_http_version(p, &request->version); + if (p == q) + goto badreq; + } + + return 0; + +badreq: + http_request_free(request); + return 400; +} + +int http_read_status_line(struct socket_buffer *buf, char **line) +{ + size_t n; + + /* RFC 2616, section 6.1: "The first line of a Response message is the + Status-Line... No CR or LF is allowed except in the final CRLF sequence." + Contrast that with Request-Line, which allows leading blank lines. */ + *line = socket_buffer_readline(buf, &n, MAX_STATUS_LINE_LENGTH); + if (*line == NULL) { + if (n >= MAX_STATUS_LINE_LENGTH) + /* Request Entity Too Large. */ + return 413; + else + return 400; + } + + return 0; +} + +/* Returns 0 on success and nonzero on failure. */ +int http_parse_status_line(const char *line, struct http_response *response) +{ + const char *p, *q; + + http_response_init(response); + + /* Version. */ + p = parse_http_version(line, &response->version); + if (p == line) + return -1; + while (*p == ' ') + p++; + + /* Status code. */ + errno = 0; + response->code = parse_long(p, &q); + if (errno != 0 || q == p) + return -1; + p = q; + + /* Reason phrase. */ + while (*p == ' ') + p++; + q = p; + while (!is_crlf(q)) + q++; + /* We expect that the CRLF ends the string. */ + if (*skip_crlf(q) != '\0') + return -1; + response->phrase = mkstr(p, q); + + return 0; +} + +/* This is a convenience wrapper around http_parse_status_line that only returns + the status code. Returns the status code on success or -1 on failure. */ +int http_parse_status_line_code(const char *line) +{ + struct http_response resp; + int code; + + if (http_parse_status_line(line, &resp) != 0) + return -1; + code = resp.code; + http_response_free(&resp); + + return code; +} + +static const char *http_read_challenge(const char *s, struct http_challenge *challenge) +{ + const char *p; + char *scheme; + + http_challenge_init(challenge); + + scheme = NULL; + s = read_token(s, &scheme); + if (s == NULL) + goto bail; + if (str_equal_i(scheme, "Basic")) { + challenge->scheme = AUTH_BASIC; + } else if (str_equal_i(scheme, "Digest")) { + challenge->scheme = AUTH_DIGEST; + } else { + challenge->scheme = AUTH_UNKNOWN; + } + free(scheme); + scheme = NULL; + + /* RFC 2617, section 1.2, requires at least one auth-param: + challenge = auth-scheme 1*SP 1#auth-param + But there are some schemes (NTLM and Negotiate) that can be without + auth-params, so we allow that here. A comma indicates the end of this + challenge and the beginning of the next (see the comment in the loop + below). */ + while (is_space_char(*s)) + s++; + if (*s == ',') { + s++; + while (is_space_char(*s)) + s++; + if (*s == '\0') + goto bail; + return s; + } + + while (*s != '\0') { + char *name, *value; + + p = read_token(s, &name); + if (p == NULL) + goto bail; + while (is_space_char(*p)) + p++; + /* It's possible that we've hit the end of one challenge and the + beginning of another. Section 14.33 says that the header value can be + 1#challenge, in other words several challenges separated by commas. + Because the auth-params are also separated by commas, the only way we + can tell is if we find a token not followed by an equals sign. */ + if (*p != '=') + break; + p++; + while (is_space_char(*p)) + p++; + p = read_token_or_quoted_string(p, &value); + if (p == NULL) { + free(name); + goto bail; + } + if (str_equal_i(name, "realm")) + challenge->realm = Strdup(value); + else if (challenge->scheme == AUTH_DIGEST) { + if (str_equal_i(name, "nonce")) { + if (challenge->digest.nonce != NULL) + goto bail; + challenge->digest.nonce = Strdup(value); + } else if (str_equal_i(name, "opaque")) { + if (challenge->digest.opaque != NULL) + goto bail; + challenge->digest.opaque = Strdup(value); + } else if (str_equal_i(name, "algorithm")) { + if (str_equal_i(value, "MD5")) + challenge->digest.algorithm = ALGORITHM_MD5; + else + challenge->digest.algorithm = ALGORITHM_UNKNOWN; + } else if (str_equal_i(name, "qop")) { + char **tokens; + size_t n; + int i; + const char *tmp; + + tmp = read_token_list(value, &tokens, &n); + if (tmp == NULL) { + free(name); + free(value); + goto bail; + } + for (i = 0; i < n; i++) { + if (str_equal_i(tokens[i], "auth")) + challenge->digest.qop |= QOP_AUTH; + else if (str_equal_i(tokens[i], "auth-int")) + challenge->digest.qop |= QOP_AUTH_INT; + } + for (i = 0; i < n; i++) + free(tokens[i]); + free(tokens); + if (*tmp != '\0') { + free(name); + free(value); + goto bail; + } + } + } + free(name); + free(value); + while (is_space_char(*p)) + p++; + if (*p == ',') { + p++; + while (is_space_char(*p)) + p++; + if (*p == '\0') + goto bail; + } + s = p; + } + + return s; + +bail: + if (scheme != NULL) + free(scheme); + http_challenge_free(challenge); + + return NULL; +} + +static const char *http_read_credentials(const char *s, + struct http_credentials *credentials) +{ + const char *p; + char *scheme; + + credentials->scheme = AUTH_UNKNOWN; + + s = read_token(s, &scheme); + if (s == NULL) + return NULL; + if (str_equal_i(scheme, "Basic")) { + http_credentials_init_basic(credentials); + } else if (str_equal_i(scheme, "Digest")) { + http_credentials_init_digest(credentials); + } else { + free(scheme); + return NULL; + } + free(scheme); + + while (is_space_char(*s)) + s++; + if (credentials->scheme == AUTH_BASIC) { + p = s; + /* Read base64. */ + while (is_alpha_char(*p) || is_digit_char(*p) || *p == '+' || *p == '/' || *p == '=') + p++; + credentials->u.basic = mkstr(s, p); + while (is_space_char(*p)) + p++; + s = p; + } else if (credentials->scheme == AUTH_DIGEST) { + char *name, *value; + + while (*s != '\0') { + p = read_token(s, &name); + if (p == NULL) + goto bail; + while (is_space_char(*p)) + p++; + /* It's not legal to combine multiple Authorization or + Proxy-Authorization values. The productions are + "Authorization" ":" credentials (section 14.8) + "Proxy-Authorization" ":" credentials (section 14.34) + Contrast this with WWW-Authenticate and Proxy-Authenticate and + their handling in http_read_challenge. */ + if (*p != '=') + goto bail; + p++; + while (is_space_char(*p)) + p++; + p = read_token_or_quoted_string(p, &value); + if (p == NULL) { + free(name); + goto bail; + } + if (str_equal_i(name, "username")) { + if (credentials->u.digest.username != NULL) + goto bail; + credentials->u.digest.username = Strdup(value); + } else if (str_equal_i(name, "realm")) { + if (credentials->u.digest.realm != NULL) + goto bail; + credentials->u.digest.realm = Strdup(value); + } else if (str_equal_i(name, "nonce")) { + if (credentials->u.digest.nonce != NULL) + goto bail; + credentials->u.digest.nonce = Strdup(value); + } else if (str_equal_i(name, "uri")) { + if (credentials->u.digest.uri != NULL) + goto bail; + credentials->u.digest.uri = Strdup(value); + } else if (str_equal_i(name, "response")) { + if (credentials->u.digest.response != NULL) + goto bail; + credentials->u.digest.response = Strdup(value); + } else if (str_equal_i(name, "algorithm")) { + if (str_equal_i(value, "MD5")) + credentials->u.digest.algorithm = ALGORITHM_MD5; + else + credentials->u.digest.algorithm = ALGORITHM_UNKNOWN; + } else if (str_equal_i(name, "qop")) { + if (str_equal_i(value, "auth")) + credentials->u.digest.qop = QOP_AUTH; + else if (str_equal_i(value, "auth-int")) + credentials->u.digest.qop = QOP_AUTH_INT; + else + credentials->u.digest.qop = QOP_NONE; + } else if (str_equal_i(name, "cnonce")) { + if (credentials->u.digest.cnonce != NULL) + goto bail; + credentials->u.digest.cnonce = Strdup(value); + } else if (str_equal_i(name, "nc")) { + if (credentials->u.digest.nc != NULL) + goto bail; + credentials->u.digest.nc = Strdup(value); + } + free(name); + free(value); + while (is_space_char(*p)) + p++; + if (*p == ',') { + p++; + while (is_space_char(*p)) + p++; + if (*p == '\0') + goto bail; + } + s = p; + } + } + + return s; + +bail: + http_credentials_free(credentials); + + return NULL; +} + +/* Is scheme a preferred over scheme b? We prefer Digest to Basic when Digest is + supported. */ +static int auth_scheme_is_better(enum http_auth_scheme a, + enum http_auth_scheme b) +{ +#if HAVE_HTTP_DIGEST + if (b == AUTH_DIGEST) + return 0; + if (b == AUTH_BASIC) + return a == AUTH_DIGEST; + if (b == AUTH_UNKNOWN) + return a == AUTH_BASIC || a == AUTH_DIGEST; +#else + if (b == AUTH_BASIC) + return 0; + if (b == AUTH_UNKNOWN) + return a == AUTH_BASIC; +#endif + + return 0; +} + +struct http_challenge *http_header_get_proxy_challenge(const struct http_header *header, struct http_challenge *challenge) +{ + const struct http_header *p; + + http_challenge_init(challenge); + + p = NULL; + while ((p = http_header_next(header, p, "Proxy-Authenticate")) != NULL) { + const char *tmp; + + tmp = p->value; + while (*tmp != '\0') { + struct http_challenge tmp_info; + + tmp = http_read_challenge(tmp, &tmp_info); + if (tmp == NULL) { + http_challenge_free(challenge); + return NULL; + } + if (auth_scheme_is_better(tmp_info.scheme, challenge->scheme)) { + http_challenge_free(challenge); + *challenge = tmp_info; + } else { + http_challenge_free(&tmp_info); + } + } + } + + return challenge; +} + +struct http_credentials *http_header_get_proxy_credentials(const struct http_header *header, struct http_credentials *credentials) +{ + const struct http_header *p; + + credentials->scheme = AUTH_UNKNOWN; + + p = NULL; + while ((p = http_header_next(header, p, "Proxy-Authorization")) != NULL) { + const char *tmp; + + tmp = p->value; + while (*tmp != '\0') { + struct http_credentials tmp_info; + + tmp = http_read_credentials(tmp, &tmp_info); + if (tmp == NULL) { + http_credentials_free(credentials); + return NULL; + } + if (auth_scheme_is_better(tmp_info.scheme, credentials->scheme)) { + http_credentials_free(credentials); + *credentials = tmp_info; + } else { + http_credentials_free(&tmp_info); + } + } + } + + return credentials; +} + +void http_challenge_init(struct http_challenge *challenge) +{ + challenge->scheme = AUTH_UNKNOWN; + challenge->realm = NULL; + challenge->digest.nonce = NULL; + challenge->digest.opaque = NULL; + challenge->digest.algorithm = ALGORITHM_MD5; + challenge->digest.qop = 0; +} + +void http_challenge_free(struct http_challenge *challenge) +{ + free(challenge->realm); + if (challenge->scheme == AUTH_DIGEST) { + free(challenge->digest.nonce); + free(challenge->digest.opaque); + } +} + +void http_credentials_init_basic(struct http_credentials *credentials) +{ + credentials->scheme = AUTH_BASIC; + credentials->u.basic = NULL; +} + +void http_credentials_init_digest(struct http_credentials *credentials) +{ + credentials->scheme = AUTH_DIGEST; + credentials->u.digest.username = NULL; + credentials->u.digest.realm = NULL; + credentials->u.digest.nonce = NULL; + credentials->u.digest.uri = NULL; + credentials->u.digest.response = NULL; + credentials->u.digest.algorithm = ALGORITHM_MD5; + credentials->u.digest.qop = QOP_NONE; + credentials->u.digest.nc = NULL; + credentials->u.digest.cnonce = NULL; +} + +void http_credentials_free(struct http_credentials *credentials) +{ + if (credentials->scheme == AUTH_BASIC) { + free(credentials->u.basic); + } else if (credentials->scheme == AUTH_DIGEST) { + free(credentials->u.digest.username); + free(credentials->u.digest.realm); + free(credentials->u.digest.nonce); + free(credentials->u.digest.uri); + free(credentials->u.digest.response); + free(credentials->u.digest.nc); + free(credentials->u.digest.cnonce); + } +} |