diff options
Diffstat (limited to 'htp/htp_request_generic.c')
-rw-r--r-- | htp/htp_request_generic.c | 462 |
1 files changed, 462 insertions, 0 deletions
diff --git a/htp/htp_request_generic.c b/htp/htp_request_generic.c new file mode 100644 index 0000000..435cf0a --- /dev/null +++ b/htp/htp_request_generic.c @@ -0,0 +1,462 @@ +/*************************************************************************** + * Copyright (c) 2009-2010 Open Information Security Foundation + * Copyright (c) 2010-2013 Qualys, Inc. + * All rights reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions are + * met: + * + * - Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + + * - Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution. + + * - Neither the name of the Qualys, Inc. nor the names of its + * contributors may be used to endorse or promote products derived from + * this software without specific prior written permission. + * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS + * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT + * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR + * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT + * HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, + * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT + * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, + * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY + * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT + * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE + * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + ***************************************************************************/ + +/** + * @file + * @author Ivan Ristic <ivanr@webkreator.com> + */ + +#include "htp_config_auto.h" + +#include "htp_private.h" + +/** + * Extract one request header. A header can span multiple lines, in + * which case they will be folded into one before parsing is attempted. + * + * @param[in] connp + * @param[in] data + * @param[in] len + * @return HTP_OK or HTP_ERROR + */ +htp_status_t htp_process_request_header_generic(htp_connp_t *connp, unsigned char *data, size_t len) { + // Create a new header structure. + htp_header_t *h = calloc(1, sizeof (htp_header_t)); + if (h == NULL) return HTP_ERROR; + + // Now try to parse the header. + if (htp_parse_request_header_generic(connp, h, data, len) != HTP_OK) { + free(h); + return HTP_ERROR; + } + + #ifdef HTP_DEBUG + fprint_bstr(stderr, "Header name", h->name); + fprint_bstr(stderr, "Header value", h->value); + #endif + + // Do we already have a header with the same name? + htp_header_t *h_existing = htp_table_get(connp->in_tx->request_headers, h->name); + if (h_existing != NULL) { + // TODO Do we want to have a list of the headers that are + // allowed to be combined in this way? + if ((h_existing->flags & HTP_FIELD_REPEATED) == 0) { + // This is the second occurence for this header. + htp_log(connp, HTP_LOG_MARK, HTP_LOG_WARNING, 0, "Repetition for header"); + } else { + // For simplicity reasons, we count the repetitions of all headers + if (connp->in_tx->req_header_repetitions < HTP_MAX_HEADERS_REPETITIONS) { + connp->in_tx->req_header_repetitions++; + } else { + bstr_free(h->name); + bstr_free(h->value); + free(h); + return HTP_OK; + } + } + // Keep track of repeated same-name headers. + h_existing->flags |= HTP_FIELD_REPEATED; + + // Having multiple C-L headers is against the RFC but + // servers may ignore the subsequent headers if the values are the same. + if (bstr_cmp_c_nocase(h->name, "Content-Length") == 0) { + // Don't use string comparison here because we want to + // ignore small formatting differences. + + int64_t existing_cl = htp_parse_content_length(h_existing->value, NULL); + int64_t new_cl = htp_parse_content_length(h->value, NULL); + // Ambiguous response C-L value. + if ((existing_cl == -1) || (new_cl == -1) || (existing_cl != new_cl)) { + htp_log(connp, HTP_LOG_MARK, HTP_LOG_WARNING, 0, "Ambiguous request C-L value"); + } + // Ignoring the new C-L header that has the same value as the previous ones. + } else { + // Add to the existing header. + bstr *new_value = bstr_expand(h_existing->value, bstr_len(h_existing->value) + 2 + bstr_len(h->value)); + if (new_value == NULL) { + bstr_free(h->name); + bstr_free(h->value); + free(h); + return HTP_ERROR; + } + + h_existing->value = new_value; + bstr_add_mem_noex(h_existing->value, ", ", 2); + bstr_add_noex(h_existing->value, h->value); + } + + // The new header structure is no longer needed. + bstr_free(h->name); + bstr_free(h->value); + free(h); + } else { + // Add as a new header. + if (htp_table_add(connp->in_tx->request_headers, h->name, h) != HTP_OK) { + bstr_free(h->name); + bstr_free(h->value); + free(h); + } + } + + return HTP_OK; +} + +/** + * Generic request header parser. + * + * @param[in] connp + * @param[in] h + * @param[in] data + * @param[in] len + * @return HTP_OK or HTP_ERROR + */ +htp_status_t htp_parse_request_header_generic(htp_connp_t *connp, htp_header_t *h, unsigned char *data, size_t len) { + size_t name_start, name_end; + size_t value_start, value_end; + + htp_chomp(data, &len); + + name_start = 0; + + // Look for the colon. + size_t colon_pos = 0; + while ((colon_pos < len) && (data[colon_pos] != '\0') && (data[colon_pos] != ':')) colon_pos++; + + if ((colon_pos == len) || (data[colon_pos] == '\0')) { + // Missing colon. + + h->flags |= HTP_FIELD_UNPARSEABLE; + + // Log only once per transaction. + if (!(connp->in_tx->flags & HTP_FIELD_UNPARSEABLE)) { + connp->in_tx->flags |= HTP_FIELD_UNPARSEABLE; + htp_log(connp, HTP_LOG_MARK, HTP_LOG_WARNING, 0, "Request field invalid: colon missing"); + } + + // We handle this case as a header with an empty name, with the value equal + // to the entire input string. + + // TODO Apache will respond to this problem with a 400. + + // Now extract the name and the value + h->name = bstr_dup_c(""); + if (h->name == NULL) return HTP_ERROR; + + h->value = bstr_dup_mem(data, len); + if (h->value == NULL) { + bstr_free(h->name); + return HTP_ERROR; + } + + return HTP_OK; + } + + if (colon_pos == 0) { + // Empty header name. + + h->flags |= HTP_FIELD_INVALID; + + // Log only once per transaction. + if (!(connp->in_tx->flags & HTP_FIELD_INVALID)) { + connp->in_tx->flags |= HTP_FIELD_INVALID; + htp_log(connp, HTP_LOG_MARK, HTP_LOG_WARNING, 0, "Request field invalid: empty name"); + } + } + + name_end = colon_pos; + + // Ignore LWS after field-name. + size_t prev = name_end; + while ((prev > name_start) && (htp_is_lws(data[prev - 1]))) { + // LWS after header name. + + prev--; + name_end--; + + h->flags |= HTP_FIELD_INVALID; + + // Log only once per transaction. + if (!(connp->in_tx->flags & HTP_FIELD_INVALID)) { + connp->in_tx->flags |= HTP_FIELD_INVALID; + htp_log(connp, HTP_LOG_MARK, HTP_LOG_WARNING, 0, "Request field invalid: LWS after name"); + } + } + + // Header value. + + value_start = colon_pos; + + // Go over the colon. + if (value_start < len) { + value_start++; + } + + // Ignore LWS before field-content. + while ((value_start < len) && (htp_is_lws(data[value_start]))) { + value_start++; + } + + // Look for the end of field-content. + value_end = len; + + // Ignore LWS after field-content. + prev = value_end - 1; + while ((prev > value_start) && (htp_is_lws(data[prev]))) { + prev--; + value_end--; + } + + // Check that the header name is a token. + size_t i = name_start; + while (i < name_end) { + if (!htp_is_token(data[i])) { + // Incorrectly formed header name. + + h->flags |= HTP_FIELD_INVALID; + + // Log only once per transaction. + if (!(connp->in_tx->flags & HTP_FIELD_INVALID)) { + connp->in_tx->flags |= HTP_FIELD_INVALID; + htp_log(connp, HTP_LOG_MARK, HTP_LOG_WARNING, 0, "Request header name is not a token"); + } + + break; + } + + i++; + } + + // Now extract the name and the value + h->name = bstr_dup_mem(data + name_start, name_end - name_start); + if (h->name == NULL) return HTP_ERROR; + + h->value = bstr_dup_mem(data + value_start, value_end - value_start); + if (h->value == NULL) { + bstr_free(h->name); + return HTP_ERROR; + } + + return HTP_OK; +} + +/** + * Generic request line parser. + * + * @param[in] connp + * @return HTP_OK or HTP_ERROR + */ +htp_status_t htp_parse_request_line_generic(htp_connp_t *connp) { + return htp_parse_request_line_generic_ex(connp, 0 /* NUL does not terminates line */); +} + +htp_status_t htp_parse_request_line_generic_ex(htp_connp_t *connp, int nul_terminates) { + htp_tx_t *tx = connp->in_tx; + unsigned char *data = bstr_ptr(tx->request_line); + size_t len = bstr_len(tx->request_line); + size_t pos = 0; + size_t mstart = 0; + size_t start; + size_t bad_delim; + + if (nul_terminates) { + // The line ends with the first NUL byte. + + size_t newlen = 0; + while ((pos < len) && (data[pos] != '\0')) { + pos++; + newlen++; + } + + // Start again, with the new length. + len = newlen; + pos = 0; + } + + // skip past leading whitespace. IIS allows this + while ((pos < len) && htp_is_space(data[pos])) pos++; + if (pos) { + htp_log(connp, HTP_LOG_MARK, HTP_LOG_WARNING, 0, "Request line: leading whitespace"); + mstart = pos; + + if (connp->cfg->requestline_leading_whitespace_unwanted != HTP_UNWANTED_IGNORE) { + // reset mstart so that we copy the whitespace into the method + mstart = 0; + // set expected response code to this anomaly + tx->response_status_expected_number = connp->cfg->requestline_leading_whitespace_unwanted; + } + } + + // The request method starts at the beginning of the + // line and ends with the first whitespace character. + while ((pos < len) && (!htp_is_space(data[pos]))) pos++; + + // No, we don't care if the method is empty. + + tx->request_method = bstr_dup_mem(data + mstart, pos - mstart); + if (tx->request_method == NULL) return HTP_ERROR; + + #ifdef HTP_DEBUG + fprint_raw_data(stderr, __func__, bstr_ptr(tx->request_method), bstr_len(tx->request_method)); + #endif + + tx->request_method_number = htp_convert_method_to_number(tx->request_method); + + bad_delim = 0; + // Ignore whitespace after request method. The RFC allows + // for only one SP, but then suggests any number of SP and HT + // should be permitted. Apache uses isspace(), which is even + // more permitting, so that's what we use here. + while ((pos < len) && (isspace(data[pos]))) { + if (!bad_delim && data[pos] != 0x20) { + bad_delim++; + } + pos++; + } +// Too much performance overhead for fuzzing +#ifndef FUZZING_BUILD_MODE_UNSAFE_FOR_PRODUCTION + if (bad_delim) { + htp_log(connp, HTP_LOG_MARK, HTP_LOG_WARNING, 0, "Request line: non-compliant delimiter between Method and URI"); + } +#endif + + // Is there anything after the request method? + if (pos == len) { + // No, this looks like a HTTP/0.9 request. + + tx->is_protocol_0_9 = 1; + tx->request_protocol_number = HTP_PROTOCOL_0_9; + if (tx->request_method_number == HTP_M_UNKNOWN) + htp_log(connp, HTP_LOG_MARK, HTP_LOG_WARNING, 0, "Request line: unknown method only"); + + return HTP_OK; + } + + start = pos; + bad_delim = 0; + if (tx->connp->cfg->allow_space_uri) { + pos = len - 1; + // Skips the spaces at the end of line (after protocol) + while (pos > start && htp_is_space(data[pos])) pos--; + // The URI ends with the last whitespace. + while ((pos > start) && (data[pos] != 0x20)) { + if (!bad_delim && htp_is_space(data[pos])) { + bad_delim++; + } + pos--; + } + /* if we've seen some 'bad' delimiters, we retry with those */ + if (bad_delim && pos == start) { + // special case: even though RFC's allow only SP (0x20), many + // implementations allow other delimiters, like tab or other + // characters that isspace() accepts. + pos = len - 1; + while ((pos > start) && (!htp_is_space(data[pos]))) pos--; + } else { + // reset bad_delim found in protocol part + bad_delim = 0; + for (size_t i = start; i < pos; i++) { + if (data[i] != 0x20 && htp_is_space(data[i])) { + bad_delim = 1; + break; + } + } + } + if (bad_delim) { +#ifndef FUZZING_BUILD_MODE_UNSAFE_FOR_PRODUCTION + // warn regardless if we've seen non-compliant chars + htp_log(connp, HTP_LOG_MARK, HTP_LOG_WARNING, 0, "Request line: URI contains non-compliant delimiter"); +#endif + } else if (pos == start) { + pos = len; + } + } else { + // The URI ends with the first whitespace. + while ((pos < len) && (data[pos] != 0x20)) { + if (!bad_delim && htp_is_space(data[pos])) { + bad_delim++; + } + pos++; + } + /* if we've seen some 'bad' delimiters, we retry with those */ + if (bad_delim && pos == len) { + // special case: even though RFC's allow only SP (0x20), many + // implementations allow other delimiters, like tab or other + // characters that isspace() accepts. + pos = start; + while ((pos < len) && (!htp_is_space(data[pos]))) pos++; + } +#ifndef FUZZING_BUILD_MODE_UNSAFE_FOR_PRODUCTION + if (bad_delim) { + // warn regardless if we've seen non-compliant chars + htp_log(connp, HTP_LOG_MARK, HTP_LOG_WARNING, 0, "Request line: URI contains non-compliant delimiter"); + } +#endif + } + + tx->request_uri = bstr_dup_mem(data + start, pos - start); + if (tx->request_uri == NULL) return HTP_ERROR; + + #ifdef HTP_DEBUG + fprint_raw_data(stderr, __func__, bstr_ptr(tx->request_uri), bstr_len(tx->request_uri)); + #endif + + // Ignore whitespace after URI. + while ((pos < len) && (htp_is_space(data[pos]))) pos++; + + // Is there protocol information available? + if (pos == len) { + // No, this looks like a HTTP/0.9 request. + + tx->is_protocol_0_9 = 1; + tx->request_protocol_number = HTP_PROTOCOL_0_9; + if (tx->request_method_number == HTP_M_UNKNOWN) + htp_log(connp, HTP_LOG_MARK, HTP_LOG_WARNING, 0, "Request line: unknown method and no protocol"); + + return HTP_OK; + } + + // The protocol information continues until the end of the line. + tx->request_protocol = bstr_dup_mem(data + pos, len - pos); + if (tx->request_protocol == NULL) return HTP_ERROR; + + tx->request_protocol_number = htp_parse_protocol(tx->request_protocol); + if (tx->request_method_number == HTP_M_UNKNOWN && tx->request_protocol_number == HTP_PROTOCOL_INVALID) + htp_log(connp, HTP_LOG_MARK, HTP_LOG_WARNING, 0, "Request line: unknown method and invalid protocol"); + + #ifdef HTP_DEBUG + fprint_raw_data(stderr, __func__, bstr_ptr(tx->request_protocol), bstr_len(tx->request_protocol)); + #endif + + return HTP_OK; +} + |