/***************************************************************************
 * Copyright (c) 2009-2010 Open Information Security Foundation
 * Copyright (c) 2010-2013 Qualys, Inc.
 * All rights reserved.
 *
 * Redistribution and use in source and binary forms, with or without
 * modification, are permitted provided that the following conditions are
 * met:
 *
 * - Redistributions of source code must retain the above copyright
 *   notice, this list of conditions and the following disclaimer.
 *
 * - Redistributions in binary form must reproduce the above copyright
 *   notice, this list of conditions and the following disclaimer in the
 *   documentation and/or other materials provided with the distribution.
 *
 * - Neither the name of the Qualys, Inc. nor the names of its
 *   contributors may be used to endorse or promote products derived from
 *   this software without specific prior written permission.
 *
 * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
 * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
 * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
 * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
 * HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
 * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
 * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
 * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
 * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
 * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
 * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
 ***************************************************************************/

/**
 * @file
 * @author Ivan Ristic
 */

#include "htp_config_auto.h"

#include "htp_private.h"

/**
 * Release a header structure together with its name and value strings.
 * Used for headers that were parsed but are not (or no longer) going to be
 * stored in the transaction's header table.
 *
 * @param[in] h Header to destroy. Must not be NULL.
 */
static void htp_request_generic_discard_header(htp_header_t *h) {
    bstr_free(h->name);
    bstr_free(h->value);
    free(h);
}

/**
 * Extract one request header. A header can span multiple lines, in
 * which case they will be folded into one before parsing is attempted.
 *
 * The parsed header is either added to the request header table of the
 * current inbound transaction, or, when a header with the same name is
 * already present, merged into the existing entry:
 *  - a repeated Content-Length is only compared against the stored value
 *    (a warning is logged when the two do not agree), the new value is
 *    not appended;
 *  - any other repeated header has its value appended to the existing
 *    one, separated by ", ".
 * Once the transaction-wide repetition counter has reached
 * HTP_MAX_HEADERS_REPETITIONS, further repeated headers are dropped.
 *
 * @param[in] connp
 * @param[in] data
 * @param[in] len
 * @return HTP_OK on success (including when a repeated header is dropped
 *         because of the repetition limit), HTP_ERROR when the header
 *         cannot be parsed, allocated or stored.
 */
htp_status_t htp_process_request_header_generic(htp_connp_t *connp, unsigned char *data, size_t len) {
    // Create a new header structure.
    htp_header_t *h = calloc(1, sizeof *h);
    if (h == NULL) return HTP_ERROR;

    // Now try to parse the header. On failure the parser leaves no
    // allocated name/value behind, so only the structure is released.
    if (htp_parse_request_header_generic(connp, h, data, len) != HTP_OK) {
        free(h);
        return HTP_ERROR;
    }

#ifdef HTP_DEBUG
    fprint_bstr(stderr, "Header name", h->name);
    fprint_bstr(stderr, "Header value", h->value);
#endif

    // Do we already have a header with the same name?
    htp_header_t *h_existing = htp_table_get(connp->in_tx->request_headers, h->name);
    if (h_existing == NULL) {
        // Add as a new header. On success the table keeps the pointer to
        // the header, so it must not be freed here.
        if (htp_table_add(connp->in_tx->request_headers, h->name, h) != HTP_OK) {
            // Report the failure instead of silently losing the header;
            // this matches how the other failures in this function are
            // reported to the caller.
            htp_request_generic_discard_header(h);
            return HTP_ERROR;
        }

        return HTP_OK;
    }

    // TODO Do we want to have a list of the headers that are
    //      allowed to be combined in this way?
    if ((h_existing->flags & HTP_FIELD_REPEATED) == 0) {
        // This is the second occurrence for this header.
        htp_log(connp, HTP_LOG_MARK, HTP_LOG_WARNING, 0, "Repetition for header");
    } else {
        // For simplicity reasons, we count the repetitions of all headers
        // in a single per-transaction counter.
        if (connp->in_tx->req_header_repetitions < HTP_MAX_HEADERS_REPETITIONS) {
            connp->in_tx->req_header_repetitions++;
        } else {
            // Limit reached: drop this occurrence without merging it.
            htp_request_generic_discard_header(h);
            return HTP_OK;
        }
    }

    // Keep track of repeated same-name headers.
    h_existing->flags |= HTP_FIELD_REPEATED;

    // Having multiple C-L headers is against the RFC but
    // servers may ignore the subsequent headers if the values are the same.
    if (bstr_cmp_c_nocase(h->name, "Content-Length") == 0) {
        // Don't use string comparison here because we want to
        // ignore small formatting differences.
        int64_t existing_cl = htp_parse_content_length(h_existing->value, NULL);
        int64_t new_cl = htp_parse_content_length(h->value, NULL);

        // Ambiguous request C-L value: either one is reported as -1 by
        // the parser, or the two values differ.
        if ((existing_cl == -1) || (new_cl == -1) || (existing_cl != new_cl)) {
            htp_log(connp, HTP_LOG_MARK, HTP_LOG_WARNING, 0, "Ambiguous request C-L value");
        }

        // Ignoring the new C-L header that has the same value as the previous ones.
    } else {
        // Add to the existing header: make room for ", " plus the new value.
        bstr *new_value = bstr_expand(h_existing->value,
                bstr_len(h_existing->value) + 2 + bstr_len(h->value));
        if (new_value == NULL) {
            htp_request_generic_discard_header(h);
            return HTP_ERROR;
        }

        h_existing->value = new_value;
        bstr_add_mem_noex(h_existing->value, ", ", 2);
        bstr_add_noex(h_existing->value, h->value);
    }

    // The new header structure is no longer needed.
    htp_request_generic_discard_header(h);

    return HTP_OK;
}

/**
 * Generic request header parser.
 *
 * Splits one header line into name and value and stores newly allocated
 * copies in h->name and h->value. Anomalies are recorded in h->flags and,
 * once per transaction, in connp->in_tx->flags together with a warning:
 *  - no colon (or a NUL byte before the colon): HTP_FIELD_UNPARSEABLE;
 *    the name becomes the empty string and the value the entire line;
 *  - empty name, LWS between name and colon, or a name containing
 *    non-token characters: HTP_FIELD_INVALID.
 *
 * @param[in] connp
 * @param[in] h    Header structure to fill in.
 * @param[in] data
 * @param[in] len
 * @return HTP_OK or HTP_ERROR; HTP_ERROR is returned only when a copy
 *         cannot be allocated, in which case no name or value allocated
 *         by this function is left behind.
 */
htp_status_t htp_parse_request_header_generic(htp_connp_t *connp, htp_header_t *h,
        unsigned char *data, size_t len) {
    size_t name_start, name_end;
    size_t value_start, value_end;

    // May shorten len (presumably by trimming the line terminator; see
    // htp_chomp() for the exact rules).
    htp_chomp(data, &len);

    name_start = 0;

    // Look for the colon.
    size_t colon_pos = 0;

    while ((colon_pos < len) && (data[colon_pos] != '\0') && (data[colon_pos] != ':')) colon_pos++;

    if ((colon_pos == len) || (data[colon_pos] == '\0')) {
        // Missing colon.

        h->flags |= HTP_FIELD_UNPARSEABLE;

        // Log only once per transaction.
        if (!(connp->in_tx->flags & HTP_FIELD_UNPARSEABLE)) {
            connp->in_tx->flags |= HTP_FIELD_UNPARSEABLE;
            htp_log(connp, HTP_LOG_MARK, HTP_LOG_WARNING, 0, "Request field invalid: colon missing");
        }

        // We handle this case as a header with an empty name, with the value equal
        // to the entire input string.

        // TODO Apache will respond to this problem with a 400.

        // Now extract the name and the value
        h->name = bstr_dup_c("");
        if (h->name == NULL) return HTP_ERROR;

        h->value = bstr_dup_mem(data, len);
        if (h->value == NULL) {
            bstr_free(h->name);
            // Do not leave a dangling pointer in the caller's structure.
            h->name = NULL;
            return HTP_ERROR;
        }

        return HTP_OK;
    }

    if (colon_pos == 0) {
        // Empty header name.

        h->flags |= HTP_FIELD_INVALID;

        // Log only once per transaction.
        if (!(connp->in_tx->flags & HTP_FIELD_INVALID)) {
            connp->in_tx->flags |= HTP_FIELD_INVALID;
            htp_log(connp, HTP_LOG_MARK, HTP_LOG_WARNING, 0, "Request field invalid: empty name");
        }
    }

    name_end = colon_pos;

    // Ignore LWS after field-name.
    size_t prev = name_end;
    while ((prev > name_start) && (htp_is_lws(data[prev - 1]))) {
        // LWS after header name.
        prev--;
        name_end--;

        h->flags |= HTP_FIELD_INVALID;

        // Log only once per transaction.
        if (!(connp->in_tx->flags & HTP_FIELD_INVALID)) {
            connp->in_tx->flags |= HTP_FIELD_INVALID;
            htp_log(connp, HTP_LOG_MARK, HTP_LOG_WARNING, 0, "Request field invalid: LWS after name");
        }
    }

    // Header value.

    value_start = colon_pos;

    // Go over the colon.
    if (value_start < len) {
        value_start++;
    }

    // Ignore LWS before field-content.
    while ((value_start < len) && (htp_is_lws(data[value_start]))) {
        value_start++;
    }

    // Look for the end of field-content.
    value_end = len;

    // Ignore LWS after field-content. A colon was found at an index below
    // len, so len is at least 1 here and the subtraction cannot wrap.
    prev = value_end - 1;
    while ((prev > value_start) && (htp_is_lws(data[prev]))) {
        prev--;
        value_end--;
    }

    // Check that the header name is a token.
    size_t i = name_start;
    while (i < name_end) {
        if (!htp_is_token(data[i])) {
            // Incorrectly formed header name.

            h->flags |= HTP_FIELD_INVALID;

            // Log only once per transaction.
            if (!(connp->in_tx->flags & HTP_FIELD_INVALID)) {
                connp->in_tx->flags |= HTP_FIELD_INVALID;
                htp_log(connp, HTP_LOG_MARK, HTP_LOG_WARNING, 0, "Request header name is not a token");
            }

            break;
        }

        i++;
    }

    // Now extract the name and the value
    h->name = bstr_dup_mem(data + name_start, name_end - name_start);
    if (h->name == NULL) return HTP_ERROR;

    h->value = bstr_dup_mem(data + value_start, value_end - value_start);
    if (h->value == NULL) {
        bstr_free(h->name);
        // Do not leave a dangling pointer in the caller's structure.
        h->name = NULL;
        return HTP_ERROR;
    }

    return HTP_OK;
}

/**
 * Generic request line parser. Equivalent to calling
 * htp_parse_request_line_generic_ex() with nul_terminates set to 0, i.e.
 * a NUL byte does not end the request line.
 *
 * @param[in] connp
 * @return HTP_OK or HTP_ERROR
 */
htp_status_t htp_parse_request_line_generic(htp_connp_t *connp) {
    return htp_parse_request_line_generic_ex(connp, 0 /* NUL does not terminate the line */);
}

/**
 * Generic request line parser with configurable NUL handling.
 *
 * Splits connp->in_tx->request_line into method, URI and protocol and
 * stores newly allocated copies in the transaction (request_method,
 * request_uri, request_protocol) along with the numeric method and
 * protocol identifiers. When nothing follows the method, or nothing
 * follows the URI, the request is treated as HTTP/0.9.
 *
 * @param[in] connp
 * @param[in] nul_terminates When non-zero, the line is considered to end
 *                           at the first NUL byte.
 * @return HTP_OK, or HTP_ERROR when one of the copies cannot be allocated.
 */
htp_status_t htp_parse_request_line_generic_ex(htp_connp_t *connp, int nul_terminates) {
    htp_tx_t *tx = connp->in_tx;
    unsigned char *data = bstr_ptr(tx->request_line);
    size_t len = bstr_len(tx->request_line);
    size_t pos = 0;
    size_t mstart = 0;
    size_t start;
    size_t bad_delim;

    if (nul_terminates) {
        // The line ends with the first NUL byte.

        size_t newlen = 0;
        while ((pos < len) && (data[pos] != '\0')) {
            pos++;
            newlen++;
        }

        // Start again, with the new length.
        len = newlen;
        pos = 0;
    }

    // skip past leading whitespace. IIS allows this
    while ((pos < len) && htp_is_space(data[pos])) pos++;
    if (pos) {
        htp_log(connp, HTP_LOG_MARK, HTP_LOG_WARNING, 0, "Request line: leading whitespace");
        mstart = pos;

        if (connp->cfg->requestline_leading_whitespace_unwanted != HTP_UNWANTED_IGNORE) {
            // reset mstart so that we copy the whitespace into the method
            mstart = 0;
            // set expected response code to this anomaly
            tx->response_status_expected_number = connp->cfg->requestline_leading_whitespace_unwanted;
        }
    }

    // The request method starts at the beginning of the
    // line and ends with the first whitespace character.
    while ((pos < len) && (!htp_is_space(data[pos]))) pos++;

    // No, we don't care if the method is empty.

    tx->request_method = bstr_dup_mem(data + mstart, pos - mstart);
    if (tx->request_method == NULL) return HTP_ERROR;

#ifdef HTP_DEBUG
    fprint_raw_data(stderr, __func__, bstr_ptr(tx->request_method), bstr_len(tx->request_method));
#endif

    tx->request_method_number = htp_convert_method_to_number(tx->request_method);

    bad_delim = 0;
    // Ignore whitespace after request method. The RFC allows
    // for only one SP, but then suggests any number of SP and HT
    // should be permitted. Apache uses isspace(), which is even
    // more permitting, so that's what we use here.
    while ((pos < len) && (isspace(data[pos]))) {
        if (!bad_delim && data[pos] != 0x20) {
            bad_delim++;
        }
        pos++;
    }
    // Too much performance overhead for fuzzing
#ifndef FUZZING_BUILD_MODE_UNSAFE_FOR_PRODUCTION
    if (bad_delim) {
        htp_log(connp, HTP_LOG_MARK, HTP_LOG_WARNING, 0, "Request line: non-compliant delimiter between Method and URI");
    }
#endif

    // Is there anything after the request method?
    if (pos == len) {
        // No, this looks like a HTTP/0.9 request.

        tx->is_protocol_0_9 = 1;
        tx->request_protocol_number = HTP_PROTOCOL_0_9;
        if (tx->request_method_number == HTP_M_UNKNOWN)
            htp_log(connp, HTP_LOG_MARK, HTP_LOG_WARNING, 0, "Request line: unknown method only");

        return HTP_OK;
    }

    start = pos;
    bad_delim = 0;
    if (tx->connp->cfg->allow_space_uri) {
        // Scan backwards from the end of the line so that spaces inside
        // the URI are kept. pos < len held above, so len - 1 cannot wrap.
        pos = len - 1;
        // Skips the spaces at the end of line (after protocol)
        while (pos > start && htp_is_space(data[pos])) pos--;
        // The URI ends with the last whitespace.
        while ((pos > start) && (data[pos] != 0x20)) {
            if (!bad_delim && htp_is_space(data[pos])) {
                bad_delim++;
            }
            pos--;
        }
        /* if we've seen some 'bad' delimiters, we retry with those */
        if (bad_delim && pos == start) {
            // special case: even though RFC's allow only SP (0x20), many
            // implementations allow other delimiters, like tab or other
            // characters that isspace() accepts.
            pos = len - 1;
            while ((pos > start) && (!htp_is_space(data[pos]))) pos--;
        } else {
            // reset bad_delim found in protocol part
            bad_delim = 0;
            for (size_t i = start; i < pos; i++) {
                if (data[i] != 0x20 && htp_is_space(data[i])) {
                    bad_delim = 1;
                    break;
                }
            }
        }
        if (bad_delim) {
#ifndef FUZZING_BUILD_MODE_UNSAFE_FOR_PRODUCTION
            // warn regardless if we've seen non-compliant chars
            htp_log(connp, HTP_LOG_MARK, HTP_LOG_WARNING, 0, "Request line: URI contains non-compliant delimiter");
#endif
        } else if (pos == start) {
            // No delimiter found at all: the URI runs to the end of the line.
            pos = len;
        }
    } else {
        // The URI ends with the first whitespace.
        while ((pos < len) && (data[pos] != 0x20)) {
            if (!bad_delim && htp_is_space(data[pos])) {
                bad_delim++;
            }
            pos++;
        }
        /* if we've seen some 'bad' delimiters, we retry with those */
        if (bad_delim && pos == len) {
            // special case: even though RFC's allow only SP (0x20), many
            // implementations allow other delimiters, like tab or other
            // characters that isspace() accepts.
            pos = start;
            while ((pos < len) && (!htp_is_space(data[pos]))) pos++;
        }
#ifndef FUZZING_BUILD_MODE_UNSAFE_FOR_PRODUCTION
        if (bad_delim) {
            // warn regardless if we've seen non-compliant chars
            htp_log(connp, HTP_LOG_MARK, HTP_LOG_WARNING, 0, "Request line: URI contains non-compliant delimiter");
        }
#endif
    }

    tx->request_uri = bstr_dup_mem(data + start, pos - start);
    if (tx->request_uri == NULL) return HTP_ERROR;

#ifdef HTP_DEBUG
    fprint_raw_data(stderr, __func__, bstr_ptr(tx->request_uri), bstr_len(tx->request_uri));
#endif

    // Ignore whitespace after URI.
    while ((pos < len) && (htp_is_space(data[pos]))) pos++;

    // Is there protocol information available?
    if (pos == len) {
        // No, this looks like a HTTP/0.9 request.

        tx->is_protocol_0_9 = 1;
        tx->request_protocol_number = HTP_PROTOCOL_0_9;
        if (tx->request_method_number == HTP_M_UNKNOWN)
            htp_log(connp, HTP_LOG_MARK, HTP_LOG_WARNING, 0, "Request line: unknown method and no protocol");

        return HTP_OK;
    }

    // The protocol information continues until the end of the line.
    tx->request_protocol = bstr_dup_mem(data + pos, len - pos);
    if (tx->request_protocol == NULL) return HTP_ERROR;

    tx->request_protocol_number = htp_parse_protocol(tx->request_protocol);
    if (tx->request_method_number == HTP_M_UNKNOWN && tx->request_protocol_number == HTP_PROTOCOL_INVALID)
        htp_log(connp, HTP_LOG_MARK, HTP_LOG_WARNING, 0, "Request line: unknown method and invalid protocol");

#ifdef HTP_DEBUG
    fprint_raw_data(stderr, __func__, bstr_ptr(tx->request_protocol), bstr_len(tx->request_protocol));
#endif

    return HTP_OK;
}