summaryrefslogtreecommitdiffstats
path: root/htp/htp_multipart.c
diff options
context:
space:
mode:
Diffstat (limited to 'htp/htp_multipart.c')
-rw-r--r--htp/htp_multipart.c1615
1 files changed, 1615 insertions, 0 deletions
diff --git a/htp/htp_multipart.c b/htp/htp_multipart.c
new file mode 100644
index 0000000..ea73072
--- /dev/null
+++ b/htp/htp_multipart.c
@@ -0,0 +1,1615 @@
+/***************************************************************************
+ * Copyright (c) 2009-2010 Open Information Security Foundation
+ * Copyright (c) 2010-2013 Qualys, Inc.
+ * All rights reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions are
+ * met:
+ *
+ * - Redistributions of source code must retain the above copyright
+ * notice, this list of conditions and the following disclaimer.
+
+ * - Redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in the
+ * documentation and/or other materials provided with the distribution.
+
+ * - Neither the name of the Qualys, Inc. nor the names of its
+ * contributors may be used to endorse or promote products derived from
+ * this software without specific prior written permission.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+ * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+ * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+ * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
+ * HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+ * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
+ * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+ * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
+ * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+ * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+ * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+ ***************************************************************************/
+
+/**
+ * @file
+ * @author Ivan Ristic <ivanr@webkreator.com>
+ */
+
+#include "htp_config_auto.h"
+
+#include "htp_private.h"
+
+/**
+ * Classifies a Content-Disposition parameter by its name.
+ *
+ * The parameter name is the byte range [startpos, endpos) of data. It is
+ * compared byte-for-byte (case-sensitively) against "name" and "filename".
+ *
+ * @param[in] data Buffer that contains the parameter name.
+ * @param[in] startpos Offset of the first byte of the parameter name.
+ * @param[in] endpos Offset one past the last byte of the parameter name.
+ * @return CD_PARAM_NAME, CD_PARAM_FILENAME, or CD_PARAM_OTHER for anything else.
+ */
+static int htp_mpartp_cd_param_type(unsigned char *data, size_t startpos, size_t endpos) {
+    const unsigned char *param = data + startpos;
+    const size_t param_len = endpos - startpos;
+
+    switch (param_len) {
+        case 4:
+            if (memcmp(param, "name", 4) == 0) return CD_PARAM_NAME;
+            break;
+
+        case 8:
+            if (memcmp(param, "filename", 8) == 0) return CD_PARAM_FILENAME;
+            break;
+
+        default:
+            break;
+    }
+
+    return CD_PARAM_OTHER;
+}
+
+/**
+ * Returns the multipart structure embedded in the parser.
+ *
+ * @param[in] parser
+ * @return Address of the parser's multipart member.
+ */
+htp_multipart_t *htp_mpartp_get_multipart(htp_mpartp_t *parser) {
+    htp_multipart_t *multipart = &parser->multipart;
+    return multipart;
+}
+
+/**
+ * Decodes a quoted Content-Disposition parameter value in place.
+ *
+ * A backslash is dropped when it is immediately followed by a double quote
+ * or by another backslash; every other byte is copied through unchanged.
+ * The string is shortened by the number of dropped backslashes.
+ *
+ * This cannot be done correctly without a parsing personality, because
+ * browsers disagree on the encoding:
+ * - Firefox encodes " as \", and \ is not encoded.
+ * - Chrome encodes " as %22.
+ * - IE encodes " as \", and \ is not encoded.
+ * - Opera encodes " as \" and \ as \\.
+ *
+ * @param[in] b String to decode; modified in place.
+ */
+static void htp_mpart_decode_quoted_cd_value_inplace(bstr *b) {
+    unsigned char *buf = bstr_ptr(b);
+    const size_t len = bstr_len(b);
+    size_t rpos = 0; // Next byte to read.
+    size_t wpos = 0; // Next byte to write; never ahead of rpos.
+
+    while (rpos < len) {
+        // Skip a backslash that escapes a double quote or a backslash.
+        if ((buf[rpos] == '\\') && (rpos + 1 < len)) {
+            const unsigned char next = buf[rpos + 1];
+            if ((next == '"') || (next == '\\')) rpos++;
+        }
+
+        buf[wpos++] = buf[rpos++];
+    }
+
+    // The write position is the decoded length.
+    bstr_adjust_len(b, wpos);
+}
+
+/**
+ * Records a Content-Disposition syntax error on the parser that owns the part.
+ *
+ * @param[in] part
+ * @return Always HTP_DECLINED, so that callers can return the result directly.
+ */
+static htp_status_t htp_mpart_cd_syntax_invalid(htp_multipart_part_t *part) {
+    part->parser->multipart.flags |= HTP_MULTIPART_CD_SYNTAX_INVALID;
+    return HTP_DECLINED;
+}
+
+/**
+ * Advances over whitespace (as determined by isspace()).
+ *
+ * @param[in] data
+ * @param[in] len Length of data.
+ * @param[in] pos Starting offset.
+ * @return Offset of the first non-whitespace byte at or after pos, or len if there is none.
+ */
+static size_t htp_mpart_cd_skip_whitespace(const unsigned char *data, size_t len, size_t pos) {
+    while ((pos < len) && isspace(data[pos])) pos++;
+    return pos;
+}
+
+/**
+ * Parses the Content-Disposition part header.
+ *
+ * The header value must begin with "form-data", followed by zero or more
+ * parameters of the form ;name="value". Only the "name" and "filename"
+ * parameters are accepted, each at most once; values must be enclosed in
+ * double quotes. On success part->name and/or part->file are populated.
+ *
+ * @param[in] part
+ * @return HTP_OK on success (header found and parsed), HTP_DECLINED if there is no C-D header or if
+ *         it could not be processed (the reason is recorded in the parser's multipart flags),
+ *         and HTP_ERROR on fatal error (memory allocation failure).
+ */
+htp_status_t htp_mpart_part_parse_c_d(htp_multipart_part_t *part) {
+    // Find the C-D header.
+    htp_header_t *h = htp_table_get_c(part->headers, "content-disposition");
+    if (h == NULL) {
+        part->parser->multipart.flags |= HTP_MULTIPART_PART_UNKNOWN;
+        return HTP_DECLINED;
+    }
+
+    // Require "form-data" at the beginning of the header.
+    if (bstr_index_of_c(h->value, "form-data") != 0) return htp_mpart_cd_syntax_invalid(part);
+
+    // The parsing starts here.
+    unsigned char *data = bstr_ptr(h->value);
+    size_t len = bstr_len(h->value);
+    size_t pos = 9; // Start after "form-data"
+
+    // Main parameter parsing loop (once per parameter).
+    while (pos < len) {
+        // Ignore whitespace, then expect a semicolon.
+        pos = htp_mpart_cd_skip_whitespace(data, len, pos);
+        if ((pos == len) || (data[pos] != ';')) return htp_mpart_cd_syntax_invalid(part);
+        pos++;
+
+        // Go over the whitespace before parameter name.
+        pos = htp_mpart_cd_skip_whitespace(data, len, pos);
+        if (pos == len) return htp_mpart_cd_syntax_invalid(part);
+
+        // Found the starting position of the parameter name; look for its end.
+        size_t start = pos;
+        while ((pos < len) && (!isspace(data[pos])) && (data[pos] != '=')) pos++;
+        if (pos == len) return htp_mpart_cd_syntax_invalid(part);
+
+        // Determine parameter type ("name", "filename", or other).
+        int param_type = htp_mpartp_cd_param_type(data, start, pos);
+
+        // Ignore whitespace after parameter name, if any, then expect the equals sign.
+        pos = htp_mpart_cd_skip_whitespace(data, len, pos);
+        if ((pos == len) || (data[pos] != '=')) return htp_mpart_cd_syntax_invalid(part);
+        pos++;
+
+        // Go over the whitespace before the parameter value.
+        pos = htp_mpart_cd_skip_whitespace(data, len, pos);
+        if (pos == len) return htp_mpart_cd_syntax_invalid(part);
+
+        // Expecting a double quote. Bare strings and non-standard
+        // quoting are not accepted.
+        if (data[pos] != '"') return htp_mpart_cd_syntax_invalid(part);
+        pos++; // Over the double quote.
+
+        // We have the starting position of the value.
+        start = pos;
+
+        // Find the end of the value.
+        while ((pos < len) && (data[pos] != '"')) {
+            // Check for escaping.
+            if (data[pos] == '\\') {
+                // A backslash as the last character in the C-D header.
+                if (pos + 1 >= len) return htp_mpart_cd_syntax_invalid(part);
+
+                // Allow " and \ to be escaped; go over the quoted character.
+                if ((data[pos + 1] == '"') || (data[pos + 1] == '\\')) pos++;
+            }
+
+            pos++;
+        }
+
+        // If we've reached the end of the string that means the value was not
+        // terminated properly (the second double quote is missing). Otherwise
+        // the loop above stopped on the terminating double quote.
+        if (pos == len) return htp_mpart_cd_syntax_invalid(part);
+
+        pos++; // Over the terminating double quote.
+
+        // Length of the value, without the surrounding quotes.
+        size_t value_len = pos - start - 1;
+
+        // Finally, process the parameter value.
+        switch (param_type) {
+            case CD_PARAM_NAME:
+                // Check that we have not seen the name parameter already.
+                if (part->name != NULL) {
+                    part->parser->multipart.flags |= HTP_MULTIPART_CD_PARAM_REPEATED;
+                    return HTP_DECLINED;
+                }
+
+                part->name = bstr_dup_mem(data + start, value_len);
+                if (part->name == NULL) return HTP_ERROR;
+
+                htp_mpart_decode_quoted_cd_value_inplace(part->name);
+                break;
+
+            case CD_PARAM_FILENAME:
+                // Check that we have not seen the filename parameter already.
+                if (part->file != NULL) {
+                    part->parser->multipart.flags |= HTP_MULTIPART_CD_PARAM_REPEATED;
+                    return HTP_DECLINED;
+                }
+
+                part->file = calloc(1, sizeof (htp_file_t));
+                if (part->file == NULL) return HTP_ERROR;
+
+                part->file->fd = -1;
+                part->file->source = HTP_FILE_MULTIPART;
+
+                part->file->filename = bstr_dup_mem(data + start, value_len);
+                if (part->file->filename == NULL) {
+                    // Do not leave a dangling pointer behind: the part is
+                    // destroyed later, and htp_mpart_part_destroy() would
+                    // otherwise dereference and free this memory again.
+                    free(part->file);
+                    part->file = NULL;
+                    return HTP_ERROR;
+                }
+
+                htp_mpart_decode_quoted_cd_value_inplace(part->file->filename);
+                break;
+
+            default:
+                // Unknown parameter.
+                part->parser->multipart.flags |= HTP_MULTIPART_CD_PARAM_UNKNOWN;
+                return HTP_DECLINED;
+        }
+
+        // Continue to parse the next parameter, if any.
+    }
+
+    return HTP_OK;
+}
+
+/**
+ * Parses the part's Content-Type header, if there is one, storing the
+ * result in part->content_type.
+ *
+ * @param[in] part
+ * @return HTP_DECLINED if the part has no C-T header; otherwise whatever
+ *         htp_parse_ct_header() returns for the header value.
+ */
+static htp_status_t htp_mpart_part_parse_c_t(htp_multipart_part_t *part) {
+    htp_header_t *ct_header = (htp_header_t *) htp_table_get_c(part->headers, "content-type");
+
+    if (ct_header != NULL) {
+        return htp_parse_ct_header(ct_header->value, &part->content_type);
+    }
+
+    return HTP_DECLINED;
+}
+
+/**
+ * Processes part headers: Content-Disposition first, then Content-Type.
+ * Only a fatal error from either parser fails the call; a header that is
+ * missing or declined is not treated as a failure here.
+ *
+ * @param[in] part
+ * @return HTP_OK on success, HTP_ERROR on failure.
+ */
+htp_status_t htp_mpart_part_process_headers(htp_multipart_part_t *part) {
+    // The short-circuit keeps the C-T parser from running after a fatal C-D error.
+    if ((htp_mpart_part_parse_c_d(part) == HTP_ERROR) || (htp_mpart_part_parse_c_t(part) == HTP_ERROR)) {
+        return HTP_ERROR;
+    }
+
+    return HTP_OK;
+}
+
+/**
+ * Parses one part header line of the form "Name: value" and stores it in
+ * part->headers. If a header with the same name already exists, the new
+ * value is appended to the existing one, separated by ", ".
+ *
+ * The line is declined when it contains a NUL byte, starts with whitespace,
+ * has no colon, has an empty name, has LWS between the name and the colon,
+ * has no value, or has a name that is not a token.
+ *
+ * @param[in] part
+ * @param[in] data Header line, without the line terminator.
+ * @param[in] len
+ * @return HTP_OK on success, HTP_DECLINED on parsing error, HTP_ERROR on fatal error.
+ */
+htp_status_t htp_mpartp_parse_header(htp_multipart_part_t *part, const unsigned char *data, size_t len) {
+    size_t colon_pos = 0;
+    size_t value_start = 0;
+    htp_header_t *h = NULL;
+
+    // We do not allow NUL bytes here.
+    if (memchr(data, '\0', len) != NULL) {
+        part->parser->multipart.flags |= HTP_MULTIPART_NUL_BYTE;
+        return HTP_DECLINED;
+    }
+
+    // The name starts at offset 0; whitespace before it is not allowed.
+    if ((len > 0) && (htp_is_space(data[0]))) goto invalid_header;
+
+    // Now look for the colon. It must exist, and must not be the first byte
+    // (which would mean an empty header name).
+    while ((colon_pos < len) && (data[colon_pos] != ':')) colon_pos++;
+    if ((colon_pos == len) || (colon_pos == 0)) goto invalid_header;
+
+    // LWS between the field name and the colon. Not allowing for now.
+    if (htp_is_lws(data[colon_pos - 1])) goto invalid_header;
+
+    // Header value: skip the colon and any LWS that follows it.
+    value_start = colon_pos + 1;
+    while ((value_start < len) && (htp_is_lws(data[value_start]))) value_start++;
+
+    // No header value.
+    if (value_start == len) goto invalid_header;
+
+    // Check that the header name is a token.
+    for (size_t i = 0; i < colon_pos; i++) {
+        if (!htp_is_token(data[i])) goto invalid_header;
+    }
+
+    // Now extract the name and the value. The name is everything before
+    // the colon, and the value runs to the end of the line.
+    h = calloc(1, sizeof (htp_header_t));
+    if (h == NULL) return HTP_ERROR;
+
+    h->name = bstr_dup_mem(data, colon_pos);
+    if (h->name == NULL) goto fatal_error;
+
+    h->value = bstr_dup_mem(data + value_start, len - value_start);
+    if (h->value == NULL) goto fatal_error;
+
+    // Anything other than C-D and C-T is noted as an unknown part header.
+    if ((bstr_cmp_c_nocase(h->name, "content-disposition") != 0) && (bstr_cmp_c_nocase(h->name, "content-type") != 0)) {
+        part->parser->multipart.flags |= HTP_MULTIPART_PART_HEADER_UNKNOWN;
+    }
+
+    // Check if the header already exists.
+    htp_header_t *existing = htp_table_get(part->headers, h->name);
+    if (existing == NULL) {
+        // Add as a new header; the table entry now refers to h.
+        if (htp_table_add(part->headers, h->name, h) != HTP_OK) goto fatal_error;
+        return HTP_OK;
+    }
+
+    // Add to the existing header: make room for ", " and the new value.
+    bstr *merged = bstr_expand(existing->value, bstr_len(existing->value) + 2 + bstr_len(h->value));
+    if (merged == NULL) goto fatal_error;
+
+    existing->value = merged;
+    bstr_add_mem_noex(existing->value, ", ", 2);
+    bstr_add_noex(existing->value, h->value);
+
+    // The header is no longer needed.
+    bstr_free(h->name);
+    bstr_free(h->value);
+    free(h);
+
+    // Keep track of same-name headers.
+    existing->flags |= HTP_MULTIPART_PART_HEADER_REPEATED;
+    part->parser->multipart.flags |= HTP_MULTIPART_PART_HEADER_REPEATED;
+
+    return HTP_OK;
+
+invalid_header:
+    part->parser->multipart.flags |= HTP_MULTIPART_PART_HEADER_INVALID;
+    return HTP_DECLINED;
+
+fatal_error:
+    // Members that were never allocated are still NULL here (h comes from calloc).
+    bstr_free(h->name);
+    bstr_free(h->value);
+    free(h);
+    return HTP_ERROR;
+}
+
+/**
+ * Creates a new Multipart part that belongs to the given parser. The
+ * parser's accumulated part data and part header pieces are cleared, so
+ * that the new part starts with empty buffers.
+ *
+ * @param[in] parser
+ * @return New part instance, or NULL on memory allocation failure.
+ */
+htp_multipart_part_t *htp_mpart_part_create(htp_mpartp_t *parser) {
+    htp_multipart_part_t *new_part = calloc(1, sizeof (htp_multipart_part_t));
+    if (new_part == NULL) return NULL;
+
+    new_part->headers = htp_table_create(4);
+    if (new_part->headers != NULL) {
+        new_part->parser = parser;
+
+        bstr_builder_clear(parser->part_data_pieces);
+        bstr_builder_clear(parser->part_header_pieces);
+
+        return new_part;
+    }
+
+    // Could not create the headers table.
+    free(new_part);
+    return NULL;
+}
+
+/**
+ * Destroys a part, releasing its file information (including the temporary
+ * file on disk, if any), content type, headers, and the part itself.
+ *
+ * @param[in] part Part to destroy; NULL is accepted and ignored.
+ * @param[in] gave_up_data When non-zero and the part is a text part, the
+ *            part's name and value are left alone rather than freed
+ *            (presumably because they are owned elsewhere by then; confirm
+ *            against the caller that sets the parser's gave_up_data).
+ */
+void htp_mpart_part_destroy(htp_multipart_part_t *part, int gave_up_data) {
+    if (part == NULL) return;
+
+    htp_file_t *file = part->file;
+    if (file != NULL) {
+        bstr_free(file->filename);
+
+        // Remove the temporary file before forgetting its name.
+        if (file->tmpname != NULL) {
+            unlink(file->tmpname);
+            free(file->tmpname);
+        }
+
+        free(file);
+        part->file = NULL;
+    }
+
+    // Name and value are kept only for text parts whose data was given up.
+    if (!(gave_up_data && (part->type == MULTIPART_PART_TEXT))) {
+        bstr_free(part->name);
+        bstr_free(part->value);
+    }
+
+    bstr_free(part->content_type);
+
+    if (part->headers != NULL) {
+        // The table does not own the header structures; free each one first.
+        const size_t header_count = htp_table_size(part->headers);
+        for (size_t idx = 0; idx < header_count; idx++) {
+            htp_header_t *header = htp_table_get_index(part->headers, idx, NULL);
+            bstr_free(header->name);
+            bstr_free(header->value);
+            free(header);
+        }
+
+        htp_table_destroy(part->headers);
+    }
+
+    free(part);
+}
+
+/**
+ * Finalizes part processing: classifies a part that follows the last
+ * boundary, raises flags for incomplete or unknown parts, and then either
+ * signals end-of-file to the file data callbacks (file parts) or combines
+ * the stored data pieces into part->value (all other parts).
+ *
+ * Some of the checks are made against the parser's current part rather
+ * than the part argument, so the two are expected to be the same object.
+ *
+ * @param[in] part
+ * @return HTP_OK on success, HTP_ERROR on failure (the part value could not be allocated).
+ */
+htp_status_t htp_mpart_part_finalize_data(htp_multipart_part_t *part) {
+    // Determine if this part is the epilogue.
+
+    if (part->parser->multipart.flags & HTP_MULTIPART_SEEN_LAST_BOUNDARY) {
+        if (part->type == MULTIPART_PART_UNKNOWN) {
+            // Assume that the unknown part after the last boundary is the epilogue.
+            part->parser->current_part->type = MULTIPART_PART_EPILOGUE;
+
+            // But if we've already seen a part we thought was the epilogue,
+            // raise HTP_MULTIPART_PART_UNKNOWN. Multiple epilogues are not allowed.
+            if (part->parser->multipart.flags & HTP_MULTIPART_HAS_EPILOGUE) {
+                part->parser->multipart.flags |= HTP_MULTIPART_PART_UNKNOWN;
+            }
+
+            part->parser->multipart.flags |= HTP_MULTIPART_HAS_EPILOGUE;
+        } else {
+            part->parser->multipart.flags |= HTP_MULTIPART_PART_AFTER_LAST_BOUNDARY;
+        }
+    }
+
+    // Sanity checks.
+
+    // Have we seen complete part headers? If we have not, that means that the part ended prematurely.
+    if ((part->parser->current_part->type != MULTIPART_PART_EPILOGUE) && (part->parser->current_part_mode != MODE_DATA)) {
+        part->parser->multipart.flags |= HTP_MULTIPART_PART_INCOMPLETE;
+    }
+
+    // Have we been able to determine the part type? If not, this means
+    // that the part did not contain the C-D header.
+    if (part->type == MULTIPART_PART_UNKNOWN) {
+        part->parser->multipart.flags |= HTP_MULTIPART_PART_UNKNOWN;
+    }
+
+    // Finalize part value.
+
+    if (part->type == MULTIPART_PART_FILE) {
+        // Notify callbacks about the end of the file (NULL data, zero length).
+        htp_mpartp_run_request_file_data_hook(part, NULL, 0);
+
+        // If we are storing the file to disk, close the file descriptor.
+        if (part->file->fd != -1) {
+            close(part->file->fd);
+        }
+    } else {
+        // Combine value pieces into a single buffer.
+        if (bstr_builder_size(part->parser->part_data_pieces) > 0) {
+            bstr *value = bstr_builder_to_str(part->parser->part_data_pieces);
+
+            // There are pieces to combine, so a NULL result means the
+            // combined buffer could not be created. Report it instead of
+            // silently leaving the part without a value.
+            if (value == NULL) return HTP_ERROR;
+
+            part->value = value;
+            bstr_builder_clear(part->parser->part_data_pieces);
+        }
+    }
+
+    return HTP_OK;
+}
+
+/**
+ * Adds len to the part's file length and passes the data chunk to the
+ * request file data callbacks. Does nothing when the parser has no
+ * configuration.
+ *
+ * @param[in] part Part with file information (part->file is dereferenced).
+ * @param[in] data Data chunk; htp_mpart_part_finalize_data() passes NULL to signal the end of the file.
+ * @param[in] len
+ * @return HTP_OK when there is no configuration; otherwise the result of running the callbacks.
+ */
+htp_status_t htp_mpartp_run_request_file_data_hook(htp_multipart_part_t *part, const unsigned char *data, size_t len) {
+    htp_mpartp_t *owner = part->parser;
+    if (owner->cfg == NULL) return HTP_OK;
+
+    // Keep track of the file length.
+    part->file->len += len;
+
+    // Package data for the callbacks.
+    htp_file_data_t chunk;
+    chunk.file = part->file;
+    chunk.data = data;
+    chunk.len = len;
+
+    // Send data to callbacks; their status is the status of this call.
+    return htp_hook_run_all(owner->cfg->hook_request_file_data, &chunk);
+}
+
+/**
+ * Creates the temporary file into which the contents of a file part are
+ * stored. The file name template is the parser's extraction directory
+ * followed by "/libhtp-multipart-file-XXXXXX"; it is built in a buffer of
+ * exactly the required size, so the directory is never truncated.
+ *
+ * On return part->file->tmpname holds the name (once it has been allocated,
+ * even if the file could not be created) and part->file->fd holds the
+ * result of mkstemp(). The parser's file count is incremented on success.
+ *
+ * @param[in] part Part with file information (part->file must not be NULL).
+ * @return HTP_OK on success, HTP_ERROR on failure.
+ */
+static htp_status_t htp_mpart_part_create_tmpfile(htp_multipart_part_t *part) {
+    static const char suffix[] = "/libhtp-multipart-file-XXXXXX";
+
+    const char *dir = part->parser->extract_dir;
+    if (dir == NULL) return HTP_ERROR;
+
+    const size_t dir_len = strlen(dir);
+
+    // sizeof (suffix) includes the terminating NUL.
+    char *tmpname = malloc(dir_len + sizeof (suffix));
+    if (tmpname == NULL) return HTP_ERROR;
+
+    memcpy(tmpname, dir, dir_len);
+    memcpy(tmpname + dir_len, suffix, sizeof (suffix));
+
+    part->file->tmpname = tmpname;
+
+    // Create the file with owner read/write permissions only.
+    mode_t previous_mask = umask(S_IXUSR | S_IRWXG | S_IRWXO);
+    part->file->fd = mkstemp(part->file->tmpname);
+    umask(previous_mask);
+
+    if (part->file->fd < 0) return HTP_ERROR;
+
+    part->parser->file_count++;
+
+    return HTP_OK;
+}
+
+/**
+ * Handles part data.
+ *
+ * In line mode the data is treated as header lines: chunks are collected
+ * until a complete line is available, folded lines are joined, and each
+ * complete header is parsed. An empty line ends the headers, determines the
+ * part type (file if a filename was seen, text if a name was seen) and
+ * switches the parser to data mode. In data mode the data is either kept
+ * in memory or, for file parts, sent to the file data callbacks and
+ * optionally written to the temporary file.
+ *
+ * @param[in] part
+ * @param[in] data
+ * @param[in] len
+ * @param[in] is_line Non-zero if data ends a line (only relevant in line mode).
+ * @return HTP_OK on success, HTP_ERROR on failure.
+ */
+htp_status_t htp_mpart_part_handle_data(htp_multipart_part_t *part, const unsigned char *data, size_t len, int is_line) {
+    #if HTP_DEBUG
+    fprintf(stderr, "Part type %d mode %d is_line %d\n", part->type, part->parser->current_part_mode, is_line);
+    fprint_raw_data(stderr, "htp_mpart_part_handle_data: data chunk", data, len);
+    #endif
+
+    // Keep track of raw part length.
+    part->len += len;
+
+    // If we're processing a part that came after the last boundary, then we're not sure if it
+    // is the epilogue part or some other part (in case of evasion attempt). For that reason we
+    // will keep all its data in the part_data_pieces structure. If it ends up not being the
+    // epilogue, this structure will be cleared.
+    if ((part->parser->multipart.flags & HTP_MULTIPART_SEEN_LAST_BOUNDARY) && (part->type == MULTIPART_PART_UNKNOWN)) {
+        bstr_builder_append_mem(part->parser->part_data_pieces, data, len);
+    }
+
+    if (part->parser->current_part_mode == MODE_LINE) {
+        // Line mode.
+
+        if (is_line) {
+            // End of the line.
+
+            bstr *line = NULL;
+
+            // If this line came to us in pieces, combine them now into a single buffer.
+            if (bstr_builder_size(part->parser->part_header_pieces) > 0) {
+                bstr_builder_append_mem(part->parser->part_header_pieces, data, len);
+                line = bstr_builder_to_str(part->parser->part_header_pieces);
+                if (line == NULL) return HTP_ERROR;
+                bstr_builder_clear(part->parser->part_header_pieces);
+
+                data = bstr_ptr(line);
+                len = bstr_len(line);
+            }
+
+            // Ignore the line endings.
+            if (len > 1) {
+                if (data[len - 1] == LF) len--;
+                if (data[len - 1] == CR) len--;
+            } else if (len > 0) {
+                if (data[len - 1] == LF) len--;
+            }
+
+            // Is it an empty line?
+            if (len == 0) {
+                // Empty line; process headers and switch to data mode.
+
+                // Process the pending header, if any.
+                if (part->parser->pending_header_line != NULL) {
+                    if (htp_mpartp_parse_header(part, bstr_ptr(part->parser->pending_header_line),
+                            bstr_len(part->parser->pending_header_line)) == HTP_ERROR)
+                    {
+                        bstr_free(line);
+                        return HTP_ERROR;
+                    }
+
+                    bstr_free(part->parser->pending_header_line);
+                    part->parser->pending_header_line = NULL;
+                }
+
+                if (htp_mpart_part_process_headers(part) == HTP_ERROR) {
+                    bstr_free(line);
+                    return HTP_ERROR;
+                }
+
+                part->parser->current_part_mode = MODE_DATA;
+                bstr_builder_clear(part->parser->part_header_pieces);
+
+                if (part->file != NULL) {
+                    // Changing part type because we have a filename.
+                    part->type = MULTIPART_PART_FILE;
+
+                    // Store the file on disk, if configured and within the limit.
+                    if ((part->parser->extract_files) && (part->parser->file_count < part->parser->extract_limit)) {
+                        if (htp_mpart_part_create_tmpfile(part) != HTP_OK) {
+                            bstr_free(line);
+                            return HTP_ERROR;
+                        }
+                    }
+                } else if (part->name != NULL) {
+                    // Changing part type because we have a name.
+                    part->type = MULTIPART_PART_TEXT;
+                    bstr_builder_clear(part->parser->part_data_pieces);
+                } else {
+                    // Do nothing; the type stays MULTIPART_PART_UNKNOWN.
+                }
+            } else {
+                // Not an empty line.
+
+                // Is there a pending header?
+                if (part->parser->pending_header_line == NULL) {
+                    if (line != NULL) {
+                        // Ownership of the combined line moves to the parser.
+                        part->parser->pending_header_line = line;
+                        line = NULL;
+                    } else {
+                        part->parser->pending_header_line = bstr_dup_mem(data, len);
+                        if (part->parser->pending_header_line == NULL) return HTP_ERROR;
+                    }
+                } else {
+                    // Is this a folded line?
+                    if (isspace(data[0])) {
+                        // Folding; add to the existing line.
+                        part->parser->multipart.flags |= HTP_MULTIPART_PART_HEADER_FOLDING;
+
+                        // Go through a temporary so that the pending line is not lost
+                        // if the append fails. This assumes bstr_add_mem() leaves its
+                        // argument intact on failure, which is how this file already
+                        // treats bstr_expand() -- confirm against the bstr API.
+                        bstr *extended = bstr_add_mem(part->parser->pending_header_line, data, len);
+                        if (extended == NULL) {
+                            bstr_free(line);
+                            return HTP_ERROR;
+                        }
+
+                        part->parser->pending_header_line = extended;
+                    } else {
+                        // Process the pending header line.
+                        if (htp_mpartp_parse_header(part, bstr_ptr(part->parser->pending_header_line),
+                                bstr_len(part->parser->pending_header_line)) == HTP_ERROR)
+                        {
+                            bstr_free(line);
+                            return HTP_ERROR;
+                        }
+
+                        bstr_free(part->parser->pending_header_line);
+
+                        // The current line becomes the new pending header line.
+                        if (line != NULL) {
+                            part->parser->pending_header_line = line;
+                            line = NULL;
+                        } else {
+                            part->parser->pending_header_line = bstr_dup_mem(data, len);
+                            if (part->parser->pending_header_line == NULL) return HTP_ERROR;
+                        }
+                    }
+                }
+            }
+
+            bstr_free(line);
+            line = NULL;
+        } else {
+            // Not end of line; keep the data chunk for later.
+            bstr_builder_append_mem(part->parser->part_header_pieces, data, len);
+        }
+    } else {
+        // Data mode; keep the data chunk for later (but not if it is a file).
+        switch (part->type) {
+            case MULTIPART_PART_EPILOGUE:
+            case MULTIPART_PART_PREAMBLE:
+            case MULTIPART_PART_TEXT:
+            case MULTIPART_PART_UNKNOWN:
+                // Make a copy of the data in RAM.
+                bstr_builder_append_mem(part->parser->part_data_pieces, data, len);
+                break;
+
+            case MULTIPART_PART_FILE:
+                // Invoke file data callbacks.
+                htp_mpartp_run_request_file_data_hook(part, data, len);
+
+                // Optionally, store the data in a file.
+                if (part->file->fd != -1) {
+                    if (write(part->file->fd, data, len) < 0) {
+                        return HTP_ERROR;
+                    }
+                }
+                break;
+
+            default:
+                // Internal error.
+                return HTP_ERROR;
+        }
+    }
+
+    return HTP_OK;
+}
+
+/**
+ * Handles data, creating a new part first if the parser does not currently
+ * have one. A part created before any boundary has been seen is the
+ * preamble and starts in data mode; any other new part starts in line
+ * (header) mode. Zero-length data is ignored.
+ *
+ * @param[in] parser
+ * @param[in] data
+ * @param[in] len
+ * @param[in] is_line
+ * @return HTP_OK on success, HTP_ERROR on failure.
+ */
+static htp_status_t htp_mpartp_handle_data(htp_mpartp_t *parser, const unsigned char *data, size_t len, int is_line) {
+    // Nothing to do for empty chunks; in particular, no part is created.
+    if (len == 0) return HTP_OK;
+
+    // Do we have a part already?
+    if (parser->current_part == NULL) {
+        // Create a new part.
+        htp_multipart_part_t *new_part = htp_mpart_part_create(parser);
+        parser->current_part = new_part;
+        if (new_part == NULL) return HTP_ERROR;
+
+        if (parser->multipart.boundary_count != 0) {
+            // Part after preamble; its headers come first.
+            parser->current_part_mode = MODE_LINE;
+        } else {
+            // We haven't seen a boundary yet, so this must be the preamble part.
+            new_part->type = MULTIPART_PART_PREAMBLE;
+            parser->multipart.flags |= HTP_MULTIPART_HAS_PREAMBLE;
+            parser->current_part_mode = MODE_DATA;
+        }
+
+        // Add part to the list.
+        htp_list_push(parser->multipart.parts, new_part);
+
+        #ifdef HTP_DEBUG
+        fprintf(stderr, "Created new part type %d\n", parser->current_part->type);
+        #endif
+    }
+
+    // Send data to the part.
+    return htp_mpart_part_handle_data(parser->current_part, data, len, is_line);
+}
+
+/**
+ * Handles a boundary event: finalizes the current part, if there is one,
+ * detaches it from the parser and puts the parser back into line mode.
+ *
+ * @param[in] parser
+ * @return HTP_OK on success, HTP_ERROR on failure.
+ */
+static htp_status_t htp_mpartp_handle_boundary(htp_mpartp_t *parser) {
+    #if HTP_DEBUG
+    fprintf(stderr, "htp_mpartp_handle_boundary\n");
+    #endif
+
+    // Without a current part there is nothing to finalize.
+    if (parser->current_part == NULL) return HTP_OK;
+
+    if (htp_mpart_part_finalize_data(parser->current_part) != HTP_OK) return HTP_ERROR;
+
+    // We're done with this part; revert to line mode for the next one.
+    parser->current_part = NULL;
+    parser->current_part_mode = MODE_LINE;
+
+    return HTP_OK;
+}
+
+/**
+ * Stores the boundary in the parser in the form in which it is matched
+ * against the input: CR, LF and two dashes, followed by the supplied
+ * boundary bytes (copied as they are) and a terminating NUL. Also puts the
+ * parser into boundary-matching state.
+ *
+ * @param[in] parser
+ * @param[in] data Boundary bytes.
+ * @param[in] len Number of boundary bytes.
+ * @return HTP_OK on success, HTP_ERROR if an argument is NULL or memory could not be allocated.
+ */
+static htp_status_t htp_mpartp_init_boundary(htp_mpartp_t *parser, unsigned char *data, size_t len) {
+    if ((parser == NULL) || (data == NULL)) return HTP_ERROR;
+
+    // Four bytes of prefix in addition to the boundary itself; one more
+    // byte is allocated for the NUL, which is not counted in boundary_len.
+    const size_t full_len = len + 4;
+
+    parser->multipart.boundary_len = full_len;
+    parser->multipart.boundary = malloc(full_len + 1);
+    if (parser->multipart.boundary == NULL) return HTP_ERROR;
+
+    parser->multipart.boundary[0] = CR;
+    parser->multipart.boundary[1] = LF;
+    parser->multipart.boundary[2] = '-';
+    parser->multipart.boundary[3] = '-';
+    memcpy(parser->multipart.boundary + 4, data, len);
+    parser->multipart.boundary[full_len] = '\0';
+
+    // We're starting in boundary-matching mode. The first boundary can appear without the
+    // CRLF, and our starting state expects that (matching begins after the CRLF, at
+    // position 2). If we encounter non-boundary data, the state will switch to data mode.
+    // Then, if the data is CRLF or LF, we will go back to boundary matching. Thus, we
+    // handle all the possibilities.
+
+    parser->parser_state = STATE_BOUNDARY;
+    parser->boundary_match_pos = 2;
+
+    return HTP_OK;
+}
+
+/**
+ * Creates a new multipart parser.
+ *
+ * @param[in] cfg Configuration; supplies the file extraction settings.
+ * @param[in] boundary Boundary to match. On success the parser takes
+ *            ownership of it and frees it (a copy is kept); on failure it
+ *            is left untouched.
+ * @param[in] flags Initial value of the multipart flags.
+ * @return New parser instance, or NULL if an argument is NULL or memory could not be allocated.
+ */
+htp_mpartp_t *htp_mpartp_create(htp_cfg_t *cfg, bstr *boundary, uint64_t flags) {
+    if ((cfg == NULL) || (boundary == NULL)) return NULL;
+
+    htp_mpartp_t *parser = calloc(1, sizeof (htp_mpartp_t));
+    if (parser == NULL) return NULL;
+
+    parser->cfg = cfg;
+
+    // Working buffers and the list of parts. The parser is zero-initialized,
+    // so anything not yet created is still NULL when we bail out.
+    parser->boundary_pieces = bstr_builder_create();
+    if (parser->boundary_pieces == NULL) goto fail;
+
+    parser->part_data_pieces = bstr_builder_create();
+    if (parser->part_data_pieces == NULL) goto fail;
+
+    parser->part_header_pieces = bstr_builder_create();
+    if (parser->part_header_pieces == NULL) goto fail;
+
+    parser->multipart.parts = htp_list_create(64);
+    if (parser->multipart.parts == NULL) goto fail;
+
+    parser->multipart.flags = flags;
+    parser->parser_state = STATE_INIT;
+
+    // File extraction settings; a negative limit selects the default.
+    parser->extract_files = cfg->extract_request_files;
+    parser->extract_dir = cfg->tmpdir;
+    parser->extract_limit = (cfg->extract_request_files_limit >= 0)
+            ? cfg->extract_request_files_limit
+            : DEFAULT_FILE_EXTRACT_LIMIT;
+
+    parser->handle_data = htp_mpartp_handle_data;
+    parser->handle_boundary = htp_mpartp_handle_boundary;
+
+    // Initialize the boundary.
+    if (htp_mpartp_init_boundary(parser, bstr_ptr(boundary), bstr_len(boundary)) != HTP_OK) goto fail;
+
+    // On success, the ownership of the boundary parameter
+    // is transferred to us. We made a copy, and so we
+    // don't need it any more.
+    bstr_free(boundary);
+
+    return parser;
+
+fail:
+    htp_mpartp_destroy(parser);
+    return NULL;
+}
+
+/**
+ * Destroys the parser along with its boundary copy, its working buffers,
+ * the pending header line, and every part on its parts list.
+ *
+ * @param[in] parser Parser to destroy; NULL is accepted and ignored.
+ */
+void htp_mpartp_destroy(htp_mpartp_t *parser) {
+    if (parser == NULL) return;
+
+    // free() accepts NULL, so no guard is needed.
+    free(parser->multipart.boundary);
+
+    bstr_builder_destroy(parser->boundary_pieces);
+    bstr_builder_destroy(parser->part_header_pieces);
+    bstr_free(parser->pending_header_line);
+    bstr_builder_destroy(parser->part_data_pieces);
+
+    // Free the parts.
+    if (parser->multipart.parts != NULL) {
+        const size_t part_count = htp_list_size(parser->multipart.parts);
+        for (size_t idx = 0; idx < part_count; idx++) {
+            htp_mpart_part_destroy(htp_list_get(parser->multipart.parts, idx), parser->gave_up_data);
+        }
+
+        htp_list_destroy(parser->multipart.parts);
+    }
+
+    free(parser);
+}
+
+/**
+ * Processes set-aside data: the lone CR byte, if one was set aside, and
+ * the chunks stored in the parser's boundary_pieces while a possible
+ * boundary was being examined.
+ *
+ * The stored data pieces can contain up to one line. If we're in data mode and there
+ * was no boundary match, things are straightforward -- we process everything as data.
+ * If there was a match, we need to take care to not send the line ending as data, nor
+ * anything that follows (because it's going to be a part of the boundary). Similarly,
+ * when we are in line mode, we need to split the first data chunk, processing the first
+ * part as line and the second part as data.
+ *
+ * @param[in] parser
+ * @param[in] matched Non-zero if the set-aside data turned out to be a boundary.
+ * @return HTP_OK (the results of the data handler calls are not examined).
+ */
+static htp_status_t htp_martp_process_aside(htp_mpartp_t *parser, int matched) {
+    #ifdef HTP_DEBUG
+    fprintf(stderr, "mpartp_process_aside matched %d current_part_mode %d\n", matched, parser->current_part_mode);
+    #endif
+
+    // Do we need to do any chunk splitting? This is decided once, before any
+    // data is handed over, because handling data can change the part mode.
+    const int split_first = matched || (parser->current_part_mode == MODE_LINE);
+
+    // A CR byte that was set aside is part data when there was no match,
+    // and belongs to the boundary otherwise. Either way it is consumed.
+    if ((!matched) && (parser->cr_aside)) {
+        parser->handle_data(parser, (const unsigned char *) "\r", 1, /* not a line */ 0);
+    }
+    parser->cr_aside = 0;
+
+    if (bstr_builder_size(parser->boundary_pieces) == 0) return HTP_OK;
+
+    // We know that we went to match a boundary because we saw a new line.
+    // When splitting, that line ends in the first stored chunk, at
+    // boundary_candidate_pos.
+    for (size_t i = 0, n = htp_list_size(parser->boundary_pieces->pieces); i < n; i++) {
+        bstr *piece = htp_list_get(parser->boundary_pieces->pieces, i);
+
+        if (split_first && (i == 0)) {
+            // Split the first chunk.
+            unsigned char *head = bstr_ptr(piece);
+            size_t head_len = parser->boundary_candidate_pos;
+
+            if (matched) {
+                // The line ending belongs to the boundary: remove LF or CRLF.
+                if ((head_len > 0) && (head[head_len - 1] == LF)) {
+                    head_len--;
+
+                    if ((head_len > 0) && (head[head_len - 1] == CR)) {
+                        head_len--;
+                    }
+                }
+
+                // The rest of the chunk is part of the boundary and is not sent.
+                parser->handle_data(parser, head, head_len, /* not a line */ 0);
+            } else {
+                // In line mode, we are OK with line endings.
+                parser->handle_data(parser, head, head_len, /* line */ 1);
+
+                // The second part of the split chunk is data.
+                parser->handle_data(parser, bstr_ptr(piece) + parser->boundary_candidate_pos,
+                        bstr_len(piece) - parser->boundary_candidate_pos, /* not a line */ 0);
+            }
+        } else if (!matched) {
+            // Whole chunk as data. When there was a boundary match the
+            // stored data belongs to the boundary and is not sent.
+            parser->handle_data(parser, bstr_ptr(piece), bstr_len(piece), /* not a line */ 0);
+        }
+    }
+
+    bstr_builder_clear(parser->boundary_pieces);
+
+    return HTP_OK;
+}
+
+/**
+ * Completes multipart parsing once no more input is expected.
+ *
+ * When a part is still open, any data that was set aside during boundary
+ * detection is processed as a non-match, the part is finalized, and the
+ * HTP_MULTIPART_INCOMPLETE flag is raised unless that part is the epilogue.
+ * The stored boundary pieces are cleared on every successful path.
+ *
+ * @param[in] parser
+ * @return HTP_OK on success, HTP_ERROR if the open part could not be finalized.
+ */
+htp_status_t htp_mpartp_finalize(htp_mpartp_t *parser) {
+    htp_status_t rc = HTP_OK;
+
+    if (parser->current_part != NULL) {
+        // Whatever is still buffered cannot be a boundary any more; flush it.
+        htp_martp_process_aside(parser, /* no match */ 0);
+
+        // Close the part that was being built when the input ended.
+        rc = htp_mpart_part_finalize_data(parser->current_part);
+        if (rc != HTP_OK) {
+            return HTP_ERROR;
+        }
+
+        // Only the epilogue is allowed to end without a closing boundary.
+        if (parser->current_part->type != MULTIPART_PART_EPILOGUE) {
+            parser->multipart.flags |= HTP_MULTIPART_INCOMPLETE;
+        }
+    }
+
+    bstr_builder_clear(parser->boundary_pieces);
+
+    return HTP_OK;
+}
+
+/**
+ * Parses one chunk of multipart body data. The function may be invoked
+ * repeatedly; state that must survive between invocations (parser state,
+ * boundary match position, set-aside CR, stored boundary pieces) is kept
+ * in the parser structure, while buffer positions are local to each call.
+ *
+ * Part data is delivered through parser->handle_data() and boundaries are
+ * announced through parser->handle_boundary(); the return values of these
+ * callbacks are not examined here.
+ *
+ * @param[in] parser
+ * @param[in] _data Input bytes; not modified by this function.
+ * @param[in] len Number of bytes available at _data.
+ * @return HTP_ERROR if the parser is still in STATE_INIT, HTP_OK otherwise.
+ */
+htp_status_t htp_mpartp_parse(htp_mpartp_t *parser, const void *_data, size_t len) {
+    unsigned char *data = (unsigned char *) _data;
+
+    // The current position in the entire input buffer.
+    size_t pos = 0;
+
+    // The position of the first unprocessed byte of data. We split the
+    // input buffer into smaller chunks, according to their purpose. Once
+    // an entire such smaller chunk is processed, we move to the next
+    // and update startpos.
+    size_t startpos = 0;
+
+    // The position of the (possible) boundary. We investigate for possible
+    // boundaries whenever we encounter CRLF or just LF. If we don't find a
+    // boundary we need to go back, and this is what data_return_pos helps with.
+    size_t data_return_pos = 0;
+
+    #if HTP_DEBUG
+    fprint_raw_data(stderr, "htp_mpartp_parse: data chunk", data, len);
+    #endif
+
+    // While there's data in the input buffer.
+
+    while (pos < len) {
+
+STATE_SWITCH:
+        #if HTP_DEBUG
+        fprintf(stderr, "htp_mpartp_parse: state %d pos %zd startpos %zd\n", parser->parser_state, pos, startpos);
+        #endif
+
+        switch (parser->parser_state) {
+
+            case STATE_INIT:
+                // Incomplete initialization.
+                return HTP_ERROR;
+                break;
+
+            case STATE_DATA: // Handle part data.
+
+                // While there's data in the input buffer.
+
+                while (pos < len) {
+                    // Check for a CRLF-terminated line.
+                    if (data[pos] == CR) {
+                        // We have a CR byte.
+
+                        // A CR set aside at the end of the previous input buffer
+                        // turned out to be followed by this CR rather than by LF,
+                        // which makes it ordinary part data. Release it now; the
+                        // flag is about to be reused (or cleared) for the current
+                        // CR and the earlier byte would otherwise be lost.
+                        if (parser->cr_aside) {
+                            parser->handle_data(parser, (unsigned char *) &"\r", 1, /* not a line */ 0);
+                            parser->cr_aside = 0;
+                        }
+
+                        // Is this CR the last byte in the input buffer?
+                        if (pos + 1 == len) {
+                            // We have CR as the last byte in input. We are going to process
+                            // what we have in the buffer as data, except for the CR byte,
+                            // which we're going to leave for later. If it happens that a
+                            // CR is followed by a LF and then a boundary, the CR is going
+                            // to be discarded.
+                            pos++; // Advance over CR.
+                            parser->cr_aside = 1;
+                        } else {
+                            // We have CR and at least one more byte in the buffer, so we
+                            // are able to test for the LF byte too.
+                            if (data[pos + 1] == LF) {
+                                pos += 2; // Advance over CR and LF.
+
+                                parser->multipart.flags |= HTP_MULTIPART_CRLF_LINE;
+
+                                // Prepare to switch to boundary testing.
+                                data_return_pos = pos;
+                                parser->boundary_candidate_pos = pos - startpos;
+                                parser->boundary_match_pos = 2; // After LF; position of the first dash.
+                                parser->parser_state = STATE_BOUNDARY;
+
+                                goto STATE_SWITCH;
+                            } else {
+                                // This is not a new line; advance over the
+                                // byte and clear the CR set-aside flag.
+                                pos++;
+                                parser->cr_aside = 0;
+                            }
+                        }
+                    } else if (data[pos] == LF) { // Check for a LF-terminated line.
+                        pos++; // Advance over LF.
+
+                        // Did we have a CR in the previous input chunk?
+                        if (parser->cr_aside == 0) {
+                            parser->multipart.flags |= HTP_MULTIPART_LF_LINE;
+                        } else {
+                            parser->multipart.flags |= HTP_MULTIPART_CRLF_LINE;
+                        }
+
+                        // Prepare to switch to boundary testing.
+                        data_return_pos = pos;
+                        parser->boundary_candidate_pos = pos - startpos;
+                        parser->boundary_match_pos = 2; // After LF; position of the first dash.
+                        parser->parser_state = STATE_BOUNDARY;
+
+                        goto STATE_SWITCH;
+                    } else {
+                        // Take one byte from input
+                        pos++;
+
+                        // Earlier we might have set aside a CR byte not knowing if the next
+                        // byte is a LF. Now we know that it is not, and so we can release the CR.
+                        if (parser->cr_aside) {
+                            parser->handle_data(parser, (unsigned char *) &"\r", 1, /* not a line */ 0);
+                            parser->cr_aside = 0;
+                        }
+                    }
+                } // while
+
+                // No more data in the input buffer; process the data chunk.
+                // A CR set aside just now is excluded from the chunk.
+                parser->handle_data(parser, data + startpos, pos - startpos - parser->cr_aside, /* not a line */ 0);
+
+                break;
+
+            case STATE_BOUNDARY: // Handle a possible boundary.
+                while (pos < len) {
+                    #ifdef HTP_DEBUG
+                    fprintf(stderr, "boundary (len %zd pos %zd char %d) data char %d\n", parser->multipart.boundary_len,
+                            parser->boundary_match_pos, parser->multipart.boundary[parser->boundary_match_pos], tolower(data[pos]));
+                    #endif
+
+                    // Check if the bytes match.
+                    if (!(data[pos] == parser->multipart.boundary[parser->boundary_match_pos])) {
+                        // Boundary mismatch.
+
+                        // Process stored (buffered) data.
+                        htp_martp_process_aside(parser, /* no match */ 0);
+
+                        // Return back where data parsing left off.
+                        if (parser->current_part_mode == MODE_LINE) {
+                            // In line mode, we process the line.
+                            parser->handle_data(parser, data + startpos, data_return_pos - startpos, /* line */ 1);
+                            startpos = data_return_pos;
+                        } else {
+                            // In data mode, we go back where we left off.
+                            pos = data_return_pos;
+                        }
+
+                        parser->parser_state = STATE_DATA;
+
+                        goto STATE_SWITCH;
+                    }
+
+                    // Consume one matched boundary byte
+                    pos++;
+                    parser->boundary_match_pos++;
+
+                    // Have we seen all boundary bytes?
+                    if (parser->boundary_match_pos == parser->multipart.boundary_len) {
+                        // Boundary match!
+
+                        // Process stored (buffered) data.
+                        htp_martp_process_aside(parser, /* boundary match */ 1);
+
+                        // Process data prior to the boundary in the current input buffer.
+                        // Because we know this is the last chunk before boundary, we can
+                        // remove the line endings.
+                        size_t dlen = data_return_pos - startpos;
+                        if ((dlen > 0) && (data[startpos + dlen - 1] == LF)) dlen--;
+                        if ((dlen > 0) && (data[startpos + dlen - 1] == CR)) dlen--;
+                        parser->handle_data(parser, data + startpos, dlen, /* line */ 1);
+
+                        // Keep track of how many boundaries we've seen.
+                        parser->multipart.boundary_count++;
+
+                        if (parser->multipart.flags & HTP_MULTIPART_SEEN_LAST_BOUNDARY) {
+                            parser->multipart.flags |= HTP_MULTIPART_PART_AFTER_LAST_BOUNDARY;
+                        }
+
+                        // Run boundary match.
+                        parser->handle_boundary(parser);
+
+                        // We now need to check if this is the last boundary in the payload
+                        parser->parser_state = STATE_BOUNDARY_IS_LAST2;
+
+                        // The boundary may end exactly at the end of this input
+                        // buffer. The states that follow read data[pos] without
+                        // a bounds check of their own, so jumping to them now would
+                        // read past the buffer. Everything in this buffer has been
+                        // consumed; resume in the new state on the next call.
+                        if (pos >= len) {
+                            return HTP_OK;
+                        }
+
+                        goto STATE_SWITCH;
+                    }
+                } // while
+
+                // No more data in the input buffer; store (buffer) the unprocessed
+                // part for later, for after we find out if this is a boundary.
+                bstr_builder_append_mem(parser->boundary_pieces, data + startpos, len - startpos);
+
+                break;
+
+            case STATE_BOUNDARY_IS_LAST2:
+                // Examine the first byte after the last boundary character. If it is
+                // a dash, then we may be processing the last boundary in the payload. If
+                // it is not, move to eat all bytes until the end of the line.
+
+                if (data[pos] == '-') {
+                    // Found one dash, now go to check the next position.
+                    pos++;
+                    parser->parser_state = STATE_BOUNDARY_IS_LAST1;
+                } else {
+                    // This is not the last boundary. Change state but
+                    // do not advance the position, allowing the next
+                    // state to process the byte.
+                    parser->parser_state = STATE_BOUNDARY_EAT_LWS;
+                }
+                break;
+
+            case STATE_BOUNDARY_IS_LAST1:
+                // Examine the byte after the first dash; expected to be another dash.
+                // If not, eat all bytes until the end of the line.
+
+                if (data[pos] == '-') {
+                    // This is indeed the last boundary in the payload.
+                    pos++;
+                    parser->multipart.flags |= HTP_MULTIPART_SEEN_LAST_BOUNDARY;
+                    parser->parser_state = STATE_BOUNDARY_EAT_LWS;
+                } else {
+                    // The second character is not a dash, and so this is not
+                    // the final boundary. Raise the flag for the first dash,
+                    // and change state to consume the rest of the boundary line.
+                    parser->multipart.flags |= HTP_MULTIPART_BBOUNDARY_NLWS_AFTER;
+                    parser->parser_state = STATE_BOUNDARY_EAT_LWS;
+                }
+                break;
+
+            case STATE_BOUNDARY_EAT_LWS:
+                if (data[pos] == CR) {
+                    // CR byte, which could indicate a CRLF line ending.
+                    pos++;
+                    parser->parser_state = STATE_BOUNDARY_EAT_LWS_CR;
+                } else if (data[pos] == LF) {
+                    // LF line ending; we're done with boundary processing; data bytes follow.
+                    pos++;
+                    startpos = pos;
+                    parser->multipart.flags |= HTP_MULTIPART_LF_LINE;
+                    parser->parser_state = STATE_DATA;
+                } else {
+                    if (htp_is_lws(data[pos])) {
+                        // Linear white space is allowed here.
+                        parser->multipart.flags |= HTP_MULTIPART_BBOUNDARY_LWS_AFTER;
+                        pos++;
+                    } else {
+                        // Unexpected byte; consume, but remain in the same state.
+                        parser->multipart.flags |= HTP_MULTIPART_BBOUNDARY_NLWS_AFTER;
+                        pos++;
+                    }
+                }
+                break;
+
+            case STATE_BOUNDARY_EAT_LWS_CR:
+                if (data[pos] == LF) {
+                    // CRLF line ending; we're done with boundary processing; data bytes follow.
+                    pos++;
+                    startpos = pos;
+                    parser->multipart.flags |= HTP_MULTIPART_CRLF_LINE;
+                    parser->parser_state = STATE_DATA;
+                } else {
+                    // Not a line ending; start again, but do not process this byte.
+                    parser->multipart.flags |= HTP_MULTIPART_BBOUNDARY_NLWS_AFTER;
+                    parser->parser_state = STATE_BOUNDARY_EAT_LWS;
+                }
+                break;
+        } // switch
+    }
+
+    return HTP_OK;
+}
+
+// Inspects an already extracted boundary value and raises flags describing
+// how well it conforms to expectations. Nothing is returned; the only effect
+// is OR-ing bits into *flags.
+//
+//   HTP_MULTIPART_HBOUNDARY_INVALID  - empty, longer than 70 bytes, or
+//                                      containing a byte outside the set below.
+//   HTP_MULTIPART_HBOUNDARY_UNUSUAL  - containing one of the punctuation
+//                                      characters the RFC permits but that
+//                                      are not commonly seen.
+//
+// RFC 1341:
+//
+//   The only mandatory parameter for the multipart Content-Type
+//   is the boundary parameter, which consists of 1 to 70
+//   characters from a set of characters known to be very robust
+//   through email gateways, and NOT ending with white space.
+//   (If a boundary appears to end with white space, the white
+//   space must be presumed to have been added by a gateway, and
+//   should be deleted.) It is formally specified by the
+//   following BNF:
+//
+//   boundary := 0*69<bchars> bcharsnospace
+//
+//   bchars := bcharsnospace / " "
+//
+//   bcharsnospace := DIGIT / ALPHA / "'" / "(" / ")" / "+" / "_"
+//                    / "," / "-" / "." / "/" / ":" / "=" / "?"
+//
+// Boundaries produced by common browsers, for reference:
+//
+//   Chrome:  Content-Type: multipart/form-data; boundary=----WebKitFormBoundaryT4AfwQCOgIxNVwlD
+//   Firefox: Content-Type: multipart/form-data; boundary=---------------------------21071316483088
+//   MSIE:    Content-Type: multipart/form-data; boundary=---------------------------7dd13e11c0452
+//   Opera:   Content-Type: multipart/form-data; boundary=----------2JL5oh7QWEDwyBllIRc7fh
+//   Safari:  Content-Type: multipart/form-data; boundary=----WebKitFormBoundaryre6zL3b0BelnTY5S
+static void htp_mpartp_validate_boundary(bstr *boundary, uint64_t *flags) {
+    unsigned char *bytes = bstr_ptr(boundary);
+    size_t count = bstr_len(boundary);
+
+    // Length check: the RFC permits 1 to 70 characters.
+    if ((count < 1) || (count > 70)) {
+        *flags |= HTP_MULTIPART_HBOUNDARY_INVALID;
+    }
+
+    // Character check. Space is treated as invalid here, which makes the
+    // check stricter than the bchars production quoted above.
+    for (size_t k = 0; k < count; k++) {
+        unsigned char c = bytes[k];
+
+        // Digits, ASCII letters and the dash are what browsers actually send.
+        int is_digit = (c >= '0') && (c <= '9');
+        int is_lower = (c >= 'a') && (c <= 'z');
+        int is_upper = (c >= 'A') && (c <= 'Z');
+
+        if (is_digit || is_lower || is_upper || (c == '-')) {
+            continue;
+        }
+
+        switch (c) {
+            case '\'':
+            case '(':
+            case ')':
+            case '+':
+            case '_':
+            case ',':
+            case '.':
+            case '/':
+            case ':':
+            case '=':
+            case '?':
+                // Permitted by the RFC, but rarely used in practice.
+                *flags |= HTP_MULTIPART_HBOUNDARY_UNUSUAL;
+                break;
+
+            default:
+                // Anything else is not a valid boundary character.
+                *flags |= HTP_MULTIPART_HBOUNDARY_INVALID;
+                break;
+        }
+    }
+}
+
+// Scans the complete Content-Type value for occurrences of the word
+// "boundary" (any letter case) and raises HTP_MULTIPART_HBOUNDARY_INVALID in
+// *flags when a counted occurrence is not written entirely in lowercase, or
+// when more than one occurrence is counted. An occurrence is counted only if
+// at least one '=' byte exists somewhere at or after it.
+static void htp_mpartp_validate_content_type(bstr *content_type, uint64_t *flags) {
+    unsigned char *cursor = bstr_ptr(content_type);
+    size_t remaining = bstr_len(content_type);
+    size_t hits = 0;
+
+    while (remaining != 0) {
+        int at = bstr_util_mem_index_of_c_nocase(cursor, remaining, "boundary");
+        if (at == -1) break;
+
+        // Jump to the start of the word that was just found.
+        cursor += at;
+        remaining -= at;
+
+        // WebKit puts the word "boundary" inside the boundary value itself:
+        // "multipart/form-data; boundary=----WebKitFormBoundaryT4AfwQCOgIxNVwlD"
+        // To avoid counting that, an equals character is required to follow
+        // the word; without one, scanning stops.
+        if (memchr(cursor, '=', remaining) == NULL) break;
+
+        hits++;
+
+        // The parameter name is expected in lowercase; any other spelling
+        // is flagged. The word is 8 bytes long.
+        for (size_t k = 0; k < 8; k++) {
+            unsigned char c = cursor[k];
+
+            if ((c < 'a') || (c > 'z')) {
+                *flags |= HTP_MULTIPART_HBOUNDARY_INVALID;
+            }
+        }
+
+        // Continue the search right after the word.
+        cursor += 8;
+        remaining -= 8;
+    }
+
+    // More than one boundary parameter is not acceptable.
+    if (hits >= 2) {
+        *flags |= HTP_MULTIPART_HBOUNDARY_INVALID;
+    }
+}
+
+/**
+ * Locates the multipart boundary in a Content-Type header value and returns
+ * a copy of it, recording anything irregular about the header in flags.
+ *
+ * The MIME type itself is not used to find the boundary; the function looks
+ * for the first occurrence of the word "boundary" (case-insensitive), which
+ * holds up better against evasion attempts that rely on invalid MIME types.
+ *
+ * @param[in] content_type Complete Content-Type header value.
+ * @param[out] boundary Receives a newly allocated copy of the boundary on
+ *                      HTP_OK. It is set to NULL when an empty boundary
+ *                      is rejected, and is not written when the function
+ *                      bails out earlier.
+ * @param[out] flags Reset to zero after the argument check, then OR-ed with
+ *                   HTP_MULTIPART_HBOUNDARY_INVALID / _UNUSUAL as needed.
+ * @return HTP_OK when a boundary was extracted, HTP_DECLINED when no usable
+ *         boundary is present, HTP_ERROR on NULL arguments or when the
+ *         boundary copy could not be created.
+ */
+htp_status_t htp_mpartp_find_boundary(bstr *content_type, bstr **boundary, uint64_t *flags) {
+    if (content_type == NULL) return HTP_ERROR;
+    if (boundary == NULL) return HTP_ERROR;
+    if (flags == NULL) return HTP_ERROR;
+
+    // Start from a clean slate.
+    *flags = 0;
+
+    // Find the parameter name, ignoring case.
+    int name_at = bstr_index_of_c_nocase(content_type, "boundary");
+    if (name_at == -1) return HTP_DECLINED;
+
+    // Everything after the 8-byte word "boundary".
+    unsigned char *tail = bstr_ptr(content_type) + name_at + 8;
+    size_t tail_len = bstr_len(content_type) - name_at - 8;
+    size_t at = 0;
+
+    // Walk up to the equals sign, classifying whatever precedes it.
+    for (; (at < tail_len) && (tail[at] != '='); at++) {
+        if (htp_is_space(tail[at])) {
+            // Whitespace in front of the equals sign is unusual.
+            *flags |= HTP_MULTIPART_HBOUNDARY_UNUSUAL;
+        } else {
+            // Any other byte there may be an evasion attempt.
+            *flags |= HTP_MULTIPART_HBOUNDARY_INVALID;
+        }
+    }
+
+    if (at >= tail_len) {
+        // The header has no equals sign after the parameter name.
+        *flags |= HTP_MULTIPART_HBOUNDARY_INVALID;
+        return HTP_DECLINED;
+    }
+
+    // Step over '='.
+    at++;
+
+    // Skip whitespace following the equals sign; seeing any is unusual.
+    while ((at < tail_len) && htp_is_space(tail[at])) {
+        *flags |= HTP_MULTIPART_HBOUNDARY_UNUSUAL;
+        at++;
+    }
+
+    if (at >= tail_len) {
+        // Nothing follows the equals sign.
+        *flags |= HTP_MULTIPART_HBOUNDARY_INVALID;
+        return HTP_DECLINED;
+    }
+
+    // Determine where the boundary value begins and ends.
+    size_t value_start;
+    int quoted = (tail[at] == '"');
+
+    if (quoted) {
+        // Quoting may not be all that rare, but it is noted for now.
+        *flags |= HTP_MULTIPART_HBOUNDARY_UNUSUAL;
+
+        at++; // Step over the opening quote.
+        value_start = at;
+
+        // Advance to the closing quote.
+        while ((at < tail_len) && (tail[at] != '"')) {
+            at++;
+        }
+
+        if (at >= tail_len) {
+            // The closing quote never came.
+            *flags |= HTP_MULTIPART_HBOUNDARY_INVALID;
+
+            // In that case the opening quote is treated as part of the value.
+            value_start--;
+        }
+    } else {
+        value_start = at;
+
+        // The end of an unquoted value is determined the way PHP 5.4.x does
+        // it, which is likely to be close to what is accepted in real life.
+        // Irregular characters are caught later, when the value is validated.
+        while (at < tail_len) {
+            unsigned char c = tail[at];
+
+            if ((c == ',') || (c == ';') || htp_is_space(c)) break;
+
+            at++;
+        }
+    }
+
+    *boundary = bstr_dup_mem(tail + value_start, at - value_start);
+    if (*boundary == NULL) return HTP_ERROR;
+
+    if (quoted) {
+        at++; // Step over the closing quote.
+    }
+
+    // An empty boundary is of no use.
+    if (bstr_len(*boundary) == 0) {
+        *flags |= HTP_MULTIPART_HBOUNDARY_INVALID;
+        bstr_free(*boundary);
+        *boundary = NULL;
+        return HTP_DECLINED;
+    }
+
+    // Only whitespace is tolerated after the boundary value.
+    int trailing_space = 0;
+    int trailing_other = 0;
+
+    for (; at < tail_len; at++) {
+        if (htp_is_space(tail[at])) {
+            trailing_space = 1;
+        } else {
+            trailing_other = 1;
+        }
+    }
+
+    // Any non-space byte makes the header INVALID; if there
+    // was nothing but whitespace, it is merely UNUSUAL.
+    if (trailing_other) {
+        *flags |= HTP_MULTIPART_HBOUNDARY_INVALID;
+    } else if (trailing_space) {
+        *flags |= HTP_MULTIPART_HBOUNDARY_UNUSUAL;
+    }
+
+    #ifdef HTP_DEBUG
+    fprint_bstr(stderr, "Multipart boundary", *boundary);
+    #endif
+
+    // Check the characters of the extracted value.
+    htp_mpartp_validate_boundary(*boundary, flags);
+
+    // Cross-check against the MIME type. This could prove a little too
+    // sensitive, as sloppy non-browser clients may trip it, but it is
+    // kept for now.
+    if (bstr_begins_with_c(content_type, "multipart/form-data;") == 0) {
+        *flags |= HTP_MULTIPART_HBOUNDARY_INVALID;
+    }
+
+    htp_mpartp_validate_content_type(content_type, flags);
+
+    return HTP_OK;
+}