summary refs log tree commit diff stats
path: root/htp/htp_multipart_private.h
diff options
context:
space:
mode:
Diffstat (limited to '')
-rw-r--r-- htp/htp_multipart_private.h 203
1 files changed, 203 insertions, 0 deletions
diff --git a/htp/htp_multipart_private.h b/htp/htp_multipart_private.h
new file mode 100644
index 0000000..5b8d228
--- /dev/null
+++ b/htp/htp_multipart_private.h
@@ -0,0 +1,203 @@
+/***************************************************************************
+ * Copyright (c) 2009-2010 Open Information Security Foundation
+ * Copyright (c) 2010-2013 Qualys, Inc.
+ * All rights reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions are
+ * met:
+ *
+ * - Redistributions of source code must retain the above copyright
+ * notice, this list of conditions and the following disclaimer.
+
+ * - Redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in the
+ * documentation and/or other materials provided with the distribution.
+
+ * - Neither the name of the Qualys, Inc. nor the names of its
+ * contributors may be used to endorse or promote products derived from
+ * this software without specific prior written permission.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+ * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+ * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+ * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
+ * HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+ * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
+ * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+ * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
+ * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+ * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+ * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+ ***************************************************************************/
+
+/**
+ * @file
+ * @author Ivan Ristic <ivanr@webkreator.com>
+ */
+
+#ifndef _HTP_MULTIPART_PRIVATE_H
+#define _HTP_MULTIPART_PRIVATE_H
+
+#ifdef __cplusplus
+extern "C" {
+#endif
+
+#include "htp_multipart.h"
+
+#define CD_PARAM_OTHER 0 /* NOTE(review): CD_PARAM_* presumably classify Content-Disposition parameters (cf. htp_mpart_part_parse_c_d) -- confirm. */
+#define CD_PARAM_NAME 1 /* presumably the "name" parameter -- TODO confirm */
+#define CD_PARAM_FILENAME 2 /* presumably the "filename" parameter -- TODO confirm */
+
+#define DEFAULT_FILE_EXTRACT_LIMIT 16 /* presumably the default for htp_mpartp_t::extract_limit -- TODO confirm */
+
+enum htp_part_mode_t { /* Parsing mode of the current part; this is the type of htp_mpartp_t::current_part_mode. */
+ /** When in line mode, the parser is handling part headers. */
+ MODE_LINE = 0,
+
+ /** When in data mode, the parser is consuming part data. */
+ MODE_DATA = 1
+};
+
+enum htp_multipart_state_t { /* Parser states; this is the type of htp_mpartp_t::parser_state. */
+ /** Initial state, after the parser has been created but before the boundary has been initialized. */
+ STATE_INIT = 0,
+
+ /** Processing data, waiting for a new line (which might indicate a new boundary). */
+ STATE_DATA = 1,
+
+ /** Testing a potential boundary. */
+ STATE_BOUNDARY = 2,
+
+ /** Checking the first byte after a boundary. */
+ STATE_BOUNDARY_IS_LAST1 = 3,
+
+ /** Checking the second byte after a boundary. */
+ STATE_BOUNDARY_IS_LAST2 = 4,
+
+ /** Consuming linear whitespace after a boundary. */
+ STATE_BOUNDARY_EAT_LWS = 5,
+
+ /** Used after a CR byte is detected in STATE_BOUNDARY_EAT_LWS. */
+ STATE_BOUNDARY_EAT_LWS_CR = 6
+};
+
+struct htp_mpartp_t { /* Multipart parser: extraction settings, parsing callbacks and internal parsing state. */
+ htp_multipart_t multipart; /**< Embedded htp_multipart_t (declared elsewhere); first member of this structure. */
+
+ htp_cfg_t *cfg; /**< NOTE(review): presumably the configuration this parser was created with -- confirm. */
+
+ int extract_files; /**< NOTE(review): presumably a flag that enables file extraction -- confirm. */
+
+ int extract_limit; /**< NOTE(review): presumably the maximum number of files to extract (cf. DEFAULT_FILE_EXTRACT_LIMIT) -- confirm. */
+
+ char *extract_dir; /**< NOTE(review): presumably the directory extracted files are written to -- confirm. */
+
+ int file_count; /**< NOTE(review): presumably the number of file parts handled so far -- confirm. */
+
+ // Parsing callbacks
+
+ int (*handle_data)(htp_mpartp_t *mpartp, const unsigned char *data,
+ size_t len, int line_end);
+ int (*handle_boundary)(htp_mpartp_t *mpartp);
+
+ // Internal parsing fields. TODO: move into a private structure.
+
+ /**
+ * Parser state; one of the STATE_* values of enum htp_multipart_state_t.
+ */
+ enum htp_multipart_state_t parser_state;
+
+ /**
+ * Keeps track of the current position in the boundary matching progress.
+ * When this field reaches boundary_len (not a member of this structure), we have a boundary match.
+ */
+ size_t boundary_match_pos;
+
+ /**
+ * Pointer to the part that is currently being processed.
+ */
+ htp_multipart_part_t *current_part;
+
+ /**
+ * This parser consists of two layers: the outer layer is charged with
+ * finding parts, and the internal layer handles part data. There is an
+ * interesting interaction between the two parsers. Because the
+ * outer layer is seeing every line (it has to, in order to test for
+ * boundaries), it effectively also splits input into lines. The
+ * inner parser deals with two areas: first is the headers, which are
+ * line based, followed by binary data. When parsing headers, the inner
+ * parser can reuse the lines identified by the outer parser. In this
+ * variable we keep the current parsing mode of the part, which helps
+ * us process input data more efficiently. The possible values are
+ * MODE_LINE and MODE_DATA (enum htp_part_mode_t).
+ */
+ enum htp_part_mode_t current_part_mode;
+
+ /**
+ * Used for buffering when a potential boundary is fragmented
+ * across many input data buffers. On a match, the data stored here is
+ * discarded. When there is no match, the buffer is processed as data
+ * (belonging to the currently active part).
+ */
+ bstr_builder_t *boundary_pieces;
+
+ bstr_builder_t *part_header_pieces; /**< NOTE(review): presumably buffers fragments of a part header line -- confirm. */
+
+ bstr *pending_header_line; /**< NOTE(review): presumably a header line held back until the next line is seen -- confirm. */
+
+ /**
+ * Stores text part pieces until the entire part is seen, at which
+ * point the pieces are assembled into a single buffer, and the
+ * builder cleared.
+ */
+ bstr_builder_t *part_data_pieces;
+
+ /**
+ * The offset of the current boundary candidate, relative to the most
+ * recent data chunk (first unprocessed chunk of data).
+ */
+ size_t boundary_candidate_pos;
+
+ /**
+ * When we encounter a CR as the last byte in a buffer, we don't know
+ * if the byte is part of a CRLF combination. If it is, then the CR
+ * might be a part of a boundary. But if it is not, it is the current
+ * part's data. Because we know how to handle everything before the
+ * CR, we do, and we use this flag to indicate that a CR byte is
+ * effectively being buffered. This is probably a case of premature
+ * optimization, but I am going to leave it in for now.
+ */
+ int cr_aside;
+
+ /**
+ * When set, indicates that this parser no longer owns names and
+ * values of MULTIPART_PART_TEXT parts. It is used to avoid data
+ * duplication when the parser is used by LibHTP internally.
+ */
+ int gave_up_data;
+};
+/* NOTE(review): presumably invokes the request file-data hook for len bytes at data belonging to part -- confirm. */
+htp_status_t htp_mpartp_run_request_file_data_hook(htp_multipart_part_t *part, const unsigned char *data, size_t len);
+/* NOTE(review): presumably processes the headers collected for part -- confirm. */
+htp_status_t htp_mpart_part_process_headers(htp_multipart_part_t *part);
+/* NOTE(review): presumably parses one header line (len bytes at data) into part -- confirm. */
+htp_status_t htp_mpartp_parse_header(htp_multipart_part_t *part, const unsigned char *data, size_t len);
+/* NOTE(review): presumably feeds len bytes at data to part; is_line looks like a "data is a complete line" flag -- confirm. */
+htp_status_t htp_mpart_part_handle_data(htp_multipart_part_t *part, const unsigned char *data, size_t len, int is_line);
+/* NOTE(review): presumably returns non-zero when c may appear in a multipart boundary -- confirm. */
+int htp_mpartp_is_boundary_character(int c);
+/* NOTE(review): presumably creates a new part for parser; check the implementation for the failure value. */
+htp_multipart_part_t *htp_mpart_part_create(htp_mpartp_t *parser);
+/* NOTE(review): presumably called once all data of part has been seen -- confirm. */
+htp_status_t htp_mpart_part_finalize_data(htp_multipart_part_t *part);
+/* NOTE(review): presumably releases part; gave_up_data looks related to htp_mpartp_t::gave_up_data -- confirm. */
+void htp_mpart_part_destroy(htp_multipart_part_t *part, int gave_up_data);
+/* NOTE(review): "c_d" presumably stands for Content-Disposition (cf. CD_PARAM_*) -- confirm. */
+htp_status_t htp_mpart_part_parse_c_d(htp_multipart_part_t *part);
+
+#ifdef __cplusplus
+}
+#endif
+
+#endif /* _HTP_MULTIPART_PRIVATE_H */