summaryrefslogtreecommitdiffstats
path: root/htp
diff options
context:
space:
mode:
authorDaniel Baumann <daniel.baumann@progress-linux.org>2024-04-19 17:40:56 +0000
committerDaniel Baumann <daniel.baumann@progress-linux.org>2024-04-19 17:40:56 +0000
commitc248d29056abbc1fc4c5dc178bab48fb8d2c1fcb (patch)
tree4a13fc30604509224504e1911bc976e5df7bdf05 /htp
parentInitial commit. (diff)
downloadlibhtp-c248d29056abbc1fc4c5dc178bab48fb8d2c1fcb.tar.xz
libhtp-c248d29056abbc1fc4c5dc178bab48fb8d2c1fcb.zip
Adding upstream version 1:0.5.47.upstream/1%0.5.47
Signed-off-by: Daniel Baumann <daniel.baumann@progress-linux.org>
Diffstat (limited to 'htp')
-rw-r--r--htp/Makefile.am35
-rw-r--r--htp/bstr.c638
-rw-r--r--htp/bstr.h678
-rw-r--r--htp/bstr_builder.c121
-rw-r--r--htp/bstr_builder.h136
-rw-r--r--htp/htp.h678
-rw-r--r--htp/htp_base64.c196
-rw-r--r--htp/htp_base64.h74
-rw-r--r--htp/htp_config.c954
-rw-r--r--htp/htp_config.h719
-rw-r--r--htp/htp_config_auto.h51
-rw-r--r--htp/htp_config_private.h373
-rw-r--r--htp/htp_connection.c168
-rw-r--r--htp/htp_connection_parser.c260
-rw-r--r--htp/htp_connection_parser.h218
-rw-r--r--htp/htp_connection_parser_private.h275
-rw-r--r--htp/htp_connection_private.h121
-rw-r--r--htp/htp_content_handlers.c299
-rw-r--r--htp/htp_cookies.c119
-rw-r--r--htp/htp_core.h353
-rw-r--r--htp/htp_decompressors.c490
-rw-r--r--htp/htp_decompressors.h94
-rw-r--r--htp/htp_hooks.c160
-rw-r--r--htp/htp_hooks.h122
-rw-r--r--htp/htp_list.c360
-rw-r--r--htp/htp_list.h227
-rw-r--r--htp/htp_list_private.h73
-rw-r--r--htp/htp_multipart.c1615
-rw-r--r--htp/htp_multipart.h345
-rw-r--r--htp/htp_multipart_private.h203
-rw-r--r--htp/htp_parsers.c214
-rw-r--r--htp/htp_php.c116
-rw-r--r--htp/htp_private.h269
-rw-r--r--htp/htp_request.c1173
-rw-r--r--htp/htp_request_apache_2_2.c64
-rw-r--r--htp/htp_request_generic.c462
-rw-r--r--htp/htp_request_parsers.c149
-rw-r--r--htp/htp_response.c1436
-rw-r--r--htp/htp_response_generic.c334
-rw-r--r--htp/htp_table.c250
-rw-r--r--htp/htp_table.h184
-rw-r--r--htp/htp_table_private.h78
-rw-r--r--htp/htp_transaction.c1558
-rw-r--r--htp/htp_transaction.h529
-rw-r--r--htp/htp_transcoder.c211
-rw-r--r--htp/htp_urlencoded.c332
-rw-r--r--htp/htp_urlencoded.h111
-rw-r--r--htp/htp_utf8_decoder.c118
-rw-r--r--htp/htp_utf8_decoder.h85
-rw-r--r--htp/htp_util.c2602
-rw-r--r--htp/htp_version.h.in53
-rw-r--r--htp/lzma/7zTypes.h375
-rw-r--r--htp/lzma/Compiler.h33
-rw-r--r--htp/lzma/LzFind.c1127
-rw-r--r--htp/lzma/LzFind.h121
-rw-r--r--htp/lzma/LzHash.h57
-rw-r--r--htp/lzma/LzmaDec.c1223
-rw-r--r--htp/lzma/LzmaDec.h234
-rw-r--r--htp/lzma/Makefile.am16
-rw-r--r--htp/lzma/Precomp.h10
-rw-r--r--htp/strlcat.c76
-rw-r--r--htp/strlcpy.c72
62 files changed, 23827 insertions, 0 deletions
diff --git a/htp/Makefile.am b/htp/Makefile.am
new file mode 100644
index 0000000..a3e8245
--- /dev/null
+++ b/htp/Makefile.am
@@ -0,0 +1,35 @@
+
+# Descend into the bundled LZMA decoder directory.
+SUBDIRS = lzma
+
+# Public headers, installed under $(library_includedir).
+h_sources = bstr.h bstr_builder.h htp.h htp_base64.h htp_config.h htp_connection_parser.h \
+ htp_core.h htp_decompressors.h htp_hooks.h htp_list.h \
+ htp_multipart.h htp_table.h htp_transaction.h \
+ htp_urlencoded.h htp_utf8_decoder.h htp_version.h
+
+# Private headers: compiled against, but not listed for installation.
+h_sources_private = htp_config_private.h htp_connection_private.h htp_connection_parser_private.h htp_list_private.h \
+ htp_multipart_private.h htp_private.h htp_table_private.h htp_config_auto.h
+
+c_sources = bstr.c bstr_builder.c htp_base64.c htp_config.c htp_connection.c htp_connection_parser.c \
+ htp_content_handlers.c htp_cookies.c htp_decompressors.c htp_hooks.c htp_list.c htp_multipart.c htp_parsers.c \
+ htp_php.c htp_request.c htp_request_apache_2_2.c htp_request_generic.c htp_request_parsers.c htp_response.c \
+ htp_response_generic.c htp_table.c htp_transaction.c htp_transcoder.c htp_urlencoded.c htp_util.c htp_utf8_decoder.c \
+ strlcpy.c strlcat.c
+
+library_includedir = $(includedir)/$(GENERIC_LIBRARY_NAME)
+library_include_HEADERS = $(h_sources)
+
+# Warning flags; -Wextra is listed once (it used to appear twice).
+AM_CFLAGS = -I$(top_srcdir) -I$(top_builddir)/htp -D_GNU_SOURCE -g -Wall -Wextra -std=gnu99 -pedantic \
+ -Wno-missing-field-initializers -Wshadow -Wpointer-arith \
+ -Wstrict-prototypes -Wmissing-prototypes -Wno-unused-parameter
+
+# Non-installed helper library holding all of the C sources in this directory.
+noinst_LTLIBRARIES = libhtp-c.la
+libhtp_c_la_SOURCES = $(h_sources) $(h_sources_private) $(c_sources)
+
+# The installed library has no sources of its own; it combines the two
+# helper libraries named in LIBADD.
+lib_LTLIBRARIES = libhtp.la
+libhtp_la_SOURCES =
+libhtp_la_LIBADD = libhtp-c.la lzma/liblzma-c.la
+libhtp_la_LDFLAGS = -version-info $(GENERIC_LIBRARY_VERSION)
+if CYGWIN
+libhtp_la_LIBADD += $(LIBICONV)
+libhtp_la_LDFLAGS += -no-undefined
+endif
diff --git a/htp/bstr.c b/htp/bstr.c
new file mode 100644
index 0000000..7673c68
--- /dev/null
+++ b/htp/bstr.c
@@ -0,0 +1,638 @@
+/***************************************************************************
+ * Copyright (c) 2009-2010 Open Information Security Foundation
+ * Copyright (c) 2010-2013 Qualys, Inc.
+ * All rights reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions are
+ * met:
+ *
+ * - Redistributions of source code must retain the above copyright
+ * notice, this list of conditions and the following disclaimer.
+
+ * - Redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in the
+ * documentation and/or other materials provided with the distribution.
+
+ * - Neither the name of the Qualys, Inc. nor the names of its
+ * contributors may be used to endorse or promote products derived from
+ * this software without specific prior written permission.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+ * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+ * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+ * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
+ * HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+ * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
+ * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+ * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
+ * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+ * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+ * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+ ***************************************************************************/
+
+/**
+ * @file
+ * @author Ivan Ristic <ivanr@webkreator.com>
+ */
+
+#include <ctype.h>
+
+#include "bstr.h"
+
+/**
+ * Allocate an empty bstring whose inline buffer can hold len bytes.
+ *
+ * @param[in] len Capacity of the data area that follows the header.
+ * @return New bstring with length 0 and size len, or NULL if the request
+ *         cannot be represented in a size_t or malloc() fails.
+ */
+bstr *bstr_alloc(size_t len) {
+    // sizeof (bstr) + len must not wrap around; otherwise malloc() would
+    // hand back a block smaller than the size recorded below.
+    if (len > SIZE_MAX - sizeof (bstr)) return NULL;
+
+    bstr *b = malloc(sizeof (bstr) + len);
+    if (b == NULL) return NULL;
+
+    b->len = 0;
+    b->size = len;
+    b->realptr = NULL; // data lives directly after the header
+
+    return b;
+}
+
+// Append the bytes of source to destination by delegating to bstr_add_mem().
+bstr *bstr_add(bstr *destination, const bstr *source) {
+    const void *src_data = bstr_ptr(source);
+    size_t src_len = bstr_len(source);
+
+    return bstr_add_mem(destination, src_data, src_len);
+}
+
+// Append a NUL-terminated string (without its terminator) via bstr_add_mem().
+bstr *bstr_add_c(bstr *bdestination, const char *csource) {
+    size_t src_len = strlen(csource);
+
+    return bstr_add_mem(bdestination, csource, src_len);
+}
+
+// Append a NUL-terminated string via bstr_add_mem_noex(), which never
+// enlarges the destination.
+bstr *bstr_add_c_noex(bstr *destination, const char *source) {
+    size_t src_len = strlen(source);
+
+    return bstr_add_mem_noex(destination, source, src_len);
+}
+
+/**
+ * Append len bytes from data to destination, enlarging the buffer first if
+ * the bytes do not fit.
+ *
+ * @param[in] destination
+ * @param[in] data
+ * @param[in] len
+ * @return The (possibly relocated) destination, or NULL if the combined
+ *         length does not fit in a size_t or bstr_expand() fails. The
+ *         caller's original destination is not modified in that case.
+ */
+bstr *bstr_add_mem(bstr *destination, const void *data, size_t len) {
+    size_t oldlen = bstr_len(destination);
+
+    // Reject lengths whose sum would wrap around and defeat the size check.
+    if (len > SIZE_MAX - oldlen) return NULL;
+    size_t newlen = oldlen + len;
+
+    // Expand the destination if necessary
+    if (bstr_size(destination) < newlen) {
+        destination = bstr_expand(destination, newlen);
+        if (destination == NULL) return NULL;
+    }
+
+    // Add source to destination
+    memcpy(bstr_ptr(destination) + oldlen, data, len);
+    bstr_adjust_len(destination, newlen);
+
+    return destination;
+}
+
+/**
+ * Append as many of the len bytes at data as fit into the space that
+ * destination already has; the buffer is never enlarged.
+ *
+ * @param[in] destination
+ * @param[in] data
+ * @param[in] len
+ * @return The destination bstring (always the pointer that was passed in).
+ */
+bstr *bstr_add_mem_noex(bstr *destination, const void *data, size_t len) {
+    size_t used = bstr_len(destination);
+    size_t capacity = bstr_size(destination);
+
+    // No free room: nothing can be copied. Testing before subtracting keeps
+    // the unsigned arithmetic below from wrapping around.
+    if (used >= capacity) return destination;
+
+    // Clamp the request to the remaining room.
+    size_t room = capacity - used;
+    size_t copylen = (len < room) ? len : room;
+    if (copylen == 0) return destination;
+
+    // Copy over the bytes
+    memcpy(bstr_ptr(destination) + used, data, copylen);
+    bstr_adjust_len(destination, used + copylen);
+
+    return destination;
+}
+
+// Append the bytes of source via bstr_add_mem_noex(), which never enlarges
+// the destination.
+bstr *bstr_add_noex(bstr *destination, const bstr *source) {
+    const void *src_data = bstr_ptr(source);
+    size_t src_len = bstr_len(source);
+
+    return bstr_add_mem_noex(destination, src_data, src_len);
+}
+
+// Overwrite the recorded data length of b; the stored bytes are not touched.
+void bstr_adjust_len(bstr *b, size_t newlen) {
+    (*b).len = newlen;
+}
+
+// Overwrite the external data pointer of b with newrealptr.
+void bstr_adjust_realptr(bstr *b, void *newrealptr) {
+    (*b).realptr = newrealptr;
+}
+
+// Overwrite the recorded buffer capacity of b; no memory is (re)allocated.
+void bstr_adjust_size(bstr *b, size_t newsize) {
+    (*b).size = newsize;
+}
+
+// Case-sensitive prefix test with a bstring needle; delegates to
+// bstr_begins_with_mem().
+int bstr_begins_with(const bstr *haystack, const bstr *needle) {
+    const void *prefix = bstr_ptr(needle);
+    size_t prefix_len = bstr_len(needle);
+
+    return bstr_begins_with_mem(haystack, prefix, prefix_len);
+}
+
+// Case-sensitive prefix test with a NUL-terminated needle; delegates to
+// bstr_begins_with_mem().
+int bstr_begins_with_c(const bstr *haystack, const char *needle) {
+    size_t prefix_len = strlen(needle);
+
+    return bstr_begins_with_mem(haystack, needle, prefix_len);
+}
+
+// Case-insensitive prefix test with a NUL-terminated needle; delegates to
+// bstr_begins_with_mem_nocase().
+int bstr_begins_with_c_nocase(const bstr *haystack, const char *needle) {
+    size_t prefix_len = strlen(needle);
+
+    return bstr_begins_with_mem_nocase(haystack, needle, prefix_len);
+}
+
+// Case-insensitive prefix test with a bstring needle; delegates to
+// bstr_begins_with_mem_nocase().
+int bstr_begins_with_nocase(const bstr *haystack, const bstr *needle) {
+    const void *prefix = bstr_ptr(needle);
+    size_t prefix_len = bstr_len(needle);
+
+    return bstr_begins_with_mem_nocase(haystack, prefix, prefix_len);
+}
+
+/**
+ * Test whether the first len bytes of haystack equal the len bytes at _data.
+ *
+ * @return 1 if every needle byte matched (including the empty needle),
+ *         0 on a mismatch or if haystack is shorter than len.
+ */
+int bstr_begins_with_mem(const bstr *haystack, const void *_data, size_t len) {
+    const unsigned char *needle = (unsigned char *) _data;
+    const unsigned char *hay = bstr_ptr(haystack);
+    size_t haylen = bstr_len(haystack);
+
+    for (size_t i = 0; i < len; i++) {
+        // Running out of haystack before the needle ends is a non-match.
+        if (i >= haylen) return 0;
+        if (hay[i] != needle[i]) return 0;
+    }
+
+    return 1;
+}
+
+/**
+ * Test whether haystack starts with the len bytes at _data, comparing the
+ * tolower() value of each byte.
+ *
+ * @return 1 if every needle byte matched (including the empty needle),
+ *         0 on a mismatch or if haystack is shorter than len.
+ */
+int bstr_begins_with_mem_nocase(const bstr *haystack, const void *_data, size_t len) {
+    const unsigned char *needle = (const unsigned char *) _data;
+    const unsigned char *hay = bstr_ptr(haystack);
+    size_t haylen = bstr_len(haystack);
+
+    for (size_t i = 0; i < len; i++) {
+        // Running out of haystack before the needle ends is a non-match.
+        if (i >= haylen) return 0;
+        if (tolower((int) hay[i]) != tolower((int) needle[i])) return 0;
+    }
+
+    return 1;
+}
+
+// Return the byte at index pos (as a non-negative int), or -1 when pos is
+// not below the string length.
+int bstr_char_at(const bstr *b, size_t pos) {
+    if (pos >= bstr_len(b)) return -1;
+
+    const unsigned char *bytes = bstr_ptr(b);
+    return bytes[pos];
+}
+
+// Return the byte pos positions before the last one (pos 0 is the last
+// byte), or -1 when pos is not below the string length.
+int bstr_char_at_end(const bstr *b, size_t pos) {
+    size_t total = bstr_len(b);
+    if (pos >= total) return -1;
+
+    const unsigned char *bytes = bstr_ptr(b);
+    return bytes[total - 1 - pos];
+}
+
+// Drop the last byte by decrementing the length; an empty string is left
+// unchanged.
+void bstr_chop(bstr *b) {
+    size_t current = bstr_len(b);
+    if (current == 0) return;
+
+    bstr_adjust_len(b, current - 1);
+}
+
+// Scan forward for the first byte equal to c; return its index cast to int,
+// or -1 if no byte matches.
+int bstr_chr(const bstr *b, int c) {
+    const unsigned char *bytes = bstr_ptr(b);
+    size_t total = bstr_len(b);
+
+    for (size_t idx = 0; idx < total; idx++) {
+        if (bytes[idx] == c) return (int) idx;
+    }
+
+    return -1;
+}
+
+// Case-sensitive comparison of two bstrings via bstr_util_cmp_mem().
+int bstr_cmp(const bstr *b1, const bstr *b2) {
+    const void *lhs = bstr_ptr(b1);
+    const void *rhs = bstr_ptr(b2);
+
+    return bstr_util_cmp_mem(lhs, bstr_len(b1), rhs, bstr_len(b2));
+}
+
+// Case-sensitive comparison of a bstring with a NUL-terminated string via
+// bstr_util_cmp_mem().
+int bstr_cmp_c(const bstr *b, const char *c) {
+    size_t clen = strlen(c);
+
+    return bstr_util_cmp_mem(bstr_ptr(b), bstr_len(b), c, clen);
+}
+
+// Case-insensitive comparison of a bstring with a NUL-terminated string via
+// bstr_util_cmp_mem_nocase().
+int bstr_cmp_c_nocase(const bstr *b, const char *c) {
+    size_t clen = strlen(c);
+
+    return bstr_util_cmp_mem_nocase(bstr_ptr(b), bstr_len(b), c, clen);
+}
+
+// Case-insensitive comparison that also skips zero bytes in b; delegates to
+// bstr_util_cmp_mem_nocasenorzero().
+int bstr_cmp_c_nocasenorzero(const bstr *b, const char *c) {
+    size_t clen = strlen(c);
+
+    return bstr_util_cmp_mem_nocasenorzero(bstr_ptr(b), bstr_len(b), c, clen);
+}
+
+// Case-sensitive comparison of a bstring with a memory region via
+// bstr_util_cmp_mem().
+int bstr_cmp_mem(const bstr *b, const void *data, size_t len) {
+    const void *lhs = bstr_ptr(b);
+    size_t lhs_len = bstr_len(b);
+
+    return bstr_util_cmp_mem(lhs, lhs_len, data, len);
+}
+
+// Case-insensitive comparison of a bstring with a memory region via
+// bstr_util_cmp_mem_nocase().
+int bstr_cmp_mem_nocase(const bstr *b, const void *data, size_t len) {
+    const void *lhs = bstr_ptr(b);
+    size_t lhs_len = bstr_len(b);
+
+    return bstr_util_cmp_mem_nocase(lhs, lhs_len, data, len);
+}
+
+// Case-insensitive comparison of two bstrings via bstr_util_cmp_mem_nocase().
+int bstr_cmp_nocase(const bstr *b1, const bstr *b2) {
+    const void *lhs = bstr_ptr(b1);
+    const void *rhs = bstr_ptr(b2);
+
+    return bstr_util_cmp_mem_nocase(lhs, bstr_len(b1), rhs, bstr_len(b2));
+}
+
+// Copy the whole of b into a new bstring via bstr_dup_ex().
+bstr *bstr_dup(const bstr *b) {
+    size_t whole = bstr_len(b);
+
+    return bstr_dup_ex(b, 0, whole);
+}
+
+// Copy a NUL-terminated string (without its terminator) into a new bstring
+// via bstr_dup_mem().
+bstr *bstr_dup_c(const char *cstr) {
+    size_t n = strlen(cstr);
+
+    return bstr_dup_mem(cstr, n);
+}
+
+/**
+ * Copy len bytes of b, starting at offset, into a newly allocated bstring.
+ * No range check is made here: the caller must make sure that
+ * offset + len lies within the data of b.
+ *
+ * @return New bstring, or NULL if bstr_alloc() failed.
+ */
+bstr *bstr_dup_ex(const bstr *b, size_t offset, size_t len) {
+    bstr *copy = bstr_alloc(len);
+
+    if (copy != NULL) {
+        const unsigned char *from = bstr_ptr(b) + offset;
+        memcpy(bstr_ptr(copy), from, len);
+        bstr_adjust_len(copy, len);
+    }
+
+    return copy;
+}
+
+// Duplicate b, then lowercase the copy in place. A failed bstr_dup() yields
+// NULL, which bstr_to_lowercase() passes through unchanged.
+bstr *bstr_dup_lower(const bstr *b) {
+    bstr *copy = bstr_dup(b);
+
+    return bstr_to_lowercase(copy);
+}
+
+// Copy len bytes at data into a newly allocated bstring; returns NULL if
+// bstr_alloc() failed.
+bstr *bstr_dup_mem(const void *data, size_t len) {
+    bstr *copy = bstr_alloc(len);
+
+    if (copy != NULL) {
+        memcpy(bstr_ptr(copy), data, len);
+        bstr_adjust_len(copy, len);
+    }
+
+    return copy;
+}
+
+/**
+ * Grow the inline buffer of b so that it can hold newsize bytes.
+ *
+ * @param[in] b
+ * @param[in] newsize
+ * @return The reallocated bstring (the old pointer must no longer be used),
+ *         or NULL if b wraps external memory, newsize is smaller than the
+ *         current size, the allocation size does not fit in a size_t, or
+ *         realloc() fails. On NULL, b is left as it was.
+ */
+bstr *bstr_expand(bstr *b, size_t newsize) {
+    if (bstr_realptr(b) != NULL) {
+        // Refuse to expand a wrapped bstring. In the future,
+        // we can change this to make a copy of the data, thus
+        // leaving the original memory area intact.
+        return NULL;
+    }
+
+    // Catch attempts to "expand" to a smaller size
+    if (bstr_size(b) > newsize) return NULL;
+
+    // sizeof (bstr) + newsize must not wrap around; otherwise realloc()
+    // would shrink the block while a larger size is recorded below.
+    if (newsize > SIZE_MAX - sizeof (bstr)) return NULL;
+
+    bstr *bnew = realloc(b, sizeof (bstr) + newsize);
+    if (bnew == NULL) return NULL;
+
+    bstr_adjust_size(bnew, newsize);
+
+    return bnew;
+}
+
+// Release the bstring header (and any inline data that follows it).
+// Passing NULL is allowed: free(NULL) does nothing.
+void bstr_free(bstr *b) {
+    free(b);
+}
+
+// Case-sensitive search for a bstring needle; delegates to
+// bstr_index_of_mem().
+int bstr_index_of(const bstr *haystack, const bstr *needle) {
+    const void *pattern = bstr_ptr(needle);
+    size_t pattern_len = bstr_len(needle);
+
+    return bstr_index_of_mem(haystack, pattern, pattern_len);
+}
+
+// Case-sensitive search for a NUL-terminated needle; delegates to
+// bstr_index_of_mem().
+int bstr_index_of_c(const bstr *haystack, const char *needle) {
+    size_t pattern_len = strlen(needle);
+
+    return bstr_index_of_mem(haystack, needle, pattern_len);
+}
+
+// Case-insensitive search for a NUL-terminated needle; delegates to
+// bstr_index_of_mem_nocase().
+int bstr_index_of_c_nocase(const bstr *haystack, const char *needle) {
+    size_t pattern_len = strlen(needle);
+
+    return bstr_index_of_mem_nocase(haystack, needle, pattern_len);
+}
+
+// Case-insensitive search that also skips zero bytes in the haystack;
+// delegates to bstr_util_mem_index_of_mem_nocasenorzero().
+int bstr_index_of_c_nocasenorzero(const bstr *haystack, const char *needle) {
+    const void *hay = bstr_ptr(haystack);
+    size_t hay_len = bstr_len(haystack);
+
+    return bstr_util_mem_index_of_mem_nocasenorzero(hay, hay_len, needle, strlen(needle));
+}
+
+// Case-sensitive search for a memory region; delegates to
+// bstr_util_mem_index_of_mem().
+int bstr_index_of_mem(const bstr *haystack, const void *_data2, size_t len2) {
+    const void *hay = bstr_ptr(haystack);
+    size_t hay_len = bstr_len(haystack);
+
+    return bstr_util_mem_index_of_mem(hay, hay_len, _data2, len2);
+}
+
+// Case-insensitive search for a memory region; delegates to
+// bstr_util_mem_index_of_mem_nocase().
+int bstr_index_of_mem_nocase(const bstr *haystack, const void *_data2, size_t len2) {
+    const void *hay = bstr_ptr(haystack);
+    size_t hay_len = bstr_len(haystack);
+
+    return bstr_util_mem_index_of_mem_nocase(hay, hay_len, _data2, len2);
+}
+
+// Case-insensitive search for a bstring needle; delegates to
+// bstr_index_of_mem_nocase().
+int bstr_index_of_nocase(const bstr *haystack, const bstr *needle) {
+    const void *pattern = bstr_ptr(needle);
+    size_t pattern_len = bstr_len(needle);
+
+    return bstr_index_of_mem_nocase(haystack, pattern, pattern_len);
+}
+
+// Scan backward for the last byte equal to c; return its index cast to int,
+// or -1 if no byte matches.
+int bstr_rchr(const bstr *b, int c) {
+    const unsigned char *bytes = bstr_ptr(b);
+
+    // "remaining" is one past the index under test, so it can reach 0
+    // without the unsigned counter wrapping.
+    for (size_t remaining = bstr_len(b); remaining > 0; remaining--) {
+        if (bytes[remaining - 1] == c) return (int) (remaining - 1);
+    }
+
+    return -1;
+}
+
+// Lowercase every byte of b in place with tolower() and return b.
+// A NULL argument is returned as NULL.
+bstr *bstr_to_lowercase(bstr *b) {
+    if (b == NULL) return NULL;
+
+    unsigned char *bytes = bstr_ptr(b);
+    size_t total = bstr_len(b);
+
+    for (size_t idx = 0; idx < total; idx++) {
+        bytes[idx] = (uint8_t)tolower(bytes[idx]);
+    }
+
+    return b;
+}
+
+/**
+ * Byte-wise comparison of two memory regions.
+ *
+ * @return 0 if both regions have the same length and content; otherwise -1
+ *         or 1 according to the first differing byte, or, when one region
+ *         is a prefix of the other, -1 if the first region is the shorter.
+ */
+int bstr_util_cmp_mem(const void *_data1, size_t len1, const void *_data2, size_t len2) {
+    const unsigned char *lhs = (const unsigned char *) _data1;
+    const unsigned char *rhs = (const unsigned char *) _data2;
+    size_t common = (len1 < len2) ? len1 : len2;
+
+    for (size_t i = 0; i < common; i++) {
+        if (lhs[i] != rhs[i]) {
+            // Difference.
+            return (lhs[i] < rhs[i]) ? -1 : 1;
+        }
+    }
+
+    // The common prefix is equal; only the lengths can still differ.
+    if (len1 == len2) return 0;
+    return (len1 < len2) ? -1 : 1;
+}
+
+/**
+ * Comparison of two memory regions on the tolower() value of each byte.
+ *
+ * @return 0 if both regions have the same length and match; otherwise -1
+ *         or 1 according to the first differing byte, or, when one region
+ *         is a prefix of the other, -1 if the first region is the shorter.
+ */
+int bstr_util_cmp_mem_nocase(const void *_data1, size_t len1, const void *_data2, size_t len2) {
+    const unsigned char *lhs = (const unsigned char *) _data1;
+    const unsigned char *rhs = (const unsigned char *) _data2;
+    size_t common = (len1 < len2) ? len1 : len2;
+
+    for (size_t i = 0; i < common; i++) {
+        int lc = tolower(lhs[i]);
+        int rc = tolower(rhs[i]);
+
+        if (lc != rc) {
+            // Difference.
+            return (lc < rc) ? -1 : 1;
+        }
+    }
+
+    // The common prefix is equal; only the lengths can still differ.
+    if (len1 == len2) return 0;
+    return (len1 < len2) ? -1 : 1;
+}
+
+/**
+ * Case-insensitive comparison in which zero bytes in the FIRST region are
+ * skipped as if they were not there. Zero bytes in the second region are
+ * compared like any other byte.
+ *
+ * @return 0 on a match; otherwise -1 or 1 according to the first differing
+ *         byte, or -1 if the first region ran out first and 1 if not.
+ */
+int bstr_util_cmp_mem_nocasenorzero(const void *_data1, size_t len1, const void *_data2, size_t len2) {
+    const unsigned char *lhs = (const unsigned char *) _data1;
+    const unsigned char *rhs = (const unsigned char *) _data2;
+    size_t li = 0, ri = 0;
+
+    while ((li < len1) && (ri < len2)) {
+        if (lhs[li] == 0) {
+            // Zero byte on the left: step over it without consuming the right.
+            li++;
+            continue;
+        }
+
+        int lc = tolower(lhs[li]);
+        int rc = tolower(rhs[ri]);
+        if (lc != rc) {
+            // Difference.
+            return (lc < rc) ? -1 : 1;
+        }
+
+        li++;
+        ri++;
+    }
+
+    // Trailing zero bytes on the left do not count either.
+    while ((li < len1) && (lhs[li] == 0)) {
+        li++;
+    }
+
+    if ((li == len1) && (ri == len2)) {
+        // They're identical.
+        return 0;
+    }
+
+    // One string is shorter.
+    return (li == len1) ? -1 : 1;
+}
+
+/**
+ * Parse a non-negative integer from the start of a memory region.
+ * Digits are '0'-'9' (0..9) and letters of either case (10..35); a digit
+ * is accepted only if its value is below base.
+ *
+ * @param[in] _data
+ * @param[in] len
+ * @param[in] base
+ * @param[out] lastlen Set to the index of the byte at which parsing stopped
+ *             (the first non-digit, or the digit that caused an overflow).
+ *             When every one of the len bytes was consumed it is set to
+ *             len + 1, not len.
+ * @return The parsed value; -1 if the first byte is not a valid digit;
+ *         -2 if the value would exceed INT64_MAX.
+ */
+int64_t bstr_util_mem_to_pint(const void *_data, size_t len, int base, size_t *lastlen) {
+ const unsigned char *data = (unsigned char *) _data;
+ // tflag becomes 1 once at least one digit has been accepted.
+ int64_t rval = 0, tflag = 0;
+ size_t i = 0;
+
+ *lastlen = i;
+
+ for (i = 0; i < len; i++) {
+ int d = data[i];
+
+ *lastlen = i;
+
+ // Convert character to digit.
+ if ((d >= '0') && (d <= '9')) {
+ d -= '0';
+ } else if ((d >= 'a') && (d <= 'z')) {
+ d -= 'a' - 10;
+ } else if ((d >= 'A') && (d <= 'Z')) {
+ d -= 'A' - 10;
+ } else {
+ d = -1;
+ }
+
+ // Check that the digit makes sense with the base we are using.
+ if ((d == -1) || (d >= base)) {
+ if (tflag) {
+ // Return what we have so far; lastlen points
+ // to the first non-digit position.
+ return rval;
+ } else {
+ // We didn't see a single digit.
+ return -1;
+ }
+ }
+
+ if (tflag) {
+ // Test before multiplying so that rval * base + d cannot exceed
+ // INT64_MAX.
+ if (((INT64_MAX - d) / base) < rval) {
+ // Overflow
+ return -2;
+ }
+
+ rval *= base;
+ rval += d;
+ } else {
+ rval = d;
+ tflag = 1;
+ }
+ }
+
+ // NOTE(review): i == len here, so this stores len + 1. For len == 0 the
+ // loop never runs and the function returns 0 with *lastlen == 1 -
+ // confirm that callers expect this before changing it.
+ *lastlen = i + 1;
+
+ return rval;
+}
+
+// Case-sensitive search for a NUL-terminated needle in a memory region;
+// delegates to bstr_util_mem_index_of_mem().
+int bstr_util_mem_index_of_c(const void *_data1, size_t len1, const char *cstr) {
+    size_t needle_len = strlen(cstr);
+
+    return bstr_util_mem_index_of_mem(_data1, len1, cstr, needle_len);
+}
+
+// Case-insensitive search for a NUL-terminated needle in a memory region;
+// delegates to bstr_util_mem_index_of_mem_nocase().
+int bstr_util_mem_index_of_c_nocase(const void *_data1, size_t len1, const char *cstr) {
+    size_t needle_len = strlen(cstr);
+
+    return bstr_util_mem_index_of_mem_nocase(_data1, len1, cstr, needle_len);
+}
+
+/**
+ * Find the first occurrence of the second region inside the first, trying
+ * every start position in turn.
+ *
+ * @return Index of the match cast to int, or -1 if there is none. An empty
+ *         needle matches at index 0 of a non-empty haystack; an empty
+ *         haystack never matches.
+ */
+int bstr_util_mem_index_of_mem(const void *_data1, size_t len1, const void *_data2, size_t len2) {
+    const unsigned char *hay = (unsigned char *) _data1;
+    const unsigned char *needle = (unsigned char *) _data2;
+
+    // If we ever want to optimize this function, the following link
+    // might be useful: http://en.wikipedia.org/wiki/Knuth-Morris-Pratt_algorithm
+
+    for (size_t start = 0; start < len1; start++) {
+        size_t matched = 0;
+
+        // Extend the match until the needle ends, the haystack ends, or a
+        // byte differs.
+        while ((matched < len2) && (start + matched < len1)
+                && (hay[start + matched] == needle[matched])) {
+            matched++;
+        }
+
+        if (matched == len2) return (int) start;
+    }
+
+    return -1;
+}
+
+/**
+ * Find the first occurrence of the second region inside the first,
+ * comparing the toupper() value of each byte.
+ *
+ * @return Index of the match cast to int, or -1 if there is none. An empty
+ *         needle matches at index 0 of a non-empty haystack; an empty
+ *         haystack never matches.
+ */
+int bstr_util_mem_index_of_mem_nocase(const void *_data1, size_t len1, const void *_data2, size_t len2) {
+    const unsigned char *hay = (unsigned char *) _data1;
+    const unsigned char *needle = (unsigned char *) _data2;
+
+    // If we ever want to optimize this function, the following link
+    // might be useful: http://en.wikipedia.org/wiki/Knuth-Morris-Pratt_algorithm
+
+    for (size_t start = 0; start < len1; start++) {
+        size_t matched = 0;
+
+        // Extend the match until the needle ends, the haystack ends, or a
+        // byte differs.
+        while ((matched < len2) && (start + matched < len1)
+                && (toupper(hay[start + matched]) == toupper(needle[matched]))) {
+            matched++;
+        }
+
+        if (matched == len2) return (int) start;
+    }
+
+    return -1;
+}
+
+/**
+ * Case-insensitive search in which zero bytes in the haystack (first
+ * region) are skipped: a match may span zero bytes but never starts on one.
+ *
+ * @return Index in the haystack where the match starts, cast to int, or -1
+ *         if there is none.
+ */
+int bstr_util_mem_index_of_mem_nocasenorzero(const void *_data1, size_t len1, const void *_data2, size_t len2) {
+    const unsigned char *hay = (unsigned char *) _data1;
+    const unsigned char *needle = (unsigned char *) _data2;
+
+    // If we ever want to optimize this function, the following link
+    // might be useful: http://en.wikipedia.org/wiki/Knuth-Morris-Pratt_algorithm
+
+    for (size_t start = 0; start < len1; start++) {
+        if (hay[start] == 0) {
+            // skip leading zeroes to avoid quadratic complexity
+            continue;
+        }
+
+        size_t matched = 0;
+        size_t pos = start;
+
+        while ((matched < len2) && (pos < len1)) {
+            // A zero byte advances the haystack only; any other byte must
+            // equal the next needle byte.
+            if (hay[pos] != 0) {
+                if (toupper(hay[pos]) != toupper(needle[matched])) break;
+                matched++;
+            }
+            pos++;
+        }
+
+        if (matched == len2) return (int) start;
+    }
+
+    return -1;
+}
+
+/**
+ * Narrow a (pointer, length) pair so that it excludes leading and trailing
+ * bytes for which isspace() is true. The memory itself is not modified.
+ * Nothing happens if either argument is NULL.
+ *
+ * @param[in,out] data
+ * @param[in,out] len
+ */
+void bstr_util_mem_trim(unsigned char **data, size_t *len) {
+    if ((data == NULL)||(len == NULL)) return;
+
+    unsigned char *first = *data;
+    unsigned char *past_last = first + *len;
+
+    // Ignore whitespace at the beginning.
+    while ((first < past_last) && isspace(*first)) first++;
+
+    // Ignore whitespace at the end.
+    while ((past_last > first) && isspace(past_last[-1])) past_last--;
+
+    *data = first;
+    *len = (size_t) (past_last - first);
+}
+
+/**
+ * Copy a memory region into a newly malloc()ed NUL-terminated string,
+ * writing each zero byte of the input as the two characters '\' and '0'.
+ *
+ * @return The new string, or NULL if malloc() failed.
+ */
+char *bstr_util_memdup_to_c(const void *_data, size_t len) {
+    const unsigned char *src = (unsigned char *) _data;
+
+    // Count how many NUL bytes we have in the string.
+    size_t escapes = 0;
+    for (size_t i = 0; i < len; i++) {
+        if (src[i] == '\0') escapes++;
+    }
+
+    // Every NUL needs one extra byte; one more for the terminator.
+    char *out = malloc(len + escapes + 1);
+    if (out == NULL) return NULL;
+
+    // Now copy the string into a NUL-terminated buffer.
+    size_t w = 0;
+    for (size_t i = 0; i < len; i++) {
+        if (src[i] == '\0') {
+            out[w++] = '\\';
+            out[w++] = '0';
+        } else {
+            out[w++] = src[i];
+        }
+    }
+
+    out[w] = '\0';
+
+    return out;
+}
+
+// Convert a bstring to a malloc()ed C string via bstr_util_memdup_to_c();
+// a NULL bstring yields NULL.
+char *bstr_util_strdup_to_c(const bstr *b) {
+    if (b != NULL) {
+        return bstr_util_memdup_to_c(bstr_ptr(b), bstr_len(b));
+    }
+
+    return NULL;
+}
+
+// Wrap a NUL-terminated string (terminator excluded) without copying it;
+// delegates to bstr_wrap_mem().
+bstr *bstr_wrap_c(const char *cstr) {
+    size_t n = strlen(cstr);
+
+    return bstr_wrap_mem((unsigned char *) cstr, n);
+}
+
+/**
+ * Create a bstring header that points at existing memory; the bytes are
+ * not copied. Both len and size are set to the given length.
+ *
+ * @return New bstring, or NULL if malloc() failed.
+ */
+bstr *bstr_wrap_mem(const void *data, size_t len) {
+    bstr *wrapper = (bstr *) malloc(sizeof (bstr));
+
+    if (wrapper != NULL) {
+        wrapper->len = len;
+        wrapper->size = len;
+        wrapper->realptr = (unsigned char *) data;
+    }
+
+    return wrapper;
+}
diff --git a/htp/bstr.h b/htp/bstr.h
new file mode 100644
index 0000000..eb6497b
--- /dev/null
+++ b/htp/bstr.h
@@ -0,0 +1,678 @@
+/***************************************************************************
+ * Copyright (c) 2009-2010 Open Information Security Foundation
+ * Copyright (c) 2010-2013 Qualys, Inc.
+ * All rights reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions are
+ * met:
+ *
+ * - Redistributions of source code must retain the above copyright
+ * notice, this list of conditions and the following disclaimer.
+
+ * - Redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in the
+ * documentation and/or other materials provided with the distribution.
+
+ * - Neither the name of the Qualys, Inc. nor the names of its
+ * contributors may be used to endorse or promote products derived from
+ * this software without specific prior written permission.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+ * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+ * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+ * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
+ * HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+ * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
+ * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+ * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
+ * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+ * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+ * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+ ***************************************************************************/
+
+/**
+ * @file
+ * @author Ivan Ristic <ivanr@webkreator.com>
+ */
+
+#ifndef _BSTR_H
+#define _BSTR_H
+
+#ifdef __cplusplus
+extern "C" {
+#endif
+
+typedef struct bstr_t bstr;
+
+#include <stdio.h>
+#include <stdlib.h>
+#include <stdint.h>
+#include <string.h>
+
+#include "bstr_builder.h"
+
+// Data structures
+
+/** Header of a length-counted byte string. */
+struct bstr_t {
+ /** The length of the string stored in the buffer. */
+ size_t len;
+
+ /** The current size of the buffer. If there is extra room in the
+ * buffer the string will be able to expand without reallocation.
+ */
+ size_t size;
+
+ /** Optional buffer pointer. If this pointer is NULL the string buffer
+ * will immediately follow this structure. If the pointer is not NULL,
+ * it points to the actual buffer used, and there's no data following
+ * this structure.
+ */
+ unsigned char *realptr;
+};
+
+
+// Defines
+
+// Field accessors. Each takes a pointer to a bstr.
+// bstr_len: the len field (bytes of string data in use).
+#define bstr_len(X) ((*(X)).len)
+// bstr_size: the size field (capacity of the buffer).
+#define bstr_size(X) ((*(X)).size)
+// bstr_ptr: start of the data, i.e. realptr if it is set, otherwise the
+// address right after the header. Note that X is evaluated more than once.
+#define bstr_ptr(X) ( ((*(X)).realptr == NULL) ? ((unsigned char *)(X) + sizeof(bstr)) : (unsigned char *)(*(X)).realptr )
+// bstr_realptr: the raw realptr field (NULL when the data is inline).
+#define bstr_realptr(X) ((*(X)).realptr)
+
+
+// Functions
+
+/**
+ * Append source bstring to destination bstring, growing destination if
+ * necessary. If the destination bstring is expanded, the pointer will change.
+ * You must replace the original destination pointer with the returned one.
+ * Destination is not changed on memory allocation failure.
+ *
+ * @param[in] bdestination
+ * @param[in] bsource
+ * @return Updated bstring, or NULL on memory allocation failure.
+ */
+bstr *bstr_add(bstr *bdestination, const bstr *bsource);
+
+/**
+ * Append a NUL-terminated source to destination, growing destination if
+ * necessary. If the string is expanded, the pointer will change. You must
+ * replace the original destination pointer with the returned one. Destination
+ * is not changed on memory allocation failure.
+ *
+ * @param[in] b
+ * @param[in] cstr
+ * @return Updated bstring, or NULL on memory allocation failure.
+ */
+bstr *bstr_add_c(bstr *b, const char *cstr);
+
+/**
+ * Append as many bytes from the NUL-terminated source as fit into the
+ * destination bstring. The destination storage will not be expanded if there
+ * is not enough space in it already to accommodate all of the data.
+ *
+ * @param[in] b
+ * @param[in] cstr
+ * @return The destination bstring.
+ */
+bstr *bstr_add_c_noex(bstr *b, const char *cstr);
+
+/**
+ * Append a memory region to destination, growing destination if necessary. If
+ * the string is expanded, the pointer will change. You must replace the
+ * original destination pointer with the returned one. Destination is not
+ * changed on memory allocation failure.
+ *
+ * @param[in] b
+ * @param[in] data
+ * @param[in] len
+ * @return Updated bstring, or NULL on memory allocation failure.
+ */
+bstr *bstr_add_mem(bstr *b, const void *data, size_t len);
+
+/**
+ * Append as many bytes from the memory region as fit into the destination
+ * bstring. The destination storage will not be expanded if there is not
+ * enough space in it already to accommodate all of the data.
+ *
+ * @param[in] b
+ * @param[in] data
+ * @param[in] len
+ * @return The destination bstring.
+ */
+bstr *bstr_add_mem_noex(bstr *b, const void *data, size_t len);
+
+/**
+ * Append as many bytes from the source bstring as fit into the destination
+ * bstring. The destination storage will not be expanded if there is not
+ * enough space in it already to accommodate all of the data.
+ *
+ * @param[in] bdestination
+ * @param[in] bsource
+ * @return The destination bstring.
+ */
+bstr *bstr_add_noex(bstr *bdestination, const bstr *bsource);
+
+/**
+ * Adjust bstring length. You will need to use this method whenever
+ * you work directly with the string contents, and end up changing
+ * its length by direct structure manipulation.
+ *
+ * @param[in] b
+ * @param[in] newlen
+ */
+void bstr_adjust_len(bstr *b, size_t newlen);
+
+/**
+ * Change the external pointer used by bstring. You will need to use this
+ * function only if you're messing with bstr internals. Use with caution.
+ *
+ * @param[in] b
+ * @param[in] newrealptr
+ */
+void bstr_adjust_realptr(bstr *b, void *newrealptr);
+
+/**
+ * Adjust bstring size. This does not change the size of the storage behind
+ * the bstring, just changes the field that keeps track of how many bytes
+ * there are in the storage. You will need to use this function only if
+ * you're messing with bstr internals. Use with caution.
+ *
+ * @param[in] b
+ * @param[in] newsize
+ */
+void bstr_adjust_size(bstr *b, size_t newsize);
+
+/**
+ * Allocate a zero-length bstring, reserving space for at least size bytes.
+ *
+ * @param[in] size
+ * @return New string instance
+ */
+bstr *bstr_alloc(size_t size);
+
+/**
+ * Checks whether bstring begins with another bstring. Case sensitive.
+ *
+ * @param[in] bhaystack
+ * @param[in] bneedle
+ * @return 1 if true, otherwise 0.
+ */
+int bstr_begins_with(const bstr *bhaystack, const bstr *bneedle);
+
+/**
+ * Checks whether bstring begins with NUL-terminated string. Case sensitive.
+ *
+ * @param[in] bhaystack
+ * @param[in] cneedle
+ * @return 1 if true, otherwise 0.
+ */
+int bstr_begins_with_c(const bstr *bhaystack, const char *cneedle);
+
+/**
+ * Checks whether bstring begins with NUL-terminated string. Case insensitive.
+ *
+ * @param[in] bhaystack
+ * @param[in] cneedle
+ * @return 1 if true, otherwise 0.
+ */
+int bstr_begins_with_c_nocase(const bstr *bhaystack, const char *cneedle);
+
+/**
+ * Checks whether the bstring begins with the given memory block. Case sensitive.
+ *
+ * @param[in] bhaystack
+ * @param[in] data
+ * @param[in] len
+ * @return 1 if true, otherwise 0.
+ */
+int bstr_begins_with_mem(const bstr *bhaystack, const void *data, size_t len);
+
+/**
+ * Checks whether bstring begins with memory block. Case insensitive.
+ *
+ * @param[in] bhaystack
+ * @param[in] data
+ * @param[in] len
+ * @return 1 if true, otherwise 0.
+ */
+int bstr_begins_with_mem_nocase(const bstr *bhaystack, const void *data, size_t len);
+
+/**
+ * Checks whether bstring begins with another bstring. Case insensitive.
+ *
+ * @param[in] bhaystack
+ * @param[in] cneedle
+ * @return 1 if true, otherwise 0.
+ */
+int bstr_begins_with_nocase(const bstr *bhaystack, const bstr *cneedle);
+
+/**
+ * Return the byte at the given position.
+ *
+ * @param[in] b
+ * @param[in] pos
+ * @return The byte at the given location, or -1 if the position is out of range.
+ */
+int bstr_char_at(const bstr *b, size_t pos);
+
+/**
+ * Return the byte at the given position, counting from the end of the string (e.g.,
+ * byte at position 0 is the last byte in the string.)
+ *
+ * @param[in] b
+ * @param[in] pos
+ * @return The byte at the given location, or -1 if the position is out of range.
+ */
+int bstr_char_at_end(const bstr *b, size_t pos);
+
+/**
+ * Remove the last byte from bstring, assuming it contains at least one byte. This
+ * function will not reduce the storage that backs the string, only the amount
+ * of data used.
+ *
+ * @param[in] b
+ */
+void bstr_chop(bstr *b);
+
+/**
+ * Return the first position of the provided byte.
+ *
+ * @param[in] b
+ * @param[in] c
+ * @return The first position of the byte, or -1 if it could not be found
+ */
+int bstr_chr(const bstr *b, int c);
+
+/**
+ * Case-sensitive comparison of two bstrings.
+ *
+ * @param[in] b1
+ * @param[in] b2
+ * @return Zero on string match, 1 if b1 is greater than b2, and -1 if b2 is
+ * greater than b1.
+ */
+int bstr_cmp(const bstr *b1, const bstr *b2);
+
+/**
+ * Case-sensitive comparison of a bstring and a NUL-terminated string.
+ *
+ * @param[in] b
+ * @param[in] cstr
+ * @return Zero on string match, 1 if b is greater than cstr, and -1 if cstr is
+ * greater than b.
+ */
+int bstr_cmp_c(const bstr *b, const char *cstr);
+
+/**
+ * Case-insensitive comparison of a bstring with a NUL-terminated string.
+ *
+ * @param[in] b
+ * @param[in] cstr
+ * @return Zero on string match, 1 if b is greater than cstr, and -1 if cstr is greater than b.
+ */
+int bstr_cmp_c_nocase(const bstr *b, const char *cstr);
+
+/**
+ * Case-insensitive zero-skipping comparison of a bstring with a NUL-terminated string.
+ *
+ * @param[in] b
+ * @param[in] cstr
+ * @return Zero on string match, 1 if b is greater than cstr, and -1 if cstr is greater than b.
+ */
+int bstr_cmp_c_nocasenorzero(const bstr *b, const char *cstr);
+
+/**
+ * Performs a case-sensitive comparison of a bstring with a memory region.
+ *
+ * @param[in] b
+ * @param[in] data
+ * @param[in] len
+ * @return Zero on a match, 1 if b is greater than data, and -1 if data is greater than b.
+ */
+int bstr_cmp_mem(const bstr *b, const void *data, size_t len);
+
+/**
+ * Performs a case-insensitive comparison of a bstring with a memory region.
+ *
+ * @param[in] b
+ * @param[in] data
+ * @param[in] len
+ * @return Zero on a match, 1 if b is greater than data, and -1 if data is greater than b.
+ */
+int bstr_cmp_mem_nocase(const bstr *b, const void *data, size_t len);
+
+/**
+ * Case-insensitive comparison of two bstrings.
+ *
+ * @param[in] b1
+ * @param[in] b2
+ * @return Zero on string match, 1 if b1 is greater than b2, and -1 if b2 is
+ * greater than b1.
+ */
+int bstr_cmp_nocase(const bstr *b1, const bstr *b2);
+
+/**
+ * Case-insensitive and zero-skipping comparison of two bstrings.
+ *
+ * @param[in] b1
+ * @param[in] b2
+ * @return Zero on string match, 1 if b1 is greater than b2, and -1 if b2 is
+ * greater than b1.
+ */
+int bstr_cmp_nocasenorzero(const bstr *b1, const bstr *b2);
+
+/**
+ * Create a new bstring by copying the provided bstring.
+ *
+ * @param[in] b
+ * @return New bstring, or NULL if memory allocation failed.
+ */
+bstr *bstr_dup(const bstr *b);
+
+/**
+ * Create a new bstring by copying the provided NUL-terminated string.
+ *
+ * @param[in] cstr
+ * @return New bstring, or NULL if memory allocation failed.
+ */
+bstr *bstr_dup_c(const char *cstr);
+
+/**
+ * Create a new bstring by copying a part of the provided bstring.
+ *
+ * @param[in] b
+ * @param[in] offset
+ * @param[in] len
+ * @return New bstring, or NULL if memory allocation failed.
+ */
+bstr *bstr_dup_ex(const bstr *b, size_t offset, size_t len);
+
+/**
+ * Create a copy of the provided bstring, then convert it to lowercase.
+ *
+ * @param[in] b
+ * @return New bstring, or NULL if memory allocation failed
+ */
+bstr *bstr_dup_lower(const bstr *b);
+
+/**
+ * Create a new bstring by copying the provided memory region.
+ *
+ * @param[in] data
+ * @param[in] len
+ * @return New bstring, or NULL if memory allocation failed
+ */
+bstr *bstr_dup_mem(const void *data, size_t len);
+
+/**
+ * Expand internal bstring storage to support at least newsize bytes. The storage
+ * is not expanded if the current size is greater than or equal to newsize. Because
+ * realloc is used underneath, the old pointer to bstring may no longer be valid
+ * after this function completes successfully.
+ *
+ * @param[in] b
+ * @param[in] newsize
+ * @return Updated string instance, or NULL if memory allocation failed or if
+ * an attempt was made to "expand" the bstring to a smaller size.
+ */
+bstr *bstr_expand(bstr *b, size_t newsize);
+
+/**
+ * Deallocate the supplied bstring instance. The caller's pointer is passed by
+ * value and is therefore not reset to NULL. Allows NULL on input.
+ *
+ * @param[in] b
+ */
+void bstr_free(bstr *b);
+
+/**
+ * Find the needle in the haystack.
+ *
+ * @param[in] bhaystack
+ * @param[in] bneedle
+ * @return Position of the match, or -1 if the needle could not be found.
+ */
+int bstr_index_of(const bstr *bhaystack, const bstr *bneedle);
+
+/**
+ * Find the needle in the haystack, ignoring case differences.
+ *
+ * @param[in] bhaystack
+ * @param[in] bneedle
+ * @return Position of the match, or -1 if the needle could not be found.
+ */
+int bstr_index_of_nocase(const bstr *bhaystack, const bstr *bneedle);
+
+/**
+ * Find the needle in the haystack, with the needle being a NUL-terminated
+ * string.
+ *
+ * @param[in] bhaystack
+ * @param[in] cneedle
+ * @return Position of the match, or -1 if the needle could not be found.
+ */
+int bstr_index_of_c(const bstr *bhaystack, const char *cneedle);
+
+/**
+ * Find the needle in the haystack, with the needle being a NUL-terminated
+ * string. Ignore case differences.
+ *
+ * @param[in] bhaystack
+ * @param[in] cneedle
+ * @return Position of the match, or -1 if the needle could not be found.
+ */
+int bstr_index_of_c_nocase(const bstr *bhaystack, const char *cneedle);
+
+/**
+ * Find the needle in the haystack, with the needle being a NUL-terminated
+ * string. Ignore case differences. Skip zeroes in haystack.
+ *
+ * @param[in] bhaystack
+ * @param[in] cneedle
+ * @return Position of the match, or -1 if the needle could not be found.
+ */
+int bstr_index_of_c_nocasenorzero(const bstr *bhaystack, const char *cneedle);
+
+/**
+ * Find the needle in the haystack, with the needle being a memory region.
+ *
+ * @param[in] bhaystack
+ * @param[in] data
+ * @param[in] len
+ * @return Position of the match, or -1 if the needle could not be found.
+ */
+int bstr_index_of_mem(const bstr *bhaystack, const void *data, size_t len);
+
+/**
+ * Find the needle in the haystack, with the needle being a memory region.
+ * Ignore case differences.
+ *
+ * @param[in] bhaystack
+ * @param[in] data
+ * @param[in] len
+ * @return Position of the match, or -1 if the needle could not be found.
+ */
+int bstr_index_of_mem_nocase(const bstr *bhaystack, const void *data, size_t len);
+
+/**
+ * Return the last position of a character (byte).
+ *
+ * @param[in] b
+ * @param[in] c
+ * @return The last position of the character, or -1 if it could not be found.
+ */
+int bstr_rchr(const bstr *b, int c);
+
+/**
+ * Convert bstring to lowercase. This function converts the supplied string,
+ * it does not create a new string.
+ *
+ * @param[in] b
+ * @return The same bstring received on input
+ */
+bstr *bstr_to_lowercase(bstr *b);
+
+/**
+ * Case-sensitive comparison of two memory regions.
+ *
+ * @param[in] data1
+ * @param[in] len1
+ * @param[in] data2
+ * @param[in] len2
+ * @return Zero if the memory regions are identical, 1 if data1 is greater than
+ * data2, and -1 if data2 is greater than data1.
+ */
+int bstr_util_cmp_mem(const void *data1, size_t len1, const void *data2, size_t len2);
+
+/**
+ * Case-insensitive comparison of two memory regions.
+ *
+ * @param[in] data1
+ * @param[in] len1
+ * @param[in] data2
+ * @param[in] len2
+ * @return Zero if the memory regions are identical, 1 if data1 is greater than
+ * data2, and -1 if data2 is greater than data1.
+ */
+ int bstr_util_cmp_mem_nocase(const void *data1, size_t len1, const void *data2, size_t len2);
+
+/**
+ * Case-insensitive zero-skipping comparison of two memory regions.
+ *
+ * @param[in] data1
+ * @param[in] len1
+ * @param[in] data2
+ * @param[in] len2
+ * @return Zero if the memory regions are identical, 1 if data1 is greater than
+ * data2, and -1 if data2 is greater than data1.
+ */
+ int bstr_util_cmp_mem_nocasenorzero(const void *data1, size_t len1, const void *data2, size_t len2);
+
+/**
+ * Convert contents of a memory region to a positive integer.
+ *
+ * @param[in] data
+ * @param[in] len
+ * @param[in] base The desired number base.
+ * @param[in] lastlen Points to the first unused byte in the region
+ * @return If the conversion was successful, this function returns the
+ * number. When the conversion fails, -1 will be returned when not
+ * one valid digit was found, and -2 will be returned if an overflow
+ * occurred.
+ */
+int64_t bstr_util_mem_to_pint(const void *data, size_t len, int base, size_t *lastlen);
+
+/**
+ * Searches a memory block for the given NUL-terminated string. Case sensitive.
+ *
+ * @param[in] data
+ * @param[in] len
+ * @param[in] cstr
+ * @return Index of the first location of the needle on success, or -1 if the needle was not found.
+ */
+int bstr_util_mem_index_of_c(const void *data, size_t len, const char *cstr);
+
+/**
+ * Searches a memory block for the given NUL-terminated string. Case insensitive.
+ *
+ * @param[in] data
+ * @param[in] len
+ * @param[in] cstr
+ * @return Index of the first location of the needle on success, or -1 if the needle was not found.
+ */
+int bstr_util_mem_index_of_c_nocase(const void *data, size_t len, const char *cstr);
+
+/**
+ * Searches the haystack memory block for the needle memory block. Case sensitive.
+ *
+ * @param data1
+ * @param len1
+ * @param data2
+ * @param len2
+ * @return Index of the first location of the needle on success, or -1 if the needle was not found.
+ */
+int bstr_util_mem_index_of_mem(const void *data1, size_t len1, const void *data2, size_t len2);
+
+/**
+ * Searches the haystack memory block for the needle memory block. Case insensitive.
+ *
+ * @param data1
+ * @param len1
+ * @param data2
+ * @param len2
+ * @return Index of the first location of the needle on success, or -1 if the needle was not found.
+ */
+int bstr_util_mem_index_of_mem_nocase(const void *data1, size_t len1, const void *data2, size_t len2);
+
+/**
+ * Searches the haystack memory block for the needle memory block. Case insensitive. Skips zeroes in data1.
+ *
+ * @param data1
+ * @param len1
+ * @param data2
+ * @param len2
+ * @return Index of the first location of the needle on success, or -1 if the needle was not found.
+ */
+int bstr_util_mem_index_of_mem_nocasenorzero(const void *data1, size_t len1, const void *data2, size_t len2);
+
+/**
+ * Removes whitespace from the beginning and the end of a memory region. The data
+ * itself is not modified; this function only adjusts the provided pointers.
+ *
+ * @param[in,out] data
+ * @param[in,out] len
+ */
+void bstr_util_mem_trim(unsigned char **data, size_t *len);
+
+/**
+ * Take the provided memory region, allocate a new memory buffer, and construct
+ * a NUL-terminated string, replacing each NUL byte with "\0" (two bytes). The
+ * caller is responsible to keep track of the allocated memory area and free
+ * it once it is no longer needed.
+ *
+ * @param[in] data
+ * @param[in] len
+ * @return The newly created NUL-terminated string, or NULL in case of memory
+ * allocation failure.
+ */
+char *bstr_util_memdup_to_c(const void *data, size_t len);
+
+/**
+ * Create a new NUL-terminated string out of the provided bstring. If NUL bytes
+ * are contained in the bstring, each will be replaced with "\0" (two characters).
+ * The caller is responsible to keep track of the allocated memory area and free
+ * it once it is no longer needed.
+ *
+ * @param[in] b
+ * @return The newly created NUL-terminated string, or NULL in case of memory
+ * allocation failure.
+ */
+char *bstr_util_strdup_to_c(const bstr *b);
+
+/**
+ * Create a new bstring from the provided NUL-terminated string and without
+ * copying the data. The caller must ensure that the input string continues
+ * to point to a valid memory location for as long as the bstring is used.
+ *
+ * @param[in] cstr
+ * @return New bstring, or NULL on memory allocation failure.
+ */
+bstr *bstr_wrap_c(const char *cstr);
+
+/**
+ * Create a new bstring from the provided memory buffer without
+ * copying the data. The caller must ensure that the buffer remains
+ * valid for as long as the bstring is used.
+ *
+ * @param[in] data
+ * @param[in] len
+ * @return New bstring, or NULL on memory allocation failure.
+ */
+bstr *bstr_wrap_mem(const void *data, size_t len);
+
+#ifdef __cplusplus
+}
+#endif
+
+#endif /* _BSTR_H */
diff --git a/htp/bstr_builder.c b/htp/bstr_builder.c
new file mode 100644
index 0000000..89394f6
--- /dev/null
+++ b/htp/bstr_builder.c
@@ -0,0 +1,121 @@
+/***************************************************************************
+ * Copyright (c) 2009-2010 Open Information Security Foundation
+ * Copyright (c) 2010-2013 Qualys, Inc.
+ * All rights reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions are
+ * met:
+ *
+ * - Redistributions of source code must retain the above copyright
+ * notice, this list of conditions and the following disclaimer.
+
+ * - Redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in the
+ * documentation and/or other materials provided with the distribution.
+
+ * - Neither the name of the Qualys, Inc. nor the names of its
+ * contributors may be used to endorse or promote products derived from
+ * this software without specific prior written permission.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+ * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+ * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+ * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
+ * HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+ * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
+ * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+ * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
+ * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+ * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+ * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+ ***************************************************************************/
+
+/**
+ * @file
+ * @author Ivan Ristic <ivanr@webkreator.com>
+ */
+
+#include "bstr.h"
+#include "htp_list.h"
+
+/**
+ * Adds an existing bstring to the builder by pushing the pointer itself onto
+ * the builder's list of pieces; no copy of the string is made here.
+ *
+ * @param[in] bb
+ * @param[in] b
+ * @return The status reported by htp_list_push().
+ */
+htp_status_t bstr_builder_appendn(bstr_builder_t *bb, bstr *b) {
+    htp_status_t rc = htp_list_push(bb->pieces, b);
+    return rc;
+}
+
+/**
+ * Appends a copy of a NUL-terminated string to the builder as a new piece.
+ * The copy is released again if it cannot be stored in the list of pieces.
+ *
+ * @param[in] bb
+ * @param[in] cstr
+ * @return HTP_ERROR if the copy could not be created; otherwise the status
+ *         reported by htp_list_push().
+ */
+htp_status_t bstr_builder_append_c(bstr_builder_t *bb, const char *cstr) {
+    bstr *b = bstr_dup_c(cstr);
+    if (b == NULL) return HTP_ERROR;
+
+    htp_status_t rc = htp_list_push(bb->pieces, b);
+    if (rc != HTP_OK) {
+        // The copy was created here and never reached the list, so this is
+        // the only place that can still release it.
+        bstr_free(b);
+    }
+
+    return rc;
+}
+
+/**
+ * Appends a copy of a memory region to the builder as a new piece. The copy
+ * is released again if it cannot be stored in the list of pieces.
+ *
+ * @param[in] bb
+ * @param[in] data
+ * @param[in] len
+ * @return HTP_ERROR if the copy could not be created; otherwise the status
+ *         reported by htp_list_push().
+ */
+htp_status_t bstr_builder_append_mem(bstr_builder_t *bb, const void *data, size_t len) {
+    bstr *b = bstr_dup_mem(data, len);
+    if (b == NULL) return HTP_ERROR;
+
+    htp_status_t rc = htp_list_push(bb->pieces, b);
+    if (rc != HTP_OK) {
+        // The copy was created here and never reached the list, so this is
+        // the only place that can still release it.
+        bstr_free(b);
+    }
+
+    return rc;
+}
+
+/**
+ * Frees every piece held by the builder and then empties the list of pieces.
+ * The builder itself stays allocated.
+ *
+ * @param[in] bb
+ */
+void bstr_builder_clear(bstr_builder_t *bb) {
+    const size_t count = htp_list_size(bb->pieces);
+
+    // An empty builder has nothing to release
+    if (count == 0) return;
+
+    size_t pos = 0;
+    while (pos < count) {
+        bstr *piece = htp_list_get(bb->pieces, pos);
+        bstr_free(piece);
+        pos++;
+    }
+
+    htp_list_clear(bb->pieces);
+}
+
+/**
+ * Allocates a zero-initialized builder together with its list of pieces,
+ * which is created with BSTR_BUILDER_DEFAULT_SIZE.
+ *
+ * @return New builder, or NULL if either allocation failed.
+ */
+bstr_builder_t *bstr_builder_create(void) {
+    bstr_builder_t *builder = calloc(1, sizeof *builder);
+
+    if (builder != NULL) {
+        builder->pieces = htp_list_create(BSTR_BUILDER_DEFAULT_SIZE);
+
+        if (builder->pieces == NULL) {
+            // Without a list the builder is unusable; undo the allocation
+            free(builder);
+            builder = NULL;
+        }
+    }
+
+    return builder;
+}
+
+/**
+ * Frees all pieces held by the builder, then the list of pieces, then the
+ * builder itself. Does nothing when given NULL.
+ *
+ * @param[in] bb
+ */
+void bstr_builder_destroy(bstr_builder_t *bb) {
+    if (bb != NULL) {
+        // Release the pieces before the list that refers to them
+        const size_t count = htp_list_size(bb->pieces);
+        size_t pos = 0;
+        while (pos < count) {
+            bstr *piece = htp_list_get(bb->pieces, pos);
+            bstr_free(piece);
+            pos++;
+        }
+
+        htp_list_destroy(bb->pieces);
+        free(bb);
+    }
+}
+
+/**
+ * Reports how many pieces the builder currently holds.
+ *
+ * @param[in] bb
+ * @return The size of the builder's list of pieces.
+ */
+size_t bstr_builder_size(const bstr_builder_t *bb) {
+    size_t piece_count = htp_list_size(bb->pieces);
+    return piece_count;
+}
+
+/**
+ * Builds one new bstring out of all pieces held by the builder. The pieces
+ * themselves are left in the builder untouched.
+ *
+ * @param[in] bb
+ * @return New bstring, or NULL if it could not be allocated.
+ */
+bstr *bstr_builder_to_str(const bstr_builder_t *bb) {
+    const size_t count = htp_list_size(bb->pieces);
+
+    // First pass: add up the lengths of all pieces
+    size_t total = 0;
+    for (size_t pos = 0; pos < count; pos++) {
+        bstr *piece = htp_list_get(bb->pieces, pos);
+        total += bstr_len(piece);
+    }
+
+    // Allocate the result with exactly the combined length
+    bstr *result = bstr_alloc(total);
+    if (result == NULL) return NULL;
+
+    // Second pass: append every piece, in list order, to the result
+    for (size_t pos = 0; pos < count; pos++) {
+        bstr *piece = htp_list_get(bb->pieces, pos);
+        bstr_add_noex(result, piece);
+    }
+
+    return result;
+}
diff --git a/htp/bstr_builder.h b/htp/bstr_builder.h
new file mode 100644
index 0000000..335a131
--- /dev/null
+++ b/htp/bstr_builder.h
@@ -0,0 +1,136 @@
+/***************************************************************************
+ * Copyright (c) 2009-2010 Open Information Security Foundation
+ * Copyright (c) 2010-2013 Qualys, Inc.
+ * All rights reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions are
+ * met:
+ *
+ * - Redistributions of source code must retain the above copyright
+ * notice, this list of conditions and the following disclaimer.
+
+ * - Redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in the
+ * documentation and/or other materials provided with the distribution.
+
+ * - Neither the name of the Qualys, Inc. nor the names of its
+ * contributors may be used to endorse or promote products derived from
+ * this software without specific prior written permission.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+ * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+ * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+ * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
+ * HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+ * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
+ * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+ * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
+ * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+ * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+ * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+ ***************************************************************************/
+
+/**
+ * @file
+ * @author Ivan Ristic <ivanr@webkreator.com>
+ */
+
+#ifndef _BSTR_BUILDER_H
+#define _BSTR_BUILDER_H
+
+#ifdef __cplusplus
+extern "C" {
+#endif
+
+typedef struct bstr_builder_t bstr_builder_t;
+
+#include "htp_list.h"
+
+/**
+ * String builder: collects string pieces in a list so that they can later be
+ * combined into a single bstring.
+ */
+struct bstr_builder_t {
+ /** List of the pieces (bstr pointers) added to the builder so far. */
+ htp_list_t *pieces;
+};
+
+/** Size argument given to htp_list_create() when a builder's list of pieces is created. */
+#define BSTR_BUILDER_DEFAULT_SIZE 16
+
+/**
+ * Adds one new string to the builder. This function will adopt the
+ * string and destroy it when the builder itself is destroyed.
+ *
+ * @param[in] bb
+ * @param[in] b
+ * @return HTP_OK on success, HTP_ERROR on failure.
+ */
+htp_status_t bstr_builder_appendn(bstr_builder_t *bb, bstr *b);
+
+/**
+ * Adds one new piece, in the form of a NUL-terminated string, to
+ * the builder. This function will make a copy of the provided string.
+ *
+ * @param[in] bb
+ * @param[in] cstr
+ * @return HTP_OK on success, HTP_ERROR on failure.
+ */
+htp_status_t bstr_builder_append_c(bstr_builder_t *bb, const char *cstr);
+
+/**
+ * Adds one new piece, defined with the supplied pointer and
+ * length, to the builder. This function will make a copy of the
+ * provided data region.
+ *
+ * @param[in] bb
+ * @param[in] data
+ * @param[in] len
+ * @return HTP_OK on success, HTP_ERROR on failure.
+ */
+htp_status_t bstr_builder_append_mem(bstr_builder_t *bb, const void *data, size_t len);
+
+/**
+ * Clears this string builder, destroying all existing pieces. You may
+ * want to clear a builder once you've either read all the pieces and
+ * done something with them, or after you've converted the builder into
+ * a single string.
+ *
+ * @param[in] bb
+ */
+void bstr_builder_clear(bstr_builder_t *bb);
+
+/**
+ * Creates a new string builder.
+ *
+ * @return New string builder, or NULL on error.
+ */
+bstr_builder_t *bstr_builder_create(void);
+
+/**
+ * Destroys an existing string builder, also destroying all
+ * the pieces stored within.
+ *
+ * @param[in] bb
+ */
+void bstr_builder_destroy(bstr_builder_t *bb);
+
+/**
+ * Returns the size (the number of pieces) currently in a string builder.
+ *
+ * @param[in] bb
+ * @return size
+ */
+size_t bstr_builder_size(const bstr_builder_t *bb);
+
+/**
+ * Creates a single string out of all the pieces held in a
+ * string builder. This method will not destroy any of the pieces.
+ *
+ * @param[in] bb
+ * @return New string, or NULL on error.
+ */
+bstr *bstr_builder_to_str(const bstr_builder_t *bb);
+
+
+#ifdef __cplusplus
+}
+#endif
+
+#endif /* _BSTR_BUILDER_H */
+
diff --git a/htp/htp.h b/htp/htp.h
new file mode 100644
index 0000000..36209ad
--- /dev/null
+++ b/htp/htp.h
@@ -0,0 +1,678 @@
+/***************************************************************************
+ * Copyright (c) 2009-2010 Open Information Security Foundation
+ * Copyright (c) 2010-2013 Qualys, Inc.
+ * All rights reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions are
+ * met:
+ *
+ * - Redistributions of source code must retain the above copyright
+ * notice, this list of conditions and the following disclaimer.
+
+ * - Redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in the
+ * documentation and/or other materials provided with the distribution.
+
+ * - Neither the name of the Qualys, Inc. nor the names of its
+ * contributors may be used to endorse or promote products derived from
+ * this software without specific prior written permission.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+ * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+ * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+ * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
+ * HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+ * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
+ * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+ * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
+ * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+ * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+ * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+ ***************************************************************************/
+
+/**
+ * @file
+ * @author Ivan Ristic <ivanr@webkreator.com>
+ */
+
+#ifndef _HTP_H
+#define _HTP_H
+
+#ifdef __cplusplus
+extern "C" {
+#endif
+
+#include <sys/time.h>
+
+#include "htp_version.h"
+#include "htp_core.h"
+
+#include "bstr.h"
+#include "htp_base64.h"
+#include "htp_config.h"
+#include "htp_connection_parser.h"
+#include "htp_decompressors.h"
+#include "htp_hooks.h"
+#include "htp_list.h"
+#include "htp_multipart.h"
+#include "htp_table.h"
+#include "htp_transaction.h"
+#include "htp_urlencoded.h"
+#include "htp_utf8_decoder.h"
+
+/**
+ * Represents a single TCP connection.
+ */
+struct htp_conn_t {
+ /** Client IP address. */
+ char *client_addr;
+
+ /** Client port. */
+ int client_port;
+
+ /** Server IP address. */
+ char *server_addr;
+
+ /** Server port. */
+ int server_port;
+
+ /**
+ * Transactions carried out on this connection. The list may contain
+ * NULL elements when some of the transactions are deleted (and then
+ * removed from a connection by calling htp_conn_remove_tx()).
+ */
+ htp_list_t *transactions;
+
+ /** Log messages associated with this connection. */
+ htp_list_t *messages;
+
+ /** Parsing flags: HTP_CONN_PIPELINED. */
+ uint8_t flags;
+
+ /**
+ * When was this connection opened? Can be NULL.
+ * NOTE(review): the field is declared by value, so "NULL" presumably
+ * means "not set" - confirm against the definition of htp_time_t.
+ */
+ htp_time_t open_timestamp;
+
+ /**
+ * When was this connection closed? Can be NULL.
+ * NOTE(review): the field is declared by value, so "NULL" presumably
+ * means "not set" - confirm against the definition of htp_time_t.
+ */
+ htp_time_t close_timestamp;
+
+ /** Inbound data counter. */
+ int64_t in_data_counter;
+
+ /** Outbound data counter. */
+ int64_t out_data_counter;
+};
+
+/**
+ * Used to represent files that are seen during the processing of HTTP traffic. Most
+ * commonly this refers to files seen in multipart/form-data payloads. In addition, PUT
+ * request bodies can be treated as files.
+ */
+struct htp_file_t {
+ /** Where did this file come from? Possible values: HTP_FILE_MULTIPART and HTP_FILE_PUT. */
+ enum htp_file_source_t source;
+
+ /** File name, as provided (e.g., in the Content-Disposition multipart part header). */
+ bstr *filename;
+
+ /** File length. */
+ int64_t len;
+
+ /** The unique filename in which this file is stored on the filesystem, when applicable. */
+ char *tmpname;
+
+ /** The file descriptor used for external storage, or -1 if unused. */
+ int fd;
+};
+
+/**
+ * Represents a chunk of file data: a data buffer together with a reference
+ * to the file information it is associated with.
+ */
+struct htp_file_data_t {
+ /** File information. */
+ htp_file_t *file;
+
+ /** Pointer to the data buffer. Declared const: not written through this structure. */
+ const unsigned char *data;
+
+ /** Buffer length. */
+ size_t len;
+};
+
+/**
+ * Represents a single log entry.
+ */
+struct htp_log_t {
+ /** The connection parser associated with this log message. */
+ htp_connp_t *connp;
+
+ /** The transaction associated with this log message, if any. */
+ htp_tx_t *tx;
+
+ /** Log message text. */
+ const char *msg;
+
+ /** Message level. */
+ enum htp_log_level_t level;
+
+ /** Message code. */
+ int code;
+
+ /** Source file in which the code that emitted the message resides. */
+ const char *file;
+
+ /** Line number, within that file, of the code that emitted the message. */
+ unsigned int line;
+};
+
+/**
+ * Represents a single request or response header as a name/value pair,
+ * together with flags recorded for it during parsing.
+ */
+struct htp_header_t {
+ /** Header name. */
+ bstr *name;
+
+ /** Header value. */
+ bstr *value;
+
+ /** Parsing flags; a combination of: HTP_FIELD_INVALID, HTP_FIELD_FOLDED, HTP_FIELD_REPEATED. */
+ uint64_t flags;
+};
+
+/**
+ * Represents a single request parameter.
+ */
+struct htp_param_t {
+ /** Parameter name. */
+ bstr *name;
+
+ /** Parameter value. */
+ bstr *value;
+
+ /** Source of the parameter, for example HTP_SOURCE_QUERY_STRING. */
+ enum htp_data_source_t source;
+
+ /** Type of the data structure referenced by parser_data. */
+ enum htp_parser_id_t parser_id;
+
+ /**
+ * Pointer to the parser data structure that contains
+ * complete information about the parameter. Can be NULL.
+ */
+ void *parser_data;
+};
+
+/**
+ * Represents a single HTTP transaction, which is a combination of a request and a response.
+ */
+struct htp_tx_t {
+ /** The connection parser associated with this transaction. */
+ htp_connp_t *connp;
+
+ /** The connection to which this transaction belongs. */
+ htp_conn_t *conn;
+
+ /** The configuration structure associated with this transaction. */
+ htp_cfg_t *cfg;
+
+ /**
+ * Is the configuration structure shared with other transactions or connections? If
+ * this field is set to HTP_CONFIG_PRIVATE, the transaction owns the configuration.
+ */
+ int is_config_shared;
+
+ /** The user data associated with this transaction. */
+ void *user_data;
+
+
+ // Request fields
+
+ /** Contains a count of how many empty lines were skipped before the request line. */
+ unsigned int request_ignored_lines;
+
+ /** The first line of this request. */
+ bstr *request_line;
+
+ /** Request method. */
+ bstr *request_method;
+
+ /** Request method, as number. Available only if we were able to recognize the request method. */
+ enum htp_method_t request_method_number;
+
+ /**
+ * Request URI, raw, as given to us on the request line. This field can take different forms,
+ * for example authority for CONNECT methods, absolute URIs for proxy requests, and the query
+ * string when one is provided. Use htp_tx_t::parsed_uri if you need access to specific
+ * URI elements. Can be NULL if the request line contains only a request method (which is
+ * an extreme case of HTTP/0.9, but passes in practice).
+ */
+ bstr *request_uri;
+
+ /** Request protocol, as text. Can be NULL if no protocol was specified. */
+ bstr *request_protocol;
+
+ /**
+ * Protocol version as a number. Multiply the high version number by 100, then add the low
+ * version number. You should prefer to work with the pre-defined HTP_PROTOCOL_* constants.
+ */
+ int request_protocol_number;
+
+ /**
+ * Is this request using HTTP/0.9? We need a separate field for this purpose because
+ * the protocol version alone is not sufficient to determine if HTTP/0.9 is used. For
+ * example, if you submit "GET / HTTP/0.9" to Apache, it will not treat the request
+ * as HTTP/0.9.
+ */
+ int is_protocol_0_9;
+
+ /**
+ * This structure holds the individual components parsed out of the request URI, with
+ * appropriate normalization and transformation applied, per configuration. No information
+ * is added. In extreme cases when no URI is provided on the request line, all fields
+ * will be NULL. (Well, except for port_number, which will be -1.) To inspect raw data, use
+ * htp_tx_t::request_uri or htp_tx_t::parsed_uri_raw.
+ */
+ htp_uri_t *parsed_uri;
+
+ /**
+ * This structure holds the individual components parsed out of the request URI, but
+ * without any modification. The purpose of this field is to allow you to look at the data as it
+ * was supplied on the request line. Fields can be NULL, depending on what data was supplied.
+ * The port_number field is always -1.
+ */
+ htp_uri_t *parsed_uri_raw;
+
+ /* HTTP 1.1 RFC
+ *
+ * 4.3 Message Body
+ *
+ * The message-body (if any) of an HTTP message is used to carry the
+ * entity-body associated with the request or response. The message-body
+ * differs from the entity-body only when a transfer-coding has been
+ * applied, as indicated by the Transfer-Encoding header field (section
+ * 14.41).
+ *
+ * message-body = entity-body
+ * | <entity-body encoded as per Transfer-Encoding>
+ */
+
+ /**
+ * The length of the request message-body. In most cases, this value
+ * will be the same as request_entity_len. The values will be different
+ * if request compression or chunking were applied. In that case,
+ * request_message_len contains the length of the request body as it
+ * has been seen over TCP; request_entity_len contains length after
+ * de-chunking and decompression.
+ */
+ int64_t request_message_len;
+
+ /**
+ * The length of the request entity-body. In most cases, this value
+ * will be the same as request_message_len. The values will be different
+ * if request compression or chunking were applied. In that case,
+ * request_message_len contains the length of the request body as it
+ * has been seen over TCP; request_entity_len contains length after
+ * de-chunking and decompression.
+ */
+ int64_t request_entity_len;
+
+ /** Parsed request headers. */
+ htp_table_t *request_headers;
+
+ /**
+ * Request transfer coding. Can be one of HTP_CODING_UNKNOWN (body presence not
+ * determined yet), HTP_CODING_IDENTITY, HTP_CODING_CHUNKED, HTP_CODING_NO_BODY,
+ * and HTP_CODING_UNRECOGNIZED.
+ */
+ enum htp_transfer_coding_t request_transfer_coding;
+
+ /** Request body compression. */
+ enum htp_content_encoding_t request_content_encoding;
+
+ /**
+ * This field contains the request content type when that information is
+ * available in request headers. The contents of the field will be converted
+ * to lowercase and any parameters (e.g., character set information) removed.
+ */
+ bstr *request_content_type;
+
+ /**
+ * Contains the value specified in the Content-Length header. The value of this
+ * field will be -1 from the beginning of the transaction and until request
+ * headers are processed. It will stay -1 if the C-L header was not provided,
+ * or if the value in it cannot be parsed.
+ */
+ int64_t request_content_length;
+
+ /**
+ * Transaction-specific REQUEST_BODY_DATA hook. Behaves as
+ * the configuration hook with the same name.
+ */
+ htp_hook_t *hook_request_body_data;
+
+ /**
+ * Transaction-specific RESPONSE_BODY_DATA hook. Behaves as
+ * the configuration hook with the same name.
+ */
+ htp_hook_t *hook_response_body_data;
+
+ /**
+ * Query string URLENCODED parser. Available only
+ * when the query string is not NULL and not empty.
+ */
+ htp_urlenp_t *request_urlenp_query;
+
+ /**
+ * Request body URLENCODED parser. Available only when the request body is in the
+ * application/x-www-form-urlencoded format and the parser was configured to run.
+ */
+ htp_urlenp_t *request_urlenp_body;
+
+ /**
+ * Request body MULTIPART parser. Available only when the body is in the
+ * multipart/form-data format and the parser was configured to run.
+ */
+ htp_mpartp_t *request_mpartp;
+
+ /** Request parameters. */
+ htp_table_t *request_params;
+
+ /** Request cookies */
+ htp_table_t *request_cookies;
+
+ /** Authentication type used in the request. */
+ enum htp_auth_type_t request_auth_type;
+
+ /** Authentication username. */
+ bstr *request_auth_username;
+
+ /** Authentication password. Available only when htp_tx_t::request_auth_type is HTP_AUTH_BASIC. */
+ bstr *request_auth_password;
+
+ /**
+ * Request hostname. Per the RFC, the hostname will be taken from the Host header
+ * when available. If the host information is also available in the URI, it is used
+ * instead of whatever might be in the Host header. Can be NULL. This field does
+ * not contain port information.
+ */
+ bstr *request_hostname;
+
+ /**
+ * Request port number, if presented. The rules for htp_tx_t::request_hostname apply. Set to
+ * -1 by default.
+ */
+ int request_port_number;
+
+
+ // Response fields
+
+ /** How many empty lines did we ignore before reaching the status line? */
+ unsigned int response_ignored_lines;
+
+ /** Response line. */
+ bstr *response_line;
+
+ /** Response protocol, as text. Can be NULL. */
+ bstr *response_protocol;
+
+ /**
+ * Response protocol as number. Available only if we were able to parse the protocol version,
+ * HTP_PROTOCOL_INVALID otherwise. HTP_PROTOCOL_UNKNOWN until parsing is attempted.
+ */
+ int response_protocol_number;
+
+ /**
+ * Response status code, as text. Starts as NULL and can remain NULL on
+ * an invalid response that does not specify status code.
+ */
+ bstr *response_status;
+
+ /**
+ * Response status code, available only if we were able to parse it, HTP_STATUS_INVALID
+ * otherwise. HTP_STATUS_UNKNOWN until parsing is attempted.
+ */
+ int response_status_number;
+
+ /**
+ * This field is set by the protocol decoder when it thinks that the
+ * backend server will reject a request with a particular status code.
+ */
+ int response_status_expected_number;
+
+ /** The message associated with the response status code. Can be NULL. */
+ bstr *response_message;
+
+ /** Have we seen the server respond with a 100 response? */
+ int seen_100continue;
+
+ /** Parsed response headers. Contains instances of htp_header_t. */
+ htp_table_t *response_headers;
+
+ /* HTTP 1.1 RFC
+ *
+ * 4.3 Message Body
+ *
+ * The message-body (if any) of an HTTP message is used to carry the
+ * entity-body associated with the request or response. The message-body
+ * differs from the entity-body only when a transfer-coding has been
+ * applied, as indicated by the Transfer-Encoding header field (section
+ * 14.41).
+ *
+ * message-body = entity-body
+ * | <entity-body encoded as per Transfer-Encoding>
+ */
+
+ /**
+ * The length of the response message-body. In most cases, this value
+ * will be the same as response_entity_len. The values will be different
+ * if response compression or chunking were applied. In that case,
+ * response_message_len contains the length of the response body as it
+ * has been seen over TCP; response_entity_len contains the length after
+ * de-chunking and decompression.
+ */
+ int64_t response_message_len;
+
+ /**
+ * The length of the response entity-body. In most cases, this value
+ * will be the same as response_message_len. The values will be different
+ * if response compression or chunking were applied. In that case,
+ * response_message_len contains the length of the response body as it
+ * has been seen over TCP; response_entity_len contains length after
+ * de-chunking and decompression.
+ */
+ int64_t response_entity_len;
+
+ /**
+ * Contains the value specified in the Content-Length header. The value of this
+ * field will be -1 from the beginning of the transaction and until response
+ * headers are processed. It will stay -1 if the C-L header was not provided,
+ * or if the value in it cannot be parsed.
+ */
+ int64_t response_content_length;
+
+ /**
+ * Response transfer coding, which indicates if there is a response body,
+ * and how it is transported (e.g., as-is, or chunked).
+ */
+ enum htp_transfer_coding_t response_transfer_coding;
+
+ /**
+ * Response body compression, which indicates if compression is used
+ * for the response body. This field is an interpretation of the information
+ * available in response headers.
+ */
+ enum htp_content_encoding_t response_content_encoding;
+
+ /**
+ * Response body compression processing information, which is related to how
+ * the library is going to process (or has processed) a response body. Changing
+ * this field mid-processing can influence library actions. For example, setting
+ * this field to HTP_COMPRESSION_NONE in a RESPONSE_HEADERS callback will prevent
+ * decompression.
+ */
+ enum htp_content_encoding_t response_content_encoding_processing;
+
+ /**
+ * This field will contain the response content type when that information
+ * is available in response headers. The contents of the field will be converted
+ * to lowercase and any parameters (e.g., character set information) removed.
+ */
+ bstr *response_content_type;
+
+
+ // Common fields
+
+ /**
+ * Parsing flags; a combination of: HTP_REQUEST_INVALID_T_E, HTP_INVALID_FOLDING,
+ * HTP_REQUEST_SMUGGLING, HTP_MULTI_PACKET_HEAD, and HTP_FIELD_UNPARSEABLE.
+ */
+ uint64_t flags;
+
+ /** Request progress. */
+ enum htp_tx_req_progress_t request_progress;
+
+ /** Response progress. */
+ enum htp_tx_res_progress_t response_progress;
+
+ /** Transaction index on the connection. */
+ size_t index;
+
+ /** Total repetitions for headers in request. */
+ uint16_t req_header_repetitions;
+
+ /** Total repetitions for headers in response. */
+ uint16_t res_header_repetitions;
+};
+
+/**
+ * This structure is used to pass transaction data (for example
+ * request and response body buffers) to callbacks. It bundles the
+ * transaction the data belongs to with a pointer/length pair that
+ * describes the buffer.
+ */
+struct htp_tx_data_t {
+ /** Transaction pointer. */
+ htp_tx_t *tx;
+
+ /** Pointer to the data buffer. Declared const, so it is read-only through this structure. */
+ const unsigned char *data;
+
+ /** Buffer length, i.e. the number of unsigned char elements available at data. */
+ size_t len;
+
+ /**
+ * Indicator if this chunk of data is the last in the series. Currently
+ * used only by REQUEST_HEADER_DATA, REQUEST_TRAILER_DATA, RESPONSE_HEADER_DATA,
+ * and RESPONSE_TRAILER_DATA callbacks; for other callbacks the field carries
+ * no documented meaning.
+ */
+ int is_last;
+};
+
+/**
+ * URI structure. Each of the fields provides access to a single
+ * URI element. Where an element is not present in a URI, the
+ * corresponding field will be set to NULL (string fields) or -1
+ * (numeric fields), depending on the field type.
+ */
+struct htp_uri_t {
+ /** Scheme, e.g., "http". */
+ bstr *scheme;
+
+ /** Username. */
+ bstr *username;
+
+ /** Password. */
+ bstr *password;
+
+ /** Hostname. */
+ bstr *hostname;
+
+ /** Port, as string. See port_number for the numeric form. */
+ bstr *port;
+
+ /**
+ * Port, as number. This field will contain HTP_PORT_NONE if there was
+ * no port information in the URI and HTP_PORT_INVALID if the port information
+ * was invalid (e.g., it's not a number or it falls out of range).
+ */
+ int port_number;
+
+ /** The path part of this URI. */
+ bstr *path;
+
+ /** Query string. */
+ bstr *query;
+
+ /**
+ * Fragment identifier. This field will rarely be available in a server-side
+ * setting, but it's not impossible to see it.
+ */
+ bstr *fragment;
+};
+
+/**
+ * Frees all data contained in the uri, and then the uri itself.
+ * Because the structure itself is released, the pointer must not be
+ * used after this call returns.
+ *
+ * @param[in] uri URI structure to release.
+ */
+void htp_uri_free(htp_uri_t *uri);
+
+/**
+ * Allocates and initializes a new htp_uri_t structure. The release
+ * function declared for this type is htp_uri_free().
+ *
+ * @return New structure, or NULL on memory allocation failure.
+ */
+htp_uri_t *htp_uri_alloc(void);
+
+/**
+ * Creates a new log entry and stores it with the connection. The file and line
+ * parameters are typically auto-generated using the HTP_LOG_MARK macro.
+ *
+ * @param[in] connp Connection parser the log entry is associated with.
+ * @param[in] file Source file name of the call site (see HTP_LOG_MARK).
+ * @param[in] line Source line number of the call site (see HTP_LOG_MARK).
+ * @param[in] level Severity of the entry, one of the htp_log_level_t values.
+ * @param[in] code Numeric code stored with the entry.
+ * @param[in] fmt Format string for the message.
+ *            NOTE(review): presumably printf-style; confirm against the implementation.
+ * @param[in] ... Arguments consumed by fmt.
+ */
+void htp_log(htp_connp_t *connp, const char *file, int line, enum htp_log_level_t level, int code, const char *fmt, ...);
+
+/**
+ * Performs in-place decoding of the input string, according to the configuration specified
+ * by cfg and ctx. On output, various flags (HTP_URLEN_*) might be set.
+ *
+ * @param[in] cfg Configuration that supplies the decoding rules.
+ * @param[in] ctx Decoder context that selects which set of rules in cfg applies.
+ * @param[in,out] input String to decode; it is modified in place.
+ * @param[out] flags Location in which HTP_URLEN_* flags may be set.
+ *
+ * @return HTP_OK on success, HTP_ERROR on failure.
+ */
+htp_status_t htp_urldecode_inplace(htp_cfg_t *cfg, enum htp_decoder_ctx_t ctx, bstr *input, uint64_t *flags);
+
+/**
+ * Performs in-place decoding of the input string, according to the configuration specified
+ * by cfg and ctx. On output, various flags (HTP_URLEN_*) might be set. If something in the
+ * input would cause a particular server to respond with an error, the appropriate status
+ * code will be set.
+ *
+ * @param[in] cfg Configuration that supplies the decoding rules.
+ * @param[in] ctx Decoder context that selects which set of rules in cfg applies.
+ * @param[in,out] input String to decode; it is modified in place.
+ * @param[out] flags Location in which HTP_URLEN_* flags may be set.
+ * @param[out] expected_status_code 0 by default, or status code as necessary
+ *
+ * @return HTP_OK on success, HTP_ERROR on failure.
+ */
+htp_status_t htp_urldecode_inplace_ex(htp_cfg_t *cfg, enum htp_decoder_ctx_t ctx, bstr *input, uint64_t *flags, int *expected_status_code);
+
+/**
+ * Returns the LibHTP version string.
+ *
+ * NOTE(review): ownership of the returned string is not visible from this
+ * declaration; presumably it is static storage that must not be freed or
+ * modified -- confirm against the implementation.
+ *
+ * @return LibHTP version, for example "LibHTP v0.5.x".
+ */
+char *htp_get_version(void);
+
+#ifdef __cplusplus
+}
+#endif
+
+#endif /* _HTP_H */
diff --git a/htp/htp_base64.c b/htp/htp_base64.c
new file mode 100644
index 0000000..75dc122
--- /dev/null
+++ b/htp/htp_base64.c
@@ -0,0 +1,196 @@
+/***************************************************************************
+ * Copyright (c) 2009-2010 Open Information Security Foundation
+ * Copyright (c) 2010-2013 Qualys, Inc.
+ * All rights reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions are
+ * met:
+ *
+ * - Redistributions of source code must retain the above copyright
+ * notice, this list of conditions and the following disclaimer.
+
+ * - Redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in the
+ * documentation and/or other materials provided with the distribution.
+
+ * - Neither the name of the Qualys, Inc. nor the names of its
+ * contributors may be used to endorse or promote products derived from
+ * this software without specific prior written permission.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+ * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+ * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+ * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
+ * HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+ * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
+ * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+ * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
+ * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+ * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+ * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+ ***************************************************************************/
+
+/**
+ * @file
+ * @author Ivan Ristic <ivanr@webkreator.com>
+ */
+
+/* Adapted from the libb64 project (http://sourceforge.net/projects/libb64), which is in public domain. */
+
+#include "bstr.h"
+#include "htp_base64.h"
+
+/**
+ * Decode a single base64-encoded character.
+ *
+ * The lookup table covers the contiguous ASCII range from '+' (43) up to
+ * 'z' (122); anything outside of that range is rejected before the table
+ * is consulted.
+ *
+ * @param[in] value_in Character to decode.
+ * @return The 6-bit value (0-63) of a base64 alphabet character, -2 for the
+ *         padding character '=', and -1 for every other input.
+ */
+int htp_base64_decode_single(signed char value_in) {
+    static const signed char decoding[] = {62, -1, -1, -1, 63, 52, 53, 54, 55, 56, 57, 58, 59, 60, 61,
+        -1, -1, -1, -2, -1, -1, -1, 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17,
+        18, 19, 20, 21, 22, 23, 24, 25, -1, -1, -1, -1, -1, -1, 26, 27, 28, 29, 30, 31, 32, 33, 34,
+        35, 36, 37, 38, 39, 40, 41, 42, 43, 44, 45, 46, 47, 48, 49, 50, 51};
+    static const int decoding_size = (int) sizeof (decoding);
+
+    // Compute the table index as an int. Subtracting in signed char would narrow
+    // results below -128 back into the type, which is implementation-defined.
+    const int index = (int) value_in - 43;
+
+    if ((index < 0) || (index >= decoding_size)) return -1;
+
+    return decoding[index];
+}
+
+/**
+ * Put a base64 decoder into its starting state: no partially assembled
+ * output byte, and the next input character is treated as the first
+ * character of a four-character group.
+ *
+ * @param[in] decoder Decoder state to reset.
+ */
+void htp_base64_decoder_init(htp_base64_decoder *decoder) {
+    decoder->plainchar = 0;
+    decoder->step = step_a;
+}
+
+/**
+ * Feed the supplied memory range to the decoder.
+ *
+ * Input characters that htp_base64_decode_single() reports as negative
+ * (anything outside the base64 alphabet, including the '=' padding) are
+ * skipped. When the input runs out, the current step and the partially
+ * assembled output byte are saved in the decoder so that a later call can
+ * continue. When the output buffer fills up first, the function returns
+ * without updating the decoder state.
+ *
+ * @param[in,out] decoder Decoder state, see htp_base64_decoder_init().
+ * @param[in] _code_in Base64-encoded input.
+ * @param[in] length_in Number of input bytes; a negative value decodes nothing.
+ * @param[out] _plaintext_out Buffer that receives the decoded bytes.
+ * @param[in] length_out Capacity of the output buffer, in bytes.
+ * @return how many bytes were placed into plaintext output
+ */
+int htp_base64_decode(htp_base64_decoder *decoder, const void *_code_in, int length_in, void *_plaintext_out, int length_out) {
+    const unsigned char *code_in = (const unsigned char *)_code_in;
+    unsigned char *plaintext_out = (unsigned char *)_plaintext_out;
+    const unsigned char *codechar = code_in;
+    unsigned char *plainchar = plaintext_out;
+    signed char fragment;
+
+    // A negative input length would make the end-of-input test below unreachable.
+    if ((length_out <= 0) || (length_in < 0)) return 0;
+
+    const unsigned char *code_end = code_in + length_in;
+
+    // Resume with whatever partial byte the previous call left behind.
+    *plainchar = decoder->plainchar;
+
+    // The switch jumps into the middle of the loop so that decoding continues
+    // at the step where the previous call ran out of input.
+    switch (decoder->step) {
+        while (1) {
+            case step_a:
+                do {
+                    if (codechar == code_end) {
+                        decoder->step = step_a;
+                        decoder->plainchar = *plainchar;
+                        return (int) (plainchar - plaintext_out);
+                    }
+                    fragment = (signed char) htp_base64_decode_single((signed char) *codechar++);
+                } while (fragment < 0);
+                *plainchar = (unsigned char) ((fragment & 0x03f) << 2);
+                /* fall through */
+
+            case step_b:
+                do {
+                    if (codechar == code_end) {
+                        decoder->step = step_b;
+                        decoder->plainchar = *plainchar;
+                        return (int) (plainchar - plaintext_out);
+                    }
+                    fragment = (signed char) htp_base64_decode_single((signed char) *codechar++);
+                } while (fragment < 0);
+                *plainchar++ |= (fragment & 0x030) >> 4;
+                // Check the remaining capacity BEFORE seeding the next output byte;
+                // otherwise a full buffer would be written one byte past its end.
+                if (--length_out == 0) {
+                    return (int) (plainchar - plaintext_out);
+                }
+                *plainchar = (unsigned char) ((fragment & 0x00f) << 4);
+                /* fall through */
+
+            case step_c:
+                do {
+                    if (codechar == code_end) {
+                        decoder->step = step_c;
+                        decoder->plainchar = *plainchar;
+                        return (int) (plainchar - plaintext_out);
+                    }
+                    fragment = (signed char) htp_base64_decode_single((signed char) *codechar++);
+                } while (fragment < 0);
+                *plainchar++ |= (fragment & 0x03c) >> 2;
+                // Same ordering as in step_b: capacity check first, look-ahead store second.
+                if (--length_out == 0) {
+                    return (int) (plainchar - plaintext_out);
+                }
+                *plainchar = (unsigned char) ((fragment & 0x003) << 6);
+                /* fall through */
+
+            case step_d:
+                do {
+                    if (codechar == code_end) {
+                        decoder->step = step_d;
+                        decoder->plainchar = *plainchar;
+                        return (int) (plainchar - plaintext_out);
+                    }
+                    fragment = (signed char) htp_base64_decode_single((signed char) *codechar++);
+                } while (fragment < 0);
+                *plainchar++ |= (fragment & 0x03f);
+                if (--length_out == 0) {
+                    return (int) (plainchar - plaintext_out);
+                }
+                /* fall through */
+        }
+    }
+
+    /* control should not reach here */
+    return (int) (plainchar - plaintext_out);
+}
+
+/**
+ * Base64-decode the contents of a bstring. This is a thin wrapper that hands
+ * the string's data pointer and length to htp_base64_decode_mem().
+ *
+ * @param[in] input String holding the base64-encoded data.
+ * @return new base64-decoded bstring, or whatever htp_base64_decode_mem()
+ *         returns for that memory range (NULL included).
+ */
+bstr *htp_base64_decode_bstr(bstr *input) {
+    const void *encoded = bstr_ptr(input);
+    size_t encoded_len = bstr_len(input);
+
+    return htp_base64_decode_mem(encoded, encoded_len);
+}
+
+/**
+ * Base64-decode input, given as memory range.
+ *
+ * The data is decoded into a temporary buffer of len bytes, which is copied
+ * into a new bstring and then released.
+ *
+ * @param[in] data Base64-encoded input.
+ * @param[in] len Number of input bytes.
+ * @return new base64-decoded bstring; NULL if the temporary buffer cannot be
+ *         allocated, if no output bytes were produced, or if bstr_dup_mem()
+ *         returns NULL.
+ */
+bstr *htp_base64_decode_mem(const void *data, size_t len) {
+    unsigned char *scratch = malloc(len);
+    if (scratch == NULL) {
+        return NULL;
+    }
+
+    htp_base64_decoder state;
+    htp_base64_decoder_init(&state);
+
+    // The same length is passed as input size and as output capacity.
+    const int decoded_len = htp_base64_decode(&state, data, (int) len, scratch, (int) len);
+
+    bstr *result = NULL;
+    if (decoded_len > 0) {
+        result = bstr_dup_mem(scratch, decoded_len);
+    }
+
+    free(scratch);
+    return result;
+}
diff --git a/htp/htp_base64.h b/htp/htp_base64.h
new file mode 100644
index 0000000..8978e7a
--- /dev/null
+++ b/htp/htp_base64.h
@@ -0,0 +1,74 @@
+/***************************************************************************
+ * Copyright (c) 2009-2010 Open Information Security Foundation
+ * Copyright (c) 2010-2013 Qualys, Inc.
+ * All rights reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions are
+ * met:
+ *
+ * - Redistributions of source code must retain the above copyright
+ * notice, this list of conditions and the following disclaimer.
+
+ * - Redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in the
+ * documentation and/or other materials provided with the distribution.
+
+ * - Neither the name of the Qualys, Inc. nor the names of its
+ * contributors may be used to endorse or promote products derived from
+ * this software without specific prior written permission.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+ * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+ * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+ * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
+ * HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+ * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
+ * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+ * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
+ * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+ * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+ * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+ ***************************************************************************/
+
+/**
+ * @file
+ * @author Ivan Ristic <ivanr@webkreator.com>
+ */
+
+/* Adapted from the libb64 project (http://sourceforge.net/projects/libb64), which is in public domain. */
+
+#ifndef _HTP_BASE64_H
+#define _HTP_BASE64_H
+
+#ifdef __cplusplus
+extern "C" {
+#endif
+
+#include "bstr.h"
+
+/**
+ * Position of the decoder within a group of four base64 input characters.
+ * htp_base64_decode() stores the step at which it ran out of input, and
+ * switches on it when it is called again.
+ */
+typedef enum {
+ step_a, step_b, step_c, step_d
+} htp_base64_decodestep;
+
+/**
+ * Base64 decoder state carried between calls to htp_base64_decode().
+ */
+typedef struct {
+ htp_base64_decodestep step; /* Step at which decoding continues. */
+ char plainchar; /* Partially assembled output byte saved when input ran out. */
+} htp_base64_decoder;
+
+/** Initializes the decoder state (step_a, no pending output byte). */
+void htp_base64_decoder_init(htp_base64_decoder *state_in);
+
+/** Decodes one base64 character; negative return values mark characters that carry no data. */
+int htp_base64_decode_single(signed char value_in);
+
+/** Decodes length_in bytes from code_in into plaintext_out; returns the number of bytes written. */
+int htp_base64_decode(htp_base64_decoder *decoder, const void *code_in, int length_in, void *plaintext_out, int length_out);
+
+/** Decodes the contents of a bstring; returns a new bstring, or NULL. */
+bstr *htp_base64_decode_bstr(bstr *input);
+
+/** Decodes a memory range; returns a new bstring, or NULL. */
+bstr *htp_base64_decode_mem(const void *data, size_t len);
+
+#ifdef __cplusplus
+}
+#endif
+
+#endif /* _HTP_BASE64_H */
+
diff --git a/htp/htp_config.c b/htp/htp_config.c
new file mode 100644
index 0000000..00ae853
--- /dev/null
+++ b/htp/htp_config.c
@@ -0,0 +1,954 @@
+/***************************************************************************
+ * Copyright (c) 2009-2010 Open Information Security Foundation
+ * Copyright (c) 2010-2013 Qualys, Inc.
+ * All rights reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions are
+ * met:
+ *
+ * - Redistributions of source code must retain the above copyright
+ * notice, this list of conditions and the following disclaimer.
+
+ * - Redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in the
+ * documentation and/or other materials provided with the distribution.
+
+ * - Neither the name of the Qualys, Inc. nor the names of its
+ * contributors may be used to endorse or promote products derived from
+ * this software without specific prior written permission.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+ * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+ * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+ * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
+ * HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+ * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
+ * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+ * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
+ * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+ * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+ * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+ ***************************************************************************/
+
+/**
+ * @file
+ * @author Ivan Ristic <ivanr@webkreator.com>
+ */
+
+#include "htp_config_auto.h"
+
+#include "htp_private.h"
+
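+/**
+ * This map is used by default for best-fit mapping from the Unicode
+ * values U+0100-U+FFFF. As can be seen from the data, it is laid out as
+ * three-byte entries (code point high byte, code point low byte, replacement
+ * byte) and ends with an all-zero entry.
+ */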
+static unsigned char bestfit_1252[] = {
+ 0x01, 0x00, 0x41, 0x01, 0x01, 0x61, 0x01, 0x02, 0x41, 0x01, 0x03, 0x61,
+ 0x01, 0x04, 0x41, 0x01, 0x05, 0x61, 0x01, 0x06, 0x43, 0x01, 0x07, 0x63,
+ 0x01, 0x08, 0x43, 0x01, 0x09, 0x63, 0x01, 0x0a, 0x43, 0x01, 0x0b, 0x63,
+ 0x01, 0x0c, 0x43, 0x01, 0x0d, 0x63, 0x01, 0x0e, 0x44, 0x01, 0x0f, 0x64,
+ 0x01, 0x11, 0x64, 0x01, 0x12, 0x45, 0x01, 0x13, 0x65, 0x01, 0x14, 0x45,
+ 0x01, 0x15, 0x65, 0x01, 0x16, 0x45, 0x01, 0x17, 0x65, 0x01, 0x18, 0x45,
+ 0x01, 0x19, 0x65, 0x01, 0x1a, 0x45, 0x01, 0x1b, 0x65, 0x01, 0x1c, 0x47,
+ 0x01, 0x1d, 0x67, 0x01, 0x1e, 0x47, 0x01, 0x1f, 0x67, 0x01, 0x20, 0x47,
+ 0x01, 0x21, 0x67, 0x01, 0x22, 0x47, 0x01, 0x23, 0x67, 0x01, 0x24, 0x48,
+ 0x01, 0x25, 0x68, 0x01, 0x26, 0x48, 0x01, 0x27, 0x68, 0x01, 0x28, 0x49,
+ 0x01, 0x29, 0x69, 0x01, 0x2a, 0x49, 0x01, 0x2b, 0x69, 0x01, 0x2c, 0x49,
+ 0x01, 0x2d, 0x69, 0x01, 0x2e, 0x49, 0x01, 0x2f, 0x69, 0x01, 0x30, 0x49,
+ 0x01, 0x31, 0x69, 0x01, 0x34, 0x4a, 0x01, 0x35, 0x6a, 0x01, 0x36, 0x4b,
+ 0x01, 0x37, 0x6b, 0x01, 0x39, 0x4c, 0x01, 0x3a, 0x6c, 0x01, 0x3b, 0x4c,
+ 0x01, 0x3c, 0x6c, 0x01, 0x3d, 0x4c, 0x01, 0x3e, 0x6c, 0x01, 0x41, 0x4c,
+ 0x01, 0x42, 0x6c, 0x01, 0x43, 0x4e, 0x01, 0x44, 0x6e, 0x01, 0x45, 0x4e,
+ 0x01, 0x46, 0x6e, 0x01, 0x47, 0x4e, 0x01, 0x48, 0x6e, 0x01, 0x4c, 0x4f,
+ 0x01, 0x4d, 0x6f, 0x01, 0x4e, 0x4f, 0x01, 0x4f, 0x6f, 0x01, 0x50, 0x4f,
+ 0x01, 0x51, 0x6f, 0x01, 0x54, 0x52, 0x01, 0x55, 0x72, 0x01, 0x56, 0x52,
+ 0x01, 0x57, 0x72, 0x01, 0x58, 0x52, 0x01, 0x59, 0x72, 0x01, 0x5a, 0x53,
+ 0x01, 0x5b, 0x73, 0x01, 0x5c, 0x53, 0x01, 0x5d, 0x73, 0x01, 0x5e, 0x53,
+ 0x01, 0x5f, 0x73, 0x01, 0x62, 0x54, 0x01, 0x63, 0x74, 0x01, 0x64, 0x54,
+ 0x01, 0x65, 0x74, 0x01, 0x66, 0x54, 0x01, 0x67, 0x74, 0x01, 0x68, 0x55,
+ 0x01, 0x69, 0x75, 0x01, 0x6a, 0x55, 0x01, 0x6b, 0x75, 0x01, 0x6c, 0x55,
+ 0x01, 0x6d, 0x75, 0x01, 0x6e, 0x55, 0x01, 0x6f, 0x75, 0x01, 0x70, 0x55,
+ 0x01, 0x71, 0x75, 0x01, 0x72, 0x55, 0x01, 0x73, 0x75, 0x01, 0x74, 0x57,
+ 0x01, 0x75, 0x77, 0x01, 0x76, 0x59, 0x01, 0x77, 0x79, 0x01, 0x79, 0x5a,
+ 0x01, 0x7b, 0x5a, 0x01, 0x7c, 0x7a, 0x01, 0x80, 0x62, 0x01, 0x97, 0x49,
+ 0x01, 0x9a, 0x6c, 0x01, 0x9f, 0x4f, 0x01, 0xa0, 0x4f, 0x01, 0xa1, 0x6f,
+ 0x01, 0xab, 0x74, 0x01, 0xae, 0x54, 0x01, 0xaf, 0x55, 0x01, 0xb0, 0x75,
+ 0x01, 0xb6, 0x7a, 0x01, 0xc0, 0x7c, 0x01, 0xc3, 0x21, 0x01, 0xcd, 0x41,
+ 0x01, 0xce, 0x61, 0x01, 0xcf, 0x49, 0x01, 0xd0, 0x69, 0x01, 0xd1, 0x4f,
+ 0x01, 0xd2, 0x6f, 0x01, 0xd3, 0x55, 0x01, 0xd4, 0x75, 0x01, 0xd5, 0x55,
+ 0x01, 0xd6, 0x75, 0x01, 0xd7, 0x55, 0x01, 0xd8, 0x75, 0x01, 0xd9, 0x55,
+ 0x01, 0xda, 0x75, 0x01, 0xdb, 0x55, 0x01, 0xdc, 0x75, 0x01, 0xde, 0x41,
+ 0x01, 0xdf, 0x61, 0x01, 0xe4, 0x47, 0x01, 0xe5, 0x67, 0x01, 0xe6, 0x47,
+ 0x01, 0xe7, 0x67, 0x01, 0xe8, 0x4b, 0x01, 0xe9, 0x6b, 0x01, 0xea, 0x4f,
+ 0x01, 0xeb, 0x6f, 0x01, 0xec, 0x4f, 0x01, 0xed, 0x6f, 0x01, 0xf0, 0x6a,
+ 0x02, 0x61, 0x67, 0x02, 0xb9, 0x27, 0x02, 0xba, 0x22, 0x02, 0xbc, 0x27,
+ 0x02, 0xc4, 0x5e, 0x02, 0xc8, 0x27, 0x02, 0xcb, 0x60, 0x02, 0xcd, 0x5f,
+ 0x03, 0x00, 0x60, 0x03, 0x02, 0x5e, 0x03, 0x03, 0x7e, 0x03, 0x0e, 0x22,
+ 0x03, 0x31, 0x5f, 0x03, 0x32, 0x5f, 0x03, 0x7e, 0x3b, 0x03, 0x93, 0x47,
+ 0x03, 0x98, 0x54, 0x03, 0xa3, 0x53, 0x03, 0xa6, 0x46, 0x03, 0xa9, 0x4f,
+ 0x03, 0xb1, 0x61, 0x03, 0xb4, 0x64, 0x03, 0xb5, 0x65, 0x03, 0xc0, 0x70,
+ 0x03, 0xc3, 0x73, 0x03, 0xc4, 0x74, 0x03, 0xc6, 0x66, 0x04, 0xbb, 0x68,
+ 0x05, 0x89, 0x3a, 0x06, 0x6a, 0x25, 0x20, 0x00, 0x20, 0x20, 0x01, 0x20,
+ 0x20, 0x02, 0x20, 0x20, 0x03, 0x20, 0x20, 0x04, 0x20, 0x20, 0x05, 0x20,
+ 0x20, 0x06, 0x20, 0x20, 0x10, 0x2d, 0x20, 0x11, 0x2d, 0x20, 0x17, 0x3d,
+ 0x20, 0x32, 0x27, 0x20, 0x35, 0x60, 0x20, 0x44, 0x2f, 0x20, 0x74, 0x34,
+ 0x20, 0x75, 0x35, 0x20, 0x76, 0x36, 0x20, 0x77, 0x37, 0x20, 0x78, 0x38,
+ 0x20, 0x7f, 0x6e, 0x20, 0x80, 0x30, 0x20, 0x81, 0x31, 0x20, 0x82, 0x32,
+ 0x20, 0x83, 0x33, 0x20, 0x84, 0x34, 0x20, 0x85, 0x35, 0x20, 0x86, 0x36,
+ 0x20, 0x87, 0x37, 0x20, 0x88, 0x38, 0x20, 0x89, 0x39, 0x20, 0xa7, 0x50,
+ 0x21, 0x02, 0x43, 0x21, 0x07, 0x45, 0x21, 0x0a, 0x67, 0x21, 0x0b, 0x48,
+ 0x21, 0x0c, 0x48, 0x21, 0x0d, 0x48, 0x21, 0x0e, 0x68, 0x21, 0x10, 0x49,
+ 0x21, 0x11, 0x49, 0x21, 0x12, 0x4c, 0x21, 0x13, 0x6c, 0x21, 0x15, 0x4e,
+ 0x21, 0x18, 0x50, 0x21, 0x19, 0x50, 0x21, 0x1a, 0x51, 0x21, 0x1b, 0x52,
+ 0x21, 0x1c, 0x52, 0x21, 0x1d, 0x52, 0x21, 0x24, 0x5a, 0x21, 0x28, 0x5a,
+ 0x21, 0x2a, 0x4b, 0x21, 0x2c, 0x42, 0x21, 0x2d, 0x43, 0x21, 0x2e, 0x65,
+ 0x21, 0x2f, 0x65, 0x21, 0x30, 0x45, 0x21, 0x31, 0x46, 0x21, 0x33, 0x4d,
+ 0x21, 0x34, 0x6f, 0x22, 0x12, 0x2d, 0x22, 0x15, 0x2f, 0x22, 0x16, 0x5c,
+ 0x22, 0x17, 0x2a, 0x22, 0x1a, 0x76, 0x22, 0x1e, 0x38, 0x22, 0x23, 0x7c,
+ 0x22, 0x29, 0x6e, 0x22, 0x36, 0x3a, 0x22, 0x3c, 0x7e, 0x22, 0x61, 0x3d,
+ 0x22, 0x64, 0x3d, 0x22, 0x65, 0x3d, 0x23, 0x03, 0x5e, 0x23, 0x20, 0x28,
+ 0x23, 0x21, 0x29, 0x23, 0x29, 0x3c, 0x23, 0x2a, 0x3e, 0x25, 0x00, 0x2d,
+ 0x25, 0x0c, 0x2b, 0x25, 0x10, 0x2b, 0x25, 0x14, 0x2b, 0x25, 0x18, 0x2b,
+ 0x25, 0x1c, 0x2b, 0x25, 0x2c, 0x2d, 0x25, 0x34, 0x2d, 0x25, 0x3c, 0x2b,
+ 0x25, 0x50, 0x2d, 0x25, 0x52, 0x2b, 0x25, 0x53, 0x2b, 0x25, 0x54, 0x2b,
+ 0x25, 0x55, 0x2b, 0x25, 0x56, 0x2b, 0x25, 0x57, 0x2b, 0x25, 0x58, 0x2b,
+ 0x25, 0x59, 0x2b, 0x25, 0x5a, 0x2b, 0x25, 0x5b, 0x2b, 0x25, 0x5c, 0x2b,
+ 0x25, 0x5d, 0x2b, 0x25, 0x64, 0x2d, 0x25, 0x65, 0x2d, 0x25, 0x66, 0x2d,
+ 0x25, 0x67, 0x2d, 0x25, 0x68, 0x2d, 0x25, 0x69, 0x2d, 0x25, 0x6a, 0x2b,
+ 0x25, 0x6b, 0x2b, 0x25, 0x6c, 0x2b, 0x25, 0x84, 0x5f, 0x27, 0x58, 0x7c,
+ 0x30, 0x00, 0x20, 0x30, 0x08, 0x3c, 0x30, 0x09, 0x3e, 0x30, 0x1a, 0x5b,
+ 0x30, 0x1b, 0x5d, 0xff, 0x01, 0x21, 0xff, 0x02, 0x22, 0xff, 0x03, 0x23,
+ 0xff, 0x04, 0x24, 0xff, 0x05, 0x25, 0xff, 0x06, 0x26, 0xff, 0x07, 0x27,
+ 0xff, 0x08, 0x28, 0xff, 0x09, 0x29, 0xff, 0x0a, 0x2a, 0xff, 0x0b, 0x2b,
+ 0xff, 0x0c, 0x2c, 0xff, 0x0d, 0x2d, 0xff, 0x0e, 0x2e, 0xff, 0x0f, 0x2f,
+ 0xff, 0x10, 0x30, 0xff, 0x11, 0x31, 0xff, 0x12, 0x32, 0xff, 0x13, 0x33,
+ 0xff, 0x14, 0x34, 0xff, 0x15, 0x35, 0xff, 0x16, 0x36, 0xff, 0x17, 0x37,
+ 0xff, 0x18, 0x38, 0xff, 0x19, 0x39, 0xff, 0x1a, 0x3a, 0xff, 0x1b, 0x3b,
+ 0xff, 0x1c, 0x3c, 0xff, 0x1d, 0x3d, 0xff, 0x1e, 0x3e, 0xff, 0x20, 0x40,
+ 0xff, 0x21, 0x41, 0xff, 0x22, 0x42, 0xff, 0x23, 0x43, 0xff, 0x24, 0x44,
+ 0xff, 0x25, 0x45, 0xff, 0x26, 0x46, 0xff, 0x27, 0x47, 0xff, 0x28, 0x48,
+ 0xff, 0x29, 0x49, 0xff, 0x2a, 0x4a, 0xff, 0x2b, 0x4b, 0xff, 0x2c, 0x4c,
+ 0xff, 0x2d, 0x4d, 0xff, 0x2e, 0x4e, 0xff, 0x2f, 0x4f, 0xff, 0x30, 0x50,
+ 0xff, 0x31, 0x51, 0xff, 0x32, 0x52, 0xff, 0x33, 0x53, 0xff, 0x34, 0x54,
+ 0xff, 0x35, 0x55, 0xff, 0x36, 0x56, 0xff, 0x37, 0x57, 0xff, 0x38, 0x58,
+ 0xff, 0x39, 0x59, 0xff, 0x3a, 0x5a, 0xff, 0x3b, 0x5b, 0xff, 0x3c, 0x5c,
+ 0xff, 0x3d, 0x5d, 0xff, 0x3e, 0x5e, 0xff, 0x3f, 0x5f, 0xff, 0x40, 0x60,
+ 0xff, 0x41, 0x61, 0xff, 0x42, 0x62, 0xff, 0x43, 0x63, 0xff, 0x44, 0x64,
+ 0xff, 0x45, 0x65, 0xff, 0x46, 0x66, 0xff, 0x47, 0x67, 0xff, 0x48, 0x68,
+ 0xff, 0x49, 0x69, 0xff, 0x4a, 0x6a, 0xff, 0x4b, 0x6b, 0xff, 0x4c, 0x6c,
+ 0xff, 0x4d, 0x6d, 0xff, 0x4e, 0x6e, 0xff, 0x4f, 0x6f, 0xff, 0x50, 0x70,
+ 0xff, 0x51, 0x71, 0xff, 0x52, 0x72, 0xff, 0x53, 0x73, 0xff, 0x54, 0x74,
+ 0xff, 0x55, 0x75, 0xff, 0x56, 0x76, 0xff, 0x57, 0x77, 0xff, 0x58, 0x78,
+ 0xff, 0x59, 0x79, 0xff, 0x5a, 0x7a, 0xff, 0x5b, 0x7b, 0xff, 0x5c, 0x7c,
+ 0xff, 0x5d, 0x7d, 0xff, 0x5e, 0x7e, 0x00, 0x00, 0x00
+};
+
+/**
+ * Creates a new configuration structure and fills in the library defaults.
+ * The structure is zero-initialized first, so every field that is not set
+ * explicitly below starts out as 0/NULL.
+ *
+ * @return New configuration structure, or NULL if the allocation failed.
+ */
+htp_cfg_t *htp_config_create(void) {
+    htp_cfg_t *cfg = calloc(1, sizeof *cfg);
+    if (cfg == NULL) {
+        return NULL;
+    }
+
+    // Field limits and logging.
+    cfg->field_limit_soft = HTP_FIELD_LIMIT_SOFT;
+    cfg->field_limit_hard = HTP_FIELD_LIMIT_HARD;
+    cfg->log_level = HTP_LOG_NOTICE;
+
+    // Request-side parsing.
+    cfg->parse_request_cookies = 1;
+    cfg->parse_request_auth = 1;
+    cfg->extract_request_files = 0;
+    cfg->extract_request_files_limit = -1; // Use the parser default.
+    cfg->allow_space_uri = 0;
+
+    // Decompression.
+    cfg->request_decompression_enabled = 0; // disabled by default
+    cfg->response_decompression_enabled = 1;
+    cfg->response_decompression_layer_limit = 2; // 2 layers seem fairly common
+    cfg->response_lzma_layer_limit = 1; // default is only one layer
+    cfg->lzma_memlimit = HTP_LZMA_MEMLIMIT;
+    cfg->compression_bomb_limit = HTP_COMPRESSION_BOMB_LIMIT;
+    cfg->compression_time_limit = HTP_COMPRESSION_TIME_LIMIT_USEC;
+
+    // Default settings for URL-encoded data. The setter calls are kept in
+    // their established order; the server personality is applied last.
+    htp_config_set_bestfit_map(cfg, HTP_DECODER_DEFAULTS, bestfit_1252);
+    htp_config_set_bestfit_replacement_byte(cfg, HTP_DECODER_DEFAULTS, '?');
+
+    htp_config_set_url_encoding_invalid_handling(cfg, HTP_DECODER_DEFAULTS, HTP_URL_DECODE_PRESERVE_PERCENT);
+    htp_config_set_nul_raw_terminates(cfg, HTP_DECODER_DEFAULTS, 0);
+    htp_config_set_nul_encoded_terminates(cfg, HTP_DECODER_DEFAULTS, 0);
+    htp_config_set_u_encoding_decode(cfg, HTP_DECODER_DEFAULTS, 0);
+
+    htp_config_set_plusspace_decode(cfg, HTP_DECODER_URLENCODED, 1);
+
+    htp_config_set_server_personality(cfg, HTP_SERVER_MINIMAL);
+
+    return cfg;
+}
+
+/**
+ * Creates a copy of the supplied configuration structure. All plain fields
+ * are copied bit-for-bit; every hook is duplicated with htp_hook_copy() so
+ * that the copy owns its own hook structures.
+ *
+ * @param[in] cfg Configuration to copy; may be NULL.
+ * @return The new configuration, or NULL if cfg is NULL, if memory for the
+ *         copy cannot be allocated, or if any hook cannot be duplicated.
+ */
+htp_cfg_t *htp_config_copy(htp_cfg_t *cfg) {
+    if (cfg == NULL) return NULL;
+
+    // Start by making a copy of the entire structure,
+    // which is essentially a shallow copy.
+    htp_cfg_t *copy = malloc(sizeof (htp_cfg_t));
+    if (copy == NULL) return NULL;
+    memcpy(copy, cfg, sizeof (htp_cfg_t));
+
+    // Every hook in the copy, paired with the hook it has to be duplicated from.
+    struct {
+        htp_hook_t **dst;
+        htp_hook_t *src;
+    } hooks[] = {
+        { &copy->hook_request_start, cfg->hook_request_start },
+        { &copy->hook_request_line, cfg->hook_request_line },
+        { &copy->hook_request_uri_normalize, cfg->hook_request_uri_normalize },
+        { &copy->hook_request_header_data, cfg->hook_request_header_data },
+        { &copy->hook_request_headers, cfg->hook_request_headers },
+        { &copy->hook_request_body_data, cfg->hook_request_body_data },
+        { &copy->hook_request_file_data, cfg->hook_request_file_data },
+        { &copy->hook_request_trailer, cfg->hook_request_trailer },
+        { &copy->hook_request_trailer_data, cfg->hook_request_trailer_data },
+        { &copy->hook_request_complete, cfg->hook_request_complete },
+        { &copy->hook_response_start, cfg->hook_response_start },
+        { &copy->hook_response_line, cfg->hook_response_line },
+        { &copy->hook_response_header_data, cfg->hook_response_header_data },
+        { &copy->hook_response_headers, cfg->hook_response_headers },
+        { &copy->hook_response_body_data, cfg->hook_response_body_data },
+        { &copy->hook_response_trailer, cfg->hook_response_trailer },
+        { &copy->hook_response_trailer_data, cfg->hook_response_trailer_data },
+        { &copy->hook_response_complete, cfg->hook_response_complete },
+        { &copy->hook_transaction_complete, cfg->hook_transaction_complete },
+        { &copy->hook_log, cfg->hook_log }
+    };
+    const size_t hook_count = sizeof (hooks) / sizeof (hooks[0]);
+
+    // After the memcpy() the copy's hook pointers alias the hooks owned by cfg.
+    // Detach all of them first: if duplication fails part-way through,
+    // htp_config_destroy(copy) must not destroy hooks that still belong to cfg.
+    for (size_t i = 0; i < hook_count; i++) {
+        *hooks[i].dst = NULL;
+    }
+
+    // Now create copies of the hooks' structures.
+    for (size_t i = 0; i < hook_count; i++) {
+        if (hooks[i].src == NULL) continue;
+
+        *hooks[i].dst = htp_hook_copy(hooks[i].src);
+        if (*hooks[i].dst == NULL) {
+            htp_config_destroy(copy);
+            return NULL;
+        }
+    }
+
+    return copy;
+}
+
+/**
+ * Releases a configuration structure: every hook pointer held by cfg is
+ * handed to htp_hook_destroy(), then cfg itself is freed.
+ * A NULL cfg is accepted and ignored.
+ */
+void htp_config_destroy(htp_cfg_t *cfg) {
+    if (cfg != NULL) {
+        /* Request-side hooks. */
+        htp_hook_destroy(cfg->hook_request_start);
+        htp_hook_destroy(cfg->hook_request_line);
+        htp_hook_destroy(cfg->hook_request_uri_normalize);
+        htp_hook_destroy(cfg->hook_request_header_data);
+        htp_hook_destroy(cfg->hook_request_headers);
+        htp_hook_destroy(cfg->hook_request_body_data);
+        htp_hook_destroy(cfg->hook_request_file_data);
+        htp_hook_destroy(cfg->hook_request_trailer);
+        htp_hook_destroy(cfg->hook_request_trailer_data);
+        htp_hook_destroy(cfg->hook_request_complete);
+
+        /* Response-side hooks. */
+        htp_hook_destroy(cfg->hook_response_start);
+        htp_hook_destroy(cfg->hook_response_line);
+        htp_hook_destroy(cfg->hook_response_header_data);
+        htp_hook_destroy(cfg->hook_response_headers);
+        htp_hook_destroy(cfg->hook_response_body_data);
+        htp_hook_destroy(cfg->hook_response_trailer);
+        htp_hook_destroy(cfg->hook_response_trailer_data);
+        htp_hook_destroy(cfg->hook_response_complete);
+
+        /* Transaction and logging hooks. */
+        htp_hook_destroy(cfg->hook_transaction_complete);
+        htp_hook_destroy(cfg->hook_log);
+
+        free(cfg);
+    }
+}
+
+/**
+ * Returns the user_data pointer stored in cfg, or NULL when cfg is NULL.
+ */
+void *htp_config_get_user_data(htp_cfg_t *cfg) {
+    return (cfg != NULL) ? cfg->user_data : NULL;
+}
+
+/**
+ * Registers callback_fn on the log hook (cfg->hook_log) through
+ * htp_hook_register(). Does nothing when cfg is NULL.
+ */
+void htp_config_register_log(htp_cfg_t *cfg, int (*callback_fn)(htp_log_t *)) {
+    if (cfg != NULL) {
+        htp_hook_register(&cfg->hook_log, (htp_callback_fn_t) callback_fn);
+    }
+}
+
+/**
+ * Enables the built-in multipart parser by registering
+ * htp_ch_multipart_callback_request_headers as a request-headers callback.
+ * Does nothing when cfg is NULL.
+ */
+void htp_config_register_multipart_parser(htp_cfg_t *cfg) {
+    if (cfg != NULL) {
+        htp_config_register_request_headers(cfg, htp_ch_multipart_callback_request_headers);
+    }
+}
+
+/**
+ * Registers callback_fn on cfg->hook_request_complete.
+ * Does nothing when cfg is NULL.
+ */
+void htp_config_register_request_complete(htp_cfg_t *cfg, int (*callback_fn)(htp_tx_t *)) {
+    if (cfg != NULL) {
+        htp_hook_register(&cfg->hook_request_complete, (htp_callback_fn_t) callback_fn);
+    }
+}
+
+/**
+ * Registers callback_fn on cfg->hook_request_body_data.
+ * Does nothing when cfg is NULL.
+ */
+void htp_config_register_request_body_data(htp_cfg_t *cfg, int (*callback_fn)(htp_tx_data_t *)) {
+    if (cfg != NULL) {
+        htp_hook_register(&cfg->hook_request_body_data, (htp_callback_fn_t) callback_fn);
+    }
+}
+
+/**
+ * Registers callback_fn on cfg->hook_request_file_data.
+ * Does nothing when cfg is NULL.
+ */
+void htp_config_register_request_file_data(htp_cfg_t *cfg, int (*callback_fn)(htp_file_data_t *)) {
+    if (cfg != NULL) {
+        htp_hook_register(&cfg->hook_request_file_data, (htp_callback_fn_t) callback_fn);
+    }
+}
+
+/**
+ * Registers callback_fn on cfg->hook_request_uri_normalize.
+ * Does nothing when cfg is NULL.
+ */
+void htp_config_register_request_uri_normalize(htp_cfg_t *cfg, int (*callback_fn)(htp_tx_t *)) {
+    if (cfg != NULL) {
+        htp_hook_register(&cfg->hook_request_uri_normalize, (htp_callback_fn_t) callback_fn);
+    }
+}
+
+/**
+ * Registers callback_fn on cfg->hook_request_header_data.
+ * Does nothing when cfg is NULL.
+ */
+void htp_config_register_request_header_data(htp_cfg_t *cfg, int (*callback_fn)(htp_tx_data_t *)) {
+    if (cfg != NULL) {
+        htp_hook_register(&cfg->hook_request_header_data, (htp_callback_fn_t) callback_fn);
+    }
+}
+
+/**
+ * Registers callback_fn on cfg->hook_request_headers.
+ * Does nothing when cfg is NULL.
+ */
+void htp_config_register_request_headers(htp_cfg_t *cfg, int (*callback_fn)(htp_tx_t *)) {
+    if (cfg != NULL) {
+        htp_hook_register(&cfg->hook_request_headers, (htp_callback_fn_t) callback_fn);
+    }
+}
+
+/**
+ * Registers callback_fn on cfg->hook_request_line.
+ * Does nothing when cfg is NULL.
+ */
+void htp_config_register_request_line(htp_cfg_t *cfg, int (*callback_fn)(htp_tx_t *)) {
+    if (cfg != NULL) {
+        htp_hook_register(&cfg->hook_request_line, (htp_callback_fn_t) callback_fn);
+    }
+}
+
+/**
+ * Registers callback_fn on cfg->hook_request_start.
+ * Does nothing when cfg is NULL.
+ */
+void htp_config_register_request_start(htp_cfg_t *cfg, int (*callback_fn)(htp_tx_t *)) {
+    if (cfg != NULL) {
+        htp_hook_register(&cfg->hook_request_start, (htp_callback_fn_t) callback_fn);
+    }
+}
+
+/**
+ * Registers callback_fn on cfg->hook_request_trailer.
+ * Does nothing when cfg is NULL.
+ */
+void htp_config_register_request_trailer(htp_cfg_t *cfg, int (*callback_fn)(htp_tx_t *)) {
+    if (cfg != NULL) {
+        htp_hook_register(&cfg->hook_request_trailer, (htp_callback_fn_t) callback_fn);
+    }
+}
+
+/**
+ * Registers callback_fn on cfg->hook_request_trailer_data.
+ * Does nothing when cfg is NULL.
+ */
+void htp_config_register_request_trailer_data(htp_cfg_t *cfg, int (*callback_fn)(htp_tx_data_t *d)) {
+    if (cfg != NULL) {
+        htp_hook_register(&cfg->hook_request_trailer_data, (htp_callback_fn_t) callback_fn);
+    }
+}
+
+/**
+ * Registers callback_fn on cfg->hook_response_body_data.
+ * Does nothing when cfg is NULL.
+ */
+void htp_config_register_response_body_data(htp_cfg_t *cfg, int (*callback_fn)(htp_tx_data_t *)) {
+    if (cfg != NULL) {
+        htp_hook_register(&cfg->hook_response_body_data, (htp_callback_fn_t) callback_fn);
+    }
+}
+
+/**
+ * Registers callback_fn on cfg->hook_response_complete.
+ * Does nothing when cfg is NULL.
+ */
+void htp_config_register_response_complete(htp_cfg_t *cfg, int (*callback_fn)(htp_tx_t *)) {
+    if (cfg != NULL) {
+        htp_hook_register(&cfg->hook_response_complete, (htp_callback_fn_t) callback_fn);
+    }
+}
+
+/**
+ * Registers callback_fn on cfg->hook_response_header_data.
+ * Does nothing when cfg is NULL.
+ */
+void htp_config_register_response_header_data(htp_cfg_t *cfg, int (*callback_fn)(htp_tx_data_t *)) {
+    if (cfg != NULL) {
+        htp_hook_register(&cfg->hook_response_header_data, (htp_callback_fn_t) callback_fn);
+    }
+}
+
+/**
+ * Registers callback_fn on cfg->hook_response_headers.
+ * Does nothing when cfg is NULL.
+ */
+void htp_config_register_response_headers(htp_cfg_t *cfg, int (*callback_fn)(htp_tx_t *)) {
+    if (cfg != NULL) {
+        htp_hook_register(&cfg->hook_response_headers, (htp_callback_fn_t) callback_fn);
+    }
+}
+
+/**
+ * Registers callback_fn on cfg->hook_response_line.
+ * Does nothing when cfg is NULL.
+ */
+void htp_config_register_response_line(htp_cfg_t *cfg, int (*callback_fn)(htp_tx_t *)) {
+    if (cfg != NULL) {
+        htp_hook_register(&cfg->hook_response_line, (htp_callback_fn_t) callback_fn);
+    }
+}
+
+/**
+ * Registers callback_fn on cfg->hook_response_start.
+ * Does nothing when cfg is NULL.
+ */
+void htp_config_register_response_start(htp_cfg_t *cfg, int (*callback_fn)(htp_tx_t *)) {
+    if (cfg != NULL) {
+        htp_hook_register(&cfg->hook_response_start, (htp_callback_fn_t) callback_fn);
+    }
+}
+
+/**
+ * Registers callback_fn on cfg->hook_response_trailer.
+ * Does nothing when cfg is NULL.
+ */
+void htp_config_register_response_trailer(htp_cfg_t *cfg, int (*callback_fn)(htp_tx_t *)) {
+    if (cfg != NULL) {
+        htp_hook_register(&cfg->hook_response_trailer, (htp_callback_fn_t) callback_fn);
+    }
+}
+
+/**
+ * Registers callback_fn on cfg->hook_response_trailer_data.
+ * Does nothing when cfg is NULL.
+ */
+void htp_config_register_response_trailer_data(htp_cfg_t *cfg, int (*callback_fn)(htp_tx_data_t *d)) {
+    if (cfg != NULL) {
+        htp_hook_register(&cfg->hook_response_trailer_data, (htp_callback_fn_t) callback_fn);
+    }
+}
+
+/**
+ * Registers callback_fn on cfg->hook_transaction_complete.
+ * Does nothing when cfg is NULL.
+ */
+void htp_config_register_transaction_complete(htp_cfg_t *cfg, int (*callback_fn)(htp_tx_t *)) {
+    if (cfg != NULL) {
+        htp_hook_register(&cfg->hook_transaction_complete, (htp_callback_fn_t) callback_fn);
+    }
+}
+
+/**
+ * Enables the built-in urlencoded parser. Two callbacks are registered, in
+ * this order: htp_ch_urlencoded_callback_request_line on the request-line
+ * hook, then htp_ch_urlencoded_callback_request_headers on the
+ * request-headers hook. Does nothing when cfg is NULL.
+ */
+void htp_config_register_urlencoded_parser(htp_cfg_t *cfg) {
+    if (cfg != NULL) {
+        htp_config_register_request_line(cfg, htp_ch_urlencoded_callback_request_line);
+        htp_config_register_request_headers(cfg, htp_ch_urlencoded_callback_request_headers);
+    }
+}
+
+/**
+ * Stores the request-file extraction flag and its limit in cfg.
+ *
+ * @return HTP_ERROR when cfg is NULL or cfg->tmpdir has not been set
+ *         (in which case nothing is changed); HTP_OK otherwise.
+ */
+htp_status_t htp_config_set_extract_request_files(htp_cfg_t *cfg, int extract_request_files, int limit) {
+    /* Short-circuit evaluation keeps the tmpdir read behind the NULL test. */
+    if ((cfg == NULL) || (cfg->tmpdir == NULL)) {
+        return HTP_ERROR;
+    }
+
+    cfg->extract_request_files = extract_request_files;
+    cfg->extract_request_files_limit = limit;
+
+    return HTP_OK;
+}
+
+/**
+ * Stores soft_limit and hard_limit in cfg->field_limit_soft and
+ * cfg->field_limit_hard. No relation between the two values is enforced
+ * here. Does nothing when cfg is NULL.
+ */
+void htp_config_set_field_limits(htp_cfg_t *cfg, size_t soft_limit, size_t hard_limit) {
+    if (cfg != NULL) {
+        cfg->field_limit_soft = soft_limit;
+        cfg->field_limit_hard = hard_limit;
+    }
+}
+
+/**
+ * Stores memlimit in cfg->lzma_memlimit. Does nothing when cfg is NULL.
+ */
+void htp_config_set_lzma_memlimit(htp_cfg_t *cfg, size_t memlimit) {
+    if (cfg != NULL) {
+        cfg->lzma_memlimit = memlimit;
+    }
+}
+
+/**
+ * Stores limit in cfg->response_lzma_layer_limit.
+ * Does nothing when cfg is NULL.
+ */
+void htp_config_set_lzma_layers(htp_cfg_t *cfg, int limit) {
+    if (cfg != NULL) {
+        cfg->response_lzma_layer_limit = limit;
+    }
+}
+
+/**
+ * Stores limit in cfg->max_tx. Does nothing when cfg is NULL.
+ */
+void htp_config_set_max_tx(htp_cfg_t *cfg, uint32_t limit) {
+    if (cfg != NULL) {
+        cfg->max_tx = limit;
+    }
+}
+
+/**
+ * Stores the compression bomb limit in cfg->compression_bomb_limit.
+ * Values above INT32_MAX are clamped to INT32_MAX; anything else is stored
+ * after conversion to int32_t. Does nothing when cfg is NULL.
+ */
+void htp_config_set_compression_bomb_limit(htp_cfg_t *cfg, size_t bomblimit) {
+    if (cfg == NULL) {
+        return;
+    }
+
+    cfg->compression_bomb_limit = (bomblimit > INT32_MAX) ? INT32_MAX : (int32_t) bomblimit;
+}
+
+/**
+ * Stores the compression time limit, given in microseconds, in
+ * cfg->compression_time_limit. The stored value is capped at one second
+ * (1000000 microseconds). Does nothing when cfg is NULL.
+ */
+void htp_config_set_compression_time_limit(htp_cfg_t *cfg, size_t useclimit) {
+    if (cfg == NULL) {
+        return;
+    }
+
+    if (useclimit < 1000000) {
+        cfg->compression_time_limit = (int32_t) useclimit;
+    } else {
+        /* Cap: the maximum limit is one second. */
+        cfg->compression_time_limit = 1000000;
+    }
+}
+
+/**
+ * Stores log_level in cfg->log_level. Does nothing when cfg is NULL.
+ */
+void htp_config_set_log_level(htp_cfg_t *cfg, enum htp_log_level_t log_level) {
+    if (cfg != NULL) {
+        cfg->log_level = log_level;
+    }
+}
+
+/**
+ * Stores parse_request_auth, unmodified, in cfg->parse_request_auth.
+ * Does nothing when cfg is NULL.
+ */
+void htp_config_set_parse_request_auth(htp_cfg_t *cfg, int parse_request_auth) {
+    if (cfg != NULL) {
+        cfg->parse_request_auth = parse_request_auth;
+    }
+}
+
+/**
+ * Stores parse_request_cookies, unmodified, in cfg->parse_request_cookies.
+ * Does nothing when cfg is NULL.
+ */
+void htp_config_set_parse_request_cookies(htp_cfg_t *cfg, int parse_request_cookies) {
+    if (cfg != NULL) {
+        cfg->parse_request_cookies = parse_request_cookies;
+    }
+}
+
+/**
+ * Stores enabled, unmodified, in cfg->response_decompression_enabled.
+ * Does nothing when cfg is NULL.
+ */
+void htp_config_set_response_decompression(htp_cfg_t *cfg, int enabled) {
+    if (cfg != NULL) {
+        cfg->response_decompression_enabled = enabled;
+    }
+}
+
+/**
+ * Stores enabled, unmodified, in cfg->request_decompression_enabled.
+ * Does nothing when cfg is NULL.
+ */
+void htp_config_set_request_decompression(htp_cfg_t *cfg, int enabled) {
+    if (cfg != NULL) {
+        cfg->request_decompression_enabled = enabled;
+    }
+}
+
+/**
+ * Stores allow_space_uri, unmodified, in cfg->allow_space_uri.
+ * Does nothing when cfg is NULL.
+ */
+void htp_config_set_allow_space_uri(htp_cfg_t *cfg, int allow_space_uri) {
+    if (cfg != NULL) {
+        cfg->allow_space_uri = allow_space_uri;
+    }
+}
+
+/**
+ * Applies a server personality to cfg: selects the request/response line and
+ * header parsing functions and then calls a personality-specific list of
+ * decoder setters. Only the settings named in the chosen case are written;
+ * everything else in cfg keeps its previous value.
+ *
+ * @param[in] cfg
+ * @param[in] personality
+ * @return HTP_OK on success; HTP_ERROR when cfg is NULL or personality has no
+ *         case in the switch below, in which case cfg is left untouched.
+ */
+int htp_config_set_server_personality(htp_cfg_t *cfg, enum htp_server_personality_t personality) {
+ if (cfg == NULL) return HTP_ERROR;
+
+ switch (personality) {
+
+ // Generic parsers only; no decoder setters are called.
+ case HTP_SERVER_MINIMAL:
+ cfg->parse_request_line = htp_parse_request_line_generic;
+ cfg->process_request_header = htp_process_request_header_generic;
+ cfg->parse_response_line = htp_parse_response_line_generic;
+ cfg->process_response_header = htp_process_response_header_generic;
+ break;
+
+ // Generic parsers plus backslash conversion and path separator handling.
+ case HTP_SERVER_GENERIC:
+ cfg->parse_request_line = htp_parse_request_line_generic;
+ cfg->process_request_header = htp_process_request_header_generic;
+ cfg->parse_response_line = htp_parse_response_line_generic;
+ cfg->process_response_header = htp_process_response_header_generic;
+
+ htp_config_set_backslash_convert_slashes(cfg, HTP_DECODER_URL_PATH, 1);
+ htp_config_set_path_separators_decode(cfg, HTP_DECODER_URL_PATH, 1);
+ htp_config_set_path_separators_compress(cfg, HTP_DECODER_URL_PATH, 1);
+ break;
+
+ // Same as GENERIC, additionally enabling lowercase conversion, UTF-8
+ // best-fit conversion and %u decoding for the URL path.
+ case HTP_SERVER_IDS:
+ cfg->parse_request_line = htp_parse_request_line_generic;
+ cfg->process_request_header = htp_process_request_header_generic;
+ cfg->parse_response_line = htp_parse_response_line_generic;
+ cfg->process_response_header = htp_process_response_header_generic;
+
+ htp_config_set_backslash_convert_slashes(cfg, HTP_DECODER_URL_PATH, 1);
+ htp_config_set_path_separators_decode(cfg, HTP_DECODER_URL_PATH, 1);
+ htp_config_set_path_separators_compress(cfg, HTP_DECODER_URL_PATH, 1);
+ htp_config_set_convert_lowercase(cfg, HTP_DECODER_URL_PATH, 1);
+ htp_config_set_utf8_convert_bestfit(cfg, HTP_DECODER_URL_PATH, 1);
+ htp_config_set_u_encoding_decode(cfg, HTP_DECODER_URL_PATH, 1);
+ htp_config_set_requestline_leading_whitespace_unwanted(cfg, HTP_DECODER_DEFAULTS, HTP_UNWANTED_IGNORE);
+ break;
+
+ // The only personality that uses the Apache 2.2 request parsers; the
+ // response side still uses the generic parsers.
+ case HTP_SERVER_APACHE_2:
+ cfg->parse_request_line = htp_parse_request_line_apache_2_2;
+ cfg->process_request_header = htp_process_request_header_apache_2_2;
+ cfg->parse_response_line = htp_parse_response_line_generic;
+ cfg->process_response_header = htp_process_response_header_generic;
+
+ htp_config_set_backslash_convert_slashes(cfg, HTP_DECODER_URL_PATH, 0);
+ htp_config_set_path_separators_decode(cfg, HTP_DECODER_URL_PATH, 0);
+ htp_config_set_path_separators_compress(cfg, HTP_DECODER_URL_PATH, 1);
+ htp_config_set_u_encoding_decode(cfg, HTP_DECODER_URL_PATH, 0);
+
+ htp_config_set_url_encoding_invalid_handling(cfg, HTP_DECODER_URL_PATH, HTP_URL_DECODE_PRESERVE_PERCENT);
+ htp_config_set_url_encoding_invalid_unwanted(cfg, HTP_DECODER_URL_PATH, HTP_UNWANTED_400);
+ htp_config_set_control_chars_unwanted(cfg, HTP_DECODER_URL_PATH, HTP_UNWANTED_IGNORE);
+ htp_config_set_requestline_leading_whitespace_unwanted(cfg, HTP_DECODER_DEFAULTS, HTP_UNWANTED_400);
+ break;
+
+ // IIS 5.1: %u decoding disabled.
+ case HTP_SERVER_IIS_5_1:
+ cfg->parse_request_line = htp_parse_request_line_generic;
+ cfg->process_request_header = htp_process_request_header_generic;
+ cfg->parse_response_line = htp_parse_response_line_generic;
+ cfg->process_response_header = htp_process_response_header_generic;
+
+ htp_config_set_backslash_convert_slashes(cfg, HTP_DECODER_URL_PATH, 1);
+ htp_config_set_path_separators_decode(cfg, HTP_DECODER_URL_PATH, 1);
+ htp_config_set_path_separators_compress(cfg, HTP_DECODER_URL_PATH, 1);
+ htp_config_set_u_encoding_decode(cfg, HTP_DECODER_URL_PATH, 0);
+
+ htp_config_set_url_encoding_invalid_handling(cfg, HTP_DECODER_URL_PATH, HTP_URL_DECODE_PRESERVE_PERCENT);
+ htp_config_set_control_chars_unwanted(cfg, HTP_DECODER_URL_PATH, HTP_UNWANTED_IGNORE);
+ htp_config_set_requestline_leading_whitespace_unwanted(cfg, HTP_DECODER_DEFAULTS, HTP_UNWANTED_IGNORE);
+ break;
+
+ // IIS 6.0: %u decoding enabled.
+ case HTP_SERVER_IIS_6_0:
+ cfg->parse_request_line = htp_parse_request_line_generic;
+ cfg->process_request_header = htp_process_request_header_generic;
+ cfg->parse_response_line = htp_parse_response_line_generic;
+ cfg->process_response_header = htp_process_response_header_generic;
+
+ htp_config_set_backslash_convert_slashes(cfg, HTP_DECODER_URL_PATH, 1);
+ htp_config_set_path_separators_decode(cfg, HTP_DECODER_URL_PATH, 1);
+ htp_config_set_path_separators_compress(cfg, HTP_DECODER_URL_PATH, 1);
+ htp_config_set_u_encoding_decode(cfg, HTP_DECODER_URL_PATH, 1);
+
+ htp_config_set_url_encoding_invalid_handling(cfg, HTP_DECODER_URL_PATH, HTP_URL_DECODE_PRESERVE_PERCENT);
+ htp_config_set_u_encoding_unwanted(cfg, HTP_DECODER_URL_PATH, HTP_UNWANTED_400);
+ htp_config_set_control_chars_unwanted(cfg, HTP_DECODER_URL_PATH, HTP_UNWANTED_400);
+ htp_config_set_requestline_leading_whitespace_unwanted(cfg, HTP_DECODER_DEFAULTS, HTP_UNWANTED_IGNORE);
+ break;
+
+ // IIS 7.0 and 7.5 share one configuration.
+ case HTP_SERVER_IIS_7_0:
+ case HTP_SERVER_IIS_7_5:
+ cfg->parse_request_line = htp_parse_request_line_generic;
+ cfg->process_request_header = htp_process_request_header_generic;
+ cfg->parse_response_line = htp_parse_response_line_generic;
+ cfg->process_response_header = htp_process_response_header_generic;
+
+ htp_config_set_backslash_convert_slashes(cfg, HTP_DECODER_URL_PATH, 1);
+ htp_config_set_path_separators_decode(cfg, HTP_DECODER_URL_PATH, 1);
+ htp_config_set_path_separators_compress(cfg, HTP_DECODER_URL_PATH, 1);
+ htp_config_set_u_encoding_decode(cfg, HTP_DECODER_URL_PATH, 1);
+
+ htp_config_set_url_encoding_invalid_handling(cfg, HTP_DECODER_URL_PATH, HTP_URL_DECODE_PRESERVE_PERCENT);
+ htp_config_set_url_encoding_invalid_unwanted(cfg, HTP_DECODER_URL_PATH, HTP_UNWANTED_400);
+ htp_config_set_control_chars_unwanted(cfg, HTP_DECODER_URL_PATH, HTP_UNWANTED_400);
+ htp_config_set_requestline_leading_whitespace_unwanted(cfg, HTP_DECODER_DEFAULTS, HTP_UNWANTED_IGNORE);
+ break;
+
+ // Also reached for HTP_SERVER_IIS_4_0 and HTP_SERVER_IIS_5_0, which are
+ // declared in the enum but have no case in this switch.
+ default:
+ return HTP_ERROR;
+ }
+
+ // Remember the personality
+ cfg->server_personality = personality;
+
+ return HTP_OK;
+}
+
+/**
+ * Stores the tmpdir pointer in cfg->tmpdir. Only the pointer is stored; the
+ * string is not copied by this function. Does nothing when cfg is NULL.
+ */
+void htp_config_set_tmpdir(htp_cfg_t *cfg, char *tmpdir) {
+    if (cfg != NULL) {
+        cfg->tmpdir = tmpdir;
+    }
+}
+
+/**
+ * Stores tx_auto_destroy, unmodified, in cfg->tx_auto_destroy.
+ * Does nothing when cfg is NULL.
+ */
+void htp_config_set_tx_auto_destroy(htp_cfg_t *cfg, int tx_auto_destroy) {
+    if (cfg != NULL) {
+        cfg->tx_auto_destroy = tx_auto_destroy;
+    }
+}
+
+/**
+ * Stores the opaque user_data pointer in cfg->user_data.
+ * Does nothing when cfg is NULL.
+ */
+void htp_config_set_user_data(htp_cfg_t *cfg, void *user_data) {
+    if (cfg != NULL) {
+        cfg->user_data = user_data;
+    }
+}
+
+
+/* Normalizes a C truth value: any non-zero b yields 1, zero yields 0. */
+static int convert_to_0_or_1(int b) {
+    return (b != 0) ? 1 : 0;
+}
+
+/**
+ * Stores the best-fit map pointer for decoder context ctx. When ctx is
+ * HTP_DECODER_DEFAULTS the pointer is written to every decoder context.
+ * Only the pointer is stored; the map is not copied by this function.
+ * Does nothing when cfg is NULL or ctx is out of range.
+ */
+void htp_config_set_bestfit_map(htp_cfg_t *cfg, enum htp_decoder_ctx_t ctx, void *map) {
+    // Guard against a NULL configuration, like the other setters in this file.
+    if (cfg == NULL) return;
+    if (ctx >= HTP_DECODER_CONTEXTS_MAX) return;
+
+    cfg->decoder_cfgs[ctx].bestfit_map = map;
+
+    // Defaults propagate to all contexts.
+    if (ctx == HTP_DECODER_DEFAULTS) {
+        for (size_t i = 0; i < HTP_DECODER_CONTEXTS_MAX; i++) {
+            cfg->decoder_cfgs[i].bestfit_map = map;
+        }
+    }
+}
+
+/**
+ * Stores the best-fit replacement byte for decoder context ctx. The int
+ * argument b is converted to unsigned char before being stored. When ctx is
+ * HTP_DECODER_DEFAULTS the byte is written to every decoder context.
+ * Does nothing when cfg is NULL or ctx is out of range.
+ */
+void htp_config_set_bestfit_replacement_byte(htp_cfg_t *cfg, enum htp_decoder_ctx_t ctx, int b) {
+    // Guard against a NULL configuration, like the other setters in this file.
+    if (cfg == NULL) return;
+    if (ctx >= HTP_DECODER_CONTEXTS_MAX) return;
+
+    const unsigned char replacement = (unsigned char) b;
+
+    cfg->decoder_cfgs[ctx].bestfit_replacement_byte = replacement;
+
+    // Defaults propagate to all contexts.
+    if (ctx == HTP_DECODER_DEFAULTS) {
+        for (size_t i = 0; i < HTP_DECODER_CONTEXTS_MAX; i++) {
+            cfg->decoder_cfgs[i].bestfit_replacement_byte = replacement;
+        }
+    }
+}
+
+/**
+ * Stores how invalid URL encodings are handled for decoder context ctx.
+ * When ctx is HTP_DECODER_DEFAULTS the value is written to every decoder
+ * context. Does nothing when cfg is NULL or ctx is out of range.
+ */
+void htp_config_set_url_encoding_invalid_handling(htp_cfg_t *cfg, enum htp_decoder_ctx_t ctx, enum htp_url_encoding_handling_t handling) {
+    // Guard against a NULL configuration, like the other setters in this file.
+    if (cfg == NULL) return;
+    if (ctx >= HTP_DECODER_CONTEXTS_MAX) return;
+
+    cfg->decoder_cfgs[ctx].url_encoding_invalid_handling = handling;
+
+    // Defaults propagate to all contexts.
+    if (ctx == HTP_DECODER_DEFAULTS) {
+        for (size_t i = 0; i < HTP_DECODER_CONTEXTS_MAX; i++) {
+            cfg->decoder_cfgs[i].url_encoding_invalid_handling = handling;
+        }
+    }
+}
+
+/**
+ * Stores the nul_raw_terminates flag for decoder context ctx. The flag is
+ * normalized to 0 or 1. When ctx is HTP_DECODER_DEFAULTS the flag is written
+ * to every decoder context. Does nothing when cfg is NULL or ctx is out of
+ * range.
+ */
+void htp_config_set_nul_raw_terminates(htp_cfg_t *cfg, enum htp_decoder_ctx_t ctx, int enabled) {
+    // Guard against a NULL configuration, like the other setters in this file.
+    if (cfg == NULL) return;
+    if (ctx >= HTP_DECODER_CONTEXTS_MAX) return;
+
+    const int flag = convert_to_0_or_1(enabled);
+
+    cfg->decoder_cfgs[ctx].nul_raw_terminates = flag;
+
+    // Defaults propagate to all contexts.
+    if (ctx == HTP_DECODER_DEFAULTS) {
+        for (size_t i = 0; i < HTP_DECODER_CONTEXTS_MAX; i++) {
+            cfg->decoder_cfgs[i].nul_raw_terminates = flag;
+        }
+    }
+}
+
+/**
+ * Stores the nul_encoded_terminates flag for decoder context ctx. The flag
+ * is normalized to 0 or 1. When ctx is HTP_DECODER_DEFAULTS the flag is
+ * written to every decoder context. Does nothing when cfg is NULL or ctx is
+ * out of range.
+ */
+void htp_config_set_nul_encoded_terminates(htp_cfg_t *cfg, enum htp_decoder_ctx_t ctx, int enabled) {
+    // Guard against a NULL configuration, like the other setters in this file.
+    if (cfg == NULL) return;
+    if (ctx >= HTP_DECODER_CONTEXTS_MAX) return;
+
+    const int flag = convert_to_0_or_1(enabled);
+
+    cfg->decoder_cfgs[ctx].nul_encoded_terminates = flag;
+
+    // Defaults propagate to all contexts.
+    if (ctx == HTP_DECODER_DEFAULTS) {
+        for (size_t i = 0; i < HTP_DECODER_CONTEXTS_MAX; i++) {
+            cfg->decoder_cfgs[i].nul_encoded_terminates = flag;
+        }
+    }
+}
+
+/**
+ * Stores the u_encoding_decode flag for decoder context ctx. The flag is
+ * normalized to 0 or 1. When ctx is HTP_DECODER_DEFAULTS the flag is written
+ * to every decoder context. Does nothing when cfg is NULL or ctx is out of
+ * range.
+ */
+void htp_config_set_u_encoding_decode(htp_cfg_t *cfg, enum htp_decoder_ctx_t ctx, int enabled) {
+    // Guard against a NULL configuration, like the other setters in this file.
+    if (cfg == NULL) return;
+    if (ctx >= HTP_DECODER_CONTEXTS_MAX) return;
+
+    const int flag = convert_to_0_or_1(enabled);
+
+    cfg->decoder_cfgs[ctx].u_encoding_decode = flag;
+
+    // Defaults propagate to all contexts.
+    if (ctx == HTP_DECODER_DEFAULTS) {
+        for (size_t i = 0; i < HTP_DECODER_CONTEXTS_MAX; i++) {
+            cfg->decoder_cfgs[i].u_encoding_decode = flag;
+        }
+    }
+}
+
+/**
+ * Stores the backslash_convert_slashes flag for decoder context ctx. The
+ * flag is normalized to 0 or 1. When ctx is HTP_DECODER_DEFAULTS the flag is
+ * written to every decoder context. Does nothing when cfg is NULL or ctx is
+ * out of range.
+ */
+void htp_config_set_backslash_convert_slashes(htp_cfg_t *cfg, enum htp_decoder_ctx_t ctx, int enabled) {
+    // Guard against a NULL configuration, like the other setters in this file.
+    if (cfg == NULL) return;
+    if (ctx >= HTP_DECODER_CONTEXTS_MAX) return;
+
+    const int flag = convert_to_0_or_1(enabled);
+
+    cfg->decoder_cfgs[ctx].backslash_convert_slashes = flag;
+
+    // Defaults propagate to all contexts.
+    if (ctx == HTP_DECODER_DEFAULTS) {
+        for (size_t i = 0; i < HTP_DECODER_CONTEXTS_MAX; i++) {
+            cfg->decoder_cfgs[i].backslash_convert_slashes = flag;
+        }
+    }
+}
+
+/**
+ * Stores the path_separators_decode flag for decoder context ctx. The flag
+ * is normalized to 0 or 1. When ctx is HTP_DECODER_DEFAULTS the flag is
+ * written to every decoder context. Does nothing when cfg is NULL or ctx is
+ * out of range.
+ */
+void htp_config_set_path_separators_decode(htp_cfg_t *cfg, enum htp_decoder_ctx_t ctx, int enabled) {
+    // Guard against a NULL configuration, like the other setters in this file.
+    if (cfg == NULL) return;
+    if (ctx >= HTP_DECODER_CONTEXTS_MAX) return;
+
+    const int flag = convert_to_0_or_1(enabled);
+
+    cfg->decoder_cfgs[ctx].path_separators_decode = flag;
+
+    // Defaults propagate to all contexts.
+    if (ctx == HTP_DECODER_DEFAULTS) {
+        for (size_t i = 0; i < HTP_DECODER_CONTEXTS_MAX; i++) {
+            cfg->decoder_cfgs[i].path_separators_decode = flag;
+        }
+    }
+}
+
+/**
+ * Stores the path_separators_compress flag for decoder context ctx. The
+ * flag is normalized to 0 or 1. When ctx is HTP_DECODER_DEFAULTS the flag is
+ * written to every decoder context. Does nothing when cfg is NULL or ctx is
+ * out of range.
+ */
+void htp_config_set_path_separators_compress(htp_cfg_t *cfg, enum htp_decoder_ctx_t ctx, int enabled) {
+    // Guard against a NULL configuration, like the other setters in this file.
+    if (cfg == NULL) return;
+    if (ctx >= HTP_DECODER_CONTEXTS_MAX) return;
+
+    const int flag = convert_to_0_or_1(enabled);
+
+    cfg->decoder_cfgs[ctx].path_separators_compress = flag;
+
+    // Defaults propagate to all contexts.
+    if (ctx == HTP_DECODER_DEFAULTS) {
+        for (size_t i = 0; i < HTP_DECODER_CONTEXTS_MAX; i++) {
+            cfg->decoder_cfgs[i].path_separators_compress = flag;
+        }
+    }
+}
+
+/**
+ * Stores the plusspace_decode flag for decoder context ctx. The flag is
+ * normalized to 0 or 1. When ctx is HTP_DECODER_DEFAULTS the flag is written
+ * to every decoder context. Does nothing when cfg is NULL or ctx is out of
+ * range.
+ */
+void htp_config_set_plusspace_decode(htp_cfg_t *cfg, enum htp_decoder_ctx_t ctx, int enabled) {
+    // Guard against a NULL configuration, like the other setters in this file.
+    if (cfg == NULL) return;
+    if (ctx >= HTP_DECODER_CONTEXTS_MAX) return;
+
+    const int flag = convert_to_0_or_1(enabled);
+
+    cfg->decoder_cfgs[ctx].plusspace_decode = flag;
+
+    // Defaults propagate to all contexts.
+    if (ctx == HTP_DECODER_DEFAULTS) {
+        for (size_t i = 0; i < HTP_DECODER_CONTEXTS_MAX; i++) {
+            cfg->decoder_cfgs[i].plusspace_decode = flag;
+        }
+    }
+}
+
+/**
+ * Stores the convert_lowercase flag for decoder context ctx. The flag is
+ * normalized to 0 or 1. When ctx is HTP_DECODER_DEFAULTS the flag is written
+ * to every decoder context. Does nothing when cfg is NULL or ctx is out of
+ * range.
+ */
+void htp_config_set_convert_lowercase(htp_cfg_t *cfg, enum htp_decoder_ctx_t ctx, int enabled) {
+    // Guard against a NULL configuration, like the other setters in this file.
+    if (cfg == NULL) return;
+    if (ctx >= HTP_DECODER_CONTEXTS_MAX) return;
+
+    const int flag = convert_to_0_or_1(enabled);
+
+    cfg->decoder_cfgs[ctx].convert_lowercase = flag;
+
+    // Defaults propagate to all contexts.
+    if (ctx == HTP_DECODER_DEFAULTS) {
+        for (size_t i = 0; i < HTP_DECODER_CONTEXTS_MAX; i++) {
+            cfg->decoder_cfgs[i].convert_lowercase = flag;
+        }
+    }
+}
+
+/**
+ * Stores the utf8_convert_bestfit flag for decoder context ctx. The flag is
+ * normalized to 0 or 1. When ctx is HTP_DECODER_DEFAULTS the flag is written
+ * to every decoder context. Does nothing when cfg is NULL or ctx is out of
+ * range.
+ */
+void htp_config_set_utf8_convert_bestfit(htp_cfg_t *cfg, enum htp_decoder_ctx_t ctx, int enabled) {
+    // Guard against a NULL configuration, like the other setters in this file.
+    if (cfg == NULL) return;
+    if (ctx >= HTP_DECODER_CONTEXTS_MAX) return;
+
+    const int flag = convert_to_0_or_1(enabled);
+
+    cfg->decoder_cfgs[ctx].utf8_convert_bestfit = flag;
+
+    // Defaults propagate to all contexts.
+    if (ctx == HTP_DECODER_DEFAULTS) {
+        for (size_t i = 0; i < HTP_DECODER_CONTEXTS_MAX; i++) {
+            cfg->decoder_cfgs[i].utf8_convert_bestfit = flag;
+        }
+    }
+}
+
+/**
+ * Stores the reaction to %u-encoded input (u_encoding_unwanted) for decoder
+ * context ctx. When ctx is HTP_DECODER_DEFAULTS the value is written to
+ * every decoder context. Does nothing when cfg is NULL or ctx is out of
+ * range.
+ */
+void htp_config_set_u_encoding_unwanted(htp_cfg_t *cfg, enum htp_decoder_ctx_t ctx, enum htp_unwanted_t unwanted) {
+    // Guard against a NULL configuration, like the other setters in this file.
+    if (cfg == NULL) return;
+    if (ctx >= HTP_DECODER_CONTEXTS_MAX) return;
+
+    cfg->decoder_cfgs[ctx].u_encoding_unwanted = unwanted;
+
+    // Defaults propagate to all contexts.
+    if (ctx == HTP_DECODER_DEFAULTS) {
+        for (size_t i = 0; i < HTP_DECODER_CONTEXTS_MAX; i++) {
+            cfg->decoder_cfgs[i].u_encoding_unwanted = unwanted;
+        }
+    }
+}
+
+/**
+ * Stores the reaction to control characters (control_chars_unwanted) for
+ * decoder context ctx. When ctx is HTP_DECODER_DEFAULTS the value is written
+ * to every decoder context. Does nothing when cfg is NULL or ctx is out of
+ * range.
+ */
+void htp_config_set_control_chars_unwanted(htp_cfg_t *cfg, enum htp_decoder_ctx_t ctx, enum htp_unwanted_t unwanted) {
+    // Guard against a NULL configuration, like the other setters in this file.
+    if (cfg == NULL) return;
+    if (ctx >= HTP_DECODER_CONTEXTS_MAX) return;
+
+    // Write the control-character field; u_encoding_unwanted has its own
+    // setter and must not be touched from here.
+    cfg->decoder_cfgs[ctx].control_chars_unwanted = unwanted;
+
+    // Defaults propagate to all contexts.
+    if (ctx == HTP_DECODER_DEFAULTS) {
+        for (size_t i = 0; i < HTP_DECODER_CONTEXTS_MAX; i++) {
+            cfg->decoder_cfgs[i].control_chars_unwanted = unwanted;
+        }
+    }
+}
+
+/**
+ * Stores the reaction to invalid URL encodings
+ * (url_encoding_invalid_unwanted) for decoder context ctx. When ctx is
+ * HTP_DECODER_DEFAULTS the value is written to every decoder context.
+ * Does nothing when cfg is NULL or ctx is out of range.
+ */
+void htp_config_set_url_encoding_invalid_unwanted(htp_cfg_t *cfg, enum htp_decoder_ctx_t ctx, enum htp_unwanted_t unwanted) {
+    // Guard against a NULL configuration, like the other setters in this file.
+    if (cfg == NULL) return;
+    if (ctx >= HTP_DECODER_CONTEXTS_MAX) return;
+
+    cfg->decoder_cfgs[ctx].url_encoding_invalid_unwanted = unwanted;
+
+    // Defaults propagate to all contexts.
+    if (ctx == HTP_DECODER_DEFAULTS) {
+        for (size_t i = 0; i < HTP_DECODER_CONTEXTS_MAX; i++) {
+            cfg->decoder_cfgs[i].url_encoding_invalid_unwanted = unwanted;
+        }
+    }
+}
+
+/**
+ * Stores the reaction to encoded NUL bytes (nul_encoded_unwanted) for
+ * decoder context ctx. When ctx is HTP_DECODER_DEFAULTS the value is written
+ * to every decoder context. Does nothing when cfg is NULL or ctx is out of
+ * range.
+ */
+void htp_config_set_nul_encoded_unwanted(htp_cfg_t *cfg, enum htp_decoder_ctx_t ctx, enum htp_unwanted_t unwanted) {
+    // Guard against a NULL configuration, like the other setters in this file.
+    if (cfg == NULL) return;
+    if (ctx >= HTP_DECODER_CONTEXTS_MAX) return;
+
+    cfg->decoder_cfgs[ctx].nul_encoded_unwanted = unwanted;
+
+    // Defaults propagate to all contexts.
+    if (ctx == HTP_DECODER_DEFAULTS) {
+        for (size_t i = 0; i < HTP_DECODER_CONTEXTS_MAX; i++) {
+            cfg->decoder_cfgs[i].nul_encoded_unwanted = unwanted;
+        }
+    }
+}
+
+/**
+ * Stores the reaction to raw NUL bytes (nul_raw_unwanted) for decoder
+ * context ctx. When ctx is HTP_DECODER_DEFAULTS the value is written to
+ * every decoder context. Does nothing when cfg is NULL or ctx is out of
+ * range.
+ */
+void htp_config_set_nul_raw_unwanted(htp_cfg_t *cfg, enum htp_decoder_ctx_t ctx, enum htp_unwanted_t unwanted) {
+    // Guard against a NULL configuration, like the other setters in this file.
+    if (cfg == NULL) return;
+    if (ctx >= HTP_DECODER_CONTEXTS_MAX) return;
+
+    cfg->decoder_cfgs[ctx].nul_raw_unwanted = unwanted;
+
+    // Defaults propagate to all contexts.
+    if (ctx == HTP_DECODER_DEFAULTS) {
+        for (size_t i = 0; i < HTP_DECODER_CONTEXTS_MAX; i++) {
+            cfg->decoder_cfgs[i].nul_raw_unwanted = unwanted;
+        }
+    }
+}
+
+/**
+ * Stores the reaction to encoded path separators
+ * (path_separators_encoded_unwanted) for decoder context ctx. When ctx is
+ * HTP_DECODER_DEFAULTS the value is written to every decoder context.
+ * Does nothing when cfg is NULL or ctx is out of range.
+ */
+void htp_config_set_path_separators_encoded_unwanted(htp_cfg_t *cfg, enum htp_decoder_ctx_t ctx, enum htp_unwanted_t unwanted) {
+    // Guard against a NULL configuration, like the other setters in this file.
+    if (cfg == NULL) return;
+    if (ctx >= HTP_DECODER_CONTEXTS_MAX) return;
+
+    cfg->decoder_cfgs[ctx].path_separators_encoded_unwanted = unwanted;
+
+    // Defaults propagate to all contexts.
+    if (ctx == HTP_DECODER_DEFAULTS) {
+        for (size_t i = 0; i < HTP_DECODER_CONTEXTS_MAX; i++) {
+            cfg->decoder_cfgs[i].path_separators_encoded_unwanted = unwanted;
+        }
+    }
+}
+
+/**
+ * Stores the reaction to invalid UTF-8 (utf8_invalid_unwanted) for decoder
+ * context ctx. When ctx is HTP_DECODER_DEFAULTS the value is written to
+ * every decoder context. Does nothing when cfg is NULL or ctx is out of
+ * range.
+ */
+void htp_config_set_utf8_invalid_unwanted(htp_cfg_t *cfg, enum htp_decoder_ctx_t ctx, enum htp_unwanted_t unwanted) {
+    // Guard against a NULL configuration, like the other setters in this file.
+    if (cfg == NULL) return;
+    if (ctx >= HTP_DECODER_CONTEXTS_MAX) return;
+
+    cfg->decoder_cfgs[ctx].utf8_invalid_unwanted = unwanted;
+
+    // Defaults propagate to all contexts.
+    if (ctx == HTP_DECODER_DEFAULTS) {
+        for (size_t i = 0; i < HTP_DECODER_CONTEXTS_MAX; i++) {
+            cfg->decoder_cfgs[i].utf8_invalid_unwanted = unwanted;
+        }
+    }
+}
+
+/**
+ * Stores the reaction to leading whitespace on the request line. Unlike the
+ * per-context decoder setters, the value lives directly on cfg
+ * (cfg->requestline_leading_whitespace_unwanted); ctx is only range-checked.
+ * Does nothing when cfg is NULL or ctx is out of range.
+ */
+void htp_config_set_requestline_leading_whitespace_unwanted(htp_cfg_t *cfg, enum htp_decoder_ctx_t ctx, enum htp_unwanted_t unwanted) {
+    // Guard against a NULL configuration, like the other setters in this file.
+    if (cfg == NULL) return;
+    if (ctx >= HTP_DECODER_CONTEXTS_MAX) return;
+
+    cfg->requestline_leading_whitespace_unwanted = unwanted;
+}
+
+/**
+ * Stores limit in cfg->response_decompression_layer_limit.
+ * Does nothing when cfg is NULL.
+ */
+void htp_config_set_response_decompression_layer_limit(htp_cfg_t *cfg, int limit) {
+    if (cfg != NULL) {
+        cfg->response_decompression_layer_limit = limit;
+    }
+}
diff --git a/htp/htp_config.h b/htp/htp_config.h
new file mode 100644
index 0000000..57544f7
--- /dev/null
+++ b/htp/htp_config.h
@@ -0,0 +1,719 @@
+/***************************************************************************
+ * Copyright (c) 2009-2010 Open Information Security Foundation
+ * Copyright (c) 2010-2013 Qualys, Inc.
+ * All rights reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions are
+ * met:
+ *
+ * - Redistributions of source code must retain the above copyright
+ * notice, this list of conditions and the following disclaimer.
+
+ * - Redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in the
+ * documentation and/or other materials provided with the distribution.
+
+ * - Neither the name of the Qualys, Inc. nor the names of its
+ * contributors may be used to endorse or promote products derived from
+ * this software without specific prior written permission.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+ * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+ * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+ * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
+ * HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+ * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
+ * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+ * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
+ * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+ * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+ * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+ ***************************************************************************/
+
+/**
+ * @file
+ * @author Ivan Ristic <ivanr@webkreator.com>
+ */
+
+#ifndef HTP_CONFIG_H
+#define HTP_CONFIG_H
+
+#ifdef __cplusplus
+extern "C" {
+#endif
+
+#include "htp.h"
+
+/**
+ * Decoder contexts. The htp_config_set_* decoder functions take one of these
+ * values as their ctx argument and use it to index the configuration's
+ * decoder_cfgs array, so the numeric values must stay contiguous from 0.
+ */
+enum htp_decoder_ctx_t {
+ /** Default settings. Settings applied to this context are propagated to all other contexts. */
+ HTP_DECODER_DEFAULTS = 0,
+
+ /** Urlencoded decoder settings. */
+ HTP_DECODER_URLENCODED = 1,
+
+ /** URL path decoder settings. */
+ HTP_DECODER_URL_PATH = 2
+};
+
+/**
+ * Enumerates the possible server personalities.
+ */
+enum htp_server_personality_t {
+ /**
+ * Minimal personality that performs as little work as possible. All optional
+ * features are disabled. This personality is a good starting point for customization.
+ */
+ HTP_SERVER_MINIMAL = 0,
+
+ /** A generic personality that aims to work reasonably well for all server types. */
+ HTP_SERVER_GENERIC = 1,
+
+ /** The IDS personality tries to perform as much decoding as possible. */
+ HTP_SERVER_IDS = 2,
+
+ /** Mimics the behavior of IIS 4.0, as shipped with Windows NT 4.0. */
+ HTP_SERVER_IIS_4_0 = 3,
+
+ /** Mimics the behavior of IIS 5.0, as shipped with Windows 2000. */
+ HTP_SERVER_IIS_5_0 = 4,
+
+ /** Mimics the behavior of IIS 5.1, as shipped with Windows XP Professional. */
+ HTP_SERVER_IIS_5_1 = 5,
+
+ /** Mimics the behavior of IIS 6.0, as shipped with Windows 2003. */
+ HTP_SERVER_IIS_6_0 = 6,
+
+ /** Mimics the behavior of IIS 7.0, as shipped with Windows 2008. */
+ HTP_SERVER_IIS_7_0 = 7,
+
+ /** Mimics the behavior of IIS 7.5, as shipped with Windows 7. */
+ HTP_SERVER_IIS_7_5 = 8,
+
+ /** Mimics the behavior of Apache 2.x. */
+ HTP_SERVER_APACHE_2 = 9
+};
+
+/**
+ * Enumerates the ways in which servers respond to malformed data.
+ * The non-zero enumerators are numerically equal to the HTTP status code
+ * they stand for.
+ */
+enum htp_unwanted_t {
+
+ /** Ignores problem. */
+ HTP_UNWANTED_IGNORE = 0,
+
+ /** Responds with HTTP 400 status code. */
+ HTP_UNWANTED_400 = 400,
+
+ /** Responds with HTTP 404 status code. */
+ HTP_UNWANTED_404 = 404
+};
+
+/**
+ * Enumerates the possible approaches to handling invalid URL-encodings.
+ * Selected per decoder context with htp_config_set_url_encoding_invalid_handling().
+ */
+enum htp_url_encoding_handling_t {
+ /** Ignore invalid URL encodings and leave the % in the data. */
+ HTP_URL_DECODE_PRESERVE_PERCENT = 0,
+
+ /** Ignore invalid URL encodings, but remove the % from the data. */
+ HTP_URL_DECODE_REMOVE_PERCENT = 1,
+
+ /** Decode invalid URL encodings. */
+ HTP_URL_DECODE_PROCESS_INVALID = 2
+};
+
+/**
+ * Creates a new configuration structure. Configuration structures created at
+ * configuration time must not be changed afterwards in order to support lock-less
+ * copying.
+ *
+ * @return New configuration structure.
+ */
+htp_cfg_t *htp_config_create(void);
+
+/**
+ * Creates a copy of the supplied configuration structure. The idea is to create
+ * one or more configuration objects at configuration-time, but to use this
+ * function to create per-connection copies. That way it will be possible to
+ * adjust per-connection configuration as necessary, without affecting the
+ * global configuration. Make sure no other thread changes the configuration
+ * object while this function is operating.
+ *
+ * @param[in] cfg
+ * @return A copy of the configuration structure.
+ */
+htp_cfg_t *htp_config_copy(htp_cfg_t *cfg);
+
+/**
+ * Destroy a configuration structure.
+ *
+ * @param[in] cfg
+ */
+void htp_config_destroy(htp_cfg_t *cfg);
+
+/**
+ * Retrieves user data associated with this configuration.
+ *
+ * @param[in] cfg
+ * @return User data pointer, or NULL if not set.
+ */
+void *htp_config_get_user_data(htp_cfg_t *cfg);
+
+/**
+ * Registers a callback that is invoked every time there is a log message with
+ * severity equal to or higher than the configured log level.
+ *
+ * @param[in] cfg
+ * @param[in] callback_fn
+ */
+void htp_config_register_log(htp_cfg_t *cfg, int (*callback_fn)(htp_log_t *));
+
+/**
+ * Adds the built-in Multipart parser to the configuration. This parser will extract information
+ * stored in request bodies, when they are in multipart/form-data format.
+ *
+ * @param[in] cfg
+ */
+void htp_config_register_multipart_parser(htp_cfg_t *cfg);
+
+/**
+ * Registers a REQUEST_START callback, which is invoked every time a new
+ * request begins and before any parsing is done.
+ *
+ * @param[in] cfg
+ * @param[in] callback_fn
+ */
+void htp_config_register_request_start(htp_cfg_t *cfg, int (*callback_fn)(htp_tx_t *));
+
+/**
+ * Registers a REQUEST_BODY_DATA callback.
+ *
+ * @param[in] cfg
+ * @param[in] callback_fn
+ */
+void htp_config_register_request_body_data(htp_cfg_t *cfg, int (*callback_fn)(htp_tx_data_t *));
+
+/**
+ * Registers a REQUEST_COMPLETE callback.
+ *
+ * @param[in] cfg
+ * @param[in] callback_fn
+ */
+void htp_config_register_request_complete(htp_cfg_t *cfg, int (*callback_fn)(htp_tx_t *));
+
+/**
+ * Registers a REQUEST_FILE_DATA callback.
+ *
+ * @param[in] cfg
+ * @param[in] callback_fn
+ */
+void htp_config_register_request_file_data(htp_cfg_t *cfg, int (*callback_fn)(htp_file_data_t *));
+
+/**
+ * Registers a REQUEST_HEADER_DATA callback.
+ *
+ * @param[in] cfg
+ * @param[in] callback_fn
+ */
+void htp_config_register_request_header_data(htp_cfg_t *cfg, int (*callback_fn)(htp_tx_data_t *));
+
+/**
+ * Registers a REQUEST_HEADERS callback.
+ *
+ * @param[in] cfg
+ * @param[in] callback_fn
+ */
+void htp_config_register_request_headers(htp_cfg_t *cfg, int (*callback_fn)(htp_tx_t *));
+
+/**
+ * Registers a REQUEST_LINE callback.
+ *
+ * @param[in] cfg
+ * @param[in] callback_fn
+ */
+void htp_config_register_request_line(htp_cfg_t *cfg, int (*callback_fn)(htp_tx_t *));
+
+/**
+ * Registers a REQUEST_URI_NORMALIZE callback.
+ *
+ * @param[in] cfg
+ * @param[in] callback_fn
+ */
+void htp_config_register_request_uri_normalize(htp_cfg_t *cfg, int (*callback_fn)(htp_tx_t *));
+
+/**
+ * Registers a REQUEST_TRAILER callback.
+ *
+ * @param[in] cfg
+ * @param[in] callback_fn
+ */
+void htp_config_register_request_trailer(htp_cfg_t *cfg, int (*callback_fn)(htp_tx_t *));
+
+/**
+ * Registers a REQUEST_TRAILER_DATA callback.
+ *
+ * @param[in] cfg
+ * @param[in] callback_fn
+ */
+void htp_config_register_request_trailer_data(htp_cfg_t *cfg, int (*callback_fn)(htp_tx_data_t *d));
+
+/**
+ * Registers a RESPONSE_BODY_DATA callback.
+ *
+ * @param[in] cfg
+ * @param[in] callback_fn
+ */
+void htp_config_register_response_body_data(htp_cfg_t *cfg, int (*callback_fn)(htp_tx_data_t *));
+
+/**
+ * Registers a RESPONSE_COMPLETE callback.
+ *
+ * @param[in] cfg
+ * @param[in] callback_fn
+ */
+void htp_config_register_response_complete(htp_cfg_t *cfg, int (*callback_fn)(htp_tx_t *));
+
+/**
+ * Registers a RESPONSE_HEADER_DATA callback.
+ *
+ * @param[in] cfg
+ * @param[in] callback_fn
+ */
+void htp_config_register_response_header_data(htp_cfg_t *cfg, int (*callback_fn)(htp_tx_data_t *));
+
+/**
+ * Registers a RESPONSE_HEADERS callback.
+ *
+ * @param[in] cfg
+ * @param[in] callback_fn
+ */
+void htp_config_register_response_headers(htp_cfg_t *cfg, int (*callback_fn)(htp_tx_t *));
+
+/**
+ * Registers a RESPONSE_LINE callback.
+ *
+ * @param[in] cfg
+ * @param[in] callback_fn
+ */
+void htp_config_register_response_line(htp_cfg_t *cfg, int (*callback_fn)(htp_tx_t *));
+
+/**
+ * Registers a RESPONSE_START callback.
+ *
+ * @param[in] cfg
+ * @param[in] callback_fn
+ */
+void htp_config_register_response_start(htp_cfg_t *cfg, int (*callback_fn)(htp_tx_t *));
+
+/**
+ * Registers a RESPONSE_TRAILER callback.
+ *
+ * @param[in] cfg
+ * @param[in] callback_fn
+ */
+void htp_config_register_response_trailer(htp_cfg_t *cfg, int (*callback_fn)(htp_tx_t *));
+
+/**
+ * Registers a RESPONSE_TRAILER_DATA callback.
+ *
+ * @param[in] cfg
+ * @param[in] callback_fn
+ */
+void htp_config_register_response_trailer_data(htp_cfg_t *cfg, int (*callback_fn)(htp_tx_data_t *d));
+
+/**
+ * Registers a TRANSACTION_COMPLETE callback.
+ *
+ * @param[in] cfg
+ * @param[in] callback_fn
+ */
+void htp_config_register_transaction_complete(htp_cfg_t *cfg, int (*callback_fn)(htp_tx_t *));
+
+/**
+ * Adds the built-in Urlencoded parser to the configuration. The parser will
+ * parse query strings and request bodies with the appropriate MIME type.
+ *
+ * @param[in] cfg
+ */
+void htp_config_register_urlencoded_parser(htp_cfg_t *cfg);
+
+/**
+ * Configures whether backslash characters are treated as path segment separators. They
+ * are not on Unix systems, but are on Windows systems. If this setting is enabled, a path
+ * such as "/one\two/three" will be converted to "/one/two/three". Implemented only for HTP_DECODER_URL_PATH.
+ *
+ * @param[in] cfg
+ * @param[in] ctx
+ * @param[in] enabled
+ */
+void htp_config_set_backslash_convert_slashes(htp_cfg_t *cfg, enum htp_decoder_ctx_t ctx, int enabled);
+
+/**
+ * Configures a best-fit map, which is used whenever characters longer than one byte
+ * need to be converted to a single byte. By default a Windows 1252 best-fit map is used.
+ * The map is a list of triplets, the first 2 bytes being a UCS-2 character to map from,
+ * and the third byte being the single byte to map to. Make sure that your map contains
+ * the mappings to cover the full-width and half-width form characters (U+FF00-FFEF). The
+ * last triplet in the map must be all zeros (3 NUL bytes).
+ *
+ * @param[in] cfg
+ * @param[in] ctx
+ * @param[in] map
+ */
+void htp_config_set_bestfit_map(htp_cfg_t *cfg, enum htp_decoder_ctx_t ctx, void *map);
+
+/**
+ * Sets the replacement character that will be used in the lossy best-fit
+ * mapping from multi-byte to single-byte streams. The question mark character
+ * is used as the default replacement byte.
+ *
+ * @param[in] cfg
+ * @param[in] ctx
+ * @param[in] replacement_byte
+ */
+void htp_config_set_bestfit_replacement_byte(htp_cfg_t *cfg, enum htp_decoder_ctx_t ctx, int replacement_byte);
+
+/**
+ * Controls reaction to raw control characters in the data.
+ *
+ * @param[in] cfg
+ * @param[in] ctx
+ * @param[in] unwanted
+ */
+void htp_config_set_control_chars_unwanted(htp_cfg_t *cfg, enum htp_decoder_ctx_t ctx, enum htp_unwanted_t unwanted);
+
+/**
+ * Configures whether input data will be converted to lowercase. Useful when set on the
+ * HTP_DECODER_URL_PATH context, in order to handle servers with case-insensitive filesystems.
+ * Implemented only for HTP_DECODER_URL_PATH.
+ *
+ * @param[in] cfg
+ * @param[in] ctx
+ * @param[in] enabled
+ */
+void htp_config_set_convert_lowercase(htp_cfg_t *cfg, enum htp_decoder_ctx_t ctx, int enabled);
+
+/**
+ * Enables or disables Multipart file extraction. This function can be invoked only
+ * after a previous htp_config_set_tmpdir() invocation. Otherwise, the configuration
+ * change will fail, and extraction will not be enabled. Disabled by default. Please
+ * note that the built-in file extraction implementation uses synchronous I/O, which
+ * means that it is not suitable for use in an event-driven container. There's an
+ * upper limit to how many files can be created on the filesystem during a single
+ * request. The limit exists in order to mitigate against a DoS attack with a
+ * Multipart payload that contains hundreds and thousands of files (it's cheap for the
+ * attacker to do this, but costly for the server to support it). The default limit
+ * may be pretty conservative.
+ *
+ * @param[in] cfg
+ * @param[in] extract_files 1 if you wish extraction to be enabled, 0 otherwise
+ * @param[in] limit the maximum number of files allowed; use -1 to use the parser default.
+ */
+htp_status_t htp_config_set_extract_request_files(htp_cfg_t *cfg, int extract_files, int limit);
+
+/**
+ * Configures the maximum size of the buffer LibHTP will use when all data is not available
+ * in the current buffer (e.g., a very long header line that might span several packets). This
+ * limit is controlled by the hard_limit parameter. The soft_limit parameter is not implemented.
+ *
+ * @param[in] cfg
+ * @param[in] soft_limit NOT IMPLEMENTED.
+ * @param[in] hard_limit
+ */
+void htp_config_set_field_limits(htp_cfg_t *cfg, size_t soft_limit, size_t hard_limit);
+
+/**
+ * Configures the maximum memlimit LibHTP will pass to liblzma.
+ *
+ * @param[in] cfg
+ * @param[in] memlimit
+ */
+void htp_config_set_lzma_memlimit(htp_cfg_t *cfg, size_t memlimit);
+
+/**
+ * Configures the maximum layers LibHTP will pass to liblzma.
+ *
+ * @param[in] cfg
+ * @param[in] limit
+ */
+void htp_config_set_lzma_layers(htp_cfg_t *cfg, int limit);
+
+/**
+ * Configures the maximum compression bomb size LibHTP will decompress.
+ *
+ * @param[in] cfg
+ * @param[in] bomblimit
+ */
+void htp_config_set_compression_bomb_limit(htp_cfg_t *cfg, size_t bomblimit);
+
+/**
+ * Configures the maximum compression bomb time LibHTP will decompress.
+ *
+ * @param[in] cfg
+ * @param[in] useclimit
+ */
+void htp_config_set_compression_time_limit(htp_cfg_t *cfg, size_t useclimit);
+
+/**
+ * Configures the maximum number of tx LibHTP will have per connection.
+ *
+ * @param[in] cfg
+ * @param[in] limit
+ */
+void htp_config_set_max_tx(htp_cfg_t *cfg, uint32_t limit);
+
+/**
+ * Configures the desired log level.
+ *
+ * @param[in] cfg
+ * @param[in] log_level
+ */
+void htp_config_set_log_level(htp_cfg_t *cfg, enum htp_log_level_t log_level);
+
+/**
+ * Configures how the server reacts to encoded NUL bytes. Some servers will stop
+ * at NUL, while some will respond with 400 or 404. When the termination option is not
+ * used, the NUL byte will remain in the path.
+ *
+ * @param[in] cfg
+ * @param[in] ctx
+ * @param[in] enabled
+ */
+void htp_config_set_nul_encoded_terminates(htp_cfg_t *cfg, enum htp_decoder_ctx_t ctx, int enabled);
+
+/**
+ * Configures reaction to encoded NUL bytes in input data.
+ *
+ * @param[in] cfg
+ * @param[in] ctx
+ * @param[in] unwanted
+ */
+void htp_config_set_nul_encoded_unwanted(htp_cfg_t *cfg, enum htp_decoder_ctx_t ctx, enum htp_unwanted_t unwanted);
+
+/**
+ * Configures the handling of raw NUL bytes. If enabled, raw NUL terminates strings.
+ *
+ * @param[in] cfg
+ * @param[in] ctx
+ * @param[in] enabled
+ */
+void htp_config_set_nul_raw_terminates(htp_cfg_t *cfg, enum htp_decoder_ctx_t ctx, int enabled);
+
+/**
+ * Configures how the server reacts to raw NUL bytes. Some servers will terminate
+ * path at NUL, while some will respond with 400 or 404. When the termination option
+ * is not used, the NUL byte will remain in the data.
+ *
+ * @param[in] cfg
+ * @param[in] ctx
+ * @param[in] unwanted
+ */
+void htp_config_set_nul_raw_unwanted(htp_cfg_t *cfg, enum htp_decoder_ctx_t ctx, enum htp_unwanted_t unwanted);
+
+/**
+ * Enable or disable request HTTP Authentication parsing. Enabled by default.
+ *
+ * @param[in] cfg
+ * @param[in] parse_request_auth
+ */
+void htp_config_set_parse_request_auth(htp_cfg_t *cfg, int parse_request_auth);
+
+/**
+ * Enable or disable request cookie parsing. Enabled by default.
+ *
+ * @param[in] cfg
+ * @param[in] parse_request_cookies
+ */
+void htp_config_set_parse_request_cookies(htp_cfg_t *cfg, int parse_request_cookies);
+
+/**
+ * Enable or disable spaces in URIs. Disabled by default.
+ *
+ * @param[in] cfg
+ * @param[in] allow_space_uri
+ */
+void htp_config_set_allow_space_uri(htp_cfg_t *cfg, int allow_space_uri);
+
+/**
+ * Configures whether consecutive path segment separators will be compressed. When enabled, a path
+ * such as "/one//two" will be normalized to "/one/two". Backslash conversion and path segment separator
+ * decoding are carried out before compression. For example, the path "/one\\/two\/%5cthree/%2f//four"
+ * will be converted to "/one/two/three/four" (assuming all 3 options are enabled). Implemented only for
+ * HTP_DECODER_URL_PATH.
+ *
+ * @param[in] cfg
+ * @param[in] ctx
+ * @param[in] enabled
+ */
+void htp_config_set_path_separators_compress(htp_cfg_t *cfg, enum htp_decoder_ctx_t ctx, int enabled);
+
+/**
+ * Configures whether encoded path segment separators will be decoded. Apache does not do
+ * this by default, but IIS does. If enabled, a path such as "/one%2ftwo" will be normalized
+ * to "/one/two". If the backslash_convert_slashes option is also enabled, encoded backslash
+ * characters will be converted too (and subsequently normalized to forward slashes). Implemented
+ * only for HTP_DECODER_URL_PATH.
+ *
+ * @param[in] cfg
+ * @param[in] ctx
+ * @param[in] enabled
+ */
+void htp_config_set_path_separators_decode(htp_cfg_t *cfg, enum htp_decoder_ctx_t ctx, int enabled);
+
+/**
+ * Configures reaction to encoded path separator characters (e.g., %2f). Implemented only for HTP_DECODER_URL_PATH.
+ *
+ * @param[in] cfg
+ * @param[in] ctx
+ * @param[in] unwanted
+ */
+void htp_config_set_path_separators_encoded_unwanted(htp_cfg_t *cfg, enum htp_decoder_ctx_t ctx, enum htp_unwanted_t unwanted);
+
+/**
+ * Configures whether plus characters are converted to spaces when decoding URL-encoded strings. This
+ * is appropriate to do for parameters, but not for URLs. Only applies to contexts where decoding
+ * is taking place.
+ *
+ * @param[in] cfg
+ * @param[in] ctx
+ * @param[in] enabled
+ */
+void htp_config_set_plusspace_decode(htp_cfg_t *cfg, enum htp_decoder_ctx_t ctx, int enabled);
+
+/**
+ * Controls whether compressed response bodies will be automatically decompressed.
+ *
+ * @param[in] cfg
+ * @param[in] enabled set to 1 to enable decompression, 0 otherwise
+ */
+void htp_config_set_response_decompression(htp_cfg_t *cfg, int enabled);
+
+/**
+ * Controls whether compressed request bodies will be automatically decompressed.
+ *
+ * @param[in] cfg
+ * @param[in] enabled set to 1 to enable decompression, 0 otherwise
+ */
+void htp_config_set_request_decompression(htp_cfg_t *cfg, int enabled);
+
+/**
+ * Configure desired server personality.
+ *
+ * @param[in] cfg
+ * @param[in] personality
+ * @return HTP_OK if the personality is supported, HTP_ERROR if it isn't.
+ */
+htp_status_t htp_config_set_server_personality(htp_cfg_t *cfg, enum htp_server_personality_t personality);
+
+/**
+ * Configures the path where temporary files should be stored. Must be set
+ * in order to use the Multipart file extraction functionality.
+ *
+ * @param[in] cfg
+ * @param[in] tmpdir
+ */
+void htp_config_set_tmpdir(htp_cfg_t *cfg, char *tmpdir);
+
+/**
+ * Configures whether transactions will be automatically destroyed once they
+ * are processed and all callbacks invoked. This option is appropriate for
+ * programs that process transactions as they are parsed.
+ *
+ * @param[in] cfg
+ * @param[in] tx_auto_destroy
+ */
+void htp_config_set_tx_auto_destroy(htp_cfg_t *cfg, int tx_auto_destroy);
+
+/**
+ * Associates provided opaque user data with the configuration.
+ *
+ * @param[in] cfg
+ * @param[in] user_data
+ */
+void htp_config_set_user_data(htp_cfg_t *cfg, void *user_data);
+
+/**
+ * Configures whether %u-encoded sequences are decoded. Such sequences
+ * will be treated as invalid URL encoding if decoding is not desirable.
+ *
+ * @param[in] cfg
+ * @param[in] ctx
+ * @param[in] enabled
+ */
+void htp_config_set_u_encoding_decode(htp_cfg_t *cfg, enum htp_decoder_ctx_t ctx, int enabled);
+
+/**
+ * Configures reaction to %u-encoded sequences in input data.
+ *
+ * @param[in] cfg
+ * @param[in] ctx
+ * @param[in] unwanted
+ */
+void htp_config_set_u_encoding_unwanted(htp_cfg_t *cfg, enum htp_decoder_ctx_t ctx, enum htp_unwanted_t unwanted);
+
+/**
+ * Configures how the server handles invalid URL encoding.
+ *
+ * @param[in] cfg
+ * @param[in] ctx
+ * @param[in] handling
+ */
+void htp_config_set_url_encoding_invalid_handling(htp_cfg_t *cfg, enum htp_decoder_ctx_t ctx, enum htp_url_encoding_handling_t handling);
+
+/**
+ * Configures how the server reacts to invalid URL encoding.
+ *
+ * @param[in] cfg
+ * @param[in] ctx
+ * @param[in] unwanted
+ */
+void htp_config_set_url_encoding_invalid_unwanted(htp_cfg_t *cfg, enum htp_decoder_ctx_t ctx, enum htp_unwanted_t unwanted);
+
+/**
+ * Controls whether the data should be treated as UTF-8 and converted to a single-byte
+ * stream using best-fit mapping. Implemented only for HTP_DECODER_URL_PATH.
+ *
+ * @param[in] cfg
+ * @param[in] ctx
+ * @param[in] enabled
+ */
+void htp_config_set_utf8_convert_bestfit(htp_cfg_t *cfg, enum htp_decoder_ctx_t ctx, int enabled);
+
+/**
+ * Configures how the server reacts to invalid UTF-8 characters. This setting does
+ * not affect path normalization; it only controls what response status will be expected for
+ * a request that contains invalid UTF-8 characters. Implemented only for HTP_DECODER_URL_PATH.
+ *
+ * @param[in] cfg
+ * @param[in] ctx
+ * @param[in] unwanted
+ */
+void htp_config_set_utf8_invalid_unwanted(htp_cfg_t *cfg, enum htp_decoder_ctx_t ctx, enum htp_unwanted_t unwanted);
+
+/**
+ * Configures how the server reacts to leading whitespace on the request line.
+ *
+ * @param[in] cfg
+ * @param[in] ctx
+ * @param[in] unwanted
+ */
+void htp_config_set_requestline_leading_whitespace_unwanted(htp_cfg_t *cfg, enum htp_decoder_ctx_t ctx, enum htp_unwanted_t unwanted);
+
+/**
+ * Configures how many layers of compression we try to decompress.
+ *
+ * @param[in] cfg
+ * @param[in] limit 0 disables limit
+ */
+void htp_config_set_response_decompression_layer_limit(htp_cfg_t *cfg, int limit);
+
+#ifdef __cplusplus
+}
+#endif
+
+#endif /* HTP_CONFIG_H */
+
diff --git a/htp/htp_config_auto.h b/htp/htp_config_auto.h
new file mode 100644
index 0000000..7fbed4b
--- /dev/null
+++ b/htp/htp_config_auto.h
@@ -0,0 +1,51 @@
+/***************************************************************************
+ * Copyright (c) 2009-2010 Open Information Security Foundation
+ * All rights reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions are
+ * met:
+ *
+ * - Redistributions of source code must retain the above copyright
+ * notice, this list of conditions and the following disclaimer.
+
+ * - Redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in the
+ * documentation and/or other materials provided with the distribution.
+
+ * - Neither the name of the Qualys, Inc. nor the names of its
+ * contributors may be used to endorse or promote products derived from
+ * this software without specific prior written permission.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+ * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+ * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+ * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
+ * HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+ * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
+ * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+ * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
+ * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+ * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+ * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+ ***************************************************************************/
+
+/**
+ * @file
+ *
+ * This file wraps the generated autoconf header in an include guard
+ * to prevent issues when it is included more than once.
+ *
+ * @warning Only include this in source files.
+ *
+ * @author Brian Rectanus <brectanus@qualys.com>
+ */
+
+#ifndef _HTP_CONFIG_AUTO_H
+#define _HTP_CONFIG_AUTO_H
+
+#ifdef HAVE_CONFIG_H
+#include "htp_config_auto_gen.h"
+#endif
+
+#endif /* _HTP_CONFIG_AUTO_H */
diff --git a/htp/htp_config_private.h b/htp/htp_config_private.h
new file mode 100644
index 0000000..83ff8f6
--- /dev/null
+++ b/htp/htp_config_private.h
@@ -0,0 +1,373 @@
+/***************************************************************************
+ * Copyright (c) 2009-2010 Open Information Security Foundation
+ * Copyright (c) 2010-2013 Qualys, Inc.
+ * All rights reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions are
+ * met:
+ *
+ * - Redistributions of source code must retain the above copyright
+ * notice, this list of conditions and the following disclaimer.
+
+ * - Redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in the
+ * documentation and/or other materials provided with the distribution.
+
+ * - Neither the name of the Qualys, Inc. nor the names of its
+ * contributors may be used to endorse or promote products derived from
+ * this software without specific prior written permission.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+ * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+ * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+ * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
+ * HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+ * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
+ * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+ * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
+ * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+ * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+ * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+ ***************************************************************************/
+
+/**
+ * @file
+ * @author Ivan Ristic <ivanr@webkreator.com>
+ */
+
+#ifndef HTP_CONFIG_PRIVATE_H
+#define HTP_CONFIG_PRIVATE_H
+
+#ifdef __cplusplus
+extern "C" {
+#endif
+
+#define HTP_DECODER_CONTEXTS_MAX 3
+
+typedef struct htp_decoder_cfg_t {
+ // Decoder options for a single decoding context; htp_cfg_t keeps one instance per context.
+ // Path-specific decoding options.
+
+ /** Convert backslash characters to slashes (e.g., "/one\two/three" becomes "/one/two/three"). */
+ int backslash_convert_slashes;
+
+ /** Convert input data to lowercase. */
+ int convert_lowercase;
+
+ /** Compress consecutive path segment separators (e.g., "/one//two" becomes "/one/two"). */
+ int path_separators_compress;
+
+ /** Should we URL-decode encoded path segment separators (e.g., %2f)? */
+ int path_separators_decode;
+
+ /** Should we decode '+' characters to spaces? */
+ int plusspace_decode;
+
+ /** Reaction to encoded path separators (e.g., %2f). */
+ enum htp_unwanted_t path_separators_encoded_unwanted;
+
+
+ // Special characters options.
+
+ /** Controls how raw NUL bytes are handled. If enabled, a raw NUL terminates the string. */
+ int nul_raw_terminates;
+
+ /** Determines server response to a raw NUL byte in the path. */
+ enum htp_unwanted_t nul_raw_unwanted;
+
+ /** Reaction to raw control characters. */
+ enum htp_unwanted_t control_chars_unwanted;
+
+
+ // URL encoding options.
+
+ /** Should we decode %u-encoded characters? */
+ int u_encoding_decode;
+
+ /** Reaction to %u encoding. */
+ enum htp_unwanted_t u_encoding_unwanted;
+
+ /** Handling of invalid URL encodings. */
+ enum htp_url_encoding_handling_t url_encoding_invalid_handling;
+
+ /** Reaction to invalid URL encoding. */
+ enum htp_unwanted_t url_encoding_invalid_unwanted;
+
+ /** Controls how encoded NUL bytes are handled (whether an encoded NUL terminates the data). */
+ int nul_encoded_terminates;
+
+ /** How are we expected to react to an encoded NUL byte? */
+ enum htp_unwanted_t nul_encoded_unwanted;
+
+
+ // UTF-8 options.
+
+ /** Controls how invalid UTF-8 characters are handled. */
+ enum htp_unwanted_t utf8_invalid_unwanted;
+
+ /** Convert UTF-8 characters into bytes using best-fit mapping. */
+ int utf8_convert_bestfit;
+
+
+ // Best-fit mapping options.
+
+ /** The best-fit map to use to decode %u-encoded characters (triplets, ending with an all-zero triplet). */
+ unsigned char *bestfit_map;
+
+ /** The replacement byte used when there is no best-fit mapping. */
+ unsigned char bestfit_replacement_byte;
+
+} htp_decoder_cfg_t;
+
+struct htp_cfg_t {
+ /**
+ * The maximum size of the buffer that is used when the current
+ * input chunk does not contain all the necessary data (e.g., a very long header
+ * line that spans several packets).
+ */
+ size_t field_limit_hard;
+
+ /**
+ * Soft field limit length. If this limit is reached the parser will issue
+ * a warning but continue to run. NOT IMPLEMENTED.
+ */
+ size_t field_limit_soft;
+
+ /**
+ * Log level, which will be used when deciding whether to store or
+ * ignore the messages issued by the parser.
+ */
+ enum htp_log_level_t log_level;
+
+ /**
+ * Whether to delete each transaction after the last hook is invoked. This
+ * feature should be used when parsing traffic streams in real time.
+ */
+ int tx_auto_destroy;
+
+ /**
+ * Server personality identifier.
+ */
+ enum htp_server_personality_t server_personality;
+
+ /** The function used for request line parsing. Depends on the personality. */
+ int (*parse_request_line)(htp_connp_t *connp);
+
+ /** The function used for response line parsing. Depends on the personality. */
+ int (*parse_response_line)(htp_connp_t *connp);
+
+ /** The function used for request header parsing. Depends on the personality. */
+ int (*process_request_header)(htp_connp_t *connp, unsigned char *data, size_t len);
+
+ /** The function used for response header parsing. Depends on the personality. */
+ int (*process_response_header)(htp_connp_t *connp, unsigned char *data, size_t len);
+
+ /** The function to use to transform parameters after parsing. */
+ int (*parameter_processor)(htp_param_t *param);
+
+ /** Decoder configuration array, one per context. */
+ htp_decoder_cfg_t decoder_cfgs[HTP_DECODER_CONTEXTS_MAX];
+
+ /** Whether to generate the request_uri_normalized field. */
+ int generate_request_uri_normalized;
+
+ /** Whether to decompress compressed response bodies. */
+ int response_decompression_enabled;
+
+ /** Not fully implemented at the moment. */
+ char *request_encoding;
+
+ /** Not fully implemented at the moment. */
+ char *internal_encoding;
+
+ /** Whether to parse request cookies. */
+ int parse_request_cookies;
+
+ /** Whether to parse HTTP Authentication headers. */
+ int parse_request_auth;
+
+ /** Whether to extract files from requests using Multipart encoding. */
+ int extract_request_files;
+
+ /** How many extracted files are allowed in a single Multipart request? */
+ int extract_request_files_limit;
+
+ /** Whether to allow spaces in URI. */
+ int allow_space_uri;
+
+ /** The location on disk where temporary files will be created. */
+ char *tmpdir;
+
+ // Hooks
+
+ /**
+ * Request start hook, invoked when the parser receives the first byte of a new
+ * request. Because in HTTP a transaction always starts with a request, this hook
+ * doubles as a transaction start hook.
+ */
+ htp_hook_t *hook_request_start;
+
+ /**
+ * Request line hook, invoked after a request line has been parsed.
+ */
+ htp_hook_t *hook_request_line;
+
+ /**
+ * Request URI normalization hook, for overriding default normalization of URI.
+ */
+ htp_hook_t *hook_request_uri_normalize;
+
+ /**
+ * Receives raw request header data, starting immediately after the request line,
+ * including all headers as they are seen on the TCP connection, and including the
+ * terminating empty line. Not available on genuine HTTP/0.9 requests (because
+ * they don't use headers).
+ */
+ htp_hook_t *hook_request_header_data;
+
+ /**
+ * Request headers hook, invoked after all request headers are seen.
+ */
+ htp_hook_t *hook_request_headers;
+
+ /**
+ * Request body data hook, invoked every time body data is available. Each
+ * invocation will provide a htp_tx_data_t instance. Chunked data
+ * will be dechunked before the data is passed to this hook. Decompression
+ * is not currently implemented (NOTE(review): possibly stale, see request_decompression_enabled).
+ * At the end of the request body there will be a call with the data pointer set to NULL.
+ */
+ htp_hook_t *hook_request_body_data;
+
+ /**
+ * Request file data hook, which is invoked whenever request file data is
+ * available. Currently used only by the Multipart parser.
+ */
+ htp_hook_t *hook_request_file_data;
+
+ /**
+ * Receives raw request trailer data, which can be available on requests that have
+ * chunked bodies. The data starts immediately after the zero-length chunk
+ * and includes the terminating empty line.
+ */
+ htp_hook_t *hook_request_trailer_data;
+
+ /**
+ * Request trailer hook, invoked after all trailer headers are seen,
+ * and if they are seen (not invoked otherwise).
+ */
+ htp_hook_t *hook_request_trailer;
+
+ /**
+ * Request hook, invoked after a complete request is seen.
+ */
+ htp_hook_t *hook_request_complete;
+
+ /**
+ * Response startup hook, invoked when a response transaction is found and
+ * processing started.
+ */
+ htp_hook_t *hook_response_start;
+
+ /**
+ * Response line hook, invoked after a response line has been parsed.
+ */
+ htp_hook_t *hook_response_line;
+
+ /**
+ * Receives raw response header data, starting immediately after the status line
+ * and including all headers as they are seen on the TCP connection, and including the
+ * terminating empty line. Not available on genuine HTTP/0.9 responses (because
+ * they don't have response headers).
+ */
+ htp_hook_t *hook_response_header_data;
+
+ /**
+ * Response headers hook, invoked after all response headers have been seen.
+ */
+ htp_hook_t *hook_response_headers;
+
+ /**
+ * Response body data hook, invoked every time body data is available. Each
+ * invocation will provide a htp_tx_data_t instance. Chunked data
+ * will be dechunked before the data is passed to this hook. By default,
+ * compressed data will be decompressed, but decompression can be disabled
+ * in configuration. At the end of the response body there will be a call
+ * with the data pointer set to NULL.
+ */
+ htp_hook_t *hook_response_body_data;
+
+ /**
+ * Receives raw response trailer data, which can be available on responses that have
+ * chunked bodies. The data starts immediately after the zero-length chunk
+ * and includes the terminating empty line.
+ */
+ htp_hook_t *hook_response_trailer_data;
+
+ /**
+ * Response trailer hook, invoked after all trailer headers have been processed,
+ * and only if the trailer exists.
+ */
+ htp_hook_t *hook_response_trailer;
+
+ /**
+ * Response hook, invoked after a response has been seen. Because sometimes servers
+ * respond before receiving complete requests, a response_complete callback may be
+ * invoked prior to a request_complete callback.
+ */
+ htp_hook_t *hook_response_complete;
+
+ /**
+ * Transaction complete hook, which is invoked once the entire transaction is
+ * considered complete (request and response are both complete). This is always
+ * the last hook to be invoked.
+ */
+ htp_hook_t *hook_transaction_complete;
+
+ /**
+ * Log hook, invoked every time the library wants to log.
+ */
+ htp_hook_t *hook_log;
+
+ /**
+ * Opaque user data associated with this configuration structure.
+ */
+ void *user_data;
+
+ // Request Line parsing options.
+
+ // TODO this was added here to maintain a stable ABI, once we can break that
+ // we may want to move this into htp_decoder_cfg_t (VJ)
+
+ /** Reaction to leading whitespace on the request line */
+ enum htp_unwanted_t requestline_leading_whitespace_unwanted;
+
+ /** How many layers of compression we will decompress (0 => no limit). */
+ int response_decompression_layer_limit;
+
+ /** Maximum memory used by the LZMA decompressor. */
+ size_t lzma_memlimit;
+
+ /** Maximum output size for a compression bomb. NOTE(review): the public setter takes size_t - confirm narrowing. */
+ int32_t compression_bomb_limit;
+
+ /** Maximum time for a decompression bomb. NOTE(review): the public setter takes size_t - confirm narrowing. */
+ int32_t compression_time_limit;
+
+ /** How many layers of LZMA compression we will decompress (0 => no lzma). */
+ int response_lzma_layer_limit;
+
+ /** Whether to decompress compressed request bodies. */
+ int request_decompression_enabled;
+
+ /** Maximum number of transactions. */
+ uint32_t max_tx;
+};
+
+#ifdef __cplusplus
+}
+#endif
+
+#endif /* HTP_CONFIG_PRIVATE_H */
+
diff --git a/htp/htp_connection.c b/htp/htp_connection.c
new file mode 100644
index 0000000..3fe7c89
--- /dev/null
+++ b/htp/htp_connection.c
@@ -0,0 +1,168 @@
+/***************************************************************************
+ * Copyright (c) 2009-2010 Open Information Security Foundation
+ * Copyright (c) 2010-2013 Qualys, Inc.
+ * All rights reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions are
+ * met:
+ *
+ * - Redistributions of source code must retain the above copyright
+ * notice, this list of conditions and the following disclaimer.
+
+ * - Redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in the
+ * documentation and/or other materials provided with the distribution.
+
+ * - Neither the name of the Qualys, Inc. nor the names of its
+ * contributors may be used to endorse or promote products derived from
+ * this software without specific prior written permission.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+ * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+ * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+ * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
+ * HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+ * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
+ * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+ * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
+ * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+ * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+ * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+ ***************************************************************************/
+
+/**
+ * @file
+ * @author Ivan Ristic <ivanr@webkreator.com>
+ */
+
+#include "htp_config_auto.h"
+
+#include "htp_private.h"
+
+/**
+ * Allocates a zero-initialized connection together with its transaction
+ * list and its log message list.
+ *
+ * @return New connection, or NULL if any of the allocations fails.
+ */
+htp_conn_t *htp_conn_create(void) {
+    htp_conn_t *c = calloc(1, sizeof (*c));
+    if (c == NULL) return NULL;
+
+    c->transactions = htp_list_create(16);
+    if (c->transactions != NULL) {
+        c->messages = htp_list_create(8);
+        if (c->messages != NULL) return c;
+
+        // The message list could not be created; undo the transaction list.
+        htp_list_destroy(c->transactions);
+        c->transactions = NULL;
+    }
+
+    free(c);
+    return NULL;
+}
+
+/**
+ * Records the time at which the connection was closed.
+ *
+ * @param[in] conn Connection; NULL is ignored.
+ * @param[in] timestamp Optional. When NULL, the stored close time is left untouched.
+ */
+void htp_conn_close(htp_conn_t *conn, const htp_time_t *timestamp) {
+    if ((conn == NULL) || (timestamp == NULL)) return;
+
+    // Update timestamp.
+    memcpy(&(conn->close_timestamp), timestamp, sizeof (*timestamp));
+}
+
+/**
+ * Destroys a connection along with everything it owns: the remaining
+ * transactions, the stored log messages and the address strings.
+ *
+ * @param[in] conn Connection to destroy; NULL is ignored.
+ */
+void htp_conn_destroy(htp_conn_t *conn) {
+    if (conn == NULL) return;
+
+    if (conn->transactions != NULL) {
+        // Destroy individual transactions. Do note that iterating
+        // using the iterator does not work here because some of the
+        // list elements may be NULL (and with the iterator it is impossible
+        // to distinguish a NULL element from the end of the list).
+        for (size_t i = 0, n = htp_list_size(conn->transactions); i < n; i++) {
+            htp_tx_t *tx = htp_list_get(conn->transactions, i);
+            if (tx != NULL) {
+                htp_tx_destroy_incomplete(tx);
+            }
+        }
+
+        htp_list_destroy(conn->transactions);
+        conn->transactions = NULL;
+    }
+
+    if (conn->messages != NULL) {
+        // Destroy individual messages.
+        for (size_t i = 0, n = htp_list_size(conn->messages); i < n; i++) {
+            htp_log_t *l = htp_list_get(conn->messages, i);
+            // Skip empty slots instead of dereferencing a NULL element,
+            // mirroring the guard used for transactions above.
+            if (l == NULL) continue;
+
+            free((void *) l->msg);
+            free(l);
+        }
+
+        htp_list_destroy(conn->messages);
+        conn->messages = NULL;
+    }
+
+    // free(NULL) is a no-op, so the address strings need no guards.
+    free(conn->server_addr);
+    free(conn->client_addr);
+
+    free(conn);
+}
+
+/**
+ * Stores the endpoint details and the opening time on the connection.
+ * The address strings are duplicated, so the caller keeps ownership of
+ * the buffers it passes in.
+ *
+ * @param[in] conn Connection to populate.
+ * @param[in] client_addr Optional client address string.
+ * @param[in] client_port Client port.
+ * @param[in] server_addr Optional server address string.
+ * @param[in] server_port Server port.
+ * @param[in] timestamp Optional time at which the connection was opened.
+ * @return HTP_OK on success, HTP_ERROR if conn is NULL or an address
+ *         could not be duplicated.
+ */
+htp_status_t htp_conn_open(htp_conn_t *conn, const char *client_addr, int client_port,
+        const char *server_addr, int server_port, const htp_time_t *timestamp)
+{
+    if (conn == NULL) return HTP_ERROR;
+
+    if (client_addr != NULL) {
+        conn->client_addr = strdup(client_addr);
+        if (conn->client_addr == NULL) return HTP_ERROR;
+    }
+
+    conn->client_port = client_port;
+
+    if (server_addr != NULL) {
+        conn->server_addr = strdup(server_addr);
+        if (conn->server_addr == NULL) {
+            // Release the client address and clear the pointer, so that
+            // htp_conn_destroy() does not free the same buffer a second time.
+            free(conn->client_addr);
+            conn->client_addr = NULL;
+
+            return HTP_ERROR;
+        }
+    }
+
+    conn->server_port = server_port;
+
+    // Remember when the connection was opened.
+    if (timestamp != NULL) {
+        memcpy(&(conn->open_timestamp), timestamp, sizeof (*timestamp));
+    }
+
+    return HTP_OK;
+}
+
+/**
+ * Detaches a transaction from the connection by replacing its slot in the
+ * transaction list with NULL. The list is not shortened by this call.
+ *
+ * @param[in] conn
+ * @param[in] tx
+ * @return HTP_ERROR on a NULL argument or a missing list, HTP_DECLINED when
+ *         the transaction is not in the list, otherwise the result of
+ *         htp_list_replace().
+ */
+htp_status_t htp_conn_remove_tx(htp_conn_t *conn, const htp_tx_t *tx) {
+    if ((conn == NULL) || (tx == NULL)) return HTP_ERROR;
+    if (conn->transactions == NULL) return HTP_ERROR;
+
+    const size_t count = htp_list_size(conn->transactions);
+    size_t pos = 0;
+    while (pos < count) {
+        htp_tx_t *candidate = htp_list_get(conn->transactions, pos);
+        if (candidate == tx) {
+            return htp_list_replace(conn->transactions, pos, NULL);
+        }
+        pos++;
+    }
+
+    return HTP_DECLINED;
+}
+
+/**
+ * Adds len to the connection's inbound byte counter.
+ * The timestamp argument is accepted but not used by this function.
+ */
+void htp_conn_track_inbound_data(htp_conn_t *conn, size_t len, const htp_time_t *timestamp) {
+    if (conn != NULL) {
+        conn->in_data_counter += len;
+    }
+}
+
+/**
+ * Adds len to the connection's outbound byte counter.
+ * The timestamp argument is accepted but not used by this function.
+ */
+void htp_conn_track_outbound_data(htp_conn_t *conn, size_t len, const htp_time_t *timestamp) {
+    if (conn != NULL) {
+        conn->out_data_counter += len;
+    }
+}
diff --git a/htp/htp_connection_parser.c b/htp/htp_connection_parser.c
new file mode 100644
index 0000000..8fd4ed6
--- /dev/null
+++ b/htp/htp_connection_parser.c
@@ -0,0 +1,260 @@
+/***************************************************************************
+ * Copyright (c) 2009-2010 Open Information Security Foundation
+ * Copyright (c) 2010-2013 Qualys, Inc.
+ * All rights reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions are
+ * met:
+ *
+ * - Redistributions of source code must retain the above copyright
+ * notice, this list of conditions and the following disclaimer.
+
+ * - Redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in the
+ * documentation and/or other materials provided with the distribution.
+
+ * - Neither the name of the Qualys, Inc. nor the names of its
+ * contributors may be used to endorse or promote products derived from
+ * this software without specific prior written permission.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+ * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+ * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+ * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
+ * HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+ * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
+ * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+ * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
+ * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+ * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+ * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+ ***************************************************************************/
+
+/**
+ * @file
+ * @author Ivan Ristic <ivanr@webkreator.com>
+ */
+
+#include "htp_config_auto.h"
+
+#include "htp_private.h"
+
+/**
+ * Forgets the most recent error recorded on the parser. Only the pointer
+ * is cleared; the log entry itself is not freed here.
+ *
+ * @param[in] connp Connection parser; NULL is ignored, as in the other
+ *                  htp_connp_* functions that guard against NULL.
+ */
+void htp_connp_clear_error(htp_connp_t *connp) {
+    if (connp == NULL) return;
+    connp->last_error = NULL;
+}
+
+/**
+ * Closes the request side of the stream only.
+ *
+ * @param[in] connp Connection parser; NULL is ignored.
+ * @param[in] timestamp Passed through to the request parser.
+ */
+void htp_connp_req_close(htp_connp_t *connp, const htp_time_t *timestamp) {
+    if (connp == NULL) return;
+
+    // A stream that is already in the error state stays there.
+    if (connp->in_status != HTP_STREAM_ERROR) {
+        connp->in_status = HTP_STREAM_CLOSED;
+    }
+
+    // One final, empty invocation lets the request parser process
+    // the events that depend on stream closure.
+    htp_connp_req_data(connp, timestamp, NULL, 0);
+}
+
+/**
+ * Closes the connection and both stream directions, then runs each parser
+ * one last time.
+ *
+ * @param[in] connp Connection parser; NULL is ignored.
+ * @param[in] timestamp Optional.
+ */
+void htp_connp_close(htp_connp_t *connp, const htp_time_t *timestamp) {
+    if (connp == NULL) return;
+
+    // Close the underlying connection.
+    htp_conn_close(connp->conn, timestamp);
+
+    // Mark both directions closed, unless a direction is already in error.
+    if (connp->in_status != HTP_STREAM_ERROR) {
+        connp->in_status = HTP_STREAM_CLOSED;
+    }
+    if (connp->out_status != HTP_STREAM_ERROR) {
+        connp->out_status = HTP_STREAM_CLOSED;
+    }
+
+    // Call the parsers one last time, which will allow them
+    // to process the events that depend on stream closure.
+    htp_connp_req_data(connp, timestamp, NULL, 0);
+    htp_connp_res_data(connp, timestamp, NULL, 0);
+}
+
+/**
+ * Creates a connection parser bound to the supplied configuration, together
+ * with a fresh connection. The configuration pointer is stored as-is; no
+ * copy is made.
+ *
+ * @param[in] cfg
+ * @return New connection parser, or NULL if an allocation fails.
+ */
+htp_connp_t *htp_connp_create(htp_cfg_t *cfg) {
+    htp_connp_t *parser = calloc(1, sizeof (*parser));
+    if (parser == NULL) return NULL;
+
+    // Create a new connection.
+    parser->conn = htp_conn_create();
+    if (parser->conn == NULL) {
+        free(parser);
+        return NULL;
+    }
+
+    // Use the supplied configuration structure.
+    parser->cfg = cfg;
+
+    // Both directions start idle, on a stream that has not been opened yet.
+    parser->in_state = htp_connp_REQ_IDLE;
+    parser->out_state = htp_connp_RES_IDLE;
+    parser->in_status = HTP_STREAM_NEW;
+    parser->out_status = HTP_STREAM_NEW;
+
+    return parser;
+}
+
+/**
+ * Releases the parser and the buffers it owns. The connection referenced
+ * by the parser is not touched by this function.
+ *
+ * @param[in] connp Connection parser; NULL is ignored.
+ */
+void htp_connp_destroy(htp_connp_t *connp) {
+    if (connp == NULL) return;
+
+    // Line buffers; free(NULL) is a no-op, so no guards are required.
+    free(connp->in_buf);
+    free(connp->out_buf);
+
+    htp_connp_destroy_decompressors(connp);
+
+    if (connp->put_file != NULL) {
+        bstr_free(connp->put_file->filename);
+        free(connp->put_file);
+    }
+
+    // Partially assembled folded headers, if any.
+    if (connp->in_header != NULL) {
+        bstr_free(connp->in_header);
+        connp->in_header = NULL;
+    }
+    if (connp->out_header != NULL) {
+        bstr_free(connp->out_header);
+        connp->out_header = NULL;
+    }
+
+    free(connp);
+}
+
+/**
+ * Destroys the connection held by the parser, then the parser itself.
+ *
+ * @param[in] connp Connection parser; NULL is ignored.
+ */
+void htp_connp_destroy_all(htp_connp_t *connp) {
+    if (connp != NULL) {
+        // The connection goes first; the pointer is cleared once it is gone.
+        htp_conn_destroy(connp->conn);
+        connp->conn = NULL;
+
+        // Then the parser and everything else it owns.
+        htp_connp_destroy(connp);
+    }
+}
+
+/** Returns the connection attached to the parser, or NULL when connp is NULL. */
+htp_conn_t *htp_connp_get_connection(const htp_connp_t *connp) {
+    return (connp != NULL) ? connp->conn : NULL;
+}
+
+/** Returns the current inbound transaction, or NULL when connp is NULL. */
+htp_tx_t *htp_connp_get_in_tx(const htp_connp_t *connp) {
+    return (connp != NULL) ? connp->in_tx : NULL;
+}
+
+htp_log_t *htp_connp_get_last_error(const htp_connp_t *connp) {
+ if (connp == NULL) return NULL;
+ return connp->last_error;
+}
+
+/** Returns the current outbound transaction, or NULL when connp is NULL. */
+htp_tx_t *htp_connp_get_out_tx(const htp_connp_t *connp) {
+    return (connp != NULL) ? connp->out_tx : NULL;
+}
+
+/**
+ * Returns the user data pointer stored on the parser, or NULL when connp
+ * is NULL. The pointer is stored as const and the qualifier is dropped on
+ * the way out.
+ */
+void *htp_connp_get_user_data(const htp_connp_t *connp) {
+    if (connp != NULL) {
+        return (void *)connp->user_data;
+    }
+    return NULL;
+}
+
+/**
+ * Resets the request-side body tracking: both length fields are set
+ * to -1 and the first-chunk index of the request is moved to the current
+ * chunk count.
+ *
+ * @param[in] connp Connection parser; NULL is ignored.
+ */
+void htp_connp_in_reset(htp_connp_t *connp) {
+    if (connp != NULL) {
+        connp->in_content_length = -1;
+        connp->in_body_data_left = -1;
+        connp->in_chunk_request_index = connp->in_chunk_count;
+    }
+}
+
+/**
+ * Opens the connection. This only succeeds while both stream directions
+ * are still in the HTP_STREAM_NEW state; otherwise an error is logged and
+ * nothing changes. If the connection details cannot be stored, the stream
+ * states are left as they were.
+ *
+ * @param[in] connp Connection parser; NULL is ignored.
+ * @param[in] client_addr Client address
+ * @param[in] client_port Client port
+ * @param[in] server_addr Server address
+ * @param[in] server_port Server port
+ * @param[in] timestamp Optional.
+ */
+void htp_connp_open(htp_connp_t *connp, const char *client_addr, int client_port, const char *server_addr,
+        int server_port, htp_time_t *timestamp)
+{
+    if (connp == NULL) return;
+
+    // Check connection parser state first.
+    const int both_new = (connp->in_status == HTP_STREAM_NEW) && (connp->out_status == HTP_STREAM_NEW);
+    if (!both_new) {
+        htp_log(connp, HTP_LOG_MARK, HTP_LOG_ERROR, 0, "Connection is already open");
+        return;
+    }
+
+    if (htp_conn_open(connp->conn, client_addr, client_port, server_addr, server_port, timestamp) == HTP_OK) {
+        connp->in_status = HTP_STREAM_OPEN;
+        connp->out_status = HTP_STREAM_OPEN;
+    }
+}
+
+/** Stores an opaque user data pointer on the parser; a NULL connp is ignored. */
+void htp_connp_set_user_data(htp_connp_t *connp, const void *user_data) {
+    if (connp != NULL) {
+        connp->user_data = user_data;
+    }
+}
+
+/**
+ * Creates a new transaction and makes it the current inbound transaction.
+ *
+ * @param[in] connp
+ * @return The new transaction, or NULL if connp is NULL, the configured
+ *         transaction limit is exceeded, or the transaction cannot be created.
+ */
+htp_tx_t *htp_connp_tx_create(htp_connp_t *connp) {
+    if (connp == NULL) return NULL;
+
+    const size_t tx_count = htp_list_size(connp->conn->transactions);
+
+    // Detect pipelining: more transactions are listed than the index of
+    // the next transaction expected on the response side.
+    if (tx_count > connp->out_next_tx_index) {
+        connp->conn->flags |= HTP_CONN_PIPELINED;
+    }
+
+    // A max_tx of zero disables the limit.
+    if ((connp->cfg->max_tx > 0) && (tx_count > connp->cfg->max_tx)) {
+        return NULL;
+    }
+
+    htp_tx_t *created = htp_tx_create(connp);
+    if (created == NULL) return NULL;
+
+    connp->in_tx = created;
+    htp_connp_in_reset(connp);
+
+    return created;
+}
+
+/**
+ * Drops the parser's references to the supplied transaction: whichever of
+ * the in_tx and out_tx pointers currently refers to it is reset to NULL.
+ * The transaction itself is not freed here.
+ *
+ * @param[in] connp Connection parser; NULL is ignored.
+ * @param[in] tx
+ */
+void htp_connp_tx_remove(htp_connp_t *connp, htp_tx_t *tx) {
+    if (connp == NULL) return;
+
+    if (tx == connp->in_tx) connp->in_tx = NULL;
+    if (tx == connp->out_tx) connp->out_tx = NULL;
+}
+
+/**
+ * Removes the leading NULL entries from the transaction list. Freed
+ * transactions are first replaced with NULL in the list; this function
+ * then reclaims those slots by shifting the list, stopping at the first
+ * entry that is still in use.
+ *
+ * The index of the next outbound transaction is decremented once per
+ * removed entry so that it keeps pointing at the same transaction.
+ *
+ * @param[in] connp Connection parser; with a NULL parser or connection
+ *                  nothing is removed.
+ * @return Number of NULL transactions removed from the front of the list.
+ */
+size_t htp_connp_tx_freed(htp_connp_t *connp) {
+    // Guard against NULL, as the other htp_connp_* functions in this file do.
+    if ((connp == NULL) || (connp->conn == NULL)) return 0;
+
+    size_t r = 0;
+    size_t nb = htp_list_size(connp->conn->transactions);
+    for (size_t i = 0; i < nb; i++) {
+        // Always look at index 0: each shift moves the next entry to the front.
+        void *tx = htp_list_get(connp->conn->transactions, 0);
+        if (tx != NULL) {
+            break;
+        }
+        htp_list_shift(connp->conn->transactions);
+        r++;
+        connp->out_next_tx_index--;
+    }
+    return r;
+}
diff --git a/htp/htp_connection_parser.h b/htp/htp_connection_parser.h
new file mode 100644
index 0000000..b2c8d3a
--- /dev/null
+++ b/htp/htp_connection_parser.h
@@ -0,0 +1,218 @@
+/***************************************************************************
+ * Copyright (c) 2009-2010 Open Information Security Foundation
+ * Copyright (c) 2010-2013 Qualys, Inc.
+ * All rights reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions are
+ * met:
+ *
+ * - Redistributions of source code must retain the above copyright
+ * notice, this list of conditions and the following disclaimer.
+
+ * - Redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in the
+ * documentation and/or other materials provided with the distribution.
+
+ * - Neither the name of the Qualys, Inc. nor the names of its
+ * contributors may be used to endorse or promote products derived from
+ * this software without specific prior written permission.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+ * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+ * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+ * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
+ * HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+ * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
+ * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+ * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
+ * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+ * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+ * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+ ***************************************************************************/
+
+/**
+ * @file
+ * @author Ivan Ristic <ivanr@webkreator.com>
+ */
+
+#ifndef HTP_CONNECTION_PARSER_H
+#define HTP_CONNECTION_PARSER_H
+
+#ifdef __cplusplus
+extern "C" {
+#endif
+
+/**
+ * Clears the most recent error, if any.
+ *
+ * @param[in] connp
+ */
+void htp_connp_clear_error(htp_connp_t *connp);
+
+/**
+ * Closes the connection associated with the supplied parser.
+ *
+ * @param[in] connp
+ * @param[in] timestamp Optional.
+ */
+void htp_connp_close(htp_connp_t *connp, const htp_time_t *timestamp);
+void htp_connp_req_close(htp_connp_t *connp, const htp_time_t *timestamp);
+
+/**
+ * Creates a new connection parser using the provided configuration. Because
+ * the configuration structure is used directly, in a multithreaded environment
+ * you are not allowed to change the structure, ever. If you have a need to
+ * change configuration on per-connection basis, make a copy of the configuration
+ * structure to go along with every connection parser.
+ *
+ * @param[in] cfg
+ * @return New connection parser instance, or NULL on error.
+ */
+htp_connp_t *htp_connp_create(htp_cfg_t *cfg);
+
+/**
+ * Destroys the connection parser and its data structures, leaving
+ * all the data (connection, transactions, etc) intact.
+ *
+ * @param[in] connp
+ */
+void htp_connp_destroy(htp_connp_t *connp);
+
+/**
+ * Destroys the connection parser, its data structures, as well
+ * as the connection and its transactions.
+ *
+ * @param[in] connp
+ */
+void htp_connp_destroy_all(htp_connp_t *connp);
+
+/**
+ * Returns the connection associated with the connection parser.
+ *
+ * @param[in] connp
+ * @return htp_conn_t instance, or NULL if one is not available.
+ */
+htp_conn_t *htp_connp_get_connection(const htp_connp_t *connp);
+
+/**
+ * Retrieves the pointer to the active inbound transaction. In connection
+ * parsing mode there can be many open transactions, and up to 2 active
+ * transactions at any one time. This is due to HTTP pipelining. Can be NULL.
+ *
+ * @param[in] connp
+ * @return Active inbound transaction, or NULL if there isn't one.
+ */
+htp_tx_t *htp_connp_get_in_tx(const htp_connp_t *connp);
+
+/**
+ * Returns the last error that occurred with this connection parser. Do note, however,
+ * that the value in this field will only be valid immediately after an error condition,
+ * but it is not guaranteed to remain valid if the parser is invoked again.
+ *
+ * @param[in] connp
+ * @return A pointer to an htp_log_t instance if there is an error, or NULL
+ * if there isn't.
+ */
+htp_log_t *htp_connp_get_last_error(const htp_connp_t *connp);
+
+/**
+ * Retrieves the pointer to the active outbound transaction. In connection
+ * parsing mode there can be many open transactions, and up to 2 active
+ * transactions at any one time. This is due to HTTP pipelining. Can be NULL.
+ *
+ * @param[in] connp
+ * @return Active outbound transaction, or NULL if there isn't one.
+ */
+htp_tx_t *htp_connp_get_out_tx(const htp_connp_t *connp);
+
+/**
+ * Retrieve the user data associated with this connection parser.
+ *
+ * @param[in] connp
+ * @return User data, or NULL if there isn't any.
+ */
+void *htp_connp_get_user_data(const htp_connp_t *connp);
+
+/**
+ * Opens connection.
+ *
+ * @param[in] connp
+ * @param[in] client_addr Client address
+ * @param[in] client_port Client port
+ * @param[in] server_addr Server address
+ * @param[in] server_port Server port
+ * @param[in] timestamp Optional.
+ */
+void htp_connp_open(htp_connp_t *connp, const char *client_addr, int client_port, const char *server_addr,
+ int server_port, htp_time_t *timestamp);
+
+/**
+ * Associate user data with the supplied parser.
+ *
+ * @param[in] connp
+ * @param[in] user_data
+ */
+void htp_connp_set_user_data(htp_connp_t *connp, const void *user_data);
+
+/**
+ * Process a chunk of inbound (client or request) data.
+ *
+ * @param[in] connp
+ * @param[in] timestamp
+ * @param[in] data
+ * @param[in] len
+ * @return HTP_STREAM_DATA, HTP_STREAM_ERROR or HTP_STREAM_DATA_OTHER (see QUICK_START).
+ * HTP_STREAM_CLOSED and HTP_STREAM_TUNNEL are also possible.
+ */
+int htp_connp_req_data(htp_connp_t *connp, const htp_time_t *timestamp, const void *data, size_t len);
+
+/**
+ * Returns the number of bytes consumed from the most recent inbound data chunk. Normally, an invocation
+ * of htp_connp_req_data() will consume all data from the supplied buffer, but there are circumstances
+ * where only partial consumption is possible. In such cases HTP_STREAM_DATA_OTHER will be returned.
+ * Consumed bytes are no longer necessary, but the remainder of the buffer will need to be saved
+ * for later.
+ *
+ * @param[in] connp
+ * @return The number of bytes consumed from the last data chunk sent for inbound processing.
+ */
+size_t htp_connp_req_data_consumed(htp_connp_t *connp);
+
+/**
+ * Process a chunk of outbound (server or response) data.
+ *
+ * @param[in] connp
+ * @param[in] timestamp Optional.
+ * @param[in] data
+ * @param[in] len
+ * @return HTP_OK on state change, HTP_ERROR on error, or HTP_DATA when more data is needed
+ */
+int htp_connp_res_data(htp_connp_t *connp, const htp_time_t *timestamp, const void *data, size_t len);
+
+/**
+ * Returns the number of bytes consumed from the most recent outbound data chunk. Normally, an invocation
+ * of htp_connp_res_data() will consume all data from the supplied buffer, but there are circumstances
+ * where only partial consumption is possible. In such cases HTP_STREAM_DATA_OTHER will be returned.
+ * Consumed bytes are no longer necessary, but the remainder of the buffer will need to be saved
+ * for later.
+ *
+ * @param[in] connp
+ * @return The number of bytes consumed from the last data chunk sent for outbound processing.
+ */
+size_t htp_connp_res_data_consumed(htp_connp_t *connp);
+
+/**
+ * Create a new transaction using the connection parser provided.
+ *
+ * @param[in] connp
+ * @return Transaction instance on success, NULL on failure.
+ */
+htp_tx_t *htp_connp_tx_create(htp_connp_t *connp);
+
+size_t htp_connp_tx_freed(htp_connp_t *connp);
+
+#ifdef __cplusplus
+}
+#endif
+
+#endif /* HTP_CONNECTION_PARSER_H */
diff --git a/htp/htp_connection_parser_private.h b/htp/htp_connection_parser_private.h
new file mode 100644
index 0000000..a055aa8
--- /dev/null
+++ b/htp/htp_connection_parser_private.h
@@ -0,0 +1,275 @@
+/***************************************************************************
+ * Copyright (c) 2009-2010 Open Information Security Foundation
+ * Copyright (c) 2010-2013 Qualys, Inc.
+ * All rights reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions are
+ * met:
+ *
+ * - Redistributions of source code must retain the above copyright
+ * notice, this list of conditions and the following disclaimer.
+
+ * - Redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in the
+ * documentation and/or other materials provided with the distribution.
+
+ * - Neither the name of the Qualys, Inc. nor the names of its
+ * contributors may be used to endorse or promote products derived from
+ * this software without specific prior written permission.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+ * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+ * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+ * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
+ * HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+ * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
+ * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+ * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
+ * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+ * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+ * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+ ***************************************************************************/
+
+/**
+ * @file
+ * @author Ivan Ristic <ivanr@webkreator.com>
+ */
+
+#ifndef HTP_CONNECTION_PARSER_PRIVATE_H
+#define HTP_CONNECTION_PARSER_PRIVATE_H
+
+#ifdef __cplusplus
+extern "C" {
+#endif
+
+#include "htp_core.h"
+
+/**
+ * Connection parser structure. Holds the parser-wide state first, followed
+ * by the request (inbound, "in_") fields and the response (outbound, "out_")
+ * fields.
+ */
+struct htp_connp_t {
+
+ // General fields
+
+ /** Current parser configuration structure. */
+ htp_cfg_t *cfg;
+
+ /** The connection structure associated with this parser. */
+ htp_conn_t *conn;
+
+ /** Opaque user data associated with this parser. */
+ const void *user_data;
+
+ /**
+ * On parser failure, this field will contain the error information. Do note, however,
+ * that the value in this field will only be valid immediately after an error condition,
+ * but it is not guaranteed to remain valid if the parser is invoked again.
+ */
+ htp_log_t *last_error;
+
+
+ // Request parser fields
+
+ /**
+ * Inbound stream state. Set to HTP_STREAM_NEW by htp_connp_create(), to
+ * HTP_STREAM_OPEN by htp_connp_open(), and to HTP_STREAM_CLOSED on close
+ * unless it is already HTP_STREAM_ERROR.
+ */
+ enum htp_stream_state_t in_status;
+
+ /**
+ * Outbound stream state. Set to HTP_STREAM_NEW by htp_connp_create(), to
+ * HTP_STREAM_OPEN by htp_connp_open(), and to HTP_STREAM_CLOSED on close
+ * unless it is already HTP_STREAM_ERROR.
+ */
+ enum htp_stream_state_t out_status;
+
+ /**
+ * When true, this field indicates that there is unprocessed inbound data, and
+ * that the response parsing code should stop at the end of the current request
+ * in order to allow more requests to be produced.
+ */
+ unsigned int out_data_other_at_tx_end;
+
+ /**
+ * The time when the last request data chunk was received.
+ * NOTE(review): this is an embedded htp_time_t rather than a pointer, so it
+ * cannot itself be NULL; presumably it is left unset when the upstream code
+ * does not provide timestamps -- confirm.
+ */
+ htp_time_t in_timestamp;
+
+ /** Pointer to the current request data chunk. */
+ unsigned char *in_current_data;
+
+ /** The length of the current request data chunk. */
+ int64_t in_current_len;
+
+ /** The offset of the next byte in the request data chunk to read. */
+ int64_t in_current_read_offset;
+
+ /**
+ * The starting point of the data waiting to be consumed. This field is used
+ * in the states where reading data is not the same as consumption.
+ */
+ int64_t in_current_consume_offset;
+
+ /**
+ * Marks the starting point of raw data within the inbound data chunk. Raw
+ * data (e.g., complete headers) is sent to appropriate callbacks (e.g.,
+ * REQUEST_HEADER_DATA).
+ */
+ int64_t in_current_receiver_offset;
+
+ /** How many data chunks does the inbound connection stream consist of? */
+ size_t in_chunk_count;
+
+ /** The index of the first chunk used in the current request. */
+ size_t in_chunk_request_index;
+
+ /** The offset, in the entire connection stream, of the next request byte. */
+ int64_t in_stream_offset;
+
+ /**
+ * The value of the request byte currently being processed. This field is
+ * populated when the IN_NEXT_* or IN_PEEK_* macros are invoked.
+ */
+ int in_next_byte;
+
+ /** Used to buffer a line of inbound data when buffering cannot be avoided. */
+ unsigned char *in_buf;
+
+ /** Stores the size of the buffer. Valid only when htp_connp_t::in_buf is not NULL. */
+ size_t in_buf_size;
+
+ /**
+ * Stores the current value of a folded request header. Such headers span
+ * multiple lines, and are processed only when all data is available.
+ */
+ bstr *in_header;
+
+ /** Ongoing inbound transaction. */
+ htp_tx_t *in_tx;
+
+ /**
+ * The request body length declared in a valid request header. The key here
+ * is "valid". This field will not be populated if the request contains both
+ * a Transfer-Encoding header and a Content-Length header.
+ */
+ int64_t in_content_length;
+
+ /**
+ * Holds the remaining request body length that we expect to read. This
+ * field will be available only when the length of a request body is known
+ * in advance, i.e. when request headers contain a Content-Length header.
+ */
+ int64_t in_body_data_left;
+
+ /**
+ * Holds the amount of data that needs to be read from the
+ * current data chunk. Only used with chunked request bodies.
+ */
+ int64_t in_chunked_length;
+
+ /** Current request parser state. */
+ int (*in_state)(htp_connp_t *);
+
+ /** Previous request parser state. Used to detect state changes. */
+ int (*in_state_previous)(htp_connp_t *);
+
+ /** The hook that should be receiving raw connection data. */
+ htp_hook_t *in_data_receiver_hook;
+
+ // Response parser fields
+
+ /**
+ * Response counter, incremented with every new response. This field is
+ * used to match responses to requests. The expectation is that for every
+ * response there will already be a transaction (request) waiting.
+ */
+ size_t out_next_tx_index;
+
+ /**
+ * The time when the last response data chunk was received.
+ * NOTE(review): embedded htp_time_t, not a pointer, so it cannot itself
+ * be NULL -- confirm how a missing timestamp is represented.
+ */
+ htp_time_t out_timestamp;
+
+ /** Pointer to the current response data chunk. */
+ unsigned char *out_current_data;
+
+ /** The length of the current response data chunk. */
+ int64_t out_current_len;
+
+ /** The offset of the next byte in the response data chunk to consume. */
+ int64_t out_current_read_offset;
+
+ /**
+ * The starting point of the data waiting to be consumed. This field is used
+ * in the states where reading data is not the same as consumption.
+ */
+ int64_t out_current_consume_offset;
+
+ /**
+ * Marks the starting point of raw data within the outbound data chunk. Raw
+ * data (e.g., complete headers) is sent to appropriate callbacks (e.g.,
+ * RESPONSE_HEADER_DATA).
+ */
+ int64_t out_current_receiver_offset;
+
+ /** The offset, in the entire connection stream, of the next response byte. */
+ int64_t out_stream_offset;
+
+ /** The value of the response byte currently being processed. */
+ int out_next_byte;
+
+ /** Used to buffer a line of outbound data when buffering cannot be avoided. */
+ unsigned char *out_buf;
+
+ /** Stores the size of the buffer. Valid only when htp_connp_t::out_buf is not NULL. */
+ size_t out_buf_size;
+
+ /**
+ * Stores the current value of a folded response header. Such headers span
+ * multiple lines, and are processed only when all data is available.
+ */
+ bstr *out_header;
+
+ /** Ongoing outbound transaction */
+ htp_tx_t *out_tx;
+
+ /**
+ * The length of the current response body as presented in the
+ * Content-Length response header.
+ */
+ int64_t out_content_length;
+
+ /** The remaining length of the current response body, if known. Set to -1 otherwise. */
+ int64_t out_body_data_left;
+
+ /**
+ * Holds the amount of data that needs to be read from the
+ * current response data chunk. Only used with chunked response bodies.
+ */
+ int64_t out_chunked_length;
+
+ /** Current response parser state. */
+ int (*out_state)(htp_connp_t *);
+
+ /** Previous response parser state. */
+ int (*out_state_previous)(htp_connp_t *);
+
+ /** The hook that should be receiving raw connection data. */
+ htp_hook_t *out_data_receiver_hook;
+
+ /** Response decompressor used to decompress response body data. */
+ htp_decompressor_t *out_decompressor;
+
+ /**
+ * On a PUT request, this field contains additional file data. The
+ * structure and its filename are released by htp_connp_destroy().
+ */
+ htp_file_t *put_file;
+
+ /** Request decompressor used to decompress request body data. */
+ htp_decompressor_t *req_decompressor;
+};
+
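+/**
+ * Resets the request-side body length tracking (content length and
+ * remaining body data) and records the current chunk count as the first
+ * chunk of the next request. Invoked by htp_connp_tx_create() whenever a
+ * new transaction is created.
+ *
+ * @param[in] connp
+ */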
+void htp_connp_in_reset(htp_connp_t *connp);
+
+#ifdef __cplusplus
+}
+#endif
+
+#endif /* HTP_CONNECTION_PARSER_PRIVATE_H */
+
diff --git a/htp/htp_connection_private.h b/htp/htp_connection_private.h
new file mode 100644
index 0000000..e4beccc
--- /dev/null
+++ b/htp/htp_connection_private.h
@@ -0,0 +1,121 @@
+/***************************************************************************
+ * Copyright (c) 2009-2010 Open Information Security Foundation
+ * Copyright (c) 2010-2013 Qualys, Inc.
+ * All rights reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions are
+ * met:
+ *
+ * - Redistributions of source code must retain the above copyright
+ * notice, this list of conditions and the following disclaimer.
+
+ * - Redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in the
+ * documentation and/or other materials provided with the distribution.
+
+ * - Neither the name of the Qualys, Inc. nor the names of its
+ * contributors may be used to endorse or promote products derived from
+ * this software without specific prior written permission.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+ * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+ * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+ * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
+ * HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+ * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
+ * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+ * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
+ * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+ * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+ * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+ ***************************************************************************/
+
+/**
+ * @file
+ * @author Ivan Ristic <ivanr@webkreator.com>
+ */
+
+#ifndef HTP_CONNECTION_H
+#define HTP_CONNECTION_H
+
+#ifdef __cplusplus
+extern "C" {
+#endif
+
+/**
+ * Creates a new connection structure.
+ *
+ * @return A new connection structure on success, NULL on memory allocation failure.
+ */
+htp_conn_t *htp_conn_create(void);
+
+/**
+ * Closes the connection.
+ *
+ * @param[in] conn
+ * @param[in] timestamp
+ */
+void htp_conn_close(htp_conn_t *conn, const htp_time_t *timestamp);
+
+/**
+ * Destroys a connection, as well as all the transactions it contains. It is
+ * not possible to destroy a connection structure yet leave any of its
+ * transactions intact. This is because transactions need their connection and
+ * connection structures hold little data anyway. The opposite is possible,
+ * though: a transaction can be deleted while its connection is left alive.
+ *
+ * @param[in] conn
+ */
+void htp_conn_destroy(htp_conn_t *conn);
+
+/**
+ * Opens a connection. This function will essentially only store the provided data
+ * for future reference. The timestamp parameter is optional.
+ *
+ * @param[in] conn
+ * @param[in] remote_addr
+ * @param[in] remote_port
+ * @param[in] local_addr
+ * @param[in] local_port
+ * @param[in] timestamp
+ * @return
+ */
+htp_status_t htp_conn_open(htp_conn_t *conn, const char *remote_addr, int remote_port,
+ const char *local_addr, int local_port, const htp_time_t *timestamp);
+
+/**
+ * Removes the given transaction structure, which makes it possible to
+ * safely destroy it. It is safe to destroy transactions in this way
+ * because the index of the transactions (in a connection) is preserved.
+ *
+ * @param[in] conn
+ * @param[in] tx
+ * @return HTP_OK if transaction was removed (replaced with NULL) or HTP_ERROR if it wasn't found.
+ */
+htp_status_t htp_conn_remove_tx(htp_conn_t *conn, const htp_tx_t *tx);
+
+/**
+ * Keeps track of inbound packets and data.
+ *
+ * @param[in] conn
+ * @param[in] len
+ * @param[in] timestamp
+ */
+void htp_conn_track_inbound_data(htp_conn_t *conn, size_t len, const htp_time_t *timestamp);
+
+/**
+ * Keeps track of outbound packets and data.
+ *
+ * @param[in] conn
+ * @param[in] len
+ * @param[in] timestamp
+ */
+void htp_conn_track_outbound_data(htp_conn_t *conn, size_t len, const htp_time_t *timestamp);
+
+#ifdef __cplusplus
+}
+#endif
+
+#endif /* HTP_CONNECTION_H */
+
diff --git a/htp/htp_content_handlers.c b/htp/htp_content_handlers.c
new file mode 100644
index 0000000..183a0f2
--- /dev/null
+++ b/htp/htp_content_handlers.c
@@ -0,0 +1,299 @@
+/***************************************************************************
+ * Copyright (c) 2009-2010 Open Information Security Foundation
+ * Copyright (c) 2010-2013 Qualys, Inc.
+ * All rights reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions are
+ * met:
+ *
+ * - Redistributions of source code must retain the above copyright
+ * notice, this list of conditions and the following disclaimer.
+
+ * - Redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in the
+ * documentation and/or other materials provided with the distribution.
+
+ * - Neither the name of the Qualys, Inc. nor the names of its
+ * contributors may be used to endorse or promote products derived from
+ * this software without specific prior written permission.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+ * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+ * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+ * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
+ * HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+ * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
+ * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+ * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
+ * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+ * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+ * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+ ***************************************************************************/
+
+/**
+ * @file
+ * @author Ivan Ristic <ivanr@webkreator.com>
+ */
+
+#include "htp_config_auto.h"
+
+#include "htp_private.h"
+
+/**
+ * Request body data callback for bodies handled by the Urlencoded parser.
+ * Every data chunk is pushed into the transaction's body parser; the final
+ * invocation (d->data == NULL) finalizes the parser and hands each parsed
+ * name/value pair over to the transaction as a body parameter.
+ *
+ * @param[in] d Callback data; d->tx is the transaction, d->data and d->len
+ *              describe the current chunk.
+ * @return HTP_OK on success; HTP_ERROR if invoked again after finalization,
+ *         if a parameter cannot be allocated, or if it cannot be added to
+ *         the transaction.
+ */
+htp_status_t htp_ch_urlencoded_callback_request_body_data(htp_tx_data_t *d) {
+    htp_tx_t *tx = d->tx;
+
+    // The parameter table is set to NULL once finalization has run,
+    // so a NULL table means we were called one time too many.
+    if (tx->request_urlenp_body->params == NULL) {
+        return HTP_ERROR;
+    }
+
+    // Ordinary chunk: feed it to the parser and wait for more.
+    if (d->data != NULL) {
+        htp_urlenp_parse_partial(tx->request_urlenp_body, d->data, d->len);
+        return HTP_OK;
+    }
+
+    // No data pointer: the body is complete, so finalize parsing.
+    htp_urlenp_finalize(tx->request_urlenp_body);
+
+    // Move every parsed pair into the transaction.
+    bstr *key = NULL;
+    bstr *val = NULL;
+    const size_t count = htp_table_size(tx->request_urlenp_body->params);
+
+    for (size_t idx = 0; idx < count; idx++) {
+        val = htp_table_get_index(tx->request_urlenp_body->params, idx, &key);
+
+        htp_param_t *entry = calloc(1, sizeof *entry);
+        if (entry == NULL) {
+            return HTP_ERROR;
+        }
+
+        entry->name = key;
+        entry->value = val;
+        entry->source = HTP_SOURCE_BODY;
+        entry->parser_id = HTP_PARSER_URLENCODED;
+        entry->parser_data = NULL;
+
+        if (htp_tx_req_add_param(tx, entry) != HTP_OK) {
+            free(entry);
+            return HTP_ERROR;
+        }
+    }
+
+    // The transaction now owns all parameter data, so only the table
+    // itself is destroyed here; its keys are left intact.
+    htp_table_destroy_ex(tx->request_urlenp_body->params);
+    tx->request_urlenp_body->params = NULL;
+
+    return HTP_OK;
+}
+
+/**
+ * Request headers callback: when the request content type begins with
+ * HTP_URLENCODED_MIME_TYPE, creates a Urlencoded parser for the body,
+ * attaches it to the transaction and registers the body data callback.
+ *
+ * @param[in] tx Transaction whose request_content_type is inspected.
+ * @return HTP_OK if a new parser has been setup, HTP_DECLINED if the MIME type
+ *         is not appropriate for this parser, and HTP_ERROR on failure.
+ */
+htp_status_t htp_ch_urlencoded_callback_request_headers(htp_tx_t *tx) {
+    // Does the request content type match our MIME type?
+    int is_urlencoded = (tx->request_content_type != NULL)
+            && bstr_begins_with_c(tx->request_content_type, HTP_URLENCODED_MIME_TYPE);
+
+    if (!is_urlencoded) {
+        #ifdef HTP_DEBUG
+        fprintf(stderr, "htp_ch_urlencoded_callback_request_headers: Body not URLENCODED\n");
+        #endif
+
+        return HTP_DECLINED;
+    }
+
+    #ifdef HTP_DEBUG
+    fprintf(stderr, "htp_ch_urlencoded_callback_request_headers: Parsing URLENCODED body\n");
+    #endif
+
+    // Create the body parser for this transaction.
+    tx->request_urlenp_body = htp_urlenp_create(tx);
+    if (tx->request_urlenp_body == NULL) {
+        return HTP_ERROR;
+    }
+
+    // From now on body data is routed to the Urlencoded callback.
+    htp_tx_register_request_body_data(tx, htp_ch_urlencoded_callback_request_body_data);
+
+    return HTP_OK;
+}
+
+/**
+ * Request line callback that parses the query string of the request URI,
+ * if one is present, and adds every parameter found to the transaction.
+ * The temporary query-string parser is destroyed before returning on the
+ * success path and on parse failure.
+ *
+ * @param[in] tx Transaction whose parsed_uri->query is examined.
+ * @return HTP_OK if query string was parsed, HTP_DECLINED if there was no query
+ *         string, and HTP_ERROR on failure.
+ */
+htp_status_t htp_ch_urlencoded_callback_request_line(htp_tx_t *tx) {
+    // Proceed only if there's something for us to parse.
+    if ((tx->parsed_uri->query == NULL) || (bstr_len(tx->parsed_uri->query) == 0)) {
+        return HTP_DECLINED;
+    }
+
+    // We have a non-zero length query string.
+
+    tx->request_urlenp_query = htp_urlenp_create(tx);
+    if (tx->request_urlenp_query == NULL) return HTP_ERROR;
+
+    if (htp_urlenp_parse_complete(tx->request_urlenp_query, bstr_ptr(tx->parsed_uri->query),
+            bstr_len(tx->parsed_uri->query)) != HTP_OK) {
+        htp_urlenp_destroy(tx->request_urlenp_query);
+        // Clear the field, just as the success path below does, so the
+        // transaction is not left pointing at a destroyed parser.
+        tx->request_urlenp_query = NULL;
+        return HTP_ERROR;
+    }
+
+    // Add all parameters to the transaction.
+    // NOTE(review): the error returns inside this loop leave the parser (and
+    // its table) attached to the transaction although some names/values were
+    // already handed over -- confirm that teardown copes with that.
+
+    bstr *name = NULL;
+    bstr *value = NULL;
+    for (size_t i = 0, n = htp_table_size(tx->request_urlenp_query->params); i < n; i++) {
+        value = htp_table_get_index(tx->request_urlenp_query->params, i, &name);
+
+        htp_param_t *param = calloc(1, sizeof (htp_param_t));
+        if (param == NULL) return HTP_ERROR;
+
+        param->name = name;
+        param->value = value;
+        param->source = HTP_SOURCE_QUERY_STRING;
+        param->parser_id = HTP_PARSER_URLENCODED;
+        param->parser_data = NULL;
+
+        if (htp_tx_req_add_param(tx, param) != HTP_OK) {
+            free(param);
+            return HTP_ERROR;
+        }
+    }
+
+    // All the parameter data is now owned by the transaction, and
+    // the parser table used to store it is no longer needed. The
+    // line below will destroy just the table, leaving keys intact.
+    htp_table_destroy_ex(tx->request_urlenp_query->params);
+    tx->request_urlenp_query->params = NULL;
+
+    htp_urlenp_destroy(tx->request_urlenp_query);
+    tx->request_urlenp_query = NULL;
+
+    return HTP_OK;
+}
+
+/**
+ * Request body data callback for Multipart bodies. Data chunks are fed to
+ * the transaction's Multipart parser; the final invocation (d->data == NULL)
+ * finalizes the parser and registers every MULTIPART_PART_TEXT part as a
+ * body parameter of the transaction.
+ *
+ * @param[in] d Callback data; d->tx is the transaction, d->data and d->len
+ *              describe the current chunk.
+ * @return HTP_OK on success, HTP_ERROR on failure.
+ */
+htp_status_t htp_ch_multipart_callback_request_body_data(htp_tx_data_t *d) {
+    htp_tx_t *tx = d->tx;
+
+    // Once the parser has given up its data we must not be called again.
+    if (tx->request_mpartp->gave_up_data == 1) {
+        return HTP_ERROR;
+    }
+
+    // Ordinary chunk: hand it to the parser and return.
+    if (d->data != NULL) {
+        htp_mpartp_parse(tx->request_mpartp, d->data, d->len);
+        return HTP_OK;
+    }
+
+    // End of body: finalize parsing and collect the parts.
+    htp_mpartp_finalize(tx->request_mpartp);
+
+    htp_multipart_t *body = htp_mpartp_get_multipart(tx->request_mpartp);
+    const size_t part_count = htp_list_size(body->parts);
+
+    for (size_t idx = 0; idx < part_count; idx++) {
+        htp_multipart_part_t *part = htp_list_get(body->parts, idx);
+
+        // Only text parts become parameters.
+        if (part->type != MULTIPART_PART_TEXT) {
+            continue;
+        }
+
+        htp_param_t *entry = calloc(1, sizeof *entry);
+        if (entry == NULL) {
+            return HTP_ERROR;
+        }
+
+        entry->name = part->name;
+        entry->value = part->value;
+        entry->source = HTP_SOURCE_BODY;
+        entry->parser_id = HTP_PARSER_MULTIPART;
+        entry->parser_data = part;
+
+        if (htp_tx_req_add_param(tx, entry) != HTP_OK) {
+            free(entry);
+            return HTP_ERROR;
+        }
+    }
+
+    // Tell the parser that it no longer owns names
+    // and values of MULTIPART_PART_TEXT parts.
+    tx->request_mpartp->gave_up_data = 1;
+
+    return HTP_OK;
+}
+
+/**
+ * Request headers callback: looks for a Multipart boundary in the
+ * Content-Type request header and, when one is found, creates a Multipart
+ * parser for the transaction and registers the Multipart body data callback.
+ *
+ * @param[in] tx Transaction whose request headers are inspected.
+ * @return HTP_OK if a new parser has been setup, HTP_DECLINED if the MIME type
+ *         is not appropriate for this parser, and HTP_ERROR on failure.
+ */
+htp_status_t htp_ch_multipart_callback_request_headers(htp_tx_t *tx) {
+    #ifdef HTP_DEBUG
+    fprintf(stderr, "htp_ch_multipart_callback_request_headers: Need to determine if multipart body is present\n");
+    #endif
+
+    // tx->request_content_type does not hold the complete C-T value, so it
+    // cannot be searched for a boundary; it is still good enough to tell
+    // quickly whether a C-T header exists at all.
+    if (tx->request_content_type == NULL) {
+        #ifdef HTP_DEBUG
+        fprintf(stderr, "htp_ch_multipart_callback_request_headers: Not multipart body (no C-T header)\n");
+        #endif
+
+        return HTP_DECLINED;
+    }
+
+    // Fetch the raw header; the boundary is extracted from its value.
+    htp_header_t *ctype_header = htp_table_get_c(tx->request_headers, "content-type");
+    if (ctype_header == NULL) {
+        return HTP_ERROR;
+    }
+
+    bstr *boundary = NULL;
+    uint64_t mp_flags = 0;
+
+    htp_status_t status = htp_mpartp_find_boundary(ctype_header->value, &boundary, &mp_flags);
+    if (status != HTP_OK) {
+        #ifdef HTP_DEBUG
+        if (status == HTP_DECLINED) {
+            fprintf(stderr, "htp_ch_multipart_callback_request_headers: Not multipart body\n");
+        }
+        #endif
+
+        // Either there is no boundary (HTP_DECLINED) or the search failed (HTP_ERROR).
+        return status;
+    }
+
+    if (boundary == NULL) {
+        return HTP_ERROR;
+    }
+
+    // Create the Multipart parser; on failure the boundary is still ours to free.
+    tx->request_mpartp = htp_mpartp_create(tx->connp->cfg, boundary, mp_flags);
+    if (tx->request_mpartp == NULL) {
+        bstr_free(boundary);
+        return HTP_ERROR;
+    }
+
+    // Enable file extraction when the configuration asks for it.
+    if (tx->cfg->extract_request_files) {
+        tx->request_mpartp->extract_files = 1;
+        tx->request_mpartp->extract_dir = tx->connp->cfg->tmpdir;
+    }
+
+    // From now on body data is routed to the Multipart callback.
+    htp_tx_register_request_body_data(tx, htp_ch_multipart_callback_request_body_data);
+
+    return HTP_OK;
+}
diff --git a/htp/htp_cookies.c b/htp/htp_cookies.c
new file mode 100644
index 0000000..bb26822
--- /dev/null
+++ b/htp/htp_cookies.c
@@ -0,0 +1,119 @@
+/***************************************************************************
+ * Copyright (c) 2009-2010 Open Information Security Foundation
+ * Copyright (c) 2010-2013 Qualys, Inc.
+ * All rights reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions are
+ * met:
+ *
+ * - Redistributions of source code must retain the above copyright
+ * notice, this list of conditions and the following disclaimer.
+
+ * - Redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in the
+ * documentation and/or other materials provided with the distribution.
+
+ * - Neither the name of the Qualys, Inc. nor the names of its
+ * contributors may be used to endorse or promote products derived from
+ * this software without specific prior written permission.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+ * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+ * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+ * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
+ * HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+ * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
+ * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+ * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
+ * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+ * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+ * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+ ***************************************************************************/
+
+/**
+ * @file
+ * @author Ivan Ristic <ivanr@webkreator.com>
+ */
+
+#include "htp_config_auto.h"
+
+#include "htp_private.h"
+
+/**
+ * Parses a single v0 request cookie and places the results into tx->request_cookies.
+ * The text before the first '=' becomes the cookie name and everything after
+ * it the value; a cookie without '=' is stored with an empty value, and a
+ * cookie with an empty name is ignored.
+ *
+ * @param[in] connp Connection parser; the cookie is added to connp->in_tx->request_cookies.
+ * @param[in] data  Start of the cookie text (not NUL-terminated).
+ * @param[in] len   Number of bytes available at data.
+ * @return HTP_OK on success (including ignored cookies), HTP_ERROR on
+ *         allocation failure or if the cookie cannot be stored.
+ */
+int htp_parse_single_cookie_v0(htp_connp_t *connp, unsigned char *data, size_t len) {
+    if (len == 0) return HTP_OK;
+
+    size_t pos = 0;
+
+    // Look for '='.
+    while ((pos < len) && (data[pos] != '=')) pos++;
+    if (pos == 0) return HTP_OK; // Ignore a nameless cookie.
+
+    bstr *name = bstr_dup_mem(data, pos);
+    if (name == NULL) return HTP_ERROR;
+
+    bstr *value = NULL;
+    if (pos == len) {
+        // The cookie is empty.
+        value = bstr_dup_c("");
+    } else {
+        // The cookie is not empty.
+        value = bstr_dup_mem(data + pos + 1, len - pos - 1);
+    }
+
+    if (value == NULL) {
+        bstr_free(name);
+        return HTP_ERROR;
+    }
+
+    // Do not ignore a failed insert: report it, and release both strings.
+    // NOTE(review): assumes htp_table_addn() keeps neither pointer when it
+    // fails -- confirm against the table implementation.
+    if (htp_table_addn(connp->in_tx->request_cookies, name, value) != HTP_OK) {
+        bstr_free(name);
+        bstr_free(value);
+        return HTP_ERROR;
+    }
+
+    return HTP_OK;
+}
+
+/**
+ * Parses the Cookie request header in v0 format. The header value is split
+ * on ';', whitespace in front of each cookie is skipped, and every piece is
+ * passed to htp_parse_single_cookie_v0(). Nothing is done when the request
+ * carries no Cookie header.
+ *
+ * @param[in] connp Connection parser; connp->in_tx supplies the headers and
+ *                  receives the new request_cookies table.
+ * @return HTP_OK on success, HTP_ERROR on error
+ */
+htp_status_t htp_parse_cookies_v0(htp_connp_t *connp) {
+    htp_header_t *hdr = htp_table_get_c(connp->in_tx->request_headers, "cookie");
+    if (hdr == NULL) {
+        return HTP_OK;
+    }
+
+    // Fresh table that will hold the parsed cookies.
+    connp->in_tx->request_cookies = htp_table_create(4);
+    if (connp->in_tx->request_cookies == NULL) {
+        return HTP_ERROR;
+    }
+
+    unsigned char *raw = bstr_ptr(hdr->value);
+    size_t total = bstr_len(hdr->value);
+    size_t cursor = 0;
+
+    for (;;) {
+        // Skip whitespace in front of the next cookie.
+        for (; cursor < total; cursor++) {
+            if (!isspace((int) raw[cursor])) break;
+        }
+
+        // Nothing but whitespace (or nothing at all) left.
+        if (cursor >= total) {
+            break;
+        }
+
+        size_t begin = cursor;
+
+        // Advance to the ';' that ends this cookie, or to the end of the value.
+        for (; cursor < total; cursor++) {
+            if (raw[cursor] == ';') break;
+        }
+
+        if (htp_parse_single_cookie_v0(connp, raw + begin, cursor - begin) != HTP_OK) {
+            return HTP_ERROR;
+        }
+
+        // Step over the semicolon, if we stopped on one.
+        if (cursor < total) {
+            cursor++;
+        }
+    }
+
+    return HTP_OK;
+}
diff --git a/htp/htp_core.h b/htp/htp_core.h
new file mode 100644
index 0000000..e4c933e
--- /dev/null
+++ b/htp/htp_core.h
@@ -0,0 +1,353 @@
+/***************************************************************************
+ * Copyright (c) 2009-2010 Open Information Security Foundation
+ * Copyright (c) 2010-2013 Qualys, Inc.
+ * All rights reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions are
+ * met:
+ *
+ * - Redistributions of source code must retain the above copyright
+ * notice, this list of conditions and the following disclaimer.
+
+ * - Redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in the
+ * documentation and/or other materials provided with the distribution.
+
+ * - Neither the name of the Qualys, Inc. nor the names of its
+ * contributors may be used to endorse or promote products derived from
+ * this software without specific prior written permission.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+ * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+ * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+ * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
+ * HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+ * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
+ * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+ * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
+ * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+ * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+ * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+ ***************************************************************************/
+
+/**
+ * @file
+ * @author Ivan Ristic <ivanr@webkreator.com>
+ */
+
+#ifndef HTP_CORE_H
+#define HTP_CORE_H
+
+#ifdef __cplusplus
+extern "C" {
+#endif
+
+typedef int htp_status_t;
+
+typedef struct htp_cfg_t htp_cfg_t;
+typedef struct htp_conn_t htp_conn_t;
+typedef struct htp_connp_t htp_connp_t;
+typedef struct htp_file_t htp_file_t;
+typedef struct htp_file_data_t htp_file_data_t;
+typedef struct htp_header_t htp_header_t;
+typedef struct htp_header_line_t htp_header_line_t;
+typedef struct htp_log_t htp_log_t;
+typedef struct htp_param_t htp_param_t;
+typedef struct htp_tx_data_t htp_tx_data_t;
+typedef struct htp_tx_t htp_tx_t;
+typedef struct htp_uri_t htp_uri_t;
+typedef struct timeval htp_time_t;
+
+// Below are all htp_status_t return codes used by LibHTP. Enum is not
+// used here to allow applications to define their own codes.
+
+/**
+ * The lowest htp_status_t value LibHTP will use internally.
+ */
+#define HTP_ERROR_RESERVED -1000
+
+/** General-purpose error code. */
+#define HTP_ERROR -1
+
+/**
+ * No processing or work was done. This is typically used by callbacks
+ * to indicate that they were not interested in doing any work in the
+ * given context.
+ */
+#define HTP_DECLINED 0
+
+/** Returned by a function when its work was successfully completed. */
+#define HTP_OK 1
+
+/**
+ * Returned when processing a connection stream, after consuming all
+ * provided data. The caller should call again with more data.
+ */
+#define HTP_DATA 2
+
+/**
+ * Returned when processing a connection stream, after encountering
+ * a situation where processing needs to continue on the alternate
+ * stream (e.g., the inbound parser needs to observe some outbound
+ * data). The data provided was not completely consumed. On the next
+ * invocation the caller should supply only the data that has not
+ * been processed already. Use htp_connp_req_data_consumed() and
+ * htp_connp_res_data_consumed() to determine how much of the most
+ * recent data chunk was consumed.
+ */
+#define HTP_DATA_OTHER 3
+
+/**
+ * Used by callbacks to indicate that the processing should stop. For example,
+ * returning HTP_STOP from a connection callback indicates that LibHTP should
+ * stop following that particular connection.
+ */
+#define HTP_STOP 4
+
+/**
+ * Same as HTP_DATA, but indicates that any non-consumed part of the
+ * data chunk should be preserved (buffered) for later.
+ */
+#define HTP_DATA_BUFFER 5
+
+/**
+ * The highest htp_status_t value LibHTP will use internally.
+ */
+#define HTP_STATUS_RESERVED 1000
+
+/**
+ * Enumerates the possible values for authentication type.
+ */
+enum htp_auth_type_t {
+ /**
+ * This is the default value that is used before
+ * the presence of authentication is determined (e.g.,
+ * before request headers are seen).
+ */
+ HTP_AUTH_UNKNOWN = 0,
+
+ /** No authentication. */
+ HTP_AUTH_NONE = 1,
+
+ /** HTTP Basic authentication used. */
+ HTP_AUTH_BASIC = 2,
+
+ /** HTTP Digest authentication used. */
+ HTP_AUTH_DIGEST = 3,
+
+ /** HTTP Bearer authentication used. */
+ HTP_AUTH_BEARER = 4,
+
+ /** Unrecognized authentication method. */
+ HTP_AUTH_UNRECOGNIZED = 9
+};
+
+enum htp_content_encoding_t {
+ /**
+ * This is the default value, which is used until the presence
+ * of content encoding is determined (e.g., before request headers
+ * are seen).
+ */
+ HTP_COMPRESSION_UNKNOWN = 0,
+
+ /** No compression. */
+ HTP_COMPRESSION_NONE = 1,
+
+ /** Gzip compression. */
+ HTP_COMPRESSION_GZIP = 2,
+
+ /** Deflate compression. */
+ HTP_COMPRESSION_DEFLATE = 3,
+
+ /** LZMA compression. */
+ HTP_COMPRESSION_LZMA = 4
+};
+
+/**
+ * Enumerates the possible request and response body codings.
+ */
+enum htp_transfer_coding_t {
+ /** Body coding not determined yet. */
+ HTP_CODING_UNKNOWN = 0,
+
+ /** No body. */
+ HTP_CODING_NO_BODY = 1,
+
+ /** Identity coding is used, which means that the body was sent as is. */
+ HTP_CODING_IDENTITY = 2,
+
+ /** Chunked encoding. */
+ HTP_CODING_CHUNKED = 3,
+
+ /** We could not recognize the encoding. */
+ HTP_CODING_INVALID = 4
+};
+
+enum htp_file_source_t {
+ /** Presumably a file taken from a Multipart request body -- TODO confirm against the Multipart parser. */
+ HTP_FILE_MULTIPART = 1,
+ /** Presumably a file carried in the body of a PUT request -- TODO confirm against the request parser. */
+ HTP_FILE_PUT = 2
+};
+
+// Various flag bits. Even though we have a flag field in several places
+// (header, transaction, connection), these fields are all in the same namespace
+// because we may want to set the same flag in several locations. For example, we
+// may set HTP_FIELD_FOLDED on the actual folded header, but also on the transaction
+// that contains the header. Both uses are useful.
+
+// Connection flags are 8 bits wide.
+#define HTP_CONN_PIPELINED 0x000000001ULL
+#define HTP_CONN_HTTP_0_9_EXTRA 0x000000002ULL
+
+// All other flags are 64 bits wide.
+#define HTP_FIELD_UNPARSEABLE 0x000000004ULL
+#define HTP_FIELD_INVALID 0x000000008ULL
+#define HTP_FIELD_FOLDED 0x000000010ULL
+#define HTP_FIELD_REPEATED 0x000000020ULL
+#define HTP_FIELD_LONG 0x000000040ULL
+#define HTP_FIELD_RAW_NUL 0x000000080ULL
+#define HTP_REQUEST_SMUGGLING 0x000000100ULL
+#define HTP_INVALID_FOLDING 0x000000200ULL
+#define HTP_REQUEST_INVALID_T_E 0x000000400ULL
+#define HTP_MULTI_PACKET_HEAD 0x000000800ULL
+#define HTP_HOST_MISSING 0x000001000ULL
+#define HTP_HOST_AMBIGUOUS 0x000002000ULL
+#define HTP_PATH_ENCODED_NUL 0x000004000ULL
+#define HTP_PATH_RAW_NUL 0x000008000ULL
+#define HTP_PATH_INVALID_ENCODING 0x000010000ULL
+#define HTP_PATH_INVALID 0x000020000ULL
+#define HTP_PATH_OVERLONG_U 0x000040000ULL
+#define HTP_PATH_ENCODED_SEPARATOR 0x000080000ULL
+#define HTP_PATH_UTF8_VALID 0x000100000ULL /* At least one valid UTF-8 character and no invalid ones. */
+#define HTP_PATH_UTF8_INVALID 0x000200000ULL
+#define HTP_PATH_UTF8_OVERLONG 0x000400000ULL
+#define HTP_PATH_HALF_FULL_RANGE 0x000800000ULL /* Range U+FF00 - U+FFEF detected. */
+#define HTP_STATUS_LINE_INVALID 0x001000000ULL
+#define HTP_HOSTU_INVALID 0x002000000ULL /* Host in the URI. */
+#define HTP_HOSTH_INVALID 0x004000000ULL /* Host in the Host header. */
+#define HTP_URLEN_ENCODED_NUL 0x008000000ULL
+#define HTP_URLEN_INVALID_ENCODING 0x010000000ULL
+#define HTP_URLEN_OVERLONG_U 0x020000000ULL
+#define HTP_URLEN_HALF_FULL_RANGE 0x040000000ULL /* Range U+FF00 - U+FFEF detected. */
+#define HTP_URLEN_RAW_NUL 0x080000000ULL
+#define HTP_REQUEST_INVALID 0x100000000ULL
+#define HTP_REQUEST_INVALID_C_L 0x200000000ULL
+#define HTP_AUTH_INVALID 0x400000000ULL
+
+#define HTP_MAX_HEADERS_REPETITIONS 64
+
+#define HTP_HOST_INVALID ( HTP_HOSTU_INVALID | HTP_HOSTH_INVALID )
+
+// Logging-related constants.
+#define HTP_LOG_MARK __FILE__,__LINE__
+
+/**
+ * Enumerates all log levels.
+ */
+enum htp_log_level_t {
+ HTP_LOG_NONE = 0,
+ HTP_LOG_ERROR = 1,
+ HTP_LOG_WARNING = 2,
+ HTP_LOG_NOTICE = 3,
+ HTP_LOG_INFO = 4,
+ HTP_LOG_DEBUG = 5,
+ HTP_LOG_DEBUG2 = 6
+};
+
+/**
+ * HTTP methods.
+ */
+enum htp_method_t {
+ /**
+ * Used by default, until the method is determined (e.g., before
+ * the request line is processed).
+ */
+ HTP_M_UNKNOWN = 0,
+ HTP_M_HEAD = 1,
+ HTP_M_GET = 2,
+ HTP_M_PUT = 3,
+ HTP_M_POST = 4,
+ HTP_M_DELETE = 5,
+ HTP_M_CONNECT = 6,
+ HTP_M_OPTIONS = 7,
+ HTP_M_TRACE = 8,
+ HTP_M_PATCH = 9,
+ HTP_M_PROPFIND = 10,
+ HTP_M_PROPPATCH = 11,
+ HTP_M_MKCOL = 12,
+ HTP_M_COPY = 13,
+ HTP_M_MOVE = 14,
+ HTP_M_LOCK = 15,
+ HTP_M_UNLOCK = 16,
+ HTP_M_VERSION_CONTROL = 17,
+ HTP_M_CHECKOUT = 18,
+ HTP_M_UNCHECKOUT = 19,
+ HTP_M_CHECKIN = 20,
+ HTP_M_UPDATE = 21,
+ HTP_M_LABEL = 22,
+ HTP_M_REPORT = 23,
+ HTP_M_MKWORKSPACE = 24,
+ HTP_M_MKACTIVITY = 25,
+ HTP_M_BASELINE_CONTROL = 26,
+ HTP_M_MERGE = 27,
+ HTP_M_INVALID = 28
+};
+
+// A collection of unique parser IDs.
+enum htp_parser_id_t {
+ /** application/x-www-form-urlencoded parser. */
+ HTP_PARSER_URLENCODED = 0,
+
+ /** multipart/form-data parser. */
+ HTP_PARSER_MULTIPART = 1
+};
+
+// Protocol version constants; an enum cannot be
+// used here because we allow any properly-formatted protocol
+// version (e.g., 1.3), even those that do not actually exist.
+#define HTP_PROTOCOL_INVALID -2
+#define HTP_PROTOCOL_UNKNOWN -1
+#define HTP_PROTOCOL_0_9 9
+#define HTP_PROTOCOL_1_0 100
+#define HTP_PROTOCOL_1_1 101
+
+// A collection of possible data sources.
+enum htp_data_source_t {
+ /** Embedded in the URL. */
+ HTP_SOURCE_URL = 0,
+
+ /** Transported in the query string. */
+ HTP_SOURCE_QUERY_STRING = 1,
+
+ /** Cookies. */
+ HTP_SOURCE_COOKIE = 2,
+
+ /** Transported in the request body. */
+ HTP_SOURCE_BODY = 3
+};
+
+#define HTP_STATUS_INVALID -1
+#define HTP_STATUS_UNKNOWN 0
+
+/**
+ * Enumerates all stream states. Each connection has two streams, one
+ * inbound and one outbound. Their states are tracked separately.
+ */
+enum htp_stream_state_t {
+ HTP_STREAM_NEW = 0,
+ HTP_STREAM_OPEN = 1,
+ HTP_STREAM_CLOSED = 2,
+ HTP_STREAM_ERROR = 3,
+ HTP_STREAM_TUNNEL = 4,
+ HTP_STREAM_DATA_OTHER = 5,
+ HTP_STREAM_STOP = 6,
+ HTP_STREAM_DATA = 9
+};
+
+#ifdef __cplusplus
+}
+#endif
+
+#endif /* HTP_CORE_H */
diff --git a/htp/htp_decompressors.c b/htp/htp_decompressors.c
new file mode 100644
index 0000000..fe12833
--- /dev/null
+++ b/htp/htp_decompressors.c
@@ -0,0 +1,490 @@
+/***************************************************************************
+ * Copyright (c) 2009-2010 Open Information Security Foundation
+ * Copyright (c) 2010-2013 Qualys, Inc.
+ * All rights reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions are
+ * met:
+ *
+ * - Redistributions of source code must retain the above copyright
+ * notice, this list of conditions and the following disclaimer.
+
+ * - Redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in the
+ * documentation and/or other materials provided with the distribution.
+
+ * - Neither the name of the Qualys, Inc. nor the names of its
+ * contributors may be used to endorse or promote products derived from
+ * this software without specific prior written permission.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+ * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+ * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+ * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
+ * HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+ * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
+ * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+ * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
+ * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+ * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+ * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+ ***************************************************************************/
+
+/**
+ * @file
+ * @author Ivan Ristic <ivanr@webkreator.com>
+ */
+
+#include "htp_config_auto.h"
+
+#include "htp_private.h"
+
+// LZMA allocator callbacks: forward to malloc()/free(), ignoring the handle p.
+static void *SzAlloc(ISzAllocPtr p, size_t size) { return malloc(size); }
+static void SzFree(ISzAllocPtr p, void *address) { free(address); }
+const ISzAlloc lzma_Alloc = { SzAlloc, SzFree };
+
+
+/**
+ * @brief Inspect a gzip header (magic 0x1f 0x8b, flags in data[3]) and work out how many leading bytes to skip
+ * @return number of bytes to skip; 0 if data_len < 4, the magic/flags test fails, or the skip would exceed data_len
+ */
+static size_t htp_gzip_decompressor_probe(const unsigned char *data,
+ size_t data_len)
+{
+ if (data_len < 4)
+ return 0;
+
+ size_t consumed = 0;
+
+ if (data[0] == 0x1f && data[1] == 0x8b && data[3] != 0) {
+ if (data[3] & (1 << 3) || data[3] & (1 << 4)) {
+ /* skip past
+ * - FNAME extension, which is a name ended in a NUL terminator
+ * or
+ * - FCOMMENT extension, which is a comment ended in a NUL terminator
+ */
+ // Scan from offset 10 to the first NUL; only one field is skipped even if both flags are set.
+ size_t len;
+ for (len = 10; len < data_len && data[len] != '\0'; len++);
+ consumed = len + 1;
+
+ //printf("skipped %u bytes for FNAME/FCOMMENT header (GZIP)\n", (uint)consumed);
+
+ } else if (data[3] & (1 << 1)) {
+ consumed = 12;
+ //printf("skipped %u bytes for FHCRC header (GZIP)\n", 12);
+
+ } else {
+ //printf("GZIP unknown/unsupported flags %02X\n", data[3]);
+ consumed = 10;
+ }
+ }
+
+ if (consumed > data_len)
+ return 0;
+
+ return consumed;
+}
+
+/**
+ * @brief Re-initialise zlib for another attempt on the same data; gives up once drec->restart reaches 3
+ * @return 1 if it restarted (*consumed_back is then set to the number of bytes to skip), 0 otherwise
+ */
+static int htp_gzip_decompressor_restart(htp_decompressor_gzip_t *drec,
+ const unsigned char *data,
+ size_t data_len, size_t *consumed_back)
+{
+ size_t consumed = 0;
+ int rc = 0;
+
+ if (drec->restart < 3) {
+
+ // first retry with the existing type, but now consider the
+ // extensions
+ if (drec->restart == 0) {
+ consumed = htp_gzip_decompressor_probe(data, data_len);
+
+ if (drec->zlib_initialized == HTP_COMPRESSION_GZIP) {
+ //printf("GZIP restart, consumed %u\n", (uint)consumed);
+ rc = inflateInit2(&drec->stream, 15 + 32);
+ } else {
+ //printf("DEFLATE restart, consumed %u\n", (uint)consumed);
+ rc = inflateInit2(&drec->stream, -15);
+ }
+ if (rc != Z_OK)
+ return 0;
+
+ goto restart;
+
+ // if that still fails, switch to the other method we support (15 + 32 <-> -15)
+
+ } else if (drec->zlib_initialized == HTP_COMPRESSION_DEFLATE) {
+ rc = inflateInit2(&drec->stream, 15 + 32);
+ if (rc != Z_OK)
+ return 0;
+
+ drec->zlib_initialized = HTP_COMPRESSION_GZIP;
+ consumed = htp_gzip_decompressor_probe(data, data_len);
+#if 0
+ printf("DEFLATE -> GZIP consumed %u\n", (uint)consumed);
+#endif
+ goto restart;
+
+ } else if (drec->zlib_initialized == HTP_COMPRESSION_GZIP) {
+ rc = inflateInit2(&drec->stream, -15);
+ if (rc != Z_OK)
+ return 0;
+
+ drec->zlib_initialized = HTP_COMPRESSION_DEFLATE;
+ consumed = htp_gzip_decompressor_probe(data, data_len);
+#if 0
+ printf("GZIP -> DEFLATE consumed %u\n", (uint)consumed);
+#endif
+ goto restart;
+ }
+ }
+ return 0;
+
+restart:
+#if 0
+ gz_header y;
+ gz_headerp x = &y;
+ int res = inflateGetHeader(&drec->stream, x);
+ printf("HEADER res %d x.os %d x.done %d\n", res, x->os, x->done);
+#endif
+ *consumed_back = consumed;
+ drec->restart++;
+ return 1;
+}
+
+/**
+ * Releases whichever backend is active and marks the decompressor uninitialised.
+ * @param[in] drec Decompressor whose LZMA state or zlib stream is released.
+ */
+static void htp_gzip_decompressor_end(htp_decompressor_gzip_t *drec) {
+ if (drec->zlib_initialized == HTP_COMPRESSION_LZMA) {
+ LzmaDec_Free(&drec->state, &lzma_Alloc);
+ } else if (drec->zlib_initialized != 0) {
+ inflateEnd(&drec->stream);
+ } else {
+ return;
+ }
+ drec->zlib_initialized = 0;
+}
+
+/**
+ * Decompress one chunk (gzip, deflate or LZMA, per drec->zlib_initialized) and pass the output on.
+ * Output goes to the next decompressor in the chain (recursively) if there is one, else to the callback.
+ *
+ * @param[in] drec1 Decompressor record; cast to htp_decompressor_gzip_t internally.
+ * @param[in] d Input chunk; d->data == NULL flushes the output buffered so far.
+ * @return HTP_OK on success; HTP_ERROR, a callback status, or (NOTE(review)) the raw LzmaDec_Allocate code on failure.
+ */
+htp_status_t htp_gzip_decompressor_decompress(htp_decompressor_t *drec1, htp_tx_data_t *d) {
+ size_t consumed = 0;
+ int rc = 0;
+ htp_status_t callback_rc;
+ htp_decompressor_gzip_t *drec = (htp_decompressor_gzip_t*) drec1;
+
+ // Passthrough mode (every inflate attempt failed, or LZMA is disabled): forward the chunk undecoded.
+
+ if (drec->super.passthrough) {
+ htp_tx_data_t d2;
+ d2.tx = d->tx;
+ d2.data = d->data;
+ d2.len = d->len;
+ d2.is_last = d->is_last;
+
+ callback_rc = drec->super.callback(&d2);
+ if (callback_rc != HTP_OK) {
+ return HTP_ERROR;
+ }
+
+ return HTP_OK;
+ }
+
+ if (d->data == NULL) {
+ // NULL chunk: prepare whatever is still buffered for the callback.
+ htp_tx_data_t dout;
+ dout.tx = d->tx;
+ // This is last call, so output uncompressed data so far
+ dout.len = GZIP_BUF_SIZE - drec->stream.avail_out;
+ if (dout.len > 0) {
+ dout.data = drec->buffer;
+ } else {
+ dout.data = NULL;
+ }
+ dout.is_last = d->is_last;
+ if (drec->super.next != NULL && drec->zlib_initialized) {
+ return htp_gzip_decompressor_decompress(drec->super.next, &dout);
+ } else {
+ // Send decompressed data to the callback.
+ callback_rc = drec->super.callback(&dout);
+ if (callback_rc != HTP_OK) {
+ htp_gzip_decompressor_end(drec);
+ return callback_rc;
+ }
+ }
+
+ return HTP_OK;
+ }
+
+restart:
+ if (consumed > d->len || d->len > UINT32_MAX ) {
+ htp_log(d->tx->connp, HTP_LOG_MARK, HTP_LOG_ERROR, 0, "GZip decompressor: consumed > d->len");
+ return HTP_ERROR;
+ }
+ drec->stream.next_in = (unsigned char *) (d->data + consumed);
+ drec->stream.avail_in = (uint32_t) (d->len - consumed);
+
+ while (drec->stream.avail_in != 0) {
+ // If there's no more room left in the output
+ // buffer, send its contents out and start over.
+ if (drec->stream.avail_out == 0) {
+ drec->crc = crc32(drec->crc, drec->buffer, GZIP_BUF_SIZE);
+
+ // Prepare data for callback.
+ htp_tx_data_t d2;
+ d2.tx = d->tx;
+ d2.data = drec->buffer;
+ d2.len = GZIP_BUF_SIZE;
+ d2.is_last = d->is_last;
+
+ if (drec->super.next != NULL && drec->zlib_initialized) {
+ callback_rc = htp_gzip_decompressor_decompress(drec->super.next, &d2);
+ } else {
+ // Send decompressed data to callback.
+ callback_rc = drec->super.callback(&d2);
+ }
+ if (callback_rc != HTP_OK) {
+ htp_gzip_decompressor_end(drec);
+ return callback_rc;
+ }
+
+ drec->stream.next_out = drec->buffer;
+ drec->stream.avail_out = GZIP_BUF_SIZE;
+ }
+
+ if (drec->zlib_initialized == HTP_COMPRESSION_LZMA) {
+ if (drec->header_len < LZMA_PROPS_SIZE + 8) {
+ consumed = LZMA_PROPS_SIZE + 8 - drec->header_len;
+ if (consumed > drec->stream.avail_in) {
+ consumed = drec->stream.avail_in;
+ }
+ memcpy(drec->header + drec->header_len, drec->stream.next_in, consumed);
+ drec->stream.next_in = (unsigned char *) (d->data + consumed);
+ drec->stream.avail_in = (uint32_t) (d->len - consumed);
+ drec->header_len += consumed;
+ }
+ if (drec->header_len == LZMA_PROPS_SIZE + 8) {
+ rc = LzmaDec_Allocate(&drec->state, drec->header, LZMA_PROPS_SIZE, &lzma_Alloc);
+ if (rc != SZ_OK)
+ return rc;
+ LzmaDec_Init(&drec->state);
+ // hack: bump header_len so the decode step below runs; on failure we returned above with it unchanged, so the next call retries the allocation
+ drec->header_len++;
+ }
+ if (drec->header_len > LZMA_PROPS_SIZE + 8) {
+ size_t inprocessed = drec->stream.avail_in;
+ size_t outprocessed = drec->stream.avail_out;
+ ELzmaStatus status;
+ rc = LzmaDec_DecodeToBuf(&drec->state, drec->stream.next_out, &outprocessed,
+ drec->stream.next_in, &inprocessed, LZMA_FINISH_ANY, &status, d->tx->cfg->lzma_memlimit);
+ drec->stream.avail_in -= inprocessed;
+ drec->stream.next_in += inprocessed;
+ drec->stream.avail_out -= outprocessed;
+ drec->stream.next_out += outprocessed;
+ switch (rc) {
+ case SZ_OK:
+ rc = Z_OK;
+ if (status == LZMA_STATUS_FINISHED_WITH_MARK) {
+ rc = Z_STREAM_END;
+ }
+ break;
+ case SZ_ERROR_MEM:
+ htp_log(d->tx->connp, HTP_LOG_MARK, HTP_LOG_WARNING, 0, "LZMA decompressor: memory limit reached");
+ // fall through
+ default:
+ rc = Z_DATA_ERROR;
+ }
+ }
+ } else if (drec->zlib_initialized) {
+ rc = inflate(&drec->stream, Z_NO_FLUSH);
+ } else {
+ // no initialization means previous error on stream
+ return HTP_ERROR;
+ }
+ if (GZIP_BUF_SIZE > drec->stream.avail_out) {
+ if (rc == Z_DATA_ERROR) {
+ // There is data even if there is an error
+ // So use this data and log a warning
+ htp_log(d->tx->connp, HTP_LOG_MARK, HTP_LOG_WARNING, 0, "GZip decompressor: inflate failed with %d", rc);
+ rc = Z_STREAM_END;
+ }
+ }
+ if (rc == Z_STREAM_END) {
+ // How many bytes do we have?
+ size_t len = GZIP_BUF_SIZE - drec->stream.avail_out;
+
+ // Update CRC (NOTE(review): no CRC update is actually performed on this path)
+
+ // Prepare data for the callback.
+ htp_tx_data_t d2;
+ d2.tx = d->tx;
+ d2.data = drec->buffer;
+ d2.len = len;
+ d2.is_last = d->is_last;
+
+ if (drec->super.next != NULL && drec->zlib_initialized) {
+ callback_rc = htp_gzip_decompressor_decompress(drec->super.next, &d2);
+ } else {
+ // Send decompressed data to the callback.
+ callback_rc = drec->super.callback(&d2);
+ }
+ if (callback_rc != HTP_OK) {
+ htp_gzip_decompressor_end(drec);
+ return callback_rc;
+ }
+ drec->stream.avail_out = GZIP_BUF_SIZE;
+ drec->stream.next_out = drec->buffer;
+ // TODO Handle trailer.
+
+ return HTP_OK;
+ }
+ else if (rc != Z_OK) {
+ htp_log(d->tx->connp, HTP_LOG_MARK, HTP_LOG_WARNING, 0, "GZip decompressor: inflate failed with %d", rc);
+ if (drec->zlib_initialized == HTP_COMPRESSION_LZMA) {
+ LzmaDec_Free(&drec->state, &lzma_Alloc);
+ // so as to clean zlib resources after restart
+ drec->zlib_initialized = HTP_COMPRESSION_NONE;
+ } else {
+ inflateEnd(&drec->stream);
+ }
+
+ // see if we want to restart the decompressor
+ if (htp_gzip_decompressor_restart(drec,
+ d->data, d->len, &consumed) == 1)
+ {
+ // we'll be restarting the decompressor
+ goto restart;
+ }
+
+ drec->zlib_initialized = 0;
+
+ // all our inflate attempts have failed, simply
+ // pass the raw data on to the callback in case
+ // it's not compressed at all
+
+ htp_tx_data_t d2;
+ d2.tx = d->tx;
+ d2.data = d->data;
+ d2.len = d->len;
+ d2.is_last = d->is_last;
+
+ callback_rc = drec->super.callback(&d2);
+ if (callback_rc != HTP_OK) {
+ return HTP_ERROR;
+ }
+
+ drec->stream.avail_out = GZIP_BUF_SIZE;
+ drec->stream.next_out = drec->buffer;
+
+ /* successfully passed through, lets continue doing that */
+ drec->super.passthrough = 1;
+ return HTP_OK;
+ }
+ }
+
+ return HTP_OK;
+}
+
+/**
+ * Destroys a decompressor: ends its backend, then frees the buffer and the record.
+ *
+ * @param[in] drec1 Decompressor to destroy; NULL is accepted and ignored.
+ */
+void htp_gzip_decompressor_destroy(htp_decompressor_t *drec1) {
+ if (drec1 == NULL) return;
+ htp_decompressor_gzip_t *gz = (htp_decompressor_gzip_t *) drec1;
+
+ htp_gzip_decompressor_end(gz);
+
+ free(gz->buffer);
+ free(gz);
+}
+
+/**
+ * Create a new decompressor instance with a GZIP_BUF_SIZE output buffer.
+ *
+ * @param[in] connp Connection parser; its cfg supplies the LZMA limits and it is used for logging.
+ * @param[in] format HTP_COMPRESSION_GZIP, HTP_COMPRESSION_DEFLATE or HTP_COMPRESSION_LZMA; any other value fails.
+ * @return New htp_decompressor_t instance on success, or NULL on failure.
+ */
+htp_decompressor_t *htp_gzip_decompressor_create(htp_connp_t *connp, enum htp_content_encoding_t format) {
+ htp_decompressor_gzip_t *drec = calloc(1, sizeof (htp_decompressor_gzip_t));
+ if (drec == NULL) return NULL;
+
+ drec->super.decompress = NULL;
+ drec->super.destroy = NULL;
+ drec->super.next = NULL;
+
+ drec->buffer = malloc(GZIP_BUF_SIZE);
+ if (drec->buffer == NULL) {
+ free(drec);
+ return NULL;
+ }
+
+ // Initialize the backend that matches the requested format.
+ int rc;
+
+ switch (format) {
+ case HTP_COMPRESSION_LZMA:
+ if (connp->cfg->lzma_memlimit > 0 &&
+ connp->cfg->response_lzma_layer_limit > 0) {
+ LzmaDec_Construct(&drec->state);
+ } else {
+ htp_log(connp, HTP_LOG_MARK, HTP_LOG_WARNING, 0, "LZMA decompression disabled");
+ drec->super.passthrough = 1;
+ }
+ rc = Z_OK;
+ break;
+ case HTP_COMPRESSION_DEFLATE:
+ // Negative values activate raw processing,
+ // which is what we need for deflate.
+ rc = inflateInit2(&drec->stream, -15);
+ break;
+ case HTP_COMPRESSION_GZIP:
+ // Increased window size activates gzip header processing.
+ rc = inflateInit2(&drec->stream, 15 + 32);
+ break;
+ default:
+ // unsupported format: make the error path below run
+ rc = Z_DATA_ERROR;
+ }
+
+ if (rc != Z_OK) {
+ htp_log(connp, HTP_LOG_MARK, HTP_LOG_ERROR, 0, "GZip decompressor: inflateInit2 failed with code %d", rc);
+
+ if (format == HTP_COMPRESSION_DEFLATE || format == HTP_COMPRESSION_GZIP) {
+ inflateEnd(&drec->stream);
+ }
+ free(drec->buffer);
+ free(drec);
+
+ return NULL;
+ }
+
+ drec->zlib_initialized = format;
+ drec->stream.avail_out = GZIP_BUF_SIZE;
+ drec->stream.next_out = drec->buffer;
+
+ #if 0
+ if (format == COMPRESSION_DEFLATE) {
+ drec->initialized = 1;
+ }
+ #endif
+
+ return (htp_decompressor_t *) drec;
+}
diff --git a/htp/htp_decompressors.h b/htp/htp_decompressors.h
new file mode 100644
index 0000000..a357de1
--- /dev/null
+++ b/htp/htp_decompressors.h
@@ -0,0 +1,94 @@
+/***************************************************************************
+ * Copyright (c) 2009-2010 Open Information Security Foundation
+ * Copyright (c) 2010-2013 Qualys, Inc.
+ * All rights reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions are
+ * met:
+ *
+ * - Redistributions of source code must retain the above copyright
+ * notice, this list of conditions and the following disclaimer.
+
+ * - Redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in the
+ * documentation and/or other materials provided with the distribution.
+
+ * - Neither the name of the Qualys, Inc. nor the names of its
+ * contributors may be used to endorse or promote products derived from
+ * this software without specific prior written permission.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+ * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+ * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+ * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
+ * HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+ * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
+ * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+ * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
+ * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+ * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+ * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+ ***************************************************************************/
+
+/**
+ * @file
+ * @author Ivan Ristic <ivanr@webkreator.com>
+ */
+
+#ifndef _HTP_DECOMPRESSORS_H
+#define _HTP_DECOMPRESSORS_H
+
+#ifdef __cplusplus
+extern "C" {
+#endif
+
+#include <zlib.h>
+#include "lzma/LzmaDec.h"
+
+typedef struct htp_decompressor_gzip_t htp_decompressor_gzip_t;
+typedef struct htp_decompressor_t htp_decompressor_t;
+
+#define GZIP_BUF_SIZE 8192 /* size in bytes of each decompressor's output buffer */
+
+#define DEFLATE_MAGIC_1 0x1f /* NOTE(review): same byte values the gzip header probe tests */
+#define DEFLATE_MAGIC_2 0x8b /* at data[0] and data[1]; not referenced in htp_decompressors.c */
+
+struct htp_decompressor_t {
+ // no longer used (htp_gzip_decompressor_create sets it to NULL)
+ htp_status_t (*decompress)(htp_decompressor_t *, htp_tx_data_t *);
+ htp_status_t (*callback)(htp_tx_data_t *); /**< receives the output when no next decompressor takes it */
+ // no longer used (htp_gzip_decompressor_create sets it to NULL)
+ void (*destroy)(htp_decompressor_t *);
+ struct htp_decompressor_t *next; /**< next decompressor in the chain, or NULL */
+ struct timeval time_before;
+ int32_t time_spent;
+ uint32_t nb_callbacks;
+ uint8_t passthrough; /**< non-zero: forward data undecoded (every inflate attempt failed, or LZMA is disabled) */
+};
+
+struct htp_decompressor_gzip_t {
+ htp_decompressor_t super;
+ #if 0
+ int initialized;
+ #endif
+ int zlib_initialized; /**< active format (set from htp_content_encoding_t at creation), 0 once ended */
+ uint8_t restart; /**< number of restarts performed so far; no further restart once it reaches 3 */
+ z_stream stream; /**< zlib state; its avail_out/next_out also track the output buffer for LZMA */
+ uint8_t header[LZMA_PROPS_SIZE + 8]; /**< LZMA header bytes collected before the decoder is allocated */
+ uint8_t header_len; /**< bytes stored in header; bumped one past full size once LZMA is set up */
+ CLzmaDec state; /**< LZMA decoder state */
+ unsigned char *buffer; /**< output buffer of GZIP_BUF_SIZE bytes */
+ unsigned long crc; /**< running crc32 of full output buffers flushed by the zlib/LZMA loop */
+};
+
+htp_decompressor_t *htp_gzip_decompressor_create(htp_connp_t *connp, enum htp_content_encoding_t format);
+htp_status_t htp_gzip_decompressor_decompress(htp_decompressor_t *drec, htp_tx_data_t *d);
+void htp_gzip_decompressor_destroy(htp_decompressor_t *drec);
+
+#ifdef __cplusplus
+}
+#endif
+
+#endif /* _HTP_DECOMPRESSORS_H */
+
diff --git a/htp/htp_hooks.c b/htp/htp_hooks.c
new file mode 100644
index 0000000..37d0fd4
--- /dev/null
+++ b/htp/htp_hooks.c
@@ -0,0 +1,160 @@
+/***************************************************************************
+ * Copyright (c) 2009-2010 Open Information Security Foundation
+ * Copyright (c) 2010-2013 Qualys, Inc.
+ * All rights reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions are
+ * met:
+ *
+ * - Redistributions of source code must retain the above copyright
+ * notice, this list of conditions and the following disclaimer.
+
+ * - Redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in the
+ * documentation and/or other materials provided with the distribution.
+
+ * - Neither the name of the Qualys, Inc. nor the names of its
+ * contributors may be used to endorse or promote products derived from
+ * this software without specific prior written permission.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+ * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+ * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+ * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
+ * HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+ * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
+ * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+ * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
+ * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+ * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+ * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+ ***************************************************************************/
+
+/**
+ * @file
+ * @author Ivan Ristic <ivanr@webkreator.com>
+ */
+
+#include "htp_config_auto.h"
+
+#include "htp_private.h"
+
+htp_hook_t *htp_hook_copy(const htp_hook_t *hook) {
+ // A NULL hook copies to NULL, as does any failure below.
+ if (hook == NULL) return NULL;
+ htp_hook_t *duplicate = htp_hook_create();
+ if (duplicate == NULL) return NULL;
+
+ const size_t total = htp_list_size(hook->callbacks);
+ for (size_t idx = 0; idx < total; idx++) {
+ htp_callback_t *src = htp_list_get(hook->callbacks, idx);
+ if (htp_hook_register(&duplicate, src->fn) == HTP_OK) continue;
+ // Registration failed: discard the partial copy.
+ htp_hook_destroy(duplicate);
+ return NULL;
+ }
+ return duplicate;
+}
+
+htp_hook_t *htp_hook_create(void) {
+ // Zeroed hook record; its callback list is made by htp_list_array_create(4).
+ htp_hook_t *created = calloc(1, sizeof (*created));
+ if (created == NULL) return NULL;
+
+ created->callbacks = (htp_list_array_t *) htp_list_array_create(4);
+ if (created->callbacks != NULL) return created;
+
+ // List allocation failed: release the half-built hook.
+ free(created);
+ return NULL;
+}
+
+void htp_hook_destroy(htp_hook_t *hook) {
+ if (hook == NULL) return;
+ // Free every callback record stored in the list, then the list, then the hook.
+ const size_t count = htp_list_size(hook->callbacks);
+ for (size_t pos = 0; pos < count; pos++) {
+ htp_callback_t *entry = htp_list_get(hook->callbacks, pos);
+ free(entry);
+ }
+ htp_list_array_destroy(hook->callbacks);
+ free(hook);
+}
+
+htp_status_t htp_hook_register(htp_hook_t **hook, const htp_callback_fn_t callback_fn) {
+ // hook is an in/out parameter: *hook == NULL asks us to create the hook.
+ if (hook == NULL) return HTP_ERROR;
+
+ htp_callback_t *callback = calloc(1, sizeof (htp_callback_t));
+ if (callback == NULL) return HTP_ERROR;
+ callback->fn = callback_fn;
+
+ // Create a new hook if one does not exist
+ int hook_created = 0;
+
+ if (*hook == NULL) {
+ hook_created = 1;
+
+ *hook = htp_hook_create();
+ if (*hook == NULL) {
+ free(callback);
+ return HTP_ERROR;
+ }
+ }
+
+ // Add callback; once stored, htp_hook_destroy() is what frees it.
+ if (htp_list_array_push((*hook)->callbacks, callback) != HTP_OK) {
+ if (hook_created) {
+ // Undo our own creation fully: a bare free() would leak the callback
+ // list, and leaving *hook set would hand the caller a dangling pointer.
+ htp_hook_destroy(*hook);
+ *hook = NULL;
+ }
+ free(callback);
+ return HTP_ERROR;
+ }
+ return HTP_OK;
+}
+
+htp_status_t htp_hook_run_all(htp_hook_t *hook, void *user_data) {
+ // No hook at all: nothing to run, which counts as success.
+ if (hook == NULL) return HTP_OK;
+
+ // The callback count is sampled once, before the first callback runs.
+ const size_t total = htp_list_size(hook->callbacks);
+
+ for (size_t idx = 0; idx < total; idx++) {
+ htp_callback_t *entry = htp_list_get(hook->callbacks, idx);
+ const htp_status_t outcome = entry->fn(user_data);
+
+ // HTP_OK (work done) and HTP_DECLINED (no work done) let the
+ // chain continue; every other status aborts it and is returned.
+ if (outcome == HTP_OK || outcome == HTP_DECLINED) continue;
+ return outcome;
+ }
+
+ return HTP_OK;
+}
+
+htp_status_t htp_hook_run_one(htp_hook_t *hook, void *user_data) {
+ // Without a hook there is nobody to ask, so the request is declined.
+ if (hook == NULL) return HTP_DECLINED;
+
+ // The callback count is sampled once, before the first callback runs.
+ const size_t total = htp_list_size(hook->callbacks);
+
+ for (size_t idx = 0; idx < total; idx++) {
+ htp_callback_t *entry = htp_list_get(hook->callbacks, idx);
+ const htp_status_t outcome = entry->fn(user_data);
+
+ // HTP_DECLINED means "did no work": move on to the next callback.
+ if (outcome == HTP_DECLINED) continue;
+
+ // Anything else (HTP_OK or an error) ends the search and is returned.
+ return outcome;
+ }
+
+ // Every callback declined.
+ return HTP_DECLINED;
+}
diff --git a/htp/htp_hooks.h b/htp/htp_hooks.h
new file mode 100644
index 0000000..902a7d4
--- /dev/null
+++ b/htp/htp_hooks.h
@@ -0,0 +1,122 @@
+/***************************************************************************
+ * Copyright (c) 2009-2010 Open Information Security Foundation
+ * Copyright (c) 2010-2013 Qualys, Inc.
+ * All rights reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions are
+ * met:
+ *
+ * - Redistributions of source code must retain the above copyright
+ * notice, this list of conditions and the following disclaimer.
+
+ * - Redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in the
+ * documentation and/or other materials provided with the distribution.
+
+ * - Neither the name of the Qualys, Inc. nor the names of its
+ * contributors may be used to endorse or promote products derived from
+ * this software without specific prior written permission.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+ * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+ * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+ * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
+ * HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+ * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
+ * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+ * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
+ * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+ * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+ * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+ ***************************************************************************/
+
+/**
+ * @file
+ * @author Ivan Ristic <ivanr@webkreator.com>
+ */
+
+#ifndef _HOOKS_H
+#define _HOOKS_H
+
+#ifdef __cplusplus
+extern "C" {
+#endif
+
+typedef struct htp_hook_t htp_hook_t;
+typedef struct htp_callback_t htp_callback_t;
+typedef int (*htp_callback_fn_t) (void *);
+
+#include "htp.h"
+
+struct htp_hook_t {
+ htp_list_array_t *callbacks; /**< list of heap-allocated htp_callback_t entries */
+};
+
+struct htp_callback_t {
+ htp_callback_fn_t fn; /**< function called with the user_data given to htp_hook_run_all()/htp_hook_run_one() */
+};
+
+/**
+ * Creates a copy of the provided hook. The hook is allowed to be NULL,
+ * in which case this function simply returns a NULL.
+ *
+ * @param[in] hook
+ * @return A copy of the hook, or NULL (if the provided hook was NULL
+ * or, if it wasn't, if there was a memory allocation problem while
+ * constructing a copy).
+ */
+htp_hook_t *htp_hook_copy(const htp_hook_t *hook);
+
+/**
+ * Creates a new hook.
+ *
+ * @return New htp_hook_t structure on success, NULL on failure.
+ */
+htp_hook_t *htp_hook_create(void);
+
+/**
+ * Destroys an existing hook. It is all right to send a NULL
+ * to this method because it will simply return straight away.
+ *
+ * @param[in] hook
+ */
+void htp_hook_destroy(htp_hook_t *hook);
+
+/**
+ * Registers a new callback with the hook, creating the hook when *hook is NULL.
+ *
+ * @param[in,out] hook Address of the hook pointer; must not itself be NULL.
+ * @param[in] callback_fn
+ * @return HTP_OK on success, HTP_ERROR if hook is NULL or on memory allocation error.
+ */
+htp_status_t htp_hook_register(htp_hook_t **hook, const htp_callback_fn_t callback_fn);
+
+/**
+ * Runs all the callbacks associated with a given hook. Stops at the first
+ * callback that returns anything other than HTP_OK or HTP_DECLINED.
+ *
+ * @param[in] hook May be NULL, in which case HTP_OK is returned.
+ * @param[in] user_data Passed unchanged to every callback.
+ * @return HTP_OK if every callback returned HTP_OK or HTP_DECLINED (also when
+ * there are none); otherwise the first other status returned by a
+ * callback (e.g. HTP_STOP or HTP_ERROR).
+ */
+htp_status_t htp_hook_run_all(htp_hook_t *hook, void *user_data);
+
+/**
+ * Run callbacks one by one until one of them accepts to service the hook.
+ *
+ * @param[in] hook
+ * @param[in] user_data
+ * @return HTP_OK if a hook was found to process the callback, HTP_DECLINED if
+ * no hook could be found, HTP_STOP if a hook signalled the processing
+ * to stop, and HTP_ERROR or any other value less than zero on error.
+ */
+htp_status_t htp_hook_run_one(htp_hook_t *hook, void *user_data);
+
+#ifdef __cplusplus
+}
+#endif
+
+#endif /* _HOOKS_H */
diff --git a/htp/htp_list.c b/htp/htp_list.c
new file mode 100644
index 0000000..b7c42bf
--- /dev/null
+++ b/htp/htp_list.c
@@ -0,0 +1,360 @@
+/***************************************************************************
+ * Copyright (c) 2009-2010 Open Information Security Foundation
+ * Copyright (c) 2010-2013 Qualys, Inc.
+ * All rights reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions are
+ * met:
+ *
+ * - Redistributions of source code must retain the above copyright
+ * notice, this list of conditions and the following disclaimer.
+
+ * - Redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in the
+ * documentation and/or other materials provided with the distribution.
+
+ * - Neither the name of the Qualys, Inc. nor the names of its
+ * contributors may be used to endorse or promote products derived from
+ * this software without specific prior written permission.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+ * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+ * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+ * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
+ * HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+ * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
+ * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+ * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
+ * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+ * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+ * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+ ***************************************************************************/
+
+/**
+ * @file
+ * @author Ivan Ristic <ivanr@webkreator.com>
+ */
+
+#include "htp_config_auto.h"
+
+#include "htp_private.h"
+
+// Array-backed list
+
+/**
+ * Initializes an already allocated array-backed list structure: allocates
+ * storage for the requested number of element slots and resets the
+ * bookkeeping fields so that the list is empty.
+ *
+ * @param[in] l    List structure to initialize; must not be NULL.
+ * @param[in] size Initial number of element slots; must be greater than zero.
+ * @return HTP_OK on success, HTP_ERROR if an argument is invalid or if the
+ *         memory allocation failed.
+ */
+htp_status_t htp_list_array_init(htp_list_t *l, size_t size) {
+    if (l == NULL) return HTP_ERROR;
+
+    // A list without any slots is of no use; htp_list_array_create()
+    // rejects a zero size as well.
+    if (size == 0) return HTP_ERROR;
+
+    // Refuse sizes for which the byte count computed below would wrap around.
+    if (size > ((size_t) -1) / sizeof (void *)) return HTP_ERROR;
+
+    // Allocate the initial batch of elements.
+    l->elements = malloc(size * sizeof (void *));
+    if (l->elements == NULL) {
+        return HTP_ERROR;
+    }
+
+    // Initialize the structure.
+    l->first = 0;
+    l->last = 0;
+    l->current_size = 0;
+    l->max_size = size;
+
+    return HTP_OK;
+}
+
+/**
+ * Allocates a new array-backed list and its initial element storage.
+ *
+ * @param[in] size Initial number of element slots.
+ * @return The new list, or NULL if size is zero or if either allocation failed.
+ */
+htp_list_t *htp_list_array_create(size_t size) {
+    htp_list_array_t *list = NULL;
+
+    // A list without any slots is never useful, so refuse to build one.
+    if (size != 0) {
+        list = calloc(1, sizeof (htp_list_array_t));
+
+        // Set up the element storage; give the structure back if that fails.
+        if ((list != NULL) && (htp_list_array_init(list, size) == HTP_ERROR)) {
+            free(list);
+            list = NULL;
+        }
+    }
+
+    return (htp_list_t *) list;
+}
+
+/**
+ * Empties the list without releasing its storage. The elements themselves
+ * are not touched; only the bookkeeping fields are reset. A NULL list is
+ * ignored.
+ *
+ * @param[in] l
+ */
+void htp_list_array_clear(htp_list_array_t *l) {
+    if (l != NULL) {
+        // The allocated slots are kept for reuse.
+        l->current_size = 0;
+        l->last = 0;
+        l->first = 0;
+    }
+}
+
+/**
+ * Frees the element storage and then the list structure itself. The
+ * elements held by the list are not freed. A NULL list is ignored.
+ *
+ * @param[in] l
+ */
+void htp_list_array_destroy(htp_list_array_t *l) {
+    if (l != NULL) {
+        free(l->elements);
+        free(l);
+    }
+}
+
+/**
+ * Frees the element storage but not the list structure itself, which
+ * remains owned by the caller. The elements held by the list are not freed.
+ * A NULL list is ignored.
+ *
+ * @param[in] l
+ */
+void htp_list_array_release(htp_list_array_t *l) {
+    if (l == NULL) return;
+
+    free(l->elements);
+
+    // The structure outlives this call, so do not leave a dangling pointer
+    // behind: a repeated release (or a later destroy) then degrades to a
+    // harmless free(NULL), and lookups report an empty list instead of
+    // reading freed memory.
+    l->elements = NULL;
+    l->current_size = 0;
+}
+
+/**
+ * Retrieves the element stored at the given logical position, counted from
+ * the first element of the list.
+ *
+ * @param[in] l
+ * @param[in] idx
+ * @return The element, or NULL if the list is NULL or idx is not smaller
+ *         than the number of stored elements.
+ */
+void *htp_list_array_get(const htp_list_array_t *l, size_t idx) {
+    if ((l == NULL) || (idx >= l->current_size)) return NULL;
+
+    // Translate the logical index into a storage slot. Positions that run
+    // past the end of the storage continue from its beginning.
+    size_t slot = l->first + idx;
+    if (!(slot < l->max_size)) {
+        slot = idx - (l->max_size - l->first);
+    }
+
+    return (void *) l->elements[slot];
+}
+
+/**
+ * Removes the last element of the list and returns it.
+ *
+ * @param[in] l
+ * @return The removed element, or NULL if the list is NULL or empty.
+ */
+void *htp_list_array_pop(htp_list_array_t *l) {
+    if ((l == NULL) || (l->current_size == 0)) return NULL;
+
+    // Locate the slot of the last element, wrapping around the end of the
+    // storage when necessary.
+    size_t tail = l->first + l->current_size - 1;
+    if (tail > l->max_size - 1) {
+        tail -= l->max_size;
+    }
+
+    void *removed = l->elements[tail];
+
+    // The vacated slot is where the next pushed element will be stored.
+    l->current_size--;
+    l->last = tail;
+
+    return removed;
+}
+
+/**
+ * Appends an element to the end of the list, doubling the storage when
+ * all slots are in use.
+ *
+ * @param[in] l
+ * @param[in] e Element to store; the list keeps the pointer only.
+ * @return HTP_OK on success; HTP_ERROR if the list is NULL, if the storage
+ *         cannot grow any further, or if a memory allocation failed (the
+ *         list is left unchanged in all of these cases).
+ */
+htp_status_t htp_list_array_push(htp_list_array_t *l, void *e) {
+    if (l == NULL) return HTP_ERROR;
+
+    // Check whether we're full
+    if (l->current_size >= l->max_size) {
+        // Doubling must not wrap around, neither as a slot count nor as the
+        // byte count handed to the allocator.
+        if (l->max_size > ((size_t) -1) / sizeof (void *) / 2) return HTP_ERROR;
+
+        // A list with no slots at all cannot grow by doubling (0 * 2 == 0),
+        // so start it off with a single slot.
+        size_t new_size = (l->max_size == 0) ? 1 : l->max_size * 2;
+        void **newblock = NULL;
+
+        if (l->first == 0) {
+            // The simple case of expansion is when the first
+            // element in the list resides in the first slot. In
+            // that case we just add some new space to the end,
+            // adjust the max_size and that's that.
+            newblock = realloc(l->elements, new_size * sizeof (void *));
+            if (newblock == NULL) return HTP_ERROR;
+        } else {
+            // When the first element is not in the first
+            // memory slot, we need to rearrange the order
+            // of the elements in order to expand the storage area.
+            /* coverity[suspicious_sizeof] */
+            newblock = malloc(new_size * sizeof (void *));
+            if (newblock == NULL) return HTP_ERROR;
+
+            // Number of elements stored between the first element and the
+            // end of the old storage.
+            size_t head_count = l->max_size - l->first;
+
+            // Copy the beginning of the list to the beginning of the new memory block
+            /* coverity[suspicious_sizeof] */
+            memcpy(newblock, l->elements + l->first, head_count * sizeof (void *));
+
+            // Append the second part of the list to the end
+            /* coverity[suspicious_sizeof] */
+            memcpy(newblock + head_count, l->elements, l->first * sizeof (void *));
+
+            free(l->elements);
+        }
+
+        // The elements now start in the first slot of the new storage.
+        l->first = 0;
+        l->last = l->current_size;
+        l->max_size = new_size;
+        l->elements = newblock;
+    }
+
+    l->elements[l->last] = e;
+    l->current_size++;
+
+    // Advance the insertion slot, wrapping around at the end of the storage.
+    l->last++;
+    if (l->last == l->max_size) {
+        l->last = 0;
+    }
+
+    return HTP_OK;
+}
+
+/**
+ * Overwrites the element stored at the given logical position.
+ *
+ * @param[in] l
+ * @param[in] idx
+ * @param[in] e
+ * @return HTP_OK if the element was replaced, HTP_DECLINED if there is no
+ *         element at idx, and HTP_ERROR if the list is NULL.
+ */
+htp_status_t htp_list_array_replace(htp_list_array_t *l, size_t idx, void *e) {
+    if (l == NULL) return HTP_ERROR;
+
+    // Compare idx directly: the expression "idx + 1" wraps to zero for the
+    // largest size_t value, which would let a nonexistent index through.
+    if (idx >= l->current_size) return HTP_DECLINED;
+
+    l->elements[(l->first + idx) % l->max_size] = e;
+
+    return HTP_OK;
+}
+
+/**
+ * Returns the number of elements currently stored in the list.
+ *
+ * @param[in] l
+ * @return Number of stored elements; zero if the list is NULL.
+ */
+size_t htp_list_array_size(const htp_list_array_t *l) {
+    // HTP_ERROR is a status code, not a count: returned through size_t it
+    // would be read by callers as an element count (presumably a huge one,
+    // as error codes are documented to be negative). A missing list simply
+    // has no elements.
+    if (l == NULL) return 0;
+
+    return l->current_size;
+}
+
+/**
+ * Removes the first element of the list and returns it.
+ *
+ * @param[in] l
+ * @return The removed element, or NULL if the list is NULL or empty.
+ */
+void *htp_list_array_shift(htp_list_array_t *l) {
+    if ((l == NULL) || (l->current_size == 0)) return NULL;
+
+    void *head = l->elements[l->first];
+
+    // Step to the next slot, wrapping around at the end of the storage.
+    l->first = (l->first + 1 == l->max_size) ? 0 : l->first + 1;
+    l->current_size--;
+
+    return head;
+}
+
+#if 0
+// Linked list
+
+/**
+ * Allocates an empty linked list.
+ *
+ * @return The new, zero-initialized list, or NULL on allocation failure.
+ */
+htp_list_linked_t *htp_list_linked_create(void) {
+    // calloc() already yields NULL on failure, which is passed on as is.
+    return calloc(1, sizeof (htp_list_linked_t));
+}
+
+/**
+ * Frees the list and all of its link structures. The data stored in the
+ * list is not freed; as documented for this function in htp_list.h, that
+ * is the responsibility of the caller. A NULL list is ignored.
+ *
+ * @param[in] l
+ */
+void htp_list_linked_destroy(htp_list_linked_t *l) {
+    if (l == NULL) return;
+
+    // Free the list structures
+    htp_list_linked_element_t *node = l->first;
+    while (node != NULL) {
+        // Remember the successor before the current link goes away.
+        htp_list_linked_element_t *next = node->next;
+        free(node);
+        node = next;
+    }
+
+    // Free the list itself
+    free(l);
+}
+
+/**
+ * Tells whether the list holds no elements.
+ *
+ * @param[in] l Must not be NULL.
+ * @return 1 if the list has no first element, 0 otherwise.
+ */
+int htp_list_linked_empty(const htp_list_linked_t *l) {
+    return (l->first == NULL) ? 1 : 0;
+}
+
+/**
+ * Removes the last element of the list and returns its data. The list is
+ * walked from the start to find the last link and its predecessor.
+ *
+ * @param[in] l Must not be NULL.
+ * @return Data of the removed element, or NULL if the list is empty.
+ */
+void *htp_list_linked_pop(htp_list_linked_t *l) {
+    htp_list_linked_element_t *tail = l->first;
+    if (tail == NULL) return NULL;
+
+    // Find the last element, keeping track of the one before it.
+    htp_list_linked_element_t *before = NULL;
+    for (; tail->next != NULL; tail = tail->next) {
+        before = tail;
+    }
+
+    void *payload = tail->data;
+    free(tail);
+
+    if (before == NULL) {
+        // The only element was removed; the list is empty now.
+        l->first = NULL;
+        l->last = NULL;
+    } else {
+        before->next = NULL;
+        l->last = before;
+    }
+
+    return payload;
+}
+
+/**
+ * Appends an element to the end of the list. The signature and the status
+ * codes follow the declaration of this function in htp_list.h.
+ *
+ * @param[in] l
+ * @param[in] e Data to store; the list keeps the pointer only.
+ * @return HTP_OK on success, HTP_ERROR if the list is NULL or if the
+ *         memory allocation failed.
+ */
+htp_status_t htp_list_linked_push(htp_list_linked_t *l, void *e) {
+    if (l == NULL) return HTP_ERROR;
+
+    htp_list_linked_element_t *le = calloc(1, sizeof (htp_list_linked_element_t));
+    if (le == NULL) return HTP_ERROR;
+
+    // Remember the element
+    le->data = e;
+
+    // If the queue is empty, make this element first
+    if (!l->first) {
+        l->first = le;
+    }
+
+    // Otherwise link it behind the current last element.
+    if (l->last) {
+        l->last->next = le;
+    }
+
+    l->last = le;
+
+    return HTP_OK;
+}
+
+/**
+ * Removes the first element of the list and returns its data.
+ *
+ * @param[in] l Must not be NULL.
+ * @return Data of the removed element, or NULL if the list is empty.
+ */
+void *htp_list_linked_shift(htp_list_linked_t *l) {
+    htp_list_linked_element_t *head = l->first;
+    if (head == NULL) return NULL;
+
+    void *payload = head->data;
+
+    // Unlink the head; if nothing follows it, the list has no last element either.
+    l->first = head->next;
+    if (l->first == NULL) {
+        l->last = NULL;
+    }
+
+    free(head);
+
+    return payload;
+}
+#endif
+
+#if 0
+
+/**
+ * Manual smoke test for the array-backed list: fills a four-slot list,
+ * forces it to wrap around and to grow, and prints the elements as they
+ * are popped and shifted.
+ */
+int main(int argc, char **argv) {
+    (void) argc;
+    (void) argv;
+
+    htp_list_t *q = htp_list_array_create(4);
+    if (q == NULL) return 1;
+
+    htp_list_push(q, "1");
+    htp_list_push(q, "2");
+    htp_list_push(q, "3");
+    htp_list_push(q, "4");
+
+    // Shifting first moves the start of the list away from slot zero, so
+    // the two pushes below exercise both wrap-around and expansion.
+    htp_list_shift(q);
+    htp_list_push(q, "5");
+    htp_list_push(q, "6");
+
+    char *s = NULL;
+    while ((s = (char *) htp_list_pop(q)) != NULL) {
+        printf("Got: %s\n", s);
+    }
+
+    printf("---\n");
+
+    htp_list_push(q, "1");
+    htp_list_push(q, "2");
+    htp_list_push(q, "3");
+    htp_list_push(q, "4");
+
+    while ((s = (char *) htp_list_shift(q)) != NULL) {
+        printf("Got: %s\n", s);
+    }
+
+    // A plain free(q) would leak the element storage owned by the list.
+    htp_list_destroy(q);
+
+    return 0;
+}
+#endif
diff --git a/htp/htp_list.h b/htp/htp_list.h
new file mode 100644
index 0000000..8a2bd63
--- /dev/null
+++ b/htp/htp_list.h
@@ -0,0 +1,227 @@
+/***************************************************************************
+ * Copyright (c) 2009-2010 Open Information Security Foundation
+ * Copyright (c) 2010-2013 Qualys, Inc.
+ * All rights reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions are
+ * met:
+ *
+ * - Redistributions of source code must retain the above copyright
+ * notice, this list of conditions and the following disclaimer.
+
+ * - Redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in the
+ * documentation and/or other materials provided with the distribution.
+
+ * - Neither the name of the Qualys, Inc. nor the names of its
+ * contributors may be used to endorse or promote products derived from
+ * this software without specific prior written permission.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+ * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+ * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+ * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
+ * HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+ * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
+ * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+ * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
+ * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+ * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+ * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+ ***************************************************************************/
+
+/**
+ * @file
+ * @author Ivan Ristic <ivanr@webkreator.com>
+ */
+
+#ifndef HTP_LIST_H
+#define HTP_LIST_H
+
+#ifdef __cplusplus
+extern "C" {
+#endif
+
+// The default list implementation is array-based. The
+// linked list version is not fully implemented yet.
+//
+// Each generic htp_list_* name below is an alias for the array-backed
+// symbol of the same purpose. Note that htp_list_add and htp_list_push
+// both map to htp_list_array_push.
+#define htp_list_t htp_list_array_t
+#define htp_list_add htp_list_array_push
+#define htp_list_create htp_list_array_create
+#define htp_list_init htp_list_array_init
+#define htp_list_clear htp_list_array_clear
+#define htp_list_destroy htp_list_array_destroy
+#define htp_list_release htp_list_array_release
+#define htp_list_get htp_list_array_get
+#define htp_list_pop htp_list_array_pop
+#define htp_list_push htp_list_array_push
+#define htp_list_replace htp_list_array_replace
+#define htp_list_size htp_list_array_size
+#define htp_list_shift htp_list_array_shift
+
+// Data structures
+
+typedef struct htp_list_array_t htp_list_array_t;
+typedef struct htp_list_linked_t htp_list_linked_t;
+
+#include "htp_core.h"
+#include "bstr.h"
+
+// Functions
+
+/**
+ * Create new array-backed list.
+ *
+ * @param[in] size
+ * @return Newly created list, or NULL if size is zero or if memory allocation failed.
+ */
+htp_list_array_t *htp_list_array_create(size_t size);
+
+/**
+ * Initialize an array-backed list.
+ *
+ * @param[in] l
+ * @param[in] size
+ * @return HTP_OK or HTP_ERROR if allocation failed
+ */
+htp_status_t htp_list_array_init(htp_list_array_t *l, size_t size);
+
+/**
+ * Remove all elements from the list. It is the responsibility of the caller
+ * to iterate over list elements and deallocate them if necessary, prior to
+ * invoking this function.
+ *
+ * @param[in] l
+ */
+void htp_list_array_clear(htp_list_array_t *l);
+
+/**
+ * Free the memory occupied by this list. This function assumes
+ * the elements held by the list were freed beforehand.
+ *
+ * @param[in] l
+ */
+void htp_list_array_destroy(htp_list_array_t *l);
+
+/**
+ * Free the memory occupied by this list, except itself.
+ * This function assumes the elements held by the list
+ * were freed beforehand.
+ *
+ * @param[in] l
+ */
+void htp_list_array_release(htp_list_array_t *l);
+
+/**
+ * Find the element at the given index.
+ *
+ * @param[in] l
+ * @param[in] idx
+ * @return the desired element, or NULL if the list is too small, or
+ * if the element at that position carries a NULL
+ */
+void *htp_list_array_get(const htp_list_array_t *l, size_t idx);
+
+/**
+ * Remove one element from the end of the list.
+ *
+ * @param[in] l
+ * @return The removed element, or NULL if the list is empty.
+ */
+void *htp_list_array_pop(htp_list_array_t *l);
+
+/**
+ * Add new element to the end of the list, expanding the list as necessary.
+ *
+ * @param[in] l
+ * @param[in] e
+ * @return HTP_OK on success or HTP_ERROR on failure.
+ *
+ */
+htp_status_t htp_list_array_push(htp_list_array_t *l, void *e);
+
+/**
+ * Replace the element at the given index with the provided element.
+ *
+ * @param[in] l
+ * @param[in] idx
+ * @param[in] e
+ *
+ * @return HTP_OK if an element with the given index was replaced; HTP_DECLINED
+ * if the desired index does not exist; HTP_ERROR if the list is NULL.
+ */
+htp_status_t htp_list_array_replace(htp_list_array_t *l, size_t idx, void *e);
+
+/**
+ * Returns the size of the list.
+ *
+ * @param[in] l
+ * @return List size.
+ */
+size_t htp_list_array_size(const htp_list_array_t *l);
+
+/**
+ * Remove one element from the beginning of the list.
+ *
+ * @param[in] l
+ * @return The removed element, or NULL if the list is empty.
+ */
+void *htp_list_array_shift(htp_list_array_t *l);
+
+
+// Linked list
+
+/**
+ * Create a new linked list.
+ *
+ * @return The newly created list, or NULL on memory allocation failure
+ */
+htp_list_linked_t *htp_list_linked_create(void);
+
+/**
+ * Destroy list. This function will not destroy any of the
+ * data stored in it. You'll have to do that manually beforehand.
+ *
+ * @param[in] l
+ */
+void htp_list_linked_destroy(htp_list_linked_t *l);
+
+/**
+ * Is the list empty?
+ *
+ * @param[in] l
+ * @return 1 if the list is empty, 0 if it is not
+ */
+int htp_list_linked_empty(const htp_list_linked_t *l);
+
+/**
+ * Remove one element from the end of the list.
+ *
+ * @param[in] l
+ * @return Pointer to the removed element, or NULL if the list is empty.
+ */
+void *htp_list_linked_pop(htp_list_linked_t *l);
+
+/**
+ * Add element to list.
+ *
+ * @param[in] l
+ * @param[in] e
+ * @return HTP_OK on success, HTP_ERROR on error.
+ */
+htp_status_t htp_list_linked_push(htp_list_linked_t *l, void *e);
+
+/**
+ * Remove one element from the beginning of the list.
+ *
+ * @param[in] l
+ * @return Pointer to the removed element, or NULL if the list is empty.
+ */
+void *htp_list_linked_shift(htp_list_linked_t *l);
+
+#ifdef __cplusplus
+}
+#endif
+
+#endif /* HTP_LIST_H */
+
diff --git a/htp/htp_list_private.h b/htp/htp_list_private.h
new file mode 100644
index 0000000..6f462c0
--- /dev/null
+++ b/htp/htp_list_private.h
@@ -0,0 +1,73 @@
+/***************************************************************************
+ * Copyright (c) 2009-2010 Open Information Security Foundation
+ * Copyright (c) 2010-2013 Qualys, Inc.
+ * All rights reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions are
+ * met:
+ *
+ * - Redistributions of source code must retain the above copyright
+ * notice, this list of conditions and the following disclaimer.
+
+ * - Redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in the
+ * documentation and/or other materials provided with the distribution.
+
+ * - Neither the name of the Qualys, Inc. nor the names of its
+ * contributors may be used to endorse or promote products derived from
+ * this software without specific prior written permission.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+ * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+ * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+ * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
+ * HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+ * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
+ * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+ * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
+ * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+ * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+ * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+ ***************************************************************************/
+
+/**
+ * @file
+ * @author Ivan Ristic <ivanr@webkreator.com>
+ */
+
+#ifndef HTP_LIST_PRIVATE_H
+#define HTP_LIST_PRIVATE_H
+
+#ifdef __cplusplus
+extern "C" {
+#endif
+
+#include "htp_list.h"
+
+typedef struct htp_list_linked_element_t htp_list_linked_element_t;
+
+// Array-backed list. The element pointers live in a single allocated
+// array in which the occupied region may wrap around from the last slot
+// to the first one.
+struct htp_list_array_t {
+ // Slot that holds the first element of the list.
+ size_t first;
+ // Slot that the next pushed element will be written to.
+ size_t last;
+ // Number of slots allocated in "elements".
+ size_t max_size;
+ // Number of elements currently stored in the list.
+ size_t current_size;
+ // Allocated array of element pointers.
+ void **elements;
+};
+
+// One link of a singly linked list.
+struct htp_list_linked_element_t {
+ // Caller-provided data carried by this link.
+ void *data;
+ // The following link, or NULL if this is the last one.
+ htp_list_linked_element_t *next;
+};
+
+// Singly linked list; both pointers are NULL while the list is empty.
+struct htp_list_linked_t {
+ // First link of the list.
+ htp_list_linked_element_t *first;
+ // Last link of the list, to which new elements are appended.
+ htp_list_linked_element_t *last;
+};
+
+#ifdef __cplusplus
+}
+#endif
+
+#endif /* HTP_LIST_PRIVATE_H */
+
diff --git a/htp/htp_multipart.c b/htp/htp_multipart.c
new file mode 100644
index 0000000..ea73072
--- /dev/null
+++ b/htp/htp_multipart.c
@@ -0,0 +1,1615 @@
+/***************************************************************************
+ * Copyright (c) 2009-2010 Open Information Security Foundation
+ * Copyright (c) 2010-2013 Qualys, Inc.
+ * All rights reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions are
+ * met:
+ *
+ * - Redistributions of source code must retain the above copyright
+ * notice, this list of conditions and the following disclaimer.
+
+ * - Redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in the
+ * documentation and/or other materials provided with the distribution.
+
+ * - Neither the name of the Qualys, Inc. nor the names of its
+ * contributors may be used to endorse or promote products derived from
+ * this software without specific prior written permission.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+ * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+ * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+ * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
+ * HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+ * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
+ * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+ * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
+ * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+ * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+ * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+ ***************************************************************************/
+
+/**
+ * @file
+ * @author Ivan Ristic <ivanr@webkreator.com>
+ */
+
+#include "htp_config_auto.h"
+
+#include "htp_private.h"
+
+/**
+ * Classifies a Content-Disposition parameter by its name, which occupies
+ * the bytes of data from startpos up to, but not including, endpos.
+ *
+ * @param[in] data     Buffer that contains the parameter name.
+ * @param[in] startpos Offset of the first byte of the name.
+ * @param[in] endpos   Offset one past the last byte of the name.
+ * @return CD_PARAM_NAME for "name", CD_PARAM_FILENAME for "filename", and
+ *         CD_PARAM_OTHER for anything else. The comparison is case-sensitive.
+ */
+static int htp_mpartp_cd_param_type(unsigned char *data, size_t startpos, size_t endpos) {
+    const unsigned char *name = data + startpos;
+
+    // Only the two known names are of interest; their lengths differ, so
+    // the length decides which one is worth comparing against.
+    switch (endpos - startpos) {
+        case 4:
+            if (memcmp(name, "name", 4) == 0) return CD_PARAM_NAME;
+            break;
+
+        case 8:
+            if (memcmp(name, "filename", 8) == 0) return CD_PARAM_FILENAME;
+            break;
+
+        default:
+            break;
+    }
+
+    return CD_PARAM_OTHER;
+}
+
+/**
+ * Gives access to the multipart structure embedded in the parser.
+ *
+ * @param[in] parser Must not be NULL.
+ * @return Pointer to the parser's own multipart member; no copy is made.
+ */
+htp_multipart_t *htp_mpartp_get_multipart(htp_mpartp_t *parser) {
+    htp_multipart_t *embedded = &parser->multipart;
+    return embedded;
+}
+
+/**
+ * Decodes a quoted C-D header value in place by dropping every backslash
+ * that immediately precedes a double quote or another backslash. This is
+ * impossible to do correctly without a parsing personality because most
+ * browsers are broken:
+ * - Firefox encodes " as \", and \ is not encoded.
+ * - Chrome encodes " as %22.
+ * - IE encodes " as \", and \ is not encoded.
+ * - Opera encodes " as \" and \ as \\.
+ *
+ * @param[in] b String to decode; its length is reduced by the number of
+ *              dropped backslashes.
+ */
+static void htp_mpart_decode_quoted_cd_value_inplace(bstr *b) {
+    unsigned char *buf = bstr_ptr(b);
+    const size_t len = bstr_len(b);
+    size_t rd = 0; // Next byte to read.
+    size_t wr = 0; // Next byte to write; never ahead of rd.
+
+    while (rd < len) {
+        // Skip an escaping backslash, so that the byte it protects is
+        // copied verbatim below.
+        if ((buf[rd] == '\\') && (rd + 1 < len)) {
+            if ((buf[rd + 1] == '"') || (buf[rd + 1] == '\\')) {
+                rd++;
+            }
+        }
+
+        buf[wr++] = buf[rd++];
+    }
+
+    // Everything that was kept is now at the front of the buffer.
+    bstr_adjust_len(b, wr);
+}
+
+/**
+ * Parses the Content-Disposition part header. The value has to start with
+ * "form-data", which may be followed by any number of parameters of the
+ * form ;name="value". The "name" parameter is stored in part->name and the
+ * "filename" parameter in part->file->filename, both with escaped quotes
+ * and backslashes decoded. Problems are recorded in the flags of the
+ * parser's multipart structure.
+ *
+ * @param[in] part
+ * @return HTP_OK on success (header found and parsed), HTP_DECLINED if there is no C-D header or if
+ *         it could not be processed, and HTP_ERROR on fatal error.
+ */
+htp_status_t htp_mpart_part_parse_c_d(htp_multipart_part_t *part) {
+    // Find the C-D header.
+    htp_header_t *h = htp_table_get_c(part->headers, "content-disposition");
+    if (h == NULL) {
+        part->parser->multipart.flags |= HTP_MULTIPART_PART_UNKNOWN;
+        return HTP_DECLINED;
+    }
+
+    // Require "form-data" at the beginning of the header.
+    if (bstr_index_of_c(h->value, "form-data") != 0) {
+        part->parser->multipart.flags |= HTP_MULTIPART_CD_SYNTAX_INVALID;
+        return HTP_DECLINED;
+    }
+
+    // The parsing starts here.
+    unsigned char *data = bstr_ptr(h->value);
+    size_t len = bstr_len(h->value);
+    size_t pos = 9; // Start after "form-data"
+
+    // Main parameter parsing loop (once per parameter).
+    while (pos < len) {
+        // Ignore whitespace.
+        while ((pos < len) && isspace(data[pos])) pos++;
+        if (pos == len) goto invalid_syntax;
+
+        // Expecting a semicolon.
+        if (data[pos] != ';') goto invalid_syntax;
+        pos++;
+
+        // Go over the whitespace before parameter name.
+        while ((pos < len) && isspace(data[pos])) pos++;
+        if (pos == len) goto invalid_syntax;
+
+        // Found the starting position of the parameter name.
+        size_t start = pos;
+
+        // Look for the ending position.
+        while ((pos < len) && (!isspace(data[pos]) && (data[pos] != '='))) pos++;
+        if (pos == len) goto invalid_syntax;
+
+        // Ending position is in "pos" now.
+
+        // Determine parameter type ("name", "filename", or other).
+        int param_type = htp_mpartp_cd_param_type(data, start, pos);
+
+        // Ignore whitespace after parameter name, if any.
+        while ((pos < len) && isspace(data[pos])) pos++;
+        if (pos == len) goto invalid_syntax;
+
+        // Equals.
+        if (data[pos] != '=') goto invalid_syntax;
+        pos++;
+
+        // Go over the whitespace before the parameter value.
+        while ((pos < len) && isspace(data[pos])) pos++;
+        if (pos == len) goto invalid_syntax;
+
+        // Expecting a double quote. Anything else is a bare string or
+        // non-standard quoting, which we don't like.
+        if (data[pos] != '"') goto invalid_syntax;
+
+        pos++; // Over the double quote.
+
+        // We have the starting position of the value.
+        start = pos;
+
+        // Find the end of the value.
+        while ((pos < len) && (data[pos] != '"')) {
+            // Check for escaping.
+            if (data[pos] == '\\') {
+                // A backslash as the last character in the C-D header.
+                if (pos + 1 >= len) goto invalid_syntax;
+
+                // Allow " and \ to be escaped.
+                if ((data[pos + 1] == '"') || (data[pos + 1] == '\\')) {
+                    // Go over the quoted character.
+                    pos++;
+                }
+            }
+
+            pos++;
+        }
+
+        // If we've reached the end of the string that means the
+        // value was not terminated properly (the second double quote is missing).
+        if (pos == len) goto invalid_syntax;
+
+        // The loop above stops only at the end of the data or on a double
+        // quote, so this is the terminating double quote.
+        pos++; // Over the terminating double quote.
+
+        // Finally, process the parameter value, which spans the bytes from
+        // "start" up to the terminating double quote at "pos - 1".
+
+        switch (param_type) {
+            case CD_PARAM_NAME:
+                // Check that we have not seen the name parameter already.
+                if (part->name != NULL) {
+                    part->parser->multipart.flags |= HTP_MULTIPART_CD_PARAM_REPEATED;
+                    return HTP_DECLINED;
+                }
+
+                part->name = bstr_dup_mem(data + start, pos - start - 1);
+                if (part->name == NULL) return HTP_ERROR;
+
+                htp_mpart_decode_quoted_cd_value_inplace(part->name);
+
+                break;
+
+            case CD_PARAM_FILENAME:
+                // Check that we have not seen the filename parameter already.
+                if (part->file != NULL) {
+                    part->parser->multipart.flags |= HTP_MULTIPART_CD_PARAM_REPEATED;
+                    return HTP_DECLINED;
+                }
+
+                part->file = calloc(1, sizeof (htp_file_t));
+                if (part->file == NULL) return HTP_ERROR;
+
+                part->file->fd = -1;
+                part->file->source = HTP_FILE_MULTIPART;
+
+                part->file->filename = bstr_dup_mem(data + start, pos - start - 1);
+                if (part->file->filename == NULL) {
+                    free(part->file);
+                    // Do not leave a dangling pointer in the part: it is
+                    // dereferenced and freed again when the part is destroyed.
+                    part->file = NULL;
+                    return HTP_ERROR;
+                }
+
+                htp_mpart_decode_quoted_cd_value_inplace(part->file->filename);
+
+                break;
+
+            default:
+                // Unknown parameter.
+                part->parser->multipart.flags |= HTP_MULTIPART_CD_PARAM_UNKNOWN;
+                return HTP_DECLINED;
+        }
+
+        // Continue to parse the next parameter, if any.
+    }
+
+    return HTP_OK;
+
+invalid_syntax:
+    // Common exit for every violation of the expected parameter syntax.
+    part->parser->multipart.flags |= HTP_MULTIPART_CD_SYNTAX_INVALID;
+    return HTP_DECLINED;
+}
+
+/**
+ * Parses the Content-Type part header, if there is one, and stores the
+ * result in part->content_type.
+ *
+ * @param[in] part
+ * @return HTP_DECLINED if the part has no C-T header; otherwise whatever
+ *         htp_parse_ct_header() returns for the header value.
+ */
+static htp_status_t htp_mpart_part_parse_c_t(htp_multipart_part_t *part) {
+    htp_header_t *ct = (htp_header_t *) htp_table_get_c(part->headers, "content-type");
+
+    return (ct != NULL) ? htp_parse_ct_header(ct->value, &part->content_type) : HTP_DECLINED;
+}
+
+/**
+ * Processes part headers: first Content-Disposition, then Content-Type.
+ * Only HTP_ERROR from either parser is treated as a failure; any other
+ * outcome (such as a missing or declined header) counts as success.
+ *
+ * @param[in] part
+ * @return HTP_OK on success, HTP_ERROR on failure.
+ */
+htp_status_t htp_mpart_part_process_headers(htp_multipart_part_t *part) {
+    // The second parser is not run if the first one fails.
+    if ((htp_mpart_part_parse_c_d(part) == HTP_ERROR)
+            || (htp_mpart_part_parse_c_t(part) == HTP_ERROR)) {
+        return HTP_ERROR;
+    }
+
+    return HTP_OK;
+}
+
+/**
+ * Parses one part header line of the form "name: value" and records it in
+ * part->headers. A header whose name is already present is merged into the
+ * existing entry, with the values joined by ", ". Problems are recorded in
+ * the flags of the parser's multipart structure.
+ *
+ * @param[in] part
+ * @param[in] data
+ * @param[in] len
+ * @return HTP_OK on success, HTP_DECLINED on parsing error, HTP_ERROR on fatal error.
+ */
+htp_status_t htp_mpartp_parse_header(htp_multipart_part_t *part, const unsigned char *data, size_t len) {
+    const size_t name_start = 0;
+    size_t name_end, value_start, value_end;
+    size_t colon_pos = 0;
+    htp_header_t *h = NULL;
+    htp_header_t *h_existing = NULL;
+    bstr *merged = NULL;
+
+    // We do not allow NUL bytes here.
+    if (memchr(data, '\0', len) != NULL) {
+        part->parser->multipart.flags |= HTP_MULTIPART_NUL_BYTE;
+        return HTP_DECLINED;
+    }
+
+    // Whitespace before header name is not accepted.
+    while ((colon_pos < len) && (htp_is_space(data[colon_pos]))) colon_pos++;
+    if (colon_pos != 0) goto invalid_header;
+
+    // Now look for the colon.
+    while ((colon_pos < len) && (data[colon_pos] != ':')) colon_pos++;
+
+    // Missing colon.
+    if (colon_pos == len) goto invalid_header;
+
+    // Empty header name.
+    if (colon_pos == 0) goto invalid_header;
+
+    name_end = colon_pos;
+
+    // LWS after field name. Not allowing for now.
+    if ((name_end > name_start) && (htp_is_lws(data[name_end - 1]))) goto invalid_header;
+
+    // Header value.
+
+    value_start = colon_pos + 1;
+
+    // Ignore LWS before value.
+    while ((value_start < len) && (htp_is_lws(data[value_start]))) value_start++;
+
+    // No header value.
+    if (value_start == len) goto invalid_header;
+
+    // Assume the value is at the end.
+    value_end = len;
+
+    // Check that the header name is a token.
+    for (size_t i = name_start; i < name_end; i++) {
+        if (!htp_is_token(data[i])) goto invalid_header;
+    }
+
+    // Now extract the name and the value.
+    h = calloc(1, sizeof (htp_header_t));
+    if (h == NULL) return HTP_ERROR;
+
+    h->name = bstr_dup_mem(data + name_start, name_end - name_start);
+    if (h->name == NULL) {
+        free(h);
+        return HTP_ERROR;
+    }
+
+    h->value = bstr_dup_mem(data + value_start, value_end - value_start);
+    if (h->value == NULL) {
+        bstr_free(h->name);
+        free(h);
+        return HTP_ERROR;
+    }
+
+    // Anything other than C-D and C-T is noted as an unknown part header.
+    if ((bstr_cmp_c_nocase(h->name, "content-disposition") != 0) && (bstr_cmp_c_nocase(h->name, "content-type") != 0)) {
+        part->parser->multipart.flags |= HTP_MULTIPART_PART_HEADER_UNKNOWN;
+    }
+
+    // Check if the header already exists.
+    h_existing = htp_table_get(part->headers, h->name);
+    if (h_existing == NULL) {
+        // Add as a new header.
+        if (htp_table_add(part->headers, h->name, h) != HTP_OK) {
+            bstr_free(h->value);
+            bstr_free(h->name);
+            free(h);
+            return HTP_ERROR;
+        }
+
+        return HTP_OK;
+    }
+
+    // Add to the existing header: make room for ", " and the new value.
+    merged = bstr_expand(h_existing->value, bstr_len(h_existing->value)
+            + 2 + bstr_len(h->value));
+    if (merged == NULL) {
+        bstr_free(h->name);
+        bstr_free(h->value);
+        free(h);
+        return HTP_ERROR;
+    }
+
+    h_existing->value = merged;
+    bstr_add_mem_noex(h_existing->value, ", ", 2);
+    bstr_add_noex(h_existing->value, h->value);
+
+    // The header is no longer needed.
+    bstr_free(h->name);
+    bstr_free(h->value);
+    free(h);
+
+    // Keep track of same-name headers.
+    h_existing->flags |= HTP_MULTIPART_PART_HEADER_REPEATED;
+    part->parser->multipart.flags |= HTP_MULTIPART_PART_HEADER_REPEATED;
+
+    return HTP_OK;
+
+invalid_header:
+    // Common exit for every malformed header line.
+    part->parser->multipart.flags |= HTP_MULTIPART_PART_HEADER_INVALID;
+    return HTP_DECLINED;
+}
+
+/**
+ * Creates a new Multipart part that belongs to the given parser. As a side
+ * effect, the parser's part data and part header builders are cleared.
+ *
+ * @param[in] parser
+ * @return New part instance, or NULL on memory allocation failure.
+ */
+htp_multipart_part_t *htp_mpart_part_create(htp_mpartp_t *parser) {
+    htp_multipart_part_t *fresh = calloc(1, sizeof (htp_multipart_part_t));
+    if (fresh == NULL) return NULL;
+
+    fresh->headers = htp_table_create(4);
+    if (fresh->headers != NULL) {
+        fresh->parser = parser;
+
+        // Start the new part with empty builders.
+        bstr_builder_clear(parser->part_data_pieces);
+        bstr_builder_clear(parser->part_header_pieces);
+
+        return fresh;
+    }
+
+    // No header table; the part is of no use without one.
+    free(fresh);
+    return NULL;
+}
+
+/**
+ * Destroys a part, along with its file information (removing the
+ * temporary file, if one is recorded), content type and headers. A NULL
+ * part is ignored.
+ *
+ * @param[in] part
+ * @param[in] gave_up_data When non-zero and the part is a text part, the
+ *            name and value are left alone instead of being freed.
+ */
+void htp_mpart_part_destroy(htp_multipart_part_t *part, int gave_up_data) {
+    if (part == NULL) return;
+
+    htp_file_t *file = part->file;
+    if (file != NULL) {
+        bstr_free(file->filename);
+
+        // Remove the temporary file from disk before forgetting its name.
+        if (file->tmpname != NULL) {
+            unlink(file->tmpname);
+            free(file->tmpname);
+        }
+
+        free(file);
+        part->file = NULL;
+    }
+
+    // Name and value are kept only for text parts whose data was given up.
+    if (!(gave_up_data && (part->type == MULTIPART_PART_TEXT))) {
+        bstr_free(part->name);
+        bstr_free(part->value);
+    }
+
+    bstr_free(part->content_type);
+
+    if (part->headers != NULL) {
+        // The table does not own the headers, so free them one by one first.
+        const size_t count = htp_table_size(part->headers);
+        for (size_t i = 0; i < count; i++) {
+            htp_header_t *hdr = htp_table_get_index(part->headers, i, NULL);
+            bstr_free(hdr->name);
+            bstr_free(hdr->value);
+            free(hdr);
+        }
+
+        htp_table_destroy(part->headers);
+    }
+
+    free(part);
+}
+
+/**
+ * Finalizes part processing.
+ *
+ * @param[in] part
+ * @return HTP_OK on success, HTP_ERROR on failure.
+ */
+htp_status_t htp_mpart_part_finalize_data(htp_multipart_part_t *part) {
+ // Determine if this part is the epilogue.
+
+ if (part->parser->multipart.flags & HTP_MULTIPART_SEEN_LAST_BOUNDARY) {
+ if (part->type == MULTIPART_PART_UNKNOWN) {
+ // Assume that the unknown part after the last boundary is the epilogue.
+ part->parser->current_part->type = MULTIPART_PART_EPILOGUE;
+
+ // But if we've already seen a part we thought was the epilogue,
+ // raise HTP_MULTIPART_PART_UNKNOWN. Multiple epilogues are not allowed.
+ if (part->parser->multipart.flags & HTP_MULTIPART_HAS_EPILOGUE) {
+ part->parser->multipart.flags |= HTP_MULTIPART_PART_UNKNOWN;
+ }
+
+ part->parser->multipart.flags |= HTP_MULTIPART_HAS_EPILOGUE;
+ } else {
+ part->parser->multipart.flags |= HTP_MULTIPART_PART_AFTER_LAST_BOUNDARY;
+ }
+ }
+
+ // Sanity checks.
+
+ // Have we seen complete part headers? If we have not, that means that the part ended prematurely.
+ if ((part->parser->current_part->type != MULTIPART_PART_EPILOGUE) && (part->parser->current_part_mode != MODE_DATA)) {
+ part->parser->multipart.flags |= HTP_MULTIPART_PART_INCOMPLETE;
+ }
+
+ // Have we been able to determine the part type? If not, this means
+ // that the part did not contain the C-D header.
+ if (part->type == MULTIPART_PART_UNKNOWN) {
+ part->parser->multipart.flags |= HTP_MULTIPART_PART_UNKNOWN;
+ }
+
+ // Finalize part value.
+
+ if (part->type == MULTIPART_PART_FILE) {
+ // Notify callbacks about the end of the file.
+ htp_mpartp_run_request_file_data_hook(part, NULL, 0);
+
+ // If we are storing the file to disk, close the file descriptor.
+ if (part->file->fd != -1) {
+ close(part->file->fd);
+ }
+ } else {
+ // Combine value pieces into a single buffer.
+ if (bstr_builder_size(part->parser->part_data_pieces) > 0) {
+ part->value = bstr_builder_to_str(part->parser->part_data_pieces);
+ bstr_builder_clear(part->parser->part_data_pieces);
+ }
+ }
+
+ return HTP_OK;
+}
+
+ /**
+  * Accounts for a chunk of file data and hands it to the request file data
+  * hooks registered in the parser's configuration.
+  *
+  * @param[in] part File part; its file record is dereferenced.
+  * @param[in] data Chunk bytes; the finalizer passes NULL to mark end of file.
+  * @param[in] len  Number of bytes in the chunk.
+  * @return HTP_OK when the parser has no configuration attached, otherwise
+  *         whatever htp_hook_run_all() returns.
+  */
+ htp_status_t htp_mpartp_run_request_file_data_hook(htp_multipart_part_t *part, const unsigned char *data, size_t len) {
+     htp_cfg_t *config = part->parser->cfg;
+
+     // Without a configuration there are no hooks to notify.
+     if (config == NULL) return HTP_OK;
+
+     // Keep the running file length up to date.
+     part->file->len += len;
+
+     // Bundle the chunk for the callbacks.
+     htp_file_data_t chunk = {
+         .file = part->file,
+         .data = data,
+         .len = len
+     };
+
+     // The hook chain decides the outcome.
+     return htp_hook_run_all(config->hook_request_file_data, &chunk);
+ }
+
+/**
+ * Handles part data.
+ *
+ * @param[in] part
+ * @param[in] data
+ * @param[in] len
+ * @param[in] is_line
+ * @return HTP_OK on success, HTP_ERROR on failure.
+ */
+htp_status_t htp_mpart_part_handle_data(htp_multipart_part_t *part, const unsigned char *data, size_t len, int is_line) {
+ #if HTP_DEBUG
+ fprintf(stderr, "Part type %d mode %d is_line %d\n", part->type, part->parser->current_part_mode, is_line);
+ fprint_raw_data(stderr, "htp_mpart_part_handle_data: data chunk", data, len);
+ #endif
+
+ // Keep track of raw part length.
+ part->len += len;
+
+ // If we're processing a part that came after the last boundary, then we're not sure if it
+ // is the epilogue part or some other part (in case of evasion attempt). For that reason we
+ // will keep all its data in the part_data_pieces structure. If it ends up not being the
+ // epilogue, this structure will be cleared.
+ if ((part->parser->multipart.flags & HTP_MULTIPART_SEEN_LAST_BOUNDARY) && (part->type == MULTIPART_PART_UNKNOWN)) {
+ bstr_builder_append_mem(part->parser->part_data_pieces, data, len);
+ }
+
+ if (part->parser->current_part_mode == MODE_LINE) {
+ // Line mode.
+
+ if (is_line) {
+ // End of the line.
+
+ bstr *line = NULL;
+
+ // If this line came to us in pieces, combine them now into a single buffer.
+ if (bstr_builder_size(part->parser->part_header_pieces) > 0) {
+ bstr_builder_append_mem(part->parser->part_header_pieces, data, len);
+ line = bstr_builder_to_str(part->parser->part_header_pieces);
+ if (line == NULL) return HTP_ERROR;
+ bstr_builder_clear(part->parser->part_header_pieces);
+
+ data = bstr_ptr(line);
+ len = bstr_len(line);
+ }
+
+ // Ignore the line endings.
+ if (len > 1) {
+ if (data[len - 1] == LF) len--;
+ if (data[len - 1] == CR) len--;
+ } else if (len > 0) {
+ if (data[len - 1] == LF) len--;
+ }
+
+ // Is it an empty line?
+ if (len == 0) {
+ // Empty line; process headers and switch to data mode.
+
+ // Process the pending header, if any.
+ if (part->parser->pending_header_line != NULL) {
+ if (htp_mpartp_parse_header(part, bstr_ptr(part->parser->pending_header_line),
+ bstr_len(part->parser->pending_header_line)) == HTP_ERROR)
+ {
+ bstr_free(line);
+ return HTP_ERROR;
+ }
+
+ bstr_free(part->parser->pending_header_line);
+ part->parser->pending_header_line = NULL;
+ }
+
+ if (htp_mpart_part_process_headers(part) == HTP_ERROR) {
+ bstr_free(line);
+ return HTP_ERROR;
+ }
+
+ part->parser->current_part_mode = MODE_DATA;
+ bstr_builder_clear(part->parser->part_header_pieces);
+
+ if (part->file != NULL) {
+ // Changing part type because we have a filename.
+ part->type = MULTIPART_PART_FILE;
+
+ if ((part->parser->extract_files) && (part->parser->file_count < part->parser->extract_limit)) {
+ char buf[255];
+
+ strncpy(buf, part->parser->extract_dir, 254);
+ strncat(buf, "/libhtp-multipart-file-XXXXXX", 254 - strlen(buf));
+
+ part->file->tmpname = strdup(buf);
+ if (part->file->tmpname == NULL) {
+ bstr_free(line);
+ return HTP_ERROR;
+ }
+
+ mode_t previous_mask = umask(S_IXUSR | S_IRWXG | S_IRWXO);
+ part->file->fd = mkstemp(part->file->tmpname);
+ umask(previous_mask);
+
+ if (part->file->fd < 0) {
+ bstr_free(line);
+ return HTP_ERROR;
+ }
+
+ part->parser->file_count++;
+ }
+ } else if (part->name != NULL) {
+ // Changing part type because we have a name.
+ part->type = MULTIPART_PART_TEXT;
+ bstr_builder_clear(part->parser->part_data_pieces);
+ } else {
+ // Do nothing; the type stays MULTIPART_PART_UNKNOWN.
+ }
+ } else {
+ // Not an empty line.
+
+ // Is there a pending header?
+ if (part->parser->pending_header_line == NULL) {
+ if (line != NULL) {
+ part->parser->pending_header_line = line;
+ line = NULL;
+ } else {
+ part->parser->pending_header_line = bstr_dup_mem(data, len);
+ if (part->parser->pending_header_line == NULL) return HTP_ERROR;
+ }
+ } else {
+ // Is this a folded line?
+ if (isspace(data[0])) {
+ // Folding; add to the existing line.
+ part->parser->multipart.flags |= HTP_MULTIPART_PART_HEADER_FOLDING;
+ part->parser->pending_header_line = bstr_add_mem(part->parser->pending_header_line, data, len);
+ if (part->parser->pending_header_line == NULL) {
+ bstr_free(line);
+ return HTP_ERROR;
+ }
+ } else {
+ // Process the pending header line.
+ if (htp_mpartp_parse_header(part, bstr_ptr(part->parser->pending_header_line),
+ bstr_len(part->parser->pending_header_line)) == HTP_ERROR)
+ {
+ bstr_free(line);
+ return HTP_ERROR;
+ }
+
+ bstr_free(part->parser->pending_header_line);
+
+ if (line != NULL) {
+ part->parser->pending_header_line = line;
+ line = NULL;
+ } else {
+ part->parser->pending_header_line = bstr_dup_mem(data, len);
+ if (part->parser->pending_header_line == NULL) return HTP_ERROR;
+ }
+ }
+ }
+ }
+
+ bstr_free(line);
+ line = NULL;
+ } else {
+ // Not end of line; keep the data chunk for later.
+ bstr_builder_append_mem(part->parser->part_header_pieces, data, len);
+ }
+ } else {
+ // Data mode; keep the data chunk for later (but not if it is a file).
+ switch (part->type) {
+ case MULTIPART_PART_EPILOGUE:
+ case MULTIPART_PART_PREAMBLE:
+ case MULTIPART_PART_TEXT:
+ case MULTIPART_PART_UNKNOWN:
+ // Make a copy of the data in RAM.
+ bstr_builder_append_mem(part->parser->part_data_pieces, data, len);
+ break;
+
+ case MULTIPART_PART_FILE:
+ // Invoke file data callbacks.
+ htp_mpartp_run_request_file_data_hook(part, data, len);
+
+ // Optionally, store the data in a file.
+ if (part->file->fd != -1) {
+ if (write(part->file->fd, data, len) < 0) {
+ return HTP_ERROR;
+ }
+ }
+ break;
+
+ default:
+ // Internal error.
+ return HTP_ERROR;
+ break;
+ }
+ }
+
+ return HTP_OK;
+}
+
+/**
+ * Handles data, creating new parts as necessary.
+ *
+ * @param[in] mpartp
+ * @param[in] data
+ * @param[in] len
+ * @param[in] is_line
+ * @return HTP_OK on success, HTP_ERROR on failure.
+ */
+static htp_status_t htp_mpartp_handle_data(htp_mpartp_t *parser, const unsigned char *data, size_t len, int is_line) {
+ if (len == 0) return HTP_OK;
+
+ // Do we have a part already?
+ if (parser->current_part == NULL) {
+ // Create a new part.
+ parser->current_part = htp_mpart_part_create(parser);
+ if (parser->current_part == NULL) return HTP_ERROR;
+
+ if (parser->multipart.boundary_count == 0) {
+ // We haven't seen a boundary yet, so this must be the preamble part.
+ parser->current_part->type = MULTIPART_PART_PREAMBLE;
+ parser->multipart.flags |= HTP_MULTIPART_HAS_PREAMBLE;
+ parser->current_part_mode = MODE_DATA;
+ } else {
+ // Part after preamble.
+ parser->current_part_mode = MODE_LINE;
+ }
+
+ // Add part to the list.
+ htp_list_push(parser->multipart.parts, parser->current_part);
+
+ #ifdef HTP_DEBUG
+ fprintf(stderr, "Created new part type %d\n", parser->current_part->type);
+ #endif
+ }
+
+ // Send data to the part.
+ return htp_mpart_part_handle_data(parser->current_part, data, len, is_line);
+}
+
+/**
+ * Handles a boundary event, which means that it will finalize a part if one exists.
+ *
+ * @param[in] mpartp
+ * @return HTP_OK on success, HTP_ERROR on failure.
+ */
+static htp_status_t htp_mpartp_handle_boundary(htp_mpartp_t *parser) {
+ #if HTP_DEBUG
+ fprintf(stderr, "htp_mpartp_handle_boundary\n");
+ #endif
+
+ if (parser->current_part != NULL) {
+ if (htp_mpart_part_finalize_data(parser->current_part) != HTP_OK) {
+ return HTP_ERROR;
+ }
+
+ // We're done with this part
+ parser->current_part = NULL;
+
+ // Revert to line mode
+ parser->current_part_mode = MODE_LINE;
+ }
+
+ return HTP_OK;
+}
+
+ /**
+  * Stores the boundary the parser will search for. The stored form is the
+  * caller's boundary bytes, copied verbatim, prefixed with CR LF and two
+  * dashes, and NUL-terminated. The parser is left in boundary-matching state.
+  *
+  * @param[in] parser Multipart parser.
+  * @param[in] data   Boundary bytes.
+  * @param[in] len    Number of boundary bytes.
+  * @return HTP_OK on success, HTP_ERROR on NULL input or allocation failure.
+  */
+ static htp_status_t htp_mpartp_init_boundary(htp_mpartp_t *parser, unsigned char *data, size_t len) {
+     if ((parser == NULL) || (data == NULL)) return HTP_ERROR;
+
+     // Four extra bytes for the CR LF "--" prefix, one more for the terminator.
+     const size_t total = len + 4;
+     const unsigned char prefix[4] = { CR, LF, '-', '-' };
+
+     parser->multipart.boundary_len = total;
+     parser->multipart.boundary = malloc(total + 1);
+     if (parser->multipart.boundary == NULL) return HTP_ERROR;
+
+     memcpy(parser->multipart.boundary, prefix, sizeof (prefix));
+     memcpy(parser->multipart.boundary + 4, data, len);
+     parser->multipart.boundary[total] = '\0';
+
+     // Matching starts at offset 2, the first dash: the very first boundary
+     // may arrive without a preceding CRLF. Should non-boundary data show up
+     // instead, the parser falls back to data mode and comes back to boundary
+     // matching after the next CRLF or LF.
+     parser->parser_state = STATE_BOUNDARY;
+     parser->boundary_match_pos = 2;
+
+     return HTP_OK;
+ }
+
+ /**
+  * Creates a Multipart parser configured from cfg and primed with the given
+  * boundary.
+  *
+  * @param[in] cfg      Configuration; supplies the file extraction settings.
+  * @param[in] boundary Boundary value. On success it is freed here, because
+  *                     the parser keeps its own copy; on failure it is left
+  *                     untouched.
+  * @param[in] flags    Initial multipart flags.
+  * @return New parser, or NULL on invalid input or allocation failure.
+  */
+ htp_mpartp_t *htp_mpartp_create(htp_cfg_t *cfg, bstr *boundary, uint64_t flags) {
+     if ((cfg == NULL) || (boundary == NULL)) return NULL;
+
+     htp_mpartp_t *mp = calloc(1, sizeof (htp_mpartp_t));
+     if (mp == NULL) return NULL;
+
+     mp->cfg = cfg;
+
+     // Working buffers and the part list; any failure unwinds through "fail".
+     if ((mp->boundary_pieces = bstr_builder_create()) == NULL) goto fail;
+     if ((mp->part_data_pieces = bstr_builder_create()) == NULL) goto fail;
+     if ((mp->part_header_pieces = bstr_builder_create()) == NULL) goto fail;
+     if ((mp->multipart.parts = htp_list_create(64)) == NULL) goto fail;
+
+     mp->multipart.flags = flags;
+     mp->parser_state = STATE_INIT;
+
+     // File extraction settings; a negative limit selects the built-in default.
+     mp->extract_files = cfg->extract_request_files;
+     mp->extract_dir = cfg->tmpdir;
+     mp->extract_limit = (cfg->extract_request_files_limit >= 0)
+             ? cfg->extract_request_files_limit
+             : DEFAULT_FILE_EXTRACT_LIMIT;
+
+     mp->handle_data = htp_mpartp_handle_data;
+     mp->handle_boundary = htp_mpartp_handle_boundary;
+
+     // Copy the boundary into the parser.
+     if (htp_mpartp_init_boundary(mp, bstr_ptr(boundary), bstr_len(boundary)) != HTP_OK) goto fail;
+
+     // Success transfers ownership of the boundary parameter to us; the
+     // parser holds a copy, so the original can go.
+     bstr_free(boundary);
+
+     return mp;
+
+ fail:
+     htp_mpartp_destroy(mp);
+     return NULL;
+ }
+
+ /**
+  * Destroys a parser together with its boundary copy, its working buffers,
+  * any pending header line and every part it has collected.
+  *
+  * @param[in] parser Parser to destroy; NULL is accepted and ignored.
+  */
+ void htp_mpartp_destroy(htp_mpartp_t *parser) {
+     if (parser == NULL) return;
+
+     // free() accepts NULL, so no guard is needed for a missing boundary.
+     free(parser->multipart.boundary);
+
+     bstr_builder_destroy(parser->boundary_pieces);
+     bstr_builder_destroy(parser->part_header_pieces);
+     bstr_free(parser->pending_header_line);
+     bstr_builder_destroy(parser->part_data_pieces);
+
+     // Release every part, then the list that held them.
+     if (parser->multipart.parts != NULL) {
+         const size_t part_count = htp_list_size(parser->multipart.parts);
+         size_t idx = 0;
+
+         while (idx < part_count) {
+             htp_multipart_part_t *current = htp_list_get(parser->multipart.parts, idx);
+             htp_mpart_part_destroy(current, parser->gave_up_data);
+             idx++;
+         }
+
+         htp_list_destroy(parser->multipart.parts);
+     }
+
+     free(parser);
+ }
+
+/**
+ * Processes set-aside data.
+ *
+ * @param[in] mpartp
+ * @param[in] data
+ * @param[in] pos
+ * @param[in] startpos
+ * @param[in] return_pos
+ * @param[in] matched
+ * @return HTP_OK on success, HTP_ERROR on failure.
+ */
+static htp_status_t htp_martp_process_aside(htp_mpartp_t *parser, int matched) {
+ // The stored data pieces can contain up to one line. If we're in data mode and there
+ // was no boundary match, things are straightforward -- we process everything as data.
+ // If there was a match, we need to take care to not send the line ending as data, nor
+ // anything that follows (because it's going to be a part of the boundary). Similarly,
+ // when we are in line mode, we need to split the first data chunk, processing the first
+ // part as line and the second part as data.
+
+ #ifdef HTP_DEBUG
+ fprintf(stderr, "mpartp_process_aside matched %d current_part_mode %d\n", matched, parser->current_part_mode);
+ #endif
+
+ // Do we need to do any chunk splitting?
+ if (matched || (parser->current_part_mode == MODE_LINE)) {
+ // Line mode or boundary match
+
+ // Process the CR byte, if set aside.
+ if ((!matched) && (parser->cr_aside)) {
+ // Treat as part data, when there is not a match.
+ parser->handle_data(parser, (unsigned char *) &"\r", 1, /* not a line */ 0);
+ parser->cr_aside = 0;
+ } else {
+ // Treat as boundary, when there is a match.
+ parser->cr_aside = 0;
+ }
+
+ // We know that we went to match a boundary because
+ // we saw a new line. Now we have to find that line and
+ // process it. It's either going to be in the current chunk,
+ // or in the first stored chunk.
+ if (bstr_builder_size(parser->boundary_pieces) > 0) {
+ int first = 1;
+ for (size_t i = 0, n = htp_list_size(parser->boundary_pieces->pieces); i < n; i++) {
+ bstr *b = htp_list_get(parser->boundary_pieces->pieces, i);
+
+ if (first) {
+ first = 0;
+
+ // Split the first chunk.
+
+ if (!matched) {
+ // In line mode, we are OK with line endings.
+ parser->handle_data(parser, bstr_ptr(b), parser->boundary_candidate_pos, /* line */ 1);
+ } else {
+ // But if there was a match, the line ending belongs to the boundary.
+ unsigned char *dx = bstr_ptr(b);
+ size_t lx = parser->boundary_candidate_pos;
+
+ // Remove LF or CRLF.
+ if ((lx > 0) && (dx[lx - 1] == LF)) {
+ lx--;
+ // Remove CR.
+ if ((lx > 0) && (dx[lx - 1] == CR)) {
+ lx--;
+ }
+ }
+
+ parser->handle_data(parser, dx, lx, /* not a line */ 0);
+ }
+
+ // The second part of the split chunks belongs to the boundary
+ // when matched, data otherwise.
+ if (!matched) {
+ parser->handle_data(parser, bstr_ptr(b) + parser->boundary_candidate_pos,
+ bstr_len(b) - parser->boundary_candidate_pos, /* not a line */ 0);
+ }
+ } else {
+ // Do not send data if there was a boundary match. The stored
+ // data belongs to the boundary.
+ if (!matched) {
+ parser->handle_data(parser, bstr_ptr(b), bstr_len(b), /* not a line */ 0);
+ }
+ }
+ }
+
+ bstr_builder_clear(parser->boundary_pieces);
+ }
+ } else {
+ // Data mode and no match.
+
+ // In data mode, we process the lone CR byte as data.
+ if (parser->cr_aside) {
+ parser->handle_data(parser, (const unsigned char *)&"\r", 1, /* not a line */ 0);
+ parser->cr_aside = 0;
+ }
+
+ // We then process any pieces that we might have stored, also as data.
+ if (bstr_builder_size(parser->boundary_pieces) > 0) {
+ for (size_t i = 0, n = htp_list_size(parser->boundary_pieces->pieces); i < n; i++) {
+ bstr *b = htp_list_get(parser->boundary_pieces->pieces, i);
+ parser->handle_data(parser, bstr_ptr(b), bstr_len(b), /* not a line */ 0);
+ }
+
+ bstr_builder_clear(parser->boundary_pieces);
+ }
+ }
+
+ return HTP_OK;
+}
+
+ /**
+  * Finishes parsing: data that was set aside while matching a boundary is
+  * flushed into the part in progress, that part is finalized, and the
+  * boundary piece buffer is emptied. Ending anywhere but in the epilogue
+  * raises HTP_MULTIPART_INCOMPLETE.
+  *
+  * @param[in] parser Multipart parser.
+  * @return HTP_OK on success, HTP_ERROR when the last part cannot be finalized.
+  */
+ htp_status_t htp_mpartp_finalize(htp_mpartp_t *parser) {
+     // With no part in progress only the boundary buffer needs attention.
+     if (parser->current_part == NULL) {
+         bstr_builder_clear(parser->boundary_pieces);
+         return HTP_OK;
+     }
+
+     // Anything buffered is now known not to be a boundary.
+     htp_martp_process_aside(parser, 0);
+
+     // Close the last part.
+     if (htp_mpart_part_finalize_data(parser->current_part) != HTP_OK) return HTP_ERROR;
+
+     // Only the epilogue is allowed to end abruptly.
+     if (parser->current_part->type != MULTIPART_PART_EPILOGUE) {
+         parser->multipart.flags |= HTP_MULTIPART_INCOMPLETE;
+     }
+
+     bstr_builder_clear(parser->boundary_pieces);
+
+     return HTP_OK;
+ }
+
+ /**
+  * Feeds one chunk of a multipart body to the parser. The parser is a state
+  * machine whose state is kept in the parser structure, so a body may be
+  * delivered in any number of chunks: part data is scanned for line endings
+  * (STATE_DATA), every line ending starts a boundary match (STATE_BOUNDARY),
+  * and a matched boundary is followed by the checks for the closing "--" and
+  * the rest of the boundary line (STATE_BOUNDARY_IS_LAST2/_IS_LAST1/_EAT_LWS/
+  * _EAT_LWS_CR). Data and boundary events are delivered through the parser's
+  * handle_data and handle_boundary callbacks, whose results are not checked.
+  *
+  * @param[in] parser Multipart parser.
+  * @param[in] _data  Chunk bytes.
+  * @param[in] len    Number of bytes in the chunk.
+  * @return HTP_OK on success, HTP_ERROR when the parser was never initialized
+  *         with a boundary (STATE_INIT).
+  */
+ htp_status_t htp_mpartp_parse(htp_mpartp_t *parser, const void *_data, size_t len) {
+     unsigned char *data = (unsigned char *) _data;
+
+     // The current position in the entire input buffer.
+     size_t pos = 0;
+
+     // The position of the first unprocessed byte of data. We split the
+     // input buffer into smaller chunks, according to their purpose. Once
+     // an entire such smaller chunk is processed, we move to the next
+     // and update startpos.
+     size_t startpos = 0;
+
+     // The position of the (possible) boundary. We investigate for possible
+     // boundaries whenever we encounter CRLF or just LF. If we don't find a
+     // boundary we need to go back, and this is what data_return_pos helps with.
+     size_t data_return_pos = 0;
+
+     #if HTP_DEBUG
+     fprint_raw_data(stderr, "htp_mpartp_parse: data chunk", data, len);
+     #endif
+
+     // While there's data in the input buffer.
+
+     while (pos < len) {
+
+ STATE_SWITCH:
+         // Note: jumping here skips the "pos < len" test of the loop above, so
+         // every jump must target a state that re-checks pos before reading.
+         #if HTP_DEBUG
+         fprintf(stderr, "htp_mpartp_parse: state %d pos %zd startpos %zd\n", parser->parser_state, pos, startpos);
+         #endif
+
+         switch (parser->parser_state) {
+
+             case STATE_INIT:
+                 // Incomplete initialization.
+                 return HTP_ERROR;
+                 break;
+
+             case STATE_DATA: // Handle part data.
+
+                 // While there's data in the input buffer.
+
+                 while (pos < len) {
+                     // Check for a CRLF-terminated line.
+                     if (data[pos] == CR) {
+                         // We have a CR byte.
+
+                         // Is this CR the last byte in the input buffer?
+                         if (pos + 1 == len) {
+                             // We have CR as the last byte in input. We are going to process
+                             // what we have in the buffer as data, except for the CR byte,
+                             // which we're going to leave for later. If it happens that a
+                             // CR is followed by a LF and then a boundary, the CR is going
+                             // to be discarded.
+                             pos++; // Advance over CR.
+                             parser->cr_aside = 1;
+                         } else {
+                             // We have CR and at least one more byte in the buffer, so we
+                             // are able to test for the LF byte too.
+                             if (data[pos + 1] == LF) {
+                                 pos += 2; // Advance over CR and LF.
+
+                                 parser->multipart.flags |= HTP_MULTIPART_CRLF_LINE;
+
+                                 // Prepare to switch to boundary testing.
+                                 data_return_pos = pos;
+                                 parser->boundary_candidate_pos = pos - startpos;
+                                 parser->boundary_match_pos = 2; // After LF; position of the first dash.
+                                 parser->parser_state = STATE_BOUNDARY;
+
+                                 goto STATE_SWITCH;
+                             } else {
+                                 // This is not a new line; advance over the
+                                 // byte and clear the CR set-aside flag.
+                                 pos++;
+                                 parser->cr_aside = 0;
+                             }
+                         }
+                     } else if (data[pos] == LF) { // Check for a LF-terminated line.
+                         pos++; // Advance over LF.
+
+                         // Did we have a CR in the previous input chunk?
+                         if (parser->cr_aside == 0) {
+                             parser->multipart.flags |= HTP_MULTIPART_LF_LINE;
+                         } else {
+                             parser->multipart.flags |= HTP_MULTIPART_CRLF_LINE;
+                         }
+
+                         // Prepare to switch to boundary testing.
+                         data_return_pos = pos;
+                         parser->boundary_candidate_pos = pos - startpos;
+                         parser->boundary_match_pos = 2; // After LF; position of the first dash.
+                         parser->parser_state = STATE_BOUNDARY;
+
+                         goto STATE_SWITCH;
+                     } else {
+                         // Take one byte from input
+                         pos++;
+
+                         // Earlier we might have set aside a CR byte not knowing if the next
+                         // byte is a LF. Now we know that it is not, and so we can release the CR.
+                         if (parser->cr_aside) {
+                             parser->handle_data(parser, (unsigned char *) &"\r", 1, /* not a line */ 0);
+                             parser->cr_aside = 0;
+                         }
+                     }
+                 } // while
+
+                 // No more data in the input buffer; process the data chunk.
+                 // A trailing CR that was set aside is held back from the chunk.
+                 parser->handle_data(parser, data + startpos, pos - startpos - parser->cr_aside, /* not a line */ 0);
+
+                 break;
+
+             case STATE_BOUNDARY: // Handle a possible boundary.
+                 while (pos < len) {
+                     #ifdef HTP_DEBUG
+                     fprintf(stderr, "boundary (len %zd pos %zd char %d) data char %d\n", parser->multipart.boundary_len,
+                             parser->boundary_match_pos, parser->multipart.boundary[parser->boundary_match_pos], tolower(data[pos]));
+                     #endif
+
+                     // Check if the bytes match.
+                     if (!(data[pos] == parser->multipart.boundary[parser->boundary_match_pos])) {
+                         // Boundary mismatch.
+
+                         // Process stored (buffered) data.
+                         htp_martp_process_aside(parser, /* no match */ 0);
+
+                         // Return back where data parsing left off.
+                         if (parser->current_part_mode == MODE_LINE) {
+                             // In line mode, we process the line.
+                             parser->handle_data(parser, data + startpos, data_return_pos - startpos, /* line */ 1);
+                             startpos = data_return_pos;
+                         } else {
+                             // In data mode, we go back where we left off.
+                             pos = data_return_pos;
+                         }
+
+                         parser->parser_state = STATE_DATA;
+
+                         goto STATE_SWITCH;
+                     }
+
+                     // Consume one matched boundary byte
+                     pos++;
+                     parser->boundary_match_pos++;
+
+                     // Have we seen all boundary bytes?
+                     if (parser->boundary_match_pos == parser->multipart.boundary_len) {
+                         // Boundary match!
+
+                         // Process stored (buffered) data.
+                         htp_martp_process_aside(parser, /* boundary match */ 1);
+
+                         // Process data prior to the boundary in the current input buffer.
+                         // Because we know this is the last chunk before boundary, we can
+                         // remove the line endings.
+                         size_t dlen = data_return_pos - startpos;
+                         if ((dlen > 0) && (data[startpos + dlen - 1] == LF)) dlen--;
+                         if ((dlen > 0) && (data[startpos + dlen - 1] == CR)) dlen--;
+                         parser->handle_data(parser, data + startpos, dlen, /* line */ 1);
+
+                         // Keep track of how many boundaries we've seen.
+                         parser->multipart.boundary_count++;
+
+                         if (parser->multipart.flags & HTP_MULTIPART_SEEN_LAST_BOUNDARY) {
+                             parser->multipart.flags |= HTP_MULTIPART_PART_AFTER_LAST_BOUNDARY;
+                         }
+
+                         // Run boundary match.
+                         parser->handle_boundary(parser);
+
+                         // We now need to check if this is the last boundary in the payload
+                         parser->parser_state = STATE_BOUNDARY_IS_LAST2;
+
+                         // The boundary may end exactly where this chunk ends. The next
+                         // state reads data[pos] without a bounds check of its own, so
+                         // jumping there now would read one byte past the buffer and
+                         // could advance the state machine on that stray byte. Stop
+                         // here instead; the next chunk resumes in the new state.
+                         if (pos >= len) return HTP_OK;
+
+                         goto STATE_SWITCH;
+                     }
+                 } // while
+
+                 // No more data in the input buffer; store (buffer) the unprocessed
+                 // part for later, for after we find out if this is a boundary.
+                 bstr_builder_append_mem(parser->boundary_pieces, data + startpos, len - startpos);
+
+                 break;
+
+             case STATE_BOUNDARY_IS_LAST2:
+                 // Examine the first byte after the last boundary character. If it is
+                 // a dash, then we maybe processing the last boundary in the payload. If
+                 // it is not, move to eat all bytes until the end of the line.
+
+                 if (data[pos] == '-') {
+                     // Found one dash, now go to check the next position.
+                     pos++;
+                     parser->parser_state = STATE_BOUNDARY_IS_LAST1;
+                 } else {
+                     // This is not the last boundary. Change state but
+                     // do not advance the position, allowing the next
+                     // state to process the byte.
+                     parser->parser_state = STATE_BOUNDARY_EAT_LWS;
+                 }
+                 break;
+
+             case STATE_BOUNDARY_IS_LAST1:
+                 // Examine the byte after the first dash; expected to be another dash.
+                 // If not, eat all bytes until the end of the line.
+
+                 if (data[pos] == '-') {
+                     // This is indeed the last boundary in the payload.
+                     pos++;
+                     parser->multipart.flags |= HTP_MULTIPART_SEEN_LAST_BOUNDARY;
+                     parser->parser_state = STATE_BOUNDARY_EAT_LWS;
+                 } else {
+                     // The second character is not a dash, and so this is not
+                     // the final boundary. Raise the flag for the first dash,
+                     // and change state to consume the rest of the boundary line.
+                     parser->multipart.flags |= HTP_MULTIPART_BBOUNDARY_NLWS_AFTER;
+                     parser->parser_state = STATE_BOUNDARY_EAT_LWS;
+                 }
+                 break;
+
+             case STATE_BOUNDARY_EAT_LWS:
+                 if (data[pos] == CR) {
+                     // CR byte, which could indicate a CRLF line ending.
+                     pos++;
+                     parser->parser_state = STATE_BOUNDARY_EAT_LWS_CR;
+                 } else if (data[pos] == LF) {
+                     // LF line ending; we're done with boundary processing; data bytes follow.
+                     pos++;
+                     startpos = pos;
+                     parser->multipart.flags |= HTP_MULTIPART_LF_LINE;
+                     parser->parser_state = STATE_DATA;
+                 } else {
+                     if (htp_is_lws(data[pos])) {
+                         // Linear white space is allowed here.
+                         parser->multipart.flags |= HTP_MULTIPART_BBOUNDARY_LWS_AFTER;
+                         pos++;
+                     } else {
+                         // Unexpected byte; consume, but remain in the same state.
+                         parser->multipart.flags |= HTP_MULTIPART_BBOUNDARY_NLWS_AFTER;
+                         pos++;
+                     }
+                 }
+                 break;
+
+             case STATE_BOUNDARY_EAT_LWS_CR:
+                 if (data[pos] == LF) {
+                     // CRLF line ending; we're done with boundary processing; data bytes follow.
+                     pos++;
+                     startpos = pos;
+                     parser->multipart.flags |= HTP_MULTIPART_CRLF_LINE;
+                     parser->parser_state = STATE_DATA;
+                 } else {
+                     // Not a line ending; start again, but do not process this byte.
+                     parser->multipart.flags |= HTP_MULTIPART_BBOUNDARY_NLWS_AFTER;
+                     parser->parser_state = STATE_BOUNDARY_EAT_LWS;
+                 }
+                 break;
+         } // switch
+     }
+
+     return HTP_OK;
+ }
+
+ /**
+  * Inspects a boundary value and raises HTP_MULTIPART_HBOUNDARY_INVALID or
+  * HTP_MULTIPART_HBOUNDARY_UNUSUAL in *flags, depending on its length and on
+  * the characters it contains. Flags are only ever added, never cleared.
+  *
+  * @param[in]     boundary Boundary value to inspect.
+  * @param[in,out] flags    Multipart flags to update.
+  */
+ static void htp_mpartp_validate_boundary(bstr *boundary, uint64_t *flags) {
+     /*
+      * RFC 1341 on the boundary parameter: it "consists of 1 to 70 characters
+      * from a set of characters known to be very robust through email
+      * gateways, and NOT ending with white space", formally:
+      *
+      *     boundary      := 0*69<bchars> bcharsnospace
+      *     bchars        := bcharsnospace / " "
+      *     bcharsnospace := DIGIT / ALPHA / "'" / "(" / ")" / "+" / "_"
+      *                      / "," / "-" / "." / "/" / ":" / "=" / "?"
+      *
+      * What browsers send in practice:
+      *
+      *     Chrome:  boundary=----WebKitFormBoundaryT4AfwQCOgIxNVwlD
+      *     Firefox: boundary=---------------------------21071316483088
+      *     MSIE:    boundary=---------------------------7dd13e11c0452
+      *     Opera:   boundary=----------2JL5oh7QWEDwyBllIRc7fh
+      *     Safari:  boundary=----WebKitFormBoundaryre6zL3b0BelnTY5S
+      */
+
+     const unsigned char *bytes = bstr_ptr(boundary);
+     const size_t total = bstr_len(boundary);
+
+     // One to seventy characters per the RFC; real boundaries are usually shorter.
+     if ((total == 0) || (total > 70)) {
+         *flags |= HTP_MULTIPART_HBOUNDARY_INVALID;
+     }
+
+     // Character check. Deliberately stricter than the RFC: only digits,
+     // letters and the dash pass silently.
+     for (size_t k = 0; k < total; k++) {
+         const unsigned char c = bytes[k];
+
+         const int is_digit = (c >= '0') && (c <= '9');
+         const int is_lower = (c >= 'a') && (c <= 'z');
+         const int is_upper = (c >= 'A') && (c <= 'Z');
+
+         if (is_digit || is_lower || is_upper || (c == '-')) continue;
+
+         switch (c) {
+             case '\'':
+             case '(':
+             case ')':
+             case '+':
+             case '_':
+             case ',':
+             case '.':
+             case '/':
+             case ':':
+             case '=':
+             case '?':
+                 // Permitted by the RFC, but rarely seen.
+                 *flags |= HTP_MULTIPART_HBOUNDARY_UNUSUAL;
+                 break;
+
+             default:
+                 // Not a boundary character at all.
+                 *flags |= HTP_MULTIPART_HBOUNDARY_INVALID;
+                 break;
+         }
+     }
+ }
+
+ /**
+  * Looks for suspicious use of the word "boundary" in a Content-Type value.
+  * HTP_MULTIPART_HBOUNDARY_INVALID is raised when a counted occurrence is not
+  * written entirely in lowercase, or when more than one occurrence is counted.
+  *
+  * @param[in]     content_type Content-Type header value.
+  * @param[in,out] flags        Multipart flags to update.
+  */
+ static void htp_mpartp_validate_content_type(bstr *content_type, uint64_t *flags) {
+     unsigned char *cursor = bstr_ptr(content_type);
+     size_t remaining = bstr_len(content_type);
+     size_t occurrences = 0;
+
+     while (remaining > 0) {
+         int offset = bstr_util_mem_index_of_c_nocase(cursor, remaining, "boundary");
+         if (offset == -1) break;
+
+         cursor = cursor + offset;
+         remaining = remaining - offset;
+
+         // WebKit puts the word "boundary" inside its boundary values, e.g.
+         // "multipart/form-data; boundary=----WebKitFormBoundaryT4AfwQCOgIxNVwlD",
+         // so an occurrence only counts while an equals sign still follows it.
+         if (memchr(cursor, '=', remaining) == NULL) break;
+
+         occurrences++;
+
+         // Any character of the word outside a-z is a case variation.
+         for (size_t k = 0; k < 8; k++) {
+             const unsigned char c = cursor[k];
+             if ((c < 'a') || (c > 'z')) {
+                 *flags |= HTP_MULTIPART_HBOUNDARY_INVALID;
+             }
+         }
+
+         // Step over the word and keep searching behind it.
+         cursor += 8;
+         remaining -= 8;
+     }
+
+     // More than one boundary parameter is not acceptable.
+     if (occurrences > 1) {
+         *flags |= HTP_MULTIPART_HBOUNDARY_INVALID;
+     }
+ }
+
+ /**
+ * Extracts the multipart boundary from a Content-Type header value and
+ * records everything unusual or invalid about it in flags.
+ *
+ * @param[in] content_type Content-Type header value.
+ * @param[out] boundary Receives a newly allocated copy of the boundary when
+ * HTP_OK is returned.
+ * @param[out] flags Reset to zero (once the arguments have been checked),
+ * then updated with HTP_MULTIPART_HBOUNDARY_* flags.
+ * @return HTP_OK when a boundary was extracted; HTP_DECLINED when the header
+ * has no "boundary" keyword, no equals sign, no value, or an empty
+ * value; HTP_ERROR on a NULL argument or memory allocation failure.
+ */
+ htp_status_t htp_mpartp_find_boundary(bstr *content_type, bstr **boundary, uint64_t *flags) {
+ if ((content_type == NULL) || (boundary == NULL) || (flags == NULL)) return HTP_ERROR;
+
+ // Our approach is to ignore the MIME type and instead just look for
+ // the boundary. This approach is more reliable in the face of various
+ // evasion techniques that focus on submitting invalid MIME types.
+
+ // Reset flags.
+ *flags = 0;
+
+ // Look for the boundary, case insensitive.
+ int i = bstr_index_of_c_nocase(content_type, "boundary");
+ if (i == -1) return HTP_DECLINED;
+
+ // Continue right after the 8-character keyword.
+ unsigned char *data = bstr_ptr(content_type) + i + 8;
+ size_t len = bstr_len(content_type) - i - 8;
+
+ // Look for the boundary value.
+ size_t pos = 0;
+ while ((pos < len) && (data[pos] != '=')) {
+ if (htp_is_space(data[pos])) {
+ // It is unusual to see whitespace before the equals sign.
+ *flags |= HTP_MULTIPART_HBOUNDARY_UNUSUAL;
+ } else {
+ // But seeing a non-whitespace character may indicate evasion.
+ *flags |= HTP_MULTIPART_HBOUNDARY_INVALID;
+ }
+
+ pos++;
+ }
+
+ if (pos >= len) {
+ // No equals sign in the header.
+ *flags |= HTP_MULTIPART_HBOUNDARY_INVALID;
+ return HTP_DECLINED;
+ }
+
+ // Go over the '=' character.
+ pos++;
+
+ // Ignore any whitespace after the equals sign.
+ while ((pos < len) && (htp_is_space(data[pos]))) {
+ // This test repeats the loop condition, so it always succeeds here.
+ if (htp_is_space(data[pos])) {
+ // It is unusual to see whitespace after
+ // the equals sign.
+ *flags |= HTP_MULTIPART_HBOUNDARY_UNUSUAL;
+ }
+
+ pos++;
+ }
+
+ if (pos >= len) {
+ // No value after the equals sign.
+ *flags |= HTP_MULTIPART_HBOUNDARY_INVALID;
+ return HTP_DECLINED;
+ }
+
+ if (data[pos] == '"') {
+ // Quoted boundary.
+
+ // Possibly not very unusual, but let's see.
+ *flags |= HTP_MULTIPART_HBOUNDARY_UNUSUAL;
+
+ pos++; // Over the double quote.
+ size_t startpos = pos; // Starting position of the boundary.
+
+ // Look for the terminating double quote.
+ while ((pos < len) && (data[pos] != '"')) pos++;
+
+ if (pos >= len) {
+ // Ran out of space without seeing
+ // the terminating double quote.
+ *flags |= HTP_MULTIPART_HBOUNDARY_INVALID;
+
+ // Include the starting double quote in the boundary.
+ startpos--;
+ }
+
+ *boundary = bstr_dup_mem(data + startpos, pos - startpos);
+ if (*boundary == NULL) return HTP_ERROR;
+
+ pos++; // Over the double quote.
+ } else {
+ // Boundary not quoted.
+
+ size_t startpos = pos;
+
+ // Find the end of the boundary. For the time being, we replicate
+ // the behavior of PHP 5.4.x. This may result with a boundary that's
+ // closer to what would be accepted in real life. Our subsequent
+ // checks of boundary characters will catch irregularities.
+ while ((pos < len) && (data[pos] != ',') && (data[pos] != ';') && (!htp_is_space(data[pos]))) pos++;
+
+ *boundary = bstr_dup_mem(data + startpos, pos - startpos);
+ if (*boundary == NULL) return HTP_ERROR;
+ }
+
+ // Check for a zero-length boundary.
+ if (bstr_len(*boundary) == 0) {
+ *flags |= HTP_MULTIPART_HBOUNDARY_INVALID;
+ bstr_free(*boundary);
+ *boundary = NULL;
+ return HTP_DECLINED;
+ }
+
+ // Allow only whitespace characters after the boundary.
+ int seen_space = 0, seen_non_space = 0;
+
+ while (pos < len) {
+ if (!htp_is_space(data[pos])) {
+ seen_non_space = 1;
+ } else {
+ seen_space = 1;
+ }
+
+ pos++;
+ }
+
+ // Raise INVALID if we see any non-space characters,
+ // but raise UNUSUAL if we see _only_ space characters.
+ if (seen_non_space) {
+ *flags |= HTP_MULTIPART_HBOUNDARY_INVALID;
+ } else if (seen_space) {
+ *flags |= HTP_MULTIPART_HBOUNDARY_UNUSUAL;
+ }
+
+ #ifdef HTP_DEBUG
+ fprint_bstr(stderr, "Multipart boundary", *boundary);
+ #endif
+
+ // Validate boundary characters.
+ htp_mpartp_validate_boundary(*boundary, flags);
+
+ // Correlate with the MIME type. This might be a tad too
+ // sensitive because it may catch non-browser access with sloppy
+ // implementations, but let's go with it for now.
+ if (bstr_begins_with_c(content_type, "multipart/form-data;") == 0) {
+ *flags |= HTP_MULTIPART_HBOUNDARY_INVALID;
+ }
+
+ // Finally, look for repeated or oddly-cased "boundary" keywords.
+ htp_mpartp_validate_content_type(content_type, flags);
+
+ return HTP_OK;
+ }
diff --git a/htp/htp_multipart.h b/htp/htp_multipart.h
new file mode 100644
index 0000000..614ef63
--- /dev/null
+++ b/htp/htp_multipart.h
@@ -0,0 +1,345 @@
+/***************************************************************************
+ * Copyright (c) 2009-2010 Open Information Security Foundation
+ * Copyright (c) 2010-2013 Qualys, Inc.
+ * All rights reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions are
+ * met:
+ *
+ * - Redistributions of source code must retain the above copyright
+ * notice, this list of conditions and the following disclaimer.
+
+ * - Redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in the
+ * documentation and/or other materials provided with the distribution.
+
+ * - Neither the name of the Qualys, Inc. nor the names of its
+ * contributors may be used to endorse or promote products derived from
+ * this software without specific prior written permission.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+ * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+ * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+ * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
+ * HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+ * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
+ * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+ * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
+ * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+ * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+ * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+ ***************************************************************************/
+
+/**
+ * @file
+ * @author Ivan Ristic <ivanr@webkreator.com>
+ */
+
+#ifndef _HTP_MULTIPART_H
+#define _HTP_MULTIPART_H
+
+#ifdef __cplusplus
+extern "C" {
+#endif
+
+#include "bstr.h"
+#include "htp.h"
+#include "htp_table.h"
+
+
+// Constants and enums.
+
+/**
+ * Seen a LF line in the payload. LF lines are not allowed, but
+ * some clients do use them and some backends do accept them. Mixing
+ * LF and CRLF lines within some payload might be unusual.
+ */
+#define HTP_MULTIPART_LF_LINE 0x0001
+
+/** Seen a CRLF line in the payload. This is normal and expected. */
+#define HTP_MULTIPART_CRLF_LINE 0x0002
+
+/** Seen LWS after a boundary instance in the body. Unusual. */
+#define HTP_MULTIPART_BBOUNDARY_LWS_AFTER 0x0004
+
+/** Seen non-LWS content after a boundary instance in the body. Highly unusual. */
+#define HTP_MULTIPART_BBOUNDARY_NLWS_AFTER 0x0008
+
+/**
+ * Payload has a preamble part. Might not be that unusual.
+ */
+#define HTP_MULTIPART_HAS_PREAMBLE 0x0010
+
+/**
+ * Payload has an epilogue part. Unusual.
+ */
+#define HTP_MULTIPART_HAS_EPILOGUE 0x0020
+
+/**
+ * The last boundary was seen in the payload. Absence of the last boundary
+ * may not break parsing with some (most?) backends, but it means that the payload
+ * is not well formed. Can occur if the client gives up, or if the connection is
+ * interrupted. Incomplete payloads should be blocked whenever possible.
+ */
+#define HTP_MULTIPART_SEEN_LAST_BOUNDARY 0x0040
+
+/**
+ * There was a part after the last boundary. This is highly irregular
+ * and indicative of evasion.
+ */
+#define HTP_MULTIPART_PART_AFTER_LAST_BOUNDARY 0x0080
+
+/**
+ * The payload ends abruptly, without proper termination. Can occur if the client gives up,
+ * or if the connection is interrupted. When this flag is raised, HTP_MULTIPART_PART_INCOMPLETE
+ * will also be raised for the part that was only partially processed. (But the opposite may not
+ * always be the case -- there are other ways in which a part can be left incomplete.)
+ */
+#define HTP_MULTIPART_INCOMPLETE 0x0100
+
+/** The boundary in the Content-Type header is invalid. */
+#define HTP_MULTIPART_HBOUNDARY_INVALID 0x0200
+
+/**
+ * The boundary in the Content-Type header is unusual. This may mean that evasion
+ * is attempted, but it could also mean that we have encountered a client that does
+ * not do things in the way it should.
+ */
+#define HTP_MULTIPART_HBOUNDARY_UNUSUAL 0x0400
+
+/**
+ * The boundary in the Content-Type header is quoted. This is very unusual,
+ * and may be indicative of an evasion attempt.
+ */
+#define HTP_MULTIPART_HBOUNDARY_QUOTED 0x0800
+
+/** Header folding was used in part headers. Very unusual. */
+#define HTP_MULTIPART_PART_HEADER_FOLDING 0x1000
+
+/**
+ * A part of unknown type was encountered, which probably means that the part is lacking
+ * a Content-Disposition header, or that the header is invalid. Highly unusual.
+ */
+#define HTP_MULTIPART_PART_UNKNOWN 0x2000
+
+/** There was a repeated part header, possibly in an attempt to confuse the parser. Very unusual. */
+#define HTP_MULTIPART_PART_HEADER_REPEATED 0x4000
+
+/** Unknown part header encountered. */
+#define HTP_MULTIPART_PART_HEADER_UNKNOWN 0x8000
+
+/** Invalid part header encountered. */
+#define HTP_MULTIPART_PART_HEADER_INVALID 0x10000
+
+/** Part type specified in the C-D header is neither MULTIPART_PART_TEXT nor MULTIPART_PART_FILE. */
+#define HTP_MULTIPART_CD_TYPE_INVALID 0x20000
+
+/** Content-Disposition part header with multiple parameters with the same name. */
+#define HTP_MULTIPART_CD_PARAM_REPEATED 0x40000
+
+/** Unknown Content-Disposition parameter. */
+#define HTP_MULTIPART_CD_PARAM_UNKNOWN 0x80000
+
+/** Invalid Content-Disposition syntax. */
+#define HTP_MULTIPART_CD_SYNTAX_INVALID 0x100000
+
+/**
+ * There is an abruptly terminated part. This can happen when the payload itself is abruptly
+ * terminated (in which case HTP_MULTIPART_INCOMPLETE will also be raised). However, it can also
+ * happen when a boundary is seen before any part data.
+ */
+#define HTP_MULTIPART_PART_INCOMPLETE 0x200000
+
+/** A NUL byte was seen in a part header area. */
+#define HTP_MULTIPART_NUL_BYTE 0x400000
+
+/** A collection of flags that all indicate an invalid C-D (Content-Disposition) header. */
+#define HTP_MULTIPART_CD_INVALID ( \
+ HTP_MULTIPART_CD_TYPE_INVALID | \
+ HTP_MULTIPART_CD_PARAM_REPEATED | \
+ HTP_MULTIPART_CD_PARAM_UNKNOWN | \
+ HTP_MULTIPART_CD_SYNTAX_INVALID )
+
+/** A collection of flags that all indicate an invalid part. Includes every HTP_MULTIPART_CD_INVALID flag. */
+#define HTP_MULTIPART_PART_INVALID ( \
+ HTP_MULTIPART_CD_INVALID | \
+ HTP_MULTIPART_NUL_BYTE | \
+ HTP_MULTIPART_PART_UNKNOWN | \
+ HTP_MULTIPART_PART_HEADER_REPEATED | \
+ HTP_MULTIPART_PART_INCOMPLETE | \
+ HTP_MULTIPART_PART_HEADER_UNKNOWN | \
+ HTP_MULTIPART_PART_HEADER_INVALID )
+
+/** A collection of flags that all indicate an invalid Multipart payload. Includes every HTP_MULTIPART_PART_INVALID flag. */
+#define HTP_MULTIPART_INVALID ( \
+ HTP_MULTIPART_PART_INVALID | \
+ HTP_MULTIPART_PART_AFTER_LAST_BOUNDARY | \
+ HTP_MULTIPART_INCOMPLETE | \
+ HTP_MULTIPART_HBOUNDARY_INVALID )
+
+/**
+ * A collection of flags that all indicate an unusual Multipart payload.
+ * Includes every HTP_MULTIPART_INVALID flag, plus the header-folding,
+ * non-LWS-after-boundary, epilogue, and unusual/quoted header boundary flags.
+ */
+#define HTP_MULTIPART_UNUSUAL ( \
+    HTP_MULTIPART_INVALID | \
+    HTP_MULTIPART_PART_HEADER_FOLDING | \
+    HTP_MULTIPART_BBOUNDARY_NLWS_AFTER | \
+    HTP_MULTIPART_HAS_EPILOGUE | \
+    HTP_MULTIPART_HBOUNDARY_UNUSUAL | \
+    HTP_MULTIPART_HBOUNDARY_QUOTED )
+
+/**
+ * A collection of flags that all indicate an unusual Multipart payload, with a low
+ * tolerance for irregularities: on top of HTP_MULTIPART_UNUSUAL it also counts LF-only
+ * lines, LWS after a body boundary, and the presence of a preamble.
+ */
+#define HTP_MULTIPART_UNUSUAL_PARANOID ( \
+ HTP_MULTIPART_UNUSUAL | \
+ HTP_MULTIPART_LF_LINE | \
+ HTP_MULTIPART_BBOUNDARY_LWS_AFTER | \
+ HTP_MULTIPART_HAS_PREAMBLE )
+
+#define HTP_MULTIPART_MIME_TYPE "multipart/form-data" /* MIME type string for multipart/form-data payloads. */
+
+enum htp_multipart_type_t { /* Kind of a multipart part; stored in htp_multipart_part_t.type. */
+
+ /** Unknown part (the zero value). */
+ MULTIPART_PART_UNKNOWN = 0,
+
+ /** Text (parameter) part. */
+ MULTIPART_PART_TEXT = 1,
+
+ /** File part. */
+ MULTIPART_PART_FILE = 2,
+
+ /** Free-text part before the first boundary. */
+ MULTIPART_PART_PREAMBLE = 3,
+
+ /** Free-text part after the last boundary. */
+ MULTIPART_PART_EPILOGUE = 4
+};
+
+
+// Structures
+
+/**
+ * Holds multipart parser configuration and state. Private; the full
+ * definition is not part of this public header.
+ */
+typedef struct htp_mpartp_t htp_mpartp_t;
+
+/**
+ * Holds information related to a multipart body.
+ */
+typedef struct htp_multipart_t {
+ /** Multipart boundary. */
+ char *boundary;
+
+ /** Boundary length, in bytes. */
+ size_t boundary_len;
+
+ /** How many boundaries were there? */
+ int boundary_count;
+
+ /** List of parts, in the order in which they appeared in the body. */
+ htp_list_t *parts;
+
+ /** Parsing flags (a bitwise combination of the HTP_MULTIPART_* flag constants). */
+ uint64_t flags;
+} htp_multipart_t;
+
+/**
+ * Holds information related to a single part of a multipart body.
+ */
+typedef struct htp_multipart_part_t {
+ /** Pointer to the parser. */
+ htp_mpartp_t *parser;
+
+ /** Part type; see the MULTIPART_PART_* constants. */
+ enum htp_multipart_type_t type;
+
+ /** Raw part length (i.e., headers and data). */
+ size_t len;
+
+ /** Part name, from the Content-Disposition header. Can be NULL. */
+ bstr *name;
+
+ /**
+ * Part value; the contents depend on the type of the part:
+ * 1) NULL for files; 2) the complete part contents for
+ * preamble and epilogue parts (they have no headers); and
+ * 3) data only (headers excluded) for text and unknown parts.
+ */
+ bstr *value;
+
+ /** Part content type, from the Content-Type header. Can be NULL. */
+ bstr *content_type;
+
+ /** Part headers (htp_header_t instances), using header name as the key. */
+ htp_table_t *headers;
+
+ /** File data, available only for MULTIPART_PART_FILE parts. */
+ htp_file_t *file;
+} htp_multipart_part_t;
+
+
+// Functions
+
+/**
+ * Creates a new multipart/form-data parser. On a successful invocation,
+ * the ownership of the boundary parameter is transferred to the parser.
+ *
+ * @param[in] cfg
+ * @param[in] boundary
+ * @param[in] flags
+ * @return New parser instance, or NULL on memory allocation failure.
+ */
+htp_mpartp_t *htp_mpartp_create(htp_cfg_t *cfg, bstr *boundary, uint64_t flags);
+
+/**
+ * Looks for boundary in the supplied Content-Type request header. The extracted
+ * boundary will be allocated on the heap.
+ *
+ * @param[in] content_type
+ * @param[out] boundary
+ * @param[out] multipart_flags Multipart flags, which are not compatible with general LibHTP flags.
+ * @return HTP_OK on success (boundary found), HTP_DECLINED if boundary was not found,
+ * and HTP_ERROR on failure. Flags may be set on HTP_OK and HTP_DECLINED. For
+ * example, if a boundary could not be extracted but there is indication that
+ * one is present, HTP_MULTIPART_HBOUNDARY_INVALID will be set.
+ */
+htp_status_t htp_mpartp_find_boundary(bstr *content_type, bstr **boundary, uint64_t *multipart_flags);
+
+/**
+ * Returns the multipart structure created by the parser.
+ *
+ * @param[in] parser
+ * @return The main multipart structure.
+ */
+htp_multipart_t *htp_mpartp_get_multipart(htp_mpartp_t *parser);
+
+/**
+ * Destroys the provided parser.
+ *
+ * @param[in] parser
+ */
+void htp_mpartp_destroy(htp_mpartp_t *parser);
+
+/**
+ * Finalizes parsing.
+ *
+ * @param[in] parser
+ * @return HTP_OK on success, HTP_ERROR on failure.
+ */
+htp_status_t htp_mpartp_finalize(htp_mpartp_t *parser);
+
+/**
+ * Parses a chunk of multipart/form-data data. This function should be called
+ * as many times as necessary until all data has been consumed.
+ *
+ * @param[in] parser
+ * @param[in] data
+ * @param[in] len
+ * @return HTP_OK on success, HTP_ERROR on failure.
+ */
+htp_status_t htp_mpartp_parse(htp_mpartp_t *parser, const void *data, size_t len);
+
+#ifdef __cplusplus
+}
+#endif
+
+#endif /* _HTP_MULTIPART_H */
diff --git a/htp/htp_multipart_private.h b/htp/htp_multipart_private.h
new file mode 100644
index 0000000..5b8d228
--- /dev/null
+++ b/htp/htp_multipart_private.h
@@ -0,0 +1,203 @@
+/***************************************************************************
+ * Copyright (c) 2009-2010 Open Information Security Foundation
+ * Copyright (c) 2010-2013 Qualys, Inc.
+ * All rights reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions are
+ * met:
+ *
+ * - Redistributions of source code must retain the above copyright
+ * notice, this list of conditions and the following disclaimer.
+
+ * - Redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in the
+ * documentation and/or other materials provided with the distribution.
+
+ * - Neither the name of the Qualys, Inc. nor the names of its
+ * contributors may be used to endorse or promote products derived from
+ * this software without specific prior written permission.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+ * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+ * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+ * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
+ * HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+ * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
+ * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+ * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
+ * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+ * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+ * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+ ***************************************************************************/
+
+/**
+ * @file
+ * @author Ivan Ristic <ivanr@webkreator.com>
+ */
+
+#ifndef _HTP_MULTIPART_PRIVATE_H
+#define _HTP_MULTIPART_PRIVATE_H
+
+#ifdef __cplusplus
+extern "C" {
+#endif
+
+#include "htp_multipart.h"
+
+#define CD_PARAM_OTHER 0 /* NOTE(review): presumably "any other" Content-Disposition parameter -- confirm against the C-D parser. */
+#define CD_PARAM_NAME 1 /* NOTE(review): presumably the C-D "name" parameter -- confirm. */
+#define CD_PARAM_FILENAME 2 /* NOTE(review): presumably the C-D "filename" parameter -- confirm. */
+
+#define DEFAULT_FILE_EXTRACT_LIMIT 16 /* NOTE(review): presumably the default for htp_mpartp_t.extract_limit -- confirm. */
+
+enum htp_part_mode_t { /* Type of htp_mpartp_t.current_part_mode. */
+ /** When in line mode, the parser is handling part headers. */
+ MODE_LINE = 0,
+
+ /** When in data mode, the parser is consuming part data. */
+ MODE_DATA = 1
+};
+
+enum htp_multipart_state_t { /* Type of htp_mpartp_t.parser_state. */
+ /** Initial state, after the parser has been created but before the boundary is initialized. */
+ STATE_INIT = 0,
+
+ /** Processing data, waiting for a new line (which might indicate a new boundary). */
+ STATE_DATA = 1,
+
+ /** Testing a potential boundary. */
+ STATE_BOUNDARY = 2,
+
+ /** Checking the first byte after a boundary. */
+ STATE_BOUNDARY_IS_LAST1 = 3,
+
+ /** Checking the second byte after a boundary. */
+ STATE_BOUNDARY_IS_LAST2 = 4,
+
+ /** Consuming linear whitespace after a boundary. */
+ STATE_BOUNDARY_EAT_LWS = 5,
+
+ /** Used after a CR byte is detected in STATE_BOUNDARY_EAT_LWS. */
+ STATE_BOUNDARY_EAT_LWS_CR = 6
+};
+
+struct htp_mpartp_t {
+ htp_multipart_t multipart; // NOTE(review): presumably what htp_mpartp_get_multipart() exposes -- confirm.
+
+ htp_cfg_t *cfg;
+
+ int extract_files; // NOTE(review): presumably a boolean enabling file extraction -- confirm.
+
+ int extract_limit; // NOTE(review): presumably the maximum number of extracted files (see DEFAULT_FILE_EXTRACT_LIMIT) -- confirm.
+
+ char *extract_dir; // NOTE(review): presumably the directory extracted files are written to -- confirm.
+
+ int file_count; // NOTE(review): presumably the number of file parts seen so far -- confirm.
+
+ // Parsing callbacks
+
+ int (*handle_data)(htp_mpartp_t *mpartp, const unsigned char *data,
+ size_t len, int line_end);
+ int (*handle_boundary)(htp_mpartp_t *mpartp);
+
+ // Internal parsing fields; move into a private structure
+
+ /**
+ * Parser state; one of the STATE_* constants (enum htp_multipart_state_t).
+ */
+ enum htp_multipart_state_t parser_state;
+
+ /**
+ * Keeps track of the current position in the boundary matching progress.
+ * When this field reaches boundary_len, we have a boundary match.
+ */
+ size_t boundary_match_pos;
+
+ /**
+ * Pointer to the part that is currently being processed.
+ */
+ htp_multipart_part_t *current_part;
+
+ /**
+ * This parser consists of two layers: the outer layer is charged with
+ * finding parts, and the internal layer handles part data. There is an
+ * interesting interaction between the two parsers. Because the
+ * outer layer is seeing every line (it has to, in order to test for
+ * boundaries), it effectively also splits input into lines. The
+ * inner parser deals with two areas: first is the headers, which are
+ * line based, followed by binary data. When parsing headers, the inner
+ * parser can reuse the lines identified by the outer parser. In this
+ * variable we keep the current parsing mode of the part, which helps
+ * us process input data more efficiently. The possible values are
+ * MODE_LINE and MODE_DATA (enum htp_part_mode_t).
+ */
+ enum htp_part_mode_t current_part_mode;
+
+ /**
+ * Used for buffering when a potential boundary is fragmented
+ * across many input data buffers. On a match, the data stored here is
+ * discarded. When there is no match, the buffer is processed as data
+ * (belonging to the currently active part).
+ */
+ bstr_builder_t *boundary_pieces;
+
+ bstr_builder_t *part_header_pieces; // NOTE(review): presumably buffers a part header line split across input chunks -- confirm.
+
+ bstr *pending_header_line; // NOTE(review): presumably a header line held back until folding can be ruled out -- confirm.
+
+ /**
+ * Stores text part pieces until the entire part is seen, at which
+ * point the pieces are assembled into a single buffer, and the
+ * builder cleared.
+ */
+ bstr_builder_t *part_data_pieces;
+
+ /**
+ * The offset of the current boundary candidate, relative to the most
+ * recent data chunk (first unprocessed chunk of data).
+ */
+ size_t boundary_candidate_pos;
+
+ /**
+ * When we encounter a CR as the last byte in a buffer, we don't know
+ * if the byte is part of a CRLF combination. If it is, then the CR
+ * might be a part of a boundary. But if it is not, it's the current
+ * part's data. Because we know how to handle everything before the
+ * CR, we do, and we use this flag to indicate that a CR byte is
+ * effectively being buffered. This is probably a case of premature
+ * optimization, but I am going to leave it in for now.
+ */
+ int cr_aside;
+
+ /**
+ * When set, indicates that this parser no longer owns names and
+ * values of MULTIPART_PART_TEXT parts. It is used to avoid data
+ * duplication when the parser is used by LibHTP internally.
+ */
+ int gave_up_data;
+};
+
+htp_status_t htp_mpartp_run_request_file_data_hook(htp_multipart_part_t *part, const unsigned char *data, size_t len);
+
+htp_status_t htp_mpart_part_process_headers(htp_multipart_part_t *part);
+
+htp_status_t htp_mpartp_parse_header(htp_multipart_part_t *part, const unsigned char *data, size_t len);
+
+htp_status_t htp_mpart_part_handle_data(htp_multipart_part_t *part, const unsigned char *data, size_t len, int is_line);
+
+int htp_mpartp_is_boundary_character(int c); // NOTE(review): presumably non-zero when c is acceptable in a boundary -- confirm.
+
+htp_multipart_part_t *htp_mpart_part_create(htp_mpartp_t *parser);
+
+htp_status_t htp_mpart_part_finalize_data(htp_multipart_part_t *part);
+
+void htp_mpart_part_destroy(htp_multipart_part_t *part, int gave_up_data); // NOTE(review): gave_up_data presumably mirrors htp_mpartp_t.gave_up_data -- confirm.
+
+htp_status_t htp_mpart_part_parse_c_d(htp_multipart_part_t *part); // "c_d": presumably the Content-Disposition header -- confirm.
+
+#ifdef __cplusplus
+}
+#endif
+
+#endif /* _HTP_MULTIPART_PRIVATE_H */
diff --git a/htp/htp_parsers.c b/htp/htp_parsers.c
new file mode 100644
index 0000000..3f41abb
--- /dev/null
+++ b/htp/htp_parsers.c
@@ -0,0 +1,214 @@
+/***************************************************************************
+ * Copyright (c) 2009-2010 Open Information Security Foundation
+ * Copyright (c) 2010-2013 Qualys, Inc.
+ * All rights reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions are
+ * met:
+ *
+ * - Redistributions of source code must retain the above copyright
+ * notice, this list of conditions and the following disclaimer.
+
+ * - Redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in the
+ * documentation and/or other materials provided with the distribution.
+
+ * - Neither the name of the Qualys, Inc. nor the names of its
+ * contributors may be used to endorse or promote products derived from
+ * this software without specific prior written permission.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+ * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+ * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+ * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
+ * HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+ * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
+ * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+ * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
+ * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+ * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+ * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+ ***************************************************************************/
+
+/**
+ * @file
+ * @author Ivan Ristic <ivanr@webkreator.com>
+ */
+
+#include "htp_config_auto.h"
+
+#include "htp_private.h"
+
+/**
+ * Maps a textual protocol identifier (e.g., "HTTP/1.1") to the matching
+ * protocol constant. Matching is deliberately strict: the input must be
+ * exactly eight bytes of the form "HTTP/<major>.<minor>", and only the
+ * versions 0.9, 1.0 and 1.1 are recognised.
+ *
+ * @param[in] protocol Protocol string; may be NULL.
+ * @return HTP_PROTOCOL_0_9, HTP_PROTOCOL_1_0 or HTP_PROTOCOL_1_1 on a match;
+ *         HTP_PROTOCOL_INVALID for NULL or anything else.
+ */
+int htp_parse_protocol(bstr *protocol) {
+    // TODO This function uses a very strict approach to parsing, whereas
+    //      browsers will typically be more flexible, allowing whitespace
+    //      before and after the forward slash, as well as allowing leading
+    //      zeroes in the numbers. We should be able to parse such malformed
+    //      content correctly (but emit a warning).
+    if ((protocol == NULL) || (bstr_len(protocol) != 8)) return HTP_PROTOCOL_INVALID;
+
+    unsigned char *s = bstr_ptr(protocol);
+
+    // The fixed skeleton: "HTTP/" prefix and the '.' between the digits.
+    int skeleton_ok = (s[0] == 'H') && (s[1] == 'T') && (s[2] == 'T') && (s[3] == 'P')
+            && (s[4] == '/') && (s[6] == '.');
+    if (!skeleton_ok) return HTP_PROTOCOL_INVALID;
+
+    // Only three major/minor combinations are known.
+    switch (s[5]) {
+        case '0':
+            if (s[7] == '9') return HTP_PROTOCOL_0_9;
+            break;
+        case '1':
+            if (s[7] == '0') return HTP_PROTOCOL_1_0;
+            if (s[7] == '1') return HTP_PROTOCOL_1_1;
+            break;
+        default:
+            break;
+    }
+
+    return HTP_PROTOCOL_INVALID;
+}
+
+/**
+ * Converts a textual response status into its numerical value.
+ *
+ * @param[in] status Status string, parsed as a base-10 positive integer.
+ * @return The status code when it lies within
+ *         [HTP_VALID_STATUS_MIN, HTP_VALID_STATUS_MAX]; HTP_STATUS_INVALID otherwise.
+ */
+int htp_parse_status(bstr *status) {
+    int64_t code = htp_parse_positive_integer_whitespace((unsigned char *) bstr_ptr(status), bstr_len(status), 10);
+
+    // Anything outside the accepted range (including parse failures) is invalid.
+    if ((code < HTP_VALID_STATUS_MIN) || (code > HTP_VALID_STATUS_MAX)) {
+        return HTP_STATUS_INVALID;
+    }
+
+    return (int) code;
+}
+
+/**
+ * Parses a Digest Authorization request header, extracting the quoted
+ * value of its "username=" parameter into the inbound transaction.
+ *
+ * @param[in] connp
+ * @param[in] auth_header
+ * @return HTP_DECLINED when there is no "username=" parameter or it is not
+ *         followed by a double-quoted value; otherwise the result of
+ *         htp_extract_quoted_string_as_bstr().
+ */
+int htp_parse_authorization_digest(htp_connp_t *connp, htp_header_t *auth_header) {
+    // Locate the username parameter.
+    int param_at = bstr_index_of_c(auth_header->value, "username=");
+    if (param_at == -1) return HTP_DECLINED;
+
+    unsigned char *value = bstr_ptr(auth_header->value);
+    size_t value_len = bstr_len(auth_header->value);
+
+    // Step over "username=" (9 bytes), then over any whitespace.
+    size_t cursor = param_at + 9;
+    for (; cursor < value_len; cursor++) {
+        if (!isspace((int) value[cursor])) break;
+    }
+
+    // There must be something left, and it must open a quoted string.
+    if ((cursor == value_len) || (value[cursor] != '"')) return HTP_DECLINED;
+
+    return htp_extract_quoted_string_as_bstr(value + cursor, value_len - cursor,
+            &(connp->in_tx->request_auth_username), NULL);
+}
+
+/**
+ * Parses a Basic Authorization request header. The base64 payload that
+ * follows the scheme name is decoded and split on the first colon into
+ * the username and password stored in the inbound transaction.
+ *
+ * @param[in] connp
+ * @param[in] auth_header Header whose value starts with the 5-byte scheme name.
+ * @return HTP_OK on success; HTP_DECLINED when there are no credentials after
+ *         the scheme name or the decoded data contains no colon; HTP_ERROR on
+ *         decoding or memory allocation failure.
+ */
+int htp_parse_authorization_basic(htp_connp_t *connp, htp_header_t *auth_header) {
+    unsigned char *data = bstr_ptr(auth_header->value);
+    size_t len = bstr_len(auth_header->value);
+    size_t pos = 5; // Skip over the scheme name ("basic").
+
+    // Ignore whitespace
+    while ((pos < len) && (isspace((int) data[pos]))) pos++;
+
+    // Use >= rather than ==: for a value shorter than the scheme name pos
+    // starts beyond len, and (len - pos) below would otherwise wrap around.
+    if (pos >= len) return HTP_DECLINED;
+
+    // Decode base64-encoded data
+    bstr *decoded = htp_base64_decode_mem(data + pos, len - pos);
+    if (decoded == NULL) return HTP_ERROR;
+
+    // Now extract the username and password
+    int i = bstr_index_of_c(decoded, ":");
+    if (i == -1) {
+        bstr_free(decoded);
+        return HTP_DECLINED;
+    }
+
+    connp->in_tx->request_auth_username = bstr_dup_ex(decoded, 0, i);
+    if (connp->in_tx->request_auth_username == NULL) {
+        bstr_free(decoded);
+        return HTP_ERROR;
+    }
+
+    connp->in_tx->request_auth_password = bstr_dup_ex(decoded, i + 1, bstr_len(decoded) - i - 1);
+    if (connp->in_tx->request_auth_password == NULL) {
+        bstr_free(decoded);
+
+        // Release the username and clear the field, so that the transaction
+        // is not left holding a pointer to freed memory.
+        bstr_free(connp->in_tx->request_auth_username);
+        connp->in_tx->request_auth_username = NULL;
+
+        return HTP_ERROR;
+    }
+
+    bstr_free(decoded);
+
+    return HTP_OK;
+}
+
+/**
+ * Parses a Bearer Authorization request header. The only check made is
+ * that the header value does not end right after the scheme name and any
+ * whitespace that follows it; the token itself is not inspected.
+ *
+ * @param[in] connp
+ * @param[in] auth_header
+ * @return HTP_DECLINED when the value ends after the scheme name and
+ *         whitespace, HTP_OK otherwise.
+ */
+int htp_parse_authorization_bearer(htp_connp_t *connp, htp_header_t *auth_header) {
+    unsigned char *value = bstr_ptr(auth_header->value);
+    size_t value_len = bstr_len(auth_header->value);
+
+    // Start right after the 6-byte scheme name and skip whitespace.
+    size_t cursor = 6;
+    for (; cursor < value_len; cursor++) {
+        if (!isspace((int) value[cursor])) break;
+    }
+
+    // There is nothing much else to check with Bearer auth, so the
+    // outcome depends only on whether anything is left.
+    return (cursor == value_len) ? HTP_DECLINED : HTP_OK;
+}
+/**
+ * Parses the Authorization request header of the inbound transaction,
+ * recording the authentication type and handing over to the parser for
+ * that scheme. The scheme name is matched case-insensitively.
+ *
+ * @param[in] connp
+ * @return HTP_OK when there is no Authorization header or its scheme is not
+ *         recognized; otherwise the result of the scheme-specific parser.
+ */
+int htp_parse_authorization(htp_connp_t *connp) {
+    htp_header_t *auth_header = htp_table_get_c(connp->in_tx->request_headers, "authorization");
+
+    // No header at all: no authentication.
+    if (auth_header == NULL) {
+        connp->in_tx->request_auth_type = HTP_AUTH_NONE;
+        return HTP_OK;
+    }
+
+    // TODO Need a flag to raise when failing to parse authentication headers.
+
+    // Basic authentication
+    if (bstr_begins_with_c_nocase(auth_header->value, "basic")) {
+        connp->in_tx->request_auth_type = HTP_AUTH_BASIC;
+        return htp_parse_authorization_basic(connp, auth_header);
+    }
+
+    // Digest authentication
+    if (bstr_begins_with_c_nocase(auth_header->value, "digest")) {
+        connp->in_tx->request_auth_type = HTP_AUTH_DIGEST;
+        return htp_parse_authorization_digest(connp, auth_header);
+    }
+
+    // OAuth Bearer authentication
+    if (bstr_begins_with_c_nocase(auth_header->value, "bearer")) {
+        connp->in_tx->request_auth_type = HTP_AUTH_BEARER;
+        return htp_parse_authorization_bearer(connp, auth_header);
+    }
+
+    // Unrecognized authentication method
+    connp->in_tx->request_auth_type = HTP_AUTH_UNRECOGNIZED;
+
+    return HTP_OK;
+}
diff --git a/htp/htp_php.c b/htp/htp_php.c
new file mode 100644
index 0000000..582d5b3
--- /dev/null
+++ b/htp/htp_php.c
@@ -0,0 +1,116 @@
+/***************************************************************************
+ * Copyright (c) 2009-2010 Open Information Security Foundation
+ * Copyright (c) 2010-2013 Qualys, Inc.
+ * All rights reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions are
+ * met:
+ *
+ * - Redistributions of source code must retain the above copyright
+ * notice, this list of conditions and the following disclaimer.
+
+ * - Redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in the
+ * documentation and/or other materials provided with the distribution.
+
+ * - Neither the name of the Qualys, Inc. nor the names of its
+ * contributors may be used to endorse or promote products derived from
+ * this software without specific prior written permission.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+ * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+ * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+ * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
+ * HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+ * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
+ * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+ * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
+ * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+ * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+ * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+ ***************************************************************************/
+
+/**
+ * @file
+ * @author Ivan Ristic <ivanr@webkreator.com>
+ */
+
+#include "htp_config_auto.h"
+
+#include "htp_private.h"
+
+/**
+ * This is a proof-of-concept processor that processes parameter names in
+ * a way _similar_ to PHP. Whitespace at the beginning is removed, and the
+ * remaining whitespace characters are converted to underscores. Proper
+ * research of PHP's behavior is needed before we can claim to be emulating it.
+ *
+ * The name is replaced only when a change is needed; a name without any
+ * whitespace is left untouched and no memory is allocated.
+ *
+ * @param[in,out] p Parameter whose name is processed in place.
+ * @return HTP_OK on success, HTP_ERROR if p or its name is NULL, or on
+ *         memory allocation failure.
+ */
+htp_status_t htp_php_parameter_processor(htp_param_t *p) {
+    if ((p == NULL) || (p->name == NULL)) return HTP_ERROR;
+
+    unsigned char *data = bstr_ptr(p->name);
+    size_t len = bstr_len(p->name);
+
+    // Advance over any whitespace characters at the beginning of the name.
+    size_t start = 0;
+    while ((start < len) && (isspace(data[start]))) start++;
+
+    // Find the first whitespace character after the leading run. The index
+    // is absolute, so the scan always looks at the bytes that will be kept.
+    size_t first_ws = start;
+    while ((first_ws < len) && (!isspace(data[first_ws]))) first_ws++;
+
+    // No leading whitespace and none within the name: nothing to change.
+    if ((start == 0) && (first_ws == len)) return HTP_OK;
+
+    // Make a copy of the name, starting with the first non-whitespace character.
+    bstr *new_name = bstr_dup_mem(data + start, len - start);
+    if (new_name == NULL) return HTP_ERROR;
+
+    // Replace the remaining whitespace characters in the copy with underscores.
+    // Everything before (first_ws - start) is already known to be clean.
+    unsigned char *copy = bstr_ptr(new_name);
+    size_t copy_len = bstr_len(new_name);
+    for (size_t i = first_ws - start; i < copy_len; i++) {
+        if (isspace(copy[i])) {
+            copy[i] = '_';
+        }
+    }
+
+    // Free the old parameter name and put the new one in.
+    bstr_free(p->name);
+    p->name = new_name;
+
+    return HTP_OK;
+}
diff --git a/htp/htp_private.h b/htp/htp_private.h
new file mode 100644
index 0000000..9bcf19d
--- /dev/null
+++ b/htp/htp_private.h
@@ -0,0 +1,269 @@
+/***************************************************************************
+ * Copyright (c) 2009-2010 Open Information Security Foundation
+ * Copyright (c) 2010-2013 Qualys, Inc.
+ * All rights reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions are
+ * met:
+ *
+ * - Redistributions of source code must retain the above copyright
+ * notice, this list of conditions and the following disclaimer.
+
+ * - Redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in the
+ * documentation and/or other materials provided with the distribution.
+
+ * - Neither the name of the Qualys, Inc. nor the names of its
+ * contributors may be used to endorse or promote products derived from
+ * this software without specific prior written permission.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+ * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+ * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+ * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
+ * HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+ * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
+ * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+ * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
+ * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+ * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+ * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+ ***************************************************************************/
+
+/**
+ * @file
+ * @author Ivan Ristic <ivanr@webkreator.com>
+ */
+
+#ifndef _HTP_PRIVATE_H
+#define _HTP_PRIVATE_H
+
+#ifdef __cplusplus
+extern "C" {
+#endif
+
+#if defined(__cplusplus) && !defined(__STDC_FORMAT_MACROS)
+/* C99 requires that inttypes.h only exposes PRI* macros
+ * for C++ implementations if this is defined: */
+#define __STDC_FORMAT_MACROS
+#endif
+
+#include <ctype.h>
+#include <errno.h>
+#include <iconv.h>
+#include <inttypes.h>
+#include <stdarg.h>
+#include <stdio.h>
+#include <stdlib.h>
+#include <unistd.h>
+#include <sys/types.h>
+#include <sys/stat.h>
+#include <stdint.h>
+
+#include "htp_config_auto_gen.h"
+#include "htp.h"
+#include "htp_config_private.h"
+#include "htp_connection_parser_private.h"
+#include "htp_connection_private.h"
+#include "htp_list_private.h"
+#include "htp_multipart_private.h"
+#include "htp_table_private.h"
+
+#ifndef CR
+#define CR '\r'
+#endif
+
+#ifndef LF
+#define LF '\n'
+#endif
+
+// 1048576 is 1 MiB (2^20 bytes).
+#define HTP_LZMA_MEMLIMIT 1048576
+// The maximum deflate compression ratio is about 1000.
+#define HTP_COMPRESSION_BOMB_RATIO 2048
+#define HTP_COMPRESSION_BOMB_LIMIT 1048576
+// 100000 microseconds is 0.1 second.
+#define HTP_COMPRESSION_TIME_LIMIT_USEC 100000
+// Test the time spent on compression every 256 callbacks.
+#define HTP_COMPRESSION_TIME_FREQ_TEST 256
+
+#define HTP_FIELD_LIMIT_HARD 18000
+#define HTP_FIELD_LIMIT_SOFT 9000
+
+#define HTP_VALID_STATUS_MIN 100
+#define HTP_VALID_STATUS_MAX 999
+
+// Parser states, in the order in which they are
+// used as a single transaction is processed.
+
+htp_status_t htp_connp_REQ_IDLE(htp_connp_t *connp);
+htp_status_t htp_connp_REQ_LINE(htp_connp_t *connp);
+htp_status_t htp_connp_REQ_LINE_complete(htp_connp_t *connp);
+htp_status_t htp_connp_REQ_PROTOCOL(htp_connp_t *connp);
+htp_status_t htp_connp_REQ_HEADERS(htp_connp_t *connp);
+htp_status_t htp_connp_REQ_CONNECT_CHECK(htp_connp_t *connp);
+htp_status_t htp_connp_REQ_CONNECT_WAIT_RESPONSE(htp_connp_t *connp);
+htp_status_t htp_connp_REQ_CONNECT_PROBE_DATA(htp_connp_t *connp);
+htp_status_t htp_connp_REQ_BODY_DETERMINE(htp_connp_t *connp);
+htp_status_t htp_connp_REQ_BODY_IDENTITY(htp_connp_t *connp);
+htp_status_t htp_connp_REQ_BODY_CHUNKED_LENGTH(htp_connp_t *connp);
+htp_status_t htp_connp_REQ_BODY_CHUNKED_DATA(htp_connp_t *connp);
+htp_status_t htp_connp_REQ_BODY_CHUNKED_DATA_END(htp_connp_t *connp);
+htp_status_t htp_connp_REQ_FINALIZE(htp_connp_t *connp);
+htp_status_t htp_connp_REQ_IGNORE_DATA_AFTER_HTTP_0_9(htp_connp_t *connp);
+
+htp_status_t htp_connp_RES_IDLE(htp_connp_t *connp);
+htp_status_t htp_connp_RES_LINE(htp_connp_t *connp);
+htp_status_t htp_connp_RES_HEADERS(htp_connp_t *connp);
+htp_status_t htp_connp_RES_BODY_DETERMINE(htp_connp_t *connp);
+htp_status_t htp_connp_RES_BODY_IDENTITY_CL_KNOWN(htp_connp_t *connp);
+htp_status_t htp_connp_RES_BODY_IDENTITY_STREAM_CLOSE(htp_connp_t *connp);
+htp_status_t htp_connp_RES_BODY_CHUNKED_LENGTH(htp_connp_t *connp);
+htp_status_t htp_connp_RES_BODY_CHUNKED_DATA(htp_connp_t *connp);
+htp_status_t htp_connp_RES_BODY_CHUNKED_DATA_END(htp_connp_t *connp);
+htp_status_t htp_connp_RES_FINALIZE(htp_connp_t *connp);
+
+// Parsing functions
+
+htp_status_t htp_parse_request_line_generic(htp_connp_t *connp);
+htp_status_t htp_parse_request_line_generic_ex(htp_connp_t *connp, int nul_terminates);
+htp_status_t htp_parse_request_header_generic(htp_connp_t *connp, htp_header_t *h, unsigned char *data, size_t len);
+htp_status_t htp_process_request_header_generic(htp_connp_t *, unsigned char *data, size_t len);
+
+htp_status_t htp_parse_request_line_apache_2_2(htp_connp_t *connp);
+htp_status_t htp_process_request_header_apache_2_2(htp_connp_t *, unsigned char *data, size_t len);
+
+htp_status_t htp_parse_response_line_generic(htp_connp_t *connp);
+htp_status_t htp_parse_response_header_generic(htp_connp_t *connp, htp_header_t *h, unsigned char *data, size_t len);
+htp_status_t htp_process_response_header_generic(htp_connp_t *connp, unsigned char *data, size_t len);
+
+
+// Private transaction functions
+
+htp_status_t htp_tx_state_response_complete_ex(htp_tx_t *tx, int hybrid_mode);
+
+
+// Utility functions
+
+int htp_convert_method_to_number(bstr *);
+int htp_is_lws(int c);
+int htp_is_separator(int c);
+int htp_is_text(int c);
+int htp_is_token(int c);
+int htp_chomp(unsigned char *data, size_t *len);
+int htp_is_space(int c);
+
+int htp_parse_protocol(bstr *protocol);
+
+int htp_is_line_empty(unsigned char *data, size_t len);
+int htp_is_line_whitespace(unsigned char *data, size_t len);
+
+int htp_connp_is_line_folded(unsigned char *data, size_t len);
+int htp_is_folding_char(int c);
+int htp_connp_is_line_terminator(htp_connp_t *connp, unsigned char *data, size_t len, int next_no_lf);
+int htp_connp_is_line_ignorable(htp_connp_t *connp, unsigned char *data, size_t len);
+
+int htp_parse_uri(bstr *input, htp_uri_t **uri);
+htp_status_t htp_parse_hostport(bstr *authority, bstr **hostname, bstr **port, int *port_number, int *invalid);
+htp_status_t htp_parse_header_hostport(bstr *authority, bstr **hostname, bstr **port, int *port_number, uint64_t *flags);
+int htp_validate_hostname(bstr *hostname);
+int htp_parse_uri_hostport(htp_connp_t *connp, bstr *input, htp_uri_t *uri);
+int htp_normalize_parsed_uri(htp_tx_t *tx, htp_uri_t *parsed_uri_incomplete, htp_uri_t *parsed_uri);
+bstr *htp_normalize_hostname_inplace(bstr *input);
+
+int htp_decode_path_inplace(htp_tx_t *tx, bstr *path);
+
+ int htp_prenormalize_uri_path_inplace(bstr *s, int *flags, int case_insensitive, int backslash, int decode_separators, int remove_consecutive);
+void htp_normalize_uri_path_inplace(bstr *s);
+
+void htp_utf8_decode_path_inplace(htp_cfg_t *cfg, htp_tx_t *tx, bstr *path);
+void htp_utf8_validate_path(htp_tx_t *tx, bstr *path);
+
+int64_t htp_parse_content_length(bstr *b, htp_connp_t *connp);
+int64_t htp_parse_chunked_length(unsigned char *data, size_t len, int *extension);
+int64_t htp_parse_positive_integer_whitespace(unsigned char *data, size_t len, int base);
+int htp_parse_status(bstr *status);
+int htp_parse_authorization_digest(htp_connp_t *connp, htp_header_t *auth_header);
+int htp_parse_authorization_basic(htp_connp_t *connp, htp_header_t *auth_header);
+int htp_parse_authorization_bearer(htp_connp_t *connp, htp_header_t *auth_header);
+
+void htp_print_log(FILE *stream, htp_log_t *log);
+
+void fprint_bstr(FILE *stream, const char *name, bstr *b);
+void fprint_raw_data(FILE *stream, const char *name, const void *data, size_t len);
+void fprint_raw_data_ex(FILE *stream, const char *name, const void *data, size_t offset, size_t len);
+
+char *htp_connp_in_state_as_string(htp_connp_t *connp);
+char *htp_connp_out_state_as_string(htp_connp_t *connp);
+char *htp_tx_request_progress_as_string(htp_tx_t *tx);
+char *htp_tx_response_progress_as_string(htp_tx_t *tx);
+
+bstr *htp_unparse_uri_noencode(htp_uri_t *uri);
+
+int htp_treat_response_line_as_body(const uint8_t *data, size_t len);
+
+htp_status_t htp_req_run_hook_body_data(htp_connp_t *connp, htp_tx_data_t *d);
+htp_status_t htp_res_run_hook_body_data(htp_connp_t *connp, htp_tx_data_t *d);
+
+htp_status_t htp_ch_urlencoded_callback_request_body_data(htp_tx_data_t *d);
+htp_status_t htp_ch_urlencoded_callback_request_headers(htp_tx_t *tx);
+htp_status_t htp_ch_urlencoded_callback_request_line(htp_tx_t *tx);
+htp_status_t htp_ch_multipart_callback_request_body_data(htp_tx_data_t *d);
+htp_status_t htp_ch_multipart_callback_request_headers(htp_tx_t *tx);
+
+htp_status_t htp_php_parameter_processor(htp_param_t *p);
+
+int htp_transcode_params(htp_connp_t *connp, htp_table_t **params, int destroy_old);
+int htp_transcode_bstr(iconv_t cd, bstr *input, bstr **output);
+
+int htp_parse_single_cookie_v0(htp_connp_t *connp, unsigned char *data, size_t len);
+int htp_parse_cookies_v0(htp_connp_t *connp);
+int htp_parse_authorization(htp_connp_t *connp);
+
+htp_status_t htp_extract_quoted_string_as_bstr(unsigned char *data, size_t len, bstr **out, size_t *endoffset);
+
+htp_header_t *htp_connp_header_parse(htp_connp_t *, unsigned char *, size_t);
+
+htp_status_t htp_parse_ct_header(bstr *header, bstr **ct);
+
+htp_status_t htp_connp_req_receiver_finalize_clear(htp_connp_t *connp);
+htp_status_t htp_connp_res_receiver_finalize_clear(htp_connp_t *connp);
+
+htp_status_t htp_tx_finalize(htp_tx_t *tx);
+
+int htp_tx_is_complete(htp_tx_t *tx);
+
+htp_status_t htp_tx_state_request_complete_partial(htp_tx_t *tx);
+
+void htp_connp_tx_remove(htp_connp_t *connp, htp_tx_t *tx);
+
+void htp_tx_destroy_incomplete(htp_tx_t *tx);
+
+htp_status_t htp_tx_req_process_body_data_ex(htp_tx_t *tx, const void *data, size_t len);
+htp_status_t htp_tx_res_process_body_data_ex(htp_tx_t *tx, const void *data, size_t len);
+
+htp_status_t htp_tx_urldecode_uri_inplace(htp_tx_t *tx, bstr *input);
+htp_status_t htp_tx_urldecode_params_inplace(htp_tx_t *tx, bstr *input);
+
+void htp_connp_destroy_decompressors(htp_connp_t *connp);
+
+htp_status_t htp_header_has_token(const unsigned char *hvp, size_t hvlen, const unsigned char *value);
+
+#ifndef HAVE_STRLCAT
+size_t strlcat(char *dst, const char *src, size_t size);
+#endif
+
+#ifndef HAVE_STRLCPY
+size_t strlcpy(char *dst, const char *src, size_t size);
+#endif
+
+#ifdef __cplusplus
+}
+#endif
+
+// as CURL_MAX_HTTP_HEADER
+#define HTP_MAX_HEADER_FOLDED 102400
+
+#endif /* _HTP_PRIVATE_H */
+
diff --git a/htp/htp_request.c b/htp/htp_request.c
new file mode 100644
index 0000000..9fddbd8
--- /dev/null
+++ b/htp/htp_request.c
@@ -0,0 +1,1173 @@
+/***************************************************************************
+ * Copyright (c) 2009-2010 Open Information Security Foundation
+ * Copyright (c) 2010-2013 Qualys, Inc.
+ * All rights reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions are
+ * met:
+ *
+ * - Redistributions of source code must retain the above copyright
+ * notice, this list of conditions and the following disclaimer.
+
+ * - Redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in the
+ * documentation and/or other materials provided with the distribution.
+
+ * - Neither the name of the Qualys, Inc. nor the names of its
+ * contributors may be used to endorse or promote products derived from
+ * this software without specific prior written permission.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+ * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+ * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+ * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
+ * HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+ * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
+ * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+ * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
+ * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+ * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+ * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+ ***************************************************************************/
+
+/**
+ * @file
+ * @author Ivan Ristic <ivanr@webkreator.com>
+ */
+
+#include "htp_config_auto.h"
+
+#include "htp_private.h"
+
+#define IN_TEST_NEXT_BYTE_OR_RETURN(X) \
+if ((X)->in_current_read_offset >= (X)->in_current_len) { \
+ return HTP_DATA; \
+}
+
+#define IN_PEEK_NEXT(X) \
+if ((X)->in_current_read_offset >= (X)->in_current_len) { \
+ (X)->in_next_byte = -1; \
+} else { \
+ (X)->in_next_byte = (X)->in_current_data[(X)->in_current_read_offset]; \
+}
+
+#define IN_NEXT_BYTE(X) \
+if ((X)->in_current_read_offset < (X)->in_current_len) { \
+ (X)->in_next_byte = (X)->in_current_data[(X)->in_current_read_offset]; \
+ (X)->in_current_read_offset++; \
+ (X)->in_current_consume_offset++; \
+ (X)->in_stream_offset++; \
+} else { \
+ (X)->in_next_byte = -1; \
+}
+
+#define IN_NEXT_BYTE_OR_RETURN(X) \
+if ((X)->in_current_read_offset < (X)->in_current_len) { \
+ (X)->in_next_byte = (X)->in_current_data[(X)->in_current_read_offset]; \
+ (X)->in_current_read_offset++; \
+ (X)->in_current_consume_offset++; \
+ (X)->in_stream_offset++; \
+} else { \
+ return HTP_DATA; \
+}
+
+#define IN_COPY_BYTE_OR_RETURN(X) \
+if ((X)->in_current_read_offset < (X)->in_current_len) { \
+ (X)->in_next_byte = (X)->in_current_data[(X)->in_current_read_offset]; \
+ (X)->in_current_read_offset++; \
+ (X)->in_stream_offset++; \
+} else { \
+ return HTP_DATA_BUFFER; \
+}
+
+/**
+ * Hands the currently active data receiver hook the bytes of the current
+ * inbound chunk that lie between the receiver offset and the read offset.
+ * When all callbacks succeed, the receiver offset is moved up to the read
+ * offset. Does nothing when no receiver hook is installed.
+ *
+ * @param[in] connp
+ * @param[in] is_last Passed through to the callbacks in the is_last field.
+ * @return HTP_OK, or a value returned from a callback.
+ */
+static htp_status_t htp_connp_req_receiver_send_data(htp_connp_t *connp, int is_last) {
+    if (connp->in_data_receiver_hook != NULL) {
+        htp_tx_data_t pending;
+        pending.is_last = is_last;
+        pending.len = connp->in_current_read_offset - connp->in_current_receiver_offset;
+        pending.data = connp->in_current_data + connp->in_current_receiver_offset;
+        pending.tx = connp->in_tx;
+
+        htp_status_t hook_rc = htp_hook_run_all(connp->in_data_receiver_hook, &pending);
+        if (hook_rc != HTP_OK) return hook_rc;
+
+        // Everything up to the read offset has now been delivered.
+        connp->in_current_receiver_offset = connp->in_current_read_offset;
+    }
+
+    return HTP_OK;
+}
+
+/**
+ * Installs a new data receiver hook. Any hook already in place is finalized
+ * and cleared first; the new hook is installed even if that finalization
+ * reports a problem, and it starts receiving from the current read offset.
+ *
+ * @param[in] connp
+ * @param[in] data_receiver_hook
+ * @return HTP_OK, or a value returned from a callback of the previous hook.
+ */
+static htp_status_t htp_connp_req_receiver_set(htp_connp_t *connp, htp_hook_t *data_receiver_hook) {
+    htp_status_t finalize_rc = htp_connp_req_receiver_finalize_clear(connp);
+
+    connp->in_current_receiver_offset = connp->in_current_read_offset;
+    connp->in_data_receiver_hook = data_receiver_hook;
+
+    return finalize_rc;
+}
+
+/**
+ * Finalizes the current data receiver hook, if there is one: outstanding data
+ * is sent to it (flagged as the last piece) and the hook is then detached so
+ * that it receives nothing further. The hook is detached even when sending
+ * the outstanding data fails.
+ *
+ * @param[in] connp
+ * @return HTP_OK, or a value returned from a callback.
+ */
+htp_status_t htp_connp_req_receiver_finalize_clear(htp_connp_t *connp) {
+    htp_status_t rc = HTP_OK;
+
+    if (connp->in_data_receiver_hook != NULL) {
+        rc = htp_connp_req_receiver_send_data(connp, 1 /* last */);
+        connp->in_data_receiver_hook = NULL;
+    }
+
+    return rc;
+}
+
+/**
+ * Reacts to a change of the request parser state. Currently the only action
+ * is to set up the raw-data receiver when the parser enters the header
+ * state, choosing the hook according to the transaction's request progress.
+ *
+ * @param[in] connp
+ * @return HTP_OK, or a value returned from a callback.
+ */
+static htp_status_t htp_req_handle_state_change(htp_connp_t *connp) {
+    // Nothing to do unless the state actually changed.
+    if (connp->in_state == connp->in_state_previous) return HTP_OK;
+
+    if (connp->in_state == htp_connp_REQ_HEADERS) {
+        htp_status_t rc = HTP_OK;
+
+        // Receivers are currently used only for header blocks; any other
+        // progress value leaves the receiver untouched.
+        if (connp->in_tx->request_progress == HTP_REQUEST_HEADERS) {
+            rc = htp_connp_req_receiver_set(connp, connp->in_tx->cfg->hook_request_header_data);
+        } else if (connp->in_tx->request_progress == HTP_REQUEST_TRAILER) {
+            rc = htp_connp_req_receiver_set(connp, connp->in_tx->cfg->hook_request_trailer_data);
+        }
+
+        // On failure the previous state is not updated.
+        if (rc != HTP_OK) return rc;
+    }
+
+    // The finalization of raw data sending used to live here, but that made
+    // the last REQUEST_HEADER_DATA hook fire after the REQUEST_HEADERS hook,
+    // which made no sense. The finalization is therefore initiated from the
+    // request header processing code instead: less elegant, but a better
+    // user experience. Invoking some (or all) hooks on state change might
+    // work better.
+
+    connp->in_state_previous = connp->in_state;
+
+    return HTP_OK;
+}
+
+/**
+ * If the inbound data chunk holds bytes that have been read but not yet
+ * consumed (those between the consume offset and the read offset), this
+ * function appends them to connp->in_buf for later consumption and moves the
+ * consume offset up to the read offset. The amount accepted for buffering is
+ * limited by the field_limit_hard setting of the transaction configuration;
+ * the limit applies to the buffered bytes, the new bytes and any pending
+ * request header in connp->in_header taken together.
+ *
+ * @param[in] connp
+ * @return HTP_OK on success (also when there is nothing to buffer), or
+ *         HTP_ERROR when the limit is exceeded or memory allocation fails.
+ */
+static htp_status_t htp_connp_req_buffer(htp_connp_t *connp) {
+    if (connp->in_current_data == NULL) return HTP_OK;
+
+    unsigned char *data = connp->in_current_data + connp->in_current_consume_offset;
+    size_t len = connp->in_current_read_offset - connp->in_current_consume_offset;
+
+    if (len == 0)
+        return HTP_OK;
+
+    // Check the hard (buffering) limit.
+
+    size_t newlen = connp->in_buf_size + len;
+
+    // When calculating the size of the buffer, take into account the
+    // space we're using for the request header buffer.
+    if (connp->in_header != NULL) {
+        newlen += bstr_len(connp->in_header);
+    }
+
+    if (newlen > connp->in_tx->cfg->field_limit_hard) {
+        // Both values are unsigned sizes, hence %zu rather than %zd.
+        // NOTE(review): field_limit_hard is assumed to be a size_t; confirm
+        // against its declaration.
+        htp_log(connp, HTP_LOG_MARK, HTP_LOG_ERROR, 0, "Request buffer over the limit: size %zu limit %zu.",
+                newlen, connp->in_tx->cfg->field_limit_hard);
+        return HTP_ERROR;
+    }
+
+    // Copy the data remaining in the buffer.
+
+    if (connp->in_buf == NULL) {
+        connp->in_buf = malloc(len);
+        if (connp->in_buf == NULL) return HTP_ERROR;
+        memcpy(connp->in_buf, data, len);
+        connp->in_buf_size = len;
+    } else {
+        size_t newsize = connp->in_buf_size + len;
+        // Keep the old pointer until realloc() succeeds, so that the
+        // existing buffer is not lost on failure.
+        unsigned char *newbuf = realloc(connp->in_buf, newsize);
+        if (newbuf == NULL) return HTP_ERROR;
+        connp->in_buf = newbuf;
+        memcpy(connp->in_buf + connp->in_buf_size, data, len);
+        connp->in_buf_size = newsize;
+    }
+
+    // Reset the consumer position.
+    connp->in_current_consume_offset = connp->in_current_read_offset;
+
+    return HTP_OK;
+}
+
+/**
+ * Gives the caller the memory region that should be processed next, hiding
+ * the buffering from the rest of the code. Without buffered data the region
+ * is the unconsumed part of the current inbound chunk; with buffered data
+ * the unconsumed part of the chunk is first appended to the buffer and the
+ * whole buffer is returned.
+ *
+ * @param[in] connp
+ * @param[out] data
+ * @param[out] len
+ * @return HTP_OK, or HTP_ERROR if the chunk data could not be buffered.
+ */
+static htp_status_t htp_connp_req_consolidate_data(htp_connp_t *connp, unsigned char **data, size_t *len) {
+    if (connp->in_buf != NULL) {
+        // Some data is already buffered; merge in the current chunk and
+        // point to the consolidated buffer.
+        if (htp_connp_req_buffer(connp) != HTP_OK) {
+            return HTP_ERROR;
+        }
+
+        *len = connp->in_buf_size;
+        *data = connp->in_buf;
+
+        return HTP_OK;
+    }
+
+    // Nothing buffered; point straight into the current data chunk.
+    *len = connp->in_current_read_offset - connp->in_current_consume_offset;
+    *data = connp->in_current_data + connp->in_current_consume_offset;
+
+    return HTP_OK;
+}
+
+/**
+ * Moves the consumer position up to the reader position and releases the
+ * buffered inbound data, if any.
+ *
+ * @param[in] connp
+ */
+static void htp_connp_req_clear_buffer(htp_connp_t *connp) {
+    connp->in_current_consume_offset = connp->in_current_read_offset;
+
+    if (connp->in_buf == NULL) return;
+
+    free(connp->in_buf);
+    connp->in_buf_size = 0;
+    connp->in_buf = NULL;
+}
+
+/**
+ * Checks whether the current request uses the CONNECT method and, if so,
+ * suspends inbound parsing.
+ *
+ * @param[in] connp
+ * @return HTP_OK if the request does not use CONNECT, HTP_DATA_OTHER if
+ *         inbound parsing needs to be suspended until we hear from the
+ *         other side
+ */
+htp_status_t htp_connp_REQ_CONNECT_CHECK(htp_connp_t *connp) {
+    if (connp->in_tx->request_method_number != HTP_M_CONNECT) {
+        // Not a CONNECT request: move on to determine
+        // the presence of a request body.
+        connp->in_state = htp_connp_REQ_BODY_DETERMINE;
+        return HTP_OK;
+    }
+
+    // A CONNECT request has no body, but we first have to see the
+    // response in order to find out whether the tunneling request
+    // was a success.
+    connp->in_status = HTP_STREAM_DATA_OTHER;
+    connp->in_state = htp_connp_REQ_CONNECT_WAIT_RESPONSE;
+
+    return HTP_DATA_OTHER;
+}
+
+/**
+ * Probes the first line of data that follows an established CONNECT tunnel.
+ * If the line starts with a known request method the data is treated as
+ * plain text HTTP and the request is completed so that parsing continues;
+ * otherwise both stream directions are switched to tunnel status.
+ *
+ * @param[in] connp
+ * @return HTP_DATA_BUFFER if we need more data, HTP_ERROR if the data could
+ *         not be consolidated, the result of htp_tx_state_request_complete()
+ *         if the data looks like HTTP, HTP_OK otherwise.
+ */
+htp_status_t htp_connp_REQ_CONNECT_PROBE_DATA(htp_connp_t *connp) {
+    // Read up to, but not including, the first LF or NUL byte. The test is
+    // made on a peeked byte, before the byte is copied.
+    IN_PEEK_NEXT(connp);
+    while ((connp->in_next_byte != LF) && (connp->in_next_byte != 0x00)) {
+        IN_COPY_BYTE_OR_RETURN(connp);
+        IN_PEEK_NEXT(connp);
+    }
+
+    unsigned char *data;
+    size_t len;
+    if (htp_connp_req_consolidate_data(connp, &data, &len) != HTP_OK) {
+        return HTP_ERROR;
+    }
+#ifdef HTP_DEBUG
+    fprint_raw_data(stderr, "PROBING", data, len);
+#endif
+
+    // Skip past leading whitespace. IIS allows this.
+    size_t pos = 0;
+    for (; pos < len; pos++) {
+        if (!htp_is_space(data[pos])) break;
+    }
+
+    // The request method starts at the first non-whitespace
+    // character and ends with the next whitespace character.
+    size_t mstart = pos;
+    for (; pos < len; pos++) {
+        if (htp_is_space(data[pos])) break;
+    }
+
+    // A failed allocation leaves the method unknown.
+    int methodi = HTP_M_UNKNOWN;
+    bstr *method = bstr_dup_mem(data + mstart, pos - mstart);
+    if (method != NULL) {
+        methodi = htp_convert_method_to_number(method);
+        bstr_free(method);
+    }
+
+    if (methodi != HTP_M_UNKNOWN) {
+#ifdef HTP_DEBUG
+        fprint_raw_data(stderr, "htp_connp_REQ_CONNECT_PROBE_DATA: tunnel contains plain text HTTP", data, len);
+#endif
+        return htp_tx_state_request_complete(connp->in_tx);
+    }
+
+#ifdef HTP_DEBUG
+    fprint_raw_data(stderr, "htp_connp_REQ_CONNECT_PROBE_DATA: tunnel is not HTTP", data, len);
+#endif
+    connp->in_status = HTP_STREAM_TUNNEL;
+    connp->out_status = HTP_STREAM_TUNNEL;
+
+    // htp_connp_req_clear_buffer() is deliberately not called here,
+    // because the data is not being consumed.
+
+    return HTP_OK;
+}
+
+/**
+ * Decides whether inbound parsing, suspended after a CONNECT request was
+ * seen, can proceed now that (part of) the response may have arrived.
+ *
+ * @param[in] connp
+ * @return HTP_OK if the parser can resume parsing, HTP_DATA_OTHER if
+ *         it needs to continue waiting.
+ */
+htp_status_t htp_connp_REQ_CONNECT_WAIT_RESPONSE(htp_connp_t *connp) {
+    // Keep waiting until the response line of the current
+    // inbound transaction has been seen.
+    if (connp->in_tx->response_progress <= HTP_RESPONSE_LINE) {
+        return HTP_DATA_OTHER;
+    }
+
+    if ((connp->in_tx->response_status_number < 200) || (connp->in_tx->response_status_number > 299)) {
+        // Anything but a 2xx response means there is no tunnel;
+        // continue to the next transaction.
+        connp->in_state = htp_connp_REQ_FINALIZE;
+    } else {
+        // TODO Check that the server did not accept a connection to itself.
+
+        // A 2xx response means the requested tunnel was established:
+        // probe the remaining data on this stream to see whether it
+        // needs to be ignored or parsed.
+        connp->in_state = htp_connp_REQ_CONNECT_PROBE_DATA;
+    }
+
+    return HTP_OK;
+}
+
+/**
+ * Consumes bytes up to and including the LF that ends the current line, then
+ * switches to reading the next chunk length. Every consumed byte is counted
+ * in the request message length.
+ *
+ * @param[in] connp
+ * @returns HTP_OK on state change, or HTP_DATA when more data is needed.
+ */
+htp_status_t htp_connp_REQ_BODY_CHUNKED_DATA_END(htp_connp_t *connp) {
+    // TODO We shouldn't really see anything apart from CR and LF,
+    //      so we should warn about anything else.
+
+    do {
+        IN_NEXT_BYTE_OR_RETURN(connp);
+
+        connp->in_tx->request_message_len++;
+    } while (connp->in_next_byte != LF);
+
+    connp->in_state = htp_connp_REQ_BODY_CHUNKED_LENGTH;
+
+    return HTP_OK;
+}
+
+/**
+ * Processes the data of one chunk: as much of the chunk as the current
+ * inbound buffer holds is passed on as request body data.
+ *
+ * @param[in] connp
+ * @returns HTP_OK on state change (the whole chunk has been processed),
+ *          HTP_DATA when more data is needed, or the value returned by the
+ *          body data processing when that is not HTP_OK.
+ */
+htp_status_t htp_connp_REQ_BODY_CHUNKED_DATA(htp_connp_t *connp) {
+    // Start from what is left in the buffer and cap it at what is left of
+    // the chunk.
+    size_t bytes_to_consume = connp->in_current_len - connp->in_current_read_offset;
+    if (connp->in_current_len - connp->in_current_read_offset >= connp->in_chunked_length) {
+        // Entire chunk available in the buffer; read all of it.
+        bytes_to_consume = connp->in_chunked_length;
+    }
+
+    #ifdef HTP_DEBUG
+    fprintf(stderr, "htp_connp_REQ_BODY_CHUNKED_DATA Consuming %zd bytes\n", bytes_to_consume);
+    #endif
+
+    // If the input buffer is empty, ask for more data.
+    if (bytes_to_consume == 0) return HTP_DATA;
+
+    // Consume the data.
+    htp_status_t rc = htp_tx_req_process_body_data_ex(connp->in_tx,
+            connp->in_current_data + connp->in_current_read_offset, bytes_to_consume);
+    if (rc != HTP_OK) return rc;
+
+    // Adjust counters.
+    connp->in_chunked_length -= bytes_to_consume;
+    connp->in_tx->request_message_len += bytes_to_consume;
+    connp->in_stream_offset += bytes_to_consume;
+    connp->in_current_consume_offset += bytes_to_consume;
+    connp->in_current_read_offset += bytes_to_consume;
+
+    // Part of the chunk is still outstanding; ask for more data.
+    if (connp->in_chunked_length != 0) return HTP_DATA;
+
+    // End of the chunk.
+    connp->in_state = htp_connp_REQ_BODY_CHUNKED_DATA_END;
+
+    return HTP_OK;
+}
+
+/**
+ * Reads the chunk length line and extracts the chunk length from it. A
+ * positive length leads to the chunk data state, a zero length to the
+ * trailer headers, and a negative (invalid) length is an error.
+ *
+ * @param[in] connp
+ * @returns HTP_OK on state change, HTP_ERROR on error, or HTP_DATA_BUFFER
+ *          when more data is needed to complete the line.
+ */
+htp_status_t htp_connp_REQ_BODY_CHUNKED_LENGTH(htp_connp_t *connp) {
+    // Collect bytes until the end of the line (LF) has been reached.
+    do {
+        IN_COPY_BYTE_OR_RETURN(connp);
+    } while (connp->in_next_byte != LF);
+
+    unsigned char *data;
+    size_t len;
+
+    if (htp_connp_req_consolidate_data(connp, &data, &len) != HTP_OK) {
+        return HTP_ERROR;
+    }
+
+    // The whole line, terminator included, counts towards the message length.
+    connp->in_tx->request_message_len += len;
+
+    #ifdef HTP_DEBUG
+    fprint_raw_data(stderr, "Chunk length line", data, len);
+    #endif
+
+    htp_chomp(data, &len);
+
+    int chunk_ext = 0;
+    connp->in_chunked_length = htp_parse_chunked_length(data, len, &chunk_ext);
+    if (chunk_ext == 1) {
+        htp_log(connp, HTP_LOG_MARK, HTP_LOG_WARNING, 0, "Request chunk extension");
+    }
+
+    htp_connp_req_clear_buffer(connp);
+
+    // Handle chunk length.
+    if (connp->in_chunked_length < 0) {
+        // Invalid chunk length.
+        htp_log(connp, HTP_LOG_MARK, HTP_LOG_ERROR, 0, "Request chunk encoding: Invalid chunk length");
+        return HTP_ERROR;
+    }
+
+    if (connp->in_chunked_length == 0) {
+        // End of data.
+        connp->in_state = htp_connp_REQ_HEADERS;
+        connp->in_tx->request_progress = HTP_REQUEST_TRAILER;
+    } else {
+        // More data available.
+        connp->in_state = htp_connp_REQ_BODY_CHUNKED_DATA;
+    }
+
+    return HTP_OK;
+}
+
+/**
+ * Processes identity request body: as much of the remaining body as the
+ * current inbound buffer holds is passed on as request body data.
+ *
+ * @param[in] connp
+ * @returns HTP_OK on state change (the whole body has been processed),
+ *          HTP_DATA when more data is needed, or the value returned by the
+ *          body data processing when that is not HTP_OK.
+ */
+htp_status_t htp_connp_REQ_BODY_IDENTITY(htp_connp_t *connp) {
+    // Determine how many bytes we can consume.
+    size_t bytes_to_consume;
+    if (connp->in_current_len - connp->in_current_read_offset >= connp->in_body_data_left) {
+        // The rest of the body is available in the buffer; read all of it.
+        bytes_to_consume = connp->in_body_data_left;
+    } else {
+        // Only part of the body is available; read as much as we can.
+        bytes_to_consume = connp->in_current_len - connp->in_current_read_offset;
+    }
+
+    // If the input buffer is empty, ask for more data.
+    if (bytes_to_consume == 0) return HTP_DATA;
+
+    // Consume data. The status is kept in an htp_status_t, which is the type
+    // this function returns and the type used by the chunked counterpart.
+    htp_status_t rc = htp_tx_req_process_body_data_ex(connp->in_tx, connp->in_current_data + connp->in_current_read_offset, bytes_to_consume);
+    if (rc != HTP_OK) return rc;
+
+    // Adjust counters.
+    connp->in_current_read_offset += bytes_to_consume;
+    connp->in_current_consume_offset += bytes_to_consume;
+    connp->in_stream_offset += bytes_to_consume;
+    connp->in_tx->request_message_len += bytes_to_consume;
+    connp->in_body_data_left -= bytes_to_consume;
+
+    if (connp->in_body_data_left == 0) {
+        // End of request body.
+        connp->in_state = htp_connp_REQ_FINALIZE;
+        return HTP_OK;
+    }
+
+    // Ask for more data.
+    return HTP_DATA;
+}
+
+/**
+ * Determines presence (and encoding) of a request body and selects the next
+ * parser state accordingly, based on the transaction's request transfer
+ * coding.
+ *
+ * @param[in] connp
+ * @returns HTP_OK on state change, HTP_ERROR if the transfer coding is not
+ *          one of the expected values.
+ */
+htp_status_t htp_connp_REQ_BODY_DETERMINE(htp_connp_t *connp) {
+    // Determine the next state based on the presence of the request
+    // body, and the coding used.
+    switch (connp->in_tx->request_transfer_coding) {
+
+        case HTP_CODING_CHUNKED:
+            connp->in_state = htp_connp_REQ_BODY_CHUNKED_LENGTH;
+            connp->in_tx->request_progress = HTP_REQUEST_BODY;
+            break;
+
+        case HTP_CODING_IDENTITY:
+            connp->in_content_length = connp->in_tx->request_content_length;
+            connp->in_body_data_left = connp->in_content_length;
+
+            if (connp->in_content_length != 0) {
+                connp->in_state = htp_connp_REQ_BODY_IDENTITY;
+                connp->in_tx->request_progress = HTP_REQUEST_BODY;
+            } else {
+                // A zero-length body means we're done with the request. The
+                // state is set on this parser directly, as in every other
+                // branch, rather than through the transaction's
+                // back-pointer.
+                connp->in_state = htp_connp_REQ_FINALIZE;
+            }
+            break;
+
+        case HTP_CODING_NO_BODY:
+            // This request does not have a body, which
+            // means that we're done with it
+            connp->in_state = htp_connp_REQ_FINALIZE;
+            break;
+
+        default:
+            // Should not be here
+            return HTP_ERROR;
+    }
+
+    return HTP_OK;
+}
+
+/**
+ * Parses request headers.
+ *
+ * Input is copied one byte at a time; each time a LF is seen, the line
+ * gathered so far is examined. A terminator line ends the header section,
+ * a non-folded line starts a new header, and a folded line is appended to
+ * the pending header kept in connp->in_header. Completed headers are passed
+ * to connp->cfg->process_request_header().
+ *
+ * @param[in] connp
+ * @returns HTP_OK on state change, HTP_ERROR on error, or HTP_DATA when more data is needed.
+ */
+ htp_status_t htp_connp_REQ_HEADERS(htp_connp_t *connp) {
+ for (;;) {
+ // The stream is closed: flush the pending header (if any) and finish the header section.
+ if (connp->in_status == HTP_STREAM_CLOSED) {
+ // Parse previous header, if any.
+ if (connp->in_header != NULL) {
+ if (connp->cfg->process_request_header(connp, bstr_ptr(connp->in_header),
+ bstr_len(connp->in_header)) != HTP_OK)
+ return HTP_ERROR;
+ bstr_free(connp->in_header);
+ connp->in_header = NULL;
+ }
+
+ htp_connp_req_clear_buffer(connp);
+
+ // NOTE(review): progress is set to HTP_REQUEST_TRAILER only on this
+ // closed-stream path -- confirm that this is intended.
+ connp->in_tx->request_progress = HTP_REQUEST_TRAILER;
+
+ // We've seen all the request headers.
+ return htp_tx_state_request_headers(connp->in_tx);
+ }
+ // Macro; presumably returns from this function when no input byte is
+ // available -- confirm against its definition.
+ IN_COPY_BYTE_OR_RETURN(connp);
+
+ // Have we reached the end of the line?
+ if (connp->in_next_byte == LF) {
+ unsigned char *data;
+ size_t len;
+
+ if (htp_connp_req_consolidate_data(connp, &data, &len) != HTP_OK) {
+ return HTP_ERROR;
+ }
+
+ #ifdef HTP_DEBUG
+ fprint_raw_data(stderr, __func__, data, len);
+ #endif
+
+ // Should we terminate headers?
+ if (htp_connp_is_line_terminator(connp, data, len, 0)) {
+ // Parse previous header, if any.
+ if (connp->in_header != NULL) {
+ if (connp->cfg->process_request_header(connp, bstr_ptr(connp->in_header),
+ bstr_len(connp->in_header)) != HTP_OK) return HTP_ERROR;
+
+ bstr_free(connp->in_header);
+ connp->in_header = NULL;
+ }
+
+ htp_connp_req_clear_buffer(connp);
+
+ // We've seen all the request headers.
+ return htp_tx_state_request_headers(connp->in_tx);
+ }
+
+ htp_chomp(data, &len);
+
+ // Check for header folding.
+ if (htp_connp_is_line_folded(data, len) == 0) {
+ // New header line.
+
+ // Parse previous header, if any.
+ if (connp->in_header != NULL) {
+ if (connp->cfg->process_request_header(connp, bstr_ptr(connp->in_header),
+ bstr_len(connp->in_header)) != HTP_OK) return HTP_ERROR;
+
+ bstr_free(connp->in_header);
+ connp->in_header = NULL;
+ }
+
+ // Macro; the check below treats in_next_byte == -1 as "no byte available to peek".
+ IN_PEEK_NEXT(connp);
+
+ if (connp->in_next_byte != -1 && htp_is_folding_char(connp->in_next_byte) == 0) {
+ // Because we know this header is not folded, we can process the buffer straight away.
+ if (connp->cfg->process_request_header(connp, data, len) != HTP_OK) return HTP_ERROR;
+ } else {
+ // Keep the partial header data for parsing later.
+ connp->in_header = bstr_dup_mem(data, len);
+ if (connp->in_header == NULL) return HTP_ERROR;
+ }
+ } else {
+ // Folding; check that there's a previous header line to add to.
+ if (connp->in_header == NULL) {
+ // Invalid folding.
+
+ // Warn only once per transaction.
+ if (!(connp->in_tx->flags & HTP_INVALID_FOLDING)) {
+ connp->in_tx->flags |= HTP_INVALID_FOLDING;
+ htp_log(connp, HTP_LOG_MARK, HTP_LOG_WARNING, 0, "Invalid request field folding");
+ }
+
+ // Keep the header data for parsing later.
+ // The leading folding characters are skipped before the copy is made.
+ size_t trim = 0;
+ while(trim < len) {
+ if (!htp_is_folding_char(data[trim])) {
+ break;
+ }
+ trim++;
+ }
+ connp->in_header = bstr_dup_mem(data + trim, len - trim);
+ if (connp->in_header == NULL) return HTP_ERROR;
+ } else {
+ // Add to the existing header.
+ if (bstr_len(connp->in_header) < HTP_MAX_HEADER_FOLDED) {
+ bstr *new_in_header = bstr_add_mem(connp->in_header, data, len);
+ if (new_in_header == NULL) return HTP_ERROR;
+ connp->in_header = new_in_header;
+ } else {
+ // The pending header is left as-is; this continuation line is not added to it.
+ htp_log(connp, HTP_LOG_MARK, HTP_LOG_WARNING, 0, "Request field length exceeds folded maximum");
+ }
+ }
+ }
+
+ htp_connp_req_clear_buffer(connp);
+ }
+ }
+
+ // Not expected to be reached: the loop above has no visible exit other than return.
+ return HTP_ERROR;
+}
+
+ // An HTTP/0.9 request is supposed to consist of only a request line, without a protocol.
+ // Libhtp still treats the request as HTTP/0.9 if that request line is
+ // followed by a small amount of junk whitespace.
+ // Up to 16 such extra whitespace bytes are tolerated; beyond that, the
+ // request is considered to be an HTTP/1.x request with a missing protocol.
+ // HTTP/0.9 is unlikely to be encountered, and we want to limit probing.
+#define HTTP09_MAX_JUNK_LEN 16
+
+/**
+ * Determines request protocol.
+ *
+ * A request that was not flagged as HTTP/0.9 moves straight on to header
+ * parsing. A request that was flagged as HTTP/0.9 is probed first: if more
+ * than HTTP09_MAX_JUNK_LEN unread bytes remain in the current chunk, or if
+ * any of the unread bytes is not whitespace, the protocol is considered to
+ * be merely missing and header parsing is used after all.
+ *
+ * @param[in] connp
+ * @returns HTP_OK on state change, HTP_ERROR on error, or HTP_DATA when more data is needed.
+ */
+htp_status_t htp_connp_REQ_PROTOCOL(htp_connp_t *connp) {
+    if (connp->in_tx->is_protocol_0_9 != 0) {
+        // Short-style request line; find out whether the protocol was simply missing.
+        int missing_protocol = 0;
+
+        if (connp->in_current_len > connp->in_current_read_offset + HTTP09_MAX_JUNK_LEN) {
+            // Too much data follows the request line for it to be junk.
+            missing_protocol = 1;
+        } else {
+            // Anything other than whitespace looks like a header line.
+            for (int64_t pos = connp->in_current_read_offset; pos < connp->in_current_len; pos++) {
+                if (!htp_is_space(connp->in_current_data[pos])) {
+                    missing_protocol = 1;
+                    break;
+                }
+            }
+        }
+
+        if (!missing_protocol) {
+            // Genuine HTTP/0.9: there are no headers to parse, so we're done with this request.
+            connp->in_state = htp_connp_REQ_FINALIZE;
+            return HTP_OK;
+        }
+
+        htp_log(connp, HTP_LOG_MARK, HTP_LOG_WARNING, 0, "Request line: missing protocol");
+        connp->in_tx->is_protocol_0_9 = 0;
+    }
+
+    // Switch to request header parsing.
+    connp->in_state = htp_connp_REQ_HEADERS;
+    connp->in_tx->request_progress = HTP_REQUEST_HEADERS;
+
+    return HTP_OK;
+}
+
+/**
+ * Processes one request line that has been gathered from the input.
+ *
+ * An empty line asks for more data, an ignorable line is counted and
+ * skipped, and anything else is stored as the transaction's request line
+ * and handed to the configured request line parser.
+ *
+ * @param[in] connp
+ * @returns HTP_OK when the line was processed or ignored, HTP_DATA when the
+ *          gathered line is empty, or HTP_ERROR on error.
+ */
+htp_status_t htp_connp_REQ_LINE_complete(htp_connp_t *connp) {
+    unsigned char *line;
+    size_t line_len;
+
+    if (htp_connp_req_consolidate_data(connp, &line, &line_len) != HTP_OK) {
+        return HTP_ERROR;
+    }
+
+    #ifdef HTP_DEBUG
+    fprint_raw_data(stderr, __func__, line, line_len);
+    #endif
+
+    // Nothing was gathered; drop the buffer and wait for more input.
+    if (line_len == 0) {
+        htp_connp_req_clear_buffer(connp);
+        return HTP_DATA;
+    }
+
+    // Empty/whitespace lines are noted, then skipped.
+    if (htp_connp_is_line_ignorable(connp, line, line_len)) {
+        connp->in_tx->request_ignored_lines++;
+        htp_connp_req_clear_buffer(connp);
+        return HTP_OK;
+    }
+
+    // Keep a copy of the line, without its line ending, on the transaction.
+    htp_chomp(line, &line_len);
+    connp->in_tx->request_line = bstr_dup_mem(line, line_len);
+    if (!connp->in_tx->request_line) {
+        return HTP_ERROR;
+    }
+
+    // Parse the line, then finalize request line processing; the second
+    // step runs only when the first one succeeded.
+    if ((connp->cfg->parse_request_line(connp) != HTP_OK) ||
+            (htp_tx_state_request_line(connp->in_tx) != HTP_OK)) {
+        return HTP_ERROR;
+    }
+
+    htp_connp_req_clear_buffer(connp);
+
+    return HTP_OK;
+}
+
+/**
+ * Parses request line.
+ *
+ * Copies input bytes until a LF has been copied, or until the stream is
+ * closed and no further byte is available, and then hands the gathered
+ * line to htp_connp_REQ_LINE_complete().
+ *
+ * @param[in] connp
+ * @returns HTP_OK on state change, HTP_ERROR on error, or HTP_DATA when more data is needed.
+ */
+htp_status_t htp_connp_REQ_LINE(htp_connp_t *connp) {
+    int line_done = 0;
+
+    while (!line_done) {
+        // Look at the next byte without consuming it.
+        IN_PEEK_NEXT(connp);
+        if ((connp->in_next_byte == -1) && (connp->in_status == HTP_STREAM_CLOSED)) {
+            // No more input will arrive; work with what we have.
+            break;
+        }
+
+        // Get one byte
+        IN_COPY_BYTE_OR_RETURN(connp);
+
+        // Have we reached the end of the line?
+        line_done = (connp->in_next_byte == LF);
+    }
+
+    return htp_connp_REQ_LINE_complete(connp);
+}
+
+/**
+ * Finalizes the current request, probing any data that follows it.
+ *
+ * Unless the stream is closed, input is gathered up to (but not including)
+ * the next LF. If the gathered data is empty, or if its first
+ * whitespace-delimited token converts to a known request method, the
+ * request is completed. Otherwise the gathered data, plus the LF if there
+ * was one, is passed on as request body data.
+ *
+ * @param[in] connp
+ * @returns The status of htp_tx_state_request_complete() or of
+ *          htp_tx_req_process_body_data_ex(), HTP_ERROR when the gathered
+ *          data cannot be consolidated, or whatever IN_COPY_BYTE_OR_RETURN
+ *          returns when it runs out of input.
+ */
+htp_status_t htp_connp_REQ_FINALIZE(htp_connp_t *connp) {
+    if (connp->in_status != HTP_STREAM_CLOSED) {
+        IN_PEEK_NEXT(connp);
+        if (connp->in_next_byte == -1) {
+            // Nothing follows the request in this chunk.
+            return htp_tx_state_request_complete(connp->in_tx);
+        }
+        if (connp->in_next_byte != LF || connp->in_current_consume_offset >= connp->in_current_read_offset) {
+            for (;;) {
+                // peek until LF but do not mark it read so that REQ_LINE works
+                IN_PEEK_NEXT(connp);
+                if (connp->in_next_byte == LF) {
+                    break;
+                }
+                IN_COPY_BYTE_OR_RETURN(connp);
+            }
+        }
+    }
+
+    unsigned char *data;
+    size_t len;
+    if (htp_connp_req_consolidate_data(connp, &data, &len) != HTP_OK) {
+        return HTP_ERROR;
+    }
+#ifdef HTP_DEBUG
+    fprint_raw_data(stderr, "PROBING request finalize", data, len);
+#endif
+    if (len == 0) {
+        //closing
+        return htp_tx_state_request_complete(connp->in_tx);
+    }
+
+    size_t pos = 0;
+    size_t mstart = 0;
+    // skip past leading whitespace. IIS allows this
+    while ((pos < len) && htp_is_space(data[pos])) {
+        pos++;
+    }
+    if (pos) {
+        mstart = pos;
+    }
+    // The request method starts at the beginning of the
+    // line and ends with the first whitespace character.
+    while ((pos < len) && (!htp_is_space(data[pos]))) {
+        pos++;
+    }
+
+    if (pos > mstart) {
+        //non empty whitespace line
+        int methodi = HTP_M_UNKNOWN;
+        bstr *method = bstr_dup_mem(data + mstart, pos - mstart);
+        if (method) {
+            methodi = htp_convert_method_to_number(method);
+            bstr_free(method);
+        }
+        if (methodi != HTP_M_UNKNOWN) {
+            // The data starts with a known method: treat it as the start of
+            // the next request and complete the current one.
+            connp->in_body_data_left = -1;
+            return htp_tx_state_request_complete(connp->in_tx);
+        } // else continue
+        if (connp->in_body_data_left <= 0) {
+            // Warn about data that does not look like a new request.
+            htp_log(connp, HTP_LOG_MARK, HTP_LOG_WARNING, 0, "Unexpected request body");
+        } else {
+            connp->in_body_data_left = 1;
+        }
+    }
+    //Adds linefeed to the buffer if there was one
+    if (connp->in_next_byte == LF) {
+        IN_COPY_BYTE_OR_RETURN(connp);
+        // Check the status here as well, as the first consolidation above
+        // does; otherwise a failure would leave data/len describing the
+        // previous state and that would be passed on as body data.
+        if (htp_connp_req_consolidate_data(connp, &data, &len) != HTP_OK) {
+            return HTP_ERROR;
+        }
+    }
+    // Interpret remaining bytes as body data
+    htp_status_t rc = htp_tx_req_process_body_data_ex(connp->in_tx, data, len);
+    htp_connp_req_clear_buffer(connp);
+    return rc;
+}
+
+/**
+ * Skips over everything that is left unread in the current data chunk.
+ * If there was anything to skip, the connection is flagged with
+ * HTP_CONN_HTTP_0_9_EXTRA.
+ *
+ * @param[in] connp
+ * @returns HTP_DATA, always.
+ */
+htp_status_t htp_connp_REQ_IGNORE_DATA_AFTER_HTTP_0_9(htp_connp_t *connp) {
+    const size_t remaining = connp->in_current_len - connp->in_current_read_offset;
+
+    // Remember that unexpected data was seen.
+    if (remaining != 0) {
+        connp->conn->flags |= HTP_CONN_HTTP_0_9_EXTRA;
+    }
+
+    // Advance every offset past the remaining bytes.
+    connp->in_stream_offset += remaining;
+    connp->in_current_consume_offset += remaining;
+    connp->in_current_read_offset += remaining;
+
+    return HTP_DATA;
+}
+
+/**
+ * The idle state is where the parser ends up after a transaction has been
+ * processed. A new request transaction is created only once there is input
+ * to work with, so that no structures are allocated for a connection that
+ * carries no further data.
+ *
+ * @param[in] connp
+ * @returns HTP_OK on state change, HTP_ERROR on error, or HTP_DATA when more data is needed.
+ */
+htp_status_t htp_connp_REQ_IDLE(htp_connp_t * connp) {
+    // Do nothing unless at least one byte of data is available.
+    IN_TEST_NEXT_BYTE_OR_RETURN(connp);
+
+    connp->in_tx = htp_connp_tx_create(connp);
+    if (!connp->in_tx) {
+        return HTP_ERROR;
+    }
+
+    // Change state to TRANSACTION_START
+    htp_tx_state_request_start(connp->in_tx);
+
+    return HTP_OK;
+}
+
+/**
+ * Reports how far into the current inbound data chunk the parser has read.
+ *
+ * @param[in] connp
+ * @return The read offset within the current data chunk.
+ */
+size_t htp_connp_req_data_consumed(htp_connp_t *connp) {
+    const size_t consumed = connp->in_current_read_offset;
+
+    return consumed;
+}
+
+/**
+ * Feeds one chunk of inbound (request-side) data to the connection parser.
+ *
+ * The chunk is recorded in connp, and the current state function
+ * (connp->in_state) is then invoked in a loop until more data is needed,
+ * parsing is suspended, a stop is requested, or an error occurs.
+ *
+ * @param[in] connp
+ * @param[in] timestamp Copied into connp->in_timestamp when not NULL.
+ * @param[in] data Chunk data. NULL together with len > 0 is handled as a gap.
+ * @param[in] len Chunk length. Zero is accepted only when the inbound
+ *                stream status is HTP_STREAM_CLOSED.
+ * @return One of the HTP_STREAM_* status values.
+ */
+ int htp_connp_req_data(htp_connp_t *connp, const htp_time_t *timestamp, const void *data, size_t len) {
+ #ifdef HTP_DEBUG
+ fprintf(stderr, "htp_connp_req_data(connp->in_status %x)\n", connp->in_status);
+ fprint_raw_data(stderr, __func__, data, len);
+ #endif
+
+ // Return if the connection is in stop state.
+ if (connp->in_status == HTP_STREAM_STOP) {
+ htp_log(connp, HTP_LOG_MARK, HTP_LOG_INFO, 0, "Inbound parser is in HTP_STREAM_STOP");
+ return HTP_STREAM_STOP;
+ }
+
+ // Return if the connection had a fatal error earlier
+ if (connp->in_status == HTP_STREAM_ERROR) {
+ htp_log(connp, HTP_LOG_MARK, HTP_LOG_ERROR, 0, "Inbound parser is in HTP_STREAM_ERROR");
+
+ // NOTE(review): the debug message below names HTP_STREAM_DATA, but the
+ // value returned is HTP_STREAM_ERROR.
+ #ifdef HTP_DEBUG
+ fprintf(stderr, "htp_connp_req_data: returning HTP_STREAM_DATA (previous error)\n");
+ #endif
+
+ return HTP_STREAM_ERROR;
+ }
+
+ // Sanity check: we must have a transaction pointer if the state is not IDLE (no inbound transaction)
+ if ((connp->in_tx == NULL)&&(connp->in_state != htp_connp_REQ_IDLE)) {
+ connp->in_status = HTP_STREAM_ERROR;
+
+ htp_log(connp, HTP_LOG_MARK, HTP_LOG_ERROR, 0, "Missing inbound transaction data");
+
+ return HTP_STREAM_ERROR;
+ }
+
+ // If the length of the supplied data chunk is zero, proceed
+ // only if the stream has been closed. We do not allow zero-sized
+ // chunks in the API, but we use them internally to force the parsers
+ // to finalize parsing.
+ if (len == 0 && connp->in_status != HTP_STREAM_CLOSED) {
+ htp_log(connp, HTP_LOG_MARK, HTP_LOG_ERROR, 0, "Zero-length data chunks are not allowed");
+
+ // NOTE(review): the debug message below names HTP_STREAM_DATA, but the
+ // value returned is HTP_STREAM_CLOSED.
+ #ifdef HTP_DEBUG
+ fprintf(stderr, "htp_connp_req_data: returning HTP_STREAM_DATA (zero-length chunk)\n");
+ #endif
+
+ return HTP_STREAM_CLOSED;
+ }
+
+ // Remember the timestamp of the current request data chunk
+ if (timestamp != NULL) {
+ memcpy(&connp->in_timestamp, timestamp, sizeof (*timestamp));
+ }
+
+ // Store the current chunk information
+ connp->in_current_data = (unsigned char *) data;
+ connp->in_current_len = len;
+ connp->in_current_read_offset = 0;
+ connp->in_current_consume_offset = 0;
+ connp->in_current_receiver_offset = 0;
+ connp->in_chunk_count++;
+
+ htp_conn_track_inbound_data(connp->conn, len, timestamp);
+
+
+ // Return without processing any data if the stream is in tunneling
+ // mode (which it would be after an initial CONNECT transaction).
+ if (connp->in_status == HTP_STREAM_TUNNEL) {
+ #ifdef HTP_DEBUG
+ fprintf(stderr, "htp_connp_req_data: returning HTP_STREAM_TUNNEL\n");
+ #endif
+
+ return HTP_STREAM_TUNNEL;
+ }
+
+ // New inbound data has arrived, so an outbound side that was waiting in
+ // HTP_STREAM_DATA_OTHER is switched back to HTP_STREAM_DATA.
+ if (connp->out_status == HTP_STREAM_DATA_OTHER) {
+ connp->out_status = HTP_STREAM_DATA;
+ }
+
+ // Invoke a processor, in a loop, until an error
+ // occurs or until we run out of data. Many processors
+ // will process a request, each pointing to the next
+ // processor that needs to run.
+ for (;;) {
+ // NOTE(review): connp->in_tx may be NULL here while the state is
+ // htp_connp_REQ_IDLE (see the sanity check above) -- confirm that
+ // htp_tx_request_progress_as_string() accepts NULL.
+ #ifdef HTP_DEBUG
+ fprintf(stderr, "htp_connp_req_data: in state=%s, progress=%s\n",
+ htp_connp_in_state_as_string(connp),
+ htp_tx_request_progress_as_string(connp->in_tx));
+ #endif
+
+ // Return if there's been an error or if we've run out of data. We are relying
+ // on processors to supply error messages, so we'll keep quiet here.
+
+ htp_status_t rc;
+ //handle gap
+ // A gap is a chunk with a length but no data; only a few states accept it.
+ if (data == NULL && len > 0) {
+ //cannot switch over a function pointer in C
+ if (connp->in_state == htp_connp_REQ_BODY_IDENTITY ||
+ connp->in_state == htp_connp_REQ_IGNORE_DATA_AFTER_HTTP_0_9) {
+ rc = connp->in_state(connp);
+ } else if (connp->in_state == htp_connp_REQ_FINALIZE) {
+ //simple version without probing
+ rc = htp_tx_state_request_complete(connp->in_tx);
+ } else {
+ // go to htp_connp_REQ_CONNECT_PROBE_DATA ?
+ htp_log(connp, HTP_LOG_MARK, HTP_LOG_ERROR, 0, "Gaps are not allowed during this state");
+ return HTP_STREAM_CLOSED;
+ }
+ } else {
+ rc = connp->in_state(connp);
+ }
+ if (rc == HTP_OK) {
+ if (connp->in_status == HTP_STREAM_TUNNEL) {
+ #ifdef HTP_DEBUG
+ fprintf(stderr, "htp_connp_req_data: returning HTP_STREAM_TUNNEL\n");
+ #endif
+
+ return HTP_STREAM_TUNNEL;
+ }
+
+ rc = htp_req_handle_state_change(connp);
+ }
+
+ // Any status other than HTP_OK ends this call; it is mapped to a stream status below.
+ if (rc != HTP_OK) {
+ // Do we need more data?
+ if ((rc == HTP_DATA) || (rc == HTP_DATA_BUFFER)) {
+ htp_connp_req_receiver_send_data(connp, 0 /* not last */);
+
+ if (rc == HTP_DATA_BUFFER) {
+ if (htp_connp_req_buffer(connp) != HTP_OK) {
+ connp->in_status = HTP_STREAM_ERROR;
+ return HTP_STREAM_ERROR;
+ }
+ }
+
+ #ifdef HTP_DEBUG
+ fprintf(stderr, "htp_connp_req_data: returning HTP_STREAM_DATA\n");
+ #endif
+
+ connp->in_status = HTP_STREAM_DATA;
+
+ return HTP_STREAM_DATA;
+ }
+
+ // Check for suspended parsing.
+ if (rc == HTP_DATA_OTHER) {
+ // We might have actually consumed the entire data chunk?
+ if (connp->in_current_read_offset >= connp->in_current_len) {
+ // Do not send STREAM_DATE_DATA_OTHER if we've consumed the entire chunk.
+
+ #ifdef HTP_DEBUG
+ fprintf(stderr, "htp_connp_req_data: returning HTP_STREAM_DATA (suspended parsing)\n");
+ #endif
+
+ connp->in_status = HTP_STREAM_DATA;
+
+ return HTP_STREAM_DATA;
+ } else {
+ // Partial chunk consumption.
+
+ #ifdef HTP_DEBUG
+ fprintf(stderr, "htp_connp_req_data: returning HTP_STREAM_DATA_OTHER\n");
+ #endif
+
+ connp->in_status = HTP_STREAM_DATA_OTHER;
+
+ return HTP_STREAM_DATA_OTHER;
+ }
+ }
+
+ // Check for the stop signal.
+ if (rc == HTP_STOP) {
+ #ifdef HTP_DEBUG
+ fprintf(stderr, "htp_connp_req_data: returning HTP_STREAM_STOP\n");
+ #endif
+
+ connp->in_status = HTP_STREAM_STOP;
+
+ return HTP_STREAM_STOP;
+ }
+
+ #ifdef HTP_DEBUG
+ fprintf(stderr, "htp_connp_req_data: returning HTP_STREAM_ERROR\n");
+ #endif
+
+ // Permanent stream error.
+ connp->in_status = HTP_STREAM_ERROR;
+
+ return HTP_STREAM_ERROR;
+ }
+ }
+}
diff --git a/htp/htp_request_apache_2_2.c b/htp/htp_request_apache_2_2.c
new file mode 100644
index 0000000..6a56e4e
--- /dev/null
+++ b/htp/htp_request_apache_2_2.c
@@ -0,0 +1,64 @@
+/***************************************************************************
+ * Copyright (c) 2009-2010 Open Information Security Foundation
+ * Copyright (c) 2010-2013 Qualys, Inc.
+ * All rights reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions are
+ * met:
+ *
+ * - Redistributions of source code must retain the above copyright
+ * notice, this list of conditions and the following disclaimer.
+
+ * - Redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in the
+ * documentation and/or other materials provided with the distribution.
+
+ * - Neither the name of the Qualys, Inc. nor the names of its
+ * contributors may be used to endorse or promote products derived from
+ * this software without specific prior written permission.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+ * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+ * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+ * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
+ * HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+ * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
+ * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+ * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
+ * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+ * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+ * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+ ***************************************************************************/
+
+/**
+ * @file
+ * @author Ivan Ristic <ivanr@webkreator.com>
+ */
+
+#include "htp_config_auto.h"
+
+#include "htp_private.h"
+
+/**
+ * Extract one request header. A header can span multiple lines, in
+ * which case they will be folded into one before parsing is attempted.
+ *
+ * This variant adds no processing of its own; it forwards its arguments
+ * unchanged to htp_process_request_header_generic() and returns that
+ * function's result.
+ *
+ * @param[in] connp
+ * @param[in] data
+ * @param[in] len
+ * @return HTP_OK or HTP_ERROR
+ */
+ htp_status_t htp_process_request_header_apache_2_2(htp_connp_t *connp, unsigned char *data, size_t len) {
+ return htp_process_request_header_generic(connp, data, len);
+}
+
+/**
+ * Parse request line as Apache 2.2 does.
+ *
+ * Calls htp_parse_request_line_generic_ex() with NUL termination enabled,
+ * so that the request line ends at the first NUL byte.
+ *
+ * @param[in] connp
+ * @return HTP_OK or HTP_ERROR
+ */
+ htp_status_t htp_parse_request_line_apache_2_2(htp_connp_t *connp) {
+ return htp_parse_request_line_generic_ex(connp, 1 /* NUL terminates line */);
+}
diff --git a/htp/htp_request_generic.c b/htp/htp_request_generic.c
new file mode 100644
index 0000000..435cf0a
--- /dev/null
+++ b/htp/htp_request_generic.c
@@ -0,0 +1,462 @@
+/***************************************************************************
+ * Copyright (c) 2009-2010 Open Information Security Foundation
+ * Copyright (c) 2010-2013 Qualys, Inc.
+ * All rights reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions are
+ * met:
+ *
+ * - Redistributions of source code must retain the above copyright
+ * notice, this list of conditions and the following disclaimer.
+
+ * - Redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in the
+ * documentation and/or other materials provided with the distribution.
+
+ * - Neither the name of the Qualys, Inc. nor the names of its
+ * contributors may be used to endorse or promote products derived from
+ * this software without specific prior written permission.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+ * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+ * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+ * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
+ * HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+ * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
+ * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+ * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
+ * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+ * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+ * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+ ***************************************************************************/
+
+/**
+ * @file
+ * @author Ivan Ristic <ivanr@webkreator.com>
+ */
+
+#include "htp_config_auto.h"
+
+#include "htp_private.h"
+
+/**
+ * Extract one request header. A header can span multiple lines, in
+ * which case they will be folded into one before parsing is attempted.
+ *
+ * The parsed header is either stored in the transaction's request header
+ * table, or, when a header with the same name is already there, merged
+ * into the existing entry and then released. Repeated Content-Length
+ * headers are compared rather than merged.
+ *
+ * @param[in] connp
+ * @param[in] data
+ * @param[in] len
+ * @return HTP_OK or HTP_ERROR
+ */
+htp_status_t htp_process_request_header_generic(htp_connp_t *connp, unsigned char *data, size_t len) {
+    htp_status_t rc = HTP_OK;
+
+    // Allocate the header structure and fill it in by parsing the input.
+    htp_header_t *h = calloc(1, sizeof (*h));
+    if (h == NULL) return HTP_ERROR;
+
+    if (htp_parse_request_header_generic(connp, h, data, len) != HTP_OK) {
+        free(h);
+        return HTP_ERROR;
+    }
+
+    #ifdef HTP_DEBUG
+    fprint_bstr(stderr, "Header name", h->name);
+    fprint_bstr(stderr, "Header value", h->value);
+    #endif
+
+    htp_header_t *h_existing = htp_table_get(connp->in_tx->request_headers, h->name);
+
+    if (h_existing == NULL) {
+        // First header with this name: the table takes over the structure.
+        if (htp_table_add(connp->in_tx->request_headers, h->name, h) == HTP_OK) {
+            return HTP_OK;
+        }
+
+        // The header could not be stored; it is released and HTP_OK is still reported.
+        goto discard;
+    }
+
+    // A header with the same name has been seen before.
+    // TODO Do we want to have a list of the headers that are
+    //      allowed to be combined in this way?
+    if (h_existing->flags & HTP_FIELD_REPEATED) {
+        // For simplicity reasons, we count the repetitions of all headers
+        if (connp->in_tx->req_header_repetitions >= HTP_MAX_HEADERS_REPETITIONS) {
+            // Too many repetitions; this occurrence is dropped.
+            goto discard;
+        }
+        connp->in_tx->req_header_repetitions++;
+    } else {
+        // This is the second occurrence of this header.
+        htp_log(connp, HTP_LOG_MARK, HTP_LOG_WARNING, 0, "Repetition for header");
+    }
+
+    // Keep track of repeated same-name headers.
+    h_existing->flags |= HTP_FIELD_REPEATED;
+
+    if (bstr_cmp_c_nocase(h->name, "Content-Length") == 0) {
+        // Having multiple C-L headers is against the RFC but
+        // servers may ignore the subsequent headers if the values are the same.
+        // The values are compared numerically so that small formatting
+        // differences are ignored.
+        int64_t stored_cl = htp_parse_content_length(h_existing->value, NULL);
+        int64_t repeated_cl = htp_parse_content_length(h->value, NULL);
+
+        // Ambiguous request C-L value.
+        if ((stored_cl == -1) || (repeated_cl == -1) || (stored_cl != repeated_cl)) {
+            htp_log(connp, HTP_LOG_MARK, HTP_LOG_WARNING, 0, "Ambiguous request C-L value");
+        }
+
+        // The existing header is kept as it is; the new one is not merged in.
+    } else {
+        // Make room for ", " plus the new value, then append both to the existing value.
+        bstr *grown = bstr_expand(h_existing->value, bstr_len(h_existing->value) + 2 + bstr_len(h->value));
+        if (grown == NULL) {
+            rc = HTP_ERROR;
+            goto discard;
+        }
+
+        h_existing->value = grown;
+        bstr_add_mem_noex(h_existing->value, ", ", 2);
+        bstr_add_noex(h_existing->value, h->value);
+    }
+
+discard:
+    // The new header structure is no longer needed.
+    bstr_free(h->name);
+    bstr_free(h->value);
+    free(h);
+
+    return rc;
+}
+
+/**
+ * Generic request header parser.
+ *
+ * Splits one header line at the first colon into a name and a value and
+ * stores copies of both in h. A line without a colon (or with a NUL byte
+ * before the colon) becomes a header with an empty name whose value is the
+ * whole line. Anomalies are recorded in h->flags and logged once per
+ * transaction.
+ *
+ * @param[in] connp
+ * @param[in] h
+ * @param[in] data
+ * @param[in] len
+ * @return HTP_OK or HTP_ERROR
+ */
+htp_status_t htp_parse_request_header_generic(htp_connp_t *connp, htp_header_t *h, unsigned char *data, size_t len) {
+    htp_chomp(data, &len);
+
+    // Look for the colon; a NUL byte stops the search as well.
+    size_t colon_pos = 0;
+    for (; colon_pos < len; colon_pos++) {
+        if ((data[colon_pos] == '\0') || (data[colon_pos] == ':')) break;
+    }
+
+    if ((colon_pos == len) || (data[colon_pos] == '\0')) {
+        // Missing colon.
+
+        h->flags |= HTP_FIELD_UNPARSEABLE;
+
+        // Log only once per transaction.
+        if (!(connp->in_tx->flags & HTP_FIELD_UNPARSEABLE)) {
+            connp->in_tx->flags |= HTP_FIELD_UNPARSEABLE;
+            htp_log(connp, HTP_LOG_MARK, HTP_LOG_WARNING, 0, "Request field invalid: colon missing");
+        }
+
+        // We handle this case as a header with an empty name, with the value equal
+        // to the entire input string.
+
+        // TODO Apache will respond to this problem with a 400.
+
+        h->name = bstr_dup_c("");
+        if (h->name == NULL) return HTP_ERROR;
+
+        h->value = bstr_dup_mem(data, len);
+        if (h->value == NULL) {
+            bstr_free(h->name);
+            return HTP_ERROR;
+        }
+
+        return HTP_OK;
+    }
+
+    if (colon_pos == 0) {
+        // Empty header name.
+
+        h->flags |= HTP_FIELD_INVALID;
+
+        // Log only once per transaction.
+        if (!(connp->in_tx->flags & HTP_FIELD_INVALID)) {
+            connp->in_tx->flags |= HTP_FIELD_INVALID;
+            htp_log(connp, HTP_LOG_MARK, HTP_LOG_WARNING, 0, "Request field invalid: empty name");
+        }
+    }
+
+    // Header name: runs from the start of the line up to the colon,
+    // with any LWS in front of the colon left out.
+    const size_t name_start = 0;
+    size_t name_end = colon_pos;
+
+    while ((name_end > name_start) && (htp_is_lws(data[name_end - 1]))) {
+        // LWS after header name.
+
+        name_end--;
+
+        h->flags |= HTP_FIELD_INVALID;
+
+        // Log only once per transaction.
+        if (!(connp->in_tx->flags & HTP_FIELD_INVALID)) {
+            connp->in_tx->flags |= HTP_FIELD_INVALID;
+            htp_log(connp, HTP_LOG_MARK, HTP_LOG_WARNING, 0, "Request field invalid: LWS after name");
+        }
+    }
+
+    // Header value: starts after the colon and any LWS that follows it.
+    size_t value_start = colon_pos;
+
+    // Go over the colon.
+    if (value_start < len) {
+        value_start++;
+    }
+
+    // Ignore LWS before field-content.
+    while ((value_start < len) && (htp_is_lws(data[value_start]))) {
+        value_start++;
+    }
+
+    // The value ends at the end of the line, with trailing LWS left out.
+    // The byte at value_start itself is never examined here.
+    size_t value_end = len;
+    while ((value_end - 1 > value_start) && (htp_is_lws(data[value_end - 1]))) {
+        value_end--;
+    }
+
+    // Check that the header name is a token.
+    for (size_t i = name_start; i < name_end; i++) {
+        if (htp_is_token(data[i])) continue;
+
+        // Incorrectly formed header name.
+
+        h->flags |= HTP_FIELD_INVALID;
+
+        // Log only once per transaction.
+        if (!(connp->in_tx->flags & HTP_FIELD_INVALID)) {
+            connp->in_tx->flags |= HTP_FIELD_INVALID;
+            htp_log(connp, HTP_LOG_MARK, HTP_LOG_WARNING, 0, "Request header name is not a token");
+        }
+
+        break;
+    }
+
+    // Now extract the name and the value
+    h->name = bstr_dup_mem(data + name_start, name_end - name_start);
+    if (h->name == NULL) return HTP_ERROR;
+
+    h->value = bstr_dup_mem(data + value_start, value_end - value_start);
+    if (h->value == NULL) {
+        bstr_free(h->name);
+        return HTP_ERROR;
+    }
+
+    return HTP_OK;
+}
+
+/**
+ * Generic request line parser.
+ *
+ * Calls htp_parse_request_line_generic_ex() with NUL termination disabled,
+ * so that NUL bytes do not end the request line.
+ *
+ * @param[in] connp
+ * @return HTP_OK or HTP_ERROR
+ */
+ htp_status_t htp_parse_request_line_generic(htp_connp_t *connp) {
+ return htp_parse_request_line_generic_ex(connp, 0 /* NUL does not terminate the line */);
+}
+
+/**
+ * Generic request line parser with configurable NUL handling.
+ *
+ * Splits tx->request_line into method, URI and protocol, stores copies in
+ * tx->request_method, tx->request_uri and tx->request_protocol, and sets
+ * tx->request_method_number and tx->request_protocol_number. When nothing
+ * follows the method, or nothing follows the URI, the request is marked as
+ * HTTP/0.9 and parsing stops there.
+ *
+ * @param[in] connp
+ * @param[in] nul_terminates When non-zero, the line is cut at the first NUL byte.
+ * @return HTP_OK, or HTP_ERROR when one of the bstr_dup_mem() copies fails.
+ */
+ htp_status_t htp_parse_request_line_generic_ex(htp_connp_t *connp, int nul_terminates) {
+ htp_tx_t *tx = connp->in_tx;
+ unsigned char *data = bstr_ptr(tx->request_line);
+ size_t len = bstr_len(tx->request_line);
+ size_t pos = 0;
+ size_t mstart = 0;
+ size_t start;
+ size_t bad_delim;
+
+ if (nul_terminates) {
+ // The line ends with the first NUL byte.
+
+ size_t newlen = 0;
+ while ((pos < len) && (data[pos] != '\0')) {
+ pos++;
+ newlen++;
+ }
+
+ // Start again, with the new length.
+ len = newlen;
+ pos = 0;
+ }
+
+ // skip past leading whitespace. IIS allows this
+ while ((pos < len) && htp_is_space(data[pos])) pos++;
+ if (pos) {
+ htp_log(connp, HTP_LOG_MARK, HTP_LOG_WARNING, 0, "Request line: leading whitespace");
+ mstart = pos;
+
+ if (connp->cfg->requestline_leading_whitespace_unwanted != HTP_UNWANTED_IGNORE) {
+ // reset mstart so that we copy the whitespace into the method
+ mstart = 0;
+ // set expected response code to this anomaly
+ tx->response_status_expected_number = connp->cfg->requestline_leading_whitespace_unwanted;
+ }
+ }
+
+ // The request method starts at the beginning of the
+ // line and ends with the first whitespace character.
+ while ((pos < len) && (!htp_is_space(data[pos]))) pos++;
+
+ // No, we don't care if the method is empty.
+
+ tx->request_method = bstr_dup_mem(data + mstart, pos - mstart);
+ if (tx->request_method == NULL) return HTP_ERROR;
+
+ #ifdef HTP_DEBUG
+ fprint_raw_data(stderr, __func__, bstr_ptr(tx->request_method), bstr_len(tx->request_method));
+ #endif
+
+ tx->request_method_number = htp_convert_method_to_number(tx->request_method);
+
+ // bad_delim records whether a delimiter other than SP (0x20) was seen.
+ bad_delim = 0;
+ // Ignore whitespace after request method. The RFC allows
+ // for only one SP, but then suggests any number of SP and HT
+ // should be permitted. Apache uses isspace(), which is even
+ // more permitting, so that's what we use here.
+ while ((pos < len) && (isspace(data[pos]))) {
+ if (!bad_delim && data[pos] != 0x20) {
+ bad_delim++;
+ }
+ pos++;
+ }
+// Too much performance overhead for fuzzing
+#ifndef FUZZING_BUILD_MODE_UNSAFE_FOR_PRODUCTION
+ if (bad_delim) {
+ htp_log(connp, HTP_LOG_MARK, HTP_LOG_WARNING, 0, "Request line: non-compliant delimiter between Method and URI");
+ }
+#endif
+
+ // Is there anything after the request method?
+ if (pos == len) {
+ // No, this looks like a HTTP/0.9 request.
+
+ tx->is_protocol_0_9 = 1;
+ tx->request_protocol_number = HTP_PROTOCOL_0_9;
+ if (tx->request_method_number == HTP_M_UNKNOWN)
+ htp_log(connp, HTP_LOG_MARK, HTP_LOG_WARNING, 0, "Request line: unknown method only");
+
+ return HTP_OK;
+ }
+
+ // start marks the first byte of the URI; pos is moved to one past its last byte below.
+ start = pos;
+ bad_delim = 0;
+ if (tx->connp->cfg->allow_space_uri) {
+ // The line is scanned backwards from its end, so that the URI may contain spaces.
+ pos = len - 1;
+ // Skips the spaces at the end of line (after protocol)
+ while (pos > start && htp_is_space(data[pos])) pos--;
+ // The URI ends with the last whitespace.
+ while ((pos > start) && (data[pos] != 0x20)) {
+ if (!bad_delim && htp_is_space(data[pos])) {
+ bad_delim++;
+ }
+ pos--;
+ }
+ /* if we've seen some 'bad' delimiters, we retry with those */
+ if (bad_delim && pos == start) {
+ // special case: even though RFC's allow only SP (0x20), many
+ // implementations allow other delimiters, like tab or other
+ // characters that isspace() accepts.
+ pos = len - 1;
+ while ((pos > start) && (!htp_is_space(data[pos]))) pos--;
+ } else {
+ // reset bad_delim found in protocol part
+ bad_delim = 0;
+ for (size_t i = start; i < pos; i++) {
+ if (data[i] != 0x20 && htp_is_space(data[i])) {
+ bad_delim = 1;
+ break;
+ }
+ }
+ }
+ if (bad_delim) {
+#ifndef FUZZING_BUILD_MODE_UNSAFE_FOR_PRODUCTION
+ // warn regardless if we've seen non-compliant chars
+ htp_log(connp, HTP_LOG_MARK, HTP_LOG_WARNING, 0, "Request line: URI contains non-compliant delimiter");
+#endif
+ } else if (pos == start) {
+ // The backward scan reached the URI start without finding a delimiter:
+ // the URI extends to the end of the line.
+ pos = len;
+ }
+ } else {
+ // The URI ends with the first whitespace.
+ while ((pos < len) && (data[pos] != 0x20)) {
+ if (!bad_delim && htp_is_space(data[pos])) {
+ bad_delim++;
+ }
+ pos++;
+ }
+ /* if we've seen some 'bad' delimiters, we retry with those */
+ if (bad_delim && pos == len) {
+ // special case: even though RFC's allow only SP (0x20), many
+ // implementations allow other delimiters, like tab or other
+ // characters that isspace() accepts.
+ pos = start;
+ while ((pos < len) && (!htp_is_space(data[pos]))) pos++;
+ }
+#ifndef FUZZING_BUILD_MODE_UNSAFE_FOR_PRODUCTION
+ if (bad_delim) {
+ // warn regardless if we've seen non-compliant chars
+ htp_log(connp, HTP_LOG_MARK, HTP_LOG_WARNING, 0, "Request line: URI contains non-compliant delimiter");
+ }
+#endif
+ }
+
+ tx->request_uri = bstr_dup_mem(data + start, pos - start);
+ if (tx->request_uri == NULL) return HTP_ERROR;
+
+ #ifdef HTP_DEBUG
+ fprint_raw_data(stderr, __func__, bstr_ptr(tx->request_uri), bstr_len(tx->request_uri));
+ #endif
+
+ // Ignore whitespace after URI.
+ while ((pos < len) && (htp_is_space(data[pos]))) pos++;
+
+ // Is there protocol information available?
+ if (pos == len) {
+ // No, this looks like a HTTP/0.9 request.
+
+ tx->is_protocol_0_9 = 1;
+ tx->request_protocol_number = HTP_PROTOCOL_0_9;
+ if (tx->request_method_number == HTP_M_UNKNOWN)
+ htp_log(connp, HTP_LOG_MARK, HTP_LOG_WARNING, 0, "Request line: unknown method and no protocol");
+
+ return HTP_OK;
+ }
+
+ // The protocol information continues until the end of the line.
+ tx->request_protocol = bstr_dup_mem(data + pos, len - pos);
+ if (tx->request_protocol == NULL) return HTP_ERROR;
+
+ tx->request_protocol_number = htp_parse_protocol(tx->request_protocol);
+ if (tx->request_method_number == HTP_M_UNKNOWN && tx->request_protocol_number == HTP_PROTOCOL_INVALID)
+ htp_log(connp, HTP_LOG_MARK, HTP_LOG_WARNING, 0, "Request line: unknown method and invalid protocol");
+
+ #ifdef HTP_DEBUG
+ fprint_raw_data(stderr, __func__, bstr_ptr(tx->request_protocol), bstr_len(tx->request_protocol));
+ #endif
+
+ return HTP_OK;
+}
+
diff --git a/htp/htp_request_parsers.c b/htp/htp_request_parsers.c
new file mode 100644
index 0000000..448ed4c
--- /dev/null
+++ b/htp/htp_request_parsers.c
@@ -0,0 +1,149 @@
+/***************************************************************************
+ * Copyright (c) 2009-2010 Open Information Security Foundation
+ * Copyright (c) 2010-2013 Qualys, Inc.
+ * All rights reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions are
+ * met:
+ *
+ * - Redistributions of source code must retain the above copyright
+ * notice, this list of conditions and the following disclaimer.
+
+ * - Redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in the
+ * documentation and/or other materials provided with the distribution.
+
+ * - Neither the name of the Qualys, Inc. nor the names of its
+ * contributors may be used to endorse or promote products derived from
+ * this software without specific prior written permission.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+ * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+ * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+ * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
+ * HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+ * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
+ * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+ * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
+ * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+ * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+ * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+ ***************************************************************************/
+
+/**
+ * @file
+ * @author Ivan Ristic <ivanr@webkreator.com>
+ */
+
+#include "htp_config_auto.h"
+
+#include "htp_private.h"
+
+#if 0
+
+/**
+ *
+ */
+int htp_header_parse_internal_strict(unsigned char *data, size_t len, htp_header_t *h) {
+ size_t name_start, name_end;
+ size_t value_start, value_end;
+
+ // Deal with the name first
+ name_start = name_end = 0;
+
+ // Find where the header name ends
+ while (name_end < len) {
+ if (htp_is_lws(data[name_end]) || data[name_end] == ':') break;
+ name_end++;
+ }
+
+ if (name_end == 0) {
+ // Empty header name
+ return -1;
+ }
+
+ if (name_end == len) {
+ // TODO
+ return -1;
+ }
+
+ // Is there any LWS before colon?
+ size_t pos = name_end;
+ while (pos < len) {
+ if (!htp_is_lws(data[pos])) break;
+ pos++;
+ // TODO
+ // return -1;
+ }
+
+ if (pos == len) {
+ // TODO
+ return -1;
+ }
+
+ // The next character must be a colon
+ if (data[pos] != ':') {
+ // TODO
+ return -1;
+ }
+
+ // Move over the colon
+ pos++;
+
+ // Again, ignore any LWS
+ while (pos < len) {
+ if (!htp_is_lws(data[pos])) break;
+ pos++;
+ }
+
+ if (pos == len) {
+ // TODO
+ return -1;
+ }
+
+ value_start = value_end = pos;
+
+ while (value_end < len) {
+ if (htp_is_lws(data[value_end])) break;
+ value_end++;
+ }
+
+ h->name_offset = name_start;
+ h->name_len = name_end - name_start;
+ h->value_offset = value_start;
+ h->value_len = value_end - value_start;
+
+ return 1;
+}
+ */
+
+/**
+ *
+ */
+htp_header_t *htp_connp_header_parse(htp_connp_t *reqp, unsigned char *data, size_t len) {
+ htp_header_t *h = calloc(1, sizeof (htp_header_t));
+ if (h == NULL) return NULL;
+
+ // Parse the header line
+ if (reqp->impl_header_parse(data, len, h) < 0) {
+ // Invalid header line
+ h->is_parsed = 0;
+ h->name = bstr_dup_mem(data, len);
+
+ return h;
+ }
+
+ // Now extract the name and the value
+ h->name = bstr_dup_mem(data + h->name_offset, h->name_len);
+ h->value = bstr_dup_mem(data + h->value_offset, h->value_len);
+ h->is_parsed = 1;
+
+ // Because header names are case-insensitive, we will convert
+ // the name to lowercase to use it as a lookup key.
+ h->name_lowercase = bstr_to_lowercase(h->name);
+
+ return h;
+}
+
+#endif
diff --git a/htp/htp_response.c b/htp/htp_response.c
new file mode 100644
index 0000000..121004c
--- /dev/null
+++ b/htp/htp_response.c
@@ -0,0 +1,1436 @@
+/***************************************************************************
+ * Copyright (c) 2009-2010 Open Information Security Foundation
+ * Copyright (c) 2010-2013 Qualys, Inc.
+ * All rights reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions are
+ * met:
+ *
+ * - Redistributions of source code must retain the above copyright
+ * notice, this list of conditions and the following disclaimer.
+
+ * - Redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in the
+ * documentation and/or other materials provided with the distribution.
+
+ * - Neither the name of the Qualys, Inc. nor the names of its
+ * contributors may be used to endorse or promote products derived from
+ * this software without specific prior written permission.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+ * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+ * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+ * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
+ * HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+ * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
+ * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+ * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
+ * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+ * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+ * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+ ***************************************************************************/
+
+/**
+ * @file
+ * @author Ivan Ristic <ivanr@webkreator.com>
+ */
+
+#include "htp_config_auto.h"
+
+#include "htp_private.h"
+
+ // Cursor helpers for the outbound (response) data chunk. X is the connection
+ // parser; each macro expands to a bare if/else statement, and several of them
+ // return from the ENCLOSING function, so they may only be used as standalone
+ // statements inside functions returning htp_status_t.
+
+ // Returns HTP_DATA from the enclosing function when the current chunk has no
+ // unread bytes left. Does not move any offset.
+ #define OUT_TEST_NEXT_BYTE_OR_RETURN(X) \
+ if ((X)->out_current_read_offset >= (X)->out_current_len) { \
+ return HTP_DATA; \
+ }
+
+ // Loads the next unread byte into out_next_byte without advancing any offset;
+ // out_next_byte is set to -1 when the chunk is exhausted.
+ #define OUT_PEEK_NEXT(X) \
+ if ((X)->out_current_read_offset >= (X)->out_current_len) { \
+ (X)->out_next_byte = -1; \
+ } else { \
+ (X)->out_next_byte = (X)->out_current_data[(X)->out_current_read_offset]; \
+ }
+
+ // Reads one byte into out_next_byte and advances the read, consume and stream
+ // offsets; out_next_byte is set to -1 when the chunk is exhausted.
+ #define OUT_NEXT_BYTE(X) \
+ if ((X)->out_current_read_offset < (X)->out_current_len) { \
+ (X)->out_next_byte = (X)->out_current_data[(X)->out_current_read_offset]; \
+ (X)->out_current_read_offset++; \
+ (X)->out_current_consume_offset++; \
+ (X)->out_stream_offset++; \
+ } else { \
+ (X)->out_next_byte = -1; \
+ }
+
+ // Same as OUT_NEXT_BYTE, except that an exhausted chunk makes the enclosing
+ // function return HTP_DATA.
+ #define OUT_NEXT_BYTE_OR_RETURN(X) \
+ if ((X)->out_current_read_offset < (X)->out_current_len) { \
+ (X)->out_next_byte = (X)->out_current_data[(X)->out_current_read_offset]; \
+ (X)->out_current_read_offset++; \
+ (X)->out_current_consume_offset++; \
+ (X)->out_stream_offset++; \
+ } else { \
+ return HTP_DATA; \
+ }
+
+ // Reads one byte into out_next_byte and advances the read and stream offsets
+ // but NOT the consume offset, so the byte stays part of the pending
+ // (unconsumed) region. An exhausted chunk makes the enclosing function return
+ // HTP_DATA_BUFFER.
+ #define OUT_COPY_BYTE_OR_RETURN(X) \
+ if ((X)->out_current_read_offset < (X)->out_current_len) { \
+ (X)->out_next_byte = (X)->out_current_data[(X)->out_current_read_offset]; \
+ (X)->out_current_read_offset++; \
+ (X)->out_stream_offset++; \
+ } else { \
+ return HTP_DATA_BUFFER; \
+ }
+
+ // NOTE(review): presumably a placeholder URI for responses seen without a
+ // matching request; its use is outside this part of the file -- confirm.
+ #define REQUEST_URI_NOT_SEEN "/libhtp::request_uri_not_seen"
+
+/**
+ * Sends outstanding connection data to the currently active data receiver hook.
+ *
+ * @param[in] connp
+ * @param[in] is_last
+ * @return HTP_OK, or a value returned from a callback.
+ */
+static htp_status_t htp_connp_res_receiver_send_data(htp_connp_t *connp, int is_last) {
+ if (connp->out_data_receiver_hook == NULL) return HTP_OK;
+
+ htp_tx_data_t d;
+ d.tx = connp->out_tx;
+ d.data = connp->out_current_data + connp->out_current_receiver_offset;
+ d.len = connp->out_current_read_offset - connp->out_current_receiver_offset;
+ d.is_last = is_last;
+
+ htp_status_t rc = htp_hook_run_all(connp->out_data_receiver_hook, &d);
+ if (rc != HTP_OK) return rc;
+
+ connp->out_current_receiver_offset = connp->out_current_read_offset;
+
+ return HTP_OK;
+}
+
+/**
+ * Finalizes an existing data receiver hook by sending any outstanding data to it. The
+ * hook is then removed so that it receives no more data.
+ *
+ * @param[in] connp
+ * @return HTP_OK, or a value returned from a callback.
+ */
+htp_status_t htp_connp_res_receiver_finalize_clear(htp_connp_t *connp) {
+ if (connp->out_data_receiver_hook == NULL) return HTP_OK;
+
+ htp_status_t rc = htp_connp_res_receiver_send_data(connp, 1 /* last */);
+
+ connp->out_data_receiver_hook = NULL;
+
+ return rc;
+}
+
+/**
+ * Configures the data receiver hook. If there is a previous hook, it will be finalized and cleared.
+ *
+ * @param[in] connp
+ * @param[in] data_receiver_hook
+ * @return HTP_OK, or a value returned from a callback.
+ */
+static htp_status_t htp_connp_res_receiver_set(htp_connp_t *connp, htp_hook_t *data_receiver_hook) {
+ htp_status_t rc = htp_connp_res_receiver_finalize_clear(connp);
+
+ connp->out_data_receiver_hook = data_receiver_hook;
+ connp->out_current_receiver_offset = connp->out_current_read_offset;
+
+ return rc;
+}
+
+/**
+ * Handles request parser state changes. At the moment, this function is used only
+ * to configure data receivers, which are sent raw connection data.
+ *
+ * @param[in] connp
+ * @return HTP_OK, or a value returned from a callback.
+ */
+static htp_status_t htp_res_handle_state_change(htp_connp_t *connp) {
+ if (connp->out_state_previous == connp->out_state) return HTP_OK;
+
+ if (connp->out_state == htp_connp_RES_HEADERS) {
+ htp_status_t rc = HTP_OK;
+
+ switch (connp->out_tx->response_progress) {
+ case HTP_RESPONSE_HEADERS:
+ rc = htp_connp_res_receiver_set(connp, connp->out_tx->cfg->hook_response_header_data);
+ break;
+
+ case HTP_RESPONSE_TRAILER:
+ rc = htp_connp_res_receiver_set(connp, connp->out_tx->cfg->hook_response_trailer_data);
+ break;
+
+ default:
+ // Do nothing; receivers are currently used only for header blocks.
+ break;
+ }
+
+ if (rc != HTP_OK) return rc;
+ }
+
+ // Same comment as in htp_req_handle_state_change(). Below is a copy.
+
+ // Initially, I had the finalization of raw data sending here, but that
+ // caused the last REQUEST_HEADER_DATA hook to be invoked after the
+ // REQUEST_HEADERS hook -- which I thought made no sense. For that reason,
+ // the finalization is now initiated from the request header processing code,
+ // which is less elegant but provides a better user experience. Having some
+ // (or all) hooks to be invoked on state change might work better.
+
+ connp->out_state_previous = connp->out_state;
+
+ return HTP_OK;
+}
+
+/**
+ * If there is any data left in the outbound data chunk, this function will preserve
+ * it for later consumption. The maximum amount accepted for buffering is controlled
+ * by htp_config_t::field_limit_hard.
+ *
+ * @param[in] connp
+ * @return HTP_OK, or HTP_ERROR on fatal failure.
+ */
+static htp_status_t htp_connp_res_buffer(htp_connp_t *connp) {
+ if (connp->out_current_data == NULL) return HTP_OK;
+
+ unsigned char *data = connp->out_current_data + connp->out_current_consume_offset;
+ size_t len = connp->out_current_read_offset - connp->out_current_consume_offset;
+
+ // Check the hard (buffering) limit.
+
+ size_t newlen = connp->out_buf_size + len;
+
+ // When calculating the size of the buffer, take into account the
+ // space we're using for the response header buffer.
+ if (connp->out_header != NULL) {
+ newlen += bstr_len(connp->out_header);
+ }
+
+ if (newlen > connp->out_tx->cfg->field_limit_hard) {
+ htp_log(connp, HTP_LOG_MARK, HTP_LOG_ERROR, 0, "Response the buffer limit: size %zd limit %zd.",
+ newlen, connp->out_tx->cfg->field_limit_hard);
+ return HTP_ERROR;
+ }
+
+ // Copy the data remaining in the buffer.
+
+ if (connp->out_buf == NULL) {
+ connp->out_buf = malloc(len);
+ if (connp->out_buf == NULL) return HTP_ERROR;
+ memcpy(connp->out_buf, data, len);
+ connp->out_buf_size = len;
+ } else {
+ size_t newsize = connp->out_buf_size + len;
+ unsigned char *newbuf = realloc(connp->out_buf, newsize);
+ if (newbuf == NULL) return HTP_ERROR;
+ connp->out_buf = newbuf;
+ memcpy(connp->out_buf + connp->out_buf_size, data, len);
+ connp->out_buf_size = newsize;
+ }
+
+ // Reset the consumer position.
+ connp->out_current_consume_offset = connp->out_current_read_offset;
+
+ return HTP_OK;
+}
+
+/**
+ * Returns to the caller the memory region that should be processed next. This function
+ * hides away the buffering process from the rest of the code, allowing it to work with
+ * non-buffered data that's in the outbound chunk, or buffered data that's in our structures.
+ *
+ * @param[in] connp
+ * @param[out] data
+ * @param[out] len
+ * @return HTP_OK
+ */
+static htp_status_t htp_connp_res_consolidate_data(htp_connp_t *connp, unsigned char **data, size_t *len) {
+ if (connp->out_buf == NULL) {
+ // We do not have any data buffered; point to the current data chunk.
+ *data = connp->out_current_data + connp->out_current_consume_offset;
+ *len = connp->out_current_read_offset - connp->out_current_consume_offset;
+ } else {
+ // We do have data in the buffer. Add data from the current
+ // chunk, and point to the consolidated buffer.
+ if (htp_connp_res_buffer(connp) != HTP_OK) {
+ return HTP_ERROR;
+ }
+
+ *data = connp->out_buf;
+ *len = connp->out_buf_size;
+ }
+
+ return HTP_OK;
+}
+
+/**
+ * Clears buffered outbound data and resets the consumer position to the reader position.
+ *
+ * @param[in] connp
+ */
+static void htp_connp_res_clear_buffer(htp_connp_t *connp) {
+ connp->out_current_consume_offset = connp->out_current_read_offset;
+
+ if (connp->out_buf != NULL) {
+ free(connp->out_buf);
+ connp->out_buf = NULL;
+ connp->out_buf_size = 0;
+ }
+}
+
+/**
+ * Consumes bytes until the end of the current line.
+ *
+ * @param[in] connp
+ * @returns HTP_OK on state change, HTP_ERROR on error, or HTP_DATA when more data is needed.
+ */
+htp_status_t htp_connp_RES_BODY_CHUNKED_DATA_END(htp_connp_t *connp) {
+ // TODO We shouldn't really see anything apart from CR and LF,
+ // so we should warn about anything else.
+
+ for (;;) {
+ OUT_NEXT_BYTE_OR_RETURN(connp);
+
+ connp->out_tx->response_message_len++;
+
+ if (connp->out_next_byte == LF) {
+ connp->out_state = htp_connp_RES_BODY_CHUNKED_LENGTH;
+
+ return HTP_OK;
+ }
+ }
+
+ return HTP_ERROR;
+}
+
+/**
+ * Processes a chunk of data.
+ *
+ * @param[in] connp
+ * @returns HTP_OK on state change, HTP_ERROR on error, or HTP_DATA when more data is needed.
+ */
+htp_status_t htp_connp_RES_BODY_CHUNKED_DATA(htp_connp_t *connp) {
+ size_t bytes_to_consume;
+
+ // Determine how many bytes we can consume.
+ if (connp->out_current_len - connp->out_current_read_offset >= connp->out_chunked_length) {
+ bytes_to_consume = connp->out_chunked_length;
+ } else {
+ bytes_to_consume = connp->out_current_len - connp->out_current_read_offset;
+ }
+
+ if (bytes_to_consume == 0) return HTP_DATA;
+
+ // Consume the data.
+ htp_status_t rc = htp_tx_res_process_body_data_ex(connp->out_tx, connp->out_current_data + connp->out_current_read_offset, bytes_to_consume);
+ if (rc != HTP_OK) return rc;
+
+ // Adjust the counters.
+ connp->out_current_read_offset += bytes_to_consume;
+ connp->out_current_consume_offset += bytes_to_consume;
+ connp->out_stream_offset += bytes_to_consume;
+ connp->out_chunked_length -= bytes_to_consume;
+
+ // Have we seen the entire chunk?
+ if (connp->out_chunked_length == 0) {
+ connp->out_state = htp_connp_RES_BODY_CHUNKED_DATA_END;
+ return HTP_OK;
+ }
+
+ return HTP_DATA;
+}
+
+ /**
+  * Tells whether a byte is one of the whitespace/control bytes tolerated
+  * around a chunk length: HT (0x09), LF (0x0a), VT (0x0b), FF (0x0c),
+  * CR (0x0d) or SP (0x20).
+  *
+  * @param[in] c Byte to classify.
+  * @returns 1 if c is one of those bytes, 0 otherwise.
+  */
+ static inline int is_chunked_ctl_char(const unsigned char c) {
+     // 0x09..0x0d is a contiguous range; SP is the only outlier.
+     return ((c >= 0x09) && (c <= 0x0d)) || (c == 0x20);
+ }
+
+/**
+ * Peeks ahead into the data to try to see if it starts with a valid Chunked
+ * length field.
+ *
+ * @returns 1 if it looks valid, 0 if it looks invalid
+ */
+static inline int data_probe_chunk_length(htp_connp_t *connp) {
+ if (connp->out_current_read_offset - connp->out_current_consume_offset < 8) {
+ // not enough data so far, consider valid still
+ return 1;
+ }
+
+ unsigned char *data = connp->out_current_data + connp->out_current_consume_offset;
+ size_t len = connp->out_current_read_offset - connp->out_current_consume_offset;
+
+ size_t i = 0;
+ while (i < len) {
+ unsigned char c = data[i];
+
+ if (is_chunked_ctl_char(c)) {
+ // ctl char, still good.
+ } else if (isdigit(c) || (c >= 'a' && c <= 'f') || (c >= 'A' && c <= 'F')) {
+ // real chunklen char
+ return 1;
+ } else {
+ // leading junk, bad
+ return 0;
+ }
+ i++;
+ }
+ return 1;
+}
+
+/**
+ * Extracts chunk length.
+ *
+ * @param[in] connp
+ * @returns HTP_OK on state change, HTP_ERROR on error, or HTP_DATA when more data is needed.
+ */
+htp_status_t htp_connp_RES_BODY_CHUNKED_LENGTH(htp_connp_t *connp) {
+ for (;;) {
+ OUT_COPY_BYTE_OR_RETURN(connp);
+
+ // Have we reached the end of the line? Or is this not chunked after all?
+ if (connp->out_next_byte == LF ||
+ (!is_chunked_ctl_char((unsigned char) connp->out_next_byte) && !data_probe_chunk_length(connp))) {
+ unsigned char *data;
+ size_t len;
+
+ if (htp_connp_res_consolidate_data(connp, &data, &len) != HTP_OK) {
+ return HTP_ERROR;
+ }
+
+ connp->out_tx->response_message_len += len;
+
+ #ifdef HTP_DEBUG
+ fprint_raw_data(stderr, "Chunk length line", data, len);
+ #endif
+
+ int chunk_ext = 0;
+ connp->out_chunked_length = htp_parse_chunked_length(data, len, &chunk_ext);
+ if (chunk_ext == 1) {
+ htp_log(connp, HTP_LOG_MARK, HTP_LOG_WARNING, 0, "Request chunk extension");
+ }
+ // empty chunk length line, lets try to continue
+ if (connp->out_chunked_length == -1004) {
+ connp->out_current_consume_offset = connp->out_current_read_offset;
+ continue;
+ }
+ if (connp->out_chunked_length < 0) {
+ // reset out_current_read_offset so htp_connp_RES_BODY_IDENTITY_STREAM_CLOSE
+ // doesn't miss the first bytes
+
+ if (len > (size_t)connp->out_current_read_offset) {
+ connp->out_current_read_offset = 0;
+ } else {
+ connp->out_current_read_offset -= len;
+ }
+
+ connp->out_state = htp_connp_RES_BODY_IDENTITY_STREAM_CLOSE;
+ connp->out_tx->response_transfer_coding = HTP_CODING_IDENTITY;
+
+ htp_log(connp, HTP_LOG_MARK, HTP_LOG_ERROR, 0,
+ "Response chunk encoding: Invalid chunk length: %"PRId64"",
+ connp->out_chunked_length);
+ return HTP_OK;
+ }
+ htp_connp_res_clear_buffer(connp);
+
+ // Handle chunk length
+ if (connp->out_chunked_length > 0) {
+ // More data available
+ connp->out_state = htp_connp_RES_BODY_CHUNKED_DATA;
+ } else if (connp->out_chunked_length == 0) {
+ // End of data
+ connp->out_state = htp_connp_RES_HEADERS;
+ connp->out_tx->response_progress = HTP_RESPONSE_TRAILER;
+ }
+
+ return HTP_OK;
+ }
+ }
+
+ return HTP_ERROR;
+}
+
+/**
+ * Processes an identity response body of known length.
+ *
+ * @param[in] connp
+ * @returns HTP_OK on state change, HTP_ERROR on error, or HTP_DATA when more data is needed.
+ */
+htp_status_t htp_connp_RES_BODY_IDENTITY_CL_KNOWN(htp_connp_t *connp) {
+ size_t bytes_to_consume;
+
+ // Determine how many bytes we can consume.
+ if (connp->out_current_len - connp->out_current_read_offset >= connp->out_body_data_left) {
+ bytes_to_consume = connp->out_body_data_left;
+ } else {
+ bytes_to_consume = connp->out_current_len - connp->out_current_read_offset;
+ }
+
+ if (connp->out_status == HTP_STREAM_CLOSED) {
+ connp->out_state = htp_connp_RES_FINALIZE;
+ // Sends close signal to decompressors
+ htp_status_t rc = htp_tx_res_process_body_data_ex(connp->out_tx, NULL, 0);
+ return rc;
+ }
+ if (bytes_to_consume == 0) return HTP_DATA;
+
+ // Consume the data.
+ htp_status_t rc = htp_tx_res_process_body_data_ex(connp->out_tx, connp->out_current_data + connp->out_current_read_offset, bytes_to_consume);
+ if (rc != HTP_OK) return rc;
+
+ // Adjust the counters.
+ connp->out_current_read_offset += bytes_to_consume;
+ connp->out_current_consume_offset += bytes_to_consume;
+ connp->out_stream_offset += bytes_to_consume;
+ connp->out_body_data_left -= bytes_to_consume;
+
+ // Have we seen the entire response body?
+ if (connp->out_body_data_left == 0) {
+ connp->out_state = htp_connp_RES_FINALIZE;
+ // Tells decompressors to output partially decompressed data
+ rc = htp_tx_res_process_body_data_ex(connp->out_tx, NULL, 0);
+ return rc;
+ }
+
+ return HTP_DATA;
+}
+
+/**
+ * Processes identity response body of unknown length. In this case, we assume the
+ * response body consumes all data until the end of the stream.
+ *
+ * @param[in] connp
+ * @returns HTP_OK on state change, HTP_ERROR on error, or HTP_DATA when more data is needed.
+ */
+htp_status_t htp_connp_RES_BODY_IDENTITY_STREAM_CLOSE(htp_connp_t *connp) {
+ // Consume all data from the input buffer.
+ size_t bytes_to_consume = connp->out_current_len - connp->out_current_read_offset;
+
+ #ifdef HTP_DEBUG
+ fprintf(stderr, "bytes_to_consume %"PRIuMAX, (uintmax_t)bytes_to_consume);
+ #endif
+ if (bytes_to_consume != 0) {
+ htp_status_t rc = htp_tx_res_process_body_data_ex(connp->out_tx, connp->out_current_data + connp->out_current_read_offset, bytes_to_consume);
+ if (rc != HTP_OK) return rc;
+
+ // Adjust the counters.
+ connp->out_current_read_offset += bytes_to_consume;
+ connp->out_current_consume_offset += bytes_to_consume;
+ connp->out_stream_offset += bytes_to_consume;
+ }
+
+ // Have we seen the entire response body?
+ if (connp->out_status == HTP_STREAM_CLOSED) {
+ connp->out_state = htp_connp_RES_FINALIZE;
+ return HTP_OK;
+ }
+
+ return HTP_DATA;
+}
+
+/**
+ * Determines presence (and encoding) of a response body.
+ *
+ * @param[in] connp
+ * @returns HTP_OK on state change, HTP_ERROR on error, or HTP_DATA when more data is needed.
+ */
+htp_status_t htp_connp_RES_BODY_DETERMINE(htp_connp_t *connp) {
+ // If the request uses the CONNECT method, then not only are we
+ // to assume there's no body, but we need to ignore all
+ // subsequent data in the stream.
+ if (connp->out_tx->request_method_number == HTP_M_CONNECT) {
+ if ((connp->out_tx->response_status_number >= 200)
+ && (connp->out_tx->response_status_number <= 299)) {
+ // This is a successful CONNECT stream, which means
+ // we need to switch into tunneling mode: on the
+ // request side we'll now probe the tunnel data to see
+ // if we need to parse or ignore it. So on the response
+ // side we wrap up the tx and wait.
+ connp->out_state = htp_connp_RES_FINALIZE;
+
+ // we may have response headers
+ htp_status_t rc = htp_tx_state_response_headers(connp->out_tx);
+ return rc;
+ } else if (connp->out_tx->response_status_number == 407) {
+ // proxy telling us to auth
+ if (connp->in_status != HTP_STREAM_ERROR)
+ connp->in_status = HTP_STREAM_DATA;
+ } else {
+ // This is a failed CONNECT stream, which means that
+ // we can unblock request parsing
+ if (connp->in_status != HTP_STREAM_ERROR)
+ connp->in_status = HTP_STREAM_DATA;
+
+ // We are going to continue processing this transaction,
+ // adding a note for ourselves to stop at the end (because
+ // we don't want to see the beginning of a new transaction).
+ connp->out_data_other_at_tx_end = 1;
+ }
+ }
+
+ htp_header_t *cl = htp_table_get_c(connp->out_tx->response_headers, "content-length");
+ htp_header_t *te = htp_table_get_c(connp->out_tx->response_headers, "transfer-encoding");
+
+ // Check for "101 Switching Protocol" response.
+ // If it's seen, it means that traffic after empty line following headers
+ // is no longer HTTP. We can treat it similarly to CONNECT.
+ // Unlike CONNECT, however, upgrades from HTTP to HTTP seem
+ // rather unlikely, so don't try to probe tunnel for nested HTTP,
+ // and switch to tunnel mode right away.
+ if (connp->out_tx->response_status_number == 101) {
+ if (te == NULL && cl == NULL) {
+ connp->out_state = htp_connp_RES_FINALIZE;
+
+ if (connp->in_status != HTP_STREAM_ERROR)
+ connp->in_status = HTP_STREAM_TUNNEL;
+ connp->out_status = HTP_STREAM_TUNNEL;
+
+ // we may have response headers
+ htp_status_t rc = htp_tx_state_response_headers(connp->out_tx);
+ return rc;
+ } else {
+ htp_log(connp, HTP_LOG_MARK, HTP_LOG_WARNING, 0, "Switching Protocol with Content-Length");
+ }
+ }
+
+ // Check for an interim "100 Continue" response. Ignore it if found, and revert back to RES_LINE.
+ if (connp->out_tx->response_status_number == 100 && te == NULL) {
+ int is100continue = 1;
+ if (cl != NULL){
+ if (htp_parse_content_length(cl->value, connp) > 0) {
+ is100continue = 0;
+ }
+ }
+ if (is100continue) {
+ if (connp->out_tx->seen_100continue != 0) {
+ htp_log(connp, HTP_LOG_MARK, HTP_LOG_ERROR, 0, "Already seen 100-Continue.");
+ }
+
+ // Ignore any response headers seen so far.
+ htp_header_t *h = NULL;
+ for (size_t i = 0, n = htp_table_size(connp->out_tx->response_headers); i < n; i++) {
+ h = htp_table_get_index(connp->out_tx->response_headers, i, NULL);
+ bstr_free(h->name);
+ bstr_free(h->value);
+ free(h);
+ }
+
+ htp_table_clear(connp->out_tx->response_headers);
+
+ // Expecting to see another response line next.
+ connp->out_state = htp_connp_RES_LINE;
+ connp->out_tx->response_progress = HTP_RESPONSE_LINE;
+ connp->out_tx->seen_100continue++;
+
+ return HTP_OK;
+ }
+ }
+
+ // A request can indicate it waits for headers validation
+ // before sending its body cf
+ // https://developer.mozilla.org/en-US/docs/Web/HTTP/Headers/Expect
+ if (connp->out_tx->response_status_number >= 400 &&
+ connp->out_tx->response_status_number <= 499 &&
+ connp->in_content_length > 0 &&
+ connp->in_body_data_left == connp->in_content_length) {
+ htp_header_t *exp = htp_table_get_c(connp->out_tx->request_headers, "expect");
+ if ((exp != NULL) && (bstr_cmp_c_nocase(exp->value, "100-continue") == 0)) {
+ connp->in_state = htp_connp_REQ_FINALIZE;
+ }
+ }
+
+ // 1. Any response message which MUST NOT include a message-body
+ // (such as the 1xx, 204, and 304 responses and any response to a HEAD
+ // request) is always terminated by the first empty line after the
+ // header fields, regardless of the entity-header fields present in the
+ // message.
+ if (connp->out_tx->request_method_number == HTP_M_HEAD) {
+ // There's no response body whatsoever
+ connp->out_tx->response_transfer_coding = HTP_CODING_NO_BODY;
+ connp->out_state = htp_connp_RES_FINALIZE;
+ }
+ else if (((connp->out_tx->response_status_number >= 100) && (connp->out_tx->response_status_number <= 199))
+ || (connp->out_tx->response_status_number == 204) || (connp->out_tx->response_status_number == 304)) {
+ // There should be no response body
+ // but browsers interpret content sent by the server as such
+ if (te == NULL && cl == NULL) {
+ connp->out_tx->response_transfer_coding = HTP_CODING_NO_BODY;
+ connp->out_state = htp_connp_RES_FINALIZE;
+ } else {
+ htp_log(connp, HTP_LOG_MARK, HTP_LOG_WARNING, 0, "Unexpected Response body");
+ }
+ }
+ // Hack condition to check that we do not assume "no body"
+ if (connp->out_state != htp_connp_RES_FINALIZE) {
+ // We have a response body
+ htp_header_t *ct = htp_table_get_c(connp->out_tx->response_headers, "content-type");
+ if (ct != NULL) {
+ connp->out_tx->response_content_type = bstr_dup_lower(ct->value);
+ if (connp->out_tx->response_content_type == NULL) return HTP_ERROR;
+
+ // Ignore parameters
+ unsigned char *data = bstr_ptr(connp->out_tx->response_content_type);
+ size_t len = bstr_len(ct->value);
+ size_t newlen = 0;
+ while (newlen < len) {
+ // TODO Some platforms may do things differently here.
+ if (htp_is_space(data[newlen]) || (data[newlen] == ';')) {
+ bstr_adjust_len(connp->out_tx->response_content_type, newlen);
+ break;
+ }
+
+ newlen++;
+ }
+ }
+
+ // 2. If a Transfer-Encoding header field (section 14.40) is present and
+ // indicates that the "chunked" transfer coding has been applied, then
+ // the length is defined by the chunked encoding (section 3.6).
+ if ((te != NULL) && (bstr_index_of_c_nocasenorzero(te->value, "chunked") != -1)) {
+ if (bstr_cmp_c_nocase(te->value, "chunked") != 0) {
+ htp_log(connp, HTP_LOG_MARK, HTP_LOG_WARNING, 0,
+ "Transfer-encoding has abnormal chunked value");
+ }
+
+ // spec says chunked is HTTP/1.1 only, but some browsers accept it
+ // with 1.0 as well
+ if (connp->out_tx->response_protocol_number < HTP_PROTOCOL_1_1) {
+ htp_log(connp, HTP_LOG_MARK, HTP_LOG_WARNING, 0,
+ "Chunked transfer-encoding on HTTP/0.9 or HTTP/1.0");
+ }
+
+ // If the T-E header is present we are going to use it.
+ connp->out_tx->response_transfer_coding = HTP_CODING_CHUNKED;
+
+ // We are still going to check for the presence of C-L
+ if (cl != NULL) {
+ // This is a violation of the RFC
+ connp->out_tx->flags |= HTP_REQUEST_SMUGGLING;
+ }
+
+ connp->out_state = htp_connp_RES_BODY_CHUNKED_LENGTH;
+ connp->out_tx->response_progress = HTP_RESPONSE_BODY;
+ }// 3. If a Content-Length header field (section 14.14) is present, its
+ // value in bytes represents the length of the message-body.
+ else if (cl != NULL) {
+ // We know the exact length
+ connp->out_tx->response_transfer_coding = HTP_CODING_IDENTITY;
+
+ // Check for multiple C-L headers
+ if (cl->flags & HTP_FIELD_REPEATED) {
+ connp->out_tx->flags |= HTP_REQUEST_SMUGGLING;
+ }
+
+ // Get body length
+ connp->out_tx->response_content_length = htp_parse_content_length(cl->value, connp);
+ if (connp->out_tx->response_content_length < 0) {
+ htp_log(connp, HTP_LOG_MARK, HTP_LOG_ERROR, 0, "Invalid C-L field in response: %"PRId64"",
+ connp->out_tx->response_content_length);
+ return HTP_ERROR;
+ } else {
+ connp->out_content_length = connp->out_tx->response_content_length;
+ connp->out_body_data_left = connp->out_content_length;
+
+ if (connp->out_content_length != 0) {
+ connp->out_state = htp_connp_RES_BODY_IDENTITY_CL_KNOWN;
+ connp->out_tx->response_progress = HTP_RESPONSE_BODY;
+ } else {
+ connp->out_state = htp_connp_RES_FINALIZE;
+ }
+ }
+ } else {
+ // 4. If the message uses the media type "multipart/byteranges", which is
+ // self-delimiting, then that defines the length. This media type MUST
+ // NOT be used unless the sender knows that the recipient can parse it;
+ // the presence in a request of a Range header with multiple byte-range
+ // specifiers implies that the client can parse multipart/byteranges
+ // responses.
+ if (ct != NULL) {
+ // TODO Handle multipart/byteranges
+ if (bstr_index_of_c_nocase(ct->value, "multipart/byteranges") != -1) {
+ htp_log(connp, HTP_LOG_MARK, HTP_LOG_ERROR, 0,
+ "C-T multipart/byteranges in responses not supported");
+ return HTP_ERROR;
+ }
+ }
+
+ // 5. By the server closing the connection. (Closing the connection
+ // cannot be used to indicate the end of a request body, since that
+ // would leave no possibility for the server to send back a response.)
+ connp->out_state = htp_connp_RES_BODY_IDENTITY_STREAM_CLOSE;
+ connp->out_tx->response_transfer_coding = HTP_CODING_IDENTITY;
+ connp->out_tx->response_progress = HTP_RESPONSE_BODY;
+ connp->out_body_data_left = -1;
+ }
+ }
+
+ // NOTE We do not need to check for short-style HTTP/0.9 requests here because
+ // that is done earlier, before response line parsing begins
+
+ htp_status_t rc = htp_tx_state_response_headers(connp->out_tx);
+ if (rc != HTP_OK) return rc;
+
+ return HTP_OK;
+}
+
+/**
+ * Parses response headers.
+ *
+ * Reads the response stream one line at a time. A line accepted by
+ * htp_connp_is_line_terminator() ends the header block: any header still held
+ * in connp->out_header is processed, and the parser moves to
+ * RES_BODY_DETERMINE when response_progress is HTP_RESPONSE_HEADERS, or runs the
+ * trailer hook and moves to RES_FINALIZE otherwise (the trailer case).
+ * Any other line is handed to cfg->process_response_header(); lines that may be
+ * continued by a folded line are kept in connp->out_header until the
+ * continuation (or the next header) has been seen.
+ * If the stream is already closed, the trailer is finalized straight away.
+ *
+ * @param[in] connp
+ * @returns HTP_OK on state change, HTP_ERROR on error, or HTP_DATA when more data is needed.
+ *          HTP_DATA_BUFFER is returned when the available data ends right after a CR
+ *          (OUT_PEEK_NEXT yields -1), so the decision has to wait for the next chunk.
+ */
+htp_status_t htp_connp_RES_HEADERS(htp_connp_t *connp) {
+ int endwithcr; // 1 when the line just read ended with CR LF or with a lone CR
+ int lfcrending = 0; // 1 right after a line that ended with LF CR; cleared by the next ordinary byte
+
+ for (;;) {
+ if (connp->out_status == HTP_STREAM_CLOSED) {
+ // Finalize sending raw trailer data.
+ htp_status_t rc = htp_connp_res_receiver_finalize_clear(connp);
+ if (rc != HTP_OK) return rc;
+
+ // Run hook response_TRAILER.
+ rc = htp_hook_run_all(connp->cfg->hook_response_trailer, connp->out_tx);
+ if (rc != HTP_OK) return rc;
+
+ connp->out_state = htp_connp_RES_FINALIZE;
+ return HTP_OK;
+ }
+ OUT_COPY_BYTE_OR_RETURN(connp); // NOTE(review): presumably returns from this function when no byte is available - confirm in the macro
+
+ // Have we reached the end of the line?
+ if (connp->out_next_byte != LF && connp->out_next_byte != CR) {
+ lfcrending = 0;
+ } else {
+ endwithcr = 0;
+ if (connp->out_next_byte == CR) {
+ OUT_PEEK_NEXT(connp);
+ if (connp->out_next_byte == -1) {
+ return HTP_DATA_BUFFER; // CR is the last byte we have: cannot tell CR LF from a lone CR yet
+ } else if (connp->out_next_byte == LF) {
+ OUT_COPY_BYTE_OR_RETURN(connp);
+ if (lfcrending) {
+ // Handling LFCRCRLFCRLF
+ // These 6 characters mean only 2 end of lines
+ OUT_PEEK_NEXT(connp);
+ if (connp->out_next_byte == CR) {
+ OUT_COPY_BYTE_OR_RETURN(connp);
+ connp->out_current_consume_offset++;
+ OUT_PEEK_NEXT(connp);
+ if (connp->out_next_byte == LF) {
+ OUT_COPY_BYTE_OR_RETURN(connp);
+ connp->out_current_consume_offset++;
+ htp_log(connp, HTP_LOG_MARK, HTP_LOG_WARNING, 0,
+ "Weird response end of lines mix");
+ }
+ }
+ }
+ } else if (connp->out_next_byte == CR) {
+ continue; // CR CR: keep reading, the second CR is examined on the next iteration
+ }
+ lfcrending = 0;
+ endwithcr = 1;
+ } else {
+ // connp->out_next_byte == LF
+ OUT_PEEK_NEXT(connp);
+ lfcrending = 0;
+ if (connp->out_next_byte == CR) {
+ // handles LF-CR sequence as end of line
+ OUT_COPY_BYTE_OR_RETURN(connp);
+ lfcrending = 1;
+ }
+ }
+
+ unsigned char *data;
+ size_t len;
+
+ if (htp_connp_res_consolidate_data(connp, &data, &len) != HTP_OK) {
+ return HTP_ERROR;
+ }
+
+ // CRCRLF is not an empty line
+ if (endwithcr && len < 2) {
+ continue;
+ }
+
+ #ifdef HTP_DEBUG
+ fprint_raw_data(stderr, __func__, data, len);
+ #endif
+
+ int next_no_lf = 0; // 1 when another byte is available in this chunk and it is not LF
+ if (connp->out_current_read_offset < connp->out_current_len &&
+ connp->out_current_data[connp->out_current_read_offset] != LF) {
+ next_no_lf = 1;
+ }
+ // Should we terminate headers?
+ if (htp_connp_is_line_terminator(connp, data, len, next_no_lf)) {
+ // Parse previous header, if any.
+ if (connp->out_header != NULL) {
+ if (connp->cfg->process_response_header(connp, bstr_ptr(connp->out_header),
+ bstr_len(connp->out_header)) != HTP_OK) return HTP_ERROR;
+
+ bstr_free(connp->out_header);
+ connp->out_header = NULL;
+ }
+
+ htp_connp_res_clear_buffer(connp);
+
+ // We've seen all response headers.
+ if (connp->out_tx->response_progress == HTP_RESPONSE_HEADERS) {
+ // Response headers.
+
+ // The next step is to determine if this response has a body.
+ connp->out_state = htp_connp_RES_BODY_DETERMINE;
+ } else {
+ // Response trailer.
+
+ // Finalize sending raw trailer data.
+ htp_status_t rc = htp_connp_res_receiver_finalize_clear(connp);
+ if (rc != HTP_OK) return rc;
+
+ // Run hook response_TRAILER.
+ rc = htp_hook_run_all(connp->cfg->hook_response_trailer, connp->out_tx);
+ if (rc != HTP_OK) return rc;
+
+ // The next step is to finalize this response.
+ connp->out_state = htp_connp_RES_FINALIZE;
+ }
+
+ return HTP_OK;
+ }
+
+ htp_chomp(data, &len);
+
+ // Check for header folding.
+ if (htp_connp_is_line_folded(data, len) == 0) {
+ // New header line.
+
+ // Parse previous header, if any.
+ if (connp->out_header != NULL) {
+ if (connp->cfg->process_response_header(connp, bstr_ptr(connp->out_header),
+ bstr_len(connp->out_header)) != HTP_OK) return HTP_ERROR;
+
+ bstr_free(connp->out_header);
+ connp->out_header = NULL;
+ }
+
+ OUT_PEEK_NEXT(connp);
+
+ if (htp_is_folding_char(connp->out_next_byte) == 0) {
+ // Because we know this header is not folded, we can process the buffer straight away.
+ if (connp->cfg->process_response_header(connp, data, len) != HTP_OK) return HTP_ERROR;
+ } else {
+ // Keep the partial header data for parsing later.
+ connp->out_header = bstr_dup_mem(data, len);
+ if (connp->out_header == NULL) return HTP_ERROR;
+ }
+ } else {
+ // Folding; check that there's a previous header line to add to.
+ if (connp->out_header == NULL) {
+ // Invalid folding.
+
+ // Warn only once per transaction.
+ if (!(connp->out_tx->flags & HTP_INVALID_FOLDING)) {
+ connp->out_tx->flags |= HTP_INVALID_FOLDING;
+ htp_log(connp, HTP_LOG_MARK, HTP_LOG_WARNING, 0, "Invalid response field folding");
+ }
+
+ // Keep the header data for parsing later.
+ size_t trim = 0; // number of leading folding characters to drop
+ while(trim < len) {
+ if (!htp_is_folding_char(data[trim])) {
+ break;
+ }
+ trim++;
+ }
+ connp->out_header = bstr_dup_mem(data + trim, len - trim); // may be empty when the whole line was folding characters
+ if (connp->out_header == NULL) return HTP_ERROR;
+ } else {
+ size_t colon_pos = 0;
+ while ((colon_pos < len) && (data[colon_pos] != ':')) colon_pos++;
+
+ if (colon_pos < len &&
+ bstr_chr(connp->out_header, ':') >= 0 &&
+ connp->out_tx->response_protocol_number == HTP_PROTOCOL_1_1) {
+ // Warn only once per transaction.
+ if (!(connp->out_tx->flags & HTP_INVALID_FOLDING)) {
+ connp->out_tx->flags |= HTP_INVALID_FOLDING;
+ htp_log(connp, HTP_LOG_MARK, HTP_LOG_WARNING, 0, "Invalid response field folding");
+ }
+ if (connp->cfg->process_response_header(connp, bstr_ptr(connp->out_header),
+ bstr_len(connp->out_header)) != HTP_OK)
+ return HTP_ERROR;
+ bstr_free(connp->out_header);
+ connp->out_header = bstr_dup_mem(data+1, len-1); // the "folded" line has its own colon: start a new header from it, minus the leading folding character
+ if (connp->out_header == NULL)
+ return HTP_ERROR;
+ } else {
+ // Add to the existing header.
+ if (bstr_len(connp->out_header) < HTP_MAX_HEADER_FOLDED) {
+ bstr *new_out_header = bstr_add_mem(connp->out_header, data, len);
+ if (new_out_header == NULL)
+ return HTP_ERROR;
+ connp->out_header = new_out_header;
+ } else {
+ htp_log(connp, HTP_LOG_MARK, HTP_LOG_WARNING, 0, "Response field length exceeds folded maximum");
+ }
+ }
+ }
+ }
+
+ htp_connp_res_clear_buffer(connp);
+ }
+ }
+
+ return HTP_ERROR; // not reached: the loop above is only left through return
+}
+
+/**
+ * Parses response line.
+ *
+ * Collects bytes up to the end of the first line (LF, CR LF, a lone CR, or
+ * stream closure) and then does one of three things with it:
+ *  - a line htp_connp_is_line_ignorable() accepts is counted in
+ *    response_ignored_lines and dropped;
+ *  - a line htp_treat_response_line_as_body() accepts is either skipped (see
+ *    the condition below) or handed over as response body data;
+ *  - anything else is stored in tx->response_line, parsed with
+ *    cfg->parse_response_line(), and the parser moves on to RES_HEADERS.
+ *
+ * @param[in] connp
+ * @returns HTP_OK on state change, HTP_ERROR on error, or HTP_DATA when more data is needed.
+ *          HTP_DATA_BUFFER is returned when the available data ends right after a CR.
+ */
+htp_status_t htp_connp_RES_LINE(htp_connp_t *connp) {
+ for (;;) {
+ // Don't try to get more data if the stream is closed. If we do, we'll return, asking for more data.
+ if (connp->out_status != HTP_STREAM_CLOSED) {
+ // Get one byte
+ OUT_COPY_BYTE_OR_RETURN(connp);
+ }
+
+ // Have we reached the end of the line? We treat stream closure as end of line in
+ // order to handle the case when the first line of the response is actually response body
+ // (and we wish it processed as such).
+ if (connp->out_next_byte == CR) {
+ OUT_PEEK_NEXT(connp);
+ if (connp->out_next_byte == -1) {
+ return HTP_DATA_BUFFER; // CR is the last byte we have: wait for the next chunk
+ } else if (connp->out_next_byte == LF) {
+ continue; // CR LF: loop again so the LF itself is copied and ends the line
+ }
+ connp->out_next_byte = LF; // lone CR: treat it as the line terminator
+ }
+ if ((connp->out_next_byte == LF)||(connp->out_status == HTP_STREAM_CLOSED)) {
+ unsigned char *data;
+ size_t len;
+
+ if (htp_connp_res_consolidate_data(connp, &data, &len) != HTP_OK) {
+ return HTP_ERROR;
+ }
+
+ #ifdef HTP_DEBUG
+ fprint_raw_data(stderr, __func__, data, len);
+ #endif
+
+ // Is this a line that should be ignored?
+ if (htp_connp_is_line_ignorable(connp, data, len)) {
+ if (connp->out_status == HTP_STREAM_CLOSED) {
+ connp->out_state = htp_connp_RES_FINALIZE;
+ }
+ // We have an empty/whitespace line, which we'll note, ignore and move on
+ connp->out_tx->response_ignored_lines++;
+
+ // TODO How many lines are we willing to accept?
+
+ // Start again
+ htp_connp_res_clear_buffer(connp);
+
+ return HTP_OK;
+ }
+
+ // Deallocate previous response line allocations, which we would have on a 100 response.
+
+ if (connp->out_tx->response_line != NULL) {
+ bstr_free(connp->out_tx->response_line);
+ connp->out_tx->response_line = NULL;
+ }
+
+ if (connp->out_tx->response_protocol != NULL) {
+ bstr_free(connp->out_tx->response_protocol);
+ connp->out_tx->response_protocol = NULL;
+ }
+
+ if (connp->out_tx->response_status != NULL) {
+ bstr_free(connp->out_tx->response_status);
+ connp->out_tx->response_status = NULL;
+ }
+
+ if (connp->out_tx->response_message != NULL) {
+ bstr_free(connp->out_tx->response_message);
+ connp->out_tx->response_message = NULL;
+ }
+
+ // Process response line.
+
+ int chomp_result = htp_chomp(data, &len); // NOTE(review): presumably the number of line-ending bytes removed - confirm in htp_chomp()
+
+ // If the response line is invalid, determine if it _looks_ like
+ // a response line. If it does not look like a line, process the
+ // data as a response body because that is what browsers do.
+
+ if (htp_treat_response_line_as_body(data, len)) {
+ // skip this line when more than one further byte is available and either the next byte is 'H' or this line is at most 2 bytes long
+ if (connp->out_current_read_offset+1 < connp->out_current_len && (connp->out_current_data[connp->out_current_read_offset] == 'H' || len <= 2)) {
+ connp->out_tx->response_ignored_lines++;
+ htp_connp_res_clear_buffer(connp);
+ return HTP_OK;
+ }
+ connp->out_tx->response_content_encoding_processing = HTP_COMPRESSION_NONE;
+
+ connp->out_current_consume_offset = connp->out_current_read_offset; // everything read so far counts as consumed
+ htp_status_t rc = htp_tx_res_process_body_data_ex(connp->out_tx, data, len + chomp_result); // pass the line with its chomped ending restored
+ htp_connp_res_clear_buffer(connp);
+ if (rc != HTP_OK) return rc;
+
+ // Continue to process response body. Because we don't have
+ // any headers to parse, we assume the body continues until
+ // the end of the stream.
+
+ // Have we seen the entire response body?
+ if (connp->out_current_len <= connp->out_current_read_offset) {
+ connp->out_tx->response_transfer_coding = HTP_CODING_IDENTITY;
+ connp->out_tx->response_progress = HTP_RESPONSE_BODY;
+ connp->out_body_data_left = -1;
+ connp->out_state = htp_connp_RES_FINALIZE;
+ }
+
+ return HTP_OK;
+ }
+
+ connp->out_tx->response_line = bstr_dup_mem(data, len);
+ if (connp->out_tx->response_line == NULL) return HTP_ERROR;
+
+ if (connp->cfg->parse_response_line(connp) != HTP_OK) return HTP_ERROR;
+
+ htp_status_t rc = htp_tx_state_response_line(connp->out_tx);
+ if (rc != HTP_OK) return rc;
+
+ htp_connp_res_clear_buffer(connp);
+
+ // Move on to the next phase.
+ connp->out_state = htp_connp_RES_HEADERS;
+ connp->out_tx->response_progress = HTP_RESPONSE_HEADERS;
+
+ return HTP_OK;
+ }
+ }
+
+ return HTP_ERROR; // not reached: the loop above is only left through return
+}
+
+/**
+ * Reports how far the response parser has read into the current data chunk.
+ *
+ * @param[in] connp
+ * @return The value of connp->out_current_read_offset, converted to size_t.
+ */
+size_t htp_connp_res_data_consumed(htp_connp_t *connp) {
+    const size_t consumed = connp->out_current_read_offset;
+
+    return consumed;
+}
+/**
+ * Completes the current response, first looking at any bytes that follow it.
+ *
+ * Unless the stream is closed, the next line is copied into the buffer. The
+ * buffered bytes are then classified:
+ *  - nothing buffered: the response is completed;
+ *  - htp_treat_response_line_as_body() accepts them: they are logged as
+ *    "Unexpected response body" and passed on as body data, and the result of
+ *    that call is returned without completing the response here;
+ *  - otherwise: the read offset is moved back by the buffered length so the
+ *    bytes can be parsed again as the next response line, and the response is
+ *    completed.
+ *
+ * @param[in] connp
+ * @returns The status of htp_tx_state_response_complete_ex() or
+ *          htp_tx_res_process_body_data_ex(), or HTP_ERROR when the buffered
+ *          data cannot be consolidated.
+ */
+htp_status_t htp_connp_RES_FINALIZE(htp_connp_t *connp) {
+ if (connp->out_status != HTP_STREAM_CLOSED) {
+ OUT_PEEK_NEXT(connp);
+ if (connp->out_next_byte == -1) {
+ return htp_tx_state_response_complete_ex(connp->out_tx, 0); // no further byte available: complete right away
+ }
+ if (connp->out_next_byte != LF || connp->out_current_consume_offset >= connp->out_current_read_offset) {
+ for (;;) { // copy bytes until a line feed has been copied
+ OUT_COPY_BYTE_OR_RETURN(connp);
+ // Have we reached the end of the line? The check has to come after
+ // OUT_COPY_BYTE_OR_RETURN so that the line feed itself is copied too.
+ if (connp->out_next_byte == LF)
+ break;
+ }
+ }
+ }
+ size_t bytes_left;
+ unsigned char * data;
+
+ if (htp_connp_res_consolidate_data(connp, &data, &bytes_left) != HTP_OK) {
+ return HTP_ERROR;
+ }
+#ifdef HTP_DEBUG
+ fprint_raw_data(stderr, "PROBING response finalize", data, bytes_left);
+#endif
+ if (bytes_left == 0) {
+ //closing
+ return htp_tx_state_response_complete_ex(connp->out_tx, 0);
+ }
+
+ if (htp_treat_response_line_as_body(data, bytes_left)) {
+ // Interpret remaining bytes as body data
+ htp_log(connp, HTP_LOG_MARK, HTP_LOG_WARNING, 0, "Unexpected response body");
+ htp_status_t rc = htp_tx_res_process_body_data_ex(connp->out_tx, data, bytes_left);
+ htp_connp_res_clear_buffer(connp);
+ return rc;
+ }
+
+ //unread last end of line so that RES_LINE works
+ if (connp->out_current_read_offset < (int64_t)bytes_left) {
+ connp->out_current_read_offset=0; // clamp: never move the read offset below the start of the chunk
+ } else {
+ connp->out_current_read_offset-=bytes_left;
+ }
+ if (connp->out_current_read_offset < connp->out_current_consume_offset) {
+ connp->out_current_consume_offset=connp->out_current_read_offset; // keep the consume offset from running ahead of the read offset
+ }
+ return htp_tx_state_response_complete_ex(connp->out_tx, 0 /* not hybrid mode */);
+}
+
+/**
+ * Response idle state: selects the transaction the next response belongs to
+ * and starts response processing on it.
+ *
+ * The transaction at index out_next_tx_index is used when there is one. When
+ * there is none, an error is logged and a fresh transaction is created whose
+ * request URI and parsed path are set to REQUEST_URI_NOT_SEEN, so that a
+ * response without a request can still be processed.
+ *
+ * @param[in] connp
+ * @returns HTP_OK on state change, HTP_ERROR on error, or HTP_DATA when more data is needed.
+ */
+htp_status_t htp_connp_RES_IDLE(htp_connp_t *connp) {
+    // Leave the idle state only when at least one byte is waiting; this
+    // avoids creating structures for a connection that has no more data.
+    OUT_TEST_NEXT_BYTE_OR_RETURN(connp);
+
+    // Look up the transaction this response should be paired with.
+    connp->out_tx = htp_list_get(connp->conn->transactions, connp->out_next_tx_index);
+
+    if (connp->out_tx != NULL) {
+        // Matching request found: this transaction slot is now used.
+        connp->out_next_tx_index++;
+
+        // TODO Detect state mismatch
+
+        connp->out_content_length = -1;
+        connp->out_body_data_left = -1;
+    } else {
+        htp_log(connp, HTP_LOG_MARK, HTP_LOG_ERROR, 0, "Unable to match response to request");
+
+        // A request still waiting for its next line or body is completed first.
+        if (connp->in_state == htp_connp_REQ_FINALIZE) {
+            htp_tx_state_request_complete(connp->in_tx);
+        }
+
+        // Build a stand-in transaction so the response can be processed anyway.
+        connp->out_tx = htp_connp_tx_create(connp);
+        if (connp->out_tx == NULL) return HTP_ERROR;
+
+        connp->out_tx->parsed_uri = htp_uri_alloc();
+        if (connp->out_tx->parsed_uri == NULL) return HTP_ERROR;
+
+        connp->out_tx->parsed_uri->path = bstr_dup_c(REQUEST_URI_NOT_SEEN);
+        if (connp->out_tx->parsed_uri->path == NULL) return HTP_ERROR;
+
+        connp->out_tx->request_uri = bstr_dup_c(REQUEST_URI_NOT_SEEN);
+        if (connp->out_tx->request_uri == NULL) return HTP_ERROR;
+
+        connp->in_state = htp_connp_REQ_FINALIZE;
+#ifdef HTP_DEBUG
+        fprintf(stderr, "picked up response w/o request");
+#endif
+        // The stand-in occupies one transaction slot as well.
+        connp->out_next_tx_index++;
+    }
+
+    // HTP_OK and every other status are passed through unchanged.
+    return htp_tx_state_response_start(connp->out_tx);
+}
+
+/**
+ * Feeds one chunk of response data to the connection parser and runs the
+ * response state machine until it needs more data, stops, or fails.
+ *
+ * The chunk is not copied on entry: the parser keeps the pointer and resets
+ * its read, consume and receiver offsets to zero for the new chunk.
+ *
+ * @param[in] connp
+ * @param[in] timestamp Optional; copied into connp->out_timestamp when not NULL.
+ * @param[in] data Chunk bytes. NULL together with len > 0 is treated as a gap,
+ *                 which is only accepted while in the identity body states or
+ *                 in RES_FINALIZE.
+ * @param[in] len Chunk length. Zero is accepted only when the stream status is
+ *                already HTP_STREAM_CLOSED (used to force finalization).
+ * @return One of the HTP_STREAM_* codes: HTP_STREAM_DATA when more data is
+ *         needed, HTP_STREAM_DATA_OTHER when parsing was suspended with part of
+ *         the chunk unread, HTP_STREAM_TUNNEL, HTP_STREAM_STOP,
+ *         HTP_STREAM_ERROR, or HTP_STREAM_CLOSED for a rejected zero-length
+ *         chunk or a gap in a state that does not allow one.
+ */
+int htp_connp_res_data(htp_connp_t *connp, const htp_time_t *timestamp, const void *data, size_t len) {
+    #ifdef HTP_DEBUG
+    fprintf(stderr, "htp_connp_res_data(connp->out_status %x)\n", connp->out_status);
+    fprint_raw_data(stderr, __func__, data, len);
+    #endif
+
+    // Return if the connection is in stop state
+    if (connp->out_status == HTP_STREAM_STOP) {
+        htp_log(connp, HTP_LOG_MARK, HTP_LOG_INFO, 0, "Outbound parser is in HTP_STREAM_STOP");
+
+        return HTP_STREAM_STOP;
+    }
+
+    // Return if the connection has had a fatal error
+    if (connp->out_status == HTP_STREAM_ERROR) {
+        htp_log(connp, HTP_LOG_MARK, HTP_LOG_ERROR, 0, "Outbound parser is in HTP_STREAM_ERROR");
+
+        #ifdef HTP_DEBUG
+        // The trace now names the code that is actually returned.
+        fprintf(stderr, "htp_connp_res_data: returning HTP_STREAM_ERROR (previous error)\n");
+        #endif
+
+        return HTP_STREAM_ERROR;
+    }
+
+    // Sanity check: we must have a transaction pointer if the state is not IDLE (no outbound transaction)
+    if ((connp->out_tx == NULL)&&(connp->out_state != htp_connp_RES_IDLE)) {
+        connp->out_status = HTP_STREAM_ERROR;
+
+        htp_log(connp, HTP_LOG_MARK, HTP_LOG_ERROR, 0, "Missing outbound transaction data");
+
+        return HTP_STREAM_ERROR;
+    }
+
+    // If the length of the supplied data chunk is zero, proceed
+    // only if the stream has been closed. We do not allow zero-sized
+    // chunks in the API, but we use it internally to force the parsers
+    // to finalize parsing.
+    if (len == 0 && connp->out_status != HTP_STREAM_CLOSED) {
+        htp_log(connp, HTP_LOG_MARK, HTP_LOG_ERROR, 0, "Zero-length data chunks are not allowed");
+
+        #ifdef HTP_DEBUG
+        // The trace now names the code that is actually returned.
+        fprintf(stderr, "htp_connp_res_data: returning HTP_STREAM_CLOSED (zero-length chunk)\n");
+        #endif
+
+        return HTP_STREAM_CLOSED;
+    }
+
+    // Remember the timestamp of the current response data chunk
+    if (timestamp != NULL) {
+        memcpy(&connp->out_timestamp, timestamp, sizeof (*timestamp));
+    }
+
+    // Store the current chunk information
+    connp->out_current_data = (unsigned char *) data;
+    connp->out_current_len = len;
+    connp->out_current_read_offset = 0;
+    connp->out_current_consume_offset = 0;
+    connp->out_current_receiver_offset = 0;
+
+    htp_conn_track_outbound_data(connp->conn, len, timestamp);
+
+    // Return without processing any data if the stream is in tunneling
+    // mode (which it would be after an initial CONNECT transaction).
+    if (connp->out_status == HTP_STREAM_TUNNEL) {
+        #ifdef HTP_DEBUG
+        fprintf(stderr, "htp_connp_res_data: returning HTP_STREAM_TUNNEL\n");
+        #endif
+
+        return HTP_STREAM_TUNNEL;
+    }
+
+    // Invoke a processor, in a loop, until an error
+    // occurs or until we run out of data. Many processors
+    // will process a response, each pointing to the next
+    // processor that needs to run.
+    for (;;) {
+        #ifdef HTP_DEBUG
+        fprintf(stderr, "htp_connp_res_data: out state=%s, progress=%s\n",
+                htp_connp_out_state_as_string(connp),
+                htp_tx_response_progress_as_string(connp->out_tx));
+        #endif
+
+        // Return if there's been an error
+        // or if we've run out of data. We are relying
+        // on processors to add error messages, so we'll
+        // keep quiet here.
+        htp_status_t rc;
+
+        // Handle a gap: a chunk with a length but no bytes.
+        if (data == NULL && len > 0) {
+            if (connp->out_state == htp_connp_RES_BODY_IDENTITY_CL_KNOWN ||
+                    connp->out_state == htp_connp_RES_BODY_IDENTITY_STREAM_CLOSE) {
+                rc = connp->out_state(connp);
+            } else if (connp->out_state == htp_connp_RES_FINALIZE) {
+                rc = htp_tx_state_response_complete_ex(connp->out_tx, 0);
+            } else {
+                htp_log(connp, HTP_LOG_MARK, HTP_LOG_ERROR, 0, "Gaps are not allowed during this state");
+                return HTP_STREAM_CLOSED;
+            }
+        } else {
+            rc = connp->out_state(connp);
+        }
+        if (rc == HTP_OK) {
+            if (connp->out_status == HTP_STREAM_TUNNEL) {
+                #ifdef HTP_DEBUG
+                fprintf(stderr, "htp_connp_res_data: returning HTP_STREAM_TUNNEL\n");
+                #endif
+
+                return HTP_STREAM_TUNNEL;
+            }
+
+            rc = htp_res_handle_state_change(connp);
+        }
+
+        if (rc != HTP_OK) {
+            // Do we need more data?
+            if ((rc == HTP_DATA) || (rc == HTP_DATA_BUFFER)) {
+                htp_connp_res_receiver_send_data(connp, 0 /* not last */);
+
+                if (rc == HTP_DATA_BUFFER) {
+                    // Keep the unconsumed bytes: the caller's chunk is not ours to hold on to.
+                    if (htp_connp_res_buffer(connp) != HTP_OK) {
+                        connp->out_status = HTP_STREAM_ERROR;
+                        return HTP_STREAM_ERROR;
+                    }
+                }
+
+                #ifdef HTP_DEBUG
+                fprintf(stderr, "htp_connp_res_data: returning HTP_STREAM_DATA\n");
+                #endif
+
+                connp->out_status = HTP_STREAM_DATA;
+
+                return HTP_STREAM_DATA;
+            }
+
+            // Check for stop
+            if (rc == HTP_STOP) {
+                #ifdef HTP_DEBUG
+                fprintf(stderr, "htp_connp_res_data: returning HTP_STREAM_STOP\n");
+                #endif
+
+                connp->out_status = HTP_STREAM_STOP;
+
+                return HTP_STREAM_STOP;
+            }
+
+            // Check for suspended parsing
+            if (rc == HTP_DATA_OTHER) {
+                // We might have actually consumed the entire data chunk?
+                if (connp->out_current_read_offset >= connp->out_current_len) {
+                    #ifdef HTP_DEBUG
+                    fprintf(stderr, "htp_connp_res_data: returning HTP_STREAM_DATA (suspended parsing)\n");
+                    #endif
+
+                    connp->out_status = HTP_STREAM_DATA;
+
+                    // Do not send HTP_STREAM_DATA_OTHER if we've
+                    // consumed the entire chunk
+                    return HTP_STREAM_DATA;
+                } else {
+                    #ifdef HTP_DEBUG
+                    fprintf(stderr, "htp_connp_res_data: returning HTP_STREAM_DATA_OTHER\n");
+                    #endif
+
+                    connp->out_status = HTP_STREAM_DATA_OTHER;
+
+                    // Partial chunk consumption
+                    return HTP_STREAM_DATA_OTHER;
+                }
+            }
+
+            #ifdef HTP_DEBUG
+            fprintf(stderr, "htp_connp_res_data: returning HTP_STREAM_ERROR\n");
+            #endif
+
+            // Permanent stream error.
+            connp->out_status = HTP_STREAM_ERROR;
+
+            return HTP_STREAM_ERROR;
+        }
+    }
+}
diff --git a/htp/htp_response_generic.c b/htp/htp_response_generic.c
new file mode 100644
index 0000000..f5fa59e
--- /dev/null
+++ b/htp/htp_response_generic.c
@@ -0,0 +1,334 @@
+/***************************************************************************
+ * Copyright (c) 2009-2010 Open Information Security Foundation
+ * Copyright (c) 2010-2013 Qualys, Inc.
+ * All rights reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions are
+ * met:
+ *
+ * - Redistributions of source code must retain the above copyright
+ * notice, this list of conditions and the following disclaimer.
+
+ * - Redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in the
+ * documentation and/or other materials provided with the distribution.
+
+ * - Neither the name of the Qualys, Inc. nor the names of its
+ * contributors may be used to endorse or promote products derived from
+ * this software without specific prior written permission.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+ * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+ * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+ * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
+ * HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+ * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
+ * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+ * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
+ * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+ * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+ * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+ ***************************************************************************/
+
+/**
+ * @file
+ * @author Ivan Ristic <ivanr@webkreator.com>
+ */
+
+#include "htp_config_auto.h"
+
+#include "htp_private.h"
+
+/**
+ * Generic response line parser.
+ *
+ * Splits tx->response_line into up to three whitespace-separated parts:
+ * protocol, status code and status message (the message runs to the end of
+ * the line). The numeric protocol and status are derived with
+ * htp_parse_protocol() and htp_parse_status().
+ *
+ * All five response fields are reset on entry; the previous pointers are
+ * overwritten, not freed, so the caller must have released them already.
+ * When the line runs out early the function still returns HTP_OK and the
+ * fields that were not reached keep their reset values (NULL,
+ * HTP_PROTOCOL_INVALID, HTP_STATUS_INVALID).
+ *
+ * @param[in] connp
+ * @return HTP_OK, or HTP_ERROR when bstr_dup_mem() fails for one of the parts.
+ */
+htp_status_t htp_parse_response_line_generic(htp_connp_t *connp) {
+    htp_tx_t *tx = connp->out_tx;
+    unsigned char *data = bstr_ptr(tx->response_line);
+    size_t len = bstr_len(tx->response_line);
+    size_t pos = 0;
+
+    tx->response_protocol = NULL;
+    tx->response_protocol_number = HTP_PROTOCOL_INVALID;
+    tx->response_status = NULL;
+    tx->response_status_number = HTP_STATUS_INVALID;
+    tx->response_message = NULL;
+
+    // Ignore whitespace at the beginning of the line.
+    while ((pos < len) && (htp_is_space(data[pos]))) pos++;
+
+    size_t start = pos;
+
+    // Find the end of the protocol string.
+    while ((pos < len) && (!htp_is_space(data[pos]))) pos++;
+    if (pos - start == 0) return HTP_OK;
+
+    tx->response_protocol = bstr_dup_mem(data + start, pos - start);
+    if (tx->response_protocol == NULL) return HTP_ERROR;
+
+    tx->response_protocol_number = htp_parse_protocol(tx->response_protocol);
+
+    #ifdef HTP_DEBUG
+    fprint_raw_data(stderr, "Response protocol", bstr_ptr(tx->response_protocol), bstr_len(tx->response_protocol));
+    fprintf(stderr, "Response protocol number: %d\n", tx->response_protocol_number);
+    #endif
+
+    // Ignore whitespace after the response protocol.
+    while ((pos < len) && (htp_is_space(data[pos]))) pos++;
+    if (pos == len) return HTP_OK;
+
+    start = pos;
+
+    // Find the next whitespace character.
+    while ((pos < len) && (!htp_is_space(data[pos]))) pos++;
+    if (pos - start == 0) return HTP_OK;
+
+    tx->response_status = bstr_dup_mem(data + start, pos - start);
+    if (tx->response_status == NULL) return HTP_ERROR;
+
+    tx->response_status_number = htp_parse_status(tx->response_status);
+
+    #ifdef HTP_DEBUG
+    fprint_raw_data(stderr, "Response status (as text)", bstr_ptr(tx->response_status), bstr_len(tx->response_status));
+    fprintf(stderr, "Response status number: %d\n", tx->response_status_number);
+    #endif
+
+    // Ignore whitespace that follows the status code. This uses the same
+    // htp_is_space() test as the other separators in this function, rather
+    // than isspace(), so the result does not depend on the process locale.
+    while ((pos < len) && (htp_is_space(data[pos]))) pos++;
+    if (pos == len) return HTP_OK;
+
+    // Assume the message stretches until the end of the line.
+    tx->response_message = bstr_dup_mem(data + pos, len - pos);
+    if (tx->response_message == NULL) return HTP_ERROR;
+
+    #ifdef HTP_DEBUG
+    fprint_raw_data(stderr, "Response status message", bstr_ptr(tx->response_message), bstr_len(tx->response_message));
+    #endif
+
+    return HTP_OK;
+}
+
+/**
+ * Generic response header parser.
+ *
+ * Splits one header line at its first colon into a name and a value and
+ * stores copies of both in h->name and h->value. Problems are recorded as
+ * flags on h and, once per transaction, on connp->out_tx together with a
+ * warning in the log:
+ *  - no colon: HTP_FIELD_UNPARSEABLE and HTP_FIELD_INVALID; the whole line
+ *    becomes the value of a header with an empty name;
+ *  - empty name, whitespace between name and colon, or a name character that
+ *    is not a token character: HTP_FIELD_INVALID.
+ * A NUL byte inside the value is logged but otherwise kept.
+ *
+ * An empty line (len == 0 after chomping) is handled like any other line
+ * without a colon and yields an empty name and an empty value.
+ *
+ * @param[in] connp
+ * @param[in] h Header structure to fill in. On HTP_ERROR its name and value
+ *              are left NULL.
+ * @param[in] data Header line; a trailing line ending is ignored.
+ * @param[in] len Number of bytes in data.
+ * @return HTP_OK, or HTP_ERROR when the name or the value cannot be copied.
+ */
+htp_status_t htp_parse_response_header_generic(htp_connp_t *connp, htp_header_t *h, unsigned char *data, size_t len) {
+    size_t name_start, name_end;
+    size_t value_start, value_end;
+
+    htp_chomp(data, &len);
+
+    name_start = 0;
+
+    // Look for the first colon.
+    size_t colon_pos = 0;
+    while ((colon_pos < len) && (data[colon_pos] != ':')) colon_pos++;
+
+    if (colon_pos == len) {
+        // Header line with a missing colon.
+
+        h->flags |= HTP_FIELD_UNPARSEABLE;
+        h->flags |= HTP_FIELD_INVALID;
+
+        if (!(connp->out_tx->flags & HTP_FIELD_UNPARSEABLE)) {
+            // Only once per transaction.
+            connp->out_tx->flags |= HTP_FIELD_UNPARSEABLE;
+            connp->out_tx->flags |= HTP_FIELD_INVALID;
+            htp_log(connp, HTP_LOG_MARK, HTP_LOG_WARNING, 0, "Response field invalid: missing colon.");
+        }
+
+        // We're going to treat this invalid header as a header with an
+        // empty name. That will increase the probability that the content
+        // will be inspected.
+        name_end = 0;
+        value_start = 0;
+    } else {
+        // Header line with a colon.
+
+        if (colon_pos == 0) {
+            // Empty header name.
+
+            h->flags |= HTP_FIELD_INVALID;
+
+            if (!(connp->out_tx->flags & HTP_FIELD_INVALID)) {
+                // Only once per transaction.
+                connp->out_tx->flags |= HTP_FIELD_INVALID;
+                htp_log(connp, HTP_LOG_MARK, HTP_LOG_WARNING, 0, "Response field invalid: empty name.");
+            }
+        }
+
+        name_end = colon_pos;
+
+        // Drop whitespace between the field name and the colon.
+        while ((name_end > name_start) && htp_is_space(data[name_end - 1])) {
+            name_end--;
+
+            h->flags |= HTP_FIELD_INVALID;
+
+            if (!(connp->out_tx->flags & HTP_FIELD_INVALID)) {
+                // Only once per transaction.
+                connp->out_tx->flags |= HTP_FIELD_INVALID;
+                htp_log(connp, HTP_LOG_MARK, HTP_LOG_WARNING, 0, "Response field invalid: LWS after name.");
+            }
+        }
+
+        value_start = colon_pos + 1;
+    }
+
+    // Header value.
+
+    // Ignore LWS before field-content.
+    while ((value_start < len) && (htp_is_lws(data[value_start]))) {
+        value_start++;
+    }
+
+    // Look for the end of field-content.
+    value_end = len;
+
+    // Check that the header name is a token.
+    size_t i = name_start;
+    while (i < name_end) {
+        if (!htp_is_token(data[i])) {
+            h->flags |= HTP_FIELD_INVALID;
+
+            if (!(connp->out_tx->flags & HTP_FIELD_INVALID)) {
+                connp->out_tx->flags |= HTP_FIELD_INVALID;
+                htp_log(connp, HTP_LOG_MARK, HTP_LOG_WARNING, 0, "Response header name is not a token.");
+            }
+
+            break;
+        }
+
+        i++;
+    }
+    for (i = value_start; i < value_end; i++) {
+        if (data[i] == 0) {
+            htp_log(connp, HTP_LOG_MARK, HTP_LOG_WARNING, 0, "Response header value contains null.");
+            break;
+        }
+    }
+
+    // Ignore LWS after field-content. The loop is driven by value_end itself
+    // and compares before subtracting, so an empty line (value_end == 0)
+    // cannot wrap around and read in front of the buffer. The byte at
+    // value_start is never LWS when value_start < len (leading LWS was
+    // skipped above), so a non-empty value always keeps at least one byte.
+    while ((value_end > value_start) && (htp_is_lws(data[value_end - 1]))) {
+        value_end--;
+    }
+
+    // Now extract the name and the value.
+    h->name = bstr_dup_mem(data + name_start, name_end - name_start);
+    h->value = bstr_dup_mem(data + value_start, value_end - value_start);
+    if ((h->name == NULL) || (h->value == NULL)) {
+        bstr_free(h->name);
+        bstr_free(h->value);
+        // Do not leave pointers to freed memory behind in the caller's structure.
+        h->name = NULL;
+        h->value = NULL;
+        return HTP_ERROR;
+    }
+
+    return HTP_OK;
+}
+
+/**
+ * Generic response header processor: parses one complete header line and
+ * records it in the response header table of the current transaction.
+ *
+ * A header whose name is not yet in the table is added as is. For a name that
+ * is already present the existing entry is flagged HTP_FIELD_REPEATED and
+ *  - a repeated Content-Length is not merged; a warning is logged unless both
+ *    values parse to the same number;
+ *  - any other header has ", " and the new value appended to the stored value.
+ * From the third occurrence on, repetitions are counted across all headers in
+ * res_header_repetitions; once HTP_MAX_HEADERS_REPETITIONS is reached further
+ * repeated headers are dropped silently.
+ *
+ * @param[in] connp
+ * @param[in] data
+ * @param[in] len
+ * @return HTP_OK when the header was stored, merged or dropped; HTP_ERROR when
+ *         allocation, parsing or insertion fails.
+ */
+htp_status_t htp_process_response_header_generic(htp_connp_t *connp, unsigned char *data, size_t len) {
+    htp_status_t status = HTP_OK;
+
+    // Allocate the structure that will hold the parsed header.
+    htp_header_t *h = calloc(1, sizeof (htp_header_t));
+    if (h == NULL) return HTP_ERROR;
+
+    if (htp_parse_response_header_generic(connp, h, data, len) != HTP_OK) {
+        // The parser has already released name and value.
+        free(h);
+        return HTP_ERROR;
+    }
+
+    #ifdef HTP_DEBUG
+    fprint_bstr(stderr, "Header name", h->name);
+    fprint_bstr(stderr, "Header value", h->value);
+    #endif
+
+    htp_header_t *h_existing = htp_table_get(connp->out_tx->response_headers, h->name);
+
+    if (h_existing == NULL) {
+        // First header with this name: the table takes the structure.
+        if (htp_table_add(connp->out_tx->response_headers, h->name, h) == HTP_OK) {
+            return HTP_OK;
+        }
+
+        status = HTP_ERROR;
+        goto discard;
+    }
+
+    // Same-name header seen before; keep track of the repetition.
+    if (h_existing->flags & HTP_FIELD_REPEATED) {
+        // For simplicity, the repetitions of all headers share one counter.
+        if (connp->out_tx->res_header_repetitions >= HTP_MAX_HEADERS_REPETITIONS) {
+            // Limit reached: drop the header, still reporting success.
+            goto discard;
+        }
+        connp->out_tx->res_header_repetitions++;
+    } else {
+        // Second occurrence of this header.
+        htp_log(connp, HTP_LOG_MARK, HTP_LOG_WARNING, 0, "Repetition for header");
+    }
+    h_existing->flags |= HTP_FIELD_REPEATED;
+
+    if (bstr_cmp_c_nocase(h->name, "Content-Length") == 0) {
+        // Multiple C-L headers are against the RFC, but many browsers ignore
+        // the later ones when the values agree. Compare the parsed numbers,
+        // not the text, so small formatting differences do not matter.
+        int64_t existing_cl = htp_parse_content_length(h_existing->value, NULL);
+        int64_t new_cl = htp_parse_content_length(h->value, NULL);
+
+        if ((existing_cl == -1) || (new_cl == -1) || (existing_cl != new_cl)) {
+            // Ambiguous response C-L value.
+            htp_log(connp, HTP_LOG_MARK, HTP_LOG_WARNING, 0, "Ambiguous response C-L value");
+        }
+
+        // The stored C-L header is kept; the new one is discarded below.
+    } else {
+        // Grow the stored value once, then append ", " and the new value.
+        bstr *new_value = bstr_expand(h_existing->value, bstr_len(h_existing->value) + 2 + bstr_len(h->value));
+        if (new_value == NULL) {
+            status = HTP_ERROR;
+            goto discard;
+        }
+
+        h_existing->value = new_value;
+        bstr_add_mem_noex(h_existing->value, (unsigned char *) ", ", 2);
+        bstr_add_noex(h_existing->value, h->value);
+    }
+
+discard:
+    // The new header structure is not kept on any path that gets here.
+    bstr_free(h->name);
+    bstr_free(h->value);
+    free(h);
+
+    return status;
+}
diff --git a/htp/htp_table.c b/htp/htp_table.c
new file mode 100644
index 0000000..535b961
--- /dev/null
+++ b/htp/htp_table.c
@@ -0,0 +1,250 @@
+/***************************************************************************
+ * Copyright (c) 2009-2010 Open Information Security Foundation
+ * Copyright (c) 2010-2013 Qualys, Inc.
+ * All rights reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions are
+ * met:
+ *
+ * - Redistributions of source code must retain the above copyright
+ * notice, this list of conditions and the following disclaimer.
+
+ * - Redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in the
+ * documentation and/or other materials provided with the distribution.
+
+ * - Neither the name of the Qualys, Inc. nor the names of its
+ * contributors may be used to endorse or promote products derived from
+ * this software without specific prior written permission.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+ * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+ * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+ * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
+ * HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+ * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
+ * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+ * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
+ * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+ * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+ * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+ ***************************************************************************/
+
+/**
+ * @file
+ * @author Ivan Ristic <ivanr@webkreator.com>
+ */
+
+#include "htp_config_auto.h"
+
+#include "htp_private.h"
+
+ /**
+  * Append one key/element pair to the list that backs the table. The key is
+  * stored first and the element directly after it.
+  *
+  * @param[in] table
+  * @param[in] key
+  * @param[in] element
+  * @return HTP_OK when both entries were stored, HTP_ERROR otherwise.
+  */
+ static htp_status_t _htp_table_add(htp_table_t *table, const bstr *key, const void *element) {
+     // First slot of the pair: the key.
+     htp_status_t rc = htp_list_add(&table->list, (void *)key);
+     if (rc != HTP_OK) {
+         return HTP_ERROR;
+     }
+
+     // Second slot of the pair: the element.
+     rc = htp_list_add(&table->list, (void *)element);
+     if (rc == HTP_OK) {
+         return HTP_OK;
+     }
+
+     // The element could not be stored; undo the key insertion so that the
+     // list is not left holding half a pair.
+     htp_list_pop(&table->list);
+     return HTP_ERROR;
+ }
+
+ /**
+  * Add an element under a private copy of the given key. Fails if the table
+  * has already been used with a different key management strategy.
+  *
+  * @param[in] table
+  * @param[in] key
+  * @param[in] element
+  * @return HTP_OK on success, HTP_ERROR on failure.
+  */
+ htp_status_t htp_table_add(htp_table_t *table, const bstr *key, const void *element) {
+     if ((table == NULL) || (key == NULL)) return HTP_ERROR;
+
+     // The first insertion fixes the key strategy; every later insertion
+     // has to use the same one.
+     switch (table->alloc_type) {
+         case HTP_TABLE_KEYS_ALLOC_UKNOWN:
+             table->alloc_type = HTP_TABLE_KEYS_COPIED;
+             break;
+
+         case HTP_TABLE_KEYS_COPIED:
+             break;
+
+         default:
+             #ifdef HTP_DEBUG
+             fprintf(stderr, "# Inconsistent key management strategy. Actual %d. Attempted %d.\n",
+                 table->alloc_type, HTP_TABLE_KEYS_COPIED);
+             #endif
+
+             return HTP_ERROR;
+     }
+
+     // The table stores its own copy of the key.
+     bstr *key_copy = bstr_dup(key);
+     if (key_copy == NULL) return HTP_ERROR;
+
+     htp_status_t rc = _htp_table_add(table, key_copy, element);
+     if (rc != HTP_OK) {
+         // The copy never made it into the table, so release it here.
+         bstr_free(key_copy);
+         return HTP_ERROR;
+     }
+
+     return HTP_OK;
+ }
+
+ /**
+  * Add an element under the given key, which the table adopts. Fails if the
+  * table has already been used with a different key management strategy.
+  *
+  * @param[in] table
+  * @param[in] key
+  * @param[in] element
+  * @return HTP_OK on success, HTP_ERROR on failure.
+  */
+ htp_status_t htp_table_addn(htp_table_t *table, const bstr *key, const void *element) {
+     if ((table == NULL) || (key == NULL)) return HTP_ERROR;
+
+     // The first insertion fixes the key strategy; every later insertion
+     // has to use the same one.
+     switch (table->alloc_type) {
+         case HTP_TABLE_KEYS_ALLOC_UKNOWN:
+             table->alloc_type = HTP_TABLE_KEYS_ADOPTED;
+             break;
+
+         case HTP_TABLE_KEYS_ADOPTED:
+             break;
+
+         default:
+             #ifdef HTP_DEBUG
+             fprintf(stderr, "# Inconsistent key management strategy. Actual %d. Attempted %d.\n",
+                 table->alloc_type, HTP_TABLE_KEYS_ADOPTED);
+             #endif
+
+             return HTP_ERROR;
+     }
+
+     // The caller's key object itself goes into the table.
+     return _htp_table_add(table, key, element);
+ }
+
+ /**
+  * Add an element under the given key, which the table only references.
+  * Fails if the table has already been used with a different key management
+  * strategy.
+  *
+  * @param[in] table
+  * @param[in] key
+  * @param[in] element
+  * @return HTP_OK on success, HTP_ERROR on failure.
+  */
+ htp_status_t htp_table_addk(htp_table_t *table, const bstr *key, const void *element) {
+     if ((table == NULL) || (key == NULL)) return HTP_ERROR;
+
+     // The first insertion fixes the key strategy; every later insertion
+     // has to use the same one.
+     switch (table->alloc_type) {
+         case HTP_TABLE_KEYS_ALLOC_UKNOWN:
+             table->alloc_type = HTP_TABLE_KEYS_REFERENCED;
+             break;
+
+         case HTP_TABLE_KEYS_REFERENCED:
+             break;
+
+         default:
+             #ifdef HTP_DEBUG
+             fprintf(stderr, "# Inconsistent key management strategy. Actual %d. Attempted %d.\n",
+                 table->alloc_type, HTP_TABLE_KEYS_REFERENCED);
+             #endif
+
+             return HTP_ERROR;
+     }
+
+     // The key stays owned by the caller; only the pointer is stored.
+     return _htp_table_add(table, key, element);
+ }
+
+ /**
+  * Empty the table. Keys are freed only when the table manages them (copied
+  * or adopted keys); elements are never freed here.
+  *
+  * @param[in] table
+  */
+ void htp_table_clear(htp_table_t *table) {
+     if (table == NULL) return;
+
+     int owns_keys = (table->alloc_type == HTP_TABLE_KEYS_COPIED)
+         || (table->alloc_type == HTP_TABLE_KEYS_ADOPTED);
+
+     if (owns_keys) {
+         // Keys occupy every other list slot, starting with the first one.
+         size_t total = htp_list_size(&table->list);
+         size_t pos = 0;
+         while (pos < total) {
+             bstr *stored_key = htp_list_get(&table->list, pos);
+             bstr_free(stored_key);
+             pos += 2;
+         }
+     }
+
+     htp_list_clear(&table->list);
+ }
+
+ /**
+  * Empty the table while leaving every key untouched, whatever the key
+  * management strategy is.
+  *
+  * @param[in] table
+  */
+ void htp_table_clear_ex(htp_table_t *table) {
+     if (table != NULL) {
+         // Only the list is reset; no key is freed on this path.
+         htp_list_clear(&table->list);
+     }
+ }
+
+ /**
+  * Allocate an empty table.
+  *
+  * @param[in] size Starting capacity, in key/element pairs. Must not be zero.
+  * @return The new table, or NULL on failure.
+  */
+ htp_table_t *htp_table_create(size_t size) {
+     if (size == 0) return NULL;
+
+     htp_table_t *created = calloc(1, sizeof (htp_table_t));
+     if (created == NULL) return NULL;
+
+     // The key strategy is decided by the first insertion.
+     created->alloc_type = HTP_TABLE_KEYS_ALLOC_UKNOWN;
+
+     // Each pair takes two slots in the backing list.
+     if (htp_list_init(&created->list, size * 2) != HTP_ERROR) {
+         return created;
+     }
+
+     free(created);
+     return NULL;
+ }
+
+ /**
+  * Clear the table (freeing keys when the table manages them), release the
+  * backing list storage and free the table structure itself.
+  *
+  * @param[in] table
+  */
+ void htp_table_destroy(htp_table_t *table) {
+     if (table != NULL) {
+         htp_table_clear(table);
+         htp_list_array_release(&table->list);
+         free(table);
+     }
+ }
+
+ /**
+  * Destroy the table without freeing any of its keys.
+  *
+  * @param[in] table
+  */
+ void htp_table_destroy_ex(htp_table_t *table) {
+     if (table != NULL) {
+         // With referenced keys, htp_table_clear() skips its key-freeing loop.
+         table->alloc_type = HTP_TABLE_KEYS_REFERENCED;
+         htp_table_destroy(table);
+     }
+ }
+
+ /**
+  * Look up the first element whose key equals the given bstr, ignoring case.
+  *
+  * @param[in] table
+  * @param[in] key
+  * @return The matching element, or NULL when nothing matches.
+  */
+ void *htp_table_get(const htp_table_t *table, const bstr *key) {
+     if ((table == NULL) || (key == NULL)) return NULL;
+
+     // Keys are in the even slots; the matching element follows its key.
+     size_t total = htp_list_size(&table->list);
+     size_t pos = 0;
+     while (pos < total) {
+         bstr *stored_key = htp_list_get(&table->list, pos);
+         if (bstr_cmp_nocase(stored_key, key) == 0) {
+             return htp_list_get(&table->list, pos + 1);
+         }
+         pos += 2;
+     }
+
+     return NULL;
+ }
+
+ /**
+  * Look up the first element whose key matches the given C string, using
+  * bstr_cmp_c_nocasenorzero() for the comparison.
+  *
+  * @param[in] table
+  * @param[in] ckey
+  * @return The matching element, or NULL when nothing matches.
+  */
+ void *htp_table_get_c(const htp_table_t *table, const char *ckey) {
+     if ((table == NULL) || (ckey == NULL)) return NULL;
+
+     // Keys are in the even slots; the matching element follows its key.
+     size_t total = htp_list_size(&table->list);
+     size_t pos = 0;
+     while (pos < total) {
+         bstr *stored_key = htp_list_get(&table->list, pos);
+         if (bstr_cmp_c_nocasenorzero(stored_key, ckey) == 0) {
+             return htp_list_get(&table->list, pos + 1);
+         }
+         pos += 2;
+     }
+
+     return NULL;
+ }
+
+ /**
+  * Retrieve the element, and optionally the key, of the pair at the given
+  * position.
+  *
+  * @param[in] table
+  * @param[in] idx Zero-based pair index; valid values are below htp_table_size().
+  * @param[in,out] key Receives the key of the pair when not NULL. Left
+  *                    untouched when the function returns early.
+  * @return The element stored at idx, or NULL if table is NULL or idx is out
+  *         of range.
+  */
+ void *htp_table_get_index(const htp_table_t *table, size_t idx, bstr **key) {
+     if (table == NULL) return NULL;
+
+     // idx counts key/element pairs, while the backing list holds two entries
+     // per pair, so the limit is half of the list size.
+     if (idx >= htp_list_size(&table->list) / 2) return NULL;
+
+     if (key != NULL) {
+         *key = htp_list_get(&table->list, idx * 2);
+     }
+
+     return htp_list_get(&table->list, (idx * 2) + 1);
+ }
+
+ /**
+  * Look up the first element whose key equals the given memory region,
+  * ignoring case.
+  *
+  * @param[in] table
+  * @param[in] key
+  * @param[in] key_len
+  * @return The matching element, or NULL when nothing matches.
+  */
+ void *htp_table_get_mem(const htp_table_t *table, const void *key, size_t key_len) {
+     if ((table == NULL) || (key == NULL)) return NULL;
+
+     // Keys are in the even slots; the matching element follows its key.
+     size_t total = htp_list_size(&table->list);
+     size_t pos = 0;
+     while (pos < total) {
+         bstr *stored_key = htp_list_get(&table->list, pos);
+         if (bstr_cmp_mem_nocase(stored_key, key, key_len) == 0) {
+             return htp_list_get(&table->list, pos + 1);
+         }
+         pos += 2;
+     }
+
+     return NULL;
+ }
+
+ /**
+  * Report how many key/element pairs the table holds.
+  *
+  * @param[in] table
+  * @return Number of pairs, or 0 when table is NULL.
+  */
+ size_t htp_table_size(const htp_table_t *table) {
+     // The backing list stores two entries for every pair.
+     return (table != NULL) ? (htp_list_size(&table->list) / 2) : 0;
+ }
diff --git a/htp/htp_table.h b/htp/htp_table.h
new file mode 100644
index 0000000..70a1e9b
--- /dev/null
+++ b/htp/htp_table.h
@@ -0,0 +1,184 @@
+/***************************************************************************
+ * Copyright (c) 2009-2010 Open Information Security Foundation
+ * Copyright (c) 2010-2013 Qualys, Inc.
+ * All rights reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions are
+ * met:
+ *
+ * - Redistributions of source code must retain the above copyright
+ * notice, this list of conditions and the following disclaimer.
+
+ * - Redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in the
+ * documentation and/or other materials provided with the distribution.
+
+ * - Neither the name of the Qualys, Inc. nor the names of its
+ * contributors may be used to endorse or promote products derived from
+ * this software without specific prior written permission.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+ * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+ * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+ * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
+ * HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+ * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
+ * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+ * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
+ * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+ * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+ * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+ ***************************************************************************/
+
+/**
+ * @file
+ * @author Ivan Ristic <ivanr@webkreator.com>
+ */
+
+#ifndef HTP_TABLE_H
+#define HTP_TABLE_H
+
+#ifdef __cplusplus
+extern "C" {
+#endif
+
+typedef struct htp_table_t htp_table_t;
+
+/**
+ * Add a new element to the table. The key will be copied, and the copy
+ * managed by the table. The table keeps a pointer to the element. It is the
+ * caller's responsibility to ensure the pointer remains valid.
+ *
+ * @param[in] table
+ * @param[in] key
+ * @param[in] element
+ * @return HTP_OK on success, HTP_ERROR on failure.
+ */
+htp_status_t htp_table_add(htp_table_t *table, const bstr *key, const void *element);
+
+/**
+ * Add a new element to the table. The key provided will be adopted and managed
+ * by the table. You should not keep a copy of the pointer to the key unless you're
+ * certain that the table will live longer than the copy. The table keeps a pointer
+ * to the element. It is the caller's responsibility to ensure the pointer remains
+ * valid.
+ *
+ * @param[in] table
+ * @param[in] key
+ * @param[in] element
+ * @return HTP_OK on success, HTP_ERROR on failure.
+ */
+htp_status_t htp_table_addn(htp_table_t *table, const bstr *key, const void *element);
+
+/**
+ * Add a new element to the table. The key provided will be only referenced and the
+ * caller remains responsible to keep it alive until after the table is destroyed. The
+ * table keeps a pointer to the element. It is the caller's responsibility to ensure
+ * the pointer remains valid.
+ *
+ * @param[in] table
+ * @param[in] key
+ * @param[in] element
+ * @return HTP_OK on success, HTP_ERROR on failure.
+ */
+htp_status_t htp_table_addk(htp_table_t *table, const bstr *key, const void *element);
+
+/**
+ * Remove all elements from the table. This function handles keys
+ * according to the active allocation strategy. If the elements need freeing,
+ * you need to free them before invoking this function.
+ *
+ * @param[in] table
+ */
+void htp_table_clear(htp_table_t *table);
+
+/**
+ * Remove all elements from the table without freeing any of the keys, even
+ * if the table is using an allocation strategy where keys belong to it. This
+ * function is useful if all the keys have been adopted by some other structure.
+ *
+ * @param[in] table
+ */
+void htp_table_clear_ex(htp_table_t *table);
+
+/**
+ * Create a new table structure. The table will grow automatically as needed,
+ * but you are required to provide a starting size.
+ *
+ * @param[in] size The starting size.
+ * @return Newly created table instance, or NULL on failure.
+ */
+htp_table_t *htp_table_create(size_t size);
+
+/**
+ * Destroy a table. This function handles the keys according to the active
+ * allocation strategy. If the elements need freeing, you need to free them
+ * before invoking this function. The table pointer is invalid after this call;
+ * it is passed by value, so the caller's copy is not reset and must not be reused.
+ *
+ * @param[in] table
+ */
+void htp_table_destroy(htp_table_t *table);
+
+/**
+ * Destroy the given table, but don't free the keys, even if they are managed by
+ * the table. Use this method when the responsibility for the keys has been transferred
+ * elsewhere. The table pointer is invalid after this call; it is passed by value,
+ * so the caller's copy is not reset and must not be reused.
+ *
+ * @param[in] table
+ */
+void htp_table_destroy_ex(htp_table_t *table);
+
+/**
+ * Retrieve the first element that matches the given bstr key.
+ *
+ * @param[in] table
+ * @param[in] key
+ * @return Matched element, or NULL if no elements match the key.
+ */
+void *htp_table_get(const htp_table_t *table, const bstr *key);
+
+/**
+ * Retrieve the first element that matches the given NUL-terminated key.
+ *
+ * @param[in] table
+ * @param[in] ckey
+ * @return Matched element, or NULL if no elements match the key.
+ */
+void *htp_table_get_c(const htp_table_t *table, const char *ckey);
+
+/**
+ * Retrieve key and element at the given index.
+ *
+ * @param[in] table
+ * @param[in] idx
+ * @param[in,out] key Pointer in which the key will be returned. Can be NULL.
+ * @return The element at the given index, or NULL if the table is NULL or the index is out of range.
+ */
+void *htp_table_get_index(const htp_table_t *table, size_t idx, bstr **key);
+
+/**
+ * Retrieve the first element that matches the key defined by the provided pointer and length.
+ *
+ * @param[in] table
+ * @param[in] key
+ * @param[in] key_len
+ * @return Matched element, or NULL if no elements match the key.
+ */
+void *htp_table_get_mem(const htp_table_t *table, const void *key, size_t key_len);
+
+/**
+ * Return the size of the table.
+ *
+ * @param[in] table
+ * @return table size
+ */
+size_t htp_table_size(const htp_table_t *table);
+
+#ifdef __cplusplus
+}
+#endif
+
+#endif /* HTP_TABLE_H */
+
diff --git a/htp/htp_table_private.h b/htp/htp_table_private.h
new file mode 100644
index 0000000..273ec99
--- /dev/null
+++ b/htp/htp_table_private.h
@@ -0,0 +1,78 @@
+/***************************************************************************
+ * Copyright (c) 2009-2010 Open Information Security Foundation
+ * Copyright (c) 2010-2013 Qualys, Inc.
+ * All rights reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions are
+ * met:
+ *
+ * - Redistributions of source code must retain the above copyright
+ * notice, this list of conditions and the following disclaimer.
+
+ * - Redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in the
+ * documentation and/or other materials provided with the distribution.
+
+ * - Neither the name of the Qualys, Inc. nor the names of its
+ * contributors may be used to endorse or promote products derived from
+ * this software without specific prior written permission.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+ * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+ * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+ * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
+ * HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+ * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
+ * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+ * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
+ * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+ * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+ * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+ ***************************************************************************/
+
+/**
+ * @file
+ * @author Ivan Ristic <ivanr@webkreator.com>
+ */
+
+#ifndef HTP_TABLE_PRIVATE_H
+#define HTP_TABLE_PRIVATE_H
+
+#ifdef __cplusplus
+extern "C" {
+#endif
+
+#include "htp_list.h"
+#include "htp_table.h"
+
+ /**
+  * Key management strategies for a table. A table adopts the strategy of the
+  * first insertion; insertions that use a different strategy are rejected.
+  */
+ enum htp_table_alloc_t {
+ /**
+  * This is the default value, used only until the first element is added.
+  * (The identifier is spelled "UKNOWN"; it is kept as is because other code
+  * refers to it by this name.)
+  */
+ HTP_TABLE_KEYS_ALLOC_UKNOWN = 0,
+
+ /** Keys are copied; the table owns the copies and frees them when it is cleared or destroyed. */
+ HTP_TABLE_KEYS_COPIED = 1,
+
+ /** Keys are adopted and freed when the table is cleared or destroyed. */
+ HTP_TABLE_KEYS_ADOPTED = 2,
+
+ /** Keys are only referenced; the caller is still responsible for freeing them after the table is destroyed. */
+ HTP_TABLE_KEYS_REFERENCED = 3
+ };
+
+ /** A table of key/element pairs, kept in insertion order on top of a list. */
+ struct htp_table_t {
+ /**
+  * Table key and value pairs are stored in this list; name first, then value.
+  * Each pair therefore takes two consecutive list entries.
+  */
+ htp_list_t list;
+
+ /**
+  * Key management strategy. Initially set to HTP_TABLE_KEYS_ALLOC_UKNOWN. The
+  * actual strategy is determined by the first allocation.
+  */
+ enum htp_table_alloc_t alloc_type;
+ };
+
+#ifdef __cplusplus
+}
+#endif
+
+#endif /* HTP_TABLE_PRIVATE_H */
diff --git a/htp/htp_transaction.c b/htp/htp_transaction.c
new file mode 100644
index 0000000..7220459
--- /dev/null
+++ b/htp/htp_transaction.c
@@ -0,0 +1,1558 @@
+/***************************************************************************
+ * Copyright (c) 2009-2010 Open Information Security Foundation
+ * Copyright (c) 2010-2013 Qualys, Inc.
+ * All rights reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions are
+ * met:
+ *
+ * - Redistributions of source code must retain the above copyright
+ * notice, this list of conditions and the following disclaimer.
+
+ * - Redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in the
+ * documentation and/or other materials provided with the distribution.
+
+ * - Neither the name of the Qualys, Inc. nor the names of its
+ * contributors may be used to endorse or promote products derived from
+ * this software without specific prior written permission.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+ * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+ * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+ * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
+ * HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+ * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
+ * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+ * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
+ * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+ * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+ * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+ ***************************************************************************/
+
+/**
+ * @file
+ * @author Ivan Ristic <ivanr@webkreator.com>
+ */
+
+#include "htp_config_auto.h"
+
+#include "htp_private.h"
+
+static void htp_tx_req_destroy_decompressors(htp_connp_t *connp);
+static htp_status_t htp_tx_req_process_body_data_decompressor_callback(htp_tx_data_t *d);
+
+ /**
+  * Build a bstr for the given memory region, either by wrapping the region
+  * (HTP_ALLOC_REUSE) or by duplicating it (any other strategy).
+  *
+  * @param[in] data
+  * @param[in] len
+  * @param[in] alloc
+  * @return The new bstr, or NULL if data is NULL or the bstr could not be created.
+  */
+ static bstr *copy_or_wrap_mem(const void *data, size_t len, enum htp_alloc_strategy_t alloc) {
+     if (data == NULL) return NULL;
+
+     return (alloc == HTP_ALLOC_REUSE) ? bstr_wrap_mem(data, len) : bstr_dup_mem(data, len);
+ }
+
+ /**
+  * Create a transaction, attach it to the parser's connection and register it
+  * in the connection's transaction list.
+  *
+  * @param[in] connp
+  * @return The new transaction, or NULL if connp is NULL, an allocation fails,
+  *         or the transaction cannot be added to the connection.
+  */
+ htp_tx_t *htp_tx_create(htp_connp_t *connp) {
+     if (connp == NULL) return NULL;
+
+     htp_tx_t *tx = calloc(1, sizeof (htp_tx_t));
+     if (tx == NULL) return NULL;
+
+     tx->connp = connp;
+     tx->conn = connp->conn;
+     // The transaction is appended last, so its index is the current list size.
+     tx->index = htp_list_size(tx->conn->transactions);
+     tx->cfg = connp->cfg;
+     tx->is_config_shared = HTP_CONFIG_SHARED;
+
+     // Request fields.
+
+     tx->request_progress = HTP_REQUEST_NOT_STARTED;
+     tx->request_protocol_number = HTP_PROTOCOL_UNKNOWN;
+     tx->request_content_length = -1;
+
+     tx->parsed_uri_raw = htp_uri_alloc();
+     if (tx->parsed_uri_raw == NULL) {
+         htp_tx_destroy_incomplete(tx);
+         return NULL;
+     }
+
+     tx->request_headers = htp_table_create(32);
+     if (tx->request_headers == NULL) {
+         htp_tx_destroy_incomplete(tx);
+         return NULL;
+     }
+
+     tx->request_params = htp_table_create(32);
+     if (tx->request_params == NULL) {
+         htp_tx_destroy_incomplete(tx);
+         return NULL;
+     }
+
+     // Response fields.
+
+     tx->response_progress = HTP_RESPONSE_NOT_STARTED;
+     tx->response_status = NULL;
+     tx->response_status_number = HTP_STATUS_UNKNOWN;
+     tx->response_protocol_number = HTP_PROTOCOL_UNKNOWN;
+     tx->response_content_length = -1;
+
+     tx->response_headers = htp_table_create(32);
+     if (tx->response_headers == NULL) {
+         htp_tx_destroy_incomplete(tx);
+         return NULL;
+     }
+
+     // Do not hand back a transaction the connection does not know about:
+     // if registration fails, clean up the same way as the failures above.
+     if (htp_list_add(tx->conn->transactions, tx) != HTP_OK) {
+         htp_tx_destroy_incomplete(tx);
+         return NULL;
+     }
+
+     return tx;
+ }
+
+ /**
+  * Destroy a transaction, but only if it is complete.
+  *
+  * @param[in] tx
+  * @return HTP_OK if the transaction was destroyed, HTP_ERROR if tx is NULL
+  *         or the transaction is not complete yet.
+  */
+ htp_status_t htp_tx_destroy(htp_tx_t *tx) {
+     // Refuse NULL and transactions that are still in progress.
+     if ((tx == NULL) || (!htp_tx_is_complete(tx))) {
+         return HTP_ERROR;
+     }
+
+     htp_tx_destroy_incomplete(tx);
+     return HTP_OK;
+ }
+
+ /**
+  * Destroy a transaction regardless of its progress. The transaction is
+  * detached from its connection and parser, everything it holds is released,
+  * and finally the structure itself is freed.
+  *
+  * @param[in] tx
+  */
+ void htp_tx_destroy_incomplete(htp_tx_t *tx) {
+     if (tx == NULL) return;
+
+     // Detach the transaction from the connection and the parser first.
+     htp_conn_remove_tx(tx->conn, tx);
+     htp_connp_tx_remove(tx->connp, tx);
+
+     // Request line and the fields derived from it.
+
+     bstr_free(tx->request_line);
+     bstr_free(tx->request_method);
+     bstr_free(tx->request_uri);
+     bstr_free(tx->request_protocol);
+     bstr_free(tx->request_content_type);
+     bstr_free(tx->request_hostname);
+     htp_uri_free(tx->parsed_uri_raw);
+     htp_uri_free(tx->parsed_uri);
+     bstr_free(tx->request_auth_username);
+     bstr_free(tx->request_auth_password);
+
+     // Request headers: release every header, then the table.
+     if (tx->request_headers != NULL) {
+         size_t count = htp_table_size(tx->request_headers);
+         size_t pos = 0;
+         while (pos < count) {
+             htp_header_t *hdr = htp_table_get_index(tx->request_headers, pos, NULL);
+             bstr_free(hdr->name);
+             bstr_free(hdr->value);
+             free(hdr);
+             pos++;
+         }
+
+         htp_table_destroy(tx->request_headers);
+     }
+
+     // Request parsers.
+
+     htp_urlenp_destroy(tx->request_urlenp_query);
+     htp_urlenp_destroy(tx->request_urlenp_body);
+     htp_mpartp_destroy(tx->request_mpartp);
+
+     // Request parameters. No NULL guard is needed here: htp_table_size()
+     // reports 0 for a NULL table and htp_table_destroy() ignores NULL.
+     {
+         size_t count = htp_table_size(tx->request_params);
+         size_t pos = 0;
+         while (pos < count) {
+             htp_param_t *p = htp_table_get_index(tx->request_params, pos, NULL);
+             bstr_free(p->name);
+             bstr_free(p->value);
+             free(p);
+             pos++;
+         }
+     }
+
+     htp_table_destroy(tx->request_params);
+
+     // Request cookies: the table elements are the cookie values themselves.
+     if (tx->request_cookies != NULL) {
+         size_t count = htp_table_size(tx->request_cookies);
+         size_t pos = 0;
+         while (pos < count) {
+             bstr *cookie_value = htp_table_get_index(tx->request_cookies, pos, NULL);
+             bstr_free(cookie_value);
+             pos++;
+         }
+
+         htp_table_destroy(tx->request_cookies);
+     }
+
+     htp_hook_destroy(tx->hook_request_body_data);
+
+     // Response line and the fields derived from it.
+
+     bstr_free(tx->response_line);
+     bstr_free(tx->response_protocol);
+     bstr_free(tx->response_status);
+     bstr_free(tx->response_message);
+     bstr_free(tx->response_content_type);
+
+     // Response headers: release every header, then the table.
+     if (tx->response_headers != NULL) {
+         size_t count = htp_table_size(tx->response_headers);
+         size_t pos = 0;
+         while (pos < count) {
+             htp_header_t *hdr = htp_table_get_index(tx->response_headers, pos, NULL);
+             bstr_free(hdr->name);
+             bstr_free(hdr->value);
+             free(hdr);
+             pos++;
+         }
+
+         htp_table_destroy(tx->response_headers);
+     }
+
+     // A private configuration belongs to this transaction and goes with it.
+     if (tx->is_config_shared == HTP_CONFIG_PRIVATE) {
+         htp_config_destroy(tx->cfg);
+     }
+
+     free(tx);
+ }
+
+ /**
+  * Report whether the transaction's configuration is shared or private.
+  *
+  * @param[in] tx
+  * @return The stored is_config_shared value, or -1 when tx is NULL.
+  */
+ int htp_tx_get_is_config_shared(const htp_tx_t *tx) {
+     return (tx != NULL) ? tx->is_config_shared : -1;
+ }
+
+ /**
+  * Return the user data pointer associated with the transaction.
+  *
+  * @param[in] tx
+  * @return The stored pointer, or NULL when tx is NULL.
+  */
+ void *htp_tx_get_user_data(const htp_tx_t *tx) {
+     return (tx != NULL) ? tx->user_data : NULL;
+ }
+
+ /**
+  * Replace the configuration used by the transaction. Nothing happens when
+  * tx or cfg is NULL, or when is_cfg_shared is neither HTP_CONFIG_PRIVATE nor
+  * HTP_CONFIG_SHARED.
+  *
+  * @param[in] tx
+  * @param[in] cfg
+  * @param[in] is_cfg_shared HTP_CONFIG_PRIVATE if the transaction is to own
+  *                          cfg, HTP_CONFIG_SHARED otherwise.
+  */
+ void htp_tx_set_config(htp_tx_t *tx, htp_cfg_t *cfg, int is_cfg_shared) {
+     if ((tx == NULL) || (cfg == NULL)) return;
+
+     if ((is_cfg_shared != HTP_CONFIG_PRIVATE) && (is_cfg_shared != HTP_CONFIG_SHARED)) return;
+
+     // Destroy the previous private configuration, unless it is the very
+     // object being installed: destroying it would leave tx->cfg dangling.
+     if ((tx->is_config_shared == HTP_CONFIG_PRIVATE) && (tx->cfg != cfg)) {
+         htp_config_destroy(tx->cfg);
+     }
+
+     tx->cfg = cfg;
+     tx->is_config_shared = is_cfg_shared;
+ }
+
+ /**
+  * Associate a user data pointer with the transaction.
+  *
+  * @param[in] tx
+  * @param[in] user_data
+  */
+ void htp_tx_set_user_data(htp_tx_t *tx, void *user_data) {
+     if (tx != NULL) {
+         tx->user_data = user_data;
+     }
+ }
+
+ /**
+  * Add a parameter to the transaction's request parameters. If the
+  * configuration has a parameter processor, it runs on the parameter first.
+  * The parameter name is used as the table key and is only referenced.
+  *
+  * @param[in] tx
+  * @param[in] param
+  * @return HTP_OK on success, HTP_ERROR if an argument is NULL, the processor
+  *         does not return HTP_OK, or the table insertion fails.
+  */
+ htp_status_t htp_tx_req_add_param(htp_tx_t *tx, htp_param_t *param) {
+     if ((tx == NULL) || (param == NULL)) return HTP_ERROR;
+
+     // Optional processing hook; anything other than HTP_OK rejects the parameter.
+     if ((tx->cfg->parameter_processor != NULL)
+             && (tx->cfg->parameter_processor(param) != HTP_OK)) {
+         return HTP_ERROR;
+     }
+
+     return htp_table_addk(tx->request_params, param->name, param);
+ }
+
+ /**
+  * Find the first request parameter with the given name, ignoring case.
+  *
+  * @param[in] tx
+  * @param[in] name
+  * @param[in] name_len
+  * @return The parameter, or NULL if there is no match or an argument is NULL.
+  */
+ htp_param_t *htp_tx_req_get_param(htp_tx_t *tx, const char *name, size_t name_len) {
+     if ((tx != NULL) && (name != NULL)) {
+         return htp_table_get_mem(tx->request_params, name, name_len);
+     }
+
+     return NULL;
+ }
+
+ /**
+  * Find the first request parameter that comes from the given source and has
+  * the given name, ignoring case.
+  *
+  * @param[in] tx
+  * @param[in] source
+  * @param[in] name
+  * @param[in] name_len
+  * @return The parameter, or NULL if there is no match or an argument is NULL.
+  */
+ htp_param_t *htp_tx_req_get_param_ex(htp_tx_t *tx, enum htp_data_source_t source, const char *name, size_t name_len) {
+     if ((tx == NULL) || (name == NULL)) return NULL;
+
+     size_t total = htp_table_size(tx->request_params);
+     for (size_t pos = 0; pos < total; pos++) {
+         htp_param_t *candidate = htp_table_get_index(tx->request_params, pos, NULL);
+
+         // The source is checked first; the name is compared only for
+         // parameters that come from the requested source.
+         if ((candidate->source == source)
+                 && (bstr_cmp_mem_nocase(candidate->name, name, name_len) == 0)) {
+             return candidate;
+         }
+     }
+
+     return NULL;
+ }
+
+ /**
+  * Tell whether the request has a body, judging by its transfer coding.
+  *
+  * @param[in] tx
+  * @return 1 if the transfer coding is identity or chunked, 0 for any other
+  *         coding, -1 when tx is NULL.
+  */
+ int htp_tx_req_has_body(const htp_tx_t *tx) {
+     if (tx == NULL) return -1;
+
+     switch (tx->request_transfer_coding) {
+         case HTP_CODING_IDENTITY:
+         case HTP_CODING_CHUNKED:
+             return 1;
+
+         default:
+             return 0;
+     }
+ }
+
+ /**
+  * Create a request header from the given name and value and add it to the
+  * transaction's request headers.
+  *
+  * @param[in] tx
+  * @param[in] name
+  * @param[in] name_len
+  * @param[in] value
+  * @param[in] value_len
+  * @param[in] alloc Whether the name and value memory is wrapped or copied.
+  * @return HTP_OK on success, HTP_ERROR if an argument is NULL, an allocation
+  *         fails, or the header cannot be added to the table.
+  */
+ htp_status_t htp_tx_req_set_header(htp_tx_t *tx, const char *name, size_t name_len,
+         const char *value, size_t value_len, enum htp_alloc_strategy_t alloc) {
+     if ((tx == NULL) || (name == NULL) || (value == NULL)) return HTP_ERROR;
+
+     // calloc leaves name and value NULL, which keeps the shared cleanup
+     // path below valid at every stage.
+     htp_header_t *hdr = calloc(1, sizeof (htp_header_t));
+     if (hdr == NULL) return HTP_ERROR;
+
+     hdr->name = copy_or_wrap_mem(name, name_len, alloc);
+     if (hdr->name == NULL) goto fail;
+
+     hdr->value = copy_or_wrap_mem(value, value_len, alloc);
+     if (hdr->value == NULL) goto fail;
+
+     // htp_table_add() stores its own copy of the key.
+     if (htp_table_add(tx->request_headers, hdr->name, hdr) != HTP_OK) goto fail;
+
+     return HTP_OK;
+
+ fail:
+     bstr_free(hdr->name);
+     bstr_free(hdr->value);
+     free(hdr);
+     return HTP_ERROR;
+ }
+
+ /**
+  * Set the request method. A previously set method is released once the new
+  * one has been created; on failure the previous value is left in place.
+  *
+  * @param[in] tx
+  * @param[in] method
+  * @param[in] method_len
+  * @param[in] alloc Whether the method memory is wrapped or copied.
+  * @return HTP_OK on success, HTP_ERROR if an argument is NULL or the
+  *         allocation fails.
+  */
+ htp_status_t htp_tx_req_set_method(htp_tx_t *tx, const char *method, size_t method_len, enum htp_alloc_strategy_t alloc) {
+     if ((tx == NULL) || (method == NULL)) return HTP_ERROR;
+
+     // Build the new value first so that a failure cannot lose the old one.
+     bstr *new_method = copy_or_wrap_mem(method, method_len, alloc);
+     if (new_method == NULL) return HTP_ERROR;
+
+     // Release the value being replaced; otherwise a repeated call leaks it.
+     bstr_free(tx->request_method);
+     tx->request_method = new_method;
+
+     return HTP_OK;
+ }
+
+ /**
+  * Set the numeric representation of the request method.
+  *
+  * @param[in] tx
+  * @param[in] method_number
+  */
+ void htp_tx_req_set_method_number(htp_tx_t *tx, enum htp_method_t method_number) {
+     if (tx != NULL) {
+         tx->request_method_number = method_number;
+     }
+ }
+
+ /**
+  * Set the request URI. A previously set URI is released once the new one
+  * has been created; on failure the previous value is left in place.
+  *
+  * @param[in] tx
+  * @param[in] uri
+  * @param[in] uri_len
+  * @param[in] alloc Whether the URI memory is wrapped or copied.
+  * @return HTP_OK on success, HTP_ERROR if an argument is NULL or the
+  *         allocation fails.
+  */
+ htp_status_t htp_tx_req_set_uri(htp_tx_t *tx, const char *uri, size_t uri_len, enum htp_alloc_strategy_t alloc) {
+     if ((tx == NULL) || (uri == NULL)) return HTP_ERROR;
+
+     // Build the new value first so that a failure cannot lose the old one.
+     bstr *new_uri = copy_or_wrap_mem(uri, uri_len, alloc);
+     if (new_uri == NULL) return HTP_ERROR;
+
+     // Release the value being replaced; otherwise a repeated call leaks it.
+     bstr_free(tx->request_uri);
+     tx->request_uri = new_uri;
+
+     return HTP_OK;
+ }
+
+ /**
+  * Set the request protocol string. A previously set protocol is released
+  * once the new one has been created; on failure the previous value is left
+  * in place.
+  *
+  * @param[in] tx
+  * @param[in] protocol
+  * @param[in] protocol_len
+  * @param[in] alloc Whether the protocol memory is wrapped or copied.
+  * @return HTP_OK on success, HTP_ERROR if an argument is NULL or the
+  *         allocation fails.
+  */
+ htp_status_t htp_tx_req_set_protocol(htp_tx_t *tx, const char *protocol, size_t protocol_len, enum htp_alloc_strategy_t alloc) {
+     if ((tx == NULL) || (protocol == NULL)) return HTP_ERROR;
+
+     // Build the new value first so that a failure cannot lose the old one.
+     bstr *new_protocol = copy_or_wrap_mem(protocol, protocol_len, alloc);
+     if (new_protocol == NULL) return HTP_ERROR;
+
+     // Release the value being replaced; otherwise a repeated call leaks it.
+     bstr_free(tx->request_protocol);
+     tx->request_protocol = new_protocol;
+
+     return HTP_OK;
+ }
+
+ /**
+  * Set the numeric representation of the request protocol.
+  *
+  * @param[in] tx
+  * @param[in] protocol_number
+  */
+ void htp_tx_req_set_protocol_number(htp_tx_t *tx, int protocol_number) {
+     if (tx != NULL) {
+         tx->request_protocol_number = protocol_number;
+     }
+ }
+
+ /**
+  * Record whether the request uses HTTP/0.9.
+  *
+  * @param[in] tx
+  * @param[in] is_protocol_0_9 Any non-zero value is stored as 1, zero as 0.
+  */
+ void htp_tx_req_set_protocol_0_9(htp_tx_t *tx, int is_protocol_0_9) {
+     if (tx == NULL) return;
+
+     // Normalize the flag to exactly 0 or 1.
+     tx->is_protocol_0_9 = (is_protocol_0_9 != 0) ? 1 : 0;
+ }
+
+static htp_status_t htp_tx_process_request_headers(htp_tx_t *tx) {
+ if (tx == NULL) return HTP_ERROR;
+
+ // Determine if we have a request body, and how it is packaged.
+
+ htp_status_t rc = HTP_OK;
+
+ if (tx->connp->cfg->request_decompression_enabled) {
+ tx->request_content_encoding = HTP_COMPRESSION_NONE;
+ htp_header_t *ce = htp_table_get_c(tx->request_headers, "content-encoding");
+ if (ce != NULL) {
+ /* fast paths: regular gzip and friends */
+ if ((bstr_cmp_c_nocasenorzero(ce->value, "gzip") == 0) ||
+ (bstr_cmp_c_nocasenorzero(ce->value, "x-gzip") == 0)) {
+ tx->request_content_encoding = HTP_COMPRESSION_GZIP;
+ } else if ((bstr_cmp_c_nocasenorzero(ce->value, "deflate") == 0) ||
+ (bstr_cmp_c_nocasenorzero(ce->value, "x-deflate") == 0)) {
+ tx->request_content_encoding = HTP_COMPRESSION_DEFLATE;
+ } else if (bstr_cmp_c_nocasenorzero(ce->value, "lzma") == 0) {
+ tx->request_content_encoding = HTP_COMPRESSION_LZMA;
+ }
+ //ignore other cases such as inflate, ot multiple layers
+ if ((tx->request_content_encoding != HTP_COMPRESSION_NONE))
+ {
+ if (tx->connp->req_decompressor != NULL) {
+ htp_tx_req_destroy_decompressors(tx->connp);
+ }
+ tx->connp->req_decompressor = htp_gzip_decompressor_create(tx->connp, tx->request_content_encoding);
+ if (tx->connp->req_decompressor == NULL)
+ return HTP_ERROR;
+
+ tx->connp->req_decompressor->callback = htp_tx_req_process_body_data_decompressor_callback;
+ }
+ }
+ }
+
+ htp_header_t *cl = htp_table_get_c(tx->request_headers, "content-length");
+ htp_header_t *te = htp_table_get_c(tx->request_headers, "transfer-encoding");
+
+ // Check for the Transfer-Encoding header, which would indicate a chunked request body.
+ if (te != NULL) {
+ // Make sure it contains "chunked" only.
+ // TODO The HTTP/1.1 RFC also allows the T-E header to contain "identity", which
+ // presumably should have the same effect as T-E header absence. However, Apache
+ // (2.2.22 on Ubuntu 12.04 LTS) instead errors out with "Unknown Transfer-Encoding: identity".
+ // And it behaves strangely, too, sending a 501 and proceeding to process the request
+ // (e.g., PHP is run), but without the body. It then closes the connection.
+ if (htp_header_has_token(bstr_ptr(te->value), bstr_len(te->value), (unsigned char*) "chunked") != HTP_OK) {
+ // Invalid T-E header value.
+ tx->request_transfer_coding = HTP_CODING_INVALID;
+ tx->flags |= HTP_REQUEST_INVALID_T_E;
+ tx->flags |= HTP_REQUEST_INVALID;
+ } else {
+ // Chunked encoding is a HTTP/1.1 feature, so check that an earlier protocol
+ // version is not used. The flag will also be set if the protocol could not be parsed.
+ //
+ // TODO IIS 7.0, for example, would ignore the T-E header when it
+ // it is used with a protocol below HTTP 1.1. This should be a
+ // personality trait.
+ if (tx->request_protocol_number < HTP_PROTOCOL_1_1) {
+ tx->flags |= HTP_REQUEST_INVALID_T_E;
+ tx->flags |= HTP_REQUEST_SMUGGLING;
+ }
+
+ // If the T-E header is present we are going to use it.
+ tx->request_transfer_coding = HTP_CODING_CHUNKED;
+
+ // We are still going to check for the presence of C-L.
+ if (cl != NULL) {
+ // According to the HTTP/1.1 RFC (section 4.4):
+ //
+ // "The Content-Length header field MUST NOT be sent
+ // if these two lengths are different (i.e., if a Transfer-Encoding
+ // header field is present). If a message is received with both a
+ // Transfer-Encoding header field and a Content-Length header field,
+ // the latter MUST be ignored."
+ //
+ tx->flags |= HTP_REQUEST_SMUGGLING;
+ }
+ }
+ } else if (cl != NULL) {
+ // Check for a folded C-L header.
+ if (cl->flags & HTP_FIELD_FOLDED) {
+ tx->flags |= HTP_REQUEST_SMUGGLING;
+ }
+
+ // Check for multiple C-L headers.
+ if (cl->flags & HTP_FIELD_REPEATED) {
+ tx->flags |= HTP_REQUEST_SMUGGLING;
+ // TODO Personality trait to determine which C-L header to parse.
+ // At the moment we're parsing the combination of all instances,
+ // which is bound to fail (because it will contain commas).
+ }
+
+ // Get the body length.
+ tx->request_content_length = htp_parse_content_length(cl->value, tx->connp);
+ if (tx->request_content_length < 0) {
+ tx->request_transfer_coding = HTP_CODING_INVALID;
+ tx->flags |= HTP_REQUEST_INVALID_C_L;
+ tx->flags |= HTP_REQUEST_INVALID;
+ } else {
+ // We have a request body of known length.
+ tx->request_transfer_coding = HTP_CODING_IDENTITY;
+ }
+ } else {
+ // No body.
+ tx->request_transfer_coding = HTP_CODING_NO_BODY;
+ }
+
+ // If we could not determine the correct body handling,
+ // consider the request invalid.
+ if (tx->request_transfer_coding == HTP_CODING_UNKNOWN) {
+ tx->request_transfer_coding = HTP_CODING_INVALID;
+ tx->flags |= HTP_REQUEST_INVALID;
+ }
+
+ // Check for PUT requests, which we need to treat as file uploads.
+ if (tx->request_method_number == HTP_M_PUT) {
+ if (htp_tx_req_has_body(tx)) {
+ // Prepare to treat PUT request body as a file.
+
+ tx->connp->put_file = calloc(1, sizeof (htp_file_t));
+ if (tx->connp->put_file == NULL) return HTP_ERROR;
+
+ tx->connp->put_file->fd = -1;
+ tx->connp->put_file->source = HTP_FILE_PUT;
+ } else {
+ // TODO Warn about PUT request without a body.
+ }
+ }
+
+ // Determine hostname.
+
+ // Use the hostname from the URI, when available.
+ if (tx->parsed_uri->hostname != NULL) {
+ tx->request_hostname = bstr_dup(tx->parsed_uri->hostname);
+ if (tx->request_hostname == NULL) return HTP_ERROR;
+ }
+
+ tx->request_port_number = tx->parsed_uri->port_number;
+
+ // Examine the Host header.
+
+ htp_header_t *h = htp_table_get_c(tx->request_headers, "host");
+ if (h == NULL) {
+ // No host information in the headers.
+
+ // HTTP/1.1 requires host information in the headers.
+ if (tx->request_protocol_number >= HTP_PROTOCOL_1_1) {
+ tx->flags |= HTP_HOST_MISSING;
+ }
+ } else {
+ // Host information available in the headers.
+
+ bstr *hostname;
+ int port;
+
+ rc = htp_parse_header_hostport(h->value, &hostname, NULL, &port, &(tx->flags));
+ if (rc != HTP_OK) return rc;
+
+ if (hostname != NULL) {
+ // The host information in the headers is valid.
+
+ // Is there host information in the URI?
+ if (tx->request_hostname == NULL) {
+ // There is no host information in the URI. Place the
+ // hostname from the headers into the parsed_uri structure.
+ tx->request_hostname = hostname;
+ tx->request_port_number = port;
+ } else {
+ // The host information appears in the URI and in the headers. The
+ // HTTP RFC states that we should ignore the header copy.
+
+ // Check for different hostnames.
+ if (bstr_cmp_nocase(hostname, tx->request_hostname) != 0) {
+ tx->flags |= HTP_HOST_AMBIGUOUS;
+ }
+
+ // Check for different ports.
+ if (((tx->request_port_number != -1)&&(port != -1))&&(tx->request_port_number != port)) {
+ tx->flags |= HTP_HOST_AMBIGUOUS;
+ }
+
+ bstr_free(hostname);
+ }
+ } else {
+ // Invalid host information in the headers.
+
+ if (tx->request_hostname != NULL) {
+ // Raise the flag, even though the host information in the headers is invalid.
+ tx->flags |= HTP_HOST_AMBIGUOUS;
+ }
+ }
+ }
+
+ // Determine Content-Type.
+ htp_header_t *ct = htp_table_get_c(tx->request_headers, "content-type");
+ if (ct != NULL) {
+ rc = htp_parse_ct_header(ct->value, &tx->request_content_type);
+ if (rc != HTP_OK) return rc;
+ }
+
+ // Parse cookies.
+ if (tx->connp->cfg->parse_request_cookies) {
+ rc = htp_parse_cookies_v0(tx->connp);
+ if (rc != HTP_OK) return rc;
+ }
+
+ // Parse authentication information.
+ if (tx->connp->cfg->parse_request_auth) {
+ rc = htp_parse_authorization(tx->connp);
+ if (rc == HTP_DECLINED) {
+ // Don't fail the stream if an authorization header is invalid, just set a flag.
+ tx->flags |= HTP_AUTH_INVALID;
+ } else {
+ if (rc != HTP_OK) return rc;
+ }
+ }
+
+ // Finalize sending raw header data.
+ rc = htp_connp_req_receiver_finalize_clear(tx->connp);
+ if (rc != HTP_OK) return rc;
+
+ // Run hook REQUEST_HEADERS.
+ rc = htp_hook_run_all(tx->connp->cfg->hook_request_headers, tx);
+ if (rc != HTP_OK) return rc;
+
+ // We still proceed if the request is invalid.
+
+ return HTP_OK;
+}
+
+ /**
+  * Public entry point for handing a chunk of request body bytes to the
+  * transaction.
+  *
+  * @param[in] tx    Transaction; must not be NULL.
+  * @param[in] data  Body bytes; must not be NULL (NULL is reserved for the
+  *                  end-of-body marker accepted only by the _ex variant).
+  * @param[in] len   Number of bytes in data.
+  * @return HTP_ERROR on NULL arguments, HTP_OK for an empty chunk, otherwise
+  *         whatever htp_tx_req_process_body_data_ex() returns.
+  */
+ htp_status_t htp_tx_req_process_body_data(htp_tx_t *tx, const void *data, size_t len) {
+     if (tx == NULL || data == NULL) {
+         return HTP_ERROR;
+     }
+
+     // An empty chunk carries no information; do not forward it.
+     return (len != 0) ? htp_tx_req_process_body_data_ex(tx, data, len) : HTP_OK;
+ }
+
+ /**
+  * Internal worker that routes one chunk of request body data according to
+  * tx->request_content_encoding.
+  *
+  * @param[in] tx    Transaction; must not be NULL.
+  * @param[in] data  Body bytes, or NULL together with len == 0 to mark the end
+  *                  of the request body.
+  * @param[in] len   Number of bytes in data.
+  * @return HTP_OK on success; HTP_ERROR when tx is NULL, a body-data hook
+  *         fails, the request decompressor is missing, or the content
+  *         encoding value is not one of the handled cases.
+  */
+ htp_status_t htp_tx_req_process_body_data_ex(htp_tx_t *tx, const void *data, size_t len) {
+ if (tx == NULL) return HTP_ERROR;
+
+ // NULL data is allowed in this private function; it's
+ // used to indicate the end of request body.
+
+ // Send data to the callbacks.
+
+ htp_tx_data_t d;
+ d.tx = tx;
+ d.data = (unsigned char *) data;
+ d.len = len;
+ // Only the explicit end-of-body marker (NULL, 0) is flagged as the last chunk.
+ d.is_last = (data == NULL && len == 0);
+
+ switch(tx->request_content_encoding) {
+ case HTP_COMPRESSION_UNKNOWN:
+ case HTP_COMPRESSION_NONE:
+ // When there's no decompression, request_entity_len
+ // is identical to request_message_len.
+ tx->request_entity_len += d.len;
+ htp_status_t rc = htp_req_run_hook_body_data(tx->connp, &d);
+ if (rc != HTP_OK) {
+ htp_log(tx->connp, HTP_LOG_MARK, HTP_LOG_ERROR, 0, "Request body data callback returned error (%d)", rc);
+ return HTP_ERROR;
+ }
+ break;
+
+ case HTP_COMPRESSION_GZIP:
+ case HTP_COMPRESSION_DEFLATE:
+ case HTP_COMPRESSION_LZMA:
+ // In severe memory stress these could be NULL
+ if (tx->connp->req_decompressor == NULL)
+ return HTP_ERROR;
+
+ // Send data buffer to the decompressor. The decompressor's return value
+ // is not inspected here.
+ // NOTE(review): decompressed output presumably reaches the body hooks via
+ // the decompressor's callback -- confirm where that callback is installed.
+ htp_gzip_decompressor_decompress(tx->connp->req_decompressor, &d);
+
+ if (data == NULL) {
+ // Shut down the decompressor, if we used one.
+ htp_tx_req_destroy_decompressors(tx->connp);
+ }
+ break;
+
+ default:
+ // Internal error.
+ htp_log(tx->connp, HTP_LOG_MARK, HTP_LOG_ERROR, 0,
+ "[Internal Error] Invalid tx->request_content_encoding value: %d",
+ tx->request_content_encoding);
+ return HTP_ERROR;
+ }
+
+ return HTP_OK;
+ }
+
+ /**
+  * Discards every request header stored on the transaction and installs a
+  * fresh, empty header table.
+  *
+  * @param[in] tx  Transaction whose request_headers table is reset.
+  * @return HTP_OK on success; HTP_ERROR if tx or its header table is NULL, or
+  *         if the replacement table cannot be created.
+  */
+ htp_status_t htp_tx_req_set_headers_clear(htp_tx_t *tx) {
+     if (tx == NULL) return HTP_ERROR;
+     if (tx->request_headers == NULL) return HTP_ERROR;
+
+     // Release each header record (name, value and the record itself).
+     size_t count = htp_table_size(tx->request_headers);
+     size_t idx = 0;
+     while (idx < count) {
+         htp_header_t *hdr = htp_table_get_index(tx->request_headers, idx, NULL);
+         bstr_free(hdr->name);
+         bstr_free(hdr->value);
+         free(hdr);
+         idx++;
+     }
+
+     // Swap the old table for an empty one.
+     htp_table_destroy(tx->request_headers);
+     tx->request_headers = htp_table_create(32);
+
+     return (tx->request_headers == NULL) ? HTP_ERROR : HTP_OK;
+ }
+
+ /**
+  * Stores the raw request line on the transaction and runs the configured
+  * request-line parser over it.
+  *
+  * A request line set by an earlier call is released before the new one is
+  * stored, mirroring what htp_tx_res_set_status_message() does for its field;
+  * previously the old value was overwritten and leaked.
+  *
+  * @param[in] tx        Transaction; must not be NULL.
+  * @param[in] line      Request line bytes; must not be NULL.
+  * @param[in] line_len  Length of line; must be non-zero.
+  * @param[in] alloc     Whether the bytes are copied or wrapped.
+  * @return HTP_OK on success; HTP_ERROR on invalid arguments, allocation
+  *         failure, or when the request-line parser does not return HTP_OK.
+  */
+ htp_status_t htp_tx_req_set_line(htp_tx_t *tx, const char *line, size_t line_len, enum htp_alloc_strategy_t alloc) {
+     if ((tx == NULL) || (line == NULL) || (line_len == 0)) return HTP_ERROR;
+
+     // Drop any previously stored request line so it is not leaked.
+     if (tx->request_line != NULL) {
+         bstr_free(tx->request_line);
+     }
+
+     tx->request_line = copy_or_wrap_mem(line, line_len, alloc);
+     if (tx->request_line == NULL) return HTP_ERROR;
+
+     if (tx->connp->cfg->parse_request_line(tx->connp) != HTP_OK) return HTP_ERROR;
+
+     return HTP_OK;
+ }
+
+ /**
+  * Replaces the transaction's parsed URI with the supplied structure, freeing
+  * the one that was stored before (if any).
+  *
+  * @param[in] tx          Transaction; the call is a no-op when NULL.
+  * @param[in] parsed_uri  New parsed URI; the call is a no-op when NULL.
+  */
+ void htp_tx_req_set_parsed_uri(htp_tx_t *tx, htp_uri_t *parsed_uri) {
+     if (tx == NULL || parsed_uri == NULL) {
+         return;
+     }
+
+     htp_uri_t *previous = tx->parsed_uri;
+     if (previous != NULL) {
+         htp_uri_free(previous);
+     }
+
+     tx->parsed_uri = parsed_uri;
+ }
+
+ /**
+  * Stores the raw response status line on the transaction and runs the
+  * configured response-line parser over it.
+  *
+  * A status line set by an earlier call is released before the new one is
+  * stored, mirroring what htp_tx_res_set_status_message() does for its field;
+  * previously the old value was overwritten and leaked.
+  *
+  * @param[in] tx        Transaction; must not be NULL.
+  * @param[in] line      Status line bytes; must not be NULL.
+  * @param[in] line_len  Length of line; must be non-zero.
+  * @param[in] alloc     Whether the bytes are copied or wrapped.
+  * @return HTP_OK on success; HTP_ERROR on invalid arguments, allocation
+  *         failure, or when the response-line parser does not return HTP_OK.
+  */
+ htp_status_t htp_tx_res_set_status_line(htp_tx_t *tx, const char *line, size_t line_len, enum htp_alloc_strategy_t alloc) {
+     if ((tx == NULL) || (line == NULL) || (line_len == 0)) return HTP_ERROR;
+
+     // Drop any previously stored status line so it is not leaked.
+     if (tx->response_line != NULL) {
+         bstr_free(tx->response_line);
+     }
+
+     tx->response_line = copy_or_wrap_mem(line, line_len, alloc);
+     if (tx->response_line == NULL) return HTP_ERROR;
+
+     if (tx->connp->cfg->parse_response_line(tx->connp) != HTP_OK) return HTP_ERROR;
+
+     return HTP_OK;
+ }
+
+ /**
+  * Records the response protocol number on the transaction.
+  *
+  * @param[in] tx               Transaction; ignored when NULL.
+  * @param[in] protocol_number  Value stored in response_protocol_number.
+  */
+ void htp_tx_res_set_protocol_number(htp_tx_t *tx, int protocol_number) {
+     if (tx != NULL) {
+         tx->response_protocol_number = protocol_number;
+     }
+ }
+
+ /**
+  * Records the response status code on the transaction.
+  *
+  * @param[in] tx           Transaction; ignored when NULL.
+  * @param[in] status_code  Value stored in response_status_number.
+  */
+ void htp_tx_res_set_status_code(htp_tx_t *tx, int status_code) {
+     if (tx != NULL) {
+         tx->response_status_number = status_code;
+     }
+ }
+
+ /**
+  * Replaces the response status message stored on the transaction.
+  *
+  * @param[in] tx       Transaction; must not be NULL.
+  * @param[in] msg      Message bytes; must not be NULL.
+  * @param[in] msg_len  Length of msg.
+  * @param[in] alloc    Whether the bytes are copied or wrapped.
+  * @return HTP_OK on success; HTP_ERROR on NULL arguments or when the new
+  *         message cannot be allocated.
+  */
+ htp_status_t htp_tx_res_set_status_message(htp_tx_t *tx, const char *msg, size_t msg_len, enum htp_alloc_strategy_t alloc) {
+     if (tx == NULL || msg == NULL) {
+         return HTP_ERROR;
+     }
+
+     // Release the message from any earlier call before storing the new one.
+     if (tx->response_message != NULL) {
+         bstr_free(tx->response_message);
+     }
+
+     bstr *stored = copy_or_wrap_mem(msg, msg_len, alloc);
+     tx->response_message = stored;
+
+     return (stored != NULL) ? HTP_OK : HTP_ERROR;
+ }
+
+ /**
+  * Validates the parsed response line and runs the RESPONSE_LINE hooks.
+  *
+  * An invalid protocol or an out-of-range status code does not fail the call;
+  * it is logged as a warning and HTP_STATUS_LINE_INVALID is set in tx->flags.
+  *
+  * @param[in] tx  Transaction; must not be NULL.
+  * @return HTP_ERROR if tx is NULL, otherwise the result of running the
+  *         response-line hooks (HTP_OK when they all succeed).
+  */
+ htp_status_t htp_tx_state_response_line(htp_tx_t *tx) {
+ if (tx == NULL) return HTP_ERROR;
+
+ #if 0
+ // Commented-out until we determine which fields can be
+ // unavailable in real-life.
+
+ // Unless we're dealing with HTTP/0.9, check that
+ // the minimum amount of data has been provided.
+ // NOTE(review): the condition below fires when is_protocol_0_9 is non-zero,
+ // which looks inverted relative to the comment above -- confirm before enabling.
+ if (tx->is_protocol_0_9 != 0) {
+ if ((tx->response_protocol == NULL) || (tx->response_status_number == -1) || (tx->response_message == NULL)) {
+ return HTP_ERROR;
+ }
+ }
+ #endif
+
+ // Is the response line valid?
+ if (tx->response_protocol_number == HTP_PROTOCOL_INVALID) {
+ htp_log(tx->connp, HTP_LOG_MARK, HTP_LOG_WARNING, 0,
+ "Invalid response line: invalid protocol");
+ tx->flags |= HTP_STATUS_LINE_INVALID;
+ }
+ // A status outside [HTP_VALID_STATUS_MIN, HTP_VALID_STATUS_MAX] is normalized
+ // to HTP_STATUS_INVALID after being logged.
+ if ((tx->response_status_number == HTP_STATUS_INVALID)
+ || (tx->response_status_number < HTP_VALID_STATUS_MIN)
+ || (tx->response_status_number > HTP_VALID_STATUS_MAX)) {
+ htp_log(tx->connp, HTP_LOG_MARK, HTP_LOG_WARNING, 0,
+ "Invalid response line: invalid response status %d.",
+ tx->response_status_number);
+ tx->response_status_number = HTP_STATUS_INVALID;
+ tx->flags |= HTP_STATUS_LINE_INVALID;
+ }
+
+ // Run hook HTP_RESPONSE_LINE
+ htp_status_t rc = htp_hook_run_all(tx->connp->cfg->hook_response_line, tx);
+ if (rc != HTP_OK) return rc;
+
+ return HTP_OK;
+ }
+
+ /**
+  * Creates a response header record from the given name/value pair and adds
+  * it to the transaction's response header table, keyed by the header name.
+  *
+  * @param[in] tx         Transaction; must not be NULL.
+  * @param[in] name       Header name bytes; must not be NULL.
+  * @param[in] name_len   Length of name.
+  * @param[in] value      Header value bytes; must not be NULL.
+  * @param[in] value_len  Length of value.
+  * @param[in] alloc      Whether the bytes are copied or wrapped.
+  * @return HTP_OK on success; HTP_ERROR on NULL arguments, allocation failure,
+  *         or when the table rejects the entry. Nothing is leaked on failure.
+  */
+ htp_status_t htp_tx_res_set_header(htp_tx_t *tx, const char *name, size_t name_len,
+         const char *value, size_t value_len, enum htp_alloc_strategy_t alloc) {
+     if (tx == NULL || name == NULL || value == NULL) {
+         return HTP_ERROR;
+     }
+
+     htp_header_t *hdr = calloc(1, sizeof (htp_header_t));
+     if (hdr == NULL) {
+         return HTP_ERROR;
+     }
+
+     hdr->name = copy_or_wrap_mem(name, name_len, alloc);
+     if (hdr->name == NULL) goto fail_record;
+
+     hdr->value = copy_or_wrap_mem(value, value_len, alloc);
+     if (hdr->value == NULL) goto fail_name;
+
+     if (htp_table_add(tx->response_headers, hdr->name, hdr) != HTP_OK) goto fail_value;
+
+     return HTP_OK;
+
+     // Unwind in reverse order of acquisition.
+ fail_value:
+     bstr_free(hdr->value);
+ fail_name:
+     bstr_free(hdr->name);
+ fail_record:
+     free(hdr);
+     return HTP_ERROR;
+ }
+
+ /**
+  * Discards every response header stored on the transaction and installs a
+  * fresh, empty header table.
+  *
+  * @param[in] tx  Transaction whose response_headers table is reset.
+  * @return HTP_OK on success; HTP_ERROR if tx or its header table is NULL, or
+  *         if the replacement table cannot be created.
+  */
+ htp_status_t htp_tx_res_set_headers_clear(htp_tx_t *tx) {
+     if (tx == NULL) return HTP_ERROR;
+     if (tx->response_headers == NULL) return HTP_ERROR;
+
+     // Release each header record (name, value and the record itself).
+     size_t count = htp_table_size(tx->response_headers);
+     size_t idx = 0;
+     while (idx < count) {
+         htp_header_t *hdr = htp_table_get_index(tx->response_headers, idx, NULL);
+         bstr_free(hdr->name);
+         bstr_free(hdr->value);
+         free(hdr);
+         idx++;
+     }
+
+     // Swap the old table for an empty one.
+     htp_table_destroy(tx->response_headers);
+     tx->response_headers = htp_table_create(32);
+
+     return (tx->response_headers == NULL) ? HTP_ERROR : HTP_OK;
+ }
+
+ /** \internal
+  *
+  * Destroys the whole chain of response (outbound) decompressors attached to
+  * the connection parser and clears the chain head.
+  *
+  * @param[in] connp  Connection parser owning the out_decompressor chain.
+  */
+ static void htp_tx_res_destroy_decompressors(htp_connp_t *connp) {
+     htp_decompressor_t *following = NULL;
+
+     // Read the successor before destroying each node.
+     for (htp_decompressor_t *current = connp->out_decompressor; current != NULL; current = following) {
+         following = current->next;
+         htp_gzip_decompressor_destroy(current);
+     }
+
+     connp->out_decompressor = NULL;
+ }
+
+ /** \internal
+  *
+  * Destroys the whole chain of request (inbound) decompressors attached to
+  * the connection parser and clears the chain head.
+  *
+  * @param[in] connp  Connection parser owning the req_decompressor chain.
+  */
+ static void htp_tx_req_destroy_decompressors(htp_connp_t *connp) {
+     htp_decompressor_t *following = NULL;
+
+     // Read the successor before destroying each node.
+     for (htp_decompressor_t *current = connp->req_decompressor; current != NULL; current = following) {
+         following = current->next;
+         htp_gzip_decompressor_destroy(current);
+     }
+
+     connp->req_decompressor = NULL;
+ }
+
+ /**
+  * Destroys both decompressor chains (response and request) held by the
+  * connection parser.
+  *
+  * @param[in] connp  Connection parser; dereferenced without a NULL check.
+  */
+ void htp_connp_destroy_decompressors(htp_connp_t *connp) {
+ htp_tx_res_destroy_decompressors(connp);
+ htp_tx_req_destroy_decompressors(connp);
+ }
+
+ /** \internal
+  *
+  * Adds the time elapsed between two timestamps, in microseconds, to a running
+  * counter.
+  *
+  * The counter is a 32-bit signed value, so the addition saturates at
+  * INT32_MAX instead of wrapping. Without that, a large forward clock jump or
+  * a long accumulation could wrap the counter negative and defeat the
+  * "time_spent > limit" checks made by the callers.
+  *
+  * @param[in,out] time_spent  Accumulated microseconds; updated on success.
+  * @param[in]     after       Later timestamp.
+  * @param[in]     before      Earlier timestamp.
+  * @return HTP_OK when the counter was updated; HTP_ERROR when after precedes
+  *         before (e.g. the system clock was set back), in which case the
+  *         counter is left untouched.
+  */
+ static htp_status_t htp_timer_track(int32_t *time_spent, struct timeval * after, struct timeval *before) {
+     // Refuse to account for time that appears to run backwards.
+     if (after->tv_sec < before->tv_sec) {
+         return HTP_ERROR;
+     }
+     if ((after->tv_sec == before->tv_sec) && (after->tv_usec < before->tv_usec)) {
+         return HTP_ERROR;
+     }
+
+     int64_t sec_delta = (int64_t) after->tv_sec - (int64_t) before->tv_sec;
+
+     // More whole seconds than an int32_t can express in microseconds: saturate
+     // right away, which also keeps the multiplication below from overflowing.
+     if (sec_delta > INT32_MAX / 1000000) {
+         *time_spent = INT32_MAX;
+         return HTP_OK;
+     }
+
+     int64_t elapsed = sec_delta * 1000000 + ((int64_t) after->tv_usec - (int64_t) before->tv_usec);
+     int64_t total = (int64_t) *time_spent + elapsed;
+
+     if (total > INT32_MAX) {
+         total = INT32_MAX;
+     } else if (total < INT32_MIN) {
+         total = INT32_MIN;
+     }
+
+     *time_spent = (int32_t) total;
+     return HTP_OK;
+ }
+
+ /** \internal
+  *
+  * Handles one chunk of decompressed request body data: accounts for its
+  * length, runs the request body-data hooks, and applies the decompression
+  * time and size ("compression bomb") limits.
+  *
+  * @param[in] d  Data chunk descriptor; d->tx identifies the transaction.
+  * @return HTP_OK on success; HTP_ERROR if d is NULL, a hook fails, or the
+  *         decompressed size exceeds both the configured bomb limit and
+  *         HTP_COMPRESSION_BOMB_RATIO times the compressed size.
+  */
+ static htp_status_t htp_tx_req_process_body_data_decompressor_callback(htp_tx_data_t *d) {
+ if (d == NULL) return HTP_ERROR;
+
+ #if HTP_DEBUG
+ fprint_raw_data(stderr, __func__, d->data, d->len);
+ #endif
+
+ // Keep track of actual request body length.
+ d->tx->request_entity_len += d->len;
+
+ // Invoke all callbacks.
+ htp_status_t rc = htp_req_run_hook_body_data(d->tx->connp, d);
+ if (rc != HTP_OK) return HTP_ERROR;
+ // Sample the clock only once every HTP_COMPRESSION_TIME_FREQ_TEST callbacks.
+ d->tx->connp->req_decompressor->nb_callbacks++;
+ if (d->tx->connp->req_decompressor->nb_callbacks % HTP_COMPRESSION_TIME_FREQ_TEST == 0) {
+ struct timeval after;
+ gettimeofday(&after, NULL);
+ // sanity check for race condition if system time changed
+ if ( htp_timer_track(&d->tx->connp->req_decompressor->time_spent, &after, &d->tx->connp->req_decompressor->time_before) == HTP_OK) {
+ // updates last tracked time
+ d->tx->connp->req_decompressor->time_before = after;
+ // Over the time budget: log it and set the decompressor's passthrough flag.
+ if (d->tx->connp->req_decompressor->time_spent > d->tx->connp->cfg->compression_time_limit ) {
+ htp_log(d->tx->connp, HTP_LOG_MARK, HTP_LOG_ERROR, 0,
+ "Compression bomb: spent %"PRId32" us decompressing",
+ d->tx->connp->req_decompressor->time_spent);
+ d->tx->connp->req_decompressor->passthrough = 1;
+ }
+ }
+
+ }
+ // Size check: both the absolute limit and the expansion ratio must be exceeded.
+ if (d->tx->request_entity_len > d->tx->connp->cfg->compression_bomb_limit &&
+ d->tx->request_entity_len > HTP_COMPRESSION_BOMB_RATIO * d->tx->request_message_len) {
+ htp_log(d->tx->connp, HTP_LOG_MARK, HTP_LOG_ERROR, 0,
+ "Compression bomb: decompressed %"PRId64" bytes out of %"PRId64,
+ d->tx->request_entity_len, d->tx->request_message_len);
+ return HTP_ERROR;
+ }
+
+ return HTP_OK;
+ }
+
+ /** \internal
+  *
+  * Handles one chunk of decompressed response body data: accounts for its
+  * length, runs the response body-data hooks, and applies the decompression
+  * time and size ("compression bomb") limits.
+  *
+  * @param[in] d  Data chunk descriptor; d->tx identifies the transaction.
+  * @return HTP_OK on success; HTP_ERROR if d is NULL, a hook fails, or the
+  *         decompressed size exceeds both the configured bomb limit and
+  *         HTP_COMPRESSION_BOMB_RATIO times the compressed size.
+  */
+ static htp_status_t htp_tx_res_process_body_data_decompressor_callback(htp_tx_data_t *d) {
+ if (d == NULL) return HTP_ERROR;
+
+ #if HTP_DEBUG
+ fprint_raw_data(stderr, __func__, d->data, d->len);
+ #endif
+
+ // Keep track of actual response body length.
+ d->tx->response_entity_len += d->len;
+
+ // Invoke all callbacks.
+ htp_status_t rc = htp_res_run_hook_body_data(d->tx->connp, d);
+ if (rc != HTP_OK) return HTP_ERROR;
+ // Sample the clock only once every HTP_COMPRESSION_TIME_FREQ_TEST callbacks.
+ d->tx->connp->out_decompressor->nb_callbacks++;
+ if (d->tx->connp->out_decompressor->nb_callbacks % HTP_COMPRESSION_TIME_FREQ_TEST == 0) {
+ struct timeval after;
+ gettimeofday(&after, NULL);
+ // sanity check for race condition if system time changed
+ if ( htp_timer_track(&d->tx->connp->out_decompressor->time_spent, &after, &d->tx->connp->out_decompressor->time_before) == HTP_OK) {
+ // updates last tracked time
+ d->tx->connp->out_decompressor->time_before = after;
+ // Over the time budget: log it and set the decompressor's passthrough flag.
+ if (d->tx->connp->out_decompressor->time_spent > d->tx->connp->cfg->compression_time_limit ) {
+ htp_log(d->tx->connp, HTP_LOG_MARK, HTP_LOG_ERROR, 0,
+ "Compression bomb: spent %"PRId32" us decompressing",
+ d->tx->connp->out_decompressor->time_spent);
+ d->tx->connp->out_decompressor->passthrough = 1;
+ }
+ }
+
+ }
+ // Size check: both the absolute limit and the expansion ratio must be exceeded.
+ if (d->tx->response_entity_len > d->tx->connp->cfg->compression_bomb_limit &&
+ d->tx->response_entity_len > HTP_COMPRESSION_BOMB_RATIO * d->tx->response_message_len) {
+ htp_log(d->tx->connp, HTP_LOG_MARK, HTP_LOG_ERROR, 0,
+ "Compression bomb: decompressed %"PRId64" bytes out of %"PRId64,
+ d->tx->response_entity_len, d->tx->response_message_len);
+ return HTP_ERROR;
+ }
+
+ return HTP_OK;
+ }
+
+ /**
+  * Public entry point for handing a chunk of response body bytes to the
+  * transaction.
+  *
+  * @param[in] tx    Transaction; must not be NULL.
+  * @param[in] data  Body bytes; must not be NULL (NULL is reserved for the
+  *                  end-of-body marker accepted only by the _ex variant).
+  * @param[in] len   Number of bytes in data.
+  * @return HTP_ERROR on NULL arguments, HTP_OK for an empty chunk, otherwise
+  *         whatever htp_tx_res_process_body_data_ex() returns.
+  */
+ htp_status_t htp_tx_res_process_body_data(htp_tx_t *tx, const void *data, size_t len) {
+     if (tx == NULL || data == NULL) {
+         return HTP_ERROR;
+     }
+
+     // An empty chunk carries no information; do not forward it.
+     return (len != 0) ? htp_tx_res_process_body_data_ex(tx, data, len) : HTP_OK;
+ }
+
+ /**
+  * Internal worker that routes one chunk of response body data according to
+  * tx->response_content_encoding_processing, and tracks the raw body size and
+  * the time spent decompressing.
+  *
+  * @param[in] tx    Transaction; must not be NULL.
+  * @param[in] data  Body bytes, or NULL to mark the end of the response body.
+  * @param[in] len   Number of bytes in data.
+  * @return HTP_OK on success; HTP_ERROR when tx is NULL, the response
+  *         decompressor is missing, a body-data hook fails, or the encoding
+  *         value is not one of the handled cases.
+  */
+ htp_status_t htp_tx_res_process_body_data_ex(htp_tx_t *tx, const void *data, size_t len) {
+ if (tx == NULL) return HTP_ERROR;
+
+ // NULL data is allowed in this private function; it's
+ // used to indicate the end of response body.
+
+ #ifdef HTP_DEBUG
+ fprint_raw_data(stderr, __func__, data, len);
+ #endif
+
+ htp_tx_data_t d;
+
+ d.tx = tx;
+ d.data = (unsigned char *) data;
+ d.len = len;
+ // Unlike the request-side counterpart, is_last is always 0 here.
+ d.is_last = 0;
+
+ // Keep track of body size before decompression.
+ tx->response_message_len += d.len;
+
+ switch (tx->response_content_encoding_processing) {
+ case HTP_COMPRESSION_GZIP:
+ case HTP_COMPRESSION_DEFLATE:
+ case HTP_COMPRESSION_LZMA:
+ // In severe memory stress these could be NULL
+ if (tx->connp->out_decompressor == NULL)
+ return HTP_ERROR;
+
+ struct timeval after;
+ // Timestamp taken before decompression starts; used as the baseline for
+ // the elapsed-time accounting below.
+ gettimeofday(&tx->connp->out_decompressor->time_before, NULL);
+ // Send data buffer to the decompressor. Its return value is not inspected.
+ tx->connp->out_decompressor->nb_callbacks=0;
+ htp_gzip_decompressor_decompress(tx->connp->out_decompressor, &d);
+ gettimeofday(&after, NULL);
+ // sanity check for race condition if system time changed
+ if ( htp_timer_track(&tx->connp->out_decompressor->time_spent, &after, &tx->connp->out_decompressor->time_before) == HTP_OK) {
+ // Over the time budget: log it and set the decompressor's passthrough flag.
+ if ( tx->connp->out_decompressor->time_spent > tx->connp->cfg->compression_time_limit ) {
+ htp_log(tx->connp, HTP_LOG_MARK, HTP_LOG_ERROR, 0,
+ "Compression bomb: spent %"PRId32" us decompressing",
+ tx->connp->out_decompressor->time_spent);
+ tx->connp->out_decompressor->passthrough = 1;
+ }
+ }
+
+ if (data == NULL) {
+ // Shut down the decompressor, if we used one.
+ htp_tx_res_destroy_decompressors(tx->connp);
+ }
+ break;
+
+ case HTP_COMPRESSION_NONE:
+ // When there's no decompression, response_entity_len
+ // is identical to response_message_len.
+ tx->response_entity_len += d.len;
+
+ htp_status_t rc = htp_res_run_hook_body_data(tx->connp, &d);
+ if (rc != HTP_OK) return HTP_ERROR;
+ break;
+
+ default:
+ // Internal error.
+ htp_log(tx->connp, HTP_LOG_MARK, HTP_LOG_ERROR, 0,
+ "[Internal Error] Invalid tx->response_content_encoding_processing value: %d",
+ tx->response_content_encoding_processing);
+ return HTP_ERROR;
+ break;
+ }
+
+ return HTP_OK;
+ }
+
+ /**
+  * Performs the request-completion steps that do not touch parser state:
+  * flushes the end-of-body marker, marks the request complete, runs the
+  * REQUEST_COMPLETE hooks, finalizes the data receiver and releases the PUT
+  * file descriptor structure.
+  *
+  * @param[in] tx  Transaction; must not be NULL.
+  * @return HTP_OK on success; HTP_ERROR if tx is NULL; otherwise the first
+  *         non-OK status returned by one of the steps.
+  */
+ htp_status_t htp_tx_state_request_complete_partial(htp_tx_t *tx) {
+     if (tx == NULL) return HTP_ERROR;
+
+     htp_status_t status;
+
+     // Finalize request body by sending the (NULL, 0) end marker.
+     if (htp_tx_req_has_body(tx)) {
+         status = htp_tx_req_process_body_data_ex(tx, NULL, 0);
+         if (status != HTP_OK) return status;
+     }
+
+     tx->request_progress = HTP_REQUEST_COMPLETE;
+
+     // Run hook REQUEST_COMPLETE.
+     status = htp_hook_run_all(tx->connp->cfg->hook_request_complete, tx);
+     if (status != HTP_OK) return status;
+
+     status = htp_connp_req_receiver_finalize_clear(tx->connp);
+     if (status != HTP_OK) return status;
+
+     // Clean-up of the PUT upload record, when one was created.
+     htp_file_t *upload = tx->connp->put_file;
+     if (upload != NULL) {
+         bstr_free(upload->filename);
+         free(upload);
+         tx->connp->put_file = NULL;
+     }
+
+     return HTP_OK;
+ }
+
+ /**
+  * Completes the request side of a transaction: runs the partial completion
+  * steps if they have not run yet, moves the inbound parser to its next
+  * state, finalizes the transaction and detaches it from the parser.
+  *
+  * @param[in] tx  Transaction; must not be NULL. It may have been destroyed by
+  *                the time this function returns.
+  * @return HTP_OK on success; HTP_ERROR if tx is NULL; otherwise the non-OK
+  *         status from htp_tx_state_request_complete_partial().
+  */
+ htp_status_t htp_tx_state_request_complete(htp_tx_t *tx) {
+ if (tx == NULL) return HTP_ERROR;
+
+ if (tx->request_progress != HTP_REQUEST_COMPLETE) {
+ htp_status_t rc = htp_tx_state_request_complete_partial(tx);
+ if (rc != HTP_OK) return rc;
+ }
+
+ // Make a copy of the connection parser pointer, so that
+ // we don't have to reference it via tx, which may be
+ // destroyed later.
+ htp_connp_t *connp = tx->connp;
+
+ // Determine what happens next, and remove this transaction from the parser.
+ if (tx->is_protocol_0_9) {
+ connp->in_state = htp_connp_REQ_IGNORE_DATA_AFTER_HTTP_0_9;
+ } else {
+ connp->in_state = htp_connp_REQ_IDLE;
+ }
+
+ // Check if the entire transaction is complete. This call may
+ // destroy the transaction, if auto-destroy is enabled.
+ // The status returned by htp_tx_finalize() is not inspected here.
+ htp_tx_finalize(tx);
+
+ // At this point, tx may no longer be valid.
+
+ connp->in_tx = NULL;
+
+ return HTP_OK;
+ }
+
+ /**
+  * Signals the start of a request: runs the REQUEST_START hooks and then
+  * switches the inbound parser to request-line parsing.
+  *
+  * @param[in] tx  Transaction; must not be NULL.
+  * @return HTP_OK on success; HTP_ERROR if tx is NULL; otherwise the non-OK
+  *         status returned by the hooks.
+  */
+ htp_status_t htp_tx_state_request_start(htp_tx_t *tx) {
+     if (tx == NULL) {
+         return HTP_ERROR;
+     }
+
+     // Run hook REQUEST_START.
+     htp_status_t status = htp_hook_run_all(tx->connp->cfg->hook_request_start, tx);
+     if (status != HTP_OK) {
+         return status;
+     }
+
+     // Change state into request line parsing. Progress is recorded on the
+     // parser's current inbound transaction.
+     htp_connp_t *parser = tx->connp;
+     parser->in_state = htp_connp_REQ_LINE;
+     parser->in_tx->request_progress = HTP_REQUEST_LINE;
+
+     return HTP_OK;
+ }
+
+ /**
+  * Handles the end of a header block on the request side. Depending on
+  * tx->request_progress this is either the main request header block or a
+  * block of trailing headers.
+  *
+  * @param[in] tx  Transaction; must not be NULL.
+  * @return HTP_OK on success; HTP_ERROR if tx is NULL or request_progress is
+  *         below HTP_REQUEST_LINE; otherwise the non-OK status of a failing step.
+  */
+ htp_status_t htp_tx_state_request_headers(htp_tx_t *tx) {
+ if (tx == NULL) return HTP_ERROR;
+
+ // If progress has not gone past HTP_REQUEST_HEADERS, this is the
+ // first time we're processing headers in a request. Otherwise,
+ // we're dealing with trailing headers.
+ if (tx->request_progress > HTP_REQUEST_HEADERS) {
+ // Request trailers.
+
+ // Run hook HTP_REQUEST_TRAILER.
+ htp_status_t rc = htp_hook_run_all(tx->connp->cfg->hook_request_trailer, tx);
+ if (rc != HTP_OK) return rc;
+
+ // Finalize sending raw header data.
+ rc = htp_connp_req_receiver_finalize_clear(tx->connp);
+ if (rc != HTP_OK) return rc;
+
+ // Completed parsing this request; finalize it now.
+ tx->connp->in_state = htp_connp_REQ_FINALIZE;
+ } else if (tx->request_progress >= HTP_REQUEST_LINE) {
+ // Request headers.
+
+ // Did this request arrive in multiple data chunks?
+ if (tx->connp->in_chunk_count != tx->connp->in_chunk_request_index) {
+ tx->flags |= HTP_MULTI_PACKET_HEAD;
+ }
+
+ htp_status_t rc = htp_tx_process_request_headers(tx);
+ if (rc != HTP_OK) return rc;
+
+ // Next, the parser checks whether this is a CONNECT request.
+ tx->connp->in_state = htp_connp_REQ_CONNECT_CHECK;
+ } else {
+ // Headers cannot be processed before the request line stage.
+ htp_log(tx->connp, HTP_LOG_MARK, HTP_LOG_WARNING, 0, "[Internal Error] Invalid tx progress: %d", tx->request_progress);
+
+ return HTP_ERROR;
+ }
+
+ return HTP_OK;
+ }
+
+ /**
+  * Handles a completed request line: parses the request URI, builds the
+  * normalized parsed_uri (unless one was set explicitly), validates the URI
+  * hostname, runs the REQUEST_URI_NORMALIZE and REQUEST_LINE hooks, and moves
+  * the inbound parser to the protocol stage.
+  *
+  * @param[in] tx  Transaction; must not be NULL.
+  * @return HTP_OK on success; HTP_ERROR if tx is NULL or URI parsing,
+  *         allocation or normalization fails; otherwise the non-OK status
+  *         returned by a hook.
+  */
+ htp_status_t htp_tx_state_request_line(htp_tx_t *tx) {
+ if (tx == NULL) return HTP_ERROR;
+
+ // Determine how to process the request URI.
+
+ if (tx->request_method_number == HTP_M_CONNECT) {
+ // When CONNECT is used, the request URI contains an authority string.
+ if (htp_parse_uri_hostport(tx->connp, tx->request_uri, tx->parsed_uri_raw) != HTP_OK) {
+ return HTP_ERROR;
+ }
+ } else {
+ // Parse the request URI into htp_tx_t::parsed_uri_raw.
+ if (htp_parse_uri(tx->request_uri, &(tx->parsed_uri_raw)) != HTP_OK) {
+ return HTP_ERROR;
+ }
+ }
+
+ // Build htp_tx_t::parsed_uri, but only if it was not explicitly set already.
+ if (tx->parsed_uri == NULL) {
+ tx->parsed_uri = htp_uri_alloc();
+ if (tx->parsed_uri == NULL) return HTP_ERROR;
+
+ // Keep the original URI components, but create a copy which we can normalize and use internally.
+ if (htp_normalize_parsed_uri(tx, tx->parsed_uri_raw, tx->parsed_uri) != HTP_OK) {
+ return HTP_ERROR;
+ }
+ }
+
+ // Check parsed_uri hostname. An invalid hostname only raises a flag; it
+ // does not fail the request.
+ if (tx->parsed_uri->hostname != NULL) {
+ if (htp_validate_hostname(tx->parsed_uri->hostname) == 0) {
+ tx->flags |= HTP_HOSTU_INVALID;
+ }
+ }
+
+ // Run hook REQUEST_URI_NORMALIZE.
+ htp_status_t rc = htp_hook_run_all(tx->connp->cfg->hook_request_uri_normalize, tx);
+ if (rc != HTP_OK) return rc;
+
+
+ // Run hook REQUEST_LINE.
+ rc = htp_hook_run_all(tx->connp->cfg->hook_request_line, tx);
+ if (rc != HTP_OK) return rc;
+
+ // Move on to the next phase.
+ tx->connp->in_state = htp_connp_REQ_PROTOCOL;
+
+ return HTP_OK;
+ }
+
+ /**
+  * Completes the response side of a transaction in hybrid mode.
+  *
+  * @param[in] tx  Transaction; must not be NULL.
+  * @return HTP_ERROR if tx is NULL, otherwise the result of
+  *         htp_tx_state_response_complete_ex() called with hybrid mode on.
+  */
+ htp_status_t htp_tx_state_response_complete(htp_tx_t *tx) {
+     if (tx != NULL) {
+         return htp_tx_state_response_complete_ex(tx, 1 /* hybrid mode */);
+     }
+
+     return HTP_ERROR;
+ }
+
+ /**
+  * Finalizes a transaction once both its request and response are complete:
+  * runs the TRANSACTION_COMPLETE hooks and, when auto-destroy is configured,
+  * destroys the transaction.
+  *
+  * @param[in] tx  Transaction; must not be NULL. May be destroyed by this call.
+  * @return HTP_OK when nothing needed doing or finalization succeeded;
+  *         HTP_ERROR if tx is NULL; otherwise the non-OK status from the hooks.
+  */
+ htp_status_t htp_tx_finalize(htp_tx_t *tx) {
+     if (tx == NULL) {
+         return HTP_ERROR;
+     }
+
+     if (htp_tx_is_complete(tx)) {
+         // Run hook TRANSACTION_COMPLETE.
+         htp_status_t status = htp_hook_run_all(tx->connp->cfg->hook_transaction_complete, tx);
+         if (status != HTP_OK) {
+             return status;
+         }
+
+         // In streaming processing, we destroy the transaction because it will not be needed any more.
+         if (tx->connp->cfg->tx_auto_destroy) {
+             htp_tx_destroy(tx);
+         }
+     }
+
+     return HTP_OK;
+ }
+
+ /**
+  * Completes the response side of a transaction.
+  *
+  * On the first call for a transaction it marks the response complete, sends
+  * the end-of-body marker (when the response has a body), runs the
+  * RESPONSE_COMPLETE hooks and finalizes the data receiver. Outside hybrid
+  * mode it may then yield to inbound processing. Finally it finalizes the
+  * transaction and resets the outbound parser to idle.
+  *
+  * @param[in] tx           Transaction; must not be NULL. May be destroyed by
+  *                         this call.
+  * @param[in] hybrid_mode  Non-zero to skip the yield-to-inbound checks.
+  * @return HTP_OK on success; HTP_DATA_OTHER when yielding to the inbound
+  *         parser; HTP_ERROR if tx is NULL; otherwise the non-OK status of a
+  *         failing step.
+  */
+ htp_status_t htp_tx_state_response_complete_ex(htp_tx_t *tx, int hybrid_mode) {
+ if (tx == NULL) return HTP_ERROR;
+
+ if (tx->response_progress != HTP_RESPONSE_COMPLETE) {
+ tx->response_progress = HTP_RESPONSE_COMPLETE;
+
+ // Run the last RESPONSE_BODY_DATA HOOK, but only if there was a response body present.
+ // The status returned by this final call is not inspected.
+ if (tx->response_transfer_coding != HTP_CODING_NO_BODY) {
+ htp_tx_res_process_body_data_ex(tx, NULL, 0);
+ }
+
+ // Run hook RESPONSE_COMPLETE.
+ htp_status_t rc = htp_hook_run_all(tx->connp->cfg->hook_response_complete, tx);
+ if (rc != HTP_OK) return rc;
+
+ // Clear the data receivers hook if any
+ rc = htp_connp_res_receiver_finalize_clear(tx->connp);
+ if (rc != HTP_OK) return rc;
+ }
+
+ if (!hybrid_mode) {
+ // Check if the inbound parser is waiting on us. If it is, that means that
+ // there might be request data that the inbound parser hasn't consumed yet.
+ // If we don't stop parsing we might encounter a response without a request,
+ // which is why we want to return straight away before processing any data.
+ //
+ // This situation will occur any time the parser needs to see the server
+ // respond to a particular situation before it can decide how to proceed. For
+ // example, when a CONNECT is sent, different paths are used when it is accepted
+ // and when it is not accepted.
+ //
+ // It is not enough to check only in_status here. Because of pipelining, it's possible
+ // that many inbound transactions have been processed, and that the parser is
+ // waiting on a response that we have not seen yet.
+ if ((tx->connp->in_status == HTP_STREAM_DATA_OTHER) && (tx->connp->in_tx == tx->connp->out_tx)) {
+ return HTP_DATA_OTHER;
+ }
+
+ // Do we have a signal to yield to inbound processing at
+ // the end of the next transaction?
+ if (tx->connp->out_data_other_at_tx_end) {
+ // We do. Let's yield then.
+ tx->connp->out_data_other_at_tx_end = 0;
+ return HTP_DATA_OTHER;
+ }
+ }
+
+ // Make a copy of the connection parser pointer, so that
+ // we don't have to reference it via tx, which may be destroyed later.
+ htp_connp_t *connp = tx->connp;
+
+ // Finalize the transaction. This call may destroy the transaction, if auto-destroy is enabled.
+ htp_status_t rc = htp_tx_finalize(tx);
+ if (rc != HTP_OK) return rc;
+
+ // Disconnect transaction from the parser.
+ connp->out_tx = NULL;
+
+ connp->out_state = htp_connp_RES_IDLE;
+
+ return HTP_OK;
+ }
+
+/**
+ * @internal
+ * @brief split input into tokens separated by "seps"
+ * @param seps nul-terminated string: each character is a separator
+ */
+static int get_token(const unsigned char *in, size_t in_len, const char *seps,
+ unsigned char **ret_tok_ptr, size_t *ret_tok_len)
+{
+ #if HTP_DEBUG
+ fprintf(stderr, "INPUT %"PRIuMAX, (uintmax_t)in_len);
+ fprint_raw_data(stderr, __func__, in, in_len);
+ #endif
+
+ size_t i = 0;
+
+ /* skip leading 'separators' */
+ while (i < in_len)
+ {
+ int match = 0;
+ for (const char *s = seps; *s != '\0'; s++) {
+ if (in[i] == *s) {
+ match++;
+ break;
+ }
+ }
+ if (!match)
+ break;
+
+ i++;
+ }
+ if (i >= in_len)
+ return 0;
+
+ in += i;
+ in_len -= i;
+
+ #if HTP_DEBUG
+ fprintf(stderr, "INPUT (POST SEP STRIP) %"PRIuMAX, (uintmax_t)in_len);
+ fprint_raw_data(stderr, __func__, in, in_len);
+ #endif
+
+ for (i = 0; i < in_len; i++)
+ {
+ for (const char *s = seps; *s != '\0'; s++) {
+ if (in[i] == *s) {
+ *ret_tok_ptr = (unsigned char *)in;
+ *ret_tok_len = i;
+ return 1;
+ }
+ }
+ }
+
+ *ret_tok_ptr = (unsigned char *)in;
+ *ret_tok_len = in_len;
+ return 1;
+}
+
+ /**
+  * Handles the end of the response header block: determines the response
+  * content encoding from the Content-Encoding header, finalizes the raw
+  * header receiver, runs the RESPONSE_HEADERS hooks and then sets up the
+  * response decompressor chain.
+  *
+  * Single well-known encodings take a fast path. Any other header value is
+  * treated as a list of encodings separated by ',' and/or ' ', and one
+  * decompressor is chained per recognized token.
+  *
+  * The list walk advances relative to where each token was actually found.
+  * Previously it advanced by the token length from the start of the remaining
+  * input, ignoring leading separators skipped by get_token(); with
+  * ", "-separated values the cursor lagged behind and trailing fragments of
+  * the last token were parsed again as extra (bogus) tokens.
+  *
+  * @param[in] tx  Transaction; must not be NULL.
+  * @return HTP_OK on success; HTP_ERROR if tx is NULL, a decompressor cannot
+  *         be created, or response_content_encoding_processing holds a value
+  *         that is neither NONE nor a supported algorithm; otherwise the
+  *         non-OK status of a failing step.
+  */
+ htp_status_t htp_tx_state_response_headers(htp_tx_t *tx) {
+     if (tx == NULL) return HTP_ERROR;
+
+     // Check for compression.
+
+     // Determine content encoding.
+
+     int ce_multi_comp = 0;
+     tx->response_content_encoding = HTP_COMPRESSION_NONE;
+     htp_header_t *ce = htp_table_get_c(tx->response_headers, "content-encoding");
+     if (ce != NULL) {
+         /* fast paths: regular gzip and friends */
+         if ((bstr_cmp_c_nocasenorzero(ce->value, "gzip") == 0) ||
+             (bstr_cmp_c_nocasenorzero(ce->value, "x-gzip") == 0)) {
+             tx->response_content_encoding = HTP_COMPRESSION_GZIP;
+         } else if ((bstr_cmp_c_nocasenorzero(ce->value, "deflate") == 0) ||
+                    (bstr_cmp_c_nocasenorzero(ce->value, "x-deflate") == 0)) {
+             tx->response_content_encoding = HTP_COMPRESSION_DEFLATE;
+         } else if (bstr_cmp_c_nocasenorzero(ce->value, "lzma") == 0) {
+             tx->response_content_encoding = HTP_COMPRESSION_LZMA;
+         } else if (bstr_cmp_c_nocasenorzero(ce->value, "inflate") == 0) {
+             // ignore
+         } else {
+             /* exceptional cases: enter slow path */
+             ce_multi_comp = 1;
+         }
+     }
+
+     // Configure decompression, if enabled in the configuration.
+     if (tx->connp->cfg->response_decompression_enabled) {
+         tx->response_content_encoding_processing = tx->response_content_encoding;
+     } else {
+         tx->response_content_encoding_processing = HTP_COMPRESSION_NONE;
+         ce_multi_comp = 0;
+     }
+
+     // Finalize sending raw header data.
+     htp_status_t rc = htp_connp_res_receiver_finalize_clear(tx->connp);
+     if (rc != HTP_OK) return rc;
+
+     // Run hook RESPONSE_HEADERS.
+     rc = htp_hook_run_all(tx->connp->cfg->hook_response_headers, tx);
+     if (rc != HTP_OK) return rc;
+
+     // Initialize the decompression engine as necessary. We can deal with three
+     // scenarios:
+     //
+     // 1. Decompression is enabled, compression indicated in headers, and we decompress.
+     //
+     // 2. As above, but the user disables decompression by setting response_content_encoding
+     //    to COMPRESSION_NONE.
+     //
+     // 3. Decompression is disabled and we do not attempt to enable it, but the user
+     //    forces decompression by setting response_content_encoding to one of the
+     //    supported algorithms.
+     if ((tx->response_content_encoding_processing == HTP_COMPRESSION_GZIP) ||
+         (tx->response_content_encoding_processing == HTP_COMPRESSION_DEFLATE) ||
+         (tx->response_content_encoding_processing == HTP_COMPRESSION_LZMA) ||
+         ce_multi_comp)
+     {
+         // Drop any decompressor chain left over from earlier processing.
+         if (tx->connp->out_decompressor != NULL) {
+             htp_tx_res_destroy_decompressors(tx->connp);
+         }
+
+         /* normal case */
+         if (!ce_multi_comp) {
+             tx->connp->out_decompressor = htp_gzip_decompressor_create(tx->connp, tx->response_content_encoding_processing);
+             if (tx->connp->out_decompressor == NULL) return HTP_ERROR;
+
+             tx->connp->out_decompressor->callback = htp_tx_res_process_body_data_decompressor_callback;
+
+         /* multiple ce value case */
+         } else {
+             int layers = 0;
+             htp_decompressor_t *comp = NULL;
+             // Counts every token seen so far (not only lzma ones); compared
+             // against response_lzma_layer_limit when an lzma token is found.
+             int nblzma = 0;
+
+             uint8_t *tok = NULL;
+             size_t tok_len = 0;
+
+             uint8_t *input = bstr_ptr(ce->value);
+             size_t input_len = bstr_len(ce->value);
+
+             #if HTP_DEBUG
+             fprintf(stderr, "INPUT %"PRIuMAX, (uintmax_t)input_len);
+             fprint_raw_data(stderr, __func__, input, input_len);
+             #endif
+
+             while (input_len > 0 &&
+                     get_token(input, input_len, ", ", &tok, &tok_len))
+             {
+                 #if HTP_DEBUG
+                 fprintf(stderr, "TOKEN %"PRIuMAX, (uintmax_t)tok_len);
+                 fprint_raw_data(stderr, __func__, tok, tok_len);
+                 #endif
+                 enum htp_content_encoding_t cetype = HTP_COMPRESSION_NONE;
+
+                 /* check depth limit (0 means no limit) */
+                 if ((tx->connp->cfg->response_decompression_layer_limit != 0) &&
+                     ((++layers) > tx->connp->cfg->response_decompression_layer_limit))
+                 {
+                     htp_log(tx->connp, HTP_LOG_MARK, HTP_LOG_WARNING, 0,
+                             "Too many response content encoding layers");
+                     break;
+                 }
+
+                 nblzma++;
+                 if (bstr_util_mem_index_of_c_nocase(tok, tok_len, "gzip") != -1) {
+                     // Any token containing "gzip" selects gzip; warn when it
+                     // is not exactly one of the two expected spellings.
+                     if (!(bstr_util_cmp_mem(tok, tok_len, "gzip", 4) == 0 ||
+                           bstr_util_cmp_mem(tok, tok_len, "x-gzip", 6) == 0)) {
+                         htp_log(tx->connp, HTP_LOG_MARK, HTP_LOG_WARNING, 0,
+                                 "C-E gzip has abnormal value");
+                     }
+                     cetype = HTP_COMPRESSION_GZIP;
+                 } else if (bstr_util_mem_index_of_c_nocase(tok, tok_len, "deflate") != -1) {
+                     if (!(bstr_util_cmp_mem(tok, tok_len, "deflate", 7) == 0 ||
+                           bstr_util_cmp_mem(tok, tok_len, "x-deflate", 9) == 0)) {
+                         htp_log(tx->connp, HTP_LOG_MARK, HTP_LOG_WARNING, 0,
+                                 "C-E deflate has abnormal value");
+                     }
+                     cetype = HTP_COMPRESSION_DEFLATE;
+                 } else if (bstr_util_cmp_mem(tok, tok_len, "lzma", 4) == 0) {
+                     cetype = HTP_COMPRESSION_LZMA;
+                     if (nblzma > tx->connp->cfg->response_lzma_layer_limit) {
+                         htp_log(tx->connp, HTP_LOG_MARK, HTP_LOG_ERROR, 0,
+                                 "Compression bomb: multiple encoding with lzma");
+                         break;
+                     }
+                 } else if (bstr_util_cmp_mem(tok, tok_len, "inflate", 7) == 0 || bstr_util_cmp_mem(tok, tok_len, "none", 4) == 0) {
+                     cetype = HTP_COMPRESSION_NONE;
+                 } else {
+                     // continue
+                     htp_log(tx->connp, HTP_LOG_MARK, HTP_LOG_WARNING, 0,
+                             "C-E unknown setting");
+                 }
+
+                 if (cetype != HTP_COMPRESSION_NONE) {
+                     if (comp == NULL) {
+                         // First real layer: becomes the head of the chain.
+                         tx->response_content_encoding_processing = cetype;
+                         tx->connp->out_decompressor = htp_gzip_decompressor_create(tx->connp, tx->response_content_encoding_processing);
+                         if (tx->connp->out_decompressor == NULL) {
+                             return HTP_ERROR;
+                         }
+                         tx->connp->out_decompressor->callback = htp_tx_res_process_body_data_decompressor_callback;
+                         comp = tx->connp->out_decompressor;
+                     } else {
+                         // Further layers are appended to the tail.
+                         comp->next = htp_gzip_decompressor_create(tx->connp, cetype);
+                         if (comp->next == NULL) {
+                             return HTP_ERROR;
+                         }
+                         comp->next->callback = htp_tx_res_process_body_data_decompressor_callback;
+                         comp = comp->next;
+                     }
+                 }
+
+                 // Step over everything get_token() consumed: the leading
+                 // separators it skipped (tok - input), the token itself, and
+                 // the one separator that terminated it.
+                 size_t consumed = (size_t) (tok - input) + tok_len;
+                 if ((consumed + 1) >= input_len)
+                     break;
+                 input += (consumed + 1);
+                 input_len -= (consumed + 1);
+             }
+         }
+     } else if (tx->response_content_encoding_processing != HTP_COMPRESSION_NONE) {
+         // A hook set an encoding value we do not know how to process.
+         return HTP_ERROR;
+     }
+
+     return HTP_OK;
+ }
+
+ /**
+  * Signals the start of a response: attaches the transaction to the outbound
+  * side of the parser, runs the RESPONSE_START hooks and selects the next
+  * outbound parser state.
+  *
+  * @param[in] tx  Transaction; must not be NULL.
+  * @return HTP_OK on success; HTP_ERROR if tx is NULL; otherwise the non-OK
+  *         status returned by the hooks.
+  */
+ htp_status_t htp_tx_state_response_start(htp_tx_t *tx) {
+ if (tx == NULL) return HTP_ERROR;
+
+ tx->connp->out_tx = tx;
+
+ // Run hook RESPONSE_START.
+ htp_status_t rc = htp_hook_run_all(tx->connp->cfg->hook_response_start, tx);
+ if (rc != HTP_OK) return rc;
+
+ // Change state into response line parsing, except if we're following
+ // a HTTP/0.9 request (no status line or response headers).
+ if (tx->is_protocol_0_9) {
+ // The whole response is body, read until the stream closes.
+ tx->response_transfer_coding = HTP_CODING_IDENTITY;
+ tx->response_content_encoding_processing = HTP_COMPRESSION_NONE;
+ tx->response_progress = HTP_RESPONSE_BODY;
+ tx->connp->out_state = htp_connp_RES_BODY_IDENTITY_STREAM_CLOSE;
+ tx->connp->out_body_data_left = -1;
+ } else {
+ tx->connp->out_state = htp_connp_RES_LINE;
+ tx->response_progress = HTP_RESPONSE_LINE;
+ }
+
+ /* If at this point we have no method and no uri and our status
+ * is still htp_connp_REQ_LINE, we likely have a timed out request
+ * or an overly long request */
+ // NOTE(review): request_method is compared against the enum constant
+ // HTP_M_UNKNOWN, while other code tests request_method_number against
+ // HTP_M_* values -- confirm which field is intended here.
+ if (tx->request_method == HTP_M_UNKNOWN && tx->request_uri == NULL && tx->connp->in_state == htp_connp_REQ_LINE) {
+ htp_log(tx->connp, HTP_LOG_MARK, HTP_LOG_WARNING, 0, "Request line incomplete");
+ }
+
+ return HTP_OK;
+ }
+
+/**
+ * Register callback for the transaction-specific REQUEST_BODY_DATA hook.
+ *
+ * @param[in] tx
+ * @param[in] callback_fn
+ */
+void htp_tx_register_request_body_data(htp_tx_t *tx, int (*callback_fn)(htp_tx_data_t *)) {
+ if ((tx == NULL) || (callback_fn == NULL)) return;
+ htp_hook_register(&tx->hook_request_body_data, (htp_callback_fn_t) callback_fn);
+}
+
+/**
+ * Register callback for the transaction-specific RESPONSE_BODY_DATA hook.
+ *
+ * @param[in] tx
+ * @param[in] callback_fn
+ */
+void htp_tx_register_response_body_data(htp_tx_t *tx, int (*callback_fn)(htp_tx_data_t *)) {
+ if ((tx == NULL) || (callback_fn == NULL)) return;
+ htp_hook_register(&tx->hook_response_body_data, (htp_callback_fn_t) callback_fn);
+}
+
+ /**
+  * Reports whether a transaction is complete.
+  *
+  * A transaction is considered complete only when both the request and the
+  * response are complete. (Sometimes a complete response can be seen even
+  * while the request is ongoing.)
+  *
+  * @param[in] tx  Transaction to inspect.
+  * @return 1 when complete, 0 when not, -1 when tx is NULL.
+  */
+ int htp_tx_is_complete(htp_tx_t *tx) {
+     if (tx == NULL) {
+         return -1;
+     }
+
+     int request_done = (tx->request_progress == HTP_REQUEST_COMPLETE);
+     int response_done = (tx->response_progress == HTP_RESPONSE_COMPLETE);
+
+     return (request_done && response_done) ? 1 : 0;
+ }
diff --git a/htp/htp_transaction.h b/htp/htp_transaction.h
new file mode 100644
index 0000000..32d6773
--- /dev/null
+++ b/htp/htp_transaction.h
@@ -0,0 +1,529 @@
+/***************************************************************************
+ * Copyright (c) 2009-2010 Open Information Security Foundation
+ * Copyright (c) 2010-2013 Qualys, Inc.
+ * All rights reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions are
+ * met:
+ *
+ * - Redistributions of source code must retain the above copyright
+ * notice, this list of conditions and the following disclaimer.
+
+ * - Redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in the
+ * documentation and/or other materials provided with the distribution.
+
+ * - Neither the name of the Qualys, Inc. nor the names of its
+ * contributors may be used to endorse or promote products derived from
+ * this software without specific prior written permission.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+ * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+ * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+ * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
+ * HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+ * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
+ * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+ * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
+ * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+ * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+ * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+ ***************************************************************************/
+
+/**
+ * @file
+ * @author Ivan Ristic <ivanr@webkreator.com>
+ */
+
+#ifndef HTP_TRANSACTION_H
+#define HTP_TRANSACTION_H
+
+#ifdef __cplusplus
+extern "C" {
+#endif
+
+#include "htp.h"
+
+/**
+ * Enumerates the data handling strategies available in hybrid parsing
+ * mode: either make copies of all supplied data, or use bstr instances
+ * to wrap the already available data. Passed as the alloc argument to
+ * setters such as htp_tx_req_set_header().
+ */
+enum htp_alloc_strategy_t {
+ /**
+ * Make copies of all data. This strategy should be used when
+ * the supplied buffers are transient and will go away after
+ * the invoked function returns.
+ */
+ HTP_ALLOC_COPY = 1,
+
+ /**
+ * Reuse buffers, without a change of ownership. We assume the
+ * buffers will continue to be available until the transaction
+ * is deleted by the container.
+ */
+ HTP_ALLOC_REUSE = 2
+};
+
+/**
+ * Possible states of a progressing transaction (request side; this is the
+ * value held in htp_tx_t::request_progress). Internally, progress will change
+ * to the next state when the processing activities associated with that state
+ * begin. For example, when we start to process request line bytes, the request
+ * state will change from HTP_REQUEST_NOT_STARTED to HTP_REQUEST_LINE.
+ */
+enum htp_tx_req_progress_t {
+ HTP_REQUEST_NOT_STARTED = 0,
+ HTP_REQUEST_LINE = 1,
+ HTP_REQUEST_HEADERS = 2,
+ HTP_REQUEST_BODY = 3,
+ HTP_REQUEST_TRAILER = 4,
+ HTP_REQUEST_COMPLETE = 5
+};
+
+enum htp_tx_res_progress_t { /* Response-side progress states; this is the value held in htp_tx_t::response_progress. */
+ HTP_RESPONSE_NOT_STARTED = 0,
+ HTP_RESPONSE_LINE = 1,
+ HTP_RESPONSE_HEADERS = 2,
+ HTP_RESPONSE_BODY = 3,
+ HTP_RESPONSE_TRAILER = 4,
+ HTP_RESPONSE_COMPLETE = 5 /* Required, together with HTP_REQUEST_COMPLETE, for htp_tx_is_complete() to return 1. */
+};
+
+#define HTP_CONFIG_PRIVATE 0 /* NOTE(review): presumably the is_cfg_shared value for a configuration owned by the transaction - confirm against htp_tx_set_config(). */
+#define HTP_CONFIG_SHARED 1 /* NOTE(review): presumably the is_cfg_shared value for a configuration shared with the connection - confirm against htp_tx_set_config(). */
+
+/**
+ * Creates a new transaction structure.
+ *
+ * @param[in] connp Connection parser pointer. Must not be NULL.
+ * @return The newly created transaction, or NULL on memory allocation failure.
+ */
+htp_tx_t *htp_tx_create(htp_connp_t *connp);
+
+/**
+ * Destroys the supplied transaction.
+ *
+ * @param[in] tx Transaction pointer. Must not be NULL.
+ */
+htp_status_t htp_tx_destroy(htp_tx_t *tx);
+
+/**
+ * Determines if the transaction used a shared configuration structure. See the
+ * documentation for htp_tx_set_config() for more information why you might want
+ * to know that.
+ *
+ * @param[in] tx Transaction pointer. Must not be NULL.
+ * @return HTP_CONFIG_SHARED or HTP_CONFIG_PRIVATE.
+ */
+int htp_tx_get_is_config_shared(const htp_tx_t *tx);
+
+/**
+ * Returns the user data associated with this transaction.
+ *
+ * @param[in] tx Transaction pointer. Must not be NULL.
+ * @return A pointer to user data or NULL.
+ */
+void *htp_tx_get_user_data(const htp_tx_t *tx);
+
+/**
+ * Registers a callback that will be invoked to process the transaction's request body data.
+ *
+ * @param[in] tx Transaction pointer. Must not be NULL.
+ * @param[in] callback_fn Callback function pointer. Must not be NULL.
+ */
+void htp_tx_register_request_body_data(htp_tx_t *tx, int (*callback_fn)(htp_tx_data_t *));
+
+/**
+ * Registers a callback that will be invoked to process the transaction's response body data.
+ *
+ * @param[in] tx Transaction pointer. Must not be NULL.
+ * @param[in] callback_fn Callback function pointer. Must not be NULL.
+ */
+void htp_tx_register_response_body_data(htp_tx_t *tx, int (*callback_fn)(htp_tx_data_t *));
+
+/**
+ * Adds one parameter to the request. This function will take over the
+ * responsibility for the provided htp_param_t structure.
+ *
+ * @param[in] tx Transaction pointer. Must not be NULL.
+ * @param[in] param Parameter pointer. Must not be NULL.
+ * @return HTP_OK on success, HTP_ERROR on failure.
+ */
+htp_status_t htp_tx_req_add_param(htp_tx_t *tx, htp_param_t *param);
+
+/**
+ * Returns the first request parameter that matches the given name, using case-insensitive matching.
+ *
+ * @param[in] tx Transaction pointer. Must not be NULL.
+ * @param[in] name Name data pointer. Must not be NULL.
+ * @param[in] name_len Name data length.
+ * @return htp_param_t instance, or NULL if parameter not found.
+ */
+htp_param_t *htp_tx_req_get_param(htp_tx_t *tx, const char *name, size_t name_len);
+
+/**
+ * Returns the first request parameter from the given source that matches the given name,
+ * using case-insensitive matching.
+ *
+ * @param[in] tx Transaction pointer. Must not be NULL.
+ * @param[in] source Parameter source (where in request the parameter was located).
+ * @param[in] name Name data pointer. Must not be NULL.
+ * @param[in] name_len Name data length.
+ * @return htp_param_t instance, or NULL if parameter not found.
+ */
+htp_param_t *htp_tx_req_get_param_ex(htp_tx_t *tx, enum htp_data_source_t source, const char *name, size_t name_len);
+
+/**
+ * Determine if the request has a body.
+ *
+ * @param[in] tx Transaction pointer. Must not be NULL.
+ * @return 1 if there is a body, 0 otherwise.
+ */
+int htp_tx_req_has_body(const htp_tx_t *tx);
+
+/**
+ * Process a chunk of request body data. This function assumes that
+ * handling of chunked encoding is implemented by the container. When
+ * you're done submitting body data, invoke a state change (to REQUEST)
+ * to finalize any processing that might be pending. The supplied data is
+ * fully consumed and there is no expectation that it will be available
+ * afterwards. The protocol parsing code makes no copies of the data,
+ * but some parsers might.
+ *
+ * @param[in] tx Transaction pointer. Must not be NULL.
+ * @param[in] data Data pointer. Must not be NULL.
+ * @param[in] len Data length.
+ * @return HTP_OK on success, HTP_ERROR on failure.
+ */
+htp_status_t htp_tx_req_process_body_data(htp_tx_t *tx, const void *data, size_t len);
+
+/**
+ * Set one request header. This function should be invoked once for
+ * each available header, and in the order in which headers were
+ * seen in the request.
+ *
+ * @param[in] tx Transaction pointer. Must not be NULL.
+ * @param[in] name Name data pointer. Must not be NULL.
+ * @param[in] name_len Name data length.
+ * @param[in] value Value data pointer. Must not be NULL.
+ * @param[in] value_len Value data length.
+ * @param[in] alloc Desired allocation strategy.
+ * @return HTP_OK on success, HTP_ERROR on failure.
+ */
+htp_status_t htp_tx_req_set_header(htp_tx_t *tx, const char *name, size_t name_len,
+ const char *value, size_t value_len, enum htp_alloc_strategy_t alloc);
+
+/**
+ * Removes all request headers associated with this transaction. This
+ * function is needed because in some cases the container does not
+ * differentiate between standard and trailing headers. In that case,
+ * you set request headers once at the beginning of the transaction,
+ * read the body (at this point the request headers should contain the
+ * mix of regular and trailing headers), clear all headers, and then set
+ * them all again.
+ *
+ * @param[in] tx Transaction pointer. Must not be NULL.
+ * @return HTP_OK on success, HTP_ERROR on failure.
+ */
+htp_status_t htp_tx_req_set_headers_clear(htp_tx_t *tx);
+
+/**
+ * Set request line. When used, this function should always be called first,
+ * with more specific functions following. Must not contain line terminators.
+ *
+ * @param[in] tx Transaction pointer. Must not be NULL.
+ * @param[in] line Line data pointer. Must not be NULL.
+ * @param[in] line_len Line data length.
+ * @param[in] alloc Desired allocation strategy.
+ * @return HTP_OK on success, HTP_ERROR on failure.
+ */
+htp_status_t htp_tx_req_set_line(htp_tx_t *tx, const char *line, size_t line_len, enum htp_alloc_strategy_t alloc);
+
+/**
+ * Set transaction request method. This function will enable you to keep
+ * track of the text representation of the method.
+ *
+ * @param[in] tx Transaction pointer. Must not be NULL.
+ * @param[in] method Method data pointer. Must not be NULL.
+ * @param[in] method_len Method data length.
+ * @param[in] alloc Desired allocation strategy.
+ * @return HTP_OK on success, HTP_ERROR on failure.
+ */
+htp_status_t htp_tx_req_set_method(htp_tx_t *tx, const char *method, size_t method_len, enum htp_alloc_strategy_t alloc);
+
+/**
+ * Set transaction request method number. This function enables you to
+ * keep track how a particular method string is interpreted. This function
+ * is useful with web servers that ignore invalid methods; for example, some
+ * web servers will treat them as a GET.
+ *
+ * @param[in] tx Transaction pointer. Must not be NULL.
+ * @param[in] method_number Method number.
+ */
+void htp_tx_req_set_method_number(htp_tx_t *tx, enum htp_method_t method_number);
+
+/**
+ * Set parsed request URI. You don't need to use this function if you are already providing
+ * the request line or request URI. But if your container already has this data available,
+ * feeding it to LibHTP will minimize any potential data differences. This function assumes
+ * management of the data provided in parsed_uri. This function will not change htp_tx_t::parsed_uri_raw
+ * (which may have data in it from the parsing of the request URI).
+ *
+ * @param[in] tx Transaction pointer. Must not be NULL.
+ * @param[in] parsed_uri URI pointer. Must not be NULL.
+ */
+void htp_tx_req_set_parsed_uri(htp_tx_t *tx, htp_uri_t *parsed_uri);
+
+/**
+ * Forces HTTP/0.9 as the transaction protocol. This method exists to ensure
+ * that both LibHTP and the container treat the transaction as HTTP/0.9, despite
+ * potential differences in how the protocol version is determined.
+ *
+ * @param[in] tx Transaction pointer. Must not be NULL.
+ * @param[in] is_protocol_0_9 Zero if protocol is not HTTP/0.9, or 1 if it is.
+ */
+void htp_tx_req_set_protocol_0_9(htp_tx_t *tx, int is_protocol_0_9);
+
+/**
+ * Sets the request protocol string (e.g., "HTTP/1.0"). The information provided
+ * is only stored, not parsed. Use htp_tx_req_set_protocol_number() to set the
+ * actual protocol number, as interpreted by the container.
+ *
+ * @param[in] tx Transaction pointer. Must not be NULL.
+ * @param[in] protocol Protocol data pointer. Must not be NULL.
+ * @param[in] protocol_len Protocol data length.
+ * @param[in] alloc Desired allocation strategy.
+ * @return HTP_OK on success, HTP_ERROR on failure.
+ */
+htp_status_t htp_tx_req_set_protocol(htp_tx_t *tx, const char *protocol, size_t protocol_len, enum htp_alloc_strategy_t alloc);
+
+/**
+ * Set request protocol version number. Must be invoked after
+ * htp_tx_req_set_protocol(), because it will overwrite the previously
+ * extracted version number. Convert the protocol version number to an integer
+ * by multiplying it with 100. For example, 1.1 becomes 110. Alternatively,
+ * use the HTP_PROTOCOL_0_9, HTP_PROTOCOL_1_0, and HTP_PROTOCOL_1_1 constants.
+ * Note: setting protocol to HTP_PROTOCOL_0_9 alone will _not_ get the library to
+ * treat the transaction as HTTP/0.9. You need to also invoke htp_tx_req_set_protocol_0_9().
+ * This is because HTTP 0.9 is used only when protocol information is absent from the
+ * request line, and not when it is explicitly stated (as "HTTP/0.9"). This behavior is
+ * consistent with that of Apache httpd.
+ *
+ * @param[in] tx Transaction pointer. Must not be NULL.
+ * @param[in] protocol_number Protocol number.
+ */
+void htp_tx_req_set_protocol_number(htp_tx_t *tx, int protocol_number);
+
+/**
+ * Set transaction request URI. The value provided here will be stored in htp_tx_t::request_uri
+ * and subsequently parsed. If htp_tx_req_set_line() was previously used, the uri provided
+ * when calling this function will overwrite any previously parsed value.
+ *
+ * @param[in] tx Transaction pointer. Must not be NULL.
+ * @param[in] uri URI data pointer. Must not be NULL.
+ * @param[in] uri_len URI data length.
+ * @param[in] alloc Desired allocation strategy.
+ * @return HTP_OK on success, HTP_ERROR on failure.
+ */
+htp_status_t htp_tx_req_set_uri(htp_tx_t *tx, const char *uri, size_t uri_len, enum htp_alloc_strategy_t alloc);
+
+/**
+ * Process a chunk of response body data. This function assumes that
+ * handling of chunked encoding is implemented by the container. When
+ * you're done submitting body data, invoking a state change (to RESPONSE)
+ * will finalize any processing that might be pending.
+ *
+ * The response body data will be decompressed if two conditions are met: one,
+ * decompression is enabled in configuration and two, if the response headers
+ * indicate compression. Alternatively, you can control decompression from
+ * a RESPONSE_HEADERS callback, by setting tx->response_content_encoding either
+ * to HTP_COMPRESSION_NONE (to disable decompression), or to one of the supported
+ * decompression algorithms.
+ *
+ * @param[in] tx Transaction pointer. Must not be NULL.
+ * @param[in] data Data pointer. Must not be NULL.
+ * @param[in] len Data length.
+ * @return HTP_OK on success, HTP_ERROR on failure.
+ */
+htp_status_t htp_tx_res_process_body_data(htp_tx_t *tx, const void *data, size_t len);
+
+/**
+ * Set one response header. This function should be invoked once for
+ * each available header, and in the order in which headers were
+ * seen in the response.
+ *
+ * @param[in] tx Transaction pointer. Must not be NULL.
+ * @param[in] name Name data pointer. Must not be NULL.
+ * @param[in] name_len Name data length.
+ * @param[in] value Value data pointer. Must not be NULL.
+ * @param[in] value_len Value length.
+ * @param[in] alloc Desired allocation strategy.
+ * @return HTP_OK on success, HTP_ERROR on failure.
+ */
+htp_status_t htp_tx_res_set_header(htp_tx_t *tx, const char *name, size_t name_len,
+ const char *value, size_t value_len, enum htp_alloc_strategy_t alloc);
+
+/**
+ * Removes all response headers associated with this transaction. This
+ * function is needed because in some cases the container does not
+ * differentiate between standard and trailing headers. In that case,
+ * you set response headers once at the beginning of the transaction,
+ * read the body, clear all headers, and then set them all again. After
+ * the headers are set for the second time, they will potentially contain
+ * a mixture of standard and trailing headers.
+ *
+ * @param[in] tx Transaction pointer. Must not be NULL.
+ * @return HTP_OK on success, HTP_ERROR on failure.
+ */
+htp_status_t htp_tx_res_set_headers_clear(htp_tx_t *tx);
+
+/**
+ * Set response protocol number. See htp_tx_req_set_protocol_number() for more information
+ * about the correct format of the protocol_number parameter.
+ *
+ * @param[in] tx Transaction pointer. Must not be NULL.
+ * @param[in] protocol_number Protocol number.
+ */
+void htp_tx_res_set_protocol_number(htp_tx_t *tx, int protocol_number);
+
+/**
+ * Set response line. Use this function if you have a single buffer containing
+ * the entire line. If you have individual response line pieces, use the other
+ * available functions.
+ *
+ * @param[in] tx Transaction pointer. Must not be NULL.
+ * @param[in] line Line data pointer. Must not be NULL.
+ * @param[in] line_len Line data length.
+ * @param[in] alloc Desired allocation strategy.
+ * @return HTP_OK on success, HTP_ERROR on failure.
+ */
+htp_status_t htp_tx_res_set_status_line(htp_tx_t *tx, const char *line, size_t line_len, enum htp_alloc_strategy_t alloc);
+
+/**
+ * Set response status code.
+ *
+ * @param[in] tx Transaction pointer. Must not be NULL.
+ * @param[in] status_code Response status code.
+ */
+void htp_tx_res_set_status_code(htp_tx_t *tx, int status_code);
+
+/**
+ * Set response status message, which is the part of the response
+ * line that comes after the status code.
+ *
+ * @param[in] tx Transaction pointer. Must not be NULL.
+ * @param[in] msg Message data pointer. Must not be NULL.
+ * @param[in] msg_len Message data length.
+ * @param[in] alloc Desired allocation strategy.
+ * @return HTP_OK on success, HTP_ERROR on failure.
+ */
+htp_status_t htp_tx_res_set_status_message(htp_tx_t *tx, const char *msg, size_t msg_len, enum htp_alloc_strategy_t alloc);
+
+/**
+ * Sets the configuration that is to be used for this transaction. If the
+ * second parameter is set to HTP_CONFIG_PRIVATE, the transaction will adopt
+ * the configuration structure and destroy it when appropriate. This function is
+ * useful if you need to make changes to configuration on per-transaction basis.
+ * Initially, all transactions will share the configuration with that of the
+ * connection; if you were to make changes on it, they would affect all
+ * current and future connections. To work around that, you make a copy of the
+ * configuration object, call this function with the second parameter set to
+ * HTP_CONFIG_PRIVATE, and modify configuration at will.
+ *
+ * @param[in] tx Transaction pointer. Must not be NULL.
+ * @param[in] cfg Configuration pointer. Must not be NULL.
+ * @param[in] is_cfg_shared HTP_CONFIG_SHARED or HTP_CONFIG_PRIVATE
+ */
+void htp_tx_set_config(htp_tx_t *tx, htp_cfg_t *cfg, int is_cfg_shared);
+
+/**
+ * Associates user data with this transaction.
+ *
+ * @param[in] tx Transaction pointer. Must not be NULL.
+ * @param[in] user_data Opaque user data pointer.
+ */
+void htp_tx_set_user_data(htp_tx_t *tx, void *user_data);
+
+/**
+ * Change transaction state to REQUEST and invoke registered callbacks.
+ *
+ * @param[in] tx Transaction pointer. Must not be NULL.
+ * @return HTP_OK on success; HTP_ERROR on error, HTP_STOP if one of the
+ * callbacks does not want to follow the transaction any more.
+ */
+htp_status_t htp_tx_state_request_complete(htp_tx_t *tx);
+
+/**
+ * Change transaction state to REQUEST_HEADERS and invoke all
+ * registered callbacks.
+ *
+ * @param[in] tx Transaction pointer. Must not be NULL.
+ * @return HTP_OK on success; HTP_ERROR on error, HTP_STOP if one of the
+ * callbacks does not want to follow the transaction any more.
+ */
+htp_status_t htp_tx_state_request_headers(htp_tx_t *tx);
+
+/**
+ * Change transaction state to REQUEST_LINE and invoke all
+ * registered callbacks.
+ *
+ * @param[in] tx Transaction pointer. Must not be NULL.
+ * @return HTP_OK on success; HTP_ERROR on error, HTP_STOP if one of the
+ * callbacks does not want to follow the transaction any more.
+ */
+htp_status_t htp_tx_state_request_line(htp_tx_t *tx);
+
+/**
+ * Initialize hybrid parsing mode, change state to TRANSACTION_START,
+ * and invoke all registered callbacks.
+ *
+ * @param[in] tx Transaction pointer. Must not be NULL.
+ * @return HTP_OK on success; HTP_ERROR on error, HTP_STOP if one of the
+ * callbacks does not want to follow the transaction any more.
+ */
+htp_status_t htp_tx_state_request_start(htp_tx_t *tx);
+
+/**
+ * Change transaction state to RESPONSE and invoke registered callbacks.
+ *
+ * @param[in] tx Transaction pointer. Must not be NULL.
+ * @return HTP_OK on success; HTP_ERROR on error, HTP_STOP if one of the
+ * callbacks does not want to follow the transaction any more.
+ */
+htp_status_t htp_tx_state_response_complete(htp_tx_t *tx);
+
+/**
+ * Change transaction state to RESPONSE_HEADERS and invoke registered callbacks.
+ *
+ * @param[in] tx Transaction pointer. Must not be NULL.
+ * @return HTP_OK on success; HTP_ERROR on error, HTP_STOP if one of the
+ * callbacks does not want to follow the transaction any more.
+ */
+htp_status_t htp_tx_state_response_headers(htp_tx_t *tx);
+
+/**
+ * Change transaction state to HTP_RESPONSE_LINE and invoke registered callbacks.
+ *
+ * @param[in] tx Transaction pointer. Must not be NULL.
+ * @return HTP_OK on success; HTP_ERROR on error, HTP_STOP if one of the
+ * callbacks does not want to follow the transaction any more.
+ */
+htp_status_t htp_tx_state_response_line(htp_tx_t *tx);
+
+/**
+ * Change transaction state to RESPONSE_START and invoke registered callbacks.
+ *
+ * @param[in] tx Transaction pointer. Must not be NULL.
+ * @return HTP_OK on success; HTP_ERROR on error, HTP_STOP if one of the
+ * callbacks does not want to follow the transaction any more.
+ */
+htp_status_t htp_tx_state_response_start(htp_tx_t *tx);
+
+#ifdef __cplusplus
+}
+#endif
+
+#endif /* HTP_TRANSACTION_H */
diff --git a/htp/htp_transcoder.c b/htp/htp_transcoder.c
new file mode 100644
index 0000000..03d49ed
--- /dev/null
+++ b/htp/htp_transcoder.c
@@ -0,0 +1,211 @@
+/***************************************************************************
+ * Copyright (c) 2009-2010 Open Information Security Foundation
+ * Copyright (c) 2010-2013 Qualys, Inc.
+ * All rights reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions are
+ * met:
+ *
+ * - Redistributions of source code must retain the above copyright
+ * notice, this list of conditions and the following disclaimer.
+
+ * - Redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in the
+ * documentation and/or other materials provided with the distribution.
+
+ * - Neither the name of the Qualys, Inc. nor the names of its
+ * contributors may be used to endorse or promote products derived from
+ * this software without specific prior written permission.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+ * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+ * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+ * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
+ * HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+ * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
+ * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+ * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
+ * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+ * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+ * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+ ***************************************************************************/
+
+/**
+ * @file
+ * @author Ivan Ristic <ivanr@webkreator.com>
+ */
+
+#include "htp_config_auto.h"
+
+#include "htp_private.h"
+
+/**
+ * Transcode all parameters supplied in the table from the configured request
+ * encoding to the configured internal encoding. Both names and values are
+ * converted and stored in a newly created table.
+ *
+ * @param[in] connp Connection parser; connp->cfg supplies internal_encoding
+ *                  and request_encoding.
+ * @param[in,out] params Pointer to the table to transcode. On success it is
+ *                  replaced with the new table; it is left unchanged on failure
+ *                  or when either encoding is not configured.
+ * @param[in] destroy_old Non-zero to free the values of the original table and
+ *                  destroy it after a successful conversion.
+ * @return HTP_OK on success (including when no transcoding is configured),
+ *         HTP_ERROR on failure.
+ */
+int htp_transcode_params(htp_connp_t *connp, htp_table_t **params, int destroy_old) {
+    htp_table_t *input_params = *params;
+
+    // No transcoding unless necessary
+    if ((connp->cfg->internal_encoding == NULL) || (connp->cfg->request_encoding == NULL)) return HTP_OK;
+
+    // Create a new table that will hold transcoded parameters
+    htp_table_t *output_params = htp_table_create(htp_table_size(input_params));
+    if (output_params == NULL) return HTP_ERROR;
+
+    // Initialize iconv
+    iconv_t cd = iconv_open(connp->cfg->internal_encoding, connp->cfg->request_encoding);
+    if (cd == (iconv_t) -1) {
+        htp_table_destroy(output_params);
+        return HTP_ERROR;
+    }
+
+    #if (_LIBICONV_VERSION >= 0x0108 && HAVE_ICONVCTL)
+    // Turn transliteration off and ask libiconv to discard illegal sequences
+    int iconv_param = 0;
+    iconvctl(cd, ICONV_SET_TRANSLITERATE, &iconv_param);
+    iconv_param = 1;
+    iconvctl(cd, ICONV_SET_DISCARD_ILSEQ, &iconv_param);
+    #endif
+
+    // Convert the parameters, one by one
+    for (size_t i = 0, n = htp_table_size(input_params); i < n; i++) {
+        bstr *name = NULL;
+        bstr *value = htp_table_get_index(input_params, i, &name);
+
+        bstr *new_name = NULL, *new_value = NULL;
+
+        // Convert name
+        htp_transcode_bstr(cd, name, &new_name);
+        if (new_name == NULL) goto fail;
+
+        // Convert value
+        htp_transcode_bstr(cd, value, &new_value);
+        if (new_value == NULL) {
+            bstr_free(new_name);
+            goto fail;
+        }
+
+        // Add to new table. If the insert fails the pair was not stored, so
+        // it has to be released here; otherwise both strings would leak and
+        // the parameter would be silently dropped.
+        if (htp_table_addn(output_params, new_name, new_value) != HTP_OK) {
+            bstr_free(new_name);
+            bstr_free(new_value);
+            goto fail;
+        }
+    }
+
+    // Replace the old parameter table
+    *params = output_params;
+
+    // Destroy the old parameter table if necessary
+    if (destroy_old) {
+        for (size_t i = 0, n = htp_table_size(input_params); i < n; i++) {
+            bstr_free(htp_table_get_index(input_params, i, NULL));
+        }
+
+        htp_table_destroy(input_params);
+    }
+
+    iconv_close(cd);
+
+    return HTP_OK;
+
+fail:
+    iconv_close(cd);
+
+    // Release the values converted so far.
+    // NOTE(review): keys are not freed explicitly here, as in the original code;
+    // presumably htp_table_destroy() releases keys added with htp_table_addn() - confirm.
+    for (size_t j = 0, k = htp_table_size(output_params); j < k; j++) {
+        bstr_free(htp_table_get_index(output_params, j, NULL));
+    }
+
+    htp_table_destroy(output_params);
+
+    return HTP_ERROR;
+}
+
+/**
+ * Transcode one bstr.
+ *
+ * @param[in] cd Open iconv conversion descriptor. Its conversion state is reset
+ *               before the string is processed.
+ * @param[in] input String to convert.
+ * @param[out] output Receives a newly allocated bstr holding the converted data
+ *               on success. On failure it is either left untouched or set to
+ *               NULL, so callers should initialize it to NULL beforehand.
+ * @return HTP_OK on success, HTP_ERROR on conversion or allocation failure.
+ */
+int htp_transcode_bstr(iconv_t cd, bstr *input, bstr **output) {
+    // Reset conversion state for every new string
+    iconv(cd, NULL, NULL, NULL, NULL);
+
+    // Small staging buffer for converted bytes. It lives on the stack, so there
+    // is no allocation to fail and nothing to free on the error paths.
+    unsigned char buf[10];
+    const size_t buflen = sizeof(buf);
+
+    // Collects full staging buffers; created only if the output outgrows buf
+    bstr_builder_t *bb = NULL;
+
+    const char *inbuf = (const char *) bstr_ptr(input);
+    size_t inleft = bstr_len(input);
+    char *outbuf = (char *) buf;
+    size_t outleft = buflen;
+
+    // NOTE(review): no final flush call (NULL input with a non-NULL output buffer)
+    // is made after the loop, so a stateful target encoding would not get its
+    // closing shift sequence. This matches the previous behaviour.
+    while (iconv(cd, (ICONV_CONST char **) &inbuf, &inleft, &outbuf, &outleft) == (size_t) -1) {
+        // Anything other than a full output buffer is a genuine conversion error.
+        // A "full" buffer that is actually empty means no progress is possible,
+        // so treat it as an error too rather than spinning forever.
+        if ((errno != E2BIG) || (outleft == buflen)) {
+            if (bb != NULL) bstr_builder_destroy(bb);
+            return HTP_ERROR;
+        }
+
+        // Create bstr builder on-demand
+        if (bb == NULL) {
+            bb = bstr_builder_create();
+            if (bb == NULL) return HTP_ERROR;
+        }
+
+        // The output buffer is full: move its contents to the builder. A failed
+        // append must abort, otherwise the result would be silently truncated.
+        if (bstr_builder_append_mem(bb, buf, buflen - outleft) != HTP_OK) {
+            bstr_builder_destroy(bb);
+            return HTP_ERROR;
+        }
+
+        // Start filling the staging buffer from the beginning again
+        outbuf = (char *) buf;
+        outleft = buflen;
+    }
+
+    const size_t pending = buflen - outleft;
+
+    if (bb == NULL) {
+        // Everything fitted into the staging buffer
+        *output = bstr_dup_mem(buf, pending);
+    } else {
+        // Add whatever is left in the staging buffer, then flatten the pieces
+        if ((pending > 0) && (bstr_builder_append_mem(bb, buf, pending) != HTP_OK)) {
+            bstr_builder_destroy(bb);
+            return HTP_ERROR;
+        }
+
+        *output = bstr_builder_to_str(bb);
+        bstr_builder_destroy(bb);
+    }
+
+    return (*output != NULL) ? HTP_OK : HTP_ERROR;
+}
diff --git a/htp/htp_urlencoded.c b/htp/htp_urlencoded.c
new file mode 100644
index 0000000..5ad3886
--- /dev/null
+++ b/htp/htp_urlencoded.c
@@ -0,0 +1,332 @@
+/***************************************************************************
+ * Copyright (c) 2009-2010 Open Information Security Foundation
+ * Copyright (c) 2010-2013 Qualys, Inc.
+ * All rights reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions are
+ * met:
+ *
+ * - Redistributions of source code must retain the above copyright
+ * notice, this list of conditions and the following disclaimer.
+
+ * - Redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in the
+ * documentation and/or other materials provided with the distribution.
+
+ * - Neither the name of the Qualys, Inc. nor the names of its
+ * contributors may be used to endorse or promote products derived from
+ * this software without specific prior written permission.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+ * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+ * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+ * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
+ * HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+ * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
+ * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+ * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
+ * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+ * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+ * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+ ***************************************************************************/
+
+/**
+ * @file
+ * @author Ivan Ristic <ivanr@webkreator.com>
+ */
+
+#include "htp_config_auto.h"
+
+#include "htp_private.h"
+
+/**
+ * This method is invoked whenever a piece of data, belonging to a single field (name or value)
+ * becomes available. It will either create a new parameter or store the transient information
+ * until a parameter can be created.
+ *
+ * A field is considered finished when last_char is not -1, or when urlenp->_complete is set.
+ * Unfinished pieces are appended to urlenp->_bb and joined with later pieces.
+ *
+ * @param[in] urlenp Parser whose _state, _name, _bb, _complete and params fields are used.
+ * @param[in] data Buffer that holds the piece; may be NULL, in which case no bytes are read.
+ * @param[in] startpos Offset into data of the first byte of the piece.
+ * @param[in] endpos Offset into data one past the last byte of the piece.
+ * @param[in] last_char The character that ended the field, or -1 if the reason this function
+ * is called is because the end of the current data chunk is reached.
+ */
+static void htp_urlenp_add_field_piece(htp_urlenp_t *urlenp, const unsigned char *data, size_t startpos, size_t endpos, int last_char) {
+ // Add field if we know it ended (last_char is something other than -1)
+ // or if we know that there won't be any more input data (urlenp->_complete is true).
+ if ((last_char != -1) || (urlenp->_complete)) {
+ // Prepare the field value, assembling from multiple pieces as necessary.
+
+ bstr *field = NULL;
+
+ // Did we use the string builder for this field?
+ if (bstr_builder_size(urlenp->_bb) > 0) {
+ // The current field consists of more than one piece, we have to use the string builder.
+
+ // Add current piece to string builder.
+ if ((data != NULL) && (endpos - startpos > 0)) {
+ bstr_builder_append_mem(urlenp->_bb, data + startpos, endpos - startpos);
+ }
+
+ // Generate the field and clear the string builder.
+ field = bstr_builder_to_str(urlenp->_bb);
+ if (field == NULL) return;
+
+ bstr_builder_clear(urlenp->_bb);
+ } else {
+ // We only have the current piece to work with, so no need to involve the string builder.
+ if ((data != NULL) && (endpos - startpos > 0)) {
+ field = bstr_dup_mem(data + startpos, endpos - startpos);
+ if (field == NULL) return;
+ }
+ }
+
+ // Process field as key or value, as appropriate.
+
+ if (urlenp->_state == HTP_URLENP_STATE_KEY) {
+ // Key.
+
+ // If there is no more work left to do, then we have a single key. Add it.
+ if ((urlenp->_complete)||(last_char == urlenp->argument_separator)) {
+
+ // Handling empty pairs is tricky. We don't want to create a pair for
+ // an entirely empty input, but in some cases it may be appropriate
+ // (e.g., /index.php?&q=2).
+ if ((field != NULL)||(last_char == urlenp->argument_separator)) {
+ // Add one pair, with an empty value and possibly empty key too.
+
+ bstr *name = field;
+ if (name == NULL) {
+ name = bstr_dup_c("");
+ if (name == NULL) return;
+ }
+
+ bstr *value = bstr_dup_c("");
+ if (value == NULL) {
+ bstr_free(name);
+ return;
+ }
+
+ if (urlenp->decode_url_encoding) {
+ htp_tx_urldecode_params_inplace(urlenp->tx, name);
+ }
+
+ htp_table_addn(urlenp->params, name, value); // NOTE(review): result is not checked; on failure name and value would presumably leak - confirm
+
+ urlenp->_name = NULL;
+
+ #ifdef HTP_DEBUG
+ fprint_raw_data(stderr, "NAME", bstr_ptr(name), bstr_len(name));
+ fprint_raw_data(stderr, "VALUE", bstr_ptr(value), bstr_len(value));
+ #endif
+ }
+ } else {
+ // This key will possibly be followed by a value, so keep it for later.
+ urlenp->_name = field;
+ }
+ } else {
+ // Value (with a key remembered from before).
+
+ bstr *name = urlenp->_name;
+ urlenp->_name = NULL;
+
+ if (name == NULL) {
+ name = bstr_dup_c("");
+ if (name == NULL) {
+ bstr_free(field);
+ return;
+ }
+ }
+
+ bstr *value = field;
+ if (value == NULL) {
+ value = bstr_dup_c("");
+ if (value == NULL) {
+ bstr_free(name);
+ return;
+ }
+ }
+
+ if (urlenp->decode_url_encoding) {
+ htp_tx_urldecode_params_inplace(urlenp->tx, name);
+ htp_tx_urldecode_params_inplace(urlenp->tx, value);
+ }
+
+ htp_table_addn(urlenp->params, name, value); // NOTE(review): result is not checked here either - confirm failure handling
+
+ #ifdef HTP_DEBUG
+ fprint_raw_data(stderr, "NAME", bstr_ptr(name), bstr_len(name));
+ fprint_raw_data(stderr, "VALUE", bstr_ptr(value), bstr_len(value));
+ #endif
+ }
+ } else {
+ // The field has not ended. We'll make a copy of the available data for later.
+ if ((data != NULL) && (endpos - startpos > 0)) {
+ bstr_builder_append_mem(urlenp->_bb, data + startpos, endpos - startpos);
+ }
+ }
+}
+
+/**
+ * Allocates and initializes a URLENCODED parser.
+ *
+ * The new parser starts in the key state, uses '&' as the argument separator
+ * and has URL-decoding of parameters enabled.
+ *
+ * @param[in] tx Transaction the parser is attached to.
+ * @return New parser, or NULL on memory allocation failure.
+ */
+htp_urlenp_t *htp_urlenp_create(htp_tx_t *tx) {
+    htp_urlenp_t *parser = calloc(1, sizeof (*parser));
+    if (parser == NULL) return NULL;
+
+    // Plain field setup cannot fail, so it is done before the allocations below.
+    parser->tx = tx;
+    parser->argument_separator = '&';
+    parser->decode_url_encoding = 1;
+    parser->_state = HTP_URLENP_STATE_KEY;
+
+    parser->params = htp_table_create(HTP_URLENP_DEFAULT_PARAMS_SIZE);
+    if (parser->params != NULL) {
+        parser->_bb = bstr_builder_create();
+        if (parser->_bb != NULL) return parser;
+
+        // The string builder could not be created; undo the table allocation.
+        htp_table_destroy(parser->params);
+    }
+
+    free(parser);
+    return NULL;
+}
+
+/**
+ * Releases a URLENCODED parser together with everything it still holds:
+ * a remembered parameter name, the string builder, and the parameter table
+ * with its stored values.
+ *
+ * @param[in] urlenp Parser to destroy; NULL is accepted and ignored.
+ */
+void htp_urlenp_destroy(htp_urlenp_t *urlenp) {
+    if (urlenp == NULL) return;
+
+    if (urlenp->_name != NULL) bstr_free(urlenp->_name);
+
+    bstr_builder_destroy(urlenp->_bb);
+
+    htp_table_t *table = urlenp->params;
+    if (table != NULL) {
+        // Free the stored values one by one; parameter names
+        // will be freed by the table code.
+        const size_t count = htp_table_size(table);
+        size_t index = 0;
+        while (index < count) {
+            bstr *value = htp_table_get_index(table, index, NULL);
+            bstr_free(value);
+            index++;
+        }
+
+        htp_table_destroy(table);
+    }
+
+    free(urlenp);
+}
+
+/**
+ * Ends a streaming parse. The parser is marked as complete and then run once
+ * more with no data, which turns any outstanding field into a parameter.
+ * Invoke this after the last call to htp_urlenp_parse_partial().
+ *
+ * @param[in] urlenp
+ * @return Status reported by the final, empty parsing pass.
+ */
+htp_status_t htp_urlenp_finalize(htp_urlenp_t *urlenp) {
+    // The flag must be set before the empty pass so that pending data is flushed.
+    urlenp->_complete = 1;
+
+    const htp_status_t status = htp_urlenp_parse_partial(urlenp, NULL, 0);
+    return status;
+}
+
+/**
+ * Parses the provided data chunk under the assumption
+ * that it contains all the data that will be parsed. When this
+ * method is used for parsing the finalization method should not
+ * be invoked.
+ *
+ * @param[in] urlenp
+ * @param[in] data
+ * @param[in] len
+ * @return HTP_OK on success; otherwise the status of the step that failed.
+ */
+htp_status_t htp_urlenp_parse_complete(htp_urlenp_t *urlenp, const void *data, size_t len) {
+    // Do not silently discard a parsing failure: report it instead of finalizing.
+    htp_status_t rc = htp_urlenp_parse_partial(urlenp, data, len);
+    if (rc != HTP_OK) return rc;
+
+    return htp_urlenp_finalize(urlenp);
+}
+
+/**
+ * Parses the provided data chunk, keeping state to allow streaming parsing, i.e., the
+ * parsing where only partial information is available at any one time. The method
+ * htp_urlenp_finalize() must be invoked at the end to finalize parsing.
+ *
+ * The chunk is scanned byte by byte; one extra iteration with the pseudo-character -1
+ * marks the end of the chunk, so that the unfinished field can be stored for later.
+ *
+ * @param[in] urlenp
+ * @param[in] _data Chunk to parse; NULL is treated as an empty chunk.
+ * @param[in] len Number of bytes available in _data.
+ * @return HTP_OK on success, HTP_ERROR if the parser is in an invalid state.
+ */
+htp_status_t htp_urlenp_parse_partial(htp_urlenp_t *urlenp, const void *_data, size_t len) {
+    // The buffer is only ever read, so keep the caller's const qualifier.
+    const unsigned char *data = (const unsigned char *) _data;
+    size_t startpos = 0;
+    size_t pos = 0;
+    int c;
+
+    if (data == NULL) len = 0;
+
+    do {
+        // Get the next character, or use -1 to indicate end of input.
+        if (pos < len) c = data[pos];
+        else c = -1;
+
+        switch (urlenp->_state) {
+
+            case HTP_URLENP_STATE_KEY:
+                // Look for =, argument separator, or end of input.
+                if ((c == '=') || (c == urlenp->argument_separator) || (c == -1)) {
+                    // Data from startpos to pos.
+                    htp_urlenp_add_field_piece(urlenp, data, startpos, pos, c);
+
+                    // If it's not the end of input, then it must be the end of this field.
+                    if (c != -1) {
+                        // Next state.
+                        startpos = pos + 1;
+
+                        if (c == urlenp->argument_separator) {
+                            // A key without a value; the next field is a key again.
+                            urlenp->_state = HTP_URLENP_STATE_KEY;
+                        } else {
+                            urlenp->_state = HTP_URLENP_STATE_VALUE;
+                        }
+                    }
+                }
+
+                pos++;
+
+                break;
+
+            case HTP_URLENP_STATE_VALUE:
+                // Look for argument separator or end of input.
+                if ((c == urlenp->argument_separator) || (c == -1)) {
+                    // Data from startpos to pos.
+                    htp_urlenp_add_field_piece(urlenp, data, startpos, pos, c);
+
+                    // If it's not the end of input, then it must be the end of this field.
+                    if (c != -1) {
+                        // Next state.
+                        startpos = pos + 1;
+                        urlenp->_state = HTP_URLENP_STATE_KEY;
+                    }
+                }
+
+                pos++;
+
+                break;
+
+            default:
+                // Invalid state.
+                return HTP_ERROR;
+        }
+    } while (c != -1);
+
+    return HTP_OK;
+}
diff --git a/htp/htp_urlencoded.h b/htp/htp_urlencoded.h
new file mode 100644
index 0000000..bc4697c
--- /dev/null
+++ b/htp/htp_urlencoded.h
@@ -0,0 +1,111 @@
+/***************************************************************************
+ * Copyright (c) 2009-2010 Open Information Security Foundation
+ * Copyright (c) 2010-2013 Qualys, Inc.
+ * All rights reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions are
+ * met:
+ *
+ * - Redistributions of source code must retain the above copyright
+ * notice, this list of conditions and the following disclaimer.
+
+ * - Redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in the
+ * documentation and/or other materials provided with the distribution.
+
+ * - Neither the name of the Qualys, Inc. nor the names of its
+ * contributors may be used to endorse or promote products derived from
+ * this software without specific prior written permission.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+ * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+ * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+ * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
+ * HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+ * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
+ * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+ * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
+ * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+ * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+ * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+ ***************************************************************************/
+
+/**
+ * @file
+ * @author Ivan Ristic <ivanr@webkreator.com>
+ */
+
+#ifndef _HTP_URLENCODED_H
+#define _HTP_URLENCODED_H
+
+#ifdef __cplusplus
+extern "C" {
+#endif
+
+typedef struct htp_urlenp_t htp_urlenp_t;
+typedef struct htp_urlen_param_t htp_urlen_param_t;
+
+#define HTP_URLENP_DEFAULT_PARAMS_SIZE 32
+
+#define HTP_URLENP_STATE_KEY 1
+#define HTP_URLENP_STATE_VALUE 2
+
+// The MIME type that triggers the parser. Must be lowercase.
+#define HTP_URLENCODED_MIME_TYPE "application/x-www-form-urlencoded"
+
+#include "htp.h"
+
+/**
+ * This is the main URLENCODED parser structure. It is used to store
+ * parser configuration, temporary parsing data, as well as the parameters.
+ */
+struct htp_urlenp_t {
+ /** The transaction this parser belongs to. */
+ htp_tx_t *tx;
+
+ /** The character used to separate parameters. Defaults to & and should
+ * not be changed without good reason.
+ */
+ unsigned char argument_separator;
+
+ /** Whether to perform URL-decoding on parameters. */
+ int decode_url_encoding;
+
+ /** This table contains the list of parameters, indexed by name. */
+ htp_table_t *params;
+
+ // Private fields; these are used during the parsing process only
+ int _state; // Current parser state: HTP_URLENP_STATE_KEY or HTP_URLENP_STATE_VALUE
+ int _complete; // Set to 1 by htp_urlenp_finalize() to signal that no more input will arrive
+ bstr *_name; // Name of the parameter whose value is still being parsed, or NULL
+ bstr_builder_t *_bb; // Collects the pieces of a field that is spread over several data chunks
+};
+
+/**
+ * Holds one application/x-www-form-urlencoded parameter.
+ *
+ * NOTE(review): the parser code in htp_urlencoded.c stores names and values
+ * directly in htp_urlenp_t.params and does not appear to use this structure;
+ * confirm whether it is still needed.
+ */
+struct htp_urlen_param_t {
+ /** Parameter name. */
+ bstr *name;
+
+ /** Parameter value. */
+ bstr *value;
+};
+
+htp_urlenp_t *htp_urlenp_create(htp_tx_t *tx);
+void htp_urlenp_destroy(htp_urlenp_t *urlenp);
+
+void htp_urlenp_set_argument_separator(htp_urlenp_t *urlenp, unsigned char argument_separator);
+void htp_urlenp_set_decode_url_encoding(htp_urlenp_t *urlenp, int decode_url_encoding);
+
+htp_status_t htp_urlenp_parse_partial(htp_urlenp_t *urlenp, const void *data, size_t len);
+htp_status_t htp_urlenp_parse_complete(htp_urlenp_t *urlenp, const void *data, size_t len);
+htp_status_t htp_urlenp_finalize(htp_urlenp_t *urlenp);
+
+#ifdef __cplusplus
+}
+#endif
+
+#endif /* _HTP_URLENCODED_H */
+
diff --git a/htp/htp_utf8_decoder.c b/htp/htp_utf8_decoder.c
new file mode 100644
index 0000000..6017a18
--- /dev/null
+++ b/htp/htp_utf8_decoder.c
@@ -0,0 +1,118 @@
+/***************************************************************************
+ * Copyright (c) 2009-2010 Open Information Security Foundation
+ * Copyright (c) 2010-2013 Qualys, Inc.
+ * All rights reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions are
+ * met:
+ *
+ * - Redistributions of source code must retain the above copyright
+ * notice, this list of conditions and the following disclaimer.
+
+ * - Redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in the
+ * documentation and/or other materials provided with the distribution.
+
+ * - Neither the name of the Qualys, Inc. nor the names of its
+ * contributors may be used to endorse or promote products derived from
+ * this software without specific prior written permission.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+ * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+ * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+ * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
+ * HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+ * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
+ * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+ * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
+ * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+ * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+ * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+ ***************************************************************************/
+
+/**
+ * @file
+ * @author Ivan Ristic <ivanr@webkreator.com>
+ */
+
+/*
+Copyright (c) 2008-2009 Bjoern Hoehrmann <bjoern@hoehrmann.de>
+
+Permission is hereby granted, free of charge, to any person obtaining a copy of this software
+and associated documentation files (the "Software"), to deal in the Software without restriction,
+including without limitation the rights to use, copy, modify, merge, publish, distribute,
+sublicense, and/or sell copies of the Software, and to permit persons to whom the Software is
+furnished to do so, subject to the following conditions:
+
+The above copyright notice and this permission notice shall be included in all copies or
+substantial portions of the Software.
+
+THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR IMPLIED, INCLUDING BUT
+NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
+NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM,
+DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
+*/
+
+// Copyright (c) 2008-2009 Bjoern Hoehrmann <bjoern@hoehrmann.de>
+// See http://bjoern.hoehrmann.de/utf-8/decoder/dfa/ for details.
+
+#include "htp_config_auto.h"
+
+#include "htp_private.h"
+
+static const uint8_t utf8d[] = { // Entries 0..255 map a byte to a character class; entries from 256 on are state transitions, looked up as 256 + state*16 + class
+ 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, // 00..1f
+ 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, // 20..3f
+ 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, // 40..5f
+ 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, // 60..7f
+ 1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,9,9,9,9,9,9,9,9,9,9,9,9,9,9,9,9, // 80..9f
+ 7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7, // a0..bf
+ 8,8,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2, // c0..df
+ 0xa,0x3,0x3,0x3,0x3,0x3,0x3,0x3,0x3,0x3,0x3,0x3,0x3,0x4,0x3,0x3, // e0..ef
+ 0xb,0x6,0x6,0x6,0x5,0x8,0x8,0x8,0x8,0x8,0x8,0x8,0x8,0x8,0x8,0x8, // f0..ff
+ 0x0,0x1,0x2,0x3,0x5,0x8,0x7,0x1,0x1,0x1,0x4,0x6,0x1,0x1,0x1,0x1, // s0..s0
+ 1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,0,1,1,1,1,1,0,1,0,1,1,1,1,1,1, // s1..s2
+ 1,2,1,1,1,1,1,2,1,2,1,1,1,1,1,1,1,1,1,1,1,1,1,2,1,1,1,1,1,1,1,1, // s3..s4
+ 1,2,1,1,1,1,1,1,1,2,1,1,1,1,1,1,1,1,1,1,1,1,1,3,1,3,1,1,1,1,1,1, // s5..s6
+ 1,3,1,1,1,1,1,3,1,3,1,1,1,1,1,1,1,3,1,1,1,1,1,1,1,1,1,1,1,1,1,1, // s7..s8
+};
+
+static const uint8_t utf8d_allow_overlong[] = { // Same layout as utf8d; only the classes of c0, c1, e0 and f0 differ (see the row comments), the transition rows are the same
+ 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, // 00..1f
+ 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, // 20..3f
+ 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, // 40..5f
+ 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, // 60..7f
+ 1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,9,9,9,9,9,9,9,9,9,9,9,9,9,9,9,9, // 80..9f
+ 7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7, // a0..bf
+ 2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2, // c0..df; changed c0 and c1
+ 0x3,0x3,0x3,0x3,0x3,0x3,0x3,0x3,0x3,0x3,0x3,0x3,0x3,0x4,0x3,0x3, // e0..ef; changed e0
+ 0x6,0x6,0x6,0x6,0x5,0x8,0x8,0x8,0x8,0x8,0x8,0x8,0x8,0x8,0x8,0x8, // f0..ff; changed f0
+ 0x0,0x1,0x2,0x3,0x5,0x8,0x7,0x1,0x1,0x1,0x4,0x6,0x1,0x1,0x1,0x1, // s0..s0
+ 1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,0,1,1,1,1,1,0,1,0,1,1,1,1,1,1, // s1..s2
+ 1,2,1,1,1,1,1,2,1,2,1,1,1,1,1,1,1,1,1,1,1,1,1,2,1,1,1,1,1,1,1,1, // s3..s4
+ 1,2,1,1,1,1,1,1,1,2,1,1,1,1,1,1,1,1,1,1,1,1,1,3,1,3,1,1,1,1,1,1, // s5..s6
+ 1,3,1,1,1,1,1,3,1,3,1,1,1,1,1,1,1,3,1,1,1,1,1,1,1,1,1,1,1,1,1,1, // s7..s8
+};
+
+/**
+ * Feeds one byte of UTF-8 data into the decoder and reports the resulting state.
+ * This variant classifies bytes with the table that allows overlong characters.
+ *
+ * @param[in,out] state Decoder state; updated with the state reached after this byte.
+ * @param[in,out] codep Code point being assembled; updated with the bits of this byte.
+ * @param[in] byte The input byte.
+ * @return HTP_UTF8_ACCEPT for a valid character, HTP_UTF8_REJECT for an invalid character,
+ *         or something else if the character has not yet been formed
+ */
+uint32_t htp_utf8_decode_allow_overlong(uint32_t* state, uint32_t* codep, uint32_t byte) {
+    const uint32_t char_class = utf8d_allow_overlong[byte];
+
+    if (*state == HTP_UTF8_ACCEPT) {
+        // First byte of a character: keep only the payload bits of the lead byte.
+        *codep = (0xff >> char_class) & (byte);
+    } else {
+        // Continuation byte: shift in six more bits.
+        *codep = (byte & 0x3fu) | (*codep << 6);
+    }
+
+    // The transition is looked up in utf8d; its transition rows are identical
+    // to those of utf8d_allow_overlong.
+    *state = utf8d[256 + *state*16 + char_class];
+
+    return *state;
+}
diff --git a/htp/htp_utf8_decoder.h b/htp/htp_utf8_decoder.h
new file mode 100644
index 0000000..b39abdd
--- /dev/null
+++ b/htp/htp_utf8_decoder.h
@@ -0,0 +1,85 @@
+/***************************************************************************
+ * Copyright (c) 2009-2010 Open Information Security Foundation
+ * Copyright (c) 2010-2013 Qualys, Inc.
+ * All rights reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions are
+ * met:
+ *
+ * - Redistributions of source code must retain the above copyright
+ * notice, this list of conditions and the following disclaimer.
+
+ * - Redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in the
+ * documentation and/or other materials provided with the distribution.
+
+ * - Neither the name of the Qualys, Inc. nor the names of its
+ * contributors may be used to endorse or promote products derived from
+ * this software without specific prior written permission.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+ * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+ * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+ * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
+ * HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+ * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
+ * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+ * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
+ * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+ * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+ * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+ ***************************************************************************/
+
+/**
+ * @file
+ * @author Ivan Ristic <ivanr@webkreator.com>
+ */
+
+/* LibHTP changes:
+ *
+ * - Changed the name of the function from "decode" to "utf8_decode"
+ * - Created a separate header file
+ * - Copied the license from the web page
+ * - Created a copy of the data and function "utf8_decode_allow_overlong", which
+ * does not treat overlong characters as invalid.
+ */
+
+/*
+Copyright (c) 2008-2009 Bjoern Hoehrmann <bjoern@hoehrmann.de>
+
+Permission is hereby granted, free of charge, to any person obtaining a copy of this software
+and associated documentation files (the "Software"), to deal in the Software without restriction,
+including without limitation the rights to use, copy, modify, merge, publish, distribute,
+sublicense, and/or sell copies of the Software, and to permit persons to whom the Software is
+furnished to do so, subject to the following conditions:
+
+The above copyright notice and this permission notice shall be included in all copies or
+substantial portions of the Software.
+
+THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR IMPLIED, INCLUDING BUT
+NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
+NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM,
+DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
+*/
+
+#ifndef _UTF8_DECODER_H
+#define _UTF8_DECODER_H
+
+#ifdef __cplusplus
+extern "C" {
+#endif
+
+#include <stdint.h>
+
+#define HTP_UTF8_ACCEPT 0
+#define HTP_UTF8_REJECT 1
+
+uint32_t htp_utf8_decode_allow_overlong(uint32_t* state, uint32_t* codep, uint32_t byte);
+
+#ifdef __cplusplus
+}
+#endif
+
+#endif /* _UTF8_DECODER_H */
diff --git a/htp/htp_util.c b/htp/htp_util.c
new file mode 100644
index 0000000..936e22b
--- /dev/null
+++ b/htp/htp_util.c
@@ -0,0 +1,2602 @@
+/***************************************************************************
+ * Copyright (c) 2009-2010 Open Information Security Foundation
+ * Copyright (c) 2010-2013 Qualys, Inc.
+ * All rights reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions are
+ * met:
+ *
+ * - Redistributions of source code must retain the above copyright
+ * notice, this list of conditions and the following disclaimer.
+
+ * - Redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in the
+ * documentation and/or other materials provided with the distribution.
+
+ * - Neither the name of the Qualys, Inc. nor the names of its
+ * contributors may be used to endorse or promote products derived from
+ * this software without specific prior written permission.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+ * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+ * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+ * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
+ * HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+ * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
+ * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+ * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
+ * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+ * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+ * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+ ***************************************************************************/
+
+/**
+ * @file
+ * @author Ivan Ristic <ivanr@webkreator.com>
+ */
+
+#include "htp_config_auto.h"
+
+//inet_pton
+#if _WIN32
+#include <ws2tcpip.h>
+#else // mac, linux, freebsd
+#include <sys/types.h>
+#include <sys/socket.h>
+#include <netinet/in.h>
+#include <arpa/inet.h>
+#endif
+
+#include "htp_private.h"
+
+/**
+ * Tells whether a character is linear white space, i.e., a space or a horizontal tab.
+ *
+ * @param[in] c
+ * @return 1 for space or tab, 0 for anything else
+ */
+int htp_is_lws(int c) {
+    return ((c == ' ') || (c == '\t')) ? 1 : 0;
+}
+
+/**
+ * Tells whether a character is one of the separator characters.
+ *
+ * @param[in] c
+ * @return 1 if c is a separator, 0 otherwise
+ */
+int htp_is_separator(int c) {
+    /* separators = "(" | ")" | "<" | ">" | "@"
+                  | "," | ";" | ":" | "\" | <">
+                  | "/" | "[" | "]" | "?" | "="
+                  | "{" | "}" | SP | HT */
+    static const char separators[] = {
+        '(', ')', '<', '>', '@',
+        ',', ';', ':', '\\', '"',
+        '/', '[', ']', '?', '=',
+        '{', '}', ' ', '\t'
+    };
+
+    for (size_t i = 0; i < sizeof (separators); i++) {
+        if (c == separators[i]) return 1;
+    }
+
+    return 0;
+}
+
+/**
+ * Tells whether a character counts as text: a horizontal tab, or any value
+ * that is not below 32.
+ *
+ * @param[in] c
+ * @return 0 or 1
+ */
+int htp_is_text(int c) {
+    return ((c == '\t') || (c >= 32)) ? 1 : 0;
+}
+
+/**
+ * Tells whether a character may appear in a token.
+ *
+ * @param[in] c
+ * @return 1 if c is in the range 32..126 and is not a separator, 0 otherwise
+ */
+int htp_is_token(int c) {
+    /* token = 1*<any CHAR except CTLs or separators> */
+    /* CHAR  = <any US-ASCII character (octets 0 - 127)> */
+    const int in_range = (c >= 32) && (c <= 126);
+
+    // The separator check is only reached for characters inside the range.
+    return (in_range && !htp_is_separator(c)) ? 1 : 0;
+}
+
+/**
+ * Remove all line terminators (LF, CR or CRLF) from
+ * the end of the line provided as input. Only the length is adjusted;
+ * the data itself is not modified.
+ *
+ * @param[in] data
+ * @param[in,out] len Length of the data; reduced by the number of removed bytes.
+ * @return 0 if nothing was removed. Otherwise the value describes the terminator
+ *         removed last (the one closest to the start of the data): 2 for a CR
+ *         followed by LF, 1 for a lone LF or a lone CR.
+ */
+int htp_chomp(unsigned char *data, size_t *len) {
+    int result = 0;
+
+    // Work backwards from the end for as long as terminators are found
+    for (;;) {
+        if (*len == 0) break;
+
+        const unsigned char last = data[*len - 1];
+
+        if (last == LF) {
+            --(*len);
+            result = 1;
+
+            if (*len == 0) break;
+
+            // A CR is allowed before LF
+            if (data[*len - 1] == CR) {
+                --(*len);
+                result = 2;
+            }
+        } else if (last == CR) {
+            --(*len);
+            result = 1;
+        } else {
+            break;
+        }
+    }
+
+    return result;
+}
+
+/**
+ * Tells whether a character is white space: space, form feed, vertical tab,
+ * horizontal tab, carriage return or line feed.
+ *
+ * @param[in] c
+ * @return 0 or 1
+ */
+int htp_is_space(int c) {
+    const int blank = (c == ' ') || (c == '\t');
+    const int vertical = (c == '\f') || (c == '\v');
+    const int line_end = (c == '\r') || (c == '\n');
+
+    return (blank || vertical || line_end) ? 1 : 0;
+}
+
+/**
+ * Converts request method, given as a string, into a number.
+ * The comparison is delegated to bstr_cmp_c() for every known method name.
+ *
+ * @param[in] method Method name; may be NULL.
+ * @return Method number, or HTP_M_UNKNOWN if the method is NULL or not recognized
+ */
+int htp_convert_method_to_number(bstr *method) {
+    // TODO Optimize using parallel matching, or something similar.
+    static const struct {
+        const char *name;
+        int number;
+    } known_methods[] = {
+        { "GET", HTP_M_GET },
+        { "PUT", HTP_M_PUT },
+        { "POST", HTP_M_POST },
+        { "DELETE", HTP_M_DELETE },
+        { "CONNECT", HTP_M_CONNECT },
+        { "OPTIONS", HTP_M_OPTIONS },
+        { "TRACE", HTP_M_TRACE },
+        { "PATCH", HTP_M_PATCH },
+        { "PROPFIND", HTP_M_PROPFIND },
+        { "PROPPATCH", HTP_M_PROPPATCH },
+        { "MKCOL", HTP_M_MKCOL },
+        { "COPY", HTP_M_COPY },
+        { "MOVE", HTP_M_MOVE },
+        { "LOCK", HTP_M_LOCK },
+        { "UNLOCK", HTP_M_UNLOCK },
+        { "VERSION-CONTROL", HTP_M_VERSION_CONTROL },
+        { "CHECKOUT", HTP_M_CHECKOUT },
+        { "UNCHECKOUT", HTP_M_UNCHECKOUT },
+        { "CHECKIN", HTP_M_CHECKIN },
+        { "UPDATE", HTP_M_UPDATE },
+        { "LABEL", HTP_M_LABEL },
+        { "REPORT", HTP_M_REPORT },
+        { "MKWORKSPACE", HTP_M_MKWORKSPACE },
+        { "MKACTIVITY", HTP_M_MKACTIVITY },
+        { "BASELINE-CONTROL", HTP_M_BASELINE_CONTROL },
+        { "MERGE", HTP_M_MERGE },
+        { "INVALID", HTP_M_INVALID },
+        { "HEAD", HTP_M_HEAD }
+    };
+
+    if (method == NULL) return HTP_M_UNKNOWN;
+
+    // Walk the table in order and stop at the first name that matches.
+    for (size_t i = 0; i < sizeof (known_methods) / sizeof (known_methods[0]); i++) {
+        if (bstr_cmp_c(method, known_methods[i].name) == 0) {
+            return known_methods[i].number;
+        }
+    }
+
+    return HTP_M_UNKNOWN;
+}
+
+/**
+ * Tells whether the line consists of nothing but a line terminator:
+ * a single CR, a single LF, or CR followed by LF.
+ *
+ * @param[in] data
+ * @param[in] len
+ * @return 0 or 1
+ */
+int htp_is_line_empty(unsigned char *data, size_t len) {
+    if (len == 1) {
+        return ((data[0] == CR) || (data[0] == LF)) ? 1 : 0;
+    }
+
+    if (len == 2) {
+        return ((data[0] == CR) && (data[1] == LF)) ? 1 : 0;
+    }
+
+    // Any other length cannot be a bare line terminator.
+    return 0;
+}
+
+/**
+ * Checks whether every byte of the line is a whitespace character, as
+ * determined by isspace(). A line of length zero is reported as whitespace.
+ *
+ * @param[in] data
+ * @param[in] len
+ * @return 0 or 1
+ */
+int htp_is_line_whitespace(unsigned char *data, size_t len) {
+    // Consume the buffer front to back; the first non-space byte settles it.
+    while (len > 0) {
+        if (!isspace(*data)) return 0;
+        data++;
+        len--;
+    }
+
+    return 1;
+}
+
+/**
+ * Parses Content-Length string (positive decimal number).
+ * Anything that is not a digit is skipped before the number, and data after the
+ * number is tolerated; when a connection parser is supplied, a warning is logged
+ * for non-LWS data before the number and for any data after it.
+ *
+ * @param[in] b String to parse.
+ * @param[in] connp Connection parser used for logging; may be NULL to disable logging.
+ * @return Content-Length as a number, or a negative number on error (-1003 for an
+ *         empty string, -1001 if no digit is present, otherwise the error reported
+ *         by bstr_util_mem_to_pint()).
+ */
+int64_t htp_parse_content_length(bstr *b, htp_connp_t *connp) {
+    size_t len = bstr_len(b);
+    unsigned char * data = (unsigned char *) bstr_ptr(b);
+    size_t pos = 0;
+    int warned = 0;
+
+    if (len == 0) return -1003;
+
+    // Ignore junk before
+    while ((pos < len) && (data[pos] < '0' || data[pos] > '9')) {
+        // Log at most one warning, and only for data other than LWS.
+        if (!htp_is_lws(data[pos]) && connp != NULL && !warned) {
+            htp_log(connp, HTP_LOG_MARK, HTP_LOG_WARNING, 0,
+                    "C-L value with extra data in the beginning");
+            warned = 1;
+        }
+        pos++;
+    }
+    if (pos == len) return -1001;
+
+    // The reported length is relative to the pointer given to the parsing function
+    // (htp_parse_positive_integer_whitespace() adds it to its own offset), so keep
+    // it separate from pos and add the two before comparing against len.
+    size_t consumed = 0;
+    int64_t r = bstr_util_mem_to_pint(data + pos, len - pos, 10, &consumed);
+
+    // Ok to have junk afterwards
+    if ((pos + consumed < len) && connp != NULL) {
+        htp_log(connp, HTP_LOG_MARK, HTP_LOG_WARNING, 0,
+                "C-L value with extra data in the end");
+    }
+    return r;
+}
+
+/**
+ * Parses a chunk length (positive hexadecimal number). Leading CR, LF, space,
+ * tab, vertical tab and form feed bytes are skipped, and anything after the
+ * run of hexadecimal digits is cut off before the number is converted. An
+ * error is returned if the chunk length is greater than INT32_MAX.
+ *
+ * @param[in] data
+ * @param[in] len
+ * @param[out] extension Optional; set to 1 when a ';' is found in the part
+ *                       that follows the digits. Left untouched otherwise.
+ * @return Chunk length, or a negative number on error.
+ */
+int64_t htp_parse_chunked_length(unsigned char *data, size_t len, int *extension) {
+    // Step over leading line feeds and other control/blank characters.
+    for (; len > 0; data++, len--) {
+        unsigned char lead = *data;
+        if ((lead != 0x0d) && (lead != 0x0a) && (lead != 0x20) &&
+                (lead != 0x09) && (lead != 0x0b) && (lead != 0x0c)) {
+            break;
+        }
+    }
+    if (len == 0) return -1004;
+
+    // Measure the run of hexadecimal digits at the front of the data.
+    size_t ndigits = 0;
+    for (; ndigits < len; ndigits++) {
+        unsigned char ch = data[ndigits];
+        int is_hex = isdigit(ch) || (ch >= 'a' && ch <= 'f') || (ch >= 'A' && ch <= 'F');
+        if (!is_hex) break;
+    }
+
+    // Trailing junk is dropped, but first note whether it holds a chunk extension.
+    if ((ndigits != len) && (extension != NULL)) {
+        if (memchr(data + ndigits, ';', len - ndigits) != NULL) {
+            *extension = 1;
+        }
+    }
+
+    int64_t chunk_len = htp_parse_positive_integer_whitespace(data, ndigits, 16);
+    if (chunk_len < 0) return chunk_len;
+
+    return (chunk_len > INT32_MAX) ? -1 : chunk_len;
+}
+
+/**
+ * Lenient parser for a positive integer written in the given base. The
+ * number may be surrounded by LWS; any other trailing byte is an error.
+ *
+ * @param[in] data
+ * @param[in] len
+ * @param[in] base
+ * @return The parsed number on success. On error: -1003 for empty input,
+ *         -1001 for input made only of LWS, -1002 when a non-LWS byte follows
+ *         the number, or the negative value reported by bstr_util_mem_to_pint().
+ */
+int64_t htp_parse_positive_integer_whitespace(unsigned char *data, size_t len, int base) {
+    if (len == 0) return -1003;
+
+    // Skip the LWS that precedes the number.
+    size_t idx = 0;
+    while ((idx < len) && htp_is_lws(data[idx])) {
+        idx++;
+    }
+    if (idx == len) return -1001;
+
+    size_t consumed;
+    int64_t value = bstr_util_mem_to_pint(data + idx, len - idx, base, &consumed);
+    if (value < 0) return value;
+
+    // Continue right after the digits; only LWS may remain.
+    for (idx += consumed; idx < len; idx++) {
+        if (!htp_is_lws(data[idx])) return -1002;
+    }
+
+    return value;
+}
+
+#ifdef HTP_DEBUG
+
+/**
+ * Writes one log message to the given stream. The code is included
+ * in the output only when it is non-zero.
+ *
+ * @param[in] stream
+ * @param[in] log
+ */
+void htp_print_log(FILE *stream, htp_log_t *log) {
+    if (log->code == 0) {
+        // No code to report; use the short form.
+        fprintf(stream, "[%d][file %s][line %d] %s\n", log->level,
+                log->file, log->line, log->msg);
+        return;
+    }
+
+    fprintf(stream, "[%d][code %d][file %s][line %d] %s\n", log->level,
+            log->code, log->file, log->line, log->msg);
+}
+#endif
+
+/**
+ * Records one log message. The message is formatted into a fixed 1024-byte
+ * buffer (longer messages are truncated and marked with a trailing '+'),
+ * stored in a newly allocated entry appended to the connection's message
+ * list, and then passed to the configured log hooks. Nothing is recorded
+ * when connp is NULL, when the level is above the configured log level, or
+ * when memory for the entry cannot be allocated.
+ *
+ * @param[in] connp
+ * @param[in] file
+ * @param[in] line
+ * @param[in] level
+ * @param[in] code
+ * @param[in] fmt
+ */
+void htp_log(htp_connp_t *connp, const char *file, int line, enum htp_log_level_t level, int code, const char *fmt, ...) {
+    if (connp == NULL) return;
+
+    char buf[1024];
+    va_list args;
+
+    // Ignore messages below our log level.
+    if (connp->cfg->log_level < level) {
+        return;
+    }
+
+    va_start(args, fmt);
+
+    int r = vsnprintf(buf, sizeof (buf), fmt, args);
+
+    va_end(args);
+
+    if (r < 0) {
+        snprintf(buf, sizeof (buf), "[vnsprintf returned error %d]", r);
+    } else if ((size_t) r >= sizeof (buf)) {
+        // Indicate overflow with a '+' at the end.
+        buf[sizeof (buf) - 2] = '+';
+        buf[sizeof (buf) - 1] = '\0';
+    }
+
+    // Create a new log entry.
+
+    htp_log_t *log = calloc(1, sizeof (htp_log_t));
+    if (log == NULL) return;
+
+    log->connp = connp;
+    log->file = file;
+    log->line = line;
+    log->level = level;
+    log->code = code;
+    log->msg = strdup(buf);
+
+    // Do not store an entry without a message: it would be handed to the
+    // hooks (and printed in debug builds) with a NULL msg pointer.
+    if (log->msg == NULL) {
+        free(log);
+        return;
+    }
+
+    if (htp_list_add(connp->conn->messages, log) != HTP_OK) {
+        free((void *) log->msg);
+        free(log);
+        return;
+    }
+
+    if (level == HTP_LOG_ERROR) {
+        connp->last_error = log;
+    }
+
+    #ifdef HTP_DEBUG
+    fprintf(stderr, "[LOG] %s\n", log->msg);
+    #endif
+
+    /* coverity[check_return] */
+    htp_hook_run_all(connp->cfg->hook_log, log);
+}
+
+/**
+ * Tells whether a line continues the previous one, judged by its first byte.
+ *
+ * @param[in] data
+ * @param[in] len
+ * @return 1 if the line is a continuation, 0 if it is not, and -1 when
+ *         there is nothing to inspect (NULL pointer or length zero).
+ */
+int htp_connp_is_line_folded(unsigned char *data, size_t len) {
+    if ((data != NULL) && (len != 0)) {
+        return htp_is_folding_char(data[0]);
+    }
+
+    return -1;
+}
+
+/**
+ * Tells whether a byte marks a folded (continuation) line: any LWS
+ * character or a NUL byte.
+ *
+ * @param[in] c
+ * @return 1 if the byte is a folding character, 0 otherwise
+ */
+int htp_is_folding_char(int c) {
+    return (htp_is_lws(c) || c == 0) ? 1 : 0;
+}
+
+/**
+ * Determines if the given line terminates the request headers.
+ *
+ * @param[in] connp
+ * @param[in] data
+ * @param[in] len
+ * @param[in] next_no_lf Value to return for a two-byte line made of one LWS
+ *                       character followed by LF.
+ * @return 0 or 1 (or next_no_lf, for the LWS + LF case)
+ */
+int htp_connp_is_line_terminator(htp_connp_t *connp, unsigned char *data, size_t len, int next_no_lf) {
+    // IIS 5 will accept a whitespace line as a terminator.
+    if ((connp->cfg->server_personality == HTP_SERVER_IIS_5_1) && htp_is_line_whitespace(data, len)) {
+        return 1;
+    }
+
+    // For every personality, an empty line is a terminator.
+    if (htp_is_line_empty(data, len)) {
+        return 1;
+    }
+
+    // A lone LWS character followed by LF is a terminator only
+    // if the caller says so through next_no_lf.
+    if ((len == 2) && htp_is_lws(data[0]) && (data[1] == LF)) {
+        return next_no_lf;
+    }
+
+    return 0;
+}
+
+/**
+ * Determines if the given line can be ignored when it appears before a
+ * request. Such a line is one that would qualify as a header terminator,
+ * evaluated with next_no_lf set to 0.
+ *
+ * @param[in] connp
+ * @param[in] data
+ * @param[in] len
+ * @return 0 or 1
+ */
+int htp_connp_is_line_ignorable(htp_connp_t *connp, unsigned char *data, size_t len) {
+    const int next_no_lf = 0;
+
+    return htp_connp_is_line_terminator(connp, data, len, next_no_lf);
+}
+
+/**
+ * Converts the textual port into a number. A port is accepted only when it
+ * parses as a decimal number in the range 1-65535; in every other case
+ * (including empty input) the port is set to -1 and the invalid flag raised.
+ * The invalid flag is not touched when the port is valid.
+ *
+ * @param[in] data
+ * @param[in] len
+ * @param[out] port
+ * @param[out] invalid
+ * @return HTP_OK, always
+ */
+static htp_status_t htp_parse_port(unsigned char *data, size_t len, int *port, int *invalid) {
+    // Treat empty input like a parsing failure.
+    int64_t parsed = -1;
+    if (len != 0) {
+        parsed = htp_parse_positive_integer_whitespace(data, len, 10);
+    }
+
+    if ((parsed > 0) && (parsed < 65536)) {
+        // Valid port number.
+        *port = (int) parsed;
+    } else {
+        // Unparsable, zero, or out of range.
+        *port = -1;
+        *invalid = 1;
+    }
+
+    return HTP_OK;
+}
+
+/**
+ * Parses an authority string, which consists of a hostname with an optional port number; username
+ * and password are not allowed and will not be handled. Surrounding whitespace is trimmed first.
+ * An IPv6 host is recognised by a leading '[' and is copied including its brackets.
+ *
+ * @param[in] hostport
+ * @param[out] hostname A bstring containing the hostname, or NULL if the hostname is invalid. If this value
+ *                      is not NULL, the caller assumes responsibility for memory management. On HTP_ERROR
+ *                      the value is either NULL or was never written (NULL-argument case).
+ * @param[out] port Port as text, or NULL if not provided. May itself be NULL if the caller is not interested.
+ * @param[out] port_number Port number, or -1 if the port is not present or invalid.
+ * @param[out] invalid Set to 1 if any part of the authority is invalid.
+ * @return HTP_OK on success, HTP_ERROR on memory allocation failure or when hostport, hostname,
+ *         port_number or invalid is NULL.
+ */
+htp_status_t htp_parse_hostport(bstr *hostport, bstr **hostname, bstr **port, int *port_number, int *invalid) {
+    if ((hostport == NULL) || (hostname == NULL) || (port_number == NULL) || (invalid == NULL)) return HTP_ERROR;
+
+    *hostname = NULL;
+    if (port != NULL) {
+        *port = NULL;
+    }
+    *port_number = -1;
+    *invalid = 0;
+
+    unsigned char *data = bstr_ptr(hostport);
+    size_t len = bstr_len(hostport);
+
+    bstr_util_mem_trim(&data, &len);
+
+    if (len == 0) {
+        *invalid = 1;
+        return HTP_OK;
+    }
+
+    // Check for an IPv6 address.
+    if (data[0] == '[') {
+        // IPv6 host.
+
+        // Find the end of the IPv6 address.
+        size_t pos = 0;
+        while ((pos < len) && (data[pos] != ']')) pos++;
+        if (pos == len) {
+            *invalid = 1;
+            return HTP_OK;
+        }
+
+        *hostname = bstr_dup_mem(data, pos + 1);
+        if (*hostname == NULL) return HTP_ERROR;
+
+        // Over the ']'.
+        pos++;
+        if (pos == len) return HTP_OK;
+
+        // Handle port.
+        if (data[pos] == ':') {
+            if (port != NULL) {
+                *port = bstr_dup_mem(data + pos + 1, len - pos - 1);
+                if (*port == NULL) {
+                    // Do not leave a dangling pointer behind: the caller
+                    // owns whatever non-NULL hostname it gets back.
+                    bstr_free(*hostname);
+                    *hostname = NULL;
+                    return HTP_ERROR;
+                }
+            }
+
+            return htp_parse_port(data + pos + 1, len - pos - 1, port_number, invalid);
+        } else {
+            // Something other than a port follows the closing bracket.
+            *invalid = 1;
+            return HTP_OK;
+        }
+    } else {
+        // Not IPv6 host.
+
+        // Is there a colon?
+        unsigned char *colon = memchr(data, ':', len);
+        if (colon == NULL) {
+            // Hostname alone, no port.
+
+            *hostname = bstr_dup_mem(data, len);
+            if (*hostname == NULL) return HTP_ERROR;
+
+            bstr_to_lowercase(*hostname);
+        } else {
+            // Hostname and port.
+
+            // Ignore whitespace at the end of hostname.
+            unsigned char *hostend = colon;
+            while ((hostend > data) && (isspace(*(hostend - 1)))) hostend--;
+
+            *hostname = bstr_dup_mem(data, hostend - data);
+            if (*hostname == NULL) return HTP_ERROR;
+
+            if (port != NULL) {
+                *port = bstr_dup_mem(colon + 1, len - (colon + 1 - data));
+                if (*port == NULL) {
+                    // Same as above: clear the freed hostname.
+                    bstr_free(*hostname);
+                    *hostname = NULL;
+                    return HTP_ERROR;
+                }
+            }
+
+            return htp_parse_port(colon + 1, len - (colon + 1 - data), port_number, invalid);
+        }
+    }
+
+    return HTP_OK;
+}
+
+/**
+ * Parses hostport provided in the URI, storing the hostname, the port text
+ * and the port number in the URI structure. HTP_HOSTU_INVALID is set on the
+ * inbound transaction when the authority cannot be parsed cleanly or when
+ * the extracted hostname fails validation.
+ *
+ * @param[in] connp
+ * @param[in] hostport
+ * @param[in] uri
+ * @return HTP_OK on success or HTP_ERROR error.
+ */
+int htp_parse_uri_hostport(htp_connp_t *connp, bstr *hostport, htp_uri_t *uri) {
+    int invalid;
+
+    htp_status_t rc = htp_parse_hostport(hostport, &(uri->hostname), &(uri->port), &(uri->port_number), &invalid);
+    if (rc != HTP_OK) return rc;
+
+    // Both checks raise the same flag, so fold them into one decision.
+    int flag_invalid = invalid ? 1 : 0;
+
+    if ((uri->hostname != NULL) && (htp_validate_hostname(uri->hostname) == 0)) {
+        flag_invalid = 1;
+    }
+
+    if (flag_invalid) {
+        connp->in_tx->flags |= HTP_HOSTU_INVALID;
+    }
+
+    return HTP_OK;
+}
+
+/**
+ * Parses hostport provided in the Host header. HTP_HOSTH_INVALID is added
+ * to the flags when the authority cannot be parsed cleanly or when the
+ * extracted hostname fails validation.
+ *
+ * @param[in] hostport
+ * @param[out] hostname
+ * @param[out] port
+ * @param[out] port_number
+ * @param[out] flags
+ * @return HTP_OK on success or HTP_ERROR error.
+ */
+htp_status_t htp_parse_header_hostport(bstr *hostport, bstr **hostname, bstr **port, int *port_number, uint64_t *flags) {
+    int invalid;
+
+    htp_status_t rc = htp_parse_hostport(hostport, hostname, port, port_number, &invalid);
+    if (rc != HTP_OK) return rc;
+
+    // Both checks raise the same flag, so fold them into one decision.
+    int flag_invalid = invalid ? 1 : 0;
+
+    if ((*hostname != NULL) && (htp_validate_hostname(*hostname) == 0)) {
+        flag_invalid = 1;
+    }
+
+    if (flag_invalid) {
+        *flags |= HTP_HOSTH_INVALID;
+    }
+
+    return HTP_OK;
+}
+
+/**
+ * Parses request URI, making no attempt to validate the contents. The input
+ * is split into scheme, credentials (username and password), hostname, port
+ * (as text), path, query and fragment; each part found is copied into the
+ * corresponding field of the URI structure. Trailing space characters are
+ * not considered part of the URI.
+ *
+ * Nothing is freed by this function: when HTP_ERROR is returned, the fields
+ * populated up to that point remain set in the structure.
+ *
+ * @param[in] input The URI text; may be NULL, in which case only the
+ *                  structure is allocated (if needed).
+ * @param[in,out] uri Pointer to the structure to fill. If it points to NULL,
+ *                    a zeroed structure is allocated and stored there.
+ * @return HTP_ERROR on memory allocation failure, HTP_OK otherwise
+ */
+int htp_parse_uri(bstr *input, htp_uri_t **uri) {
+ // Allow a htp_uri_t structure to be provided on input,
+ // but allocate a new one if the structure is NULL.
+ if (*uri == NULL) {
+ *uri = calloc(1, sizeof (htp_uri_t));
+ if (*uri == NULL) return HTP_ERROR;
+ }
+
+ if (input == NULL) {
+ // The input might be NULL on requests that don't actually
+ // contain the URI. We allow that.
+ return HTP_OK;
+ }
+
+ unsigned char *data = bstr_ptr(input);
+ size_t len = bstr_len(input);
+ // remove trailing spaces (only the ' ' character, by shrinking len)
+ while (len > 0) {
+ if (data[len-1] != ' ') {
+ break;
+ }
+ len--;
+ }
+ size_t start, pos;
+
+ if (len == 0) {
+ // Empty string.
+ return HTP_OK;
+ }
+
+ pos = 0;
+
+ // Scheme test: if it doesn't start with a forward slash character (which it must
+ // for the contents to be a path or an authority), then it must be the scheme part
+ if (data[0] != '/') {
+ // Parse scheme
+
+ // Find the colon, which marks the end of the scheme part
+ start = pos;
+ while ((pos < len) && (data[pos] != ':')) pos++;
+
+ if (pos >= len) {
+ // We haven't found a colon, which means that the URI
+ // is invalid. Apache will ignore this problem and assume
+ // the URI contains an invalid path so, for the time being,
+ // we are going to do the same. Rewind so that the whole
+ // input is treated as the path below.
+ pos = 0;
+ } else {
+ // Make a copy of the scheme
+ (*uri)->scheme = bstr_dup_mem(data + start, pos - start);
+ if ((*uri)->scheme == NULL) return HTP_ERROR;
+
+ // Go over the colon
+ pos++;
+ }
+ }
+
+ // Authority test: two forward slash characters and it's an authority.
+ // One, three or more slash characters, and it's a path. We, however,
+ // only attempt to parse authority if we've seen a scheme. Note that at
+ // least one byte must follow the two slashes for the test to succeed.
+ if ((*uri)->scheme != NULL)
+ if ((pos + 2 < len) && (data[pos] == '/') && (data[pos + 1] == '/') && (data[pos + 2] != '/')) {
+ // Parse authority
+
+ // Go over the two slash characters
+ start = pos = pos + 2;
+
+ // Authority ends with a question mark, forward slash or hash
+ while ((pos < len) && (data[pos] != '?') && (data[pos] != '/') && (data[pos] != '#')) pos++;
+
+ unsigned char *hostname_start;
+ size_t hostname_len;
+
+ // Are the credentials included in the authority? (They end at the first '@'.)
+ unsigned char *m = memchr(data + start, '@', pos - start);
+ if (m != NULL) {
+ // Credentials present
+ unsigned char *credentials_start = data + start;
+ size_t credentials_len = m - data - start;
+
+ // Figure out just the hostname part, which begins right after the '@'
+ hostname_start = data + start + credentials_len + 1;
+ hostname_len = pos - start - credentials_len - 1;
+
+ // Extract the username and the password, separated by the first ':'
+ m = memchr(credentials_start, ':', credentials_len);
+ if (m != NULL) {
+ // Username and password
+ (*uri)->username = bstr_dup_mem(credentials_start, m - credentials_start);
+ if ((*uri)->username == NULL) return HTP_ERROR;
+ (*uri)->password = bstr_dup_mem(m + 1, credentials_len - (m - credentials_start) - 1);
+ if ((*uri)->password == NULL) return HTP_ERROR;
+ } else {
+ // Username alone
+ (*uri)->username = bstr_dup_mem(credentials_start, credentials_len);
+ if ((*uri)->username == NULL) return HTP_ERROR;
+ }
+ } else {
+ // No credentials
+ hostname_start = data + start;
+ hostname_len = pos - start;
+ }
+
+ // Parsing authority without credentials.
+ if ((hostname_len > 0) && (hostname_start[0] == '[')) {
+ // IPv6 address.
+
+ m = memchr(hostname_start, ']', hostname_len);
+ if (m == NULL) {
+ // Invalid IPv6 address; use the entire string as hostname.
+ (*uri)->hostname = bstr_dup_mem(hostname_start, hostname_len);
+ if ((*uri)->hostname == NULL) return HTP_ERROR;
+ } else {
+ // The hostname copy includes both brackets.
+ (*uri)->hostname = bstr_dup_mem(hostname_start, m - hostname_start + 1);
+ if ((*uri)->hostname == NULL) return HTP_ERROR;
+
+ // Is there a port? Continue with what follows the ']'.
+ hostname_len = hostname_len - (m - hostname_start + 1);
+ hostname_start = m + 1;
+
+ // Port string
+ m = memchr(hostname_start, ':', hostname_len);
+ if (m != NULL) {
+ size_t port_len = hostname_len - (m - hostname_start) - 1;
+ (*uri)->port = bstr_dup_mem(m + 1, port_len);
+ if ((*uri)->port == NULL) return HTP_ERROR;
+ }
+ }
+ } else {
+ // Not IPv6 address.
+
+ m = memchr(hostname_start, ':', hostname_len);
+ if (m != NULL) {
+ size_t port_len = hostname_len - (m - hostname_start) - 1;
+ hostname_len = hostname_len - port_len - 1;
+
+ // Port string
+ (*uri)->port = bstr_dup_mem(m + 1, port_len);
+ if ((*uri)->port == NULL) return HTP_ERROR;
+ }
+
+ // Hostname
+ (*uri)->hostname = bstr_dup_mem(hostname_start, hostname_len);
+ if ((*uri)->hostname == NULL) return HTP_ERROR;
+ }
+ }
+
+ // Path begins wherever the scheme/authority parsing stopped
+ start = pos;
+
+ // The path part will end with a question mark or a hash character, which
+ // mark the beginning of the query part or the fragment part, respectively.
+ while ((pos < len) && (data[pos] != '?') && (data[pos] != '#')) pos++;
+
+ // Path
+ (*uri)->path = bstr_dup_mem(data + start, pos - start);
+ if ((*uri)->path == NULL) return HTP_ERROR;
+
+ if (pos == len) return HTP_OK;
+
+ // Query
+ if (data[pos] == '?') {
+ // Step over the question mark
+ start = pos + 1;
+
+ // The query part will end with the end of the input
+ // or the beginning of the fragment part
+ while ((pos < len) && (data[pos] != '#')) pos++;
+
+ // Query string
+ (*uri)->query = bstr_dup_mem(data + start, pos - start);
+ if ((*uri)->query == NULL) return HTP_ERROR;
+
+ if (pos == len) return HTP_OK;
+ }
+
+ // Fragment
+ if (data[pos] == '#') {
+ // Step over the hash character
+ start = pos + 1;
+
+ // Fragment; ends with the end of the input
+ (*uri)->fragment = bstr_dup_mem(data + start, len - start);
+ if ((*uri)->fragment == NULL) return HTP_ERROR;
+ }
+
+ return HTP_OK;
+}
+
+/**
+ * Decodes two hexadecimal characters into the byte they represent. The
+ * input is not validated: characters that are not hexadecimal digits are
+ * converted with the same arithmetic and yield an arbitrary byte.
+ *
+ * @param[in] what Pointer to (at least) two input bytes
+ * @return hex-decoded byte
+ */
+static unsigned char x2c(unsigned char *what) {
+    // Letters are folded to upper case by clearing bit 0x20 before the
+    // distance from 'A' is taken; everything else is treated as a digit.
+    int high = (what[0] >= 'A') ? ((what[0] & 0xdf) - 'A') + 10 : (what[0] - '0');
+    int low = (what[1] >= 'A') ? ((what[1] & 0xdf) - 'A') + 10 : (what[1] - '0');
+
+    // The conversion keeps the low eight bits of the combined value.
+    return (unsigned char) (high * 16 + low);
+}
+
+/**
+ * Reduces a Unicode codepoint to a single byte. Codepoints below 0x100 are
+ * returned as they are; codepoints up to 0xffff are looked up in the best-fit
+ * map of the given decoder context; anything else, and anything the map does
+ * not contain, becomes the configured replacement byte.
+ *
+ * The map is read as a sequence of 3-byte entries (two codepoint bytes, high
+ * byte first, followed by the replacement), ending with an entry whose
+ * codepoint is zero.
+ *
+ * @param[in] cfg
+ * @param[in] ctx
+ * @param[in] codepoint
+ * @return converted single byte
+ */
+static uint8_t bestfit_codepoint(htp_cfg_t *cfg, enum htp_decoder_ctx_t ctx, uint32_t codepoint) {
+    // Is it a single-byte codepoint?
+    if (codepoint < 0x100) {
+        return (uint8_t) codepoint;
+    }
+
+    // Our current implementation converts only the 2-byte codepoints.
+    if (codepoint <= 0xffff) {
+        uint8_t *entry = cfg->decoder_cfgs[ctx].bestfit_map;
+
+        // TODO Optimize lookup.
+
+        for (;; entry += 3) {
+            uint32_t mapped = (entry[0] << 8) + entry[1];
+
+            // End of the map; fall back to the replacement byte.
+            if (mapped == 0) break;
+
+            if (mapped == codepoint) {
+                return entry[2];
+            }
+        }
+    }
+
+    return cfg->decoder_cfgs[ctx].bestfit_replacement_byte;
+}
+
+/**
+ * Decode a UTF-8 encoded path. Overlong characters will be decoded, invalid
+ * characters will be left as-is. Best-fit mapping will be used to convert
+ * UTF-8 into a single-byte stream.
+ *
+ * The path is rewritten in place (the write position never runs ahead of
+ * the read position) and its length is adjusted at the end. While decoding,
+ * the following transaction flags may be set: HTP_PATH_UTF8_OVERLONG,
+ * HTP_PATH_HALF_FULL_RANGE (codepoints 0xff00-0xffef), HTP_PATH_UTF8_INVALID
+ * and, when at least one multi-byte character was decoded and no invalid
+ * flag is present, HTP_PATH_UTF8_VALID. An invalid sequence is replaced by
+ * the configured best-fit replacement byte and may also update the
+ * transaction's expected response status, depending on configuration.
+ *
+ * Does nothing if the path, or its data pointer, is NULL.
+ *
+ * @param[in] cfg
+ * @param[in] tx
+ * @param[in] path
+ */
+void htp_utf8_decode_path_inplace(htp_cfg_t *cfg, htp_tx_t *tx, bstr *path) {
+ if (path == NULL) return;
+
+ uint8_t *data = bstr_ptr(path);
+ if (data == NULL) return;
+
+ size_t len = bstr_len(path);
+ size_t rpos = 0;
+ size_t wpos = 0;
+ uint32_t codepoint = 0;
+ uint32_t state = HTP_UTF8_ACCEPT;
+ uint32_t counter = 0;
+ uint8_t seen_valid = 0;
+
+ while ((rpos < len)&&(wpos < len)) {
+ counter++;
+
+ switch (htp_utf8_decode_allow_overlong(&state, &codepoint, data[rpos])) {
+ case HTP_UTF8_ACCEPT:
+ if (counter == 1) {
+ // ASCII character, which we just copy.
+ data[wpos++] = (uint8_t) codepoint;
+ } else {
+ // A valid UTF-8 character, which we need to convert.
+
+ seen_valid = 1;
+
+ // Check for overlong characters and set the flag accordingly:
+ // the codepoint would have fit in a shorter sequence.
+ switch (counter) {
+ case 2:
+ if (codepoint < 0x80) {
+ tx->flags |= HTP_PATH_UTF8_OVERLONG;
+ }
+ break;
+ case 3:
+ if (codepoint < 0x800) {
+ tx->flags |= HTP_PATH_UTF8_OVERLONG;
+ }
+ break;
+ case 4:
+ if (codepoint < 0x10000) {
+ tx->flags |= HTP_PATH_UTF8_OVERLONG;
+ }
+ break;
+ }
+
+ // Special flag for half-width/full-width evasion.
+ if ((codepoint >= 0xff00) && (codepoint <= 0xffef)) {
+ tx->flags |= HTP_PATH_HALF_FULL_RANGE;
+ }
+
+ // Use best-fit mapping to convert to a single byte.
+ data[wpos++] = bestfit_codepoint(cfg, HTP_DECODER_URL_PATH, codepoint);
+ }
+
+ // Advance over the consumed byte and reset the byte counter.
+ rpos++;
+ counter = 0;
+
+ break;
+
+ case HTP_UTF8_REJECT:
+ // Invalid UTF-8 character.
+
+ tx->flags |= HTP_PATH_UTF8_INVALID;
+
+ // Is the server expected to respond with 400? (The configured
+ // value itself is stored as the expected status.)
+ if (cfg->decoder_cfgs[HTP_DECODER_URL_PATH].utf8_invalid_unwanted != HTP_UNWANTED_IGNORE) {
+ tx->response_status_expected_number = cfg->decoder_cfgs[HTP_DECODER_URL_PATH].utf8_invalid_unwanted;
+ }
+
+ // Output the replacement byte, replacing one or more invalid bytes.
+ data[wpos++] = cfg->decoder_cfgs[HTP_DECODER_URL_PATH].bestfit_replacement_byte;
+
+ // If the invalid byte was first in a sequence, consume it. Otherwise,
+ // assume it's the starting byte of the next character (it is then
+ // fed to the decoder again on the next iteration).
+ if (counter == 1) {
+ rpos++;
+ }
+
+ // Reset the decoder state and continue decoding.
+ state = HTP_UTF8_ACCEPT;
+ codepoint = 0;
+ counter = 0;
+
+ break;
+
+ default:
+ // Keep going; the character is not yet formed.
+ rpos++;
+ break;
+ }
+ }
+
+ // Did the input stream seem like a valid UTF-8 string?
+ if ((seen_valid) && (!(tx->flags & HTP_PATH_UTF8_INVALID))) {
+ tx->flags |= HTP_PATH_UTF8_VALID;
+ }
+
+ // Adjust the length of the string, because
+ // we're doing in-place decoding.
+ bstr_adjust_len(path, wpos);
+}
+
+/**
+ * Validate a path that is quite possibly UTF-8 encoded. The path itself is
+ * only read; the outcome is reported through the transaction flags:
+ * HTP_PATH_UTF8_OVERLONG, HTP_PATH_HALF_FULL_RANGE, HTP_PATH_UTF8_INVALID
+ * and, when at least one multi-byte character was seen and no invalid flag
+ * is present, HTP_PATH_UTF8_VALID.
+ *
+ * NOTE(review): the half-width/full-width test here covers codepoints
+ * 0xff00-0xffff, whereas htp_utf8_decode_path_inplace() tests 0xff00-0xffef;
+ * confirm whether the difference is intended.
+ *
+ * @param[in] tx
+ * @param[in] path
+ */
+void htp_utf8_validate_path(htp_tx_t *tx, bstr *path) {
+ unsigned char *data = bstr_ptr(path);
+ size_t len = bstr_len(path);
+ size_t rpos = 0;
+ uint32_t codepoint = 0;
+ uint32_t state = HTP_UTF8_ACCEPT;
+ uint32_t counter = 0; // How many bytes used by a UTF-8 character.
+ uint8_t seen_valid = 0;
+
+ while (rpos < len) {
+ counter++;
+
+ switch (htp_utf8_decode_allow_overlong(&state, &codepoint, data[rpos])) {
+ case HTP_UTF8_ACCEPT:
+ // We have a valid character.
+
+ if (counter > 1) {
+ // A valid UTF-8 character, consisting of 2 or more bytes.
+
+ seen_valid = 1;
+
+ // Check for overlong characters and set the flag accordingly:
+ // the codepoint would have fit in a shorter sequence.
+ switch (counter) {
+ case 2:
+ if (codepoint < 0x80) {
+ tx->flags |= HTP_PATH_UTF8_OVERLONG;
+ }
+ break;
+ case 3:
+ if (codepoint < 0x800) {
+ tx->flags |= HTP_PATH_UTF8_OVERLONG;
+ }
+ break;
+ case 4:
+ if (codepoint < 0x10000) {
+ tx->flags |= HTP_PATH_UTF8_OVERLONG;
+ }
+ break;
+ }
+ }
+
+ // Special flag for half-width/full-width evasion.
+ if ((codepoint > 0xfeff) && (codepoint < 0x010000)) {
+ tx->flags |= HTP_PATH_HALF_FULL_RANGE;
+ }
+
+ // Advance over the consumed byte and reset the byte counter.
+ rpos++;
+ counter = 0;
+
+ break;
+
+ case HTP_UTF8_REJECT:
+ // Invalid UTF-8 character.
+
+ tx->flags |= HTP_PATH_UTF8_INVALID;
+
+ // Override the decoder state because we want to continue decoding.
+ state = HTP_UTF8_ACCEPT;
+
+ // Advance over the consumed byte and reset the byte counter.
+ // (Unlike the in-place decoder, the offending byte is always consumed.)
+ rpos++;
+ counter = 0;
+
+ break;
+
+ default:
+ // Keep going; the character is not yet formed.
+ rpos++;
+ break;
+ }
+ }
+
+ // Did the input stream seem like a valid UTF-8 string?
+ if ((seen_valid) && (!(tx->flags & HTP_PATH_UTF8_INVALID))) {
+ tx->flags |= HTP_PATH_UTF8_VALID;
+ }
+}
+
+/**
+ * Decode a %u-encoded character, using best-fit mapping as necessary. Path version.
+ *
+ * When the high byte is zero the low byte is returned directly and
+ * HTP_PATH_OVERLONG_U is flagged. Otherwise the pair is looked up in the
+ * best-fit map of the URL path decoder, falling back to the replacement byte.
+ * HTP_PATH_ENCODED_SEPARATOR is flagged when the result is a path separator.
+ *
+ * @param[in] cfg
+ * @param[in] tx
+ * @param[in] data Pointer to the four hexadecimal characters that follow "%u"
+ * @return decoded byte
+ */
+static uint8_t decode_u_encoding_path(htp_cfg_t *cfg, htp_tx_t *tx, unsigned char *data) {
+    const uint8_t high = x2c(data);
+    const uint8_t low = x2c(data + 2);
+    uint8_t decoded;
+
+    if (high == 0x00) {
+        // Only the low byte carries information.
+        decoded = low;
+        tx->flags |= HTP_PATH_OVERLONG_U;
+    } else {
+        decoded = cfg->decoder_cfgs[HTP_DECODER_URL_PATH].bestfit_replacement_byte;
+
+        // Check for fullwidth form evasion
+        if (high == 0xff) {
+            tx->flags |= HTP_PATH_HALF_FULL_RANGE;
+        }
+
+        if (cfg->decoder_cfgs[HTP_DECODER_URL_PATH].u_encoding_unwanted != HTP_UNWANTED_IGNORE) {
+            tx->response_status_expected_number = cfg->decoder_cfgs[HTP_DECODER_URL_PATH].u_encoding_unwanted;
+        }
+
+        // Use best-fit mapping: 3-byte entries, terminated by a zero pair.
+        // TODO Optimize lookup.
+        unsigned char *entry = cfg->decoder_cfgs[HTP_DECODER_URL_PATH].bestfit_map;
+
+        for (; (entry[0] != 0) || (entry[1] != 0); entry += 3) {
+            if ((entry[0] == high) && (entry[1] == low)) {
+                decoded = entry[2];
+                break;
+            }
+        }
+    }
+
+    // Check for encoded path separators
+    int is_separator = (decoded == '/')
+            || ((cfg->decoder_cfgs[HTP_DECODER_URL_PATH].backslash_convert_slashes) && (decoded == '\\'));
+    if (is_separator) {
+        tx->flags |= HTP_PATH_ENCODED_SEPARATOR;
+    }
+
+    return decoded;
+}
+
+/**
+ * Decode a %u-encoded character, using best-fit mapping as necessary. Params version.
+ *
+ * When the high byte is zero the low byte is returned directly and
+ * HTP_URLEN_OVERLONG_U is flagged. Otherwise the pair is looked up in the
+ * best-fit map of the given decoder context, falling back to the
+ * replacement byte.
+ *
+ * @param[in] cfg
+ * @param[in] ctx
+ * @param[in] data Pointer to the four hexadecimal characters that follow "%u"
+ * @param[in,out] flags
+ * @return decoded byte
+ */
+static uint8_t decode_u_encoding_params(htp_cfg_t *cfg, enum htp_decoder_ctx_t ctx, unsigned char *data, uint64_t *flags) {
+    const uint8_t high = x2c(data);
+    const uint8_t low = x2c(data + 2);
+
+    // Check for overlong usage first.
+    if (high == 0) {
+        (*flags) |= HTP_URLEN_OVERLONG_U;
+        return low;
+    }
+
+    // Both bytes were used.
+
+    // Detect half-width and full-width range.
+    if ((high == 0xff) && (low <= 0xef)) {
+        (*flags) |= HTP_URLEN_HALF_FULL_RANGE;
+    }
+
+    // Use best-fit mapping: 3-byte entries, terminated by a zero pair.
+    // TODO Optimize lookup.
+    unsigned char *entry = cfg->decoder_cfgs[ctx].bestfit_map;
+    uint8_t decoded = cfg->decoder_cfgs[ctx].bestfit_replacement_byte;
+
+    for (; (entry[0] != 0) || (entry[1] != 0); entry += 3) {
+        if ((entry[0] == high) && (entry[1] == low)) {
+            decoded = entry[2];
+            break;
+        }
+    }
+
+    return decoded;
+}
+
+/**
+ * Decode a request path according to the URL path decoder settings
+ * (HTP_DECODER_URL_PATH) in the configuration of the supplied transaction.
+ *
+ * The path is rewritten in place and its length adjusted at the end. The
+ * function handles %HH encoding and, if enabled, %uHHHH encoding; it can
+ * also convert backslashes to forward slashes, lowercase the path and
+ * compress consecutive separators, as configured. Anomalies are reported
+ * through HTP_PATH_* transaction flags and, where the matching "unwanted"
+ * setting is not HTP_UNWANTED_IGNORE, through the transaction's expected
+ * response status.
+ *
+ * @param[in] tx
+ * @param[in] path
+ * @return HTP_ERROR if the path (or its data pointer) is NULL, or if the
+ *         invalid-encoding handling setting is unknown when an invalid %HH
+ *         sequence is met; HTP_OK otherwise.
+ */
+htp_status_t htp_decode_path_inplace(htp_tx_t *tx, bstr *path) {
+ if (path == NULL) return HTP_ERROR;
+ unsigned char *data = bstr_ptr(path);
+ if (data == NULL) return HTP_ERROR;
+
+ size_t len = bstr_len(path);
+
+ htp_cfg_t *cfg = tx->cfg;
+
+ size_t rpos = 0;
+ size_t wpos = 0;
+ int previous_was_separator = 0;
+
+ while ((rpos < len) && (wpos < len)) {
+ uint8_t c = data[rpos];
+
+ // Decode encoded characters
+ if (c == '%') {
+ if (rpos + 2 < len) {
+ int handled = 0;
+
+ if (cfg->decoder_cfgs[HTP_DECODER_URL_PATH].u_encoding_decode) {
+ // Check for the %u encoding
+ if ((data[rpos + 1] == 'u') || (data[rpos + 1] == 'U')) {
+ handled = 1;
+
+ if (cfg->decoder_cfgs[HTP_DECODER_URL_PATH].u_encoding_unwanted != HTP_UNWANTED_IGNORE) {
+ tx->response_status_expected_number = cfg->decoder_cfgs[HTP_DECODER_URL_PATH].u_encoding_unwanted;
+ }
+
+ if (rpos + 5 < len) {
+ if (isxdigit(data[rpos + 2]) && (isxdigit(data[rpos + 3]))
+ && isxdigit(data[rpos + 4]) && (isxdigit(data[rpos + 5]))) {
+ // Decode a valid %u encoding
+ c = decode_u_encoding_path(cfg, tx, &data[rpos + 2]);
+ rpos += 6;
+
+ if (c == 0) {
+ tx->flags |= HTP_PATH_ENCODED_NUL;
+
+ if (cfg->decoder_cfgs[HTP_DECODER_URL_PATH].nul_encoded_unwanted != HTP_UNWANTED_IGNORE) {
+ tx->response_status_expected_number = cfg->decoder_cfgs[HTP_DECODER_URL_PATH].nul_encoded_unwanted;
+ }
+ }
+ } else {
+ // Invalid %u encoding
+ tx->flags |= HTP_PATH_INVALID_ENCODING;
+
+ if (cfg->decoder_cfgs[HTP_DECODER_URL_PATH].url_encoding_invalid_unwanted != HTP_UNWANTED_IGNORE) {
+ tx->response_status_expected_number = cfg->decoder_cfgs[HTP_DECODER_URL_PATH].url_encoding_invalid_unwanted;
+ }
+
+ switch (cfg->decoder_cfgs[HTP_DECODER_URL_PATH].url_encoding_invalid_handling) {
+ case HTP_URL_DECODE_REMOVE_PERCENT:
+ // Do not place anything in output; eat
+ // the percent character
+ rpos++;
+ continue;
+ break;
+ case HTP_URL_DECODE_PRESERVE_PERCENT:
+ // Leave the percent character in output
+ rpos++;
+ break;
+ case HTP_URL_DECODE_PROCESS_INVALID:
+ // Decode invalid %u encoding
+ c = decode_u_encoding_path(cfg, tx, &data[rpos + 2]);
+ rpos += 6;
+ break;
+ }
+ }
+ } else {
+ // Invalid %u encoding (not enough data)
+ tx->flags |= HTP_PATH_INVALID_ENCODING;
+
+ if (cfg->decoder_cfgs[HTP_DECODER_URL_PATH].url_encoding_invalid_unwanted != HTP_UNWANTED_IGNORE) {
+ tx->response_status_expected_number = cfg->decoder_cfgs[HTP_DECODER_URL_PATH].url_encoding_invalid_unwanted;
+ }
+
+ switch (cfg->decoder_cfgs[HTP_DECODER_URL_PATH].url_encoding_invalid_handling) {
+ case HTP_URL_DECODE_REMOVE_PERCENT:
+ // Do not place anything in output; eat
+ // the percent character
+ rpos++;
+ continue;
+ break;
+ case HTP_URL_DECODE_PRESERVE_PERCENT:
+ // Leave the percent character in output
+ rpos++;
+ break;
+ case HTP_URL_DECODE_PROCESS_INVALID:
+ // Cannot decode, because there's not enough data.
+ // Leave the percent character in output
+ rpos++;
+ // TODO Configurable handling.
+ break;
+ }
+ }
+ }
+ }
+
+ // Handle standard URL encoding
+ if (!handled) {
+ if ((isxdigit(data[rpos + 1])) && (isxdigit(data[rpos + 2]))) {
+ c = x2c(&data[rpos + 1]);
+
+ if (c == 0) {
+ tx->flags |= HTP_PATH_ENCODED_NUL;
+
+ if (cfg->decoder_cfgs[HTP_DECODER_URL_PATH].nul_encoded_unwanted != HTP_UNWANTED_IGNORE) {
+ tx->response_status_expected_number = cfg->decoder_cfgs[HTP_DECODER_URL_PATH].nul_encoded_unwanted;
+ }
+
+ if (cfg->decoder_cfgs[HTP_DECODER_URL_PATH].nul_encoded_terminates) {
+ bstr_adjust_len(path, wpos);
+ return HTP_OK;
+ }
+ }
+
+ if ((c == '/') || ((cfg->decoder_cfgs[HTP_DECODER_URL_PATH].backslash_convert_slashes) && (c == '\\'))) {
+ tx->flags |= HTP_PATH_ENCODED_SEPARATOR;
+
+ if (cfg->decoder_cfgs[HTP_DECODER_URL_PATH].path_separators_encoded_unwanted != HTP_UNWANTED_IGNORE) {
+ tx->response_status_expected_number = cfg->decoder_cfgs[HTP_DECODER_URL_PATH].path_separators_encoded_unwanted;
+ }
+
+ if (cfg->decoder_cfgs[HTP_DECODER_URL_PATH].path_separators_decode) {
+ // Decode
+ rpos += 3;
+ } else {
+ // Leave encoded: output the percent character only; the two
+ // hexadecimal characters are copied by the following iterations
+ c = '%';
+ rpos++;
+ }
+ } else {
+ // Decode
+ rpos += 3;
+ }
+ } else {
+ // Invalid encoding
+ tx->flags |= HTP_PATH_INVALID_ENCODING;
+
+ if (cfg->decoder_cfgs[HTP_DECODER_URL_PATH].url_encoding_invalid_unwanted != HTP_UNWANTED_IGNORE) {
+ tx->response_status_expected_number = cfg->decoder_cfgs[HTP_DECODER_URL_PATH].url_encoding_invalid_unwanted;
+ }
+
+ switch (cfg->decoder_cfgs[HTP_DECODER_URL_PATH].url_encoding_invalid_handling) {
+ case HTP_URL_DECODE_REMOVE_PERCENT:
+ // Do not place anything in output; eat
+ // the percent character
+ rpos++;
+ continue;
+ break;
+ case HTP_URL_DECODE_PRESERVE_PERCENT:
+ // Leave the percent character in output
+ rpos++;
+ break;
+ case HTP_URL_DECODE_PROCESS_INVALID:
+ // Decode
+ c = x2c(&data[rpos + 1]);
+ rpos += 3;
+ // Note: What if an invalid encoding decodes into a path
+ // separator? This is theoretical at the moment, because
+ // the only platform we know doesn't convert separators is
+ // Apache, who will also respond with 400 if invalid encoding
+ // is encountered. Thus no check for a separator here.
+ break;
+ default:
+ // Unknown setting
+ return HTP_ERROR;
+ break;
+ }
+ }
+ }
+ } else {
+ // Invalid URL encoding (not enough data)
+ tx->flags |= HTP_PATH_INVALID_ENCODING;
+
+ if (cfg->decoder_cfgs[HTP_DECODER_URL_PATH].url_encoding_invalid_unwanted != HTP_UNWANTED_IGNORE) {
+ tx->response_status_expected_number = cfg->decoder_cfgs[HTP_DECODER_URL_PATH].url_encoding_invalid_unwanted;
+ }
+
+ switch (cfg->decoder_cfgs[HTP_DECODER_URL_PATH].url_encoding_invalid_handling) {
+ case HTP_URL_DECODE_REMOVE_PERCENT:
+ // Do not place anything in output; eat
+ // the percent character
+ rpos++;
+ continue;
+ break;
+ case HTP_URL_DECODE_PRESERVE_PERCENT:
+ // Leave the percent character in output
+ rpos++;
+ break;
+ case HTP_URL_DECODE_PROCESS_INVALID:
+ // Cannot decode, because there's not enough data.
+ // Leave the percent character in output.
+ // TODO Configurable handling.
+ rpos++;
+ break;
+ }
+ }
+ } else {
+ // One non-encoded character
+
+ // Is it a NUL byte?
+ if (c == 0) {
+ if (cfg->decoder_cfgs[HTP_DECODER_URL_PATH].nul_raw_unwanted != HTP_UNWANTED_IGNORE) {
+ tx->response_status_expected_number = cfg->decoder_cfgs[HTP_DECODER_URL_PATH].nul_raw_unwanted;
+ }
+
+ if (cfg->decoder_cfgs[HTP_DECODER_URL_PATH].nul_raw_terminates) {
+ // Terminate path with a raw NUL byte
+ bstr_adjust_len(path, wpos);
+ return HTP_OK;
+ break;
+ }
+ }
+
+ rpos++;
+ }
+
+ // Place the character into output
+
+ // Check for control characters
+ if (c < 0x20) {
+ if (cfg->decoder_cfgs[HTP_DECODER_URL_PATH].control_chars_unwanted != HTP_UNWANTED_IGNORE) {
+ tx->response_status_expected_number = cfg->decoder_cfgs[HTP_DECODER_URL_PATH].control_chars_unwanted;
+ }
+ }
+
+ // Convert backslashes to forward slashes, if necessary
+ if ((c == '\\') && (cfg->decoder_cfgs[HTP_DECODER_URL_PATH].backslash_convert_slashes)) {
+ c = '/';
+ }
+
+ // Lowercase characters, if necessary
+ if (cfg->decoder_cfgs[HTP_DECODER_URL_PATH].convert_lowercase) {
+ c = (uint8_t) tolower(c);
+ }
+
+ // If we're compressing separators then we need
+ // to track if the previous character was a separator
+ if (cfg->decoder_cfgs[HTP_DECODER_URL_PATH].path_separators_compress) {
+ if (c == '/') {
+ if (!previous_was_separator) {
+ data[wpos++] = c;
+ previous_was_separator = 1;
+ } else {
+ // Do nothing; we don't want
+ // another separator in output
+ }
+ } else {
+ data[wpos++] = c;
+ previous_was_separator = 0;
+ }
+ } else {
+ data[wpos++] = c;
+ }
+ }
+
+ bstr_adjust_len(path, wpos);
+
+ return HTP_OK;
+}
+
+/**
+ * URL-decode a URI component in place using the HTP_DECODER_URL_PATH decoder
+ * settings of the transaction's configuration, then mirror the decoder's
+ * HTP_URLEN_* result flags onto the transaction as HTP_PATH_* flags.
+ *
+ * @param[in] tx     Transaction; supplies the configuration and receives the
+ *                   flags and the expected response status number.
+ * @param[in] input  String to decode; it is modified in place.
+ * @return Whatever htp_urldecode_inplace_ex() returned.
+ */
+htp_status_t htp_tx_urldecode_uri_inplace(htp_tx_t *tx, bstr *input) {
+    uint64_t decoder_flags = 0;
+    htp_status_t status = htp_urldecode_inplace_ex(tx->cfg, HTP_DECODER_URL_PATH, input,
+            &decoder_flags, &(tx->response_status_expected_number));
+
+    // The decoder reports with generic flags; the transaction keeps
+    // path-specific ones, so translate each flag individually.
+    if ((decoder_flags & HTP_URLEN_INVALID_ENCODING) != 0) {
+        tx->flags |= HTP_PATH_INVALID_ENCODING;
+    }
+    if ((decoder_flags & HTP_URLEN_ENCODED_NUL) != 0) {
+        tx->flags |= HTP_PATH_ENCODED_NUL;
+    }
+    if ((decoder_flags & HTP_URLEN_RAW_NUL) != 0) {
+        tx->flags |= HTP_PATH_RAW_NUL;
+    }
+
+    return status;
+}
+
+/**
+ * URL-decode a parameter string in place using the HTP_DECODER_URLENCODED
+ * decoder settings. The decoder's flags are OR-ed straight into tx->flags.
+ *
+ * @param[in] tx     Transaction; supplies the configuration and receives the
+ *                   flags and the expected response status number.
+ * @param[in] input  String to decode; it is modified in place.
+ * @return Whatever htp_urldecode_inplace_ex() returned.
+ */
+htp_status_t htp_tx_urldecode_params_inplace(htp_tx_t *tx, bstr *input) {
+    htp_status_t status = htp_urldecode_inplace_ex(tx->cfg, HTP_DECODER_URLENCODED, input,
+            &(tx->flags), &(tx->response_status_expected_number));
+
+    return status;
+}
+
+/**
+ * Convenience wrapper around htp_urldecode_inplace_ex() for callers that do
+ * not need the expected status code.
+ *
+ * @param[in] cfg    Configuration holding the decoder settings.
+ * @param[in] ctx    Which decoder settings to use.
+ * @param[in] input  String to decode; it is modified in place.
+ * @param[in] flags  Receives the HTP_URLEN_* result flags.
+ * @return Whatever htp_urldecode_inplace_ex() returned.
+ */
+htp_status_t htp_urldecode_inplace(htp_cfg_t *cfg, enum htp_decoder_ctx_t ctx, bstr *input, uint64_t *flags) {
+    // The status code is written to a throwaway local and dropped.
+    int discarded_status = 0;
+    htp_status_t status = htp_urldecode_inplace_ex(cfg, ctx, input, flags, &discarded_status);
+
+    return status;
+}
+
+/**
+ * URL-decode a string in place.
+ *
+ * The input is scanned with a read cursor and rewritten through a write
+ * cursor that never overtakes it; the string length is adjusted to the number
+ * of bytes written before returning. Handled forms:
+ *
+ *  - %HH: decoded to one byte.
+ *  - %uHHHH: decoded via decode_u_encoding_params(), but only when
+ *    u_encoding_decode is enabled for the selected decoder; otherwise the
+ *    sequence goes through the %HH logic.
+ *  - '+': replaced with a space when plusspace_decode is enabled.
+ *  - anything else: copied through unchanged.
+ *
+ * An invalid or truncated escape sets HTP_URLEN_INVALID_ENCODING in *flags
+ * and is then handled according to url_encoding_invalid_handling (drop the
+ * '%', keep the '%', or decode anyway where enough bytes exist). Encoded and
+ * raw NUL bytes set HTP_URLEN_ENCODED_NUL / HTP_URLEN_RAW_NUL and, when the
+ * matching *_terminates option is set, end the output right there.
+ *
+ * Whenever a triggered condition has an "unwanted" setting other than
+ * HTP_UNWANTED_IGNORE, that setting is stored in *expected_status_code.
+ *
+ * @param[in] cfg                   Configuration holding the decoder settings.
+ * @param[in] ctx                   Index of the decoder settings to use.
+ * @param[in] input                 String to decode; modified in place.
+ * @param[in] flags                 Receives HTP_URLEN_* flags (OR-ed in).
+ * @param[in] expected_status_code  Receives the "unwanted" setting, if any.
+ * @return HTP_ERROR if input or its data pointer is NULL; HTP_OK otherwise.
+ */
+htp_status_t htp_urldecode_inplace_ex(htp_cfg_t *cfg, enum htp_decoder_ctx_t ctx, bstr *input, uint64_t *flags, int *expected_status_code) {
+    if (input == NULL) return HTP_ERROR;
+
+    unsigned char *data = bstr_ptr(input);
+    if (data == NULL) return HTP_ERROR;
+    size_t len = bstr_len(input);
+
+    size_t rpos = 0;
+    size_t wpos = 0;
+
+    while ((rpos < len) && (wpos < len)) {
+        uint8_t c = data[rpos];
+
+        // Decode encoded characters.
+        if (c == '%') {
+            // Need at least 2 additional bytes for %HH.
+            if (rpos + 2 < len) {
+                // Set once the %u branch has taken responsibility for this
+                // escape, so that the %HH logic below is skipped.
+                int handled = 0;
+
+                // Decode %uHHHH encoding, but only if allowed in configuration.
+                if (cfg->decoder_cfgs[ctx].u_encoding_decode) {
+                    // The next character must be a case-insensitive u.
+                    if ((data[rpos + 1] == 'u') || (data[rpos + 1] == 'U')) {
+                        handled = 1;
+
+                        if (cfg->decoder_cfgs[ctx].u_encoding_unwanted != HTP_UNWANTED_IGNORE) {
+                            (*expected_status_code) = cfg->decoder_cfgs[ctx].u_encoding_unwanted;
+                        }
+
+                        // Need at least 5 additional bytes for %uHHHH.
+                        if (rpos + 5 < len) {
+                            if (isxdigit(data[rpos + 2]) && (isxdigit(data[rpos + 3]))
+                                    && isxdigit(data[rpos + 4]) && (isxdigit(data[rpos + 5]))) {
+                                // Decode a valid %u encoding.
+                                c = decode_u_encoding_params(cfg, ctx, &(data[rpos + 2]), flags);
+                                rpos += 6;
+                            } else {
+                                // Invalid %u encoding (could not find 4 xdigits).
+                                (*flags) |= HTP_URLEN_INVALID_ENCODING;
+
+                                if (cfg->decoder_cfgs[ctx].url_encoding_invalid_unwanted != HTP_UNWANTED_IGNORE) {
+                                    (*expected_status_code) = cfg->decoder_cfgs[ctx].url_encoding_invalid_unwanted;
+                                }
+
+                                switch (cfg->decoder_cfgs[ctx].url_encoding_invalid_handling) {
+                                    case HTP_URL_DECODE_REMOVE_PERCENT:
+                                        // Do not place anything in output; consume the %.
+                                        rpos++;
+                                        continue;
+                                    case HTP_URL_DECODE_PRESERVE_PERCENT:
+                                        // Leave the % in output.
+                                        rpos++;
+                                        break;
+                                    case HTP_URL_DECODE_PROCESS_INVALID:
+                                        // Decode invalid %u encoding.
+                                        c = decode_u_encoding_params(cfg, ctx, &(data[rpos + 2]), flags);
+                                        rpos += 6;
+                                        break;
+                                }
+                            }
+                        } else {
+                            // Invalid %u encoding; not enough data.
+                            (*flags) |= HTP_URLEN_INVALID_ENCODING;
+
+                            if (cfg->decoder_cfgs[ctx].url_encoding_invalid_unwanted != HTP_UNWANTED_IGNORE) {
+                                (*expected_status_code) = cfg->decoder_cfgs[ctx].url_encoding_invalid_unwanted;
+                            }
+
+                            switch (cfg->decoder_cfgs[ctx].url_encoding_invalid_handling) {
+                                case HTP_URL_DECODE_REMOVE_PERCENT:
+                                    // Do not place anything in output; consume the %.
+                                    rpos++;
+                                    continue;
+                                case HTP_URL_DECODE_PRESERVE_PERCENT:
+                                    // Leave the % in output.
+                                    rpos++;
+                                    break;
+                                case HTP_URL_DECODE_PROCESS_INVALID:
+                                    // Cannot decode because there's not enough data.
+                                    // Leave the % in output.
+                                    // TODO Configurable handling of %, u, etc.
+                                    rpos++;
+                                    break;
+                            }
+                        }
+                    }
+                }
+
+                // Handle standard URL encoding.
+                if (!handled) {
+                    // Need 2 hexadecimal digits.
+                    if ((isxdigit(data[rpos + 1])) && (isxdigit(data[rpos + 2]))) {
+                        // Decode %HH encoding.
+                        c = x2c(&(data[rpos + 1]));
+                        rpos += 3;
+                    } else {
+                        // Invalid encoding (enough bytes, but not hexadecimal digits).
+                        (*flags) |= HTP_URLEN_INVALID_ENCODING;
+
+                        if (cfg->decoder_cfgs[ctx].url_encoding_invalid_unwanted != HTP_UNWANTED_IGNORE) {
+                            (*expected_status_code) = cfg->decoder_cfgs[ctx].url_encoding_invalid_unwanted;
+                        }
+
+                        switch (cfg->decoder_cfgs[ctx].url_encoding_invalid_handling) {
+                            case HTP_URL_DECODE_REMOVE_PERCENT:
+                                // Do not place anything in output; consume the %.
+                                rpos++;
+                                continue;
+                            case HTP_URL_DECODE_PRESERVE_PERCENT:
+                                // Leave the % in output.
+                                rpos++;
+                                break;
+                            case HTP_URL_DECODE_PROCESS_INVALID:
+                                // Decode.
+                                c = x2c(&(data[rpos + 1]));
+                                rpos += 3;
+                                break;
+                        }
+                    }
+                }
+            } else {
+                // Invalid encoding; not enough data (at least 2 bytes required).
+                (*flags) |= HTP_URLEN_INVALID_ENCODING;
+
+                if (cfg->decoder_cfgs[ctx].url_encoding_invalid_unwanted != HTP_UNWANTED_IGNORE) {
+                    (*expected_status_code) = cfg->decoder_cfgs[ctx].url_encoding_invalid_unwanted;
+                }
+
+                switch (cfg->decoder_cfgs[ctx].url_encoding_invalid_handling) {
+                    case HTP_URL_DECODE_REMOVE_PERCENT:
+                        // Do not place anything in output; consume the %.
+                        rpos++;
+                        continue;
+                    case HTP_URL_DECODE_PRESERVE_PERCENT:
+                        // Leave the % in output.
+                        rpos++;
+                        break;
+                    case HTP_URL_DECODE_PROCESS_INVALID:
+                        // Cannot decode because there's not enough data.
+                        // Leave the % in output.
+                        // TODO Configurable handling of %, etc.
+                        rpos++;
+                        break;
+                }
+            }
+
+            // Did we get an encoded NUL byte?
+            if (c == 0) {
+                if (cfg->decoder_cfgs[ctx].nul_encoded_unwanted != HTP_UNWANTED_IGNORE) {
+                    (*expected_status_code) = cfg->decoder_cfgs[ctx].nul_encoded_unwanted;
+                }
+
+                (*flags) |= HTP_URLEN_ENCODED_NUL;
+
+                if (cfg->decoder_cfgs[ctx].nul_encoded_terminates) {
+                    // Terminate the output at the encoded NUL byte.
+                    bstr_adjust_len(input, wpos);
+                    // Report success with the named constant, like every
+                    // other exit from this function.
+                    return HTP_OK;
+                }
+            }
+
+            data[wpos++] = c;
+        } else if (c == '+') {
+            // Decoding of the plus character is conditional on the configuration.
+
+            if (cfg->decoder_cfgs[ctx].plusspace_decode) {
+                c = 0x20;
+            }
+
+            rpos++;
+            data[wpos++] = c;
+        } else {
+            // One non-encoded byte.
+
+            // Did we get a raw NUL byte?
+            if (c == 0) {
+                if (cfg->decoder_cfgs[ctx].nul_raw_unwanted != HTP_UNWANTED_IGNORE) {
+                    (*expected_status_code) = cfg->decoder_cfgs[ctx].nul_raw_unwanted;
+                }
+
+                (*flags) |= HTP_URLEN_RAW_NUL;
+
+                if (cfg->decoder_cfgs[ctx].nul_raw_terminates) {
+                    // Terminate the output at the raw NUL byte.
+                    bstr_adjust_len(input, wpos);
+                    return HTP_OK;
+                }
+            }
+
+            rpos++;
+            data[wpos++] = c;
+        }
+    }
+
+    bstr_adjust_len(input, wpos);
+
+    return HTP_OK;
+}
+
+/**
+ * Duplicate a URI component and URL-decode the copy in place.
+ *
+ * @param[in] tx      Transaction passed on to the decoder.
+ * @param[in] source  Component to copy.
+ * @param[out] target Receives the decoded copy (NULL if duplication failed).
+ * @return HTP_OK, or HTP_ERROR if the component could not be duplicated.
+ */
+static htp_status_t htp_normalize_dup_decoded(htp_tx_t *tx, bstr *source, bstr **target) {
+    *target = bstr_dup(source);
+    if (*target == NULL) return HTP_ERROR;
+
+    htp_tx_urldecode_uri_inplace(tx, *target);
+
+    return HTP_OK;
+}
+
+/**
+ * Build the normalized form of a previously-parsed request URI.
+ *
+ * Each component present in the parsed URI is copied into the normalized
+ * URI and processed as follows:
+ *  - scheme: lowercased;
+ *  - username, password, fragment: URL-decoded;
+ *  - hostname: URL-decoded, then normalized as a hostname;
+ *  - port: parsed into port_number; -1 when absent, unparsable or outside
+ *    1..65535 (the latter two also set HTP_HOSTU_INVALID on the transaction);
+ *  - path: decoded, UTF-8 converted or validated depending on the
+ *    utf8_convert_bestfit setting, then dot segments are removed;
+ *  - query: copied as is.
+ *
+ * @param[in] tx          Transaction supplying configuration and flags.
+ * @param[in] incomplete  The parsed URI to read from.
+ * @param[out] normalized The URI structure to fill in.
+ * @return HTP_OK on success; HTP_ERROR if a component cannot be duplicated.
+ */
+int htp_normalize_parsed_uri(htp_tx_t *tx, htp_uri_t *incomplete, htp_uri_t *normalized) {
+    // Scheme: duplicate and convert to lowercase.
+    if (incomplete->scheme != NULL) {
+        normalized->scheme = bstr_dup_lower(incomplete->scheme);
+        if (normalized->scheme == NULL) return HTP_ERROR;
+    }
+
+    // Username.
+    if (incomplete->username != NULL) {
+        if (htp_normalize_dup_decoded(tx, incomplete->username, &(normalized->username)) != HTP_OK) {
+            return HTP_ERROR;
+        }
+    }
+
+    // Password.
+    if (incomplete->password != NULL) {
+        if (htp_normalize_dup_decoded(tx, incomplete->password, &(normalized->password)) != HTP_OK) {
+            return HTP_ERROR;
+        }
+    }
+
+    // Hostname. The parsed hostname carries no port information,
+    // so there is nothing to strip here.
+    if (incomplete->hostname != NULL) {
+        if (htp_normalize_dup_decoded(tx, incomplete->hostname, &(normalized->hostname)) != HTP_OK) {
+            return HTP_ERROR;
+        }
+        htp_normalize_hostname_inplace(normalized->hostname);
+    }
+
+    // Port: start from "no port" and overwrite only with a valid number.
+    normalized->port_number = -1;
+    if (incomplete->port != NULL) {
+        int64_t port = htp_parse_positive_integer_whitespace(
+                bstr_ptr(incomplete->port), bstr_len(incomplete->port), 10);
+
+        if ((port > 0) && (port < 65536)) {
+            normalized->port_number = (int) port;
+        } else {
+            // Either the number failed to parse or it is out of range.
+            tx->flags |= HTP_HOSTU_INVALID;
+        }
+    }
+
+    // Path.
+    if (incomplete->path != NULL) {
+        // Work on a private copy.
+        normalized->path = bstr_dup(incomplete->path);
+        if (normalized->path == NULL) return HTP_ERROR;
+
+        // URL-decoding (including %u), lowercasing, separator compression
+        // and backslash conversion.
+        htp_decode_path_inplace(tx, normalized->path);
+
+        if (tx->cfg->decoder_cfgs[HTP_DECODER_URL_PATH].utf8_convert_bestfit) {
+            // Convert Unicode characters to single bytes using best-fit mapping.
+            htp_utf8_decode_path_inplace(tx->cfg, tx, normalized->path);
+        } else {
+            // Leave the bytes alone, but validate them as a UTF-8 stream.
+            htp_utf8_validate_path(tx, normalized->path);
+        }
+
+        // RFC normalization.
+        htp_normalize_uri_path_inplace(normalized->path);
+    }
+
+    // Query string: copied without decoding.
+    if (incomplete->query != NULL) {
+        normalized->query = bstr_dup(incomplete->query);
+        if (normalized->query == NULL) return HTP_ERROR;
+    }
+
+    // Fragment.
+    if (incomplete->fragment != NULL) {
+        if (htp_normalize_dup_decoded(tx, incomplete->fragment, &(normalized->fragment)) != HTP_OK) {
+            return HTP_ERROR;
+        }
+    }
+
+    return HTP_OK;
+}
+
+/**
+ * Normalize a request hostname in place: lowercase every character and
+ * strip all dots from the end of the string.
+ *
+ * @param[in] hostname Hostname to normalize; may be NULL.
+ * @return The same hostname pointer that was passed in (NULL for NULL).
+ */
+bstr *htp_normalize_hostname_inplace(bstr *hostname) {
+    if (hostname != NULL) {
+        bstr_to_lowercase(hostname);
+
+        // Keep chopping for as long as the last character is a dot.
+        while (bstr_char_at_end(hostname, 0) == '.') {
+            bstr_chop(hostname);
+        }
+    }
+
+    return hostname;
+}
+
+/**
+ * Normalize URL path. This function implements the remove dot segments algorithm
+ * specified in RFC 3986, section 5.2.4.
+ *
+ * The path is rewritten inside its own buffer: rpos is the read cursor over
+ * the remaining input, wpos is the write cursor marking the end of the output
+ * produced so far, and the string length is set to wpos at the end.
+ *
+ * The local variable c holds the character currently being examined. The
+ * value -1 means "nothing pending; fetch the next byte from the input".
+ * Steps B and C leave c set to '/' instead of writing anything back, which
+ * is how "replace that prefix with '/' in the input buffer" is realized.
+ * Step D advances rpos beyond len so that the loop ends without emitting
+ * the trailing "." or "..".
+ *
+ * @param[in] s Path to normalize; modified in place. Nothing is done when s
+ *              or its data pointer is NULL.
+ */
+void htp_normalize_uri_path_inplace(bstr *s) {
+ if (s == NULL) return;
+
+ unsigned char *data = bstr_ptr(s);
+ if (data == NULL) return;
+ size_t len = bstr_len(s);
+
+ size_t rpos = 0;
+ size_t wpos = 0;
+
+ int c = -1;
+ while ((rpos < len)&&(wpos < len)) {
+ if (c == -1) {
+ c = data[rpos++];
+ }
+
+ // A. If the input buffer begins with a prefix of "../" or "./",
+ // then remove that prefix from the input buffer; otherwise,
+ // (c is the first '.'; data[rpos] is the byte that follows it)
+ if (c == '.') {
+ if ((rpos + 1 < len) && (data[rpos] == '.') && (data[rpos + 1] == '/')) {
+ c = -1;
+ rpos += 2;
+ continue;
+ } else if ((rpos < len) && (data[rpos] == '/')) {
+ c = -1;
+ rpos += 1;
+ continue;
+ }
+ }
+
+ if (c == '/') {
+ // B. if the input buffer begins with a prefix of "/./" or "/.",
+ // where "." is a complete path segment, then replace that
+ // prefix with "/" in the input buffer; otherwise,
+ // (the replacement "/" is kept pending in c, not written to data)
+ if ((rpos + 1 < len) && (data[rpos] == '.') && (data[rpos + 1] == '/')) {
+ c = '/';
+ rpos += 2;
+ continue;
+ } else if ((rpos + 1 == len) && (data[rpos] == '.')) {
+ c = '/';
+ rpos += 1;
+ continue;
+ }
+
+ // C. if the input buffer begins with a prefix of "/../" or "/..",
+ // where ".." is a complete path segment, then replace that
+ // prefix with "/" in the input buffer and remove the last
+ // segment and its preceding "/" (if any) from the output
+ // buffer; otherwise,
+ if ((rpos + 2 < len) && (data[rpos] == '.') && (data[rpos + 1] == '.') && (data[rpos + 2] == '/')) {
+ c = '/';
+ rpos += 3;
+
+ // Remove the last segment: walk wpos back to just after the previous '/', then drop that '/' too
+ while ((wpos > 0) && (data[wpos - 1] != '/')) wpos--;
+ if (wpos > 0) wpos--;
+ continue;
+ } else if ((rpos + 2 == len) && (data[rpos] == '.') && (data[rpos + 1] == '.')) {
+ c = '/';
+ rpos += 2;
+
+ // Remove the last segment: walk wpos back to just after the previous '/', then drop that '/' too
+ while ((wpos > 0) && (data[wpos - 1] != '/')) wpos--;
+ if (wpos > 0) wpos--;
+ continue;
+ }
+ }
+
+ // D. if the input buffer consists only of "." or "..", then remove
+ // that from the input buffer; otherwise,
+ // (rpos is pushed past len here, which ends the loop without output)
+ if ((c == '.') && (rpos == len)) {
+ rpos++;
+ continue;
+ }
+
+ if ((c == '.') && (rpos + 1 == len) && (data[rpos] == '.')) {
+ rpos += 2;
+ continue;
+ }
+
+ // E. move the first path segment in the input buffer to the end of
+ // the output buffer, including the initial "/" character (if
+ // any) and any subsequent characters up to, but not including,
+ // the next "/" character or the end of the input buffer.
+ data[wpos++] = (uint8_t) c;
+
+ while ((rpos < len) && (data[rpos] != '/') && (wpos < len)) {
+ data[wpos++] = data[rpos++];
+ }
+
+ c = -1;
+ }
+
+ bstr_adjust_len(s, wpos);
+}
+
+/**
+ * Dump the contents of a bstr to a stream via fprint_raw_data_ex().
+ * A NULL bstr is dumped as the six bytes of the text "(null)".
+ *
+ * @param[in] stream Destination stream.
+ * @param[in] name   Label printed in the dump header.
+ * @param[in] b      String to dump; may be NULL.
+ */
+void fprint_bstr(FILE *stream, const char *name, bstr *b) {
+    const void *bytes = "(null)";
+    size_t nbytes = 6;
+
+    if (b != NULL) {
+        bytes = bstr_ptr(b);
+        nbytes = bstr_len(b);
+    }
+
+    fprint_raw_data_ex(stream, name, bytes, 0, nbytes);
+}
+
+/**
+ * Dump a memory region to a stream. A NULL data pointer produces a single
+ * header line instead of a dump.
+ *
+ * @param[in] stream Destination stream.
+ * @param[in] name   Label printed in the header.
+ * @param[in] data   Start of the region; may be NULL.
+ * @param[in] len    Number of bytes in the region.
+ */
+void fprint_raw_data(FILE *stream, const char *name, const void *data, size_t len) {
+    if (data != NULL) {
+        fprint_raw_data_ex(stream, name, data, 0, len);
+        return;
+    }
+
+    // A NULL pointer may happen for gaps.
+    fprintf(stream, "\n%s: ptr NULL len %u\n", name, (unsigned int)len);
+}
+
+/**
+ * Write a hex dump of part of a memory region to a stream.
+ *
+ * A header line with the label, the pointer, the starting offset and the end
+ * offset (offset + printlen, labelled "len") comes first. Each following line
+ * shows the current offset in hexadecimal, up to 16 bytes as two-digit hex
+ * values in two groups of eight, and the same bytes as text between '|'
+ * characters, with non-printable bytes shown as '.'.
+ *
+ * @param[in] stream   Destination stream.
+ * @param[in] name     Label printed in the header.
+ * @param[in] _data    Start of the memory region.
+ * @param[in] offset   Offset of the first byte to print.
+ * @param[in] printlen Number of bytes to print, starting at offset.
+ */
+void fprint_raw_data_ex(FILE *stream, const char *name, const void *_data, size_t offset, size_t printlen) {
+    const unsigned char *data = (const unsigned char *) _data;
+    char buf[160];
+    size_t len = offset + printlen;
+
+    fprintf(stream, "\n%s: ptr %p offset %u len %u\n", name, (void*) data, (unsigned int)offset, (unsigned int)len);
+
+    while (offset < len) {
+        // PRIx64 already contains the conversion letter, so it must follow a
+        // bare "%" and be given a 64-bit argument.
+        snprintf(buf, sizeof(buf), "%" PRIx64, (uint64_t) offset);
+        strlcat(buf, " ", sizeof(buf));
+
+        // Hex column: 16 cells, with a separator between the two groups of 8.
+        for (size_t i = 0; i < 16; i++) {
+            if (i == 8) {
+                strlcat(buf, " ", sizeof(buf));
+            }
+
+            if (offset + i < len) {
+                char step[4];
+                snprintf(step, sizeof(step), "%02x ", data[offset + i]);
+                strlcat(buf, step, sizeof(buf));
+            } else {
+                // Past the end of the data: pad with the width of one
+                // "%02x " cell so the text column stays aligned.
+                strlcat(buf, "   ", sizeof(buf));
+            }
+        }
+
+        strlcat(buf, " |", sizeof(buf));
+
+        // Text column, appended directly behind what is already in buf.
+        char *p = buf + strlen(buf);
+        for (size_t i = 0; (offset + i < len) && (i < 16); i++) {
+            uint8_t c = data[offset + i];
+
+            *p++ = isprint(c) ? (char) c : '.';
+        }
+
+        *p++ = '|';
+        *p++ = '\n';
+        *p = '\0';
+
+        fprintf(stream, "%s", buf);
+        offset += 16;
+    }
+
+    fprintf(stream, "\n");
+}
+
+/**
+ * Return a printable name for the current inbound (request) parser state.
+ *
+ * @param[in] connp Connection parser; may be NULL.
+ * @return "NULL" for a NULL parser, the state name for a known state
+ *         function, "UNKNOWN" otherwise.
+ */
+char *htp_connp_in_state_as_string(htp_connp_t *connp) {
+    char *label = "UNKNOWN";
+
+    // The state is a function pointer, so it can only be compared, not switched on.
+    if (connp == NULL) {
+        label = "NULL";
+    } else if (connp->in_state == htp_connp_REQ_IDLE) {
+        label = "REQ_IDLE";
+    } else if (connp->in_state == htp_connp_REQ_LINE) {
+        label = "REQ_LINE";
+    } else if (connp->in_state == htp_connp_REQ_PROTOCOL) {
+        label = "REQ_PROTOCOL";
+    } else if (connp->in_state == htp_connp_REQ_HEADERS) {
+        label = "REQ_HEADERS";
+    } else if (connp->in_state == htp_connp_REQ_CONNECT_CHECK) {
+        label = "REQ_CONNECT_CHECK";
+    } else if (connp->in_state == htp_connp_REQ_CONNECT_WAIT_RESPONSE) {
+        label = "REQ_CONNECT_WAIT_RESPONSE";
+    } else if (connp->in_state == htp_connp_REQ_BODY_DETERMINE) {
+        label = "REQ_BODY_DETERMINE";
+    } else if (connp->in_state == htp_connp_REQ_BODY_IDENTITY) {
+        label = "REQ_BODY_IDENTITY";
+    } else if (connp->in_state == htp_connp_REQ_BODY_CHUNKED_LENGTH) {
+        label = "REQ_BODY_CHUNKED_LENGTH";
+    } else if (connp->in_state == htp_connp_REQ_BODY_CHUNKED_DATA) {
+        label = "REQ_BODY_CHUNKED_DATA";
+    } else if (connp->in_state == htp_connp_REQ_BODY_CHUNKED_DATA_END) {
+        label = "REQ_BODY_CHUNKED_DATA_END";
+    } else if (connp->in_state == htp_connp_REQ_FINALIZE) {
+        label = "REQ_FINALIZE";
+    } else if (connp->in_state == htp_connp_REQ_IGNORE_DATA_AFTER_HTTP_0_9) {
+        label = "REQ_IGNORE_DATA_AFTER_HTTP_0_9";
+    }
+
+    return label;
+}
+
+/**
+ * Return a printable name for the current outbound (response) parser state.
+ *
+ * Every label is the name of the state function without its "htp_connp_"
+ * prefix.
+ *
+ * @param[in] connp Connection parser; may be NULL.
+ * @return "NULL" for a NULL parser, the state name for a known state
+ *         function, "UNKNOWN" otherwise.
+ */
+char *htp_connp_out_state_as_string(htp_connp_t *connp) {
+    if (connp == NULL) return "NULL";
+
+    if (connp->out_state == htp_connp_RES_IDLE) return "RES_IDLE";
+    if (connp->out_state == htp_connp_RES_LINE) return "RES_LINE";
+    if (connp->out_state == htp_connp_RES_HEADERS) return "RES_HEADERS";
+    if (connp->out_state == htp_connp_RES_BODY_DETERMINE) return "RES_BODY_DETERMINE";
+    if (connp->out_state == htp_connp_RES_BODY_IDENTITY_CL_KNOWN) return "RES_BODY_IDENTITY_CL_KNOWN";
+    if (connp->out_state == htp_connp_RES_BODY_IDENTITY_STREAM_CLOSE) return "RES_BODY_IDENTITY_STREAM_CLOSE";
+    if (connp->out_state == htp_connp_RES_BODY_CHUNKED_LENGTH) return "RES_BODY_CHUNKED_LENGTH";
+    if (connp->out_state == htp_connp_RES_BODY_CHUNKED_DATA) return "RES_BODY_CHUNKED_DATA";
+    if (connp->out_state == htp_connp_RES_BODY_CHUNKED_DATA_END) return "RES_BODY_CHUNKED_DATA_END";
+    // The state is RES_FINALIZE; there is no "RES_BODY_FINALIZE" state.
+    if (connp->out_state == htp_connp_RES_FINALIZE) return "RES_FINALIZE";
+
+    return "UNKNOWN";
+}
+
+/**
+ * Return a printable name for the request progress of a transaction.
+ *
+ * @param[in] tx Transaction; may be NULL.
+ * @return "NULL" for a NULL transaction, the progress name for a known
+ *         value, "INVALID" otherwise.
+ */
+char *htp_tx_request_progress_as_string(htp_tx_t *tx) {
+    if (tx == NULL) return "NULL";
+
+    // Stays at "INVALID" unless one of the known values matches.
+    char *label = "INVALID";
+
+    switch (tx->request_progress) {
+        case HTP_REQUEST_NOT_STARTED:
+            label = "NOT_STARTED";
+            break;
+        case HTP_REQUEST_LINE:
+            label = "REQ_LINE";
+            break;
+        case HTP_REQUEST_HEADERS:
+            label = "REQ_HEADERS";
+            break;
+        case HTP_REQUEST_BODY:
+            label = "REQ_BODY";
+            break;
+        case HTP_REQUEST_TRAILER:
+            label = "REQ_TRAILER";
+            break;
+        case HTP_REQUEST_COMPLETE:
+            label = "COMPLETE";
+            break;
+    }
+
+    return label;
+}
+
+/**
+ * Return a printable name for the response progress of a transaction.
+ *
+ * @param[in] tx Transaction; may be NULL.
+ * @return "NULL" for a NULL transaction, the progress name for a known
+ *         value, "INVALID" otherwise.
+ */
+char *htp_tx_response_progress_as_string(htp_tx_t *tx) {
+    if (tx == NULL) return "NULL";
+
+    // Stays at "INVALID" unless one of the known values matches.
+    char *label = "INVALID";
+
+    switch (tx->response_progress) {
+        case HTP_RESPONSE_NOT_STARTED:
+            label = "NOT_STARTED";
+            break;
+        case HTP_RESPONSE_LINE:
+            label = "RES_LINE";
+            break;
+        case HTP_RESPONSE_HEADERS:
+            label = "RES_HEADERS";
+            break;
+        case HTP_RESPONSE_BODY:
+            label = "RES_BODY";
+            break;
+        case HTP_RESPONSE_TRAILER:
+            label = "RES_TRAILER";
+            break;
+        case HTP_RESPONSE_COMPLETE:
+            label = "COMPLETE";
+            break;
+    }
+
+    return label;
+}
+
+/**
+ * Length contributed by an optional URI component: 0 when it is absent.
+ */
+static size_t htp_unparse_component_len(const bstr *component) {
+    return (component == NULL) ? 0 : bstr_len(component);
+}
+
+/**
+ * Reassemble a URI string from its components, without encoding anything.
+ *
+ * The layout is scheme "://" username ":" password "@" hostname ":" port
+ * path "?" query "#" fragment. Each part and its delimiter is emitted only
+ * if the component is present; the ":" and "@" of the user-information part
+ * are emitted whenever a username or a password is present.
+ *
+ * @param[in] uri Parsed URI; may be NULL.
+ * @return A newly allocated string, or NULL if uri is NULL or the
+ *         allocation fails.
+ */
+bstr *htp_unparse_uri_noencode(htp_uri_t *uri) {
+    if (uri == NULL) return NULL;
+
+    const int has_userinfo = (uri->username != NULL) || (uri->password != NULL);
+
+    // First pass: work out how long the result is going to be.
+    size_t total = 0;
+
+    if (uri->scheme != NULL) {
+        total += bstr_len(uri->scheme) + 3; // "://"
+    }
+
+    if (has_userinfo) {
+        total += htp_unparse_component_len(uri->username);
+        total += 1; // ":"
+        total += htp_unparse_component_len(uri->password);
+        total += 1; // "@"
+    }
+
+    total += htp_unparse_component_len(uri->hostname);
+
+    if (uri->port != NULL) {
+        total += 1 + bstr_len(uri->port); // ":" and the port
+    }
+
+    total += htp_unparse_component_len(uri->path);
+
+    if (uri->query != NULL) {
+        total += 1 + bstr_len(uri->query); // "?" and the query
+    }
+
+    if (uri->fragment != NULL) {
+        total += 1 + bstr_len(uri->fragment); // "#" and the fragment
+    }
+
+    // Second pass: allocate exactly that much and append the pieces.
+    bstr *result = bstr_alloc(total);
+    if (result == NULL) return NULL;
+
+    if (uri->scheme != NULL) {
+        bstr_add_noex(result, uri->scheme);
+        bstr_add_c_noex(result, "://");
+    }
+
+    if (has_userinfo) {
+        if (uri->username != NULL) bstr_add_noex(result, uri->username);
+        bstr_add_c_noex(result, ":");
+        if (uri->password != NULL) bstr_add_noex(result, uri->password);
+        bstr_add_c_noex(result, "@");
+    }
+
+    if (uri->hostname != NULL) bstr_add_noex(result, uri->hostname);
+
+    if (uri->port != NULL) {
+        bstr_add_c_noex(result, ":");
+        bstr_add_noex(result, uri->port);
+    }
+
+    if (uri->path != NULL) bstr_add_noex(result, uri->path);
+
+    if (uri->query != NULL) {
+        bstr_add_c_noex(result, "?");
+        bstr_add_noex(result, uri->query);
+    }
+
+    if (uri->fragment != NULL) {
+        bstr_add_c_noex(result, "#");
+        bstr_add_noex(result, uri->fragment);
+    }
+
+    return result;
+}
+
+/**
+ * Decide whether a purported response line should instead be treated as
+ * response body data. Browsers are lax when parsing the response line; in
+ * most cases they only look for the word "http" at the beginning, so that is
+ * the test applied here: after skipping leading whitespace and NUL bytes,
+ * the next four bytes must spell "http" in any letter case.
+ *
+ * @param[in] data pointer to bytearray
+ * @param[in] len length in bytes of data
+ * @return 0 if the data starts like a response line; 1 if it does not (or
+ *         data is NULL) and should be treated as body.
+ */
+int htp_treat_response_line_as_body(const uint8_t *data, size_t len) {
+    // Browser behavior:
+    //      Firefox 3.5.x: (?i)^\s*http
+    //      IE: (?i)^\s*http\s*/
+    //      Safari: ^HTTP/\d+\.\d+\s+\d{3}
+    static const char upper[] = "HTTP";
+    static const char lower[] = "http";
+
+    if (data == NULL) return 1;
+
+    // Skip leading whitespace and NUL bytes.
+    size_t start = 0;
+    while ((start < len) && (htp_is_space(data[start]) || data[start] == 0)) {
+        start++;
+    }
+
+    // Not enough bytes left to hold "http".
+    if (len < start + 4) return 1;
+
+    for (size_t i = 0; i < 4; i++) {
+        uint8_t c = data[start + i];
+
+        if ((c != upper[i]) && (c != lower[i])) return 1;
+    }
+
+    return 0;
+}
+
+/**
+ * Run the REQUEST_BODY_DATA hooks: first those registered on the inbound
+ * transaction, then those registered on the configuration. When the
+ * connection parser has a PUT file, the same chunk is additionally accounted
+ * to that file and passed to the REQUEST_FILE_DATA hook.
+ *
+ * Nothing is run for an empty (non-NULL, zero-length) chunk or when there is
+ * no inbound transaction.
+ *
+ * @param[in] connp Connection parser.
+ * @param[in] d     Data chunk handed to the hooks.
+ * @return HTP_OK, or the first non-HTP_OK status returned by a hook run.
+ */
+htp_status_t htp_req_run_hook_body_data(htp_connp_t *connp, htp_tx_data_t *d) {
+    // Do not invoke callbacks with an empty data chunk
+    if ((d->data != NULL) && (d->len == 0)) return HTP_OK;
+
+    // Do not invoke callbacks without a transaction.
+    if (connp->in_tx == NULL) return HTP_OK;
+
+    // Transaction hooks go first, configuration hooks second; the
+    // second set runs only if the first one succeeded.
+    htp_status_t rc = htp_hook_run_all(connp->in_tx->hook_request_body_data, d);
+    if (rc == HTP_OK) {
+        rc = htp_hook_run_all(connp->cfg->hook_request_body_data, d);
+    }
+    if (rc != HTP_OK) return rc;
+
+    // Only PUT requests have a file to feed.
+    if (connp->put_file == NULL) return HTP_OK;
+
+    // On PUT requests, treat request body as file
+    htp_file_data_t file_data;
+
+    file_data.data = d->data;
+    file_data.len = d->len;
+    file_data.file = connp->put_file;
+    file_data.file->len += d->len;
+
+    return htp_hook_run_all(connp->cfg->hook_request_file_data, &file_data);
+}
+
+/**
+ * Run the RESPONSE_BODY_DATA hooks: first those registered on the outbound
+ * transaction, then those registered on the configuration.
+ *
+ * Nothing is run for an empty (non-NULL, zero-length) chunk or when there is
+ * no outbound transaction.
+ *
+ * @param[in] connp Connection parser.
+ * @param[in] d     Data chunk handed to the hooks.
+ * @return HTP_OK, or the first non-HTP_OK status returned by a hook run.
+ */
+htp_status_t htp_res_run_hook_body_data(htp_connp_t *connp, htp_tx_data_t *d) {
+    // Do not invoke callbacks with an empty data chunk.
+    if ((d->data != NULL) && (d->len == 0)) return HTP_OK;
+
+    // Do not invoke callbacks without a transaction. The request-side
+    // counterpart applies the same guard to its transaction; without it the
+    // hook lookup below would dereference a NULL pointer.
+    if (connp->out_tx == NULL) return HTP_OK;
+
+    // Run transaction hooks first
+    htp_status_t rc = htp_hook_run_all(connp->out_tx->hook_response_body_data, d);
+    if (rc != HTP_OK) return rc;
+
+    // Run configuration hooks second
+    rc = htp_hook_run_all(connp->cfg->hook_response_body_data, d);
+    if (rc != HTP_OK) return rc;
+
+    return HTP_OK;
+}
+
+/**
+ * Extract a double-quoted string from the start of a memory region.
+ *
+ * The region must begin with a double quote. A backslash followed by another
+ * byte is an escape: the backslash is dropped and the following byte is taken
+ * literally (so an escaped double quote does not end the string).
+ *
+ * @param[in] data       Start of the region.
+ * @param[in] len        Length of the region.
+ * @param[out] out       Receives a newly allocated string with the unescaped
+ *                       contents between the quotes.
+ * @param[out] endoffset Optional; receives the offset of the closing quote.
+ * @return HTP_OK on success, HTP_DECLINED if the input is not well formed
+ *         (empty, no opening quote, or no closing quote), and HTP_ERROR on
+ *         NULL arguments or allocation failure.
+ */
+htp_status_t htp_extract_quoted_string_as_bstr(unsigned char *data, size_t len, bstr **out, size_t *endoffset) {
+    if ((data == NULL) || (out == NULL)) return HTP_ERROR;
+
+    // Need an opening double quote with at least one byte behind it.
+    if (len == 0) return HTP_DECLINED;
+    if (data[0] != '"') return HTP_DECLINED;
+    if (len == 1) return HTP_DECLINED;
+
+    // First pass: locate the closing quote and count the escapes on the way.
+    size_t closing = 1;
+    size_t escapes = 0;
+    while ((closing < len) && (data[closing] != '"')) {
+        if ((data[closing] == '\\') && (closing + 1 < len)) {
+            // Step over the backslash and the byte it protects.
+            escapes++;
+            closing += 2;
+        } else {
+            closing++;
+        }
+    }
+
+    // Have we reached the end of input without seeing the terminating double quote?
+    if (closing == len) return HTP_DECLINED;
+
+    // Every escape shrinks the result by one byte (the backslash).
+    size_t outlen = closing - 1 - escapes;
+    *out = bstr_alloc(outlen);
+    if (*out == NULL) return HTP_ERROR;
+    unsigned char *dst = bstr_ptr(*out);
+    size_t written = 0;
+
+    // Second pass: copy everything between the quotes, dropping backslashes.
+    // TODO This is not full unescaping of the text; an escaped byte is simply
+    //      copied as is, which is all that escaped double quotes need.
+    size_t src = 1;
+    while (src < closing) {
+        if ((data[src] == '\\') && (src + 1 < len)) {
+            dst[written++] = data[src + 1];
+            src += 2;
+        } else {
+            dst[written++] = data[src++];
+        }
+    }
+
+    bstr_adjust_len(*out, outlen);
+
+    if (endoffset != NULL) {
+        *endoffset = closing;
+    }
+
+    return HTP_OK;
+}
+
+/**
+ * Extract the media type from a Content-Type header value.
+ *
+ * The media type is everything before the first semicolon, comma or space
+ * (the approach PHP 5.4.3 uses), returned as a lowercased copy. The value is
+ * assumed to be left-trimmed already, so that it starts on the media type;
+ * on some platforms that may not hold and a left-trim may be needed here.
+ *
+ * @param[in] header Header value.
+ * @param[out] ct    Receives the newly allocated, lowercased media type.
+ * @return HTP_OK on success; HTP_ERROR on NULL arguments or allocation failure.
+ */
+htp_status_t htp_parse_ct_header(bstr *header, bstr **ct) {
+    if ((header == NULL) || (ct == NULL)) return HTP_ERROR;
+
+    unsigned char *data = bstr_ptr(header);
+    size_t len = bstr_len(header);
+
+    // Find the end of the MIME type.
+    size_t end = 0;
+    for (; end < len; end++) {
+        unsigned char c = data[end];
+
+        if ((c == ';') || (c == ',') || (c == ' ')) break;
+    }
+
+    *ct = bstr_dup_ex(header, 0, end);
+    if (*ct == NULL) return HTP_ERROR;
+
+    bstr_to_lowercase(*ct);
+
+    return HTP_OK;
+}
+
+/**
+ * Implements relaxed (not strictly RFC) hostname validation.
+ *
+ * A hostname of 1 to 255 bytes is accepted in one of two forms:
+ *  - a bracketed IPv6 literal: '[', an address that inet_pton() accepts for
+ *    AF_INET6, and a closing ']';
+ *  - dot-separated labels of 1 to 63 bytes each, made of ASCII letters,
+ *    digits, '-' and '_', with exactly one dot between labels. A single
+ *    trailing dot is accepted.
+ *
+ * @param[in] hostname Hostname to validate; NULL is treated as invalid.
+ * @return 1 if the supplied hostname is valid; 0 if it is not.
+ */
+int htp_validate_hostname(bstr *hostname) {
+    if (hostname == NULL) return 0;
+
+    unsigned char *data = bstr_ptr(hostname);
+    size_t len = bstr_len(hostname);
+    size_t startpos = 0;
+    size_t pos = 0;
+
+    if ((data == NULL) || (len == 0) || (len > 255)) return 0;
+
+    if (data[0] == '[') {
+        // only ipv6 possible
+        if (len < 2 || len - 2 >= INET6_ADDRSTRLEN) {
+            return 0;
+        }
+
+        // The last byte is dropped below, so make sure it really is the
+        // closing bracket and not part of the address or trailing junk.
+        if (data[len - 1] != ']') {
+            return 0;
+        }
+
+        // inet_pton() works on a C string; an embedded NUL would make it
+        // validate only the part before the NUL.
+        if (memchr(data + 1, 0, len - 2) != NULL) {
+            return 0;
+        }
+
+        char dst[sizeof(struct in6_addr)];
+        char str[INET6_ADDRSTRLEN];
+        memcpy(str, data + 1, len - 2);
+        str[len - 2] = 0;
+
+        // inet_pton() returns 1 only for a valid address; 0 and -1 both
+        // mean that the text cannot be accepted.
+        return inet_pton(AF_INET6, str, dst) == 1;
+    }
+
+    while (pos < len) {
+        // Validate label characters.
+        startpos = pos;
+        while ((pos < len) && (data[pos] != '.')) {
+            unsigned char c = data[pos];
+            // According to the RFC, the underscore is not allowed in a label, but
+            // we allow it here because we think it's often seen in practice.
+            if (!(((c >= 'a') && (c <= 'z')) || ((c >= 'A') && (c <= 'Z')) ||
+                    ((c >= '0') && (c <= '9')) ||
+                    (c == '-') || (c == '_')))
+            {
+                return 0;
+            }
+
+            pos++;
+        }
+
+        // Validate label length.
+        if ((pos - startpos == 0) || (pos - startpos > 63)) return 0;
+
+        if (pos >= len) return 1; // No more data after label.
+
+        // How many dots are there?
+        startpos = pos;
+        while ((pos < len) && (data[pos] == '.')) pos++;
+
+        if (pos - startpos != 1) return 0; // Exactly one dot expected.
+    }
+
+    return 1;
+}
+
+/**
+ * Release a URI structure together with all of its component strings.
+ *
+ * @param[in] uri URI to free; NULL is ignored.
+ */
+void htp_uri_free(htp_uri_t *uri) {
+    if (uri != NULL) {
+        // Collect the components so that they can be released in one loop.
+        bstr *components[] = {
+            uri->scheme,
+            uri->username,
+            uri->password,
+            uri->hostname,
+            uri->port,
+            uri->path,
+            uri->query,
+            uri->fragment
+        };
+
+        for (size_t i = 0; i < sizeof (components) / sizeof (components[0]); i++) {
+            bstr_free(components[i]);
+        }
+
+        free(uri);
+    }
+}
+
+/**
+ * Allocate a zero-initialized URI structure whose port number is set to -1.
+ *
+ * @return The new structure, or NULL if the allocation fails.
+ */
+htp_uri_t *htp_uri_alloc(void) {
+    htp_uri_t *uri = calloc(1, sizeof (htp_uri_t));
+
+    if (uri != NULL) {
+        uri->port_number = -1;
+    }
+
+    return uri;
+}
+
+/**
+ * Return the full library version string (HTP_VERSION_STRING_FULL).
+ *
+ * @return Pointer to a string literal; it must not be modified or freed.
+ */
+char *htp_get_version(void) {
+    char *version = HTP_VERSION_STRING_FULL;
+
+    return version;
+}
+
+/**
+ * Tells if a header value (haystack) contains a token (needle).
+ *
+ * The header value is treated as a list of comma-separated tokens that may
+ * be surrounded by whitespace. Header bytes are lowercased before they are
+ * compared; the token itself is compared as given.
+ *
+ * @param[in] hvp header value pointer
+ * @param[in] hvlen length of header value buffer
+ * @param[in] value token to look for (null-terminated string), should be a lowercase constant
+ * @return HTP_OK if the header has the token; HTP_ERROR if it has not.
+ */
+htp_status_t htp_header_has_token(const unsigned char *hvp, size_t hvlen, const unsigned char *value) {
+    enum {
+        TOKEN_MATCHING, // comparing the current list entry against the token
+        TOKEN_SKIPPING, // current entry cannot match; wait for the next comma
+        TOKEN_MATCHED   // whole token seen; only whitespace may follow
+    } state = TOKEN_MATCHING;
+
+    // Number of token bytes matched so far.
+    size_t matched = 0;
+
+    for (size_t i = 0; i < hvlen; i++) {
+        const unsigned char c = hvp[i];
+
+        switch (state) {
+            case TOKEN_MATCHING:
+                // Whitespace in front of an entry is ignored.
+                if ((matched == 0) && htp_is_space(c)) break;
+
+                if (tolower(c) == value[matched]) {
+                    matched++;
+                    if (value[matched] == 0) {
+                        // End of token; what follows decides the outcome.
+                        state = TOKEN_MATCHED;
+                    }
+                    break;
+                }
+
+                // Mismatch: give up on this entry. If the offending byte is
+                // itself a comma, the next entry starts right after it.
+                matched = 0;
+                state = (c == ',') ? TOKEN_MATCHING : TOKEN_SKIPPING;
+                break;
+
+            case TOKEN_SKIPPING:
+                if (c == ',') {
+                    // Start of next entry.
+                    state = TOKEN_MATCHING;
+                }
+                break;
+
+            case TOKEN_MATCHED:
+                if (c == ',') {
+                    return HTP_OK;
+                }
+                if (!htp_is_space(c)) {
+                    // Trailing junk after the token; wait for the next entry.
+                    matched = 0;
+                    state = TOKEN_SKIPPING;
+                }
+                break;
+        }
+    }
+
+    // The header may end right after the token, without a comma.
+    if (state == TOKEN_MATCHED) {
+        return HTP_OK;
+    }
+
+    return HTP_ERROR;
+}
diff --git a/htp/htp_version.h.in b/htp/htp_version.h.in
new file mode 100644
index 0000000..89a503f
--- /dev/null
+++ b/htp/htp_version.h.in
@@ -0,0 +1,53 @@
+/***************************************************************************
+ * Copyright (c) 2009-2010 Open Information Security Foundation
+ * Copyright (c) 2010-2013 Qualys, Inc.
+ * All rights reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions are
+ * met:
+ *
+ * - Redistributions of source code must retain the above copyright
+ * notice, this list of conditions and the following disclaimer.
+
+ * - Redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in the
+ * documentation and/or other materials provided with the distribution.
+
+ * - Neither the name of the Qualys, Inc. nor the names of its
+ * contributors may be used to endorse or promote products derived from
+ * this software without specific prior written permission.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+ * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+ * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+ * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
+ * HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+ * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
+ * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+ * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
+ * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+ * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+ * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+ ***************************************************************************/
+
+/**
+ * @file
+ * @author Ivan Ristic <ivanr@webkreator.com>
+ *
+ * Library version strings.
+ *
+ * HTP_VERSION_STRING is the bare version. In this template it is the
+ * placeholder "@PACKAGE_VERSION@"; NOTE(review): presumably substituted by
+ * the build system when the real header is generated from this ".in"
+ * file - confirm against the build scripts.
+ *
+ * HTP_VERSION_STRING_FULL is the same version prefixed with "LibHTP v",
+ * built by string literal concatenation.
+ */
+
+#ifndef HTP_VERSION_H
+#define HTP_VERSION_H
+
+#ifdef __cplusplus
+extern "C" {
+#endif
+
+#define HTP_VERSION_STRING "@PACKAGE_VERSION@"
+#define HTP_VERSION_STRING_FULL "LibHTP v" HTP_VERSION_STRING
+
+#ifdef __cplusplus
+}
+#endif
+
+#endif /* HTP_VERSION_H */
diff --git a/htp/lzma/7zTypes.h b/htp/lzma/7zTypes.h
new file mode 100644
index 0000000..f5d7505
--- /dev/null
+++ b/htp/lzma/7zTypes.h
@@ -0,0 +1,375 @@
+/* 7zTypes.h -- Basic types
+2018-08-04 : Igor Pavlov : Public domain */
+
+#ifndef __7Z_TYPES_H
+#define __7Z_TYPES_H
+
+#ifdef _WIN32
+/* #include <windows.h> */
+#endif
+
+#include <stddef.h>
+#include <zconf.h>
+
+#ifndef EXTERN_C_BEGIN
+#ifdef __cplusplus
+#define EXTERN_C_BEGIN extern "C" {
+#define EXTERN_C_END }
+#else
+#define EXTERN_C_BEGIN
+#define EXTERN_C_END
+#endif
+#endif
+
+EXTERN_C_BEGIN
+
+#define SZ_OK 0
+
+#define SZ_ERROR_DATA 1
+#define SZ_ERROR_MEM 2
+#define SZ_ERROR_CRC 3
+#define SZ_ERROR_UNSUPPORTED 4
+#define SZ_ERROR_PARAM 5
+#define SZ_ERROR_INPUT_EOF 6
+#define SZ_ERROR_OUTPUT_EOF 7
+#define SZ_ERROR_READ 8
+#define SZ_ERROR_WRITE 9
+#define SZ_ERROR_PROGRESS 10
+#define SZ_ERROR_FAIL 11
+#define SZ_ERROR_THREAD 12
+
+#define SZ_ERROR_ARCHIVE 16
+#define SZ_ERROR_NO_ARCHIVE 17
+
+typedef int SRes;
+
+
+#ifdef _WIN32
+
+/* typedef DWORD WRes; */
+typedef unsigned WRes;
+#define MY_SRes_HRESULT_FROM_WRes(x) HRESULT_FROM_WIN32(x)
+
+#else
+
+typedef int WRes;
+#define MY__FACILITY_WIN32 7
+#define MY__FACILITY__WRes MY__FACILITY_WIN32
+#define MY_SRes_HRESULT_FROM_WRes(x) ((HRESULT)(x) <= 0 ? ((HRESULT)(x)) : ((HRESULT) (((x) & 0x0000FFFF) | (MY__FACILITY__WRes << 16) | 0x80000000)))
+
+#endif
+
+
+#ifndef RINOK
+#define RINOK(x) { int __result__ = (x); if (__result__ != 0) return __result__; }
+#endif
+
+typedef short Int16;
+typedef unsigned short UInt16;
+
+#ifdef _LZMA_UINT32_IS_ULONG
+typedef long Int32;
+typedef unsigned long UInt32;
+#else
+typedef int Int32;
+typedef unsigned int UInt32;
+#endif
+
+#ifdef _SZ_NO_INT_64
+
+/* define _SZ_NO_INT_64, if your compiler doesn't support 64-bit integers.
+ NOTES: Some code will work incorrectly in that case! */
+
+typedef long Int64;
+typedef unsigned long UInt64;
+
+#else
+
+#if defined(_MSC_VER) || defined(__BORLANDC__)
+typedef __int64 Int64;
+typedef unsigned __int64 UInt64;
+#define UINT64_CONST(n) n
+#else
+typedef long long int Int64;
+typedef unsigned long long int UInt64;
+#define UINT64_CONST(n) n ## ULL
+#endif
+
+#endif
+
+#ifdef _LZMA_NO_SYSTEM_SIZE_T
+typedef UInt32 SizeT;
+#else
+typedef size_t SizeT;
+#endif
+
+typedef int BoolInt;
+/* typedef BoolInt Bool; */
+#define True 1
+#define False 0
+
+
+#ifdef _WIN32
+#define MY_STD_CALL __stdcall
+#else
+#define MY_STD_CALL
+#endif
+
+#ifdef _MSC_VER
+
+#if _MSC_VER >= 1300
+#define MY_NO_INLINE __declspec(noinline)
+#else
+#define MY_NO_INLINE
+#endif
+
+#define MY_FORCE_INLINE __forceinline
+
+#define MY_CDECL __cdecl
+#define MY_FAST_CALL __fastcall
+
+#else
+
+#define MY_NO_INLINE
+#define MY_FORCE_INLINE
+#define MY_CDECL
+#define MY_FAST_CALL
+
+/* inline keyword : for C++ / C99 */
+
+/* GCC, clang: */
+/*
+#if defined (__GNUC__) && (__GNUC__ >= 4)
+#define MY_FORCE_INLINE __attribute__((always_inline))
+#define MY_NO_INLINE __attribute__((noinline))
+#endif
+*/
+
+#endif
+
+
+/* The following interfaces use first parameter as pointer to structure */
+
+typedef struct IByteIn IByteIn;
+struct IByteIn
+{
+ Byte (*Read)(const IByteIn *p); /* reads one byte, returns 0 in case of EOF or error */
+};
+#define IByteIn_Read(p) (p)->Read(p)
+
+
+typedef struct IByteOut IByteOut;
+struct IByteOut
+{
+ void (*Write)(const IByteOut *p, Byte b);
+};
+#define IByteOut_Write(p, b) (p)->Write(p, b)
+
+
+typedef struct ISeqInStream ISeqInStream;
+struct ISeqInStream
+{
+ SRes (*Read)(const ISeqInStream *p, void *buf, size_t *size);
+ /* if (input(*size) != 0 && output(*size) == 0) means end_of_stream.
+ (output(*size) < input(*size)) is allowed */
+};
+#define ISeqInStream_Read(p, buf, size) (p)->Read(p, buf, size)
+
+/* it can return SZ_ERROR_INPUT_EOF */
+SRes SeqInStream_Read(const ISeqInStream *stream, void *buf, size_t size);
+SRes SeqInStream_Read2(const ISeqInStream *stream, void *buf, size_t size, SRes errorType);
+SRes SeqInStream_ReadByte(const ISeqInStream *stream, Byte *buf);
+
+
+typedef struct ISeqOutStream ISeqOutStream;
+struct ISeqOutStream
+{
+ size_t (*Write)(const ISeqOutStream *p, const void *buf, size_t size);
+ /* Returns: result - the number of actually written bytes.
+ (result < size) means error */
+};
+#define ISeqOutStream_Write(p, buf, size) (p)->Write(p, buf, size)
+
+typedef enum
+{
+ SZ_SEEK_SET = 0,
+ SZ_SEEK_CUR = 1,
+ SZ_SEEK_END = 2
+} ESzSeek;
+
+
+typedef struct ISeekInStream ISeekInStream;
+struct ISeekInStream
+{
+ SRes (*Read)(const ISeekInStream *p, void *buf, size_t *size); /* same as ISeqInStream::Read */
+ SRes (*Seek)(const ISeekInStream *p, Int64 *pos, ESzSeek origin);
+};
+#define ISeekInStream_Read(p, buf, size) (p)->Read(p, buf, size)
+#define ISeekInStream_Seek(p, pos, origin) (p)->Seek(p, pos, origin)
+
+
+typedef struct ILookInStream ILookInStream;
+struct ILookInStream
+{
+ SRes (*Look)(const ILookInStream *p, const void **buf, size_t *size);
+ /* if (input(*size) != 0 && output(*size) == 0) means end_of_stream.
+ (output(*size) > input(*size)) is not allowed
+ (output(*size) < input(*size)) is allowed */
+ SRes (*Skip)(const ILookInStream *p, size_t offset);
+ /* offset must be <= output(*size) of Look */
+
+ SRes (*Read)(const ILookInStream *p, void *buf, size_t *size);
+ /* reads directly (without buffer). It's same as ISeqInStream::Read */
+ SRes (*Seek)(const ILookInStream *p, Int64 *pos, ESzSeek origin);
+};
+
+#define ILookInStream_Look(p, buf, size) (p)->Look(p, buf, size)
+#define ILookInStream_Skip(p, offset) (p)->Skip(p, offset)
+#define ILookInStream_Read(p, buf, size) (p)->Read(p, buf, size)
+#define ILookInStream_Seek(p, pos, origin) (p)->Seek(p, pos, origin)
+
+
+SRes LookInStream_LookRead(const ILookInStream *stream, void *buf, size_t *size);
+SRes LookInStream_SeekTo(const ILookInStream *stream, UInt64 offset);
+
+/* reads via ILookInStream::Read */
+SRes LookInStream_Read2(const ILookInStream *stream, void *buf, size_t size, SRes errorType);
+SRes LookInStream_Read(const ILookInStream *stream, void *buf, size_t size);
+
+
+
+typedef struct
+{
+ ILookInStream vt;
+ const ISeekInStream *realStream;
+
+ size_t pos;
+ size_t size; /* it's data size */
+
+ /* the following variables must be set outside */
+ Byte *buf;
+ size_t bufSize;
+} CLookToRead2;
+
+void LookToRead2_CreateVTable(CLookToRead2 *p, int lookahead);
+
+#define LookToRead2_Init(p) { (p)->pos = (p)->size = 0; }
+
+
+typedef struct
+{
+ ISeqInStream vt;
+ const ILookInStream *realStream;
+} CSecToLook;
+
+void SecToLook_CreateVTable(CSecToLook *p);
+
+
+
+typedef struct
+{
+ ISeqInStream vt;
+ const ILookInStream *realStream;
+} CSecToRead;
+
+void SecToRead_CreateVTable(CSecToRead *p);
+
+
+typedef struct ICompressProgress ICompressProgress;
+
+struct ICompressProgress
+{
+ SRes (*Progress)(const ICompressProgress *p, UInt64 inSize, UInt64 outSize);
+ /* Returns: result. (result != SZ_OK) means break.
+ Value (UInt64)(Int64)-1 for size means unknown value. */
+};
+#define ICompressProgress_Progress(p, inSize, outSize) (p)->Progress(p, inSize, outSize)
+
+
+
+typedef struct ISzAlloc ISzAlloc;
+typedef const ISzAlloc * ISzAllocPtr;
+
+struct ISzAlloc
+{
+ void *(*Alloc)(ISzAllocPtr p, size_t size);
+ void (*Free)(ISzAllocPtr p, void *address); /* address can be 0 */
+};
+
+#define ISzAlloc_Alloc(p, size) (p)->Alloc(p, size)
+#define ISzAlloc_Free(p, a) (p)->Free(p, a)
+
+/* deprecated */
+#define IAlloc_Alloc(p, size) ISzAlloc_Alloc(p, size)
+#define IAlloc_Free(p, a) ISzAlloc_Free(p, a)
+
+
+
+
+
+#ifndef MY_offsetof
+ #ifdef offsetof
+ #define MY_offsetof(type, m) offsetof(type, m)
+ /*
+ #define MY_offsetof(type, m) FIELD_OFFSET(type, m)
+ */
+ #else
+ #define MY_offsetof(type, m) ((size_t)&(((type *)0)->m))
+ #endif
+#endif
+
+
+
+#ifndef MY_container_of
+
+/*
+#define MY_container_of(ptr, type, m) container_of(ptr, type, m)
+#define MY_container_of(ptr, type, m) CONTAINING_RECORD(ptr, type, m)
+#define MY_container_of(ptr, type, m) ((type *)((char *)(ptr) - offsetof(type, m)))
+#define MY_container_of(ptr, type, m) (&((type *)0)->m == (ptr), ((type *)(((char *)(ptr)) - MY_offsetof(type, m))))
+*/
+
+/*
+ GCC shows warning: "perhaps the 'offsetof' macro was used incorrectly"
+ GCC 3.4.4 : classes with constructor
+ GCC 4.8.1 : classes with non-public variable members
+*/
+/* MY_container_of(ptr, type, m): given ptr, the address of member m inside an
+ * object of the given type, yields the address of that enclosing object by
+ * subtracting the member offset.
+ * The operand "1 ? (ptr) : &((type *)0)->m" always evaluates to ptr; the
+ * second branch is never evaluated and is only there so that the compiler
+ * checks ptr against the type "pointer to member m". */
+#define MY_container_of(ptr, type, m) ((type *)((char *)(1 ? (ptr) : &((type *)0)->m) - MY_offsetof(type, m)))
+
+
+#endif
+
+#define CONTAINER_FROM_VTBL_SIMPLE(ptr, type, m) ((type *)(ptr))
+
+/*
+#define CONTAINER_FROM_VTBL(ptr, type, m) CONTAINER_FROM_VTBL_SIMPLE(ptr, type, m)
+*/
+#define CONTAINER_FROM_VTBL(ptr, type, m) MY_container_of(ptr, type, m)
+
+#define CONTAINER_FROM_VTBL_CLS(ptr, type, m) CONTAINER_FROM_VTBL_SIMPLE(ptr, type, m)
+/*
+#define CONTAINER_FROM_VTBL_CLS(ptr, type, m) CONTAINER_FROM_VTBL(ptr, type, m)
+*/
+
+
+
+#ifdef _WIN32
+
+#define CHAR_PATH_SEPARATOR '\\'
+#define WCHAR_PATH_SEPARATOR L'\\'
+#define STRING_PATH_SEPARATOR "\\"
+#define WSTRING_PATH_SEPARATOR L"\\"
+
+#else
+
+#define CHAR_PATH_SEPARATOR '/'
+#define WCHAR_PATH_SEPARATOR L'/'
+#define STRING_PATH_SEPARATOR "/"
+#define WSTRING_PATH_SEPARATOR L"/"
+
+#endif
+
+EXTERN_C_END
+
+#endif
diff --git a/htp/lzma/Compiler.h b/htp/lzma/Compiler.h
new file mode 100644
index 0000000..c788648
--- /dev/null
+++ b/htp/lzma/Compiler.h
@@ -0,0 +1,33 @@
+/* Compiler.h
+2017-04-03 : Igor Pavlov : Public domain */
+
+#ifndef __7Z_COMPILER_H
+#define __7Z_COMPILER_H
+
+#ifdef _MSC_VER
+
+ #ifdef UNDER_CE
+ #define RPC_NO_WINDOWS_H
+ /* #pragma warning(disable : 4115) // '_RPC_ASYNC_STATE' : named type definition in parentheses */
+ #pragma warning(disable : 4201) // nonstandard extension used : nameless struct/union
+ #pragma warning(disable : 4214) // nonstandard extension used : bit field types other than int
+ #endif
+
+ #if _MSC_VER >= 1300
+ #pragma warning(disable : 4996) // This function or variable may be unsafe
+ #else
+ #pragma warning(disable : 4511) // copy constructor could not be generated
+ #pragma warning(disable : 4512) // assignment operator could not be generated
+ #pragma warning(disable : 4514) // unreferenced inline function has been removed
+ #pragma warning(disable : 4702) // unreachable code
+ #pragma warning(disable : 4710) // not inlined
+ #pragma warning(disable : 4714) // function marked as __forceinline not inlined
+ #pragma warning(disable : 4786) // identifier was truncated to '255' characters in the debug information
+ #endif
+
+#endif
+/* UNUSED_VAR(x): references x through a cast to void so that it counts as used.
+ * Note that the expansion already ends with ';' and that x is not parenthesized. */
+#define UNUSED_VAR(x) (void)x;
+/* #define UNUSED_VAR(x) x=x; */
+
+#endif
diff --git a/htp/lzma/LzFind.c b/htp/lzma/LzFind.c
new file mode 100644
index 0000000..ee5375c
--- /dev/null
+++ b/htp/lzma/LzFind.c
@@ -0,0 +1,1127 @@
+/* LzFind.c -- Match finder for LZ algorithms
+2018-07-08 : Igor Pavlov : Public domain */
+
+#include "Precomp.h"
+
+#include <string.h>
+
+#include "LzFind.h"
+#include "LzHash.h"
+/* Constants used by the match finder below. */
+#define kEmptyHashValue 0 /* stored in hash / son slots that hold no position */
+#define kMaxValForNormalize ((UInt32)0xFFFFFFFF) /* value of p->pos at which MatchFinder_CheckLimits() normalizes */
+#define kNormalizeStepMin (1 << 10) /* it must be power of 2 */
+#define kNormalizeMask (~(UInt32)(kNormalizeStepMin - 1)) /* clears the low bits: rounds down to a multiple of kNormalizeStepMin */
+#define kMaxHistorySize ((UInt32)7 << 29) /* MatchFinder_Create() rejects a larger historySize */
+
+#define kStartMaxLen 3
+
+/* Releases the window buffer (p->bufferBase) through the given allocator and
+   clears the pointer. Nothing is done in direct-input mode. */
+static void LzInWindow_Free(CMatchFinder *p, ISzAllocPtr alloc)
+{
+  if (p->directInput)
+    return;
+
+  ISzAlloc_Free(alloc, p->bufferBase);
+  p->bufferBase = NULL;
+}
+
+/* Prepares the input window of
+     p->keepSizeBefore + p->keepSizeAfter + keepSizeReserv
+   bytes. That sum must be < 4G (it is kept in a UInt32).
+
+   Direct-input mode: nothing is allocated, only p->blockSize is recorded.
+   Otherwise the buffer is (re)allocated unless a buffer of exactly that size
+   is already present.
+
+   Returns 1 on success. Returns 0 if the allocation fails or, in the
+   allocating path, if the size sum wraps around 32 bits: a wrapped size would
+   produce a buffer smaller than the window that MatchFinder_MoveBlock() and
+   MatchFinder_ReadBlock() later address. */
+static int LzInWindow_Create(CMatchFinder *p, UInt32 keepSizeReserv, ISzAllocPtr alloc)
+{
+  UInt32 keepSize = p->keepSizeBefore + p->keepSizeAfter;
+  UInt32 blockSize = keepSize + keepSizeReserv;
+  if (p->directInput)
+  {
+    p->blockSize = blockSize;
+    return 1;
+  }
+  /* unsigned addition wrapped if a partial sum is smaller than one of its terms */
+  if (keepSize < p->keepSizeBefore || blockSize < keepSize)
+  {
+    LzInWindow_Free(p, alloc);
+    return 0;
+  }
+  if (!p->bufferBase || p->blockSize != blockSize)
+  {
+    LzInWindow_Free(p, alloc);
+    p->blockSize = blockSize;
+    p->bufferBase = (Byte *)ISzAlloc_Alloc(alloc, (size_t)blockSize);
+  }
+  return (p->bufferBase != NULL);
+}
+
+/* Returns p->buffer, the pointer to the byte at the current position. */
+Byte *MatchFinder_GetPointerToCurrentPos(CMatchFinder *p)
+{
+  Byte *current = p->buffer;
+  return current;
+}
+
+/* Returns how far the read position (streamPos) is ahead of the current position (pos). */
+static UInt32 MatchFinder_GetNumAvailableBytes(CMatchFinder *p)
+{
+  UInt32 available = p->streamPos - p->pos;
+  return available;
+}
+
+/* Rebases the three position counters (pos, streamPos, posLimit) by
+   subtracting subValue from each of them. */
+void MatchFinder_ReduceOffsets(CMatchFinder *p, UInt32 subValue)
+{
+  p->pos = p->pos - subValue;
+  p->streamPos = p->streamPos - subValue;
+  p->posLimit = p->posLimit - subValue;
+}
+/* Makes more input available after the current position.
+ * Does nothing once the end of the stream was seen or p->result holds an error.
+ * Direct-input mode: no bytes are copied; streamPos is advanced and
+ * directInputRem reduced by the same amount.
+ * Stream mode: calls the stream's Read callback repeatedly until more than
+ * keepSizeAfter bytes are available, the buffer has no free tail, the stream
+ * reports end of data (size == 0), or Read fails (status kept in p->result). */
+static void MatchFinder_ReadBlock(CMatchFinder *p)
+{
+ if (p->streamEndWasReached || p->result != SZ_OK)
+ return;
+
+ /* We use (p->streamPos - p->pos) value. (p->streamPos < p->pos) is allowed. */
+
+ if (p->directInput)
+ {
+ UInt32 curSize = 0xFFFFFFFF - (p->streamPos - p->pos); /* largest step that keeps (streamPos - pos) within 32 bits */
+ if (curSize > p->directInputRem)
+ curSize = (UInt32)p->directInputRem;
+ p->directInputRem -= curSize;
+ p->streamPos += curSize;
+ if (p->directInputRem == 0)
+ p->streamEndWasReached = 1;
+ return;
+ }
+
+ for (;;)
+ {
+ Byte *dest = p->buffer + (p->streamPos - p->pos); /* first byte after the data already read */
+ size_t size = (p->bufferBase + p->blockSize - dest); /* free space up to the end of the buffer */
+ if (size == 0)
+ return;
+
+ p->result = ISeqInStream_Read(p->stream, dest, &size); /* size: in = capacity, out = bytes actually read */
+ if (p->result != SZ_OK)
+ return;
+ if (size == 0)
+ {
+ p->streamEndWasReached = 1;
+ return;
+ }
+ p->streamPos += (UInt32)size;
+ if (p->streamPos - p->pos > p->keepSizeAfter)
+ return;
+ }
+}
+
+/* Slides the live part of the window to the start of the buffer: the
+   keepSizeBefore bytes preceding the current position plus everything read
+   after it. Afterwards p->buffer points keepSizeBefore bytes into the buffer. */
+void MatchFinder_MoveBlock(CMatchFinder *p)
+{
+  const Byte *keepStart = p->buffer - p->keepSizeBefore;
+  size_t keepBytes = (size_t)(p->streamPos - p->pos) + p->keepSizeBefore;
+
+  memmove(p->bufferBase, keepStart, keepBytes);
+  p->buffer = p->bufferBase + p->keepSizeBefore;
+}
+
+/* Returns nonzero when the space between the current position and the end of
+   the buffer is not larger than keepSizeAfter. Always 0 in direct-input mode. */
+int MatchFinder_NeedMove(CMatchFinder *p)
+{
+  size_t tailRoom;
+
+  if (p->directInput)
+    return 0;
+  /* if (p->streamEndWasReached) return 0; */
+  tailRoom = (size_t)(p->bufferBase + p->blockSize - p->buffer);
+  return (tailRoom <= p->keepSizeAfter);
+}
+
+/* Reads another block when the stream has not ended and no more than
+   keepSizeAfter bytes are available past the current position. */
+void MatchFinder_ReadIfRequired(CMatchFinder *p)
+{
+  if (!p->streamEndWasReached && p->keepSizeAfter >= p->streamPos - p->pos)
+    MatchFinder_ReadBlock(p);
+}
+
+/* Compacts the window first if MatchFinder_NeedMove() asks for it, then reads
+   more input. */
+static void MatchFinder_CheckAndMoveAndRead(CMatchFinder *p)
+{
+  int mustMove = MatchFinder_NeedMove(p);
+
+  if (mustMove)
+  {
+    MatchFinder_MoveBlock(p);
+  }
+  MatchFinder_ReadBlock(p);
+}
+
+/* Default configuration: binary-tree mode, 4 hash bytes, cut value 32,
+   bigHash off. */
+static void MatchFinder_SetDefaultSettings(CMatchFinder *p)
+{
+  p->btMode = 1;
+  p->numHashBytes = 4;
+  p->bigHash = 0;
+  p->cutValue = 32;
+}
+
+#define kCrcPoly 0xEDB88320
+
+/* Puts a CMatchFinder into its initial state: no buffers, stream input,
+   unknown expected data size, default settings, and the 256-entry table
+   p->crc[] generated from the polynomial kCrcPoly. */
+void MatchFinder_Construct(CMatchFinder *p)
+{
+  unsigned idx;
+
+  p->hash = NULL;
+  p->bufferBase = NULL;
+  p->directInput = 0;
+  p->expectedDataSize = (UInt64)(Int64)-1;
+  MatchFinder_SetDefaultSettings(p);
+
+  for (idx = 0; idx < 256; idx++)
+  {
+    UInt32 value = (UInt32)idx;
+    unsigned bit = 8;
+    /* eight shift steps; the polynomial is mixed in whenever the bit shifted out is 1 */
+    while (bit-- != 0)
+    {
+      if (value & 1)
+        value = (value >> 1) ^ kCrcPoly;
+      else
+        value >>= 1;
+    }
+    p->crc[idx] = value;
+  }
+}
+
+/* Releases the ref array (p->hash) and clears the pointer. */
+static void MatchFinder_FreeThisClassMemory(CMatchFinder *p, ISzAllocPtr alloc)
+{
+  CLzRef *refs = p->hash;
+
+  p->hash = NULL;
+  ISzAlloc_Free(alloc, refs);
+}
+/* Releases everything the match finder allocated: first the ref array
+ * (hash), then the window buffer (the latter only when not in direct-input
+ * mode). */
+void MatchFinder_Free(CMatchFinder *p, ISzAllocPtr alloc)
+{
+ MatchFinder_FreeThisClassMemory(p, alloc);
+ LzInWindow_Free(p, alloc);
+}
+
+/* Allocates an array of num CLzRef items.
+   Returns NULL if the byte count does not fit in size_t, otherwise whatever
+   the allocator returns. */
+static CLzRef* AllocRefs(size_t num, ISzAllocPtr alloc)
+{
+  const size_t itemSize = sizeof(CLzRef);
+  const size_t totalBytes = num * itemSize;
+
+  /* dividing back detects a wrapped multiplication */
+  if (totalBytes / itemSize != num)
+    return NULL;
+  return (CLzRef *)ISzAlloc_Alloc(alloc, totalBytes);
+}
+
+/* Configures the match finder and (re)allocates its memory: the input window
+ * (unless direct-input mode is on) and one ref array that holds the hash
+ * tables followed by the "son" area.
+ *   historySize         - dictionary size; values above kMaxHistorySize are rejected
+ *   keepAddBufferBefore - extra bytes kept before the current position
+ *   matchMaxLen         - maximum match length
+ *   keepAddBufferAfter  - extra bytes kept available after the current position
+ *   alloc               - allocator used for every buffer
+ * Buffers whose size already matches are reused.
+ * Returns 1 on success. On failure everything is released with
+ * MatchFinder_Free() and 0 is returned. */
+int MatchFinder_Create(CMatchFinder *p, UInt32 historySize,
+    UInt32 keepAddBufferBefore, UInt32 matchMaxLen, UInt32 keepAddBufferAfter,
+    ISzAllocPtr alloc)
+{
+  UInt32 sizeReserv;
+
+  if (historySize > kMaxHistorySize)
+  {
+    MatchFinder_Free(p, alloc);
+    return 0;
+  }
+
+  /* reserve: half of the history, a smaller fraction for very large histories */
+  sizeReserv = historySize >> 1;
+       if (historySize >= ((UInt32)3 << 30)) sizeReserv = historySize >> 3;
+  else if (historySize >= ((UInt32)2 << 30)) sizeReserv = historySize >> 2;
+
+  sizeReserv += (keepAddBufferBefore + matchMaxLen + keepAddBufferAfter) / 2 + (1 << 19);
+
+  p->keepSizeBefore = historySize + keepAddBufferBefore + 1;
+  p->keepSizeAfter = matchMaxLen + keepAddBufferAfter;
+
+  /* we need one additional byte, since we use MoveBlock after pos++ and before dictionary using */
+
+  if (LzInWindow_Create(p, sizeReserv, alloc))
+  {
+    UInt32 newCyclicBufferSize = historySize + 1;
+    UInt32 hs;
+    p->matchMaxLen = matchMaxLen;
+    {
+      p->fixedHashSize = 0;
+      if (p->numHashBytes == 2)
+        hs = (1 << 16) - 1;
+      else
+      {
+        hs = historySize;
+        if (hs > p->expectedDataSize)
+          hs = (UInt32)p->expectedDataSize;
+        if (hs != 0)
+          hs--;
+        /* smear the highest set bit downwards, then halve: yields a (2^k - 1) mask */
+        hs |= (hs >> 1);
+        hs |= (hs >> 2);
+        hs |= (hs >> 4);
+        hs |= (hs >> 8);
+        hs >>= 1;
+        hs |= 0xFFFF; /* don't change it! It's required for Deflate */
+        if (hs > (1 << 24))
+        {
+          if (p->numHashBytes == 3)
+            hs = (1 << 24) - 1;
+          else
+            hs >>= 1;
+          /* if (bigHash) mode, GetHeads4b() in LzFindMt.c needs (hs >= ((1 << 24) - 1)) */
+        }
+      }
+      p->hashMask = hs;
+      hs++;
+      if (p->numHashBytes > 2) p->fixedHashSize += kHash2Size;
+      if (p->numHashBytes > 3) p->fixedHashSize += kHash3Size;
+      if (p->numHashBytes > 4) p->fixedHashSize += kHash4Size;
+      hs += p->fixedHashSize;
+    }
+
+    {
+      size_t newSize;
+      size_t numSons;
+      p->historySize = historySize;
+      p->hashSizeSum = hs;
+      p->cyclicBufferSize = newCyclicBufferSize;
+
+      /* binary-tree mode keeps two refs per cyclic position */
+      numSons = newCyclicBufferSize;
+      if (p->btMode)
+        numSons <<= 1;
+      newSize = hs + numSons;
+
+      if (p->hash && p->numRefs == newSize)
+      {
+        /* The old array is reused. Its total size matches, but hashSizeSum
+           may differ from the value used when the array was allocated, so
+           son has to be re-derived; a stale son would overlap the hash
+           tables or run past the end of the array. */
+        p->son = p->hash + p->hashSizeSum;
+        return 1;
+      }
+
+      MatchFinder_FreeThisClassMemory(p, alloc);
+      p->numRefs = newSize;
+      p->hash = AllocRefs(newSize, alloc);
+
+      if (p->hash)
+      {
+        p->son = p->hash + p->hashSizeSum;
+        return 1;
+      }
+    }
+  }
+
+  MatchFinder_Free(p, alloc);
+  return 0;
+}
+/* Recomputes p->posLimit and p->lenLimit.
+ * posLimit is pos plus the smallest of:
+ *   - the distance to kMaxValForNormalize,
+ *   - the distance to the end of the cyclic buffer,
+ *   - the available bytes minus keepSizeAfter (or a single step / zero when
+ *     no more than keepSizeAfter bytes are available).
+ * lenLimit is the number of available bytes, capped at matchMaxLen. */
+static void MatchFinder_SetLimits(CMatchFinder *p)
+{
+ UInt32 limit = kMaxValForNormalize - p->pos;
+ UInt32 limit2 = p->cyclicBufferSize - p->cyclicBufferPos;
+
+ if (limit2 < limit)
+ limit = limit2;
+ limit2 = p->streamPos - p->pos; /* limit2 is reused: bytes available from here */
+
+ if (limit2 <= p->keepSizeAfter)
+ {
+ if (limit2 > 0)
+ limit2 = 1; /* data is short: advance one position at a time */
+ }
+ else
+ limit2 -= p->keepSizeAfter;
+
+ if (limit2 < limit)
+ limit = limit2;
+
+ {
+ UInt32 lenLimit = p->streamPos - p->pos;
+ if (lenLimit > p->matchMaxLen)
+ lenLimit = p->matchMaxLen;
+ p->lenLimit = lenLimit;
+ }
+ p->posLimit = p->pos + limit;
+}
+
+
+/* Fills the first p->fixedHashSize entries of the ref array with
+   kEmptyHashValue. */
+void MatchFinder_Init_LowHash(CMatchFinder *p)
+{
+  CLzRef *slot = p->hash;
+  size_t remaining = p->fixedHashSize;
+
+  for (; remaining != 0; remaining--)
+    *slot++ = kEmptyHashValue;
+}
+
+
+/* Fills the (p->hashMask + 1) entries that follow the fixed-size part of the
+   ref array with kEmptyHashValue. */
+void MatchFinder_Init_HighHash(CMatchFinder *p)
+{
+  CLzRef *slot = p->hash + p->fixedHashSize;
+  size_t remaining = (size_t)p->hashMask + 1;
+
+  for (; remaining != 0; remaining--)
+    *slot++ = kEmptyHashValue;
+}
+
+
+/* Resets the position state: the buffer pointer goes back to the start of
+   the window, pos and streamPos both start at cyclicBufferSize, and the
+   result / end-of-stream flags are cleared. When readData is nonzero a first
+   block is read before the limits are computed. */
+void MatchFinder_Init_3(CMatchFinder *p, int readData)
+{
+  const UInt32 startPos = p->cyclicBufferSize;
+
+  p->buffer = p->bufferBase;
+  p->cyclicBufferPos = 0;
+  p->streamPos = startPos;
+  p->pos = startPos;
+  p->streamEndWasReached = 0;
+  p->result = SZ_OK;
+
+  if (readData)
+  {
+    MatchFinder_ReadBlock(p);
+  }
+
+  MatchFinder_SetLimits(p);
+}
+
+/* Full initialization: clears both parts of the hash area, then resets the
+ * position state and reads the first block (readData = True). */
+void MatchFinder_Init(CMatchFinder *p)
+{
+ MatchFinder_Init_HighHash(p);
+ MatchFinder_Init_LowHash(p);
+ MatchFinder_Init_3(p, True);
+}
+
+
+/* Computes the amount by which positions are reduced during normalization:
+   (pos - historySize - 1) with its low bits cleared by kNormalizeMask. */
+static UInt32 MatchFinder_GetSubValue(CMatchFinder *p)
+{
+  UInt32 oldest = p->pos - p->historySize - 1;
+  return oldest & kNormalizeMask;
+}
+
+/* Rebases numItems refs in place: a ref not greater than subValue becomes
+   kEmptyHashValue, every other ref is reduced by subValue. */
+void MatchFinder_Normalize3(UInt32 subValue, CLzRef *items, size_t numItems)
+{
+  size_t remaining;
+
+  for (remaining = numItems; remaining != 0; remaining--, items++)
+  {
+    const UInt32 ref = *items;
+    *items = (ref <= subValue) ? kEmptyHashValue : (ref - subValue);
+  }
+}
+/* Rebases all stored positions: every ref in the array (p->numRefs items
+ * starting at p->hash) and the finder's own position counters are reduced by
+ * the same sub-value. */
+static void MatchFinder_Normalize(CMatchFinder *p)
+{
+ UInt32 subValue = MatchFinder_GetSubValue(p);
+ MatchFinder_Normalize3(subValue, p->hash, p->numRefs);
+ MatchFinder_ReduceOffsets(p, subValue);
+}
+
+/* Called by MOVE_POS when pos reaches posLimit. Handles whichever limit was
+ * hit - position normalization, refilling the window, wrapping the cyclic
+ * buffer index - and then recomputes the limits. */
+MY_NO_INLINE
+static void MatchFinder_CheckLimits(CMatchFinder *p)
+{
+ if (p->pos == kMaxValForNormalize)
+ MatchFinder_Normalize(p);
+ if (!p->streamEndWasReached && p->keepSizeAfter == p->streamPos - p->pos)
+ MatchFinder_CheckAndMoveAndRead(p);
+ if (p->cyclicBufferPos == p->cyclicBufferSize)
+ p->cyclicBufferPos = 0;
+ MatchFinder_SetLimits(p);
+}
+
+
+/*
+ (lenLimit > maxLen)
+
+ Hash-chain search. son[] holds one ref per cyclic position: the previous
+ position of the same chain. Starting at curMatch, the chain is followed
+ while the distance stays below _cyclicBufferSize and the cut counter lasts.
+ Each candidate longer than maxLen is appended to distances as the pair
+ (length, distance - 1). The search stops at once when a candidate matches
+ lenLimit bytes.
+ Returns the pointer just past the last pair written.
+ cutValue must be nonzero on entry: the loop pre-decrements it, so 0 would
+ wrap around instead of stopping.
+*/
+MY_FORCE_INLINE
+static UInt32 * Hc_GetMatchesSpec(unsigned lenLimit, UInt32 curMatch, UInt32 pos, const Byte *cur, CLzRef *son,
+ UInt32 _cyclicBufferPos, UInt32 _cyclicBufferSize, UInt32 cutValue,
+ UInt32 *distances, unsigned maxLen)
+{
+ /*
+ son[_cyclicBufferPos] = curMatch;
+ for (;;)
+ {
+ UInt32 delta = pos - curMatch;
+ if (cutValue-- == 0 || delta >= _cyclicBufferSize)
+ return distances;
+ {
+ const Byte *pb = cur - delta;
+ curMatch = son[_cyclicBufferPos - delta + ((delta > _cyclicBufferPos) ? _cyclicBufferSize : 0)];
+ if (pb[maxLen] == cur[maxLen] && *pb == *cur)
+ {
+ UInt32 len = 0;
+ while (++len != lenLimit)
+ if (pb[len] != cur[len])
+ break;
+ if (maxLen < len)
+ {
+ maxLen = len;
+ *distances++ = len;
+ *distances++ = delta - 1;
+ if (len == lenLimit)
+ return distances;
+ }
+ }
+ }
+ }
+ */
+
+ const Byte *lim = cur + lenLimit;
+ son[_cyclicBufferPos] = curMatch; /* link the current position into the chain */
+ do
+ {
+ UInt32 delta = pos - curMatch;
+ if (delta >= _cyclicBufferSize)
+ break;
+ {
+ ptrdiff_t diff;
+ curMatch = son[_cyclicBufferPos - delta + ((delta > _cyclicBufferPos) ? _cyclicBufferSize : 0)]; /* next link; the index wraps inside the cyclic buffer */
+ diff = (ptrdiff_t)0 - delta; /* negative offset from cur to the candidate */
+ if (cur[maxLen] == cur[maxLen + diff]) /* cheap filter: the byte at index maxLen must match for the candidate to beat maxLen */
+ {
+ const Byte *c = cur;
+ while (*c == c[diff])
+ {
+ if (++c == lim)
+ {
+ distances[0] = (UInt32)(lim - cur);
+ distances[1] = delta - 1;
+ return distances + 2;
+ }
+ }
+ {
+ unsigned len = (unsigned)(c - cur);
+ if (maxLen < len)
+ {
+ maxLen = len;
+ distances[0] = (UInt32)len;
+ distances[1] = delta - 1;
+ distances += 2;
+ }
+ }
+ }
+ }
+ }
+ while (--cutValue);
+
+ return distances;
+}
+
+/* Binary-tree search and insertion. son[] holds two refs per cyclic position
+ * (index << 1). The tree is walked from curMatch while the distance stays
+ * below _cyclicBufferSize and the cut counter lasts; on the way it is
+ * re-linked so that the current position becomes the root. ptr1 is the slot
+ * that receives nodes whose data compares below the current data, ptr0 the
+ * slot for the others.
+ * Each candidate longer than maxLen is appended to distances as the pair
+ * (length, distance - 1). Returns the pointer just past the last pair. */
+MY_FORCE_INLINE
+UInt32 * GetMatchesSpec1(UInt32 lenLimit, UInt32 curMatch, UInt32 pos, const Byte *cur, CLzRef *son,
+ UInt32 _cyclicBufferPos, UInt32 _cyclicBufferSize, UInt32 cutValue,
+ UInt32 *distances, UInt32 maxLen)
+{
+ CLzRef *ptr0 = son + ((size_t)_cyclicBufferPos << 1) + 1;
+ CLzRef *ptr1 = son + ((size_t)_cyclicBufferPos << 1);
+ unsigned len0 = 0, len1 = 0;
+ for (;;)
+ {
+ UInt32 delta = pos - curMatch;
+ if (cutValue-- == 0 || delta >= _cyclicBufferSize)
+ {
+ *ptr0 = *ptr1 = kEmptyHashValue; /* terminate both open branches */
+ return distances;
+ }
+ {
+ CLzRef *pair = son + ((size_t)(_cyclicBufferPos - delta + ((delta > _cyclicBufferPos) ? _cyclicBufferSize : 0)) << 1); /* the candidate's two refs */
+ const Byte *pb = cur - delta;
+ unsigned len = (len0 < len1 ? len0 : len1); /* comparison resumes at the shorter of the two lengths recorded so far */
+ UInt32 pair0 = pair[0]; /* read before the slots are overwritten below */
+ if (pb[len] == cur[len])
+ {
+ if (++len != lenLimit && pb[len] == cur[len])
+ while (++len != lenLimit)
+ if (pb[len] != cur[len])
+ break;
+ if (maxLen < len)
+ {
+ maxLen = (UInt32)len;
+ *distances++ = (UInt32)len;
+ *distances++ = delta - 1;
+ if (len == lenLimit)
+ {
+ *ptr1 = pair0; /* full-length match: take over the candidate's two refs and stop */
+ *ptr0 = pair[1];
+ return distances;
+ }
+ }
+ }
+ if (pb[len] < cur[len])
+ {
+ *ptr1 = curMatch;
+ ptr1 = pair + 1;
+ curMatch = *ptr1;
+ len1 = len;
+ }
+ else
+ {
+ *ptr0 = curMatch;
+ ptr0 = pair;
+ curMatch = *ptr0;
+ len0 = len;
+ }
+ }
+ }
+}
+/* Same tree walk and re-linking as GetMatchesSpec1(), but no matches are
+ * reported: it only inserts the current position into the binary tree. */
+static void SkipMatchesSpec(UInt32 lenLimit, UInt32 curMatch, UInt32 pos, const Byte *cur, CLzRef *son,
+ UInt32 _cyclicBufferPos, UInt32 _cyclicBufferSize, UInt32 cutValue)
+{
+ CLzRef *ptr0 = son + ((size_t)_cyclicBufferPos << 1) + 1;
+ CLzRef *ptr1 = son + ((size_t)_cyclicBufferPos << 1);
+ unsigned len0 = 0, len1 = 0;
+ for (;;)
+ {
+ UInt32 delta = pos - curMatch;
+ if (cutValue-- == 0 || delta >= _cyclicBufferSize)
+ {
+ *ptr0 = *ptr1 = kEmptyHashValue; /* terminate both open branches */
+ return;
+ }
+ {
+ CLzRef *pair = son + ((size_t)(_cyclicBufferPos - delta + ((delta > _cyclicBufferPos) ? _cyclicBufferSize : 0)) << 1); /* the candidate's two refs */
+ const Byte *pb = cur - delta;
+ unsigned len = (len0 < len1 ? len0 : len1); /* comparison resumes at the shorter of the two lengths recorded so far */
+ if (pb[len] == cur[len])
+ {
+ while (++len != lenLimit)
+ if (pb[len] != cur[len])
+ break;
+ {
+ if (len == lenLimit)
+ {
+ *ptr1 = pair[0]; /* full-length match: take over the candidate's two refs and stop */
+ *ptr0 = pair[1];
+ return;
+ }
+ }
+ }
+ if (pb[len] < cur[len])
+ {
+ *ptr1 = curMatch;
+ ptr1 = pair + 1;
+ curMatch = *ptr1;
+ len1 = len;
+ }
+ else
+ {
+ *ptr0 = curMatch;
+ ptr0 = pair;
+ curMatch = *ptr0;
+ len0 = len;
+ }
+ }
+ }
+}
+/* MOVE_POS: advances the finder by one byte (cyclic index, buffer pointer,
+ * pos) and calls MatchFinder_CheckLimits() when pos reaches posLimit.
+ * It expands to several statements and expects a variable named p. */
+#define MOVE_POS \
+ ++p->cyclicBufferPos; \
+ p->buffer++; \
+ if (++p->pos == p->posLimit) MatchFinder_CheckLimits(p);
+/* MOVE_POS_RET: MOVE_POS followed by returning the local variable offset. */
+#define MOVE_POS_RET MOVE_POS return (UInt32)offset;
+/* Function form of MOVE_POS, used by the header macro below. */
+static void MatchFinder_MovePos(CMatchFinder *p) { MOVE_POS; }
+/* GET_MATCHES_HEADER2: common prologue. Declares lenLimit, hv, cur and
+ * curMatch; when fewer than minLen bytes can be matched it just advances one
+ * position and executes ret_op. */
+#define GET_MATCHES_HEADER2(minLen, ret_op) \
+ unsigned lenLimit; UInt32 hv; const Byte *cur; UInt32 curMatch; \
+ lenLimit = (unsigned)p->lenLimit; { if (lenLimit < minLen) { MatchFinder_MovePos(p); ret_op; }} \
+ cur = p->buffer;
+/* Prologue variants: GetMatches functions return 0, Skip loops "continue". */
+#define GET_MATCHES_HEADER(minLen) GET_MATCHES_HEADER2(minLen, return 0)
+#define SKIP_HEADER(minLen) GET_MATCHES_HEADER2(minLen, continue)
+/* MF_PARAMS: the argument run shared by the *MatchesSpec calls. */
+#define MF_PARAMS(p) p->pos, p->buffer, p->son, p->cyclicBufferPos, p->cyclicBufferSize, p->cutValue
+/* GET_MATCHES_FOOTER: binary-tree search appending to distances + offset,
+ * then advances and returns the total number of UInt32 values written. */
+#define GET_MATCHES_FOOTER(offset, maxLen) \
+ offset = (unsigned)(GetMatchesSpec1((UInt32)lenLimit, curMatch, MF_PARAMS(p), \
+ distances + offset, (UInt32)maxLen) - distances); MOVE_POS_RET;
+/* SKIP_FOOTER: binary-tree insertion without reporting, then advances. */
+#define SKIP_FOOTER \
+ SkipMatchesSpec((UInt32)lenLimit, curMatch, MF_PARAMS(p)); MOVE_POS;
+/* UPDATE_maxLen: extends maxLen for the candidate at distance d2 by comparing
+ * bytes from index maxLen up to lenLimit. Uses the locals cur, d2, lenLimit. */
+#define UPDATE_maxLen { \
+ ptrdiff_t diff = (ptrdiff_t)0 - d2; \
+ const Byte *c = cur + maxLen; \
+ const Byte *lim = cur + lenLimit; \
+ for (; c != lim; c++) if (*(c + diff) != *c) break; \
+ maxLen = (unsigned)(c - cur); }
+/* Binary-tree match finder with a single hash table indexed by hv.
+ * Writes (length, distance - 1) pairs to distances, advances one position and
+ * returns the number of UInt32 values written (0 when fewer than 2 bytes can
+ * be matched).
+ * NOTE(review): HASH2_CALC comes from LzHash.h and is expected to set hv. */
+static UInt32 Bt2_MatchFinder_GetMatches(CMatchFinder *p, UInt32 *distances)
+{
+ unsigned offset;
+ GET_MATCHES_HEADER(2)
+ HASH2_CALC;
+ curMatch = p->hash[hv]; /* previous position with this hash = search start */
+ p->hash[hv] = p->pos;
+ offset = 0;
+ GET_MATCHES_FOOTER(offset, 1)
+}
+/* Binary-tree match finder using the HASH_ZIP_CALC hash and a single table.
+ * Writes (length, distance - 1) pairs to distances, advances one position and
+ * returns the number of UInt32 values written (0 when fewer than 3 bytes can
+ * be matched). Only matches longer than 2 are reported.
+ * NOTE(review): HASH_ZIP_CALC comes from LzHash.h and is expected to set hv. */
+UInt32 Bt3Zip_MatchFinder_GetMatches(CMatchFinder *p, UInt32 *distances)
+{
+ unsigned offset;
+ GET_MATCHES_HEADER(3)
+ HASH_ZIP_CALC;
+ curMatch = p->hash[hv]; /* previous position with this hash = search start */
+ p->hash[hv] = p->pos;
+ offset = 0;
+ GET_MATCHES_FOOTER(offset, 2)
+}
+/* Binary-tree match finder with an extra small table indexed by h2 in front
+ * of the main table (at kFix3HashSize). The h2 candidate is tried first; the
+ * tree search then adds longer matches.
+ * Writes (length, distance - 1) pairs to distances, advances one position and
+ * returns the number of UInt32 values written (0 when fewer than 3 bytes can
+ * be matched).
+ * NOTE(review): HASH3_CALC comes from LzHash.h and is expected to set h2 and hv. */
+static UInt32 Bt3_MatchFinder_GetMatches(CMatchFinder *p, UInt32 *distances)
+{
+ UInt32 h2, d2, pos;
+ unsigned maxLen, offset;
+ UInt32 *hash;
+ GET_MATCHES_HEADER(3)
+
+ HASH3_CALC;
+
+ hash = p->hash;
+ pos = p->pos;
+
+ d2 = pos - hash[h2]; /* distance to the previous position with the same h2 */
+
+ curMatch = (hash + kFix3HashSize)[hv];
+
+ hash[h2] = pos;
+ (hash + kFix3HashSize)[hv] = pos;
+
+ maxLen = 2;
+ offset = 0;
+
+ if (d2 < p->cyclicBufferSize && *(cur - d2) == *cur) /* only the first byte is compared here; UPDATE_maxLen continues from index 2 */
+ {
+ UPDATE_maxLen
+ distances[0] = (UInt32)maxLen;
+ distances[1] = d2 - 1;
+ offset = 2;
+ if (maxLen == lenLimit)
+ {
+ SkipMatchesSpec((UInt32)lenLimit, curMatch, MF_PARAMS(p)); /* already at the maximum: insert into the tree without searching */
+ MOVE_POS_RET;
+ }
+ }
+
+ GET_MATCHES_FOOTER(offset, maxLen)
+}
+/* Binary-tree match finder with two extra small tables (indexed by h2, and by
+ * h3 at kFix3HashSize) in front of the main table (at kFix4HashSize). The two
+ * small-table candidates are tried first; the tree search then adds matches
+ * longer than max(3, best length so far).
+ * Writes (length, distance - 1) pairs to distances, advances one position and
+ * returns the number of UInt32 values written (0 when fewer than 4 bytes can
+ * be matched).
+ * NOTE(review): HASH4_CALC comes from LzHash.h and is expected to set h2, h3 and hv. */
+static UInt32 Bt4_MatchFinder_GetMatches(CMatchFinder *p, UInt32 *distances)
+{
+ UInt32 h2, h3, d2, d3, pos;
+ unsigned maxLen, offset;
+ UInt32 *hash;
+ GET_MATCHES_HEADER(4)
+
+ HASH4_CALC;
+
+ hash = p->hash;
+ pos = p->pos;
+
+ d2 = pos - hash [h2]; /* distances to the previous positions with the same h2 / h3 */
+ d3 = pos - (hash + kFix3HashSize)[h3];
+
+ curMatch = (hash + kFix4HashSize)[hv];
+
+ hash [h2] = pos;
+ (hash + kFix3HashSize)[h3] = pos;
+ (hash + kFix4HashSize)[hv] = pos;
+
+ maxLen = 0;
+ offset = 0;
+
+ if (d2 < p->cyclicBufferSize && *(cur - d2) == *cur)
+ {
+ maxLen = 2;
+ distances[0] = 2;
+ distances[1] = d2 - 1;
+ offset = 2;
+ }
+
+ if (d2 != d3 && d3 < p->cyclicBufferSize && *(cur - d3) == *cur)
+ {
+ maxLen = 3;
+ distances[(size_t)offset + 1] = d3 - 1; /* the length slot of this pair is filled in after UPDATE_maxLen */
+ offset += 2;
+ d2 = d3; /* UPDATE_maxLen works on d2 */
+ }
+
+ if (offset != 0)
+ {
+ UPDATE_maxLen
+ distances[(size_t)offset - 2] = (UInt32)maxLen;
+ if (maxLen == lenLimit)
+ {
+ SkipMatchesSpec((UInt32)lenLimit, curMatch, MF_PARAMS(p)); /* already at the maximum: insert into the tree without searching */
+ MOVE_POS_RET;
+ }
+ }
+
+ if (maxLen < 3)
+ maxLen = 3;
+
+ GET_MATCHES_FOOTER(offset, maxLen)
+}
+
+/*
+static UInt32 Bt5_MatchFinder_GetMatches(CMatchFinder *p, UInt32 *distances)
+{
+ UInt32 h2, h3, h4, d2, d3, d4, maxLen, offset, pos;
+ UInt32 *hash;
+ GET_MATCHES_HEADER(5)
+
+ HASH5_CALC;
+
+ hash = p->hash;
+ pos = p->pos;
+
+ d2 = pos - hash [h2];
+ d3 = pos - (hash + kFix3HashSize)[h3];
+ d4 = pos - (hash + kFix4HashSize)[h4];
+
+ curMatch = (hash + kFix5HashSize)[hv];
+
+ hash [h2] = pos;
+ (hash + kFix3HashSize)[h3] = pos;
+ (hash + kFix4HashSize)[h4] = pos;
+ (hash + kFix5HashSize)[hv] = pos;
+
+ maxLen = 0;
+ offset = 0;
+
+ if (d2 < p->cyclicBufferSize && *(cur - d2) == *cur)
+ {
+ distances[0] = maxLen = 2;
+ distances[1] = d2 - 1;
+ offset = 2;
+ if (*(cur - d2 + 2) == cur[2])
+ distances[0] = maxLen = 3;
+ else if (d3 < p->cyclicBufferSize && *(cur - d3) == *cur)
+ {
+ distances[2] = maxLen = 3;
+ distances[3] = d3 - 1;
+ offset = 4;
+ d2 = d3;
+ }
+ }
+ else if (d3 < p->cyclicBufferSize && *(cur - d3) == *cur)
+ {
+ distances[0] = maxLen = 3;
+ distances[1] = d3 - 1;
+ offset = 2;
+ d2 = d3;
+ }
+
+ if (d2 != d4 && d4 < p->cyclicBufferSize
+ && *(cur - d4) == *cur
+ && *(cur - d4 + 3) == *(cur + 3))
+ {
+ maxLen = 4;
+ distances[(size_t)offset + 1] = d4 - 1;
+ offset += 2;
+ d2 = d4;
+ }
+
+ if (offset != 0)
+ {
+ UPDATE_maxLen
+ distances[(size_t)offset - 2] = maxLen;
+ if (maxLen == lenLimit)
+ {
+ SkipMatchesSpec(lenLimit, curMatch, MF_PARAMS(p));
+ MOVE_POS_RET;
+ }
+ }
+
+ if (maxLen < 4)
+ maxLen = 4;
+
+ GET_MATCHES_FOOTER(offset, maxLen)
+}
+*/
+/* Hash-chain match finder with the same three tables as
+ * Bt4_MatchFinder_GetMatches(); the long search follows a chain in son[]
+ * (Hc_GetMatchesSpec) instead of a binary tree.
+ * Writes (length, distance - 1) pairs to distances, advances one position and
+ * returns the number of UInt32 values written (0 when fewer than 4 bytes can
+ * be matched).
+ * NOTE(review): HASH4_CALC comes from LzHash.h and is expected to set h2, h3 and hv. */
+static UInt32 Hc4_MatchFinder_GetMatches(CMatchFinder *p, UInt32 *distances)
+{
+ UInt32 h2, h3, d2, d3, pos;
+ unsigned maxLen, offset;
+ UInt32 *hash;
+ GET_MATCHES_HEADER(4)
+
+ HASH4_CALC;
+
+ hash = p->hash;
+ pos = p->pos;
+
+ d2 = pos - hash [h2]; /* distances to the previous positions with the same h2 / h3 */
+ d3 = pos - (hash + kFix3HashSize)[h3];
+ curMatch = (hash + kFix4HashSize)[hv];
+
+ hash [h2] = pos;
+ (hash + kFix3HashSize)[h3] = pos;
+ (hash + kFix4HashSize)[hv] = pos;
+
+ maxLen = 0;
+ offset = 0;
+
+ if (d2 < p->cyclicBufferSize && *(cur - d2) == *cur)
+ {
+ maxLen = 2;
+ distances[0] = 2;
+ distances[1] = d2 - 1;
+ offset = 2;
+ }
+
+ if (d2 != d3 && d3 < p->cyclicBufferSize && *(cur - d3) == *cur)
+ {
+ maxLen = 3;
+ distances[(size_t)offset + 1] = d3 - 1; /* the length slot of this pair is filled in after UPDATE_maxLen */
+ offset += 2;
+ d2 = d3; /* UPDATE_maxLen works on d2 */
+ }
+
+ if (offset != 0)
+ {
+ UPDATE_maxLen
+ distances[(size_t)offset - 2] = (UInt32)maxLen;
+ if (maxLen == lenLimit)
+ {
+ p->son[p->cyclicBufferPos] = curMatch; /* already at the maximum: just link the position into the chain */
+ MOVE_POS_RET;
+ }
+ }
+
+ if (maxLen < 3)
+ maxLen = 3;
+
+ offset = (unsigned)(Hc_GetMatchesSpec(lenLimit, curMatch, MF_PARAMS(p),
+ distances + offset, maxLen) - (distances));
+ MOVE_POS_RET
+}
+
+/*
+static UInt32 Hc5_MatchFinder_GetMatches(CMatchFinder *p, UInt32 *distances)
+{
+ UInt32 h2, h3, h4, d2, d3, d4, maxLen, offset, pos;
+ UInt32 *hash;
+ GET_MATCHES_HEADER(5)
+
+ HASH5_CALC;
+
+ hash = p->hash;
+ pos = p->pos;
+
+ d2 = pos - hash [h2];
+ d3 = pos - (hash + kFix3HashSize)[h3];
+ d4 = pos - (hash + kFix4HashSize)[h4];
+
+ curMatch = (hash + kFix5HashSize)[hv];
+
+ hash [h2] = pos;
+ (hash + kFix3HashSize)[h3] = pos;
+ (hash + kFix4HashSize)[h4] = pos;
+ (hash + kFix5HashSize)[hv] = pos;
+
+ maxLen = 0;
+ offset = 0;
+
+ if (d2 < p->cyclicBufferSize && *(cur - d2) == *cur)
+ {
+ distances[0] = maxLen = 2;
+ distances[1] = d2 - 1;
+ offset = 2;
+ if (*(cur - d2 + 2) == cur[2])
+ distances[0] = maxLen = 3;
+ else if (d3 < p->cyclicBufferSize && *(cur - d3) == *cur)
+ {
+ distances[2] = maxLen = 3;
+ distances[3] = d3 - 1;
+ offset = 4;
+ d2 = d3;
+ }
+ }
+ else if (d3 < p->cyclicBufferSize && *(cur - d3) == *cur)
+ {
+ distances[0] = maxLen = 3;
+ distances[1] = d3 - 1;
+ offset = 2;
+ d2 = d3;
+ }
+
+ if (d2 != d4 && d4 < p->cyclicBufferSize
+ && *(cur - d4) == *cur
+ && *(cur - d4 + 3) == *(cur + 3))
+ {
+ maxLen = 4;
+ distances[(size_t)offset + 1] = d4 - 1;
+ offset += 2;
+ d2 = d4;
+ }
+
+ if (offset != 0)
+ {
+ UPDATE_maxLen
+ distances[(size_t)offset - 2] = maxLen;
+ if (maxLen == lenLimit)
+ {
+ p->son[p->cyclicBufferPos] = curMatch;
+ MOVE_POS_RET;
+ }
+ }
+
+ if (maxLen < 4)
+ maxLen = 4;
+
+ offset = (UInt32)(Hc_GetMatchesSpec(lenLimit, curMatch, MF_PARAMS(p),
+ distances + offset, maxLen) - (distances));
+ MOVE_POS_RET
+}
+*/
+/* Hash-chain match finder using the HASH_ZIP_CALC hash and a single table.
+ * Writes (length, distance - 1) pairs to distances, advances one position and
+ * returns the number of UInt32 values written (0 when fewer than 3 bytes can
+ * be matched). Only matches longer than 2 are reported.
+ * NOTE(review): HASH_ZIP_CALC comes from LzHash.h and is expected to set hv. */
+UInt32 Hc3Zip_MatchFinder_GetMatches(CMatchFinder *p, UInt32 *distances)
+{
+ unsigned offset;
+ GET_MATCHES_HEADER(3)
+ HASH_ZIP_CALC;
+ curMatch = p->hash[hv]; /* previous position with this hash = search start */
+ p->hash[hv] = p->pos;
+ offset = (unsigned)(Hc_GetMatchesSpec(lenLimit, curMatch, MF_PARAMS(p),
+ distances, 2) - (distances));
+ MOVE_POS_RET
+}
+/* Advances num positions without reporting matches, keeping the hash table
+ * and the binary tree up to date. The do/while form runs at least once, so
+ * num is expected to be nonzero. */
+static void Bt2_MatchFinder_Skip(CMatchFinder *p, UInt32 num)
+{
+ do
+ {
+ SKIP_HEADER(2) /* too little data: moves one position and "continue"s to the loop test */
+ HASH2_CALC;
+ curMatch = p->hash[hv];
+ p->hash[hv] = p->pos;
+ SKIP_FOOTER
+ }
+ while (--num != 0);
+}
+/* Advances num positions without reporting matches, keeping the
+ * HASH_ZIP_CALC table and the binary tree up to date. The do/while form runs
+ * at least once, so num is expected to be nonzero. */
+void Bt3Zip_MatchFinder_Skip(CMatchFinder *p, UInt32 num)
+{
+ do
+ {
+ SKIP_HEADER(3) /* too little data: moves one position and "continue"s to the loop test */
+ HASH_ZIP_CALC;
+ curMatch = p->hash[hv];
+ p->hash[hv] = p->pos;
+ SKIP_FOOTER
+ }
+ while (--num != 0);
+}
+/* Advances num positions without reporting matches, updating both hash
+ * tables (h2 and main) and the binary tree. The do/while form runs at least
+ * once, so num is expected to be nonzero. */
+static void Bt3_MatchFinder_Skip(CMatchFinder *p, UInt32 num)
+{
+ do
+ {
+ UInt32 h2;
+ UInt32 *hash;
+ SKIP_HEADER(3) /* too little data: moves one position and "continue"s to the loop test */
+ HASH3_CALC;
+ hash = p->hash;
+ curMatch = (hash + kFix3HashSize)[hv]; /* read the old head before it is overwritten */
+ hash[h2] =
+ (hash + kFix3HashSize)[hv] = p->pos;
+ SKIP_FOOTER
+ }
+ while (--num != 0);
+}
+/* Advances num positions without reporting matches, updating the three hash
+ * tables (h2, h3 and main) and the binary tree. The do/while form runs at
+ * least once, so num is expected to be nonzero. */
+static void Bt4_MatchFinder_Skip(CMatchFinder *p, UInt32 num)
+{
+ do
+ {
+ UInt32 h2, h3;
+ UInt32 *hash;
+ SKIP_HEADER(4) /* too little data: moves one position and "continue"s to the loop test */
+ HASH4_CALC;
+ hash = p->hash;
+ curMatch = (hash + kFix4HashSize)[hv]; /* read the old head before it is overwritten */
+ hash [h2] =
+ (hash + kFix3HashSize)[h3] =
+ (hash + kFix4HashSize)[hv] = p->pos;
+ SKIP_FOOTER
+ }
+ while (--num != 0);
+}
+
+/*
+static void Bt5_MatchFinder_Skip(CMatchFinder *p, UInt32 num)
+{
+ do
+ {
+ UInt32 h2, h3, h4;
+ UInt32 *hash;
+ SKIP_HEADER(5)
+ HASH5_CALC;
+ hash = p->hash;
+ curMatch = (hash + kFix5HashSize)[hv];
+ hash [h2] =
+ (hash + kFix3HashSize)[h3] =
+ (hash + kFix4HashSize)[h4] =
+ (hash + kFix5HashSize)[hv] = p->pos;
+ SKIP_FOOTER
+ }
+ while (--num != 0);
+}
+*/
+/* Advances num positions without reporting matches, updating the three hash
+ * tables and linking each position into its hash chain. The do/while form
+ * runs at least once, so num is expected to be nonzero. */
+static void Hc4_MatchFinder_Skip(CMatchFinder *p, UInt32 num)
+{
+ do
+ {
+ UInt32 h2, h3;
+ UInt32 *hash;
+ SKIP_HEADER(4) /* too little data: moves one position and "continue"s to the loop test */
+ HASH4_CALC;
+ hash = p->hash;
+ curMatch = (hash + kFix4HashSize)[hv]; /* read the old head before it is overwritten */
+ hash [h2] =
+ (hash + kFix3HashSize)[h3] =
+ (hash + kFix4HashSize)[hv] = p->pos;
+ p->son[p->cyclicBufferPos] = curMatch; /* chain link: current position -> previous head */
+ MOVE_POS
+ }
+ while (--num != 0);
+}
+
+/*
+static void Hc5_MatchFinder_Skip(CMatchFinder *p, UInt32 num)
+{
+ do
+ {
+ UInt32 h2, h3, h4;
+ UInt32 *hash;
+ SKIP_HEADER(5)
+ HASH5_CALC;
+ hash = p->hash;
+ curMatch = (hash + kFix5HashSize)[hv];
+ hash [h2] =
+ (hash + kFix3HashSize)[h3] =
+ (hash + kFix4HashSize)[h4] =
+ (hash + kFix5HashSize)[hv] = p->pos;
+ p->son[p->cyclicBufferPos] = curMatch;
+ MOVE_POS
+ }
+ while (--num != 0);
+}
+*/
+/* Advances num positions without reporting matches, updating the
+ * HASH_ZIP_CALC table and linking each position into its hash chain. The
+ * do/while form runs at least once, so num is expected to be nonzero. */
+void Hc3Zip_MatchFinder_Skip(CMatchFinder *p, UInt32 num)
+{
+ do
+ {
+ SKIP_HEADER(3) /* too little data: moves one position and "continue"s to the loop test */
+ HASH_ZIP_CALC;
+ curMatch = p->hash[hv];
+ p->hash[hv] = p->pos;
+ p->son[p->cyclicBufferPos] = curMatch; /* chain link: current position -> previous head */
+ MOVE_POS
+ }
+ while (--num != 0);
+}
+/* Fills vTable with the functions that match the configuration of p:
+ * Init, GetNumAvailableBytes and GetPointerToCurrentPos are always the same;
+ * GetMatches / Skip are Hc4 when btMode is off, otherwise Bt2, Bt3 or Bt4
+ * depending on numHashBytes (any value other than 2 or 3 selects Bt4). */
+void MatchFinder_CreateVTable(CMatchFinder *p, IMatchFinder *vTable)
+{
+ vTable->Init = (Mf_Init_Func)MatchFinder_Init;
+ vTable->GetNumAvailableBytes = (Mf_GetNumAvailableBytes_Func)MatchFinder_GetNumAvailableBytes;
+ vTable->GetPointerToCurrentPos = (Mf_GetPointerToCurrentPos_Func)MatchFinder_GetPointerToCurrentPos;
+ if (!p->btMode)
+ {
+ /* if (p->numHashBytes <= 4) */
+ {
+ vTable->GetMatches = (Mf_GetMatches_Func)Hc4_MatchFinder_GetMatches;
+ vTable->Skip = (Mf_Skip_Func)Hc4_MatchFinder_Skip;
+ }
+ /*
+ else
+ {
+ vTable->GetMatches = (Mf_GetMatches_Func)Hc5_MatchFinder_GetMatches;
+ vTable->Skip = (Mf_Skip_Func)Hc5_MatchFinder_Skip;
+ }
+ */
+ }
+ else if (p->numHashBytes == 2)
+ {
+ vTable->GetMatches = (Mf_GetMatches_Func)Bt2_MatchFinder_GetMatches;
+ vTable->Skip = (Mf_Skip_Func)Bt2_MatchFinder_Skip;
+ }
+ else if (p->numHashBytes == 3)
+ {
+ vTable->GetMatches = (Mf_GetMatches_Func)Bt3_MatchFinder_GetMatches;
+ vTable->Skip = (Mf_Skip_Func)Bt3_MatchFinder_Skip;
+ }
+ else /* if (p->numHashBytes == 4) */
+ {
+ vTable->GetMatches = (Mf_GetMatches_Func)Bt4_MatchFinder_GetMatches;
+ vTable->Skip = (Mf_Skip_Func)Bt4_MatchFinder_Skip;
+ }
+ /*
+ else
+ {
+ vTable->GetMatches = (Mf_GetMatches_Func)Bt5_MatchFinder_GetMatches;
+ vTable->Skip = (Mf_Skip_Func)Bt5_MatchFinder_Skip;
+ }
+ */
+}
diff --git a/htp/lzma/LzFind.h b/htp/lzma/LzFind.h
new file mode 100644
index 0000000..c77adde
--- /dev/null
+++ b/htp/lzma/LzFind.h
@@ -0,0 +1,121 @@
+/* LzFind.h -- Match finder for LZ algorithms
+2017-06-10 : Igor Pavlov : Public domain */
+
+#ifndef __LZ_FIND_H
+#define __LZ_FIND_H
+
+#include "7zTypes.h"
+
+EXTERN_C_BEGIN
+
+typedef UInt32 CLzRef;
+/* State of the LZ match finder used by the Hc* and Bt* GetMatches / Skip
+   routines. Only fields whose role is visible from this header, LzHash.h and
+   the vtable setup are annotated. */
+typedef struct _CMatchFinder
+{
+ Byte *buffer; /* returned by Inline_MatchFinder_GetPointerToCurrentPos() */
+ UInt32 pos; /* current position; the value the Skip routines store into hash[] */
+ UInt32 posLimit;
+ UInt32 streamPos; /* streamPos - pos is the number of available bytes */
+ UInt32 lenLimit;
+
+ UInt32 cyclicBufferPos; /* index into son[] for the current position */
+ UInt32 cyclicBufferSize; /* it must be = (historySize + 1) */
+
+ Byte streamEndWasReached; /* tested by Inline_MatchFinder_IsFinishedOK() */
+ Byte btMode; /* non-zero selects the Bt* routines, zero the Hc* routines */
+ Byte bigHash;
+ Byte directInput; /* when set, directInputRem must be 0 for "finished OK" */
+
+ UInt32 matchMaxLen;
+ CLzRef *hash; /* table indexed by hash value, holding positions */
+ CLzRef *son; /* table indexed by cyclicBufferPos */
+ UInt32 hashMask; /* mask applied to the main hash value in HASH3/4/5_CALC */
+ UInt32 cutValue;
+
+ Byte *bufferBase;
+ ISeqInStream *stream;
+
+ UInt32 blockSize;
+ UInt32 keepSizeBefore;
+ UInt32 keepSizeAfter;
+
+ UInt32 numHashBytes; /* 2 / 3 / other selects Bt2 / Bt3 / Bt4 in MatchFinder_CreateVTable() */
+ size_t directInputRem;
+ UInt32 historySize;
+ UInt32 fixedHashSize;
+ UInt32 hashSizeSum;
+ SRes result;
+ UInt32 crc[256]; /* per-byte table mixed into the hash values (see LzHash.h) */
+ size_t numRefs;
+
+ UInt64 expectedDataSize;
+} CMatchFinder;
+/* Macro accessor: yields p->buffer. */
+#define Inline_MatchFinder_GetPointerToCurrentPos(p) ((p)->buffer)
+/* Macro accessor: number of bytes between pos and streamPos. */
+#define Inline_MatchFinder_GetNumAvailableBytes(p) ((p)->streamPos - (p)->pos)
+/* Non-zero when the stream end was reached, pos has caught up with streamPos
+   and, for direct input, no input remains (directInputRem == 0). */
+#define Inline_MatchFinder_IsFinishedOK(p) \
+ ((p)->streamEndWasReached \
+ && (p)->streamPos == (p)->pos \
+ && (!(p)->directInput || (p)->directInputRem == 0))
+
+int MatchFinder_NeedMove(CMatchFinder *p);
+Byte *MatchFinder_GetPointerToCurrentPos(CMatchFinder *p);
+void MatchFinder_MoveBlock(CMatchFinder *p);
+void MatchFinder_ReadIfRequired(CMatchFinder *p);
+
+void MatchFinder_Construct(CMatchFinder *p);
+
+/* Conditions:
+ historySize <= 3 GB
+ keepAddBufferBefore + matchMaxLen + keepAddBufferAfter < 511MB
+*/
+int MatchFinder_Create(CMatchFinder *p, UInt32 historySize,
+ UInt32 keepAddBufferBefore, UInt32 matchMaxLen, UInt32 keepAddBufferAfter,
+ ISzAllocPtr alloc);
+void MatchFinder_Free(CMatchFinder *p, ISzAllocPtr alloc);
+void MatchFinder_Normalize3(UInt32 subValue, CLzRef *items, size_t numItems);
+void MatchFinder_ReduceOffsets(CMatchFinder *p, UInt32 subValue);
+
+UInt32 * GetMatchesSpec1(UInt32 lenLimit, UInt32 curMatch, UInt32 pos, const Byte *buffer, CLzRef *son,
+ UInt32 _cyclicBufferPos, UInt32 _cyclicBufferSize, UInt32 _cutValue,
+ UInt32 *distances, UInt32 maxLen);
+
+/*
+Conditions:
+ Mf_GetNumAvailableBytes_Func must be called before each Mf_GetMatchLen_Func.
+ Mf_GetPointerToCurrentPos_Func's result must be used only before any other function
+*/
+/* Signatures of the match-finder operations. The finder is passed as an
+   untyped `object`; MatchFinder_CreateVTable() stores CMatchFinder routines
+   in these slots through casts. */
+typedef void (*Mf_Init_Func)(void *object);
+typedef UInt32 (*Mf_GetNumAvailableBytes_Func)(void *object);
+typedef const Byte * (*Mf_GetPointerToCurrentPos_Func)(void *object);
+typedef UInt32 (*Mf_GetMatches_Func)(void *object, UInt32 *distances);
+typedef void (*Mf_Skip_Func)(void *object, UInt32);
+/* Table of match-finder operations, filled in by MatchFinder_CreateVTable(). */
+typedef struct _IMatchFinder
+{
+ Mf_Init_Func Init;
+ Mf_GetNumAvailableBytes_Func GetNumAvailableBytes;
+ Mf_GetPointerToCurrentPos_Func GetPointerToCurrentPos;
+ Mf_GetMatches_Func GetMatches;
+ Mf_Skip_Func Skip;
+} IMatchFinder;
+
+void MatchFinder_CreateVTable(CMatchFinder *p, IMatchFinder *vTable);
+
+void MatchFinder_Init_LowHash(CMatchFinder *p);
+void MatchFinder_Init_HighHash(CMatchFinder *p);
+void MatchFinder_Init_3(CMatchFinder *p, int readData);
+void MatchFinder_Init(CMatchFinder *p);
+
+UInt32 Bt3Zip_MatchFinder_GetMatches(CMatchFinder *p, UInt32 *distances);
+UInt32 Hc3Zip_MatchFinder_GetMatches(CMatchFinder *p, UInt32 *distances);
+
+void Bt3Zip_MatchFinder_Skip(CMatchFinder *p, UInt32 num);
+void Hc3Zip_MatchFinder_Skip(CMatchFinder *p, UInt32 num);
+
+EXTERN_C_END
+
+#endif
diff --git a/htp/lzma/LzHash.h b/htp/lzma/LzHash.h
new file mode 100644
index 0000000..2191444
--- /dev/null
+++ b/htp/lzma/LzHash.h
@@ -0,0 +1,57 @@
+/* LzHash.h -- HASH functions for LZ algorithms
+2015-04-12 : Igor Pavlov : Public domain */
+
+#ifndef __LZ_HASH_H
+#define __LZ_HASH_H
+/* Sizes (in entries) of the 2-, 3- and 4-byte hash ranges. */
+#define kHash2Size (1 << 10)
+#define kHash3Size (1 << 16)
+#define kHash4Size (1 << 20)
+/* Offsets obtained by summing the sizes of the preceding ranges; used as
+   (hash + kFixNHashSize)[...] to address the N-byte range. */
+#define kFix3HashSize (kHash2Size)
+#define kFix4HashSize (kHash2Size + kHash3Size)
+#define kFix5HashSize (kHash2Size + kHash3Size + kHash4Size)
+/* All *_CALC macros expect `cur` (pointer to the current bytes) and write to
+   variables (hv, h2, h3, h4) that the user must have declared; the HASHn and
+   HASH_ZIP forms also expect `p` with crc[] (HASH3/4/5 use p->hashMask too).
+   HASH2_CALC: hv = first two bytes combined as a 16-bit little-endian value. */
+#define HASH2_CALC hv = cur[0] | ((UInt32)cur[1] << 8);
+/* HASH3_CALC: h2 from bytes 0..1, hv from bytes 0..2 masked by p->hashMask. */
+#define HASH3_CALC { \
+ UInt32 temp = p->crc[cur[0]] ^ cur[1]; \
+ h2 = temp & (kHash2Size - 1); \
+ hv = (temp ^ ((UInt32)cur[2] << 8)) & p->hashMask; }
+/* HASH4_CALC: additionally h3 from bytes 0..2; hv covers bytes 0..3. */
+#define HASH4_CALC { \
+ UInt32 temp = p->crc[cur[0]] ^ cur[1]; \
+ h2 = temp & (kHash2Size - 1); \
+ temp ^= ((UInt32)cur[2] << 8); \
+ h3 = temp & (kHash3Size - 1); \
+ hv = (temp ^ (p->crc[cur[3]] << 5)) & p->hashMask; }
+/* HASH5_CALC: additionally h4 from bytes 0..3; hv covers bytes 0..4. */
+#define HASH5_CALC { \
+ UInt32 temp = p->crc[cur[0]] ^ cur[1]; \
+ h2 = temp & (kHash2Size - 1); \
+ temp ^= ((UInt32)cur[2] << 8); \
+ h3 = temp & (kHash3Size - 1); \
+ temp ^= (p->crc[cur[3]] << 5); \
+ h4 = temp & (kHash4Size - 1); \
+ hv = (temp ^ (p->crc[cur[4]] << 3)) & p->hashMask; }
+/* HASH_ZIP_CALC: 16-bit hash of three bytes (result masked with 0xFFFF). */
+/* #define HASH_ZIP_CALC hv = ((cur[0] | ((UInt32)cur[1] << 8)) ^ p->crc[cur[2]]) & 0xFFFF; */
+#define HASH_ZIP_CALC hv = ((cur[2] | ((UInt32)cur[0] << 8)) ^ p->crc[cur[1]]) & 0xFFFF;
+
+/* MT_* variants compute only the fixed-size partial hashes (h2 / h3 / h4). */
+#define MT_HASH2_CALC \
+ h2 = (p->crc[cur[0]] ^ cur[1]) & (kHash2Size - 1);
+
+#define MT_HASH3_CALC { \
+ UInt32 temp = p->crc[cur[0]] ^ cur[1]; \
+ h2 = temp & (kHash2Size - 1); \
+ h3 = (temp ^ ((UInt32)cur[2] << 8)) & (kHash3Size - 1); }
+
+#define MT_HASH4_CALC { \
+ UInt32 temp = p->crc[cur[0]] ^ cur[1]; \
+ h2 = temp & (kHash2Size - 1); \
+ temp ^= ((UInt32)cur[2] << 8); \
+ h3 = temp & (kHash3Size - 1); \
+ h4 = (temp ^ (p->crc[cur[3]] << 5)) & (kHash4Size - 1); }
+
+#endif
diff --git a/htp/lzma/LzmaDec.c b/htp/lzma/LzmaDec.c
new file mode 100644
index 0000000..138c36c
--- /dev/null
+++ b/htp/lzma/LzmaDec.c
@@ -0,0 +1,1223 @@
+/* LzmaDec.c -- LZMA Decoder
+2018-07-04 : Igor Pavlov : Public domain */
+
+#include "Precomp.h"
+
+#include <string.h>
+#include <stdlib.h>
+#include <stdint.h>
+
+/* #include "CpuArch.h" */
+#include "LzmaDec.h"
+/* Range coder: `range` is renormalised whenever it drops below kTopValue (1 << 24). */
+#define kNumTopBits 24
+#define kTopValue ((UInt32)1 << kNumTopBits)
+/* Bit probabilities are scaled to kBitModelTotal (1 << 11); every update moves a probability by 1/32 (kNumMoveBits) of the remaining distance. */
+#define kNumBitModelTotalBits 11
+#define kBitModelTotal (1 << kNumBitModelTotalBits)
+#define kNumMoveBits 5
+/* Number of input bytes collected before the range coder is initialised. */
+#define RC_INIT_SIZE 5
+/* Shifts one more input byte into `code` when `range` is below kTopValue. Uses the locals range, code and buf of the enclosing function. */
+#define NORMALIZE if (range < kTopValue) { range <<= 8; code = (code << 8) | (*buf++); }
+/* IF_BIT_0(p) loads probability *p into ttt, normalises, computes `bound` and
+   opens an `if (code < bound)`: the true branch is bit 0, the else branch is
+   bit 1. UPDATE_0 / UPDATE_1 must be the first statement of the respective
+   branch: they narrow range/code and adapt *p. GET_BIT2 wraps both branches
+   and appends the decoded bit to i (i = 2 * i + bit), running A0 or A1. */
+#define IF_BIT_0(p) ttt = *(p); NORMALIZE; bound = (range >> kNumBitModelTotalBits) * (UInt32)ttt; if (code < bound)
+#define UPDATE_0(p) range = bound; *(p) = (CLzmaProb)(ttt + ((kBitModelTotal - ttt) >> kNumMoveBits));
+#define UPDATE_1(p) range -= bound; code -= bound; *(p) = (CLzmaProb)(ttt - (ttt >> kNumMoveBits));
+#define GET_BIT2(p, i, A0, A1) IF_BIT_0(p) \
+ { UPDATE_0(p); i = (i + i); A0; } else \
+ { UPDATE_1(p); i = (i + i) + 1; A1; }
+/* One step of a bit-tree walk: decodes a bit with probs[i] and sets i = 2 * i + bit. */
+#define TREE_GET_BIT(probs, i) { GET_BIT2(probs + i, i, ;, ;); }
+/* REV_BIT* decode one bit with p[i] and run caller-supplied index updates (A0 for bit 0, A1 for bit 1); the VAR / CONST / LAST forms differ only in those updates. */
+#define REV_BIT(p, i, A0, A1) IF_BIT_0(p + i) \
+ { UPDATE_0(p + i); A0; } else \
+ { UPDATE_1(p + i); A1; }
+#define REV_BIT_VAR( p, i, m) REV_BIT(p, i, i += m; m += m, m += m; i += m; )
+#define REV_BIT_CONST(p, i, m) REV_BIT(p, i, i += m; , i += m * 2; )
+#define REV_BIT_LAST( p, i, m) REV_BIT(p, i, i -= m , ; )
+/* Full bit-tree decode: starts at i = 1, decodes until i >= limit, then removes the leading 1 (i -= limit). */
+#define TREE_DECODE(probs, limit, i) \
+ { i = 1; do { TREE_GET_BIT(probs, i); } while (i < limit); i -= limit; }
+
+/* #define _LZMA_SIZE_OPT */
+
+#ifdef _LZMA_SIZE_OPT
+#define TREE_6_DECODE(probs, i) TREE_DECODE(probs, (1 << 6), i)
+#else
+#define TREE_6_DECODE(probs, i) \
+ { i = 1; \
+ TREE_GET_BIT(probs, i); \
+ TREE_GET_BIT(probs, i); \
+ TREE_GET_BIT(probs, i); \
+ TREE_GET_BIT(probs, i); \
+ TREE_GET_BIT(probs, i); \
+ TREE_GET_BIT(probs, i); \
+ i -= 0x40; }
+#endif
+/* One bit of a literal: the plain form walks the tree at `prob`; the matched form also mixes in the next bit of matchByte until the bits diverge (offs becomes 0). */
+#define NORMAL_LITER_DEC TREE_GET_BIT(prob, symbol)
+#define MATCHED_LITER_DEC \
+ matchByte += matchByte; \
+ bit = offs; \
+ offs &= matchByte; \
+ probLit = prob + (offs + bit + symbol); \
+ GET_BIT2(probLit, symbol, offs ^= bit; , ;)
+
+
+/* *_CHECK variants: same arithmetic, but they return DUMMY_ERROR from the enclosing function when buf reaches bufLimit and they never modify the probabilities. */
+#define NORMALIZE_CHECK if (range < kTopValue) { if (buf >= bufLimit) return DUMMY_ERROR; range <<= 8; code = (code << 8) | (*buf++); }
+
+#define IF_BIT_0_CHECK(p) ttt = *(p); NORMALIZE_CHECK; bound = (range >> kNumBitModelTotalBits) * (UInt32)ttt; if (code < bound)
+#define UPDATE_0_CHECK range = bound;
+#define UPDATE_1_CHECK range -= bound; code -= bound;
+#define GET_BIT2_CHECK(p, i, A0, A1) IF_BIT_0_CHECK(p) \
+ { UPDATE_0_CHECK; i = (i + i); A0; } else \
+ { UPDATE_1_CHECK; i = (i + i) + 1; A1; }
+#define GET_BIT_CHECK(p, i) GET_BIT2_CHECK(p, i, ; , ;)
+#define TREE_DECODE_CHECK(probs, limit, i) \
+ { i = 1; do { GET_BIT_CHECK(probs + i, i) } while (i < limit); i -= limit; }
+
+
+#define REV_BIT_CHECK(p, i, m) IF_BIT_0_CHECK(p + i) \
+ { UPDATE_0_CHECK; i += m; m += m; } else \
+ { UPDATE_1_CHECK; m += m; i += m; }
+
+/* Position-state limits: LzmaProps_Decode() accepts pb values up to 4 only. */
+#define kNumPosBitsMax 4
+#define kNumPosStatesMax (1 << kNumPosBitsMax)
+
+#define kLenNumLowBits 3
+#define kLenNumLowSymbols (1 << kLenNumLowBits)
+#define kLenNumHighBits 8
+#define kLenNumHighSymbols (1 << kLenNumHighBits)
+/* Layout of one length coder, as offsets from its first probability: the LenLow area (which also holds LenChoice at offset 0 and LenChoice2 at offset 8) followed by the 256-entry LenHigh tree. */
+#define LenLow 0
+#define LenHigh (LenLow + 2 * (kNumPosStatesMax << kLenNumLowBits))
+#define kNumLenProbs (LenHigh + kLenNumHighSymbols)
+
+#define LenChoice LenLow
+#define LenChoice2 (LenLow + (1 << kLenNumLowBits))
+/* kNumStates2 (16) is the row stride of the tables indexed by posState + state; state values below kNumLitStates are the ones reached after decoding a literal. */
+#define kNumStates 12
+#define kNumStates2 16
+#define kNumLitStates 7
+
+#define kStartPosModelIndex 4
+#define kEndPosModelIndex 14
+#define kNumFullDistances (1 << (kEndPosModelIndex >> 1))
+
+#define kNumPosSlotBits 6
+#define kNumLenToPosStates 4
+
+#define kNumAlignBits 4
+#define kAlignTableSize (1 << kNumAlignBits)
+
+#define kMatchMinLen 2
+#define kMatchSpecLenStart (kMatchMinLen + kLenNumLowSymbols * 2 + kLenNumHighSymbols)
+
+/* External ASM code needs same CLzmaProb array layout. So don't change it. */
+
+/* (probs_1664) is faster and better for code size at some platforms */
+/*
+#ifdef MY_CPU_X86_OR_AMD64
+*/
+#define kStartOffset 1664
+#define GET_PROBS p->probs_1664
+/*
+#define GET_PROBS p->probs + kStartOffset
+#else
+#define kStartOffset 0
+#define GET_PROBS p->probs
+#endif
+*/
+/* Offsets of the probability tables relative to GET_PROBS. GET_PROBS is p->probs_1664, i.e. kStartOffset entries past the start of the array, so SpecPos is negative. */
+#define SpecPos (-kStartOffset)
+#define IsRep0Long (SpecPos + kNumFullDistances)
+#define RepLenCoder (IsRep0Long + (kNumStates2 << kNumPosBitsMax))
+#define LenCoder (RepLenCoder + kNumLenProbs)
+#define IsMatch (LenCoder + kNumLenProbs)
+#define Align (IsMatch + (kNumStates2 << kNumPosBitsMax))
+#define IsRep (Align + kAlignTableSize)
+#define IsRepG0 (IsRep + kNumStates)
+#define IsRepG1 (IsRepG0 + kNumStates)
+#define IsRepG2 (IsRepG1 + kNumStates)
+#define PosSlot (IsRepG2 + kNumStates)
+#define Literal (PosSlot + (kNumLenToPosStates << kNumPosSlotBits))
+#define NUM_BASE_PROBS (Literal + kStartOffset)
+/* Build-time check: with a non-zero kStartOffset the Align table must sit exactly at offset 0. */
+#if Align != 0 && kStartOffset != 0
+ #error Stop_Compiling_Bad_LZMA_kAlign
+#endif
+/* Build-time check: number of probabilities that precede the literal tables. */
+#if NUM_BASE_PROBS != 1984
+ #error Stop_Compiling_Bad_LZMA_PROBS
+#endif
+
+/* Probabilities per literal context (0x300 = 768); the number of contexts is 1 << (lc + lp). */
+#define LZMA_LIT_SIZE 0x300
+
+#define LzmaProps_GetNumProbs(p) (NUM_BASE_PROBS + ((UInt32)LZMA_LIT_SIZE << ((p)->lc + (p)->lp)))
+
+/* posState is pre-shifted by 4 so that posState + state indexes tables whose rows hold kNumStates2 (16) entries. */
+#define CALC_POS_STATE(processedPos, pbMask) (((processedPos) & (pbMask)) << 4)
+#define COMBINED_PS_STATE (posState + state)
+#define GET_LEN_STATE (posState)
+/* Smallest dictionary size: LzmaProps_Decode() raises smaller header values to it. */
+#define LZMA_DIC_MIN (1 << 12)
+
+/*
+p->remainLen : shows status of LZMA decoder:
+ < kMatchSpecLenStart : normal remain
+ = kMatchSpecLenStart : finished
+ = kMatchSpecLenStart + 1 : need init range coder
+ = kMatchSpecLenStart + 2 : need init range coder and state
+*/
+
+/* ---------- LZMA_DECODE_REAL ---------- */
+/*
+LzmaDec_DecodeReal_3() can be implemented in external ASM file.
+3 - is the code compatibility version of that function for check at link time.
+*/
+
+#define LZMA_DECODE_REAL LzmaDec_DecodeReal_3
+
+/*
+LZMA_DECODE_REAL()
+In:
+ RangeCoder is normalized
+ if (p->dicPos == limit)
+ {
+ LzmaDec_TryDummy() was called before to exclude LITERAL and MATCH-REP cases.
+ So first symbol can be only MATCH-NON-REP. And if that MATCH-NON-REP symbol
+ is not END_OF_PAYLOAD_MARKER, then function returns error code.
+ }
+
+Processing:
+ first LZMA symbol will be decoded in any case
+ All checks for limits are at the end of main loop,
+ It will decode new LZMA-symbols while (p->buf < bufLimit && dicPos < limit),
+ RangeCoder is still without last normalization when (p->buf < bufLimit) is being checked.
+
+Out:
+ RangeCoder is normalized
+ Result:
+ SZ_OK - OK
+ SZ_ERROR_DATA - Error
+ p->remainLen:
+ < kMatchSpecLenStart : normal remain
+ = kMatchSpecLenStart : finished
+*/
+
+/* Portable C version of the main decode loop. It is compiled only when
+   _LZMA_DEC_OPT is not defined; otherwise just the prototype is emitted.
+   The function works on local copies of the decoder state and writes them
+   back to *p only on the normal exit path; the two SZ_ERROR_DATA exits store
+   dicPos alone. */
+#ifdef _LZMA_DEC_OPT
+
+int MY_FAST_CALL LZMA_DECODE_REAL(CLzmaDec *p, SizeT limit, const Byte *bufLimit);
+
+#else
+
+static
+int MY_FAST_CALL LZMA_DECODE_REAL(CLzmaDec *p, SizeT limit, const Byte *bufLimit)
+{
+ CLzmaProb *probs = GET_PROBS;
+ unsigned state = (unsigned)p->state;
+ UInt32 rep0 = p->reps[0], rep1 = p->reps[1], rep2 = p->reps[2], rep3 = p->reps[3];
+ unsigned pbMask = ((unsigned)1 << (p->prop.pb)) - 1;
+ unsigned lc = p->prop.lc;
+ unsigned lpMask = ((unsigned)0x100 << p->prop.lp) - ((unsigned)0x100 >> lc);
+
+ Byte *dic = p->dic;
+ SizeT dicBufSize = p->dicBufSize;
+ SizeT dicPos = p->dicPos;
+
+ UInt32 processedPos = p->processedPos;
+ UInt32 checkDicSize = p->checkDicSize;
+ unsigned len = 0;
+
+ const Byte *buf = p->buf;
+ UInt32 range = p->range;
+ UInt32 code = p->code;
+
+ do
+ {
+ CLzmaProb *prob;
+ UInt32 bound;
+ unsigned ttt;
+ unsigned posState = CALC_POS_STATE(processedPos, pbMask);
+
+ prob = probs + IsMatch + COMBINED_PS_STATE;
+ IF_BIT_0(prob) /* IsMatch bit 0: the symbol is a literal */
+ {
+ unsigned symbol;
+ UPDATE_0(prob);
+ prob = probs + Literal;
+ if (processedPos != 0 || checkDicSize != 0) /* a previous byte exists: select the literal context from it */
+ prob += (UInt32)3 * ((((processedPos << 8) + dic[(dicPos == 0 ? dicBufSize : dicPos) - 1]) & lpMask) << lc);
+ processedPos++;
+
+ if (state < kNumLitStates) /* plain literal */
+ {
+ state -= (state < 4) ? state : 3;
+ symbol = 1;
+ #ifdef _LZMA_SIZE_OPT
+ do { NORMAL_LITER_DEC } while (symbol < 0x100);
+ #else
+ NORMAL_LITER_DEC
+ NORMAL_LITER_DEC
+ NORMAL_LITER_DEC
+ NORMAL_LITER_DEC
+ NORMAL_LITER_DEC
+ NORMAL_LITER_DEC
+ NORMAL_LITER_DEC
+ NORMAL_LITER_DEC
+ #endif
+ }
+ else /* matched literal: decoding is guided by the byte at distance rep0 */
+ {
+ unsigned matchByte = dic[dicPos - rep0 + (dicPos < rep0 ? dicBufSize : 0)];
+ unsigned offs = 0x100;
+ state -= (state < 10) ? 3 : 6;
+ symbol = 1;
+ #ifdef _LZMA_SIZE_OPT
+ do
+ {
+ unsigned bit;
+ CLzmaProb *probLit;
+ MATCHED_LITER_DEC
+ }
+ while (symbol < 0x100);
+ #else
+ {
+ unsigned bit;
+ CLzmaProb *probLit;
+ MATCHED_LITER_DEC
+ MATCHED_LITER_DEC
+ MATCHED_LITER_DEC
+ MATCHED_LITER_DEC
+ MATCHED_LITER_DEC
+ MATCHED_LITER_DEC
+ MATCHED_LITER_DEC
+ MATCHED_LITER_DEC
+ }
+ #endif
+ }
+
+ dic[dicPos++] = (Byte)symbol;
+ continue;
+ }
+
+ {
+ UPDATE_1(prob);
+ prob = probs + IsRep + state;
+ IF_BIT_0(prob) /* IsRep bit 0: match with a newly coded distance */
+ {
+ UPDATE_0(prob);
+ state += kNumStates; /* marks "distance still to be decoded" for the test after the length */
+ prob = probs + LenCoder;
+ }
+ else
+ {
+ UPDATE_1(prob);
+ /*
+ // that case was checked before with kBadRepCode
+ if (checkDicSize == 0 && processedPos == 0)
+ return SZ_ERROR_DATA;
+ */
+ prob = probs + IsRepG0 + state;
+ IF_BIT_0(prob)
+ {
+ UPDATE_0(prob);
+ prob = probs + IsRep0Long + COMBINED_PS_STATE;
+ IF_BIT_0(prob) /* single byte copied from distance rep0 */
+ {
+ UPDATE_0(prob);
+ dic[dicPos] = dic[dicPos - rep0 + (dicPos < rep0 ? dicBufSize : 0)];
+ dicPos++;
+ processedPos++;
+ state = state < kNumLitStates ? 9 : 11;
+ continue;
+ }
+ UPDATE_1(prob);
+ }
+ else /* reuse rep1, rep2 or rep3 and move it to the front */
+ {
+ UInt32 distance;
+ UPDATE_1(prob);
+ prob = probs + IsRepG1 + state;
+ IF_BIT_0(prob)
+ {
+ UPDATE_0(prob);
+ distance = rep1;
+ }
+ else
+ {
+ UPDATE_1(prob);
+ prob = probs + IsRepG2 + state;
+ IF_BIT_0(prob)
+ {
+ UPDATE_0(prob);
+ distance = rep2;
+ }
+ else
+ {
+ UPDATE_1(prob);
+ distance = rep3;
+ rep3 = rep2;
+ }
+ rep2 = rep1;
+ }
+ rep1 = rep0;
+ rep0 = distance;
+ }
+ state = state < kNumLitStates ? 8 : 11;
+ prob = probs + RepLenCoder;
+ }
+
+ #ifdef _LZMA_SIZE_OPT
+ {
+ unsigned lim, offset;
+ CLzmaProb *probLen = prob + LenChoice;
+ IF_BIT_0(probLen)
+ {
+ UPDATE_0(probLen);
+ probLen = prob + LenLow + GET_LEN_STATE;
+ offset = 0;
+ lim = (1 << kLenNumLowBits);
+ }
+ else
+ {
+ UPDATE_1(probLen);
+ probLen = prob + LenChoice2;
+ IF_BIT_0(probLen)
+ {
+ UPDATE_0(probLen);
+ probLen = prob + LenLow + GET_LEN_STATE + (1 << kLenNumLowBits);
+ offset = kLenNumLowSymbols;
+ lim = (1 << kLenNumLowBits);
+ }
+ else
+ {
+ UPDATE_1(probLen);
+ probLen = prob + LenHigh;
+ offset = kLenNumLowSymbols * 2;
+ lim = (1 << kLenNumHighBits);
+ }
+ }
+ TREE_DECODE(probLen, lim, len);
+ len += offset;
+ }
+ #else
+ {
+ CLzmaProb *probLen = prob + LenChoice;
+ IF_BIT_0(probLen) /* length 0..7 (before kMatchMinLen is added) */
+ {
+ UPDATE_0(probLen);
+ probLen = prob + LenLow + GET_LEN_STATE;
+ len = 1;
+ TREE_GET_BIT(probLen, len);
+ TREE_GET_BIT(probLen, len);
+ TREE_GET_BIT(probLen, len);
+ len -= 8;
+ }
+ else
+ {
+ UPDATE_1(probLen);
+ probLen = prob + LenChoice2;
+ IF_BIT_0(probLen) /* length 8..15: the leading 1 of the tree index supplies the +8 */
+ {
+ UPDATE_0(probLen);
+ probLen = prob + LenLow + GET_LEN_STATE + (1 << kLenNumLowBits);
+ len = 1;
+ TREE_GET_BIT(probLen, len);
+ TREE_GET_BIT(probLen, len);
+ TREE_GET_BIT(probLen, len);
+ }
+ else /* length 16..271 */
+ {
+ UPDATE_1(probLen);
+ probLen = prob + LenHigh;
+ TREE_DECODE(probLen, (1 << kLenNumHighBits), len);
+ len += kLenNumLowSymbols * 2;
+ }
+ }
+ }
+ #endif
+
+ if (state >= kNumStates) /* only true for a non-rep match: decode its distance */
+ {
+ UInt32 distance;
+ prob = probs + PosSlot +
+ ((len < kNumLenToPosStates ? len : kNumLenToPosStates - 1) << kNumPosSlotBits);
+ TREE_6_DECODE(prob, distance);
+ if (distance >= kStartPosModelIndex)
+ {
+ unsigned posSlot = (unsigned)distance;
+ unsigned numDirectBits = (unsigned)(((distance >> 1) - 1));
+ distance = (2 | (distance & 1));
+ if (posSlot < kEndPosModelIndex)
+ {
+ distance <<= numDirectBits;
+ prob = probs + SpecPos;
+ {
+ UInt32 m = 1;
+ distance++;
+ do
+ {
+ REV_BIT_VAR(prob, distance, m);
+ }
+ while (--numDirectBits);
+ distance -= m;
+ }
+ }
+ else
+ {
+ numDirectBits -= kNumAlignBits;
+ do /* direct bits: decoded without a probability model */
+ {
+ NORMALIZE
+ range >>= 1;
+
+ {
+ UInt32 t;
+ code -= range;
+ t = (0 - ((UInt32)code >> 31)); /* (UInt32)((Int32)code >> 31) */
+ distance = (distance << 1) + (t + 1);
+ code += range & t;
+ }
+ /*
+ distance <<= 1;
+ if (code >= range)
+ {
+ code -= range;
+ distance |= 1;
+ }
+ */
+ }
+ while (--numDirectBits);
+ prob = probs + Align;
+ distance <<= kNumAlignBits;
+ {
+ unsigned i = 1;
+ REV_BIT_CONST(prob, i, 1);
+ REV_BIT_CONST(prob, i, 2);
+ REV_BIT_CONST(prob, i, 4);
+ REV_BIT_LAST (prob, i, 8);
+ distance |= i;
+ }
+ if (distance == (UInt32)0xFFFFFFFF) /* end marker: leave the loop with len = kMatchSpecLenStart */
+ {
+ len = kMatchSpecLenStart;
+ state -= kNumStates;
+ break;
+ }
+ }
+ }
+
+ rep3 = rep2;
+ rep2 = rep1;
+ rep1 = rep0;
+ rep0 = distance + 1;
+ state = (state < kNumStates + kNumLitStates) ? kNumLitStates : kNumLitStates + 3;
+ if (distance >= (checkDicSize == 0 ? processedPos: checkDicSize)) /* distance reaches before the available history */
+ {
+ p->dicPos = dicPos;
+ return SZ_ERROR_DATA;
+ }
+ }
+
+ len += kMatchMinLen;
+
+ {
+ SizeT rem;
+ unsigned curLen;
+ SizeT pos;
+
+ if ((rem = limit - dicPos) == 0) /* a match was decoded but there is no room left before limit */
+ {
+ p->dicPos = dicPos;
+ return SZ_ERROR_DATA;
+ }
+
+ curLen = ((rem < len) ? (unsigned)rem : len);
+ pos = dicPos - rep0 + (dicPos < rep0 ? dicBufSize : 0);
+
+ processedPos += (UInt32)curLen;
+
+ len -= curLen; /* whatever does not fit stays in len and is stored in p->remainLen */
+ if (curLen <= dicBufSize - pos) /* source range does not cross the end of the buffer */
+ {
+ Byte *dest = dic + dicPos;
+ ptrdiff_t src = (ptrdiff_t)pos - (ptrdiff_t)dicPos;
+ const Byte *lim = dest + curLen;
+ dicPos += (SizeT)curLen;
+ do
+ *(dest) = (Byte)*(dest + src);
+ while (++dest != lim);
+ }
+ else /* source wraps around: copy byte by byte with wrap of pos */
+ {
+ do
+ {
+ dic[dicPos++] = dic[pos];
+ if (++pos == dicBufSize)
+ pos = 0;
+ }
+ while (--curLen != 0);
+ }
+ }
+ }
+ }
+ while (dicPos < limit && buf < bufLimit);
+
+ NORMALIZE;
+
+ p->buf = buf;
+ p->range = range;
+ p->code = code;
+ p->remainLen = (UInt32)len;
+ p->dicPos = dicPos;
+ p->processedPos = processedPos;
+ p->reps[0] = rep0;
+ p->reps[1] = rep1;
+ p->reps[2] = rep2;
+ p->reps[3] = rep3;
+ p->state = (UInt32)state;
+
+ return SZ_OK;
+}
+#endif
+
+/* Writes the still-pending part of the last match (p->remainLen bytes copied
+   from distance p->reps[0]) into the dictionary, stopping at `limit`.
+   Nothing happens when remainLen is 0 or holds one of the special status
+   values (>= kMatchSpecLenStart). processedPos, remainLen and dicPos are
+   updated, and checkDicSize is switched on when the copy reaches dicSize. */
+static void MY_FAST_CALL LzmaDec_WriteRem(CLzmaDec *p, SizeT limit)
+{
+  Byte *dict;
+  SizeT outPos;
+  SizeT bufSize;
+  SizeT dist; /* we use SizeT to avoid the BUG of VC14 for AMD64 */
+  SizeT room;
+  unsigned count;
+
+  if (p->remainLen == 0 || p->remainLen >= kMatchSpecLenStart)
+    return;
+
+  dict = p->dic;
+  outPos = p->dicPos;
+  bufSize = p->dicBufSize;
+  dist = p->reps[0];
+  count = (unsigned)p->remainLen;
+
+  /* never write past the caller's limit */
+  room = limit - outPos;
+  if (room < count)
+    count = (unsigned)(room);
+
+  if (p->checkDicSize == 0 && p->prop.dicSize - p->processedPos <= count)
+    p->checkDicSize = p->prop.dicSize;
+
+  p->processedPos += (UInt32)count;
+  p->remainLen -= (UInt32)count;
+
+  for (; count != 0; count--)
+  {
+    /* source index, wrapped into the circular dictionary buffer */
+    SizeT from = outPos - dist + (outPos < dist ? bufSize : 0);
+    dict[outPos] = dict[from];
+    outPos++;
+  }
+
+  p->dicPos = outPos;
+}
+
+/* kRange0 is the initial range and kBound0 the bound of the first IsMatch decision while its probability still has the initial value kBitModelTotal / 2. kBadRepCode applies the same step to the following IsRep decision: an initial `code` at or above it would decode a rep-match as the very first symbol. LzmaDec_DecodeReal2() rejects such streams. */
+#define kRange0 0xFFFFFFFF
+#define kBound0 ((kRange0 >> kNumBitModelTotalBits) << (kNumBitModelTotalBits - 1))
+#define kBadRepCode (kBound0 + (((kRange0 - kBound0) >> kNumBitModelTotalBits) << (kNumBitModelTotalBits - 1)))
+#if kBadRepCode != (0xC0000000 - 0x400)
+ #error Stop_Compiling_Bad_LZMA_Check
+#endif
+
+/* Runs LZMA_DECODE_REAL() repeatedly until the dictionary position reaches
+   `limit`, the input reaches `bufLimit`, or the end marker was decoded.
+
+   While fewer than prop.dicSize bytes have been produced (checkDicSize == 0)
+   each pass is capped at the point where processedPos reaches dicSize, so
+   that checkDicSize can be switched on exactly there. If such a cap is needed
+   and the dictionary buffer is still smaller than dicSize, the buffer is
+   grown to dicSize with realloc(), subject to `memlimit`.
+
+   Returns 0 on success, SZ_ERROR_MEM when the buffer may not or cannot be
+   grown, SZ_ERROR_DATA for a stream that starts with a rep-match, or the
+   error reported by LZMA_DECODE_REAL().
+
+   p->dic / p->dicBufSize are updated only after a successful realloc(), so a
+   failed growth leaves the decoder describing the buffer it really owns. */
+static int MY_FAST_CALL LzmaDec_DecodeReal2(CLzmaDec *p, SizeT limit, const Byte *bufLimit, SizeT memlimit)
+{
+  do
+  {
+    SizeT limit2 = limit;
+    if (p->checkDicSize == 0)
+    {
+      UInt32 rem = p->prop.dicSize - p->processedPos;
+      if (limit - p->dicPos > rem) {
+        if (p->dicBufSize < p->prop.dicSize) {
+          SizeT newSize = p->prop.dicSize;
+          Byte *tmp;
+          if (newSize > memlimit) {
+            return SZ_ERROR_MEM;
+          }
+          tmp = realloc(p->dic, newSize);
+          if (!tmp) {
+            /* the old block is still valid and still dicBufSize bytes long */
+            return SZ_ERROR_MEM;
+          }
+          p->dic = tmp;
+          p->dicBufSize = newSize;
+        }
+        limit2 = p->dicPos + rem;
+      }
+
+      /* a stream must not begin with a repeated match */
+      if (p->processedPos == 0)
+        if (p->code >= kBadRepCode)
+          return SZ_ERROR_DATA;
+    }
+
+    RINOK(LZMA_DECODE_REAL(p, limit2, bufLimit));
+
+    if (p->checkDicSize == 0 && p->processedPos >= p->prop.dicSize)
+      p->checkDicSize = p->prop.dicSize;
+
+    /* flush the part of the last match that did not fit below limit2 */
+    LzmaDec_WriteRem(p, limit);
+  }
+  while (p->dicPos < limit && p->buf < bufLimit && p->remainLen < kMatchSpecLenStart);
+
+  return 0;
+}
+
+typedef enum
+{
+ DUMMY_ERROR, /* unexpected end of input stream */
+ DUMMY_LIT, /* LzmaDec_TryDummy(): the next symbol is a literal */
+ DUMMY_MATCH, /* LzmaDec_TryDummy(): the next symbol is a match with a coded distance */
+ DUMMY_REP /* LzmaDec_TryDummy(): the next symbol is a rep-match */
+} ELzmaDummy;
+/* Dry run of one LZMA symbol over buf[0 .. inSize). The decoder is passed as
+   const: range and code are local copies and the *_CHECK macros never update
+   probabilities. Returns DUMMY_ERROR when the input ends before the symbol
+   (including the final normalisation) is complete, otherwise the kind of the
+   symbol: DUMMY_LIT, DUMMY_MATCH or DUMMY_REP. */
+static ELzmaDummy LzmaDec_TryDummy(const CLzmaDec *p, const Byte *buf, SizeT inSize)
+{
+ UInt32 range = p->range;
+ UInt32 code = p->code;
+ const Byte *bufLimit = buf + inSize;
+ const CLzmaProb *probs = GET_PROBS;
+ unsigned state = (unsigned)p->state;
+ ELzmaDummy res;
+
+ {
+ const CLzmaProb *prob;
+ UInt32 bound;
+ unsigned ttt;
+ unsigned posState = CALC_POS_STATE(p->processedPos, (1 << p->prop.pb) - 1);
+
+ prob = probs + IsMatch + COMBINED_PS_STATE;
+ IF_BIT_0_CHECK(prob)
+ {
+ UPDATE_0_CHECK
+
+ /* if (bufLimit - buf >= 7) return DUMMY_LIT; */
+
+ prob = probs + Literal;
+ if (p->checkDicSize != 0 || p->processedPos != 0)
+ prob += ((UInt32)LZMA_LIT_SIZE *
+ ((((p->processedPos) & ((1 << (p->prop.lp)) - 1)) << p->prop.lc) +
+ (p->dic[(p->dicPos == 0 ? p->dicBufSize : p->dicPos) - 1] >> (8 - p->prop.lc))));
+
+ if (state < kNumLitStates)
+ {
+ unsigned symbol = 1;
+ do { GET_BIT_CHECK(prob + symbol, symbol) } while (symbol < 0x100);
+ }
+ else
+ {
+ unsigned matchByte = p->dic[p->dicPos - p->reps[0] +
+ (p->dicPos < p->reps[0] ? p->dicBufSize : 0)];
+ unsigned offs = 0x100;
+ unsigned symbol = 1;
+ do
+ {
+ unsigned bit;
+ const CLzmaProb *probLit;
+ matchByte += matchByte;
+ bit = offs;
+ offs &= matchByte;
+ probLit = prob + (offs + bit + symbol);
+ GET_BIT2_CHECK(probLit, symbol, offs ^= bit; , ; )
+ }
+ while (symbol < 0x100);
+ }
+ res = DUMMY_LIT;
+ }
+ else
+ {
+ unsigned len;
+ UPDATE_1_CHECK;
+
+ prob = probs + IsRep + state;
+ IF_BIT_0_CHECK(prob)
+ {
+ UPDATE_0_CHECK;
+ state = 0; /* from here on `state` is only a flag: 0 makes the `state < 4` test below true, so the distance is parsed */
+ prob = probs + LenCoder;
+ res = DUMMY_MATCH;
+ }
+ else
+ {
+ UPDATE_1_CHECK;
+ res = DUMMY_REP;
+ prob = probs + IsRepG0 + state;
+ IF_BIT_0_CHECK(prob)
+ {
+ UPDATE_0_CHECK;
+ prob = probs + IsRep0Long + COMBINED_PS_STATE;
+ IF_BIT_0_CHECK(prob)
+ {
+ UPDATE_0_CHECK;
+ NORMALIZE_CHECK;
+ return DUMMY_REP;
+ }
+ else
+ {
+ UPDATE_1_CHECK;
+ }
+ }
+ else
+ {
+ UPDATE_1_CHECK;
+ prob = probs + IsRepG1 + state;
+ IF_BIT_0_CHECK(prob)
+ {
+ UPDATE_0_CHECK;
+ }
+ else
+ {
+ UPDATE_1_CHECK;
+ prob = probs + IsRepG2 + state;
+ IF_BIT_0_CHECK(prob)
+ {
+ UPDATE_0_CHECK;
+ }
+ else
+ {
+ UPDATE_1_CHECK;
+ }
+ }
+ }
+ state = kNumStates; /* flag value >= 4: a rep-match carries no distance to parse */
+ prob = probs + RepLenCoder;
+ }
+ {
+ unsigned limit, offset;
+ const CLzmaProb *probLen = prob + LenChoice;
+ IF_BIT_0_CHECK(probLen)
+ {
+ UPDATE_0_CHECK;
+ probLen = prob + LenLow + GET_LEN_STATE;
+ offset = 0;
+ limit = 1 << kLenNumLowBits;
+ }
+ else
+ {
+ UPDATE_1_CHECK;
+ probLen = prob + LenChoice2;
+ IF_BIT_0_CHECK(probLen)
+ {
+ UPDATE_0_CHECK;
+ probLen = prob + LenLow + GET_LEN_STATE + (1 << kLenNumLowBits);
+ offset = kLenNumLowSymbols;
+ limit = 1 << kLenNumLowBits;
+ }
+ else
+ {
+ UPDATE_1_CHECK;
+ probLen = prob + LenHigh;
+ offset = kLenNumLowSymbols * 2;
+ limit = 1 << kLenNumHighBits;
+ }
+ }
+ TREE_DECODE_CHECK(probLen, limit, len);
+ len += offset;
+ }
+
+ if (state < 4)
+ {
+ unsigned posSlot;
+ prob = probs + PosSlot +
+ ((len < kNumLenToPosStates - 1 ? len : kNumLenToPosStates - 1) <<
+ kNumPosSlotBits);
+ TREE_DECODE_CHECK(prob, 1 << kNumPosSlotBits, posSlot);
+ if (posSlot >= kStartPosModelIndex)
+ {
+ unsigned numDirectBits = ((posSlot >> 1) - 1);
+
+ /* if (bufLimit - buf >= 8) return DUMMY_MATCH; */
+
+ if (posSlot < kEndPosModelIndex)
+ {
+ prob = probs + SpecPos + ((2 | (posSlot & 1)) << numDirectBits);
+ }
+ else
+ {
+ numDirectBits -= kNumAlignBits;
+ do
+ {
+ NORMALIZE_CHECK
+ range >>= 1;
+ code -= range & (((code - range) >> 31) - 1);
+ /* if (code >= range) code -= range; */
+ }
+ while (--numDirectBits);
+ prob = probs + Align;
+ numDirectBits = kNumAlignBits;
+ }
+ {
+ unsigned i = 1;
+ unsigned m = 1;
+ do
+ {
+ REV_BIT_CHECK(prob, i, m);
+ }
+ while (--numDirectBits);
+ }
+ }
+ }
+ }
+ }
+ NORMALIZE_CHECK;
+ return res;
+}
+
+
+/* Schedules a restart of the decoder: clears the carried-over input bytes
+   and stores in p->remainLen which initialisation the next decode call has
+   to perform:
+     kMatchSpecLenStart + 1 : range coder only
+     kMatchSpecLenStart + 2 : range coder and state (chosen when either flag
+                              is set)
+   With initDic set, the dictionary counters processedPos and checkDicSize
+   are also reset. */
+static void LzmaDec_InitDicAndState(CLzmaDec *p, BoolInt initDic, BoolInt initState)
+{
+  p->tempBufSize = 0;
+
+  if (initDic)
+  {
+    p->processedPos = 0;
+    p->checkDicSize = 0;
+  }
+
+  if (initDic || initState)
+    p->remainLen = kMatchSpecLenStart + 2;
+  else
+    p->remainLen = kMatchSpecLenStart + 1;
+}
+
+/* Prepares the decoder for a new stream: requests a full re-initialisation
+   (dictionary counters and state) and rewinds the dictionary write position. */
+void LzmaDec_Init(CLzmaDec *p)
+{
+  LzmaDec_InitDicAndState(p, True, True);
+  p->dicPos = 0;
+}
+
+/* Decodes from src into the dictionary buffer p->dic until p->dicPos reaches
+   dicLimit, the input is used up, or the end marker is found.
+   In:  *srcLen = number of input bytes available at src.
+   Out: *srcLen = number of input bytes consumed; *status = reason for
+        stopping.
+   Input that is too short to decode a whole symbol is kept in p->tempBuf and
+   completed on the next call.
+   NOTE(review): every non-zero result of LzmaDec_DecodeReal2(), including
+   SZ_ERROR_MEM from its dictionary growth, is reported here as
+   SZ_ERROR_DATA - confirm that callers do not need to tell them apart. */
+SRes LzmaDec_DecodeToDic(CLzmaDec *p, SizeT dicLimit, const Byte *src, SizeT *srcLen,
+ ELzmaFinishMode finishMode, ELzmaStatus *status, SizeT memlimit)
+{
+ SizeT inSize = *srcLen;
+ (*srcLen) = 0;
+
+ *status = LZMA_STATUS_NOT_SPECIFIED;
+
+ if (p->remainLen > kMatchSpecLenStart) /* range coder (and possibly state) must be initialised first */
+ {
+ for (; inSize > 0 && p->tempBufSize < RC_INIT_SIZE; (*srcLen)++, inSize--)
+ p->tempBuf[p->tempBufSize++] = *src++;
+ if (p->tempBufSize != 0 && p->tempBuf[0] != 0) /* the first init byte must be 0 */
+ return SZ_ERROR_DATA;
+ if (p->tempBufSize < RC_INIT_SIZE)
+ {
+ *status = LZMA_STATUS_NEEDS_MORE_INPUT;
+ return SZ_OK;
+ }
+ p->code =
+ ((UInt32)p->tempBuf[1] << 24)
+ | ((UInt32)p->tempBuf[2] << 16)
+ | ((UInt32)p->tempBuf[3] << 8)
+ | ((UInt32)p->tempBuf[4]);
+ p->range = 0xFFFFFFFF;
+ p->tempBufSize = 0;
+
+ if (p->remainLen > kMatchSpecLenStart + 1) /* state reset requested as well */
+ {
+ SizeT numProbs = LzmaProps_GetNumProbs(&p->prop);
+ SizeT i;
+ CLzmaProb *probs = p->probs;
+ for (i = 0; i < numProbs; i++)
+ probs[i] = kBitModelTotal >> 1;
+ p->reps[0] = p->reps[1] = p->reps[2] = p->reps[3] = 1;
+ p->state = 0;
+ }
+
+ p->remainLen = 0;
+ }
+
+ LzmaDec_WriteRem(p, dicLimit);
+
+ while (p->remainLen != kMatchSpecLenStart) /* until the end marker has been decoded */
+ {
+ int checkEndMarkNow = 0;
+
+ if (p->dicPos >= dicLimit)
+ {
+ if (p->remainLen == 0 && p->code == 0)
+ {
+ *status = LZMA_STATUS_MAYBE_FINISHED_WITHOUT_MARK;
+ return SZ_OK;
+ }
+ if (finishMode == LZMA_FINISH_ANY)
+ {
+ *status = LZMA_STATUS_NOT_FINISHED;
+ return SZ_OK;
+ }
+ if (p->remainLen != 0)
+ {
+ *status = LZMA_STATUS_NOT_FINISHED;
+ return SZ_ERROR_DATA;
+ }
+ checkEndMarkNow = 1; /* output is full: only an end marker may follow */
+ }
+
+ if (p->tempBufSize == 0) /* decode directly from the caller's buffer */
+ {
+ SizeT processed;
+ const Byte *bufLimit;
+ if (inSize < LZMA_REQUIRED_INPUT_MAX || checkEndMarkNow)
+ {
+ int dummyRes = LzmaDec_TryDummy(p, src, inSize);
+ if (dummyRes == DUMMY_ERROR) /* not enough input for one symbol: stash it for the next call */
+ {
+ memcpy(p->tempBuf, src, inSize);
+ p->tempBufSize = (unsigned)inSize;
+ (*srcLen) += inSize;
+ *status = LZMA_STATUS_NEEDS_MORE_INPUT;
+ return SZ_OK;
+ }
+ if (checkEndMarkNow && dummyRes != DUMMY_MATCH)
+ {
+ *status = LZMA_STATUS_NOT_FINISHED;
+ return SZ_ERROR_DATA;
+ }
+ bufLimit = src; /* makes the decode loop stop after a single symbol */
+ }
+ else
+ bufLimit = src + inSize - LZMA_REQUIRED_INPUT_MAX;
+ p->buf = src;
+ if (LzmaDec_DecodeReal2(p, dicLimit, bufLimit, memlimit) != 0)
+ return SZ_ERROR_DATA;
+ processed = (SizeT)(p->buf - src);
+ (*srcLen) += processed;
+ src += processed;
+ inSize -= processed;
+ }
+ else /* continue a symbol whose first bytes were stashed in tempBuf */
+ {
+ unsigned rem = p->tempBufSize, lookAhead = 0;
+ while (rem < LZMA_REQUIRED_INPUT_MAX && lookAhead < inSize)
+ p->tempBuf[rem++] = src[lookAhead++];
+ p->tempBufSize = rem;
+ if (rem < LZMA_REQUIRED_INPUT_MAX || checkEndMarkNow)
+ {
+ int dummyRes = LzmaDec_TryDummy(p, p->tempBuf, (SizeT)rem);
+ if (dummyRes == DUMMY_ERROR)
+ {
+ (*srcLen) += (SizeT)lookAhead;
+ *status = LZMA_STATUS_NEEDS_MORE_INPUT;
+ return SZ_OK;
+ }
+ if (checkEndMarkNow && dummyRes != DUMMY_MATCH)
+ {
+ *status = LZMA_STATUS_NOT_FINISHED;
+ return SZ_ERROR_DATA;
+ }
+ }
+ p->buf = p->tempBuf;
+ if (LzmaDec_DecodeReal2(p, dicLimit, p->buf, memlimit) != 0)
+ return SZ_ERROR_DATA;
+
+ {
+ unsigned kkk = (unsigned)(p->buf - p->tempBuf); /* bytes of tempBuf actually consumed */
+ if (rem < kkk)
+ return SZ_ERROR_FAIL; /* some internal error */
+ rem -= kkk;
+ if (lookAhead < rem)
+ return SZ_ERROR_FAIL; /* some internal error */
+ lookAhead -= rem; /* give back the look-ahead bytes that were not consumed */
+ }
+ (*srcLen) += (SizeT)lookAhead;
+ src += lookAhead;
+ inSize -= (SizeT)lookAhead;
+ p->tempBufSize = 0;
+ }
+ }
+
+ if (p->code != 0)
+ return SZ_ERROR_DATA;
+ *status = LZMA_STATUS_FINISHED_WITH_MARK;
+ return SZ_OK;
+}
+
+
+/* Decodes from src into dest, using p->dic as the sliding window.
+   In:  *destLen = capacity of dest, *srcLen = available input bytes.
+   Out: *destLen = bytes written to dest, *srcLen = input bytes consumed.
+
+   Whenever the dictionary buffer is full and still smaller than
+   p->prop.dicSize it is grown with realloc(): four times the current size,
+   capped by `memlimit` and by dicSize. SZ_ERROR_MEM is returned when
+   memlimit forbids any growth or realloc() fails. A buffer that already has
+   the full dictionary size simply wraps around (dicPos = 0).
+
+   p->dic and p->dicBufSize are committed only after realloc() succeeded, and
+   the quadrupling cannot wrap around SizeT, so the decoder state always
+   matches the memory that is really owned.
+
+   Any other error is the one returned by LzmaDec_DecodeToDic(); the bytes
+   produced before the error are still copied to dest. */
+SRes LzmaDec_DecodeToBuf(CLzmaDec *p, Byte *dest, SizeT *destLen, const Byte *src, SizeT *srcLen, ELzmaFinishMode finishMode, ELzmaStatus *status, SizeT memlimit)
+{
+  SizeT outSize = *destLen;
+  SizeT inSize = *srcLen;
+  *srcLen = *destLen = 0;
+  for (;;)
+  {
+    SizeT inSizeCur = inSize, outSizeCur, dicPos;
+    ELzmaFinishMode curFinishMode;
+    SRes res;
+    if (p->dicPos == p->dicBufSize) {
+      if (p->dicBufSize < p->prop.dicSize) {
+        SizeT newSize;
+        Byte *tmp;
+        if (p->dicBufSize >= memlimit) {
+          return SZ_ERROR_MEM;
+        }
+        /* quadruple, saturating instead of wrapping on a narrow SizeT */
+        if (p->dicBufSize > ((SizeT)-1 >> 2)) {
+          newSize = (SizeT)-1;
+        } else {
+          newSize = p->dicBufSize << 2;
+        }
+        if (newSize > memlimit) {
+          newSize = memlimit;
+        }
+        if (newSize > p->prop.dicSize) {
+          newSize = p->prop.dicSize;
+        }
+        tmp = realloc(p->dic, newSize);
+        if (!tmp) {
+          /* the old block is still valid and still dicBufSize bytes long */
+          return SZ_ERROR_MEM;
+        }
+        p->dic = tmp;
+        p->dicBufSize = newSize;
+      } else {
+        p->dicPos = 0;
+      }
+    }
+    dicPos = p->dicPos;
+    if (outSize > p->dicBufSize - dicPos)
+    {
+      /* more output wanted than fits before the end of the window */
+      outSizeCur = p->dicBufSize;
+      curFinishMode = LZMA_FINISH_ANY;
+    }
+    else
+    {
+      outSizeCur = dicPos + outSize;
+      curFinishMode = finishMode;
+    }
+
+    res = LzmaDec_DecodeToDic(p, outSizeCur, src, &inSizeCur, curFinishMode, status, memlimit);
+    src += inSizeCur;
+    inSize -= inSizeCur;
+    *srcLen += inSizeCur;
+    outSizeCur = p->dicPos - dicPos; /* bytes produced by this pass */
+    memcpy(dest, p->dic + dicPos, outSizeCur);
+    dest += outSizeCur;
+    outSize -= outSizeCur;
+    *destLen += outSizeCur;
+    if (res != 0)
+      return res;
+    if (outSizeCur == 0 || outSize == 0)
+      return SZ_OK;
+  }
+}
+/* Releases the probability array through `alloc` and clears the pointer, so a repeated call passes NULL to the allocator. */
+void LzmaDec_FreeProbs(CLzmaDec *p, ISzAllocPtr alloc)
+{
+ ISzAlloc_Free(alloc, p->probs);
+ p->probs = NULL;
+}
+/* Releases the dictionary buffer through `alloc` and clears the pointer. NOTE(review): the decode functions in this file grow p->dic with realloc(), so `alloc` presumably has to be backed by malloc/free - confirm at the call sites. */
+static void LzmaDec_FreeDict(CLzmaDec *p, ISzAllocPtr alloc)
+{
+ ISzAlloc_Free(alloc, p->dic);
+ p->dic = NULL;
+}
+/* Releases both the probability array and the dictionary buffer of the decoder. */
+void LzmaDec_Free(CLzmaDec *p, ISzAllocPtr alloc)
+{
+ LzmaDec_FreeProbs(p, alloc);
+ LzmaDec_FreeDict(p, alloc);
+}
+
+/* Parses the LZMA_PROPS_SIZE-byte property header.
+     data[0]    : lc, lp and pb packed as (pb * 5 + lp) * 9 + lc
+     data[1..4] : dictionary size, little-endian; values below LZMA_DIC_MIN
+                  are raised to LZMA_DIC_MIN
+   Returns SZ_ERROR_UNSUPPORTED when fewer than LZMA_PROPS_SIZE bytes are
+   given or data[0] is not below 9 * 5 * 5; in the latter case p->dicSize has
+   already been written. Returns SZ_OK otherwise. */
+SRes LzmaProps_Decode(CLzmaProps *p, const Byte *data, unsigned size)
+{
+  UInt32 dictBytes;
+  unsigned packed;
+
+  if (size < LZMA_PROPS_SIZE)
+    return SZ_ERROR_UNSUPPORTED;
+
+  dictBytes = (UInt32)data[1]
+      | ((UInt32)data[2] << 8)
+      | ((UInt32)data[3] << 16)
+      | ((UInt32)data[4] << 24);
+  p->dicSize = (dictBytes < LZMA_DIC_MIN) ? LZMA_DIC_MIN : dictBytes;
+
+  packed = data[0];
+  if (packed >= (9 * 5 * 5))
+    return SZ_ERROR_UNSUPPORTED;
+
+  p->lc = (Byte)(packed % 9);
+  packed /= 9;
+  p->lp = (Byte)(packed % 5);
+  p->pb = (Byte)(packed / 5);
+  p->_pad_ = 0;
+
+  return SZ_OK;
+}
+
+/* Makes sure p->probs can hold the number of probabilities required by
+   propNew. An existing array of exactly that size is reused; otherwise it is
+   freed and a new one is allocated through `alloc`.
+   Returns SZ_ERROR_MEM when the allocation fails (p->probs is NULL then). */
+static SRes LzmaDec_AllocateProbs2(CLzmaDec *p, const CLzmaProps *propNew, ISzAllocPtr alloc)
+{
+  const UInt32 needed = LzmaProps_GetNumProbs(propNew);
+
+  if (p->probs && needed == p->numProbs)
+    return SZ_OK; /* current array already has the right size */
+
+  LzmaDec_FreeProbs(p, alloc);
+  p->probs = (CLzmaProb *)ISzAlloc_Alloc(alloc, needed * sizeof(CLzmaProb));
+  if (!p->probs)
+    return SZ_ERROR_MEM;
+
+  /* base pointer used by the decode loops; table offsets are relative to it */
+  p->probs_1664 = p->probs + 1664;
+  p->numProbs = needed;
+  return SZ_OK;
+}
+/* Decodes the property header and (re)allocates only the probability array; the dictionary fields are not touched. p->prop is stored only after both steps ran. NOTE(review): RINOK is defined outside this excerpt; presumably it returns from the function when its argument is a non-zero SRes - confirm. */
+SRes LzmaDec_AllocateProbs(CLzmaDec *p, const Byte *props, unsigned propsSize, ISzAllocPtr alloc)
+{
+ CLzmaProps propNew;
+ RINOK(LzmaProps_Decode(&propNew, props, propsSize));
+ RINOK(LzmaDec_AllocateProbs2(p, &propNew, alloc));
+ p->prop = propNew;
+ return SZ_OK;
+}
+
+/* Decodes the property header and allocates the probability array and the
+   dictionary buffer through `alloc`.
+   The size derived from the header is rounded up to a multiple of 4 KiB,
+   1 MiB or 4 MiB (depending on the dictionary size) and then capped at
+   LZMA_DIC_MIN: the buffer starts small and is enlarged on demand by the
+   decode functions. An existing buffer of the same size is reused.
+   Returns SZ_ERROR_UNSUPPORTED for bad properties, or SZ_ERROR_MEM when an
+   allocation fails (the probability array is released in that case). */
+SRes LzmaDec_Allocate(CLzmaDec *p, const Byte *props, unsigned propsSize, ISzAllocPtr alloc)
+{
+  CLzmaProps parsed;
+  SizeT roundMask;
+  SizeT wanted;
+
+  RINOK(LzmaProps_Decode(&parsed, props, propsSize));
+  RINOK(LzmaDec_AllocateProbs2(p, &parsed, alloc));
+
+  /* rounding granularity grows with the dictionary size */
+  if (parsed.dicSize >= ((UInt32)1 << 30))
+    roundMask = ((UInt32)1 << 22) - 1;
+  else if (parsed.dicSize >= ((UInt32)1 << 22))
+    roundMask = ((UInt32)1 << 20) - 1;
+  else
+    roundMask = ((UInt32)1 << 12) - 1;
+
+  wanted = ((SizeT)parsed.dicSize + roundMask) & ~roundMask;
+  if (wanted < parsed.dicSize) /* rounding wrapped around */
+    wanted = parsed.dicSize;
+  if (wanted > LZMA_DIC_MIN)
+    wanted = LZMA_DIC_MIN;
+
+  if (!(p->dic && wanted == p->dicBufSize))
+  {
+    LzmaDec_FreeDict(p, alloc);
+    p->dic = (Byte *)ISzAlloc_Alloc(alloc, wanted);
+    if (!p->dic)
+    {
+      LzmaDec_FreeProbs(p, alloc);
+      return SZ_ERROR_MEM;
+    }
+  }
+
+  p->dicBufSize = wanted;
+  p->prop = parsed;
+  return SZ_OK;
+}
+
+/* One-call decoding: the caller's `dest` buffer is used directly as the
+   dictionary, so only the probability array is allocated (and freed again)
+   here.
+   In:  *destLen = capacity of dest, *srcLen = size of src.
+   Out: *destLen = bytes written, *srcLen = bytes consumed.
+   Returns SZ_ERROR_INPUT_EOF when fewer than RC_INIT_SIZE input bytes are
+   given or the decoder stops asking for more input; otherwise the result of
+   the property parsing / allocation / decoding steps. */
+SRes LzmaDecode(Byte *dest, SizeT *destLen, const Byte *src, SizeT *srcLen,
+    const Byte *propData, unsigned propSize, ELzmaFinishMode finishMode,
+    ELzmaStatus *status, ISzAllocPtr alloc)
+{
+  CLzmaDec dec;
+  SRes res;
+  const SizeT outCapacity = *destLen;
+  const SizeT inAvail = *srcLen;
+
+  *srcLen = 0;
+  *destLen = 0;
+  *status = LZMA_STATUS_NOT_SPECIFIED;
+
+  if (inAvail < RC_INIT_SIZE)
+    return SZ_ERROR_INPUT_EOF;
+
+  LzmaDec_Construct(&dec);
+  RINOK(LzmaDec_AllocateProbs(&dec, propData, propSize, alloc));
+
+  /* decode straight into the output buffer */
+  dec.dic = dest;
+  dec.dicBufSize = outCapacity;
+  LzmaDec_Init(&dec);
+
+  *srcLen = inAvail;
+  res = LzmaDec_DecodeToDic(&dec, outCapacity, src, srcLen, finishMode, status, SIZE_MAX);
+  *destLen = dec.dicPos;
+
+  if (res == SZ_OK && *status == LZMA_STATUS_NEEDS_MORE_INPUT)
+    res = SZ_ERROR_INPUT_EOF;
+
+  LzmaDec_FreeProbs(&dec, alloc);
+  return res;
+}
diff --git a/htp/lzma/LzmaDec.h b/htp/lzma/LzmaDec.h
new file mode 100644
index 0000000..20b5228
--- /dev/null
+++ b/htp/lzma/LzmaDec.h
@@ -0,0 +1,234 @@
+/* LzmaDec.h -- LZMA Decoder
+2018-04-21 : Igor Pavlov : Public domain */
+
+#ifndef __LZMA_DEC_H
+#define __LZMA_DEC_H
+
+#include "7zTypes.h"
+
+EXTERN_C_BEGIN
+
+/* #define _LZMA_PROB32 */
+/* _LZMA_PROB32 can increase the speed on some CPUs,
+ but memory usage for CLzmaDec::probs will be doubled in that case */
+
+/* CLzmaProb: element type of the decoder's probability arrays
+   (CLzmaDec::probs); UInt32 when _LZMA_PROB32 is defined, UInt16 otherwise. */
+typedef
+#ifdef _LZMA_PROB32
+ UInt32
+#else
+ UInt16
+#endif
+ CLzmaProb;
+
+
+/* ---------- LZMA Properties ---------- */
+
+#define LZMA_PROPS_SIZE 5
+
+/* Decoded LZMA properties, filled in by LzmaProps_Decode. */
+typedef struct _CLzmaProps
+{
+ /* NOTE(review): lc/lp/pb are presumably the usual LZMA parameters (literal
+    context bits, literal position bits, position bits) - confirm against
+    LzmaProps_Decode. */
+ Byte lc;
+ Byte lp;
+ Byte pb;
+ Byte _pad_; /* NOTE(review): looks like alignment padding only - confirm */
+ UInt32 dicSize; /* dictionary size; LzmaDec_Allocate derives its buffer size from it */
+} CLzmaProps;
+
+/* LzmaProps_Decode - decodes properties
+Returns:
+ SZ_OK
+ SZ_ERROR_UNSUPPORTED - Unsupported properties
+*/
+
+SRes LzmaProps_Decode(CLzmaProps *p, const Byte *data, unsigned size);
+
+
+/* ---------- LZMA Decoder state ---------- */
+
+/* LZMA_REQUIRED_INPUT_MAX = number of required input bytes for worst case.
+ Num bits = log2((2^11 / 31) ^ 22) + 26 < 134 + 26 = 160; */
+
+#define LZMA_REQUIRED_INPUT_MAX 20
+
+/* Decoder state used by the Dictionary, Buffer and One Call interfaces.
+   LzmaDec_AllocateProbs and LzmaDec_Allocate store the decoded properties in
+   "prop"; LzmaDec_Allocate also allocates "dic" and sets "dicBufSize", while
+   LzmaDecode points "dic" at the caller's output buffer instead. */
+typedef struct
+{
+ /* Don't change this structure. ASM code can use it. */
+ CLzmaProps prop;
+ CLzmaProb *probs;
+ CLzmaProb *probs_1664;
+ Byte *dic; /* dictionary buffer; NULL after LzmaDec_Construct */
+ SizeT dicBufSize; /* size of dic in bytes */
+ SizeT dicPos; /* current position in dic; LzmaDecode reports it as the output length */
+ const Byte *buf;
+ UInt32 range;
+ UInt32 code;
+ UInt32 processedPos;
+ UInt32 checkDicSize;
+ UInt32 reps[4];
+ UInt32 state;
+ UInt32 remainLen;
+
+ UInt32 numProbs;
+ unsigned tempBufSize;
+ Byte tempBuf[LZMA_REQUIRED_INPUT_MAX];
+} CLzmaDec;
+
+/* Marks a CLzmaDec as owning no buffers by setting dic and probs to NULL;
+   no other field is touched. The macro evaluates p twice and expands to a
+   plain brace block (not do { } while (0)), so do not use it as the unbraced
+   body of an if that has an else. */
+#define LzmaDec_Construct(p) { (p)->dic = NULL; (p)->probs = NULL; }
+
+void LzmaDec_Init(CLzmaDec *p);
+
+/* There are two types of LZMA streams:
+ - Stream with end mark. That end mark adds about 6 bytes to compressed size.
+ - Stream without end mark. You must know exact uncompressed size to decompress such stream. */
+
+/* Tells a decode call what to expect when it reaches its output limit;
+   see the usage notes that follow this enum. */
+typedef enum
+{
+ LZMA_FINISH_ANY, /* finish at any point */
+ LZMA_FINISH_END /* block must be finished at the end */
+} ELzmaFinishMode;
+
+/* ELzmaFinishMode has meaning only if the decoding reaches the output limit !!!
+
+ You must use LZMA_FINISH_END when you know that the current output buffer
+ covers the last bytes of the block. In all other cases you must use LZMA_FINISH_ANY.
+
+ If the LZMA decoder sees the end marker before reaching the output limit, it
+ returns SZ_OK, and the output value of destLen will be less than the output
+ buffer size limit. You can also check the status result.
+
+ You can use multiple checks to test data integrity after full decompression:
+ 1) Check the result code and the "status" variable.
+ 2) Check that output(destLen) = uncompressedSize, if you know the real uncompressedSize.
+ 3) Check that output(srcLen) = compressedSize, if you know the real compressedSize.
+ You must use the correct finish mode in that case. */
+
+/* Detailed outcome of a decode call, reported through its "status" out-parameter. */
+typedef enum
+{
+ LZMA_STATUS_NOT_SPECIFIED, /* use main error code instead */
+ LZMA_STATUS_FINISHED_WITH_MARK, /* stream was finished with end mark. */
+ LZMA_STATUS_NOT_FINISHED, /* stream was not finished */
+ LZMA_STATUS_NEEDS_MORE_INPUT, /* you must provide more input bytes */
+ LZMA_STATUS_MAYBE_FINISHED_WITHOUT_MARK /* the stream may have finished without an end mark */
+} ELzmaStatus;
+
+/* ELzmaStatus is used only as output value for function call */
+
+
+/* ---------- Interfaces ---------- */
+
+/* There are 3 levels of interfaces:
+ 1) Dictionary Interface
+ 2) Buffer Interface
+ 3) One Call Interface
+ You can select any of these interfaces, but don't mix functions from different
+ groups for same object. */
+
+
+/* There are two variants to allocate state for Dictionary Interface:
+ 1) LzmaDec_Allocate / LzmaDec_Free
+ 2) LzmaDec_AllocateProbs / LzmaDec_FreeProbs
+ You can use variant 2, if you set dictionary buffer manually.
+ For Buffer Interface you must always use variant 1.
+
+LzmaDec_Allocate* can return:
+ SZ_OK
+ SZ_ERROR_MEM - Memory allocation error
+ SZ_ERROR_UNSUPPORTED - Unsupported properties
+*/
+
+SRes LzmaDec_AllocateProbs(CLzmaDec *p, const Byte *props, unsigned propsSize, ISzAllocPtr alloc);
+void LzmaDec_FreeProbs(CLzmaDec *p, ISzAllocPtr alloc);
+
+SRes LzmaDec_Allocate(CLzmaDec *p, const Byte *props, unsigned propsSize, ISzAllocPtr alloc);
+void LzmaDec_Free(CLzmaDec *p, ISzAllocPtr alloc);
+
+/* ---------- Dictionary Interface ---------- */
+
+/* You can use it, if you want to eliminate the overhead for data copying from
+ dictionary to some other external buffer.
+ You must work with CLzmaDec variables directly in this interface.
+
+ STEPS:
+ LzmaDec_Construct()
+ LzmaDec_Allocate()
+ for (each new stream)
+ {
+ LzmaDec_Init()
+ while (it needs more decompression)
+ {
+ LzmaDec_DecodeToDic()
+ use data from CLzmaDec::dic and update CLzmaDec::dicPos
+ }
+ }
+ LzmaDec_Free()
+*/
+
+/* LzmaDec_DecodeToDic
+
+ The decoding to internal dictionary buffer (CLzmaDec::dic).
+ You must manually update CLzmaDec::dicPos, if it reaches CLzmaDec::dicBufSize !!!
+
+finishMode:
+ It has meaning only if the decoding reaches output limit (dicLimit).
+ LZMA_FINISH_ANY - Decode just dicLimit bytes.
+ LZMA_FINISH_END - Stream must be finished after dicLimit.
+
+memlimit:
+ LzmaDecode passes SIZE_MAX for this argument.
+ NOTE(review): presumably an upper bound on the memory the decoder may use
+ for its dictionary - confirm against the implementation in LzmaDec.c.
+
+Returns:
+ SZ_OK
+ status:
+ LZMA_STATUS_FINISHED_WITH_MARK
+ LZMA_STATUS_NOT_FINISHED
+ LZMA_STATUS_NEEDS_MORE_INPUT
+ LZMA_STATUS_MAYBE_FINISHED_WITHOUT_MARK
+ SZ_ERROR_DATA - Data error
+*/
+
+SRes LzmaDec_DecodeToDic(CLzmaDec *p, SizeT dicLimit,
+ const Byte *src, SizeT *srcLen, ELzmaFinishMode finishMode, ELzmaStatus *status, SizeT memlimit);
+
+
+/* ---------- Buffer Interface ---------- */
+
+/* It's zlib-like interface.
+ See LzmaDec_DecodeToDic description for information about STEPS and return results,
+ but you must use LzmaDec_DecodeToBuf instead of LzmaDec_DecodeToDic and you don't need
+ to work with CLzmaDec variables manually.
+
+finishMode:
+ It has meaning only if the decoding reaches output limit (*destLen).
+ LZMA_FINISH_ANY - Decode just destLen bytes.
+ LZMA_FINISH_END - Stream must be finished after (*destLen).
+
+memlimit:
+ NOTE(review): same trailing parameter as LzmaDec_DecodeToDic; presumably an
+ upper bound on decoder memory - confirm against the implementation in LzmaDec.c.
+*/
+
+SRes LzmaDec_DecodeToBuf(CLzmaDec *p, Byte *dest, SizeT *destLen,
+ const Byte *src, SizeT *srcLen, ELzmaFinishMode finishMode, ELzmaStatus *status, SizeT memlimit);
+
+
+/* ---------- One Call Interface ---------- */
+
+/* LzmaDecode
+
+finishMode:
+ It has meaning only if the decoding reaches output limit (*destLen).
+ LZMA_FINISH_ANY - Decode just destLen bytes.
+ LZMA_FINISH_END - Stream must be finished after (*destLen).
+
+Returns:
+ SZ_OK
+ status:
+ LZMA_STATUS_FINISHED_WITH_MARK
+ LZMA_STATUS_NOT_FINISHED
+ LZMA_STATUS_MAYBE_FINISHED_WITHOUT_MARK
+ SZ_ERROR_DATA - Data error
+ SZ_ERROR_MEM - Memory allocation error
+ SZ_ERROR_UNSUPPORTED - Unsupported properties
+ SZ_ERROR_INPUT_EOF - It needs more bytes in input buffer (src).
+*/
+
+SRes LzmaDecode(Byte *dest, SizeT *destLen, const Byte *src, SizeT *srcLen,
+ const Byte *propData, unsigned propSize, ELzmaFinishMode finishMode,
+ ELzmaStatus *status, ISzAllocPtr alloc);
+
+EXTERN_C_END
+
+#endif
diff --git a/htp/lzma/Makefile.am b/htp/lzma/Makefile.am
new file mode 100644
index 0000000..5fa3ec5
--- /dev/null
+++ b/htp/lzma/Makefile.am
@@ -0,0 +1,16 @@
+
+# Headers installed for library users (see library_include_HEADERS below).
+h_sources = LzmaDec.h 7zTypes.h
+
+# Headers that are only part of the library's own sources.
+h_sources_private = LzFind.h LzHash.h Compiler.h Precomp.h
+
+c_sources = LzFind.c LzmaDec.c
+
+# -Wextra is listed once; the -Wno-* switches that follow it still take effect.
+AM_CFLAGS = -I$(top_srcdir) -D_GNU_SOURCE -g -Wall -Wextra -std=gnu99 -pedantic \
+    -Wno-missing-field-initializers -Wshadow -Wpointer-arith \
+    -Wstrict-prototypes -Wmissing-prototypes -Wno-unused-parameter
+
+library_includedir = $(includedir)/$(GENERIC_LIBRARY_NAME)/lzma
+library_include_HEADERS = $(h_sources)
+
+# noinst_ library: built for linking into the parent library, not installed itself.
+noinst_LTLIBRARIES = liblzma-c.la
+liblzma_c_la_SOURCES = $(h_sources) $(h_sources_private) $(c_sources)
diff --git a/htp/lzma/Precomp.h b/htp/lzma/Precomp.h
new file mode 100644
index 0000000..edb5814
--- /dev/null
+++ b/htp/lzma/Precomp.h
@@ -0,0 +1,10 @@
+/* Precomp.h -- StdAfx
+2013-11-12 : Igor Pavlov : Public domain */
+
+/* Common prelude header: at present it only pulls in Compiler.h
+   (the 7zTypes.h include below is commented out). */
+#ifndef __7Z_PRECOMP_H
+#define __7Z_PRECOMP_H
+
+#include "Compiler.h"
+/* #include "7zTypes.h" */
+
+#endif
diff --git a/htp/strlcat.c b/htp/strlcat.c
new file mode 100644
index 0000000..fc1776d
--- /dev/null
+++ b/htp/strlcat.c
@@ -0,0 +1,76 @@
+/*
+ * Copyright (c) 1998 Todd C. Miller <Todd.Miller@courtesan.com>
+ * All rights reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ * 1. Redistributions of source code must retain the above copyright
+ * notice, this list of conditions and the following disclaimer.
+ * 2. Redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in the
+ * documentation and/or other materials provided with the distribution.
+ * 3. The name of the author may not be used to endorse or promote products
+ * derived from this software without specific prior written permission.
+ *
+ * THIS SOFTWARE IS PROVIDED ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES,
+ * INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY
+ * AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL
+ * THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
+ * EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
+ * PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS;
+ * OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY,
+ * WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR
+ * OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF
+ * ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+ */
+
+/* $Id: strlcatu.c,v 1.4 2003/10/20 15:03:27 chrisgreen Exp $ */
+
+#include "htp_config_auto.h"
+
+#include "htp_private.h"
+
+#ifndef HAVE_STRLCAT
+
+#if defined(LIBC_SCCS) && !defined(lint)
+static char *rcsid = "$OpenBSD: strlcat.c,v 1.5 2001/01/13 16:17:24 millert Exp $";
+#endif /* LIBC_SCCS and not lint */
+
+#include <sys/types.h>
+#include <string.h>
+
+/*
+ * Concatenate src onto the string held in dst, where siz is the total
+ * capacity of dst (unlike strncat, not the room that is left). At most
+ * siz-1 bytes end up in dst and the result is NUL-terminated, unless dst
+ * has no terminator within its first siz bytes, in which case dst is left
+ * untouched.
+ * Returns strlen(initial dst) + strlen(src), with the dst length clamped
+ * to siz; a return value >= siz means the result was truncated.
+ */
+size_t strlcat(char *dst, const char *src, size_t siz)
+{
+    size_t dlen = 0;
+    size_t slen = 0;
+    size_t out;
+
+    /* Measure dst without ever reading past its siz bytes. */
+    while (dlen < siz && dst[dlen] != '\0')
+        dlen++;
+
+    /* No terminator inside dst: nothing can be appended. */
+    if (dlen == siz)
+        return dlen + strlen(src);
+
+    /* Append while keeping one byte free for the terminator, but keep
+     * walking src so that its full length can be reported. */
+    out = dlen;
+    for (slen = 0; src[slen] != '\0'; slen++) {
+        if (out + 1 < siz)
+            dst[out++] = src[slen];
+    }
+    dst[out] = '\0';
+
+    return dlen + slen; /* count does not include NUL */
+}
+#endif
diff --git a/htp/strlcpy.c b/htp/strlcpy.c
new file mode 100644
index 0000000..227f52a
--- /dev/null
+++ b/htp/strlcpy.c
@@ -0,0 +1,72 @@
+/*
+ * Copyright (c) 1998 Todd C. Miller <Todd.Miller@courtesan.com>
+ * All rights reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ * 1. Redistributions of source code must retain the above copyright
+ * notice, this list of conditions and the following disclaimer.
+ * 2. Redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in the
+ * documentation and/or other materials provided with the distribution.
+ * 3. The name of the author may not be used to endorse or promote products
+ * derived from this software without specific prior written permission.
+ *
+ * THIS SOFTWARE IS PROVIDED ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES,
+ * INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY
+ * AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL
+ * THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
+ * EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
+ * PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS;
+ * OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY,
+ * WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR
+ * OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF
+ * ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+ */
+
+/* $Id: strlcpyu.c,v 1.4 2003/10/20 15:03:27 chrisgreen Exp $ */
+
+#include "htp_config_auto.h"
+
+#include "htp_private.h"
+
+#ifndef HAVE_STRLCPY
+
+#if defined(LIBC_SCCS) && !defined(lint)
+static char *rcsid = "$OpenBSD: strlcpy.c,v 1.4 1999/05/01 18:56:41 millert Exp $";
+#endif /* LIBC_SCCS and not lint */
+
+#include <sys/types.h>
+#include <string.h>
+
+/*
+ * Copy the string src into dst, whose total capacity is siz bytes.
+ * No more than siz-1 bytes are copied and dst is always NUL-terminated,
+ * except when siz is 0, in which case dst is not written at all.
+ * Returns strlen(src); a return value >= siz means src was truncated.
+ */
+size_t strlcpy(char *dst, const char *src, size_t siz)
+{
+    size_t i = 0;
+
+    if (siz != 0) {
+        /* Copy what fits, reserving the last byte for the terminator. */
+        while (i + 1 < siz && src[i] != '\0') {
+            dst[i] = src[i];
+            i++;
+        }
+        dst[i] = '\0';
+    }
+
+    /* Walk whatever is left of src so the full length can be returned. */
+    while (src[i] != '\0')
+        i++;
+
+    return i; /* count does not include NUL */
+}
+#endif