diff options
Diffstat (limited to 'htp')
62 files changed, 23827 insertions, 0 deletions
diff --git a/htp/Makefile.am b/htp/Makefile.am new file mode 100644 index 0000000..a3e8245 --- /dev/null +++ b/htp/Makefile.am @@ -0,0 +1,35 @@ + +SUBDIRS = lzma + +h_sources = bstr.h bstr_builder.h htp.h htp_base64.h htp_config.h htp_connection_parser.h \ + htp_core.h htp_decompressors.h htp_hooks.h htp_list.h \ + htp_multipart.h htp_table.h htp_transaction.h \ + htp_urlencoded.h htp_utf8_decoder.h htp_version.h + +h_sources_private = htp_config_private.h htp_connection_private.h htp_connection_parser_private.h htp_list_private.h \ + htp_multipart_private.h htp_private.h htp_table_private.h htp_config_auto.h + +c_sources = bstr.c bstr_builder.c htp_base64.c htp_config.c htp_connection.c htp_connection_parser.c \ + htp_content_handlers.c htp_cookies.c htp_decompressors.c htp_hooks.c htp_list.c htp_multipart.c htp_parsers.c \ + htp_php.c htp_request.c htp_request_apache_2_2.c htp_request_generic.c htp_request_parsers.c htp_response.c \ + htp_response_generic.c htp_table.c htp_transaction.c htp_transcoder.c htp_urlencoded.c htp_util.c htp_utf8_decoder.c \ + strlcpy.c strlcat.c + +library_includedir = $(includedir)/$(GENERIC_LIBRARY_NAME) +library_include_HEADERS = $(h_sources) + +AM_CFLAGS = -I$(top_srcdir) -I$(top_builddir)/htp -D_GNU_SOURCE -g -Wall -Wextra -std=gnu99 -pedantic \ + -Wextra -Wno-missing-field-initializers -Wshadow -Wpointer-arith \ + -Wstrict-prototypes -Wmissing-prototypes -Wno-unused-parameter + +noinst_LTLIBRARIES = libhtp-c.la +libhtp_c_la_SOURCES = $(h_sources) $(h_sources_private) $(c_sources) + +lib_LTLIBRARIES = libhtp.la +libhtp_la_SOURCES = +libhtp_la_LIBADD = libhtp-c.la lzma/liblzma-c.la +libhtp_la_LDFLAGS = -version-info $(GENERIC_LIBRARY_VERSION) +if CYGWIN +libhtp_la_LIBADD += $(LIBICONV) +libhtp_la_LDFLAGS += -no-undefined +endif diff --git a/htp/bstr.c b/htp/bstr.c new file mode 100644 index 0000000..7673c68 --- /dev/null +++ b/htp/bstr.c @@ -0,0 +1,638 @@ +/*************************************************************************** 
+ * Copyright (c) 2009-2010 Open Information Security Foundation + * Copyright (c) 2010-2013 Qualys, Inc. + * All rights reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions are + * met: + * + * - Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + + * - Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution. + + * - Neither the name of the Qualys, Inc. nor the names of its + * contributors may be used to endorse or promote products derived from + * this software without specific prior written permission. + * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS + * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT + * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR + * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT + * HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, + * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT + * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, + * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY + * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT + * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE + * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
+ ***************************************************************************/ + +/** + * @file + * @author Ivan Ristic <ivanr@webkreator.com> + */ + +#include <ctype.h> + +#include "bstr.h" + +bstr *bstr_alloc(size_t len) { + bstr *b = malloc(sizeof (bstr) + len); + if (b == NULL) return NULL; + + b->len = 0; + b->size = len; + b->realptr = NULL; + + return b; +} + +bstr *bstr_add(bstr *destination, const bstr *source) { + return bstr_add_mem(destination, bstr_ptr(source), bstr_len(source)); +} + +bstr *bstr_add_c(bstr *bdestination, const char *csource) { + return bstr_add_mem(bdestination, csource, strlen(csource)); +} + +bstr *bstr_add_c_noex(bstr *destination, const char *source) { + return bstr_add_mem_noex(destination, source, strlen(source)); +} + +bstr *bstr_add_mem(bstr *destination, const void *data, size_t len) { + // Expand the destination if necessary + if (bstr_size(destination) < bstr_len(destination) + len) { + destination = bstr_expand(destination, bstr_len(destination) + len); + if (destination == NULL) return NULL; + } + + // Add source to destination + bstr *b = (bstr *) destination; + memcpy(bstr_ptr(destination) + bstr_len(b), data, len); + bstr_adjust_len(b, bstr_len(b) + len); + + return destination; +} + +bstr *bstr_add_mem_noex(bstr *destination, const void *data, size_t len) { + size_t copylen = len; + + // Is there enough room in the destination? 
+ if (bstr_size(destination) < bstr_len(destination) + copylen) { + copylen = bstr_size(destination) - bstr_len(destination); + if (copylen <= 0) return destination; + } + + // Copy over the bytes + bstr *b = (bstr *) destination; + memcpy(bstr_ptr(destination) + bstr_len(b), data, copylen); + bstr_adjust_len(b, bstr_len(b) + copylen); + + return destination; +} + +bstr *bstr_add_noex(bstr *destination, const bstr *source) { + return bstr_add_mem_noex(destination, bstr_ptr(source), bstr_len(source)); +} + +void bstr_adjust_len(bstr *b, size_t newlen) { + b->len = newlen; +} + +void bstr_adjust_realptr(bstr *b, void *newrealptr) { + b->realptr = newrealptr; +} + +void bstr_adjust_size(bstr *b, size_t newsize) { + b->size = newsize; +} + +int bstr_begins_with(const bstr *haystack, const bstr *needle) { + return bstr_begins_with_mem(haystack, bstr_ptr(needle), bstr_len(needle)); +} + +int bstr_begins_with_c(const bstr *haystack, const char *needle) { + return bstr_begins_with_mem(haystack, needle, strlen(needle)); +} + +int bstr_begins_with_c_nocase(const bstr *haystack, const char *needle) { + return bstr_begins_with_mem_nocase(haystack, needle, strlen(needle)); +} + +int bstr_begins_with_nocase(const bstr *haystack, const bstr *needle) { + return bstr_begins_with_mem_nocase(haystack, bstr_ptr(needle), bstr_len(needle)); +} + +int bstr_begins_with_mem(const bstr *haystack, const void *_data, size_t len) { + const unsigned char *data = (unsigned char *) _data; + const unsigned char *hdata = bstr_ptr(haystack); + size_t hlen = bstr_len(haystack); + size_t pos = 0; + + while ((pos < len) && (pos < hlen)) { + if (hdata[pos] != data[pos]) { + return 0; + } + + pos++; + } + + if (pos == len) { + return 1; + } else { + return 0; + } +} + +int bstr_begins_with_mem_nocase(const bstr *haystack, const void *_data, size_t len) { + const unsigned char *data = (const unsigned char *) _data; + const unsigned char *hdata = bstr_ptr(haystack); + size_t hlen = bstr_len(haystack); + 
size_t pos = 0; + + while ((pos < len) && (pos < hlen)) { + if (tolower((int) hdata[pos]) != tolower((int) data[pos])) { + return 0; + } + + pos++; + } + + if (pos == len) { + return 1; + } else { + return 0; + } +} + +int bstr_char_at(const bstr *b, size_t pos) { + unsigned char *data = bstr_ptr(b); + size_t len = bstr_len(b); + + if (pos >= len) return -1; + return data[pos]; +} + +int bstr_char_at_end(const bstr *b, size_t pos) { + unsigned char *data = bstr_ptr(b); + size_t len = bstr_len(b); + + if (pos >= len) return -1; + return data[len - 1 - pos]; +} + +void bstr_chop(bstr *b) { + if (bstr_len(b) > 0) { + bstr_adjust_len(b, bstr_len(b) - 1); + } +} + +int bstr_chr(const bstr *b, int c) { + unsigned char *data = bstr_ptr(b); + size_t len = bstr_len(b); + + size_t i = 0; + while (i < len) { + if (data[i] == c) { + return (int) i; + } + + i++; + } + + return -1; +} + +int bstr_cmp(const bstr *b1, const bstr *b2) { + return bstr_util_cmp_mem(bstr_ptr(b1), bstr_len(b1), bstr_ptr(b2), bstr_len(b2)); +} + +int bstr_cmp_c(const bstr *b, const char *c) { + return bstr_util_cmp_mem(bstr_ptr(b), bstr_len(b), c, strlen(c)); +} + +int bstr_cmp_c_nocase(const bstr *b, const char *c) { + return bstr_util_cmp_mem_nocase(bstr_ptr(b), bstr_len(b), c, strlen(c)); +} + +int bstr_cmp_c_nocasenorzero(const bstr *b, const char *c) { + return bstr_util_cmp_mem_nocasenorzero(bstr_ptr(b), bstr_len(b), c, strlen(c)); +} + +int bstr_cmp_mem(const bstr *b, const void *data, size_t len) { + return bstr_util_cmp_mem(bstr_ptr(b), bstr_len(b), data, len); +} + +int bstr_cmp_mem_nocase(const bstr *b, const void *data, size_t len) { + return bstr_util_cmp_mem_nocase(bstr_ptr(b), bstr_len(b), data, len); +} + +int bstr_cmp_nocase(const bstr *b1, const bstr *b2) { + return bstr_util_cmp_mem_nocase(bstr_ptr(b1), bstr_len(b1), bstr_ptr(b2), bstr_len(b2)); +} + +bstr *bstr_dup(const bstr *b) { + return bstr_dup_ex(b, 0, bstr_len(b)); +} + +bstr *bstr_dup_c(const char *cstr) { + return 
bstr_dup_mem(cstr, strlen(cstr)); +} + +bstr *bstr_dup_ex(const bstr *b, size_t offset, size_t len) { + bstr *bnew = bstr_alloc(len); + if (bnew == NULL) return NULL; + memcpy(bstr_ptr(bnew), bstr_ptr(b) + offset, len); + bstr_adjust_len(bnew, len); + return bnew; +} + +bstr *bstr_dup_lower(const bstr *b) { + return bstr_to_lowercase(bstr_dup(b)); +} + +bstr *bstr_dup_mem(const void *data, size_t len) { + bstr *bnew = bstr_alloc(len); + if (bnew == NULL) return NULL; + memcpy(bstr_ptr(bnew), data, len); + bstr_adjust_len(bnew, len); + return bnew; +} + +bstr *bstr_expand(bstr *b, size_t newsize) { + if (bstr_realptr(b) != NULL) { + // Refuse to expand a wrapped bstring. In the future, + // we can change this to make a copy of the data, thus + // leaving the original memory area intact. + return NULL; + } + + // Catch attempts to "expand" to a smaller size + if (bstr_size(b) > newsize) return NULL; + + bstr *bnew = realloc(b, sizeof (bstr) + newsize); + if (bnew == NULL) return NULL; + + bstr_adjust_size(bnew, newsize); + + return bnew; +} + +void bstr_free(bstr *b) { + if (b == NULL) return; + free(b); +} + +int bstr_index_of(const bstr *haystack, const bstr *needle) { + return bstr_index_of_mem(haystack, bstr_ptr(needle), bstr_len(needle)); +} + +int bstr_index_of_c(const bstr *haystack, const char *needle) { + return bstr_index_of_mem(haystack, needle, strlen(needle)); +} + +int bstr_index_of_c_nocase(const bstr *haystack, const char *needle) { + return bstr_index_of_mem_nocase(haystack, needle, strlen(needle)); +} + +int bstr_index_of_c_nocasenorzero(const bstr *haystack, const char *needle) { + return bstr_util_mem_index_of_mem_nocasenorzero(bstr_ptr(haystack), bstr_len(haystack), needle, strlen(needle)); +} + +int bstr_index_of_mem(const bstr *haystack, const void *_data2, size_t len2) { + return bstr_util_mem_index_of_mem(bstr_ptr(haystack), bstr_len(haystack), _data2, len2); +} + +int bstr_index_of_mem_nocase(const bstr *haystack, const void *_data2, size_t 
len2) { + return bstr_util_mem_index_of_mem_nocase(bstr_ptr(haystack), bstr_len(haystack), _data2, len2); +} + +int bstr_index_of_nocase(const bstr *haystack, const bstr *needle) { + return bstr_index_of_mem_nocase(haystack, bstr_ptr(needle), bstr_len(needle)); +} + +int bstr_rchr(const bstr *b, int c) { + const unsigned char *data = bstr_ptr(b); + size_t len = bstr_len(b); + + size_t i = len; + while (i > 0) { + if (data[i - 1] == c) { + return (int) (i - 1); + } + + i--; + } + + return -1; +} + +bstr *bstr_to_lowercase(bstr *b) { + if (b == NULL) return NULL; + + unsigned char *data = bstr_ptr(b); + size_t len = bstr_len(b); + + size_t i = 0; + while (i < len) { + data[i] = (uint8_t)tolower(data[i]); + i++; + } + + return b; +} + +int bstr_util_cmp_mem(const void *_data1, size_t len1, const void *_data2, size_t len2) { + const unsigned char *data1 = (const unsigned char *) _data1; + const unsigned char *data2 = (const unsigned char *) _data2; + size_t p1 = 0, p2 = 0; + + while ((p1 < len1) && (p2 < len2)) { + if (data1[p1] != data2[p2]) { + // Difference. + return (data1[p1] < data2[p2]) ? -1 : 1; + } + + p1++; + p2++; + } + + if ((p1 == len2) && (p2 == len1)) { + // They're identical. + return 0; + } else { + // One string is shorter. + if (p1 == len1) return -1; + else return 1; + } +} + +int bstr_util_cmp_mem_nocase(const void *_data1, size_t len1, const void *_data2, size_t len2) { + const unsigned char *data1 = (const unsigned char *) _data1; + const unsigned char *data2 = (const unsigned char *) _data2; + size_t p1 = 0, p2 = 0; + + while ((p1 < len1) && (p2 < len2)) { + if (tolower(data1[p1]) != tolower(data2[p2])) { + // Difference. + return (tolower(data1[p1]) < tolower(data2[p2])) ? -1 : 1; + } + + p1++; + p2++; + } + + if ((p1 == len2) && (p2 == len1)) { + // They're identical. + return 0; + } else { + // One string is shorter. 
+ if (p1 == len1) return -1; + else return 1; + } +} + +int bstr_util_cmp_mem_nocasenorzero(const void *_data1, size_t len1, const void *_data2, size_t len2) { + const unsigned char *data1 = (const unsigned char *) _data1; + const unsigned char *data2 = (const unsigned char *) _data2; + size_t p1 = 0, p2 = 0; + + while ((p1 < len1) && (p2 < len2)) { + if (data1[p1] == 0) { + p1++; + continue; + } + if (tolower(data1[p1]) != tolower(data2[p2])) { + // Difference. + return (tolower(data1[p1]) < tolower(data2[p2])) ? -1 : 1; + } + + p1++; + p2++; + } + + while((p1 < len1) && (data1[p1] == 0)) { + p1++; + } + if ((p1 == len1) && (p2 == len2)) { + // They're identical. + return 0; + } else { + // One string is shorter. + if (p1 == len1) return -1; + else return 1; + } +} + +int64_t bstr_util_mem_to_pint(const void *_data, size_t len, int base, size_t *lastlen) { + const unsigned char *data = (unsigned char *) _data; + int64_t rval = 0, tflag = 0; + size_t i = 0; + + *lastlen = i; + + for (i = 0; i < len; i++) { + int d = data[i]; + + *lastlen = i; + + // Convert character to digit. + if ((d >= '0') && (d <= '9')) { + d -= '0'; + } else if ((d >= 'a') && (d <= 'z')) { + d -= 'a' - 10; + } else if ((d >= 'A') && (d <= 'Z')) { + d -= 'A' - 10; + } else { + d = -1; + } + + // Check that the digit makes sense with the base we are using. + if ((d == -1) || (d >= base)) { + if (tflag) { + // Return what we have so far; lastlen points + // to the first non-digit position. + return rval; + } else { + // We didn't see a single digit. 
+ return -1; + } + } + + if (tflag) { + if (((INT64_MAX - d) / base) < rval) { + // Overflow + return -2; + } + + rval *= base; + rval += d; + } else { + rval = d; + tflag = 1; + } + } + + *lastlen = i + 1; + + return rval; +} + +int bstr_util_mem_index_of_c(const void *_data1, size_t len1, const char *cstr) { + return bstr_util_mem_index_of_mem(_data1, len1, cstr, strlen(cstr)); +} + +int bstr_util_mem_index_of_c_nocase(const void *_data1, size_t len1, const char *cstr) { + return bstr_util_mem_index_of_mem_nocase(_data1, len1, cstr, strlen(cstr)); +} + +int bstr_util_mem_index_of_mem(const void *_data1, size_t len1, const void *_data2, size_t len2) { + const unsigned char *data1 = (unsigned char *) _data1; + const unsigned char *data2 = (unsigned char *) _data2; + size_t i, j; + + // If we ever want to optimize this function, the following link + // might be useful: http://en.wikipedia.org/wiki/Knuth-Morris-Pratt_algorithm + + for (i = 0; i < len1; i++) { + size_t k = i; + + for (j = 0; ((j < len2) && (k < len1)); j++, k++) { + if (data1[k] != data2[j]) break; + } + + if (j == len2) { + return (int) i; + } + } + + return -1; +} + +int bstr_util_mem_index_of_mem_nocase(const void *_data1, size_t len1, const void *_data2, size_t len2) { + const unsigned char *data1 = (unsigned char *) _data1; + const unsigned char *data2 = (unsigned char *) _data2; + size_t i, j; + + // If we ever want to optimize this function, the following link + // might be useful: http://en.wikipedia.org/wiki/Knuth-Morris-Pratt_algorithm + + for (i = 0; i < len1; i++) { + size_t k = i; + + for (j = 0; ((j < len2) && (k < len1)); j++, k++) { + if (toupper(data1[k]) != toupper(data2[j])) break; + } + + if (j == len2) { + return (int) i; + } + } + + return -1; +} + +int bstr_util_mem_index_of_mem_nocasenorzero(const void *_data1, size_t len1, const void *_data2, size_t len2) { + const unsigned char *data1 = (unsigned char *) _data1; + const unsigned char *data2 = (unsigned char *) _data2; + 
size_t i, j; + + // If we ever want to optimize this function, the following link + // might be useful: http://en.wikipedia.org/wiki/Knuth-Morris-Pratt_algorithm + + for (i = 0; i < len1; i++) { + size_t k = i; + if (data1[i] == 0) { + // skip leading zeroes to avoid quadratic complexity + continue; + } + + for (j = 0; ((j < len2) && (k < len1)); j++, k++) { + if (data1[k] == 0) { + j--; + continue; + } + if (toupper(data1[k]) != toupper(data2[j])) break; + } + + if (j == len2) { + return (int) i; + } + } + + return -1; +} + +void bstr_util_mem_trim(unsigned char **data, size_t *len) { + if ((data == NULL)||(len == NULL)) return; + + unsigned char *d = *data; + size_t l = *len; + + // Ignore whitespace at the beginning. + size_t pos = 0; + while ((pos < l) && isspace(d[pos])) pos++; + d += pos; + l -= pos; + + // Ignore whitespace at the end. + while ((l > 0)&&(isspace(d[l - 1]))) l--; + + *data = d; + *len = l; +} + +char *bstr_util_memdup_to_c(const void *_data, size_t len) { + const unsigned char *data = (unsigned char *) _data; + + // Count how many NUL bytes we have in the string. + size_t i, nulls = 0; + for (i = 0; i < len; i++) { + if (data[i] == '\0') { + nulls++; + } + } + + // Now copy the string into a NUL-terminated buffer. 
+ + char *r, *d; + r = d = malloc(len + nulls + 1); + if (d == NULL) return NULL; + + while (len--) { + if (*data == '\0') { + data++; + *d++ = '\\'; + *d++ = '0'; + } else { + *d++ = *data++; + } + } + + *d = '\0'; + + return r; +} + +char *bstr_util_strdup_to_c(const bstr *b) { + if (b == NULL) return NULL; + return bstr_util_memdup_to_c(bstr_ptr(b), bstr_len(b)); +} + +bstr *bstr_wrap_c(const char *cstr) { + return bstr_wrap_mem((unsigned char *) cstr, strlen(cstr)); +} + +bstr *bstr_wrap_mem(const void *data, size_t len) { + bstr *b = (bstr *) malloc(sizeof (bstr)); + if (b == NULL) return NULL; + + b->size = b->len = len; + b->realptr = (unsigned char *) data; + + return b; +} diff --git a/htp/bstr.h b/htp/bstr.h new file mode 100644 index 0000000..eb6497b --- /dev/null +++ b/htp/bstr.h @@ -0,0 +1,678 @@ +/*************************************************************************** + * Copyright (c) 2009-2010 Open Information Security Foundation + * Copyright (c) 2010-2013 Qualys, Inc. + * All rights reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions are + * met: + * + * - Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + + * - Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution. + + * - Neither the name of the Qualys, Inc. nor the names of its + * contributors may be used to endorse or promote products derived from + * this software without specific prior written permission. 
+ * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS + * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT + * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR + * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT + * HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, + * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT + * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, + * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY + * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT + * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE + * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + ***************************************************************************/ + +/** + * @file + * @author Ivan Ristic <ivanr@webkreator.com> + */ + +#ifndef _BSTR_H +#define _BSTR_H + +#ifdef __cplusplus +extern "C" { +#endif + +typedef struct bstr_t bstr; + +#include <stdio.h> +#include <stdlib.h> +#include <stdint.h> +#include <string.h> + +#include "bstr_builder.h" + +// Data structures + +struct bstr_t { + /** The length of the string stored in the buffer. */ + size_t len; + + /** The current size of the buffer. If there is extra room in the + * buffer the string will be able to expand without reallocation. + */ + size_t size; + + /** Optional buffer pointer. If this pointer is NULL the string buffer + * will immediately follow this structure. If the pointer is not NULL, + * it points to the actual buffer used, and there's no data following + * this structure. + */ + unsigned char *realptr; +}; + + +// Defines + +#define bstr_len(X) ((*(X)).len) +#define bstr_size(X) ((*(X)).size) +#define bstr_ptr(X) ( ((*(X)).realptr == NULL) ? 
((unsigned char *)(X) + sizeof(bstr)) : (unsigned char *)(*(X)).realptr ) +#define bstr_realptr(X) ((*(X)).realptr) + + +// Functions + +/** + * Append source bstring to destination bstring, growing destination if + * necessary. If the destination bstring is expanded, the pointer will change. + * You must replace the original destination pointer with the returned one. + * Destination is not changed on memory allocation failure. + * + * @param[in] bdestination + * @param[in] bsource + * @return Updated bstring, or NULL on memory allocation failure. + */ +bstr *bstr_add(bstr *bdestination, const bstr *bsource); + +/** + * Append a NUL-terminated source to destination, growing destination if + * necessary. If the string is expanded, the pointer will change. You must + * replace the original destination pointer with the returned one. Destination + * is not changed on memory allocation failure. + * + * @param[in] b + * @param[in] cstr + * @return Updated bstring, or NULL on memory allocation failure. + */ +bstr *bstr_add_c(bstr *b, const char *cstr); + +/** + * Append as many bytes from the source to destination bstring. The + * destination storage will not be expanded if there is not enough space in it + * already to accommodate all of the data. + * + * @param[in] b + * @param[in] cstr + * @return The destination bstring. + */ +bstr *bstr_add_c_noex(bstr *b, const char *cstr); + +/** + * Append a memory region to destination, growing destination if necessary. If + * the string is expanded, the pointer will change. You must replace the + * original destination pointer with the returned one. Destination is not + * changed on memory allocation failure. + * + * @param[in] b + * @param[in] data + * @param[in] len + * @return Updated bstring, or NULL on memory allocation failure. + */ +bstr *bstr_add_mem(bstr *b, const void *data, size_t len); + +/** + * Append as many bytes from the source to destination bstring. 
The + * destination storage will not be expanded if there is not enough space in it + * already to accommodate all of the data. + * + * @param[in] b + * @param[in] data + * @param[in] len + * @return The destination bstring. + */ +bstr *bstr_add_mem_noex(bstr *b, const void *data, size_t len); + +/** + * Append as many bytes from the source bstring to destination bstring. The + * destination storage will not be expanded if there is not enough space in it + * already to accommodate all of the data. + * + * @param[in] bdestination + * @param[in] bsource + * @return The destination bstring. + */ +bstr *bstr_add_noex(bstr *bdestination, const bstr *bsource); + +/** + * Adjust bstring length. You will need to use this method whenever + * you work directly with the string contents, and end up changing + * its length by direct structure manipulation. + * + * @param[in] b + * @param[in] newlen + */ +void bstr_adjust_len(bstr *b, size_t newlen); + +/** + * Change the external pointer used by bstring. You will need to use this + * function only if you're messing with bstr internals. Use with caution. + * + * @param[in] b + * @param[in] newrealptr + */ +void bstr_adjust_realptr(bstr *b, void *newrealptr); + +/** + * Adjust bstring size. This does not change the size of the storage behind + * the bstring, just changes the field that keeps track of how many bytes + * there are in the storage. You will need to use this function only if + * you're messing with bstr internals. Use with caution. + * + * @param[in] b + * @param[in] newsize + */ +void bstr_adjust_size(bstr *b, size_t newsize); + +/** + * Allocate a zero-length bstring, reserving space for at least size bytes. + * + * @param[in] size + * @return New string instance + */ +bstr *bstr_alloc(size_t size); + +/** + * Checks whether bstring begins with another bstring. Case sensitive. + * + * @param[in] bhaystack + * @param[in] bneedle + * @return 1 if true, otherwise 0. 
+ */ +int bstr_begins_with(const bstr *bhaystack, const bstr *bneedle); + +/** + * Checks whether bstring begins with NUL-terminated string. Case sensitive. + * + * @param[in] bhaystack + * @param[in] cneedle + * @return 1 if true, otherwise 0. + */ +int bstr_begins_with_c(const bstr *bhaystack, const char *cneedle); + +/** + * Checks whether bstring begins with NUL-terminated string. Case insensitive. + * + * @param[in] bhaystack + * @param[in] cneedle + * @return 1 if true, otherwise 0. + */ +int bstr_begins_with_c_nocase(const bstr *bhaystack, const char *cneedle); + +/** + * Checks whether the bstring begins with the given memory block. Case sensitive. + * + * @param[in] bhaystack + * @param[in] data + * @param[in] len + * @return 1 if true, otherwise 0. + */ +int bstr_begins_with_mem(const bstr *bhaystack, const void *data, size_t len); + +/** + * Checks whether bstring begins with memory block. Case insensitive. + * + * @param[in] bhaystack + * @param[in] data + * @param[in] len + * @return 1 if true, otherwise 0. + */ +int bstr_begins_with_mem_nocase(const bstr *bhaystack, const void *data, size_t len); + +/** + * Checks whether bstring begins with another bstring. Case insensitive. + * + * @param[in] bhaystack + * @param[in] cneedle + * @return 1 if true, otherwise 0. + */ +int bstr_begins_with_nocase(const bstr *bhaystack, const bstr *cneedle); + +/** + * Return the byte at the given position. + * + * @param[in] b + * @param[in] pos + * @return The byte at the given location, or -1 if the position is out of range. + */ +int bstr_char_at(const bstr *b, size_t pos); + +/** + * Return the byte at the given position, counting from the end of the string (e.g., + * byte at position 0 is the last byte in the string.) + * + * @param[in] b + * @param[in] pos + * @return The byte at the given location, or -1 if the position is out of range. 
+ */ +int bstr_char_at_end(const bstr *b, size_t pos); + +/** + * Remove the last byte from bstring, assuming it contains at least one byte. This + * function will not reduce the storage that backs the string, only the amount + * of data used. + * + * @param[in] b + */ +void bstr_chop(bstr *b); + +/** + * Return the first position of the provided byte. + * + * @param[in] b + * @param[in] c + * @return The first position of the byte, or -1 if it could not be found + */ +int bstr_chr(const bstr *b, int c); + +/** + * Case-sensitive comparison of two bstrings. + * + * @param[in] b1 + * @param[in] b2 + * @return Zero on string match, 1 if b1 is greater than b2, and -1 if b2 is + * greater than b1. + */ +int bstr_cmp(const bstr *b1, const bstr *b2); + +/** + * Case-sensitive comparison of a bstring and a NUL-terminated string. + * + * @param[in] b + * @param[in] cstr + * @return Zero on string match, 1 if b is greater than cstr, and -1 if cstr is + * greater than b. + */ +int bstr_cmp_c(const bstr *b, const char *cstr); + +/** + * Case-insensitive comparison of a bstring with a NUL-terminated string. + * + * @param[in] b + * @param[in] cstr + * @return Zero on string match, 1 if b is greater than cstr, and -1 if cstr is greater than b. + */ +int bstr_cmp_c_nocase(const bstr *b, const char *cstr); + +/** + * Case-insensitive zero-skipping comparison of a bstring with a NUL-terminated string. + * + * @param[in] b + * @param[in] cstr + * @return Zero on string match, 1 if b is greater than cstr, and -1 if cstr is greater than b. + */ +int bstr_cmp_c_nocasenorzero(const bstr *b, const char *cstr); + +/** + * Performs a case-sensitive comparison of a bstring with a memory region. + * + * @param[in] b + * @param[in] data + * @param[in] len + * @return Zero on a match, 1 if b is greater than data, and -1 if data is greater than b. 
+ */ +int bstr_cmp_mem(const bstr *b, const void *data, size_t len); + +/** + * Performs a case-insensitive comparison of a bstring with a memory region. + * + * @param[in] b + * @param[in] data + * @param[in] len + * @return Zero on a match, 1 if b is greater than data, and -1 if data is greater than b. + */ +int bstr_cmp_mem_nocase(const bstr *b, const void *data, size_t len); + +/** + * Case-insensitive comparison of two bstrings. + * + * @param[in] b1 + * @param[in] b2 + * @return Zero on string match, 1 if b1 is greater than b2, and -1 if b2 is + * greater than b1. + */ +int bstr_cmp_nocase(const bstr *b1, const bstr *b2); + +/** + * Case-insensitive and zero-skipping comparison of two bstrings. + * + * @param[in] b1 + * @param[in] b2 + * @return Zero on string match, 1 if b1 is greater than b2, and -1 if b2 is + * greater than b1. + */ +int bstr_cmp_nocasenorzero(const bstr *b1, const bstr *b2); + +/** + * Create a new bstring by copying the provided bstring. + * + * @param[in] b + * @return New bstring, or NULL if memory allocation failed. + */ +bstr *bstr_dup(const bstr *b); + +/** + * Create a new bstring by copying the provided NUL-terminated string. + * + * @param[in] cstr + * @return New bstring, or NULL if memory allocation failed. + */ +bstr *bstr_dup_c(const char *cstr); + +/** + * Create a new bstring by copying a part of the provided bstring. + * + * @param[in] b + * @param[in] offset + * @param[in] len + * @return New bstring, or NULL if memory allocation failed. + */ +bstr *bstr_dup_ex(const bstr *b, size_t offset, size_t len); + +/** + * Create a copy of the provided bstring, then convert it to lowercase. + * + * @param[in] b + * @return New bstring, or NULL if memory allocation failed + */ +bstr *bstr_dup_lower(const bstr *b); + +/** + * Create a new bstring by copying the provided memory region. 
+ * + * @param[in] data + * @param[in] len + * @return New bstring, or NULL if memory allocation failed + */ +bstr *bstr_dup_mem(const void *data, size_t len); + +/** + * Expand internal bstring storage to support at least newsize bytes. The storage + * is not expanded if the current size is equal to or greater than newsize. Because + * realloc is used underneath, the old pointer to bstring may no longer be valid + * after this function completes successfully. + * + * @param[in] b + * @param[in] newsize + * @return Updated string instance, or NULL if memory allocation failed, if the + * bstring wraps an external buffer, or if an attempt was made to + * "expand" the bstring to a smaller size. + */ +bstr *bstr_expand(bstr *b, size_t newsize); + +/** + * Deallocate the supplied bstring instance. The pointer is passed by value, so + * the caller's copy is left unchanged and must not be used afterwards. Allows + * NULL on input. + * + * @param[in] b + */ +void bstr_free(bstr *b); + +/** + * Find the needle in the haystack. + * + * @param[in] bhaystack + * @param[in] bneedle + * @return Position of the match, or -1 if the needle could not be found. + */ +int bstr_index_of(const bstr *bhaystack, const bstr *bneedle); + +/** + * Find the needle in the haystack, ignoring case differences. + * + * @param[in] bhaystack + * @param[in] bneedle + * @return Position of the match, or -1 if the needle could not be found. + */ +int bstr_index_of_nocase(const bstr *bhaystack, const bstr *bneedle); + +/** + * Find the needle in the haystack, with the needle being a NUL-terminated + * string. + * + * @param[in] bhaystack + * @param[in] cneedle + * @return Position of the match, or -1 if the needle could not be found. + */ +int bstr_index_of_c(const bstr *bhaystack, const char *cneedle); + +/** + * Find the needle in the haystack, with the needle being a NUL-terminated + * string. Ignore case differences. + * + * @param[in] bhaystack + * @param[in] cneedle + * @return Position of the match, or -1 if the needle could not be found. 
+ */ +int bstr_index_of_c_nocase(const bstr *bhaystack, const char *cneedle); + +/** + * Find the needle in the haystack, with the needle being a NUL-terminated + * string. Ignore case differences. Skip zeroes in haystack + * + * @param[in] bhaystack + * @param[in] cneedle + * @return Position of the match, or -1 if the needle could not be found. + */ +int bstr_index_of_c_nocasenorzero(const bstr *bhaystack, const char *cneedle); + +/** + * Find the needle in the haystack, with the needle being a memory region. + * + * @param[in] bhaystack + * @param[in] data + * @param[in] len + * @return Position of the match, or -1 if the needle could not be found. + */ +int bstr_index_of_mem(const bstr *bhaystack, const void *data, size_t len); + +/** + * Find the needle in the haystack, with the needle being a memory region. + * Ignore case differences. + * + * @param[in] bhaystack + * @param[in] data + * @param[in] len + * @return Position of the match, or -1 if the needle could not be found. + */ +int bstr_index_of_mem_nocase(const bstr *bhaystack, const void *data, size_t len); + +/** + * Return the last position of a character (byte). + * + * @param[in] b + * @param[in] c + * @return The last position of the character, or -1 if it could not be found. + */ +int bstr_rchr(const bstr *b, int c); + +/** + * Convert bstring to lowercase. This function converts the supplied string, + * it does not create a new string. + * + * @param[in] b + * @return The same bstring received on input + */ +bstr *bstr_to_lowercase(bstr *b); + +/** + * Case-sensitive comparison of two memory regions. + * + * @param[in] data1 + * @param[in] len1 + * @param[in] data2 + * @param[in] len2 + * @return Zero if the memory regions are identical, 1 if data1 is greater than + * data2, and -1 if data2 is greater than data1. + */ +int bstr_util_cmp_mem(const void *data1, size_t len1, const void *data2, size_t len2); + +/** + * Case-insensitive comparison of two memory regions. 
+ * + * @param[in] data1 + * @param[in] len1 + * @param[in] data2 + * @param[in] len2 + * @return Zero if the memory regions are identical, 1 if data1 is greater than + * data2, and -1 if data2 is greater than data1. + */ + int bstr_util_cmp_mem_nocase(const void *data1, size_t len1, const void *data2, size_t len2); + +/** + * Case-insensitive zero-skipping comparison of two memory regions. + * + * @param[in] data1 + * @param[in] len1 + * @param[in] data2 + * @param[in] len2 + * @return Zero if the memory regions are identical, 1 if data1 is greater than + * data2, and -1 if data2 is greater than data1. + */ + int bstr_util_cmp_mem_nocasenorzero(const void *data1, size_t len1, const void *data2, size_t len2); + +/** + * Convert contents of a memory region to a positive integer. + * + * @param[in] data + * @param[in] len + * @param[in] base The desired number base. + * @param[out] lastlen Receives the position of the first unused byte in the region. + * @return If the conversion was successful, this function returns the + * number. When the conversion fails, -1 will be returned when not + * one valid digit was found, and -2 will be returned if an overflow + * occurred. + */ +int64_t bstr_util_mem_to_pint(const void *data, size_t len, int base, size_t *lastlen); + +/** + * Searches a memory block for the given NUL-terminated string. Case sensitive. + * + * @param[in] data + * @param[in] len + * @param[in] cstr + * @return Index of the first location of the needle on success, or -1 if the needle was not found. + */ +int bstr_util_mem_index_of_c(const void *data, size_t len, const char *cstr); + +/** + * Searches a memory block for the given NUL-terminated string. Case insensitive. + * + * @param[in] data + * @param[in] len + * @param[in] cstr + * @return Index of the first location of the needle on success, or -1 if the needle was not found. 
+ */ +int bstr_util_mem_index_of_c_nocase(const void *data, size_t len, const char *cstr); + +/** + * Searches the haystack memory block for the needle memory block. Case sensitive. + * + * @param[in] data1 Haystack data. + * @param[in] len1 Haystack length. + * @param[in] data2 Needle data. + * @param[in] len2 Needle length. + * @return Index of the first location of the needle on success, or -1 if the needle was not found. + */ +int bstr_util_mem_index_of_mem(const void *data1, size_t len1, const void *data2, size_t len2); + +/** + * Searches the haystack memory block for the needle memory block. Case insensitive. + * + * @param[in] data1 Haystack data. + * @param[in] len1 Haystack length. + * @param[in] data2 Needle data. + * @param[in] len2 Needle length. + * @return Index of the first location of the needle on success, or -1 if the needle was not found. + */ +int bstr_util_mem_index_of_mem_nocase(const void *data1, size_t len1, const void *data2, size_t len2); + +/** + * Searches the haystack memory block for the needle memory block. Case insensitive. Skips zeroes in data1. + * + * @param[in] data1 Haystack data. + * @param[in] len1 Haystack length. + * @param[in] data2 Needle data. + * @param[in] len2 Needle length. + * @return Index of the first location of the needle on success, or -1 if the needle was not found. + */ +int bstr_util_mem_index_of_mem_nocasenorzero(const void *data1, size_t len1, const void *data2, size_t len2); + +/** + * Removes whitespace from the beginning and the end of a memory region. The data + * itself is not modified; this function only adjusts the provided pointers. + * + * @param[in,out] data + * @param[in,out] len + */ +void bstr_util_mem_trim(unsigned char **data, size_t *len); + +/** + * Take the provided memory region, allocate a new memory buffer, and construct + * a NUL-terminated string, replacing each NUL byte with "\0" (two bytes). The + * caller is responsible to keep track of the allocated memory area and free + * it once it is no longer needed. + * + * @param[in] data + * @param[in] len + * @return The newly created NUL-terminated string, or NULL in case of memory + * allocation failure. 
+ */ +char *bstr_util_memdup_to_c(const void *data, size_t len); + +/** + * Create a new NUL-terminated string out of the provided bstring. If NUL bytes + * are contained in the bstring, each will be replaced with "\0" (two characters). + * The caller is responsible to keep track of the allocated memory area and free + * it once it is no longer needed. + * + * @param[in] b + * @return The newly created NUL-terminated string, or NULL in case of memory + * allocation failure. + */ +char *bstr_util_strdup_to_c(const bstr *b); + +/** + * Create a new bstring from the provided NUL-terminated string and without + * copying the data. The caller must ensure that the input string continues + * to point to a valid memory location for as long as the bstring is used. + * + * @param[in] cstr + * @return New bstring, or NULL on memory allocation failure. + */ +bstr *bstr_wrap_c(const char *cstr); + +/** + * Create a new bstring from the provided memory buffer without + * copying the data. The caller must ensure that the buffer remains + * valid for as long as the bstring is used. + * + * @param[in] data + * @param[in] len + * @return New bstring, or NULL on memory allocation failure. + */ +bstr *bstr_wrap_mem(const void *data, size_t len); + +#ifdef __cplusplus +} +#endif + +#endif /* _BSTR_H */ diff --git a/htp/bstr_builder.c b/htp/bstr_builder.c new file mode 100644 index 0000000..89394f6 --- /dev/null +++ b/htp/bstr_builder.c @@ -0,0 +1,121 @@ +/*************************************************************************** + * Copyright (c) 2009-2010 Open Information Security Foundation + * Copyright (c) 2010-2013 Qualys, Inc. + * All rights reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions are + * met: + * + * - Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. 
+ + * - Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution. + + * - Neither the name of the Qualys, Inc. nor the names of its + * contributors may be used to endorse or promote products derived from + * this software without specific prior written permission. + * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS + * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT + * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR + * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT + * HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, + * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT + * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, + * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY + * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT + * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE + * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
+ ***************************************************************************/ + +/** + * @file + * @author Ivan Ristic <ivanr@webkreator.com> + */ + +#include "bstr.h" +#include "htp_list.h" + +htp_status_t bstr_builder_appendn(bstr_builder_t *bb, bstr *b) { + return htp_list_push(bb->pieces, b); +} + +htp_status_t bstr_builder_append_c(bstr_builder_t *bb, const char *cstr) { + bstr *b = bstr_dup_c(cstr); + if (b == NULL) return HTP_ERROR; + return htp_list_push(bb->pieces, b); +} + +htp_status_t bstr_builder_append_mem(bstr_builder_t *bb, const void *data, size_t len) { + bstr *b = bstr_dup_mem(data, len); + if (b == NULL) return HTP_ERROR; + return htp_list_push(bb->pieces, b); +} + +void bstr_builder_clear(bstr_builder_t *bb) { + // Do nothing if the list is empty + if (htp_list_size(bb->pieces) == 0) return; + + for (size_t i = 0, n = htp_list_size(bb->pieces); i < n; i++) { + bstr *b = htp_list_get(bb->pieces, i); + bstr_free(b); + } + + htp_list_clear(bb->pieces); +} + +bstr_builder_t *bstr_builder_create(void) { + bstr_builder_t *bb = calloc(1, sizeof (bstr_builder_t)); + if (bb == NULL) return NULL; + + bb->pieces = htp_list_create(BSTR_BUILDER_DEFAULT_SIZE); + if (bb->pieces == NULL) { + free(bb); + return NULL; + } + + return bb; +} + +void bstr_builder_destroy(bstr_builder_t *bb) { + if (bb == NULL) return; + + // Destroy any pieces we might have + for (size_t i = 0, n = htp_list_size(bb->pieces); i < n; i++) { + bstr *b = htp_list_get(bb->pieces, i); + bstr_free(b); + } + + htp_list_destroy(bb->pieces); + + free(bb); +} + +size_t bstr_builder_size(const bstr_builder_t *bb) { + return htp_list_size(bb->pieces); +} + +bstr *bstr_builder_to_str(const bstr_builder_t *bb) { + size_t len = 0; + + // Determine the size of the string + for (size_t i = 0, n = htp_list_size(bb->pieces); i < n; i++) { + bstr *b = htp_list_get(bb->pieces, i); + len += bstr_len(b); + } + + // Allocate string + bstr *bnew = bstr_alloc(len); + if (bnew == NULL) return NULL; + + // 
Determine the size of the string + for (size_t i = 0, n = htp_list_size(bb->pieces); i < n; i++) { + bstr *b = htp_list_get(bb->pieces, i); + bstr_add_noex(bnew, b); + } + + return bnew; +} diff --git a/htp/bstr_builder.h b/htp/bstr_builder.h new file mode 100644 index 0000000..335a131 --- /dev/null +++ b/htp/bstr_builder.h @@ -0,0 +1,136 @@ +/*************************************************************************** + * Copyright (c) 2009-2010 Open Information Security Foundation + * Copyright (c) 2010-2013 Qualys, Inc. + * All rights reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions are + * met: + * + * - Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + + * - Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution. + + * - Neither the name of the Qualys, Inc. nor the names of its + * contributors may be used to endorse or promote products derived from + * this software without specific prior written permission. + * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS + * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT + * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR + * A PARTICULAR PURPOSE ARE DISCLAIMED. 
IN NO EVENT SHALL THE COPYRIGHT + * HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, + * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT + * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, + * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY + * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT + * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE + * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + ***************************************************************************/ + +/** + * @file + * @author Ivan Ristic <ivanr@webkreator.com> + */ + +#ifndef _BSTR_BUILDER_H +#define _BSTR_BUILDER_H + +#ifdef __cplusplus +extern "C" { +#endif + +typedef struct bstr_builder_t bstr_builder_t; + +#include "htp_list.h" + +struct bstr_builder_t { + htp_list_t *pieces; +}; + +#define BSTR_BUILDER_DEFAULT_SIZE 16 + +/** + * Adds one new string to the builder. This function will adopt the + * string and destroy it when the builder itself is destroyed. + * + * @param[in] bb + * @param[in] b + * @return HTP_OK on success, HTP_ERROR on failure. + */ +htp_status_t bstr_builder_appendn(bstr_builder_t *bb, bstr *b); + +/** + * Adds one new piece, in the form of a NUL-terminated string, to + * the builder. This function will make a copy of the provided string. + * + * @param[in] bb + * @param[in] cstr + * @return HTP_OK on success, HTP_ERROR on failure. + */ +htp_status_t bstr_builder_append_c(bstr_builder_t *bb, const char *cstr); + +/** + * Adds one new piece, defined with the supplied pointer and + * length, to the builder. This function will make a copy of the + * provided data region. + * + * @param[in] bb + * @param[in] data + * @param[in] len + * @return HTP_OK on success, HTP_ERROR on failure. 
+ */ +htp_status_t bstr_builder_append_mem(bstr_builder_t *bb, const void *data, size_t len); + +/** + * Clears this string builder, destroying all existing pieces. You may + * want to clear a builder once you've either read all the pieces and + * done something with them, or after you've converted the builder into + * a single string. + * + * @param[in] bb + */ +void bstr_builder_clear(bstr_builder_t *bb); + +/** + * Creates a new string builder. + * + * @return New string builder, or NULL on error. + */ +bstr_builder_t *bstr_builder_create(void); + +/** + * Destroys an existing string builder, also destroying all + * the pieces stored within. + * + * @param[in] bb + */ +void bstr_builder_destroy(bstr_builder_t *bb); + +/** + * Returns the size (the number of pieces) currently in a string builder. + * + * @param[in] bb + * @return size + */ +size_t bstr_builder_size(const bstr_builder_t *bb); + +/** + * Creates a single string out of all the pieces held in a + * string builder. This method will not destroy any of the pieces. + * + * @param[in] bb + * @return New string, or NULL on error. + */ +bstr *bstr_builder_to_str(const bstr_builder_t *bb); + + +#ifdef __cplusplus +} +#endif + +#endif /* _BSTR_BUILDER_H */ + diff --git a/htp/htp.h b/htp/htp.h new file mode 100644 index 0000000..36209ad --- /dev/null +++ b/htp/htp.h @@ -0,0 +1,678 @@ +/*************************************************************************** + * Copyright (c) 2009-2010 Open Information Security Foundation + * Copyright (c) 2010-2013 Qualys, Inc. + * All rights reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions are + * met: + * + * - Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. 
+ + * - Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution. + + * - Neither the name of the Qualys, Inc. nor the names of its + * contributors may be used to endorse or promote products derived from + * this software without specific prior written permission. + * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS + * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT + * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR + * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT + * HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, + * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT + * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, + * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY + * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT + * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE + * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + ***************************************************************************/ + +/** + * @file + * @author Ivan Ristic <ivanr@webkreator.com> + */ + +#ifndef _HTP_H +#define _HTP_H + +#ifdef __cplusplus +extern "C" { +#endif + +#include <sys/time.h> + +#include "htp_version.h" +#include "htp_core.h" + +#include "bstr.h" +#include "htp_base64.h" +#include "htp_config.h" +#include "htp_connection_parser.h" +#include "htp_decompressors.h" +#include "htp_hooks.h" +#include "htp_list.h" +#include "htp_multipart.h" +#include "htp_table.h" +#include "htp_transaction.h" +#include "htp_urlencoded.h" +#include "htp_utf8_decoder.h" + +/** + * Represents a single TCP connection. + */ +struct htp_conn_t { + /** Client IP address. */ + char *client_addr; + + /** Client port. 
*/ + int client_port; + + /** Server IP address. */ + char *server_addr; + + /** Server port. */ + int server_port; + + /** + * Transactions carried out on this connection. The list may contain + * NULL elements when some of the transactions are deleted (and then + * removed from a connection by calling htp_conn_remove_tx()). + */ + htp_list_t *transactions; + + /** Log messages associated with this connection. */ + htp_list_t *messages; + + /** Parsing flags: HTP_CONN_PIPELINED. */ + uint8_t flags; + + /** When was this connection opened? Can be NULL. */ + htp_time_t open_timestamp; + + /** When was this connection closed? Can be NULL. */ + htp_time_t close_timestamp; + + /** Inbound data counter. */ + int64_t in_data_counter; + + /** Outbound data counter. */ + int64_t out_data_counter; +}; + +/** + * Used to represent files that are seen during the processing of HTTP traffic. Most + * commonly this refers to files seen in multipart/form-data payloads. In addition, PUT + * request bodies can be treated as files. + */ +struct htp_file_t { + /** Where did this file come from? Possible values: HTP_FILE_MULTIPART and HTP_FILE_PUT. */ + enum htp_file_source_t source; + + /** File name, as provided (e.g., in the Content-Disposition multipart part header). */ + bstr *filename; + + /** File length. */ + int64_t len; + + /** The unique filename in which this file is stored on the filesystem, when applicable. */ + char *tmpname; + + /** The file descriptor used for external storage, or -1 if unused. */ + int fd; +}; + +/** + * Represents a chunk of file data. + */ +struct htp_file_data_t { + /** File information. */ + htp_file_t *file; + + /** Pointer to the data buffer. */ + const unsigned char *data; + + /** Buffer length. */ + size_t len; +}; + +/** + * Represents a single log entry. + */ +struct htp_log_t { + /** The connection parser associated with this log message. */ + htp_connp_t *connp; + + /** The transaction associated with this log message, if any. 
*/ + htp_tx_t *tx; + + /** Log message. */ + const char *msg; + + /** Message level. */ + enum htp_log_level_t level; + + /** Message code. */ + int code; + + /** File in which the code that emitted the message resides. */ + const char *file; + + /** Line number on which the code that emitted the message resides. */ + unsigned int line; +}; + +/** + * Represents a single request or response header. + */ +struct htp_header_t { + /** Header name. */ + bstr *name; + + /** Header value. */ + bstr *value; + + /** Parsing flags; a combination of: HTP_FIELD_INVALID, HTP_FIELD_FOLDED, HTP_FIELD_REPEATED. */ + uint64_t flags; +}; + +/** + * Represents a single request parameter. + */ +struct htp_param_t { + /** Parameter name. */ + bstr *name; + + /** Parameter value. */ + bstr *value; + + /** Source of the parameter, for example HTP_SOURCE_QUERY_STRING. */ + enum htp_data_source_t source; + + /** Type of the data structure referenced below. */ + enum htp_parser_id_t parser_id; + + /** + * Pointer to the parser data structure that contains + * complete information about the parameter. Can be NULL. + */ + void *parser_data; +}; + +/** + * Represents a single HTTP transaction, which is a combination of a request and a response. + */ +struct htp_tx_t { + /** The connection parser associated with this transaction. */ + htp_connp_t *connp; + + /** The connection to which this transaction belongs. */ + htp_conn_t *conn; + + /** The configuration structure associated with this transaction. */ + htp_cfg_t *cfg; + + /** + * Is the configuration structure shared with other transactions or connections? If + * this field is set to HTP_CONFIG_PRIVATE, the transaction owns the configuration. + */ + int is_config_shared; + + /** The user data associated with this transaction. */ + void *user_data; + + + // Request fields + + /** Contains a count of how many empty lines were skipped before the request line. */ + unsigned int request_ignored_lines; + + /** The first line of this request. 
*/ + bstr *request_line; + + /** Request method. */ + bstr *request_method; + + /** Request method, as number. Available only if we were able to recognize the request method. */ + enum htp_method_t request_method_number; + + /** + * Request URI, raw, as given to us on the request line. This field can take different forms, + * for example authority for CONNECT methods, absolute URIs for proxy requests, and the query + * string when one is provided. Use htp_tx_t::parsed_uri if you need access to specific + * URI elements. Can be NULL if the request line contains only a request method (which is + * an extreme case of HTTP/0.9, but passes in practice). + */ + bstr *request_uri; + + /** Request protocol, as text. Can be NULL if no protocol was specified. */ + bstr *request_protocol; + + /** + * Protocol version as a number. Multiply the high version number by 100, then add the low + * version number. You should prefer to work with the pre-defined HTP_PROTOCOL_* constants. + */ + int request_protocol_number; + + /** + * Is this request using HTTP/0.9? We need a separate field for this purpose because + * the protocol version alone is not sufficient to determine if HTTP/0.9 is used. For + * example, if you submit "GET / HTTP/0.9" to Apache, it will not treat the request + * as HTTP/0.9. + */ + int is_protocol_0_9; + + /** + * This structure holds the individual components parsed out of the request URI, with + * appropriate normalization and transformation applied, per configuration. No information + * is added. In extreme cases when no URI is provided on the request line, all fields + * will be NULL. (Well, except for port_number, which will be -1.) To inspect raw data, use + * htp_tx_t::request_uri or htp_tx_t::parsed_uri_raw. + */ + htp_uri_t *parsed_uri; + + /** + * This structure holds the individual components parsed out of the request URI, but + * without any modification. 
The purpose of this field is to allow you to look at the data as it + * was supplied on the request line. Fields can be NULL, depending on what data was supplied. + * The port_number field is always -1. + */ + htp_uri_t *parsed_uri_raw; + + /* HTTP 1.1 RFC + * + * 4.3 Message Body + * + * The message-body (if any) of an HTTP message is used to carry the + * entity-body associated with the request or response. The message-body + * differs from the entity-body only when a transfer-coding has been + * applied, as indicated by the Transfer-Encoding header field (section + * 14.41). + * + * message-body = entity-body + * | <entity-body encoded as per Transfer-Encoding> + */ + + /** + * The length of the request message-body. In most cases, this value + * will be the same as request_entity_len. The values will be different + * if request compression or chunking were applied. In that case, + * request_message_len contains the length of the request body as it + * has been seen over TCP; request_entity_len contains length after + * de-chunking and decompression. + */ + int64_t request_message_len; + + /** + * The length of the request entity-body. In most cases, this value + * will be the same as request_message_len. The values will be different + * if request compression or chunking were applied. In that case, + * request_message_len contains the length of the request body as it + * has been seen over TCP; request_entity_len contains length after + * de-chunking and decompression. + */ + int64_t request_entity_len; + + /** Parsed request headers. */ + htp_table_t *request_headers; + + /** + * Request transfer coding. Can be one of HTP_CODING_UNKNOWN (body presence not + * determined yet), HTP_CODING_IDENTITY, HTP_CODING_CHUNKED, HTP_CODING_NO_BODY, + * and HTP_CODING_UNRECOGNIZED. + */ + enum htp_transfer_coding_t request_transfer_coding; + + /** Request body compression. 
*/ + enum htp_content_encoding_t request_content_encoding; + + /** + * This field contains the request content type when that information is + * available in request headers. The contents of the field will be converted + * to lowercase and any parameters (e.g., character set information) removed. + */ + bstr *request_content_type; + + /** + * Contains the value specified in the Content-Length header. The value of this + * field will be -1 from the beginning of the transaction and until request + * headers are processed. It will stay -1 if the C-L header was not provided, + * or if the value in it cannot be parsed. + */ + int64_t request_content_length; + + /** + * Transaction-specific REQUEST_BODY_DATA hook. Behaves as + * the configuration hook with the same name. + */ + htp_hook_t *hook_request_body_data; + + /** + * Transaction-specific RESPONSE_BODY_DATA hook. Behaves as + * the configuration hook with the same name. + */ + htp_hook_t *hook_response_body_data; + + /** + * Query string URLENCODED parser. Available only + * when the query string is not NULL and not empty. + */ + htp_urlenp_t *request_urlenp_query; + + /** + * Request body URLENCODED parser. Available only when the request body is in the + * application/x-www-form-urlencoded format and the parser was configured to run. + */ + htp_urlenp_t *request_urlenp_body; + + /** + * Request body MULTIPART parser. Available only when the body is in the + * multipart/form-data format and the parser was configured to run. + */ + htp_mpartp_t *request_mpartp; + + /** Request parameters. */ + htp_table_t *request_params; + + /** Request cookies. */ + htp_table_t *request_cookies; + + /** Authentication type used in the request. */ + enum htp_auth_type_t request_auth_type; + + /** Authentication username. */ + bstr *request_auth_username; + + /** Authentication password. Available only when htp_tx_t::request_auth_type is HTP_AUTH_BASIC. */ + bstr *request_auth_password; + + /** + * Request hostname. 
Per the RFC, the hostname will be taken from the Host header + * when available. If the host information is also available in the URI, it is used + * instead of whatever might be in the Host header. Can be NULL. This field does + * not contain port information. + */ + bstr *request_hostname; + + /** + * Request port number, if presented. The rules for htp_tx_t::request_hostname apply. Set to + * -1 by default. + */ + int request_port_number; + + + // Response fields + + /** How many empty lines did we ignore before reaching the status line? */ + unsigned int response_ignored_lines; + + /** Response line. */ + bstr *response_line; + + /** Response protocol, as text. Can be NULL. */ + bstr *response_protocol; + + /** + * Response protocol as number. Available only if we were able to parse the protocol version, + * HTP_PROTOCOL_INVALID otherwise. HTP_PROTOCOL_UNKNOWN until parsing is attempted. + */ + int response_protocol_number; + + /** + * Response status code, as text. Starts as NULL and can remain NULL on + * an invalid response that does not specify status code. + */ + bstr *response_status; + + /** + * Response status code, available only if we were able to parse it, HTP_STATUS_INVALID + * otherwise. HTP_STATUS_UNKNOWN until parsing is attempted. + */ + int response_status_number; + + /** + * This field is set by the protocol decoder when it thinks that the + * backend server will reject a request with a particular status code. + */ + int response_status_expected_number; + + /** The message associated with the response status code. Can be NULL. */ + bstr *response_message; + + /** Have we seen the server respond with a 100 response? */ + int seen_100continue; + + /** Parsed response headers. Contains instances of htp_header_t. */ + htp_table_t *response_headers; + + /* HTTP 1.1 RFC + * + * 4.3 Message Body + * + * The message-body (if any) of an HTTP message is used to carry the + * entity-body associated with the request or response. 
The message-body + * differs from the entity-body only when a transfer-coding has been + * applied, as indicated by the Transfer-Encoding header field (section + * 14.41). + * + * message-body = entity-body + * | <entity-body encoded as per Transfer-Encoding> + */ + + /** + * The length of the response message-body. In most cases, this value + * will be the same as response_entity_len. The values will be different + * if response compression or chunking were applied. In that case, + * response_message_len contains the length of the response body as it + * has been seen over TCP; response_entity_len contains the length after + * de-chunking and decompression. + */ + int64_t response_message_len; + + /** + * The length of the response entity-body. In most cases, this value + * will be the same as response_message_len. The values will be different + * if response compression or chunking were applied. In that case, + * response_message_len contains the length of the response body as it + * has been seen over TCP; response_entity_len contains the length after + * de-chunking and decompression. + */ + int64_t response_entity_len; + + /** + * Contains the value specified in the Content-Length header. The value of this + * field will be -1 from the beginning of the transaction and until response + * headers are processed. It will stay -1 if the C-L header was not provided, + * or if the value in it cannot be parsed. + */ + int64_t response_content_length; + + /** + * Response transfer coding, which indicates if there is a response body, + * and how it is transported (e.g., as-is, or chunked). + */ + enum htp_transfer_coding_t response_transfer_coding; + + /** + * Response body compression, which indicates if compression is used + * for the response body. This field is an interpretation of the information + * available in response headers. 
+ */ + enum htp_content_encoding_t response_content_encoding; + + /** + * Response body compression processing information, which is related to how + * the library is going to process (or has processed) a response body. Changing + * this field mid-processing can influence library actions. For example, setting + * this field to HTP_COMPRESSION_NONE in a RESPONSE_HEADERS callback will prevent + * decompression. + */ + enum htp_content_encoding_t response_content_encoding_processing; + + /** + * This field will contain the response content type when that information + * is available in response headers. The contents of the field will be converted + * to lowercase and any parameters (e.g., character set information) removed. + */ + bstr *response_content_type; + + + // Common fields + + /** + * Parsing flags; a combination of: HTP_REQUEST_INVALID_T_E, HTP_INVALID_FOLDING, + * HTP_REQUEST_SMUGGLING, HTP_MULTI_PACKET_HEAD, and HTP_FIELD_UNPARSEABLE. + */ + uint64_t flags; + + /** Request progress. */ + enum htp_tx_req_progress_t request_progress; + + /** Response progress. */ + enum htp_tx_res_progress_t response_progress; + + /** Transaction index on the connection. */ + size_t index; + + /** Total repetitions for headers in request. */ + uint16_t req_header_repetitions; + + /** Total repetitions for headers in response. */ + uint16_t res_header_repetitions; +}; + +/** + * This structure is used to pass transaction data (for example + * request and response body buffers) to callbacks. + */ +struct htp_tx_data_t { + /** Transaction pointer. */ + htp_tx_t *tx; + + /** Pointer to the data buffer. */ + const unsigned char *data; + + /** Buffer length. */ + size_t len; + + /** + * Indicator if this chunk of data is the last in the series. Currently + * used only by REQUEST_HEADER_DATA, REQUEST_TRAILER_DATA, RESPONSE_HEADER_DATA, + * and RESPONSE_TRAILER_DATA callbacks. + */ + int is_last; +}; + +/** + * URI structure. 
Each of the fields provides access to a single + * URI element. Where an element is not present in a URI, the + * corresponding field will be set to NULL or -1, depending on the + * field type. + */ +struct htp_uri_t { + /** Scheme, e.g., "http". */ + bstr *scheme; + + /** Username. */ + bstr *username; + + /** Password. */ + bstr *password; + + /** Hostname. */ + bstr *hostname; + + /** Port, as string. */ + bstr *port; + + /** + * Port, as number. This field will contain HTP_PORT_NONE if there was + * no port information in the URI and HTP_PORT_INVALID if the port information + * was invalid (e.g., it's not a number or it falls out of range). + */ + int port_number; + + /** The path part of this URI. */ + bstr *path; + + /** Query string. */ + bstr *query; + + /** + * Fragment identifier. This field will rarely be available in a server-side + * setting, but it's not impossible to see it. */ + bstr *fragment; +}; + +/** + * Frees all data contained in the uri, and then the uri itself. + * + * @param[in] uri + */ +void htp_uri_free(htp_uri_t *uri); + +/** + * Allocates and initializes a new htp_uri_t structure. + * + * @return New structure, or NULL on memory allocation failure. + */ +htp_uri_t *htp_uri_alloc(void); + +/** + * Creates a new log entry and stores it with the connection. The file and line + * parameters are typically auto-generated using the HTP_LOG_MARK macro. + * + * @param[in] connp + * @param[in] file + * @param[in] line + * @param[in] level + * @param[in] code + * @param[in] fmt + * @param[in] ... + */ +void htp_log(htp_connp_t *connp, const char *file, int line, enum htp_log_level_t level, int code, const char *fmt, ...); + +/** + * Performs in-place decoding of the input string, according to the configuration specified + * by cfg and ctx. On output, various flags (HTP_URLEN_*) might be set. + * + * @param[in] cfg + * @param[in] ctx + * @param[in] input + * @param[out] flags + * + * @return HTP_OK on success, HTP_ERROR on failure. 
+ */ +htp_status_t htp_urldecode_inplace(htp_cfg_t *cfg, enum htp_decoder_ctx_t ctx, bstr *input, uint64_t *flags); + +/** + * Performs in-place decoding of the input string, according to the configuration specified + * by cfg and ctx. On output, various flags (HTP_URLEN_*) might be set. If something in the + * input would cause a particular server to respond with an error, the appropriate status + * code will be set. + * + * @param[in] cfg + * @param[in] ctx + * @param[in] input + * @param[out] flags + * @param[out] expected_status_code 0 by default, or status code as necessary + * + * @return HTP_OK on success, HTP_ERROR on failure. + */ +htp_status_t htp_urldecode_inplace_ex(htp_cfg_t *cfg, enum htp_decoder_ctx_t ctx, bstr *input, uint64_t *flags, int *expected_status_code); + +/** + * Returns the LibHTP version string. + * + * @return LibHTP version, for example "LibHTP v0.5.x". + */ +char *htp_get_version(void); + +#ifdef __cplusplus +} +#endif + +#endif /* _HTP_H */ diff --git a/htp/htp_base64.c b/htp/htp_base64.c new file mode 100644 index 0000000..75dc122 --- /dev/null +++ b/htp/htp_base64.c @@ -0,0 +1,196 @@ +/*************************************************************************** + * Copyright (c) 2009-2010 Open Information Security Foundation + * Copyright (c) 2010-2013 Qualys, Inc. + * All rights reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions are + * met: + * + * - Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + + * - Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution. + + * - Neither the name of the Qualys, Inc. 
nor the names of its + * contributors may be used to endorse or promote products derived from + * this software without specific prior written permission. + * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS + * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT + * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR + * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT + * HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, + * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT + * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, + * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY + * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT + * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE + * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + ***************************************************************************/ + +/** + * @file + * @author Ivan Ristic <ivanr@webkreator.com> + */ + +/* Adapted from the libb64 project (http://sourceforge.net/projects/libb64), which is in public domain. */ + +#include "bstr.h" +#include "htp_base64.h" + +/** + * Decode single base64-encoded character. 
+ * + * @param[in] value_in + * @return decoded character + */ +int htp_base64_decode_single(signed char value_in) { + static const signed char decoding[] = {62, -1, -1, -1, 63, 52, 53, 54, 55, 56, 57, 58, 59, 60, 61, + -1, -1, -1, -2, -1, -1, -1, 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, + 18, 19, 20, 21, 22, 23, 24, 25, -1, -1, -1, -1, -1, -1, 26, 27, 28, 29, 30, 31, 32, 33, 34, + 35, 36, 37, 38, 39, 40, 41, 42, 43, 44, 45, 46, 47, 48, 49, 50, 51}; + static const signed char decoding_size = sizeof (decoding); + + value_in -= 43; + + if ((value_in < 0) || (value_in > decoding_size - 1)) return -1; + + return decoding[(int) value_in]; +} + +/** + * Initialize base64 decoder. + * + * @param[in] decoder + */ +void htp_base64_decoder_init(htp_base64_decoder *decoder) { + decoder->step = step_a; + decoder->plainchar = 0; +} + +/** + * Feed the supplied memory range to the decoder. Input characters for which + * htp_base64_decode_single() returns a negative value are skipped. Decoding stops + * when the input is exhausted or when length_out bytes have been produced; either + * way the step to resume at and the bits of the unfinished output byte are stored + * in the decoder, and no byte beyond plaintext_out[length_out - 1] is written. + * + * @param[in] decoder decoder state, updated on return + * @param[in] _code_in base64 input + * @param[in] length_in number of input bytes + * @param[in] _plaintext_out output buffer; its first byte is overwritten with the + * carried-over partial byte even when nothing is decoded + * @param[in] length_out capacity of the output buffer; nothing is done when it is not positive + * @return how many bytes were placed into plaintext output + */ +int htp_base64_decode(htp_base64_decoder *decoder, const void *_code_in, int length_in, void *_plaintext_out, int length_out) { + const unsigned char *code_in = (const unsigned char *)_code_in; + unsigned char *plaintext_out = (unsigned char *)_plaintext_out; + const unsigned char *codechar = code_in; + unsigned char *plainchar = plaintext_out; + signed char fragment; + + if (length_out <= 0) return 0; + + *plainchar = decoder->plainchar; + + switch (decoder->step) { + while (1) { + case step_a: + do { + if (codechar == code_in + length_in) { + decoder->step = step_a; + decoder->plainchar = *plainchar; + return (int) (plainchar - plaintext_out); + } + fragment = (char) htp_base64_decode_single(*codechar++); + } while (fragment < 0); + *plainchar = (unsigned char) ((fragment & 0x03f) << 2); + /* fall through */ + + case step_b: + do { + if (codechar == code_in + length_in) { + 
decoder->step = step_b; + decoder->plainchar = *plainchar; + return (int) (plainchar - plaintext_out); + } + fragment = (char) htp_base64_decode_single(*codechar++); + } while (fragment < 0); + *plainchar++ |= (fragment & 0x030) >> 4; + if (--length_out == 0) { + /* Output is full: carry the pending bits in the decoder instead of storing them one byte past the buffer. */ + decoder->step = step_c; + decoder->plainchar = (char) ((fragment & 0x00f) << 4); + return (int) (plainchar - plaintext_out); + } + *plainchar = (unsigned char) ((fragment & 0x00f) << 4); + /* fall through */ + + case step_c: + do { + if (codechar == code_in + length_in) { + decoder->step = step_c; + decoder->plainchar = *plainchar; + return (int) (plainchar - plaintext_out); + } + fragment = (char) htp_base64_decode_single(*codechar++); + } while (fragment < 0); + *plainchar++ |= (fragment & 0x03c) >> 2; + if (--length_out == 0) { + /* Output is full: as in step_b, keep the pending bits in the decoder rather than in the byte after the buffer. */ + decoder->step = step_d; + decoder->plainchar = (char) ((fragment & 0x003) << 6); + return (int) (plainchar - plaintext_out); + } + *plainchar = (unsigned char) ((fragment & 0x003) << 6); + /* fall through */ + + case step_d: + do { + if (codechar == code_in + length_in) { + decoder->step = step_d; + decoder->plainchar = *plainchar; + return (int) (plainchar - plaintext_out); + } + fragment = (char) htp_base64_decode_single(*codechar++); + } while (fragment < 0); + *plainchar++ |= (fragment & 0x03f); + if (--length_out == 0) { + /* A complete group of four characters has been emitted; the next call starts a fresh one. */ + decoder->step = step_a; + decoder->plainchar = 0; + return (int) (plainchar - plaintext_out); + } + /* fall through */ + } + } + + /* control should not reach here */ + return (int) (plainchar - plaintext_out); +} + +/** + * Base64-decode input, given as bstring. + * + * @param[in] input + * @return new base64-decoded bstring + */ +bstr *htp_base64_decode_bstr(bstr *input) { + return htp_base64_decode_mem(bstr_ptr(input), bstr_len(input)); +} + +/** + * Base64-decode input, given as memory range. 
+ * + * @param[in] data + * @param[in] len + * @return new base64-decoded bstring, or NULL when the temporary buffer cannot be + * allocated or when no bytes were decoded + */ +bstr *htp_base64_decode_mem(const void *data, size_t len) { + htp_base64_decoder decoder; + bstr *r = NULL; + + htp_base64_decoder_init(&decoder); + + /* Every output byte consumes more than one input character, so len bytes are always enough. */ + unsigned char *tmpstr = malloc(len); + if (tmpstr == NULL) return NULL; + + /* NOTE(review): len is narrowed to int for both lengths; inputs larger than INT_MAX are not guarded against here. */ + int resulting_len = htp_base64_decode(&decoder, data, (int) len, tmpstr, (int) len); + if (resulting_len > 0) { + r = bstr_dup_mem(tmpstr, resulting_len); + } + + free(tmpstr); + + return r; +} diff --git a/htp/htp_base64.h b/htp/htp_base64.h new file mode 100644 index 0000000..8978e7a --- /dev/null +++ b/htp/htp_base64.h @@ -0,0 +1,74 @@ +/*************************************************************************** + * Copyright (c) 2009-2010 Open Information Security Foundation + * Copyright (c) 2010-2013 Qualys, Inc. + * All rights reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions are + * met: + * + * - Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + + * - Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution. + + * - Neither the name of the Qualys, Inc. nor the names of its + * contributors may be used to endorse or promote products derived from + * this software without specific prior written permission. + * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS + * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT + * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR + * A PARTICULAR PURPOSE ARE DISCLAIMED. 
IN NO EVENT SHALL THE COPYRIGHT + * HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, + * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT + * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, + * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY + * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT + * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE + * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + ***************************************************************************/ + +/** + * @file + * @author Ivan Ristic <ivanr@webkreator.com> + */ + +/* Adapted from the libb64 project (http://sourceforge.net/projects/libb64), which is in public domain. */ + +#ifndef _HTP_BASE64_H +#define _HTP_BASE64_H + +#ifdef __cplusplus +extern "C" { +#endif + +#include "bstr.h" + +/** + * Position within the current group of four base64 characters: step_a means the + * first character of a group is expected next, step_d the fourth. + */ +typedef enum { + step_a, step_b, step_c, step_d +} htp_base64_decodestep; + +/** + * Decoder state carried from one htp_base64_decode() call to the next. + */ +typedef struct { + /** Step at which decoding resumes. */ + htp_base64_decodestep step; + /** Bits of the output byte that has not been completed yet. */ + char plainchar; +} htp_base64_decoder; + +/** Resets the decoder to step_a with no pending bits. */ +void htp_base64_decoder_init(htp_base64_decoder *state_in); + +/** Maps one base64 character to its value; returns a negative number for any other character. */ +int htp_base64_decode_single(signed char value_in); + +/** Decodes length_in bytes of input into plaintext_out and returns the number of bytes produced. */ +int htp_base64_decode(htp_base64_decoder *decoder, const void *code_in, int length_in, void *plaintext_out, int length_out); + +/** Decodes the contents of a bstring by calling htp_base64_decode_mem(). */ +bstr *htp_base64_decode_bstr(bstr *input); + +/** Decodes a memory range into a new bstring; returns NULL when nothing was decoded. */ +bstr *htp_base64_decode_mem(const void *data, size_t len); + +#ifdef __cplusplus +} +#endif + +#endif /* _HTP_BASE64_H */ + diff --git a/htp/htp_config.c b/htp/htp_config.c new file mode 100644 index 0000000..00ae853 --- /dev/null +++ b/htp/htp_config.c @@ -0,0 +1,954 @@ +/*************************************************************************** + * Copyright (c) 2009-2010 Open Information Security Foundation + * Copyright (c) 2010-2013 Qualys, Inc. + * All rights reserved. 
+ * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions are + * met: + * + * - Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + + * - Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution. + + * - Neither the name of the Qualys, Inc. nor the names of its + * contributors may be used to endorse or promote products derived from + * this software without specific prior written permission. + * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS + * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT + * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR + * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT + * HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, + * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT + * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, + * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY + * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT + * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE + * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + ***************************************************************************/ + +/** + * @file + * @author Ivan Ristic <ivanr@webkreator.com> + */ + +#include "htp_config_auto.h" + +#include "htp_private.h" + +/** + * This map is used by default for best-fit mapping from the Unicode + * values U+0100-FFFF. 
+ */ +static unsigned char bestfit_1252[] = { + 0x01, 0x00, 0x41, 0x01, 0x01, 0x61, 0x01, 0x02, 0x41, 0x01, 0x03, 0x61, + 0x01, 0x04, 0x41, 0x01, 0x05, 0x61, 0x01, 0x06, 0x43, 0x01, 0x07, 0x63, + 0x01, 0x08, 0x43, 0x01, 0x09, 0x63, 0x01, 0x0a, 0x43, 0x01, 0x0b, 0x63, + 0x01, 0x0c, 0x43, 0x01, 0x0d, 0x63, 0x01, 0x0e, 0x44, 0x01, 0x0f, 0x64, + 0x01, 0x11, 0x64, 0x01, 0x12, 0x45, 0x01, 0x13, 0x65, 0x01, 0x14, 0x45, + 0x01, 0x15, 0x65, 0x01, 0x16, 0x45, 0x01, 0x17, 0x65, 0x01, 0x18, 0x45, + 0x01, 0x19, 0x65, 0x01, 0x1a, 0x45, 0x01, 0x1b, 0x65, 0x01, 0x1c, 0x47, + 0x01, 0x1d, 0x67, 0x01, 0x1e, 0x47, 0x01, 0x1f, 0x67, 0x01, 0x20, 0x47, + 0x01, 0x21, 0x67, 0x01, 0x22, 0x47, 0x01, 0x23, 0x67, 0x01, 0x24, 0x48, + 0x01, 0x25, 0x68, 0x01, 0x26, 0x48, 0x01, 0x27, 0x68, 0x01, 0x28, 0x49, + 0x01, 0x29, 0x69, 0x01, 0x2a, 0x49, 0x01, 0x2b, 0x69, 0x01, 0x2c, 0x49, + 0x01, 0x2d, 0x69, 0x01, 0x2e, 0x49, 0x01, 0x2f, 0x69, 0x01, 0x30, 0x49, + 0x01, 0x31, 0x69, 0x01, 0x34, 0x4a, 0x01, 0x35, 0x6a, 0x01, 0x36, 0x4b, + 0x01, 0x37, 0x6b, 0x01, 0x39, 0x4c, 0x01, 0x3a, 0x6c, 0x01, 0x3b, 0x4c, + 0x01, 0x3c, 0x6c, 0x01, 0x3d, 0x4c, 0x01, 0x3e, 0x6c, 0x01, 0x41, 0x4c, + 0x01, 0x42, 0x6c, 0x01, 0x43, 0x4e, 0x01, 0x44, 0x6e, 0x01, 0x45, 0x4e, + 0x01, 0x46, 0x6e, 0x01, 0x47, 0x4e, 0x01, 0x48, 0x6e, 0x01, 0x4c, 0x4f, + 0x01, 0x4d, 0x6f, 0x01, 0x4e, 0x4f, 0x01, 0x4f, 0x6f, 0x01, 0x50, 0x4f, + 0x01, 0x51, 0x6f, 0x01, 0x54, 0x52, 0x01, 0x55, 0x72, 0x01, 0x56, 0x52, + 0x01, 0x57, 0x72, 0x01, 0x58, 0x52, 0x01, 0x59, 0x72, 0x01, 0x5a, 0x53, + 0x01, 0x5b, 0x73, 0x01, 0x5c, 0x53, 0x01, 0x5d, 0x73, 0x01, 0x5e, 0x53, + 0x01, 0x5f, 0x73, 0x01, 0x62, 0x54, 0x01, 0x63, 0x74, 0x01, 0x64, 0x54, + 0x01, 0x65, 0x74, 0x01, 0x66, 0x54, 0x01, 0x67, 0x74, 0x01, 0x68, 0x55, + 0x01, 0x69, 0x75, 0x01, 0x6a, 0x55, 0x01, 0x6b, 0x75, 0x01, 0x6c, 0x55, + 0x01, 0x6d, 0x75, 0x01, 0x6e, 0x55, 0x01, 0x6f, 0x75, 0x01, 0x70, 0x55, + 0x01, 0x71, 0x75, 0x01, 0x72, 0x55, 0x01, 0x73, 0x75, 0x01, 0x74, 0x57, + 0x01, 0x75, 0x77, 0x01, 
0x76, 0x59, 0x01, 0x77, 0x79, 0x01, 0x79, 0x5a, + 0x01, 0x7b, 0x5a, 0x01, 0x7c, 0x7a, 0x01, 0x80, 0x62, 0x01, 0x97, 0x49, + 0x01, 0x9a, 0x6c, 0x01, 0x9f, 0x4f, 0x01, 0xa0, 0x4f, 0x01, 0xa1, 0x6f, + 0x01, 0xab, 0x74, 0x01, 0xae, 0x54, 0x01, 0xaf, 0x55, 0x01, 0xb0, 0x75, + 0x01, 0xb6, 0x7a, 0x01, 0xc0, 0x7c, 0x01, 0xc3, 0x21, 0x01, 0xcd, 0x41, + 0x01, 0xce, 0x61, 0x01, 0xcf, 0x49, 0x01, 0xd0, 0x69, 0x01, 0xd1, 0x4f, + 0x01, 0xd2, 0x6f, 0x01, 0xd3, 0x55, 0x01, 0xd4, 0x75, 0x01, 0xd5, 0x55, + 0x01, 0xd6, 0x75, 0x01, 0xd7, 0x55, 0x01, 0xd8, 0x75, 0x01, 0xd9, 0x55, + 0x01, 0xda, 0x75, 0x01, 0xdb, 0x55, 0x01, 0xdc, 0x75, 0x01, 0xde, 0x41, + 0x01, 0xdf, 0x61, 0x01, 0xe4, 0x47, 0x01, 0xe5, 0x67, 0x01, 0xe6, 0x47, + 0x01, 0xe7, 0x67, 0x01, 0xe8, 0x4b, 0x01, 0xe9, 0x6b, 0x01, 0xea, 0x4f, + 0x01, 0xeb, 0x6f, 0x01, 0xec, 0x4f, 0x01, 0xed, 0x6f, 0x01, 0xf0, 0x6a, + 0x02, 0x61, 0x67, 0x02, 0xb9, 0x27, 0x02, 0xba, 0x22, 0x02, 0xbc, 0x27, + 0x02, 0xc4, 0x5e, 0x02, 0xc8, 0x27, 0x02, 0xcb, 0x60, 0x02, 0xcd, 0x5f, + 0x03, 0x00, 0x60, 0x03, 0x02, 0x5e, 0x03, 0x03, 0x7e, 0x03, 0x0e, 0x22, + 0x03, 0x31, 0x5f, 0x03, 0x32, 0x5f, 0x03, 0x7e, 0x3b, 0x03, 0x93, 0x47, + 0x03, 0x98, 0x54, 0x03, 0xa3, 0x53, 0x03, 0xa6, 0x46, 0x03, 0xa9, 0x4f, + 0x03, 0xb1, 0x61, 0x03, 0xb4, 0x64, 0x03, 0xb5, 0x65, 0x03, 0xc0, 0x70, + 0x03, 0xc3, 0x73, 0x03, 0xc4, 0x74, 0x03, 0xc6, 0x66, 0x04, 0xbb, 0x68, + 0x05, 0x89, 0x3a, 0x06, 0x6a, 0x25, 0x20, 0x00, 0x20, 0x20, 0x01, 0x20, + 0x20, 0x02, 0x20, 0x20, 0x03, 0x20, 0x20, 0x04, 0x20, 0x20, 0x05, 0x20, + 0x20, 0x06, 0x20, 0x20, 0x10, 0x2d, 0x20, 0x11, 0x2d, 0x20, 0x17, 0x3d, + 0x20, 0x32, 0x27, 0x20, 0x35, 0x60, 0x20, 0x44, 0x2f, 0x20, 0x74, 0x34, + 0x20, 0x75, 0x35, 0x20, 0x76, 0x36, 0x20, 0x77, 0x37, 0x20, 0x78, 0x38, + 0x20, 0x7f, 0x6e, 0x20, 0x80, 0x30, 0x20, 0x81, 0x31, 0x20, 0x82, 0x32, + 0x20, 0x83, 0x33, 0x20, 0x84, 0x34, 0x20, 0x85, 0x35, 0x20, 0x86, 0x36, + 0x20, 0x87, 0x37, 0x20, 0x88, 0x38, 0x20, 0x89, 0x39, 0x20, 0xa7, 0x50, + 0x21, 0x02, 0x43, 0x21, 
0x07, 0x45, 0x21, 0x0a, 0x67, 0x21, 0x0b, 0x48, + 0x21, 0x0c, 0x48, 0x21, 0x0d, 0x48, 0x21, 0x0e, 0x68, 0x21, 0x10, 0x49, + 0x21, 0x11, 0x49, 0x21, 0x12, 0x4c, 0x21, 0x13, 0x6c, 0x21, 0x15, 0x4e, + 0x21, 0x18, 0x50, 0x21, 0x19, 0x50, 0x21, 0x1a, 0x51, 0x21, 0x1b, 0x52, + 0x21, 0x1c, 0x52, 0x21, 0x1d, 0x52, 0x21, 0x24, 0x5a, 0x21, 0x28, 0x5a, + 0x21, 0x2a, 0x4b, 0x21, 0x2c, 0x42, 0x21, 0x2d, 0x43, 0x21, 0x2e, 0x65, + 0x21, 0x2f, 0x65, 0x21, 0x30, 0x45, 0x21, 0x31, 0x46, 0x21, 0x33, 0x4d, + 0x21, 0x34, 0x6f, 0x22, 0x12, 0x2d, 0x22, 0x15, 0x2f, 0x22, 0x16, 0x5c, + 0x22, 0x17, 0x2a, 0x22, 0x1a, 0x76, 0x22, 0x1e, 0x38, 0x22, 0x23, 0x7c, + 0x22, 0x29, 0x6e, 0x22, 0x36, 0x3a, 0x22, 0x3c, 0x7e, 0x22, 0x61, 0x3d, + 0x22, 0x64, 0x3d, 0x22, 0x65, 0x3d, 0x23, 0x03, 0x5e, 0x23, 0x20, 0x28, + 0x23, 0x21, 0x29, 0x23, 0x29, 0x3c, 0x23, 0x2a, 0x3e, 0x25, 0x00, 0x2d, + 0x25, 0x0c, 0x2b, 0x25, 0x10, 0x2b, 0x25, 0x14, 0x2b, 0x25, 0x18, 0x2b, + 0x25, 0x1c, 0x2b, 0x25, 0x2c, 0x2d, 0x25, 0x34, 0x2d, 0x25, 0x3c, 0x2b, + 0x25, 0x50, 0x2d, 0x25, 0x52, 0x2b, 0x25, 0x53, 0x2b, 0x25, 0x54, 0x2b, + 0x25, 0x55, 0x2b, 0x25, 0x56, 0x2b, 0x25, 0x57, 0x2b, 0x25, 0x58, 0x2b, + 0x25, 0x59, 0x2b, 0x25, 0x5a, 0x2b, 0x25, 0x5b, 0x2b, 0x25, 0x5c, 0x2b, + 0x25, 0x5d, 0x2b, 0x25, 0x64, 0x2d, 0x25, 0x65, 0x2d, 0x25, 0x66, 0x2d, + 0x25, 0x67, 0x2d, 0x25, 0x68, 0x2d, 0x25, 0x69, 0x2d, 0x25, 0x6a, 0x2b, + 0x25, 0x6b, 0x2b, 0x25, 0x6c, 0x2b, 0x25, 0x84, 0x5f, 0x27, 0x58, 0x7c, + 0x30, 0x00, 0x20, 0x30, 0x08, 0x3c, 0x30, 0x09, 0x3e, 0x30, 0x1a, 0x5b, + 0x30, 0x1b, 0x5d, 0xff, 0x01, 0x21, 0xff, 0x02, 0x22, 0xff, 0x03, 0x23, + 0xff, 0x04, 0x24, 0xff, 0x05, 0x25, 0xff, 0x06, 0x26, 0xff, 0x07, 0x27, + 0xff, 0x08, 0x28, 0xff, 0x09, 0x29, 0xff, 0x0a, 0x2a, 0xff, 0x0b, 0x2b, + 0xff, 0x0c, 0x2c, 0xff, 0x0d, 0x2d, 0xff, 0x0e, 0x2e, 0xff, 0x0f, 0x2f, + 0xff, 0x10, 0x30, 0xff, 0x11, 0x31, 0xff, 0x12, 0x32, 0xff, 0x13, 0x33, + 0xff, 0x14, 0x34, 0xff, 0x15, 0x35, 0xff, 0x16, 0x36, 0xff, 0x17, 0x37, + 0xff, 0x18, 0x38, 0xff, 
0x19, 0x39, 0xff, 0x1a, 0x3a, 0xff, 0x1b, 0x3b, + 0xff, 0x1c, 0x3c, 0xff, 0x1d, 0x3d, 0xff, 0x1e, 0x3e, 0xff, 0x20, 0x40, + 0xff, 0x21, 0x41, 0xff, 0x22, 0x42, 0xff, 0x23, 0x43, 0xff, 0x24, 0x44, + 0xff, 0x25, 0x45, 0xff, 0x26, 0x46, 0xff, 0x27, 0x47, 0xff, 0x28, 0x48, + 0xff, 0x29, 0x49, 0xff, 0x2a, 0x4a, 0xff, 0x2b, 0x4b, 0xff, 0x2c, 0x4c, + 0xff, 0x2d, 0x4d, 0xff, 0x2e, 0x4e, 0xff, 0x2f, 0x4f, 0xff, 0x30, 0x50, + 0xff, 0x31, 0x51, 0xff, 0x32, 0x52, 0xff, 0x33, 0x53, 0xff, 0x34, 0x54, + 0xff, 0x35, 0x55, 0xff, 0x36, 0x56, 0xff, 0x37, 0x57, 0xff, 0x38, 0x58, + 0xff, 0x39, 0x59, 0xff, 0x3a, 0x5a, 0xff, 0x3b, 0x5b, 0xff, 0x3c, 0x5c, + 0xff, 0x3d, 0x5d, 0xff, 0x3e, 0x5e, 0xff, 0x3f, 0x5f, 0xff, 0x40, 0x60, + 0xff, 0x41, 0x61, 0xff, 0x42, 0x62, 0xff, 0x43, 0x63, 0xff, 0x44, 0x64, + 0xff, 0x45, 0x65, 0xff, 0x46, 0x66, 0xff, 0x47, 0x67, 0xff, 0x48, 0x68, + 0xff, 0x49, 0x69, 0xff, 0x4a, 0x6a, 0xff, 0x4b, 0x6b, 0xff, 0x4c, 0x6c, + 0xff, 0x4d, 0x6d, 0xff, 0x4e, 0x6e, 0xff, 0x4f, 0x6f, 0xff, 0x50, 0x70, + 0xff, 0x51, 0x71, 0xff, 0x52, 0x72, 0xff, 0x53, 0x73, 0xff, 0x54, 0x74, + 0xff, 0x55, 0x75, 0xff, 0x56, 0x76, 0xff, 0x57, 0x77, 0xff, 0x58, 0x78, + 0xff, 0x59, 0x79, 0xff, 0x5a, 0x7a, 0xff, 0x5b, 0x7b, 0xff, 0x5c, 0x7c, + 0xff, 0x5d, 0x7d, 0xff, 0x5e, 0x7e, 0x00, 0x00, 0x00 +}; + +htp_cfg_t *htp_config_create(void) { + htp_cfg_t *cfg = calloc(1, sizeof (htp_cfg_t)); + if (cfg == NULL) return NULL; + + cfg->field_limit_hard = HTP_FIELD_LIMIT_HARD; + cfg->field_limit_soft = HTP_FIELD_LIMIT_SOFT; + cfg->log_level = HTP_LOG_NOTICE; + cfg->response_decompression_enabled = 1; + cfg->request_decompression_enabled = 0; // disabled by default + cfg->parse_request_cookies = 1; + cfg->parse_request_auth = 1; + cfg->extract_request_files = 0; + cfg->extract_request_files_limit = -1; // Use the parser default. 
+ cfg->response_decompression_layer_limit = 2; // 2 layers seem fairly common + cfg->lzma_memlimit = HTP_LZMA_MEMLIMIT; + cfg->response_lzma_layer_limit = 1; // default is only one layer + cfg->compression_bomb_limit = HTP_COMPRESSION_BOMB_LIMIT; + cfg->compression_time_limit = HTP_COMPRESSION_TIME_LIMIT_USEC; + cfg->allow_space_uri = 0; + + // Default settings for URL-encoded data. + + htp_config_set_bestfit_map(cfg, HTP_DECODER_DEFAULTS, bestfit_1252); + htp_config_set_bestfit_replacement_byte(cfg, HTP_DECODER_DEFAULTS, '?'); + + htp_config_set_url_encoding_invalid_handling(cfg, HTP_DECODER_DEFAULTS, HTP_URL_DECODE_PRESERVE_PERCENT); + htp_config_set_nul_raw_terminates(cfg, HTP_DECODER_DEFAULTS, 0); + htp_config_set_nul_encoded_terminates(cfg, HTP_DECODER_DEFAULTS, 0); + htp_config_set_u_encoding_decode(cfg, HTP_DECODER_DEFAULTS, 0); + + htp_config_set_plusspace_decode(cfg, HTP_DECODER_URLENCODED, 1); + + htp_config_set_server_personality(cfg, HTP_SERVER_MINIMAL); + + return cfg; +} + +/** + * Creates a copy of the supplied configuration: the structure itself is copied + * byte for byte and every hook is then duplicated with htp_hook_copy(). + * + * @param[in] cfg + * @return the new configuration, or NULL if cfg is NULL, if memory cannot be + * allocated, or if a hook cannot be copied + */ +htp_cfg_t *htp_config_copy(htp_cfg_t *cfg) { + if (cfg == NULL) return NULL; + + // Start by making a copy of the entire structure, + // which is essentially a shallow copy. + htp_cfg_t *copy = malloc(sizeof (htp_cfg_t)); + if (copy == NULL) return NULL; + memcpy(copy, cfg, sizeof (htp_cfg_t)); + + // After the shallow copy every hook pointer in the copy still refers to a + // hook owned by cfg. Clear them all first, so that htp_config_destroy(copy) + // on a failure path below only ever frees hooks that belong to the copy. + copy->hook_request_start = NULL; + copy->hook_request_line = NULL; + copy->hook_request_uri_normalize = NULL; + copy->hook_request_header_data = NULL; + copy->hook_request_headers = NULL; + copy->hook_request_body_data = NULL; + copy->hook_request_file_data = NULL; + copy->hook_request_trailer = NULL; + copy->hook_request_trailer_data = NULL; + copy->hook_request_complete = NULL; + copy->hook_response_start = NULL; + copy->hook_response_line = NULL; + copy->hook_response_header_data = NULL; + copy->hook_response_headers = NULL; + copy->hook_response_body_data = NULL; + copy->hook_response_trailer = NULL; + copy->hook_response_trailer_data = NULL; + copy->hook_response_complete = NULL; + copy->hook_transaction_complete = NULL; + copy->hook_log = NULL; + + // Now create copies of the hooks' structures. 
+ + if (cfg->hook_request_start != NULL) { + copy->hook_request_start = htp_hook_copy(cfg->hook_request_start); + if (copy->hook_request_start == NULL) { + htp_config_destroy(copy); + return NULL; + } + } + + if (cfg->hook_request_line != NULL) { + copy->hook_request_line = htp_hook_copy(cfg->hook_request_line); + if (copy->hook_request_line == NULL) { + htp_config_destroy(copy); + return NULL; + } + } + + if (cfg->hook_request_uri_normalize != NULL) { + copy->hook_request_uri_normalize = htp_hook_copy(cfg->hook_request_uri_normalize); + if (copy->hook_request_uri_normalize == NULL) { + htp_config_destroy(copy); + return NULL; + } + } + + if (cfg->hook_request_header_data != NULL) { + copy->hook_request_header_data = htp_hook_copy(cfg->hook_request_header_data); + if (copy->hook_request_header_data == NULL) { + htp_config_destroy(copy); + return NULL; + } + } + + if (cfg->hook_request_headers != NULL) { + copy->hook_request_headers = htp_hook_copy(cfg->hook_request_headers); + if (copy->hook_request_headers == NULL) { + htp_config_destroy(copy); + return NULL; + } + } + + if (cfg->hook_request_body_data != NULL) { + copy->hook_request_body_data = htp_hook_copy(cfg->hook_request_body_data); + if (copy->hook_request_body_data == NULL) { + htp_config_destroy(copy); + return NULL; + } + } + + if (cfg->hook_request_file_data != NULL) { + copy->hook_request_file_data = htp_hook_copy(cfg->hook_request_file_data); + if (copy->hook_request_file_data == NULL) { + htp_config_destroy(copy); + return NULL; + } + } + + if (cfg->hook_request_trailer != NULL) { + copy->hook_request_trailer = htp_hook_copy(cfg->hook_request_trailer); + if (copy->hook_request_trailer == NULL) { + htp_config_destroy(copy); + return NULL; + } + } + + if (cfg->hook_request_trailer_data != NULL) { + copy->hook_request_trailer_data = htp_hook_copy(cfg->hook_request_trailer_data); + if (copy->hook_request_trailer_data == NULL) { + htp_config_destroy(copy); + return NULL; + } + } + + if 
(cfg->hook_request_complete != NULL) { + copy->hook_request_complete = htp_hook_copy(cfg->hook_request_complete); + if (copy->hook_request_complete == NULL) { + htp_config_destroy(copy); + return NULL; + } + } + + if (cfg->hook_response_start != NULL) { + copy->hook_response_start = htp_hook_copy(cfg->hook_response_start); + if (copy->hook_response_start == NULL) { + htp_config_destroy(copy); + return NULL; + } + } + + if (cfg->hook_response_line != NULL) { + copy->hook_response_line = htp_hook_copy(cfg->hook_response_line); + if (copy->hook_response_line == NULL) { + htp_config_destroy(copy); + return NULL; + } + } + + if (cfg->hook_response_header_data != NULL) { + copy->hook_response_header_data = htp_hook_copy(cfg->hook_response_header_data); + if (copy->hook_response_header_data == NULL) { + htp_config_destroy(copy); + return NULL; + } + } + + if (cfg->hook_response_headers != NULL) { + copy->hook_response_headers = htp_hook_copy(cfg->hook_response_headers); + if (copy->hook_response_headers == NULL) { + htp_config_destroy(copy); + return NULL; + } + } + + if (cfg->hook_response_body_data != NULL) { + copy->hook_response_body_data = htp_hook_copy(cfg->hook_response_body_data); + if (copy->hook_response_body_data == NULL) { + htp_config_destroy(copy); + return NULL; + } + } + + if (cfg->hook_response_trailer != NULL) { + copy->hook_response_trailer = htp_hook_copy(cfg->hook_response_trailer); + if (copy->hook_response_trailer == NULL) { + htp_config_destroy(copy); + return NULL; + } + } + + if (cfg->hook_response_trailer_data != NULL) { + copy->hook_response_trailer_data = htp_hook_copy(cfg->hook_response_trailer_data); + if (copy->hook_response_trailer_data == NULL) { + htp_config_destroy(copy); + return NULL; + } + } + + if (cfg->hook_response_complete != NULL) { + copy->hook_response_complete = htp_hook_copy(cfg->hook_response_complete); + if (copy->hook_response_complete == NULL) { + htp_config_destroy(copy); + return NULL; + } + } + + if 
(cfg->hook_transaction_complete != NULL) { + copy->hook_transaction_complete = htp_hook_copy(cfg->hook_transaction_complete); + if (copy->hook_transaction_complete == NULL) { + htp_config_destroy(copy); + return NULL; + } + } + + if (cfg->hook_log != NULL) { + copy->hook_log = htp_hook_copy(cfg->hook_log); + if (copy->hook_log == NULL) { + htp_config_destroy(copy); + return NULL; + } + } + + return copy; +} + +void htp_config_destroy(htp_cfg_t *cfg) { + if (cfg == NULL) return; + + htp_hook_destroy(cfg->hook_request_start); + htp_hook_destroy(cfg->hook_request_line); + htp_hook_destroy(cfg->hook_request_uri_normalize); + htp_hook_destroy(cfg->hook_request_header_data); + htp_hook_destroy(cfg->hook_request_headers); + htp_hook_destroy(cfg->hook_request_body_data); + htp_hook_destroy(cfg->hook_request_file_data); + htp_hook_destroy(cfg->hook_request_trailer); + htp_hook_destroy(cfg->hook_request_trailer_data); + htp_hook_destroy(cfg->hook_request_complete); + htp_hook_destroy(cfg->hook_response_start); + htp_hook_destroy(cfg->hook_response_line); + htp_hook_destroy(cfg->hook_response_header_data); + htp_hook_destroy(cfg->hook_response_headers); + htp_hook_destroy(cfg->hook_response_body_data); + htp_hook_destroy(cfg->hook_response_trailer); + htp_hook_destroy(cfg->hook_response_trailer_data); + htp_hook_destroy(cfg->hook_response_complete); + htp_hook_destroy(cfg->hook_transaction_complete); + htp_hook_destroy(cfg->hook_log); + + free(cfg); +} + +void *htp_config_get_user_data(htp_cfg_t *cfg) { + if (cfg == NULL) return NULL; + return cfg->user_data; +} + +void htp_config_register_log(htp_cfg_t *cfg, int (*callback_fn)(htp_log_t *)) { + if (cfg == NULL) return; + htp_hook_register(&cfg->hook_log, (htp_callback_fn_t) callback_fn); +} + +void htp_config_register_multipart_parser(htp_cfg_t *cfg) { + if (cfg == NULL) return; + htp_config_register_request_headers(cfg, htp_ch_multipart_callback_request_headers); +} + +void htp_config_register_request_complete(htp_cfg_t 
*cfg, int (*callback_fn)(htp_tx_t *)) { + if (cfg == NULL) return; + htp_hook_register(&cfg->hook_request_complete, (htp_callback_fn_t) callback_fn); +} + +/** + * The htp_config_register_*() functions that take a callback_fn all follow one + * pattern: they pass callback_fn to htp_hook_register() for the hook named after + * the function, and do nothing when cfg is NULL. + */ +void htp_config_register_request_body_data(htp_cfg_t *cfg, int (*callback_fn)(htp_tx_data_t *)) { + if (cfg == NULL) return; + htp_hook_register(&cfg->hook_request_body_data, (htp_callback_fn_t) callback_fn); +} + +void htp_config_register_request_file_data(htp_cfg_t *cfg, int (*callback_fn)(htp_file_data_t *)) { + if (cfg == NULL) return; + htp_hook_register(&cfg->hook_request_file_data, (htp_callback_fn_t) callback_fn); +} + +void htp_config_register_request_uri_normalize(htp_cfg_t *cfg, int (*callback_fn)(htp_tx_t *)) { + if (cfg == NULL) return; + htp_hook_register(&cfg->hook_request_uri_normalize, (htp_callback_fn_t) callback_fn); +} + +void htp_config_register_request_header_data(htp_cfg_t *cfg, int (*callback_fn)(htp_tx_data_t *)) { + if (cfg == NULL) return; + htp_hook_register(&cfg->hook_request_header_data, (htp_callback_fn_t) callback_fn); +} + +void htp_config_register_request_headers(htp_cfg_t *cfg, int (*callback_fn)(htp_tx_t *)) { + if (cfg == NULL) return; + htp_hook_register(&cfg->hook_request_headers, (htp_callback_fn_t) callback_fn); +} + +void htp_config_register_request_line(htp_cfg_t *cfg, int (*callback_fn)(htp_tx_t *)) { + if (cfg == NULL) return; + htp_hook_register(&cfg->hook_request_line, (htp_callback_fn_t) callback_fn); +} + +void htp_config_register_request_start(htp_cfg_t *cfg, int (*callback_fn)(htp_tx_t *)) { + if (cfg == NULL) return; + htp_hook_register(&cfg->hook_request_start, (htp_callback_fn_t) callback_fn); +} + +void htp_config_register_request_trailer(htp_cfg_t *cfg, int (*callback_fn)(htp_tx_t *)) { + if (cfg == NULL) return; + htp_hook_register(&cfg->hook_request_trailer, (htp_callback_fn_t) callback_fn); +} + +void htp_config_register_request_trailer_data(htp_cfg_t *cfg, int (*callback_fn)(htp_tx_data_t *d)) { + if (cfg == NULL) return; + 
htp_hook_register(&cfg->hook_request_trailer_data, (htp_callback_fn_t) callback_fn); +} + +void htp_config_register_response_body_data(htp_cfg_t *cfg, int (*callback_fn)(htp_tx_data_t *)) { + if (cfg == NULL) return; + htp_hook_register(&cfg->hook_response_body_data, (htp_callback_fn_t) callback_fn); +} + +void htp_config_register_response_complete(htp_cfg_t *cfg, int (*callback_fn)(htp_tx_t *)) { + if (cfg == NULL) return; + htp_hook_register(&cfg->hook_response_complete, (htp_callback_fn_t) callback_fn); +} + +void htp_config_register_response_header_data(htp_cfg_t *cfg, int (*callback_fn)(htp_tx_data_t *)) { + if (cfg == NULL) return; + htp_hook_register(&cfg->hook_response_header_data, (htp_callback_fn_t) callback_fn); +} + +void htp_config_register_response_headers(htp_cfg_t *cfg, int (*callback_fn)(htp_tx_t *)) { + if (cfg == NULL) return; + htp_hook_register(&cfg->hook_response_headers, (htp_callback_fn_t) callback_fn); +} + +void htp_config_register_response_line(htp_cfg_t *cfg, int (*callback_fn)(htp_tx_t *)) { + if (cfg == NULL) return; + htp_hook_register(&cfg->hook_response_line, (htp_callback_fn_t) callback_fn); +} + +void htp_config_register_response_start(htp_cfg_t *cfg, int (*callback_fn)(htp_tx_t *)) { + if (cfg == NULL) return; + htp_hook_register(&cfg->hook_response_start, (htp_callback_fn_t) callback_fn); +} + +void htp_config_register_response_trailer(htp_cfg_t *cfg, int (*callback_fn)(htp_tx_t *)) { + if (cfg == NULL) return; + htp_hook_register(&cfg->hook_response_trailer, (htp_callback_fn_t) callback_fn); +} + +void htp_config_register_response_trailer_data(htp_cfg_t *cfg, int (*callback_fn)(htp_tx_data_t *d)) { + if (cfg == NULL) return; + htp_hook_register(&cfg->hook_response_trailer_data, (htp_callback_fn_t) callback_fn); +} + +void htp_config_register_transaction_complete(htp_cfg_t *cfg, int (*callback_fn)(htp_tx_t *)) { + if (cfg == NULL) return; + htp_hook_register(&cfg->hook_transaction_complete, (htp_callback_fn_t) callback_fn); +} + 
+void htp_config_register_urlencoded_parser(htp_cfg_t *cfg) { + if (cfg == NULL) return; + htp_config_register_request_line(cfg, htp_ch_urlencoded_callback_request_line); + htp_config_register_request_headers(cfg, htp_ch_urlencoded_callback_request_headers); +} + +htp_status_t htp_config_set_extract_request_files(htp_cfg_t *cfg, int extract_request_files, int limit) { + if (cfg == NULL) return HTP_ERROR; + if (cfg->tmpdir == NULL) return HTP_ERROR; + cfg->extract_request_files = extract_request_files; + cfg->extract_request_files_limit = limit; + return HTP_OK; +} + +void htp_config_set_field_limits(htp_cfg_t *cfg, size_t soft_limit, size_t hard_limit) { + if (cfg == NULL) return; + cfg->field_limit_soft = soft_limit; + cfg->field_limit_hard = hard_limit; +} + +void htp_config_set_lzma_memlimit(htp_cfg_t *cfg, size_t memlimit) { + if (cfg == NULL) return; + cfg->lzma_memlimit = memlimit; +} + +void htp_config_set_lzma_layers(htp_cfg_t *cfg, int limit) { + if (cfg == NULL) return; + cfg->response_lzma_layer_limit = limit; +} + +void htp_config_set_max_tx(htp_cfg_t *cfg, uint32_t limit) { + if (cfg == NULL) return; + cfg->max_tx = limit; +} + +void htp_config_set_compression_bomb_limit(htp_cfg_t *cfg, size_t bomblimit) { + if (cfg == NULL) return; + if (bomblimit > INT32_MAX) { + cfg->compression_bomb_limit = INT32_MAX; + } else { + cfg->compression_bomb_limit = (int32_t) bomblimit; + } +} + +void htp_config_set_compression_time_limit(htp_cfg_t *cfg, size_t useclimit) { + if (cfg == NULL) return; + // max limit is one second + if (useclimit >= 1000000) { + cfg->compression_time_limit = 1000000; + } else { + cfg->compression_time_limit = (int32_t) useclimit; + } +} + +void htp_config_set_log_level(htp_cfg_t *cfg, enum htp_log_level_t log_level) { + if (cfg == NULL) return; + cfg->log_level = log_level; +} + +void htp_config_set_parse_request_auth(htp_cfg_t *cfg, int parse_request_auth) { + if (cfg == NULL) return; + cfg->parse_request_auth = parse_request_auth; +} + 
+void htp_config_set_parse_request_cookies(htp_cfg_t *cfg, int parse_request_cookies) { + if (cfg == NULL) return; + cfg->parse_request_cookies = parse_request_cookies; +} + +void htp_config_set_response_decompression(htp_cfg_t *cfg, int enabled) { + if (cfg == NULL) return; + cfg->response_decompression_enabled = enabled; +} + +void htp_config_set_request_decompression(htp_cfg_t *cfg, int enabled) { + if (cfg == NULL) return; + cfg->request_decompression_enabled = enabled; +} + +void htp_config_set_allow_space_uri(htp_cfg_t *cfg, int allow_space_uri) { + if (cfg == NULL) return; + cfg->allow_space_uri = allow_space_uri; +} + +int htp_config_set_server_personality(htp_cfg_t *cfg, enum htp_server_personality_t personality) { + if (cfg == NULL) return HTP_ERROR; + + switch (personality) { + + case HTP_SERVER_MINIMAL: + cfg->parse_request_line = htp_parse_request_line_generic; + cfg->process_request_header = htp_process_request_header_generic; + cfg->parse_response_line = htp_parse_response_line_generic; + cfg->process_response_header = htp_process_response_header_generic; + break; + + case HTP_SERVER_GENERIC: + cfg->parse_request_line = htp_parse_request_line_generic; + cfg->process_request_header = htp_process_request_header_generic; + cfg->parse_response_line = htp_parse_response_line_generic; + cfg->process_response_header = htp_process_response_header_generic; + + htp_config_set_backslash_convert_slashes(cfg, HTP_DECODER_URL_PATH, 1); + htp_config_set_path_separators_decode(cfg, HTP_DECODER_URL_PATH, 1); + htp_config_set_path_separators_compress(cfg, HTP_DECODER_URL_PATH, 1); + break; + + case HTP_SERVER_IDS: + cfg->parse_request_line = htp_parse_request_line_generic; + cfg->process_request_header = htp_process_request_header_generic; + cfg->parse_response_line = htp_parse_response_line_generic; + cfg->process_response_header = htp_process_response_header_generic; + + htp_config_set_backslash_convert_slashes(cfg, HTP_DECODER_URL_PATH, 1); + 
htp_config_set_path_separators_decode(cfg, HTP_DECODER_URL_PATH, 1); + htp_config_set_path_separators_compress(cfg, HTP_DECODER_URL_PATH, 1); + htp_config_set_convert_lowercase(cfg, HTP_DECODER_URL_PATH, 1); + htp_config_set_utf8_convert_bestfit(cfg, HTP_DECODER_URL_PATH, 1); + htp_config_set_u_encoding_decode(cfg, HTP_DECODER_URL_PATH, 1); + htp_config_set_requestline_leading_whitespace_unwanted(cfg, HTP_DECODER_DEFAULTS, HTP_UNWANTED_IGNORE); + break; + + case HTP_SERVER_APACHE_2: + cfg->parse_request_line = htp_parse_request_line_apache_2_2; + cfg->process_request_header = htp_process_request_header_apache_2_2; + cfg->parse_response_line = htp_parse_response_line_generic; + cfg->process_response_header = htp_process_response_header_generic; + + htp_config_set_backslash_convert_slashes(cfg, HTP_DECODER_URL_PATH, 0); + htp_config_set_path_separators_decode(cfg, HTP_DECODER_URL_PATH, 0); + htp_config_set_path_separators_compress(cfg, HTP_DECODER_URL_PATH, 1); + htp_config_set_u_encoding_decode(cfg, HTP_DECODER_URL_PATH, 0); + + htp_config_set_url_encoding_invalid_handling(cfg, HTP_DECODER_URL_PATH, HTP_URL_DECODE_PRESERVE_PERCENT); + htp_config_set_url_encoding_invalid_unwanted(cfg, HTP_DECODER_URL_PATH, HTP_UNWANTED_400); + htp_config_set_control_chars_unwanted(cfg, HTP_DECODER_URL_PATH, HTP_UNWANTED_IGNORE); + htp_config_set_requestline_leading_whitespace_unwanted(cfg, HTP_DECODER_DEFAULTS, HTP_UNWANTED_400); + break; + + case HTP_SERVER_IIS_5_1: + cfg->parse_request_line = htp_parse_request_line_generic; + cfg->process_request_header = htp_process_request_header_generic; + cfg->parse_response_line = htp_parse_response_line_generic; + cfg->process_response_header = htp_process_response_header_generic; + + htp_config_set_backslash_convert_slashes(cfg, HTP_DECODER_URL_PATH, 1); + htp_config_set_path_separators_decode(cfg, HTP_DECODER_URL_PATH, 1); + htp_config_set_path_separators_compress(cfg, HTP_DECODER_URL_PATH, 1); + htp_config_set_u_encoding_decode(cfg, 
HTP_DECODER_URL_PATH, 0); + + htp_config_set_url_encoding_invalid_handling(cfg, HTP_DECODER_URL_PATH, HTP_URL_DECODE_PRESERVE_PERCENT); + htp_config_set_control_chars_unwanted(cfg, HTP_DECODER_URL_PATH, HTP_UNWANTED_IGNORE); + htp_config_set_requestline_leading_whitespace_unwanted(cfg, HTP_DECODER_DEFAULTS, HTP_UNWANTED_IGNORE); + break; + + case HTP_SERVER_IIS_6_0: + cfg->parse_request_line = htp_parse_request_line_generic; + cfg->process_request_header = htp_process_request_header_generic; + cfg->parse_response_line = htp_parse_response_line_generic; + cfg->process_response_header = htp_process_response_header_generic; + + htp_config_set_backslash_convert_slashes(cfg, HTP_DECODER_URL_PATH, 1); + htp_config_set_path_separators_decode(cfg, HTP_DECODER_URL_PATH, 1); + htp_config_set_path_separators_compress(cfg, HTP_DECODER_URL_PATH, 1); + htp_config_set_u_encoding_decode(cfg, HTP_DECODER_URL_PATH, 1); + + htp_config_set_url_encoding_invalid_handling(cfg, HTP_DECODER_URL_PATH, HTP_URL_DECODE_PRESERVE_PERCENT); + htp_config_set_u_encoding_unwanted(cfg, HTP_DECODER_URL_PATH, HTP_UNWANTED_400); + htp_config_set_control_chars_unwanted(cfg, HTP_DECODER_URL_PATH, HTP_UNWANTED_400); + htp_config_set_requestline_leading_whitespace_unwanted(cfg, HTP_DECODER_DEFAULTS, HTP_UNWANTED_IGNORE); + break; + + case HTP_SERVER_IIS_7_0: + case HTP_SERVER_IIS_7_5: + cfg->parse_request_line = htp_parse_request_line_generic; + cfg->process_request_header = htp_process_request_header_generic; + cfg->parse_response_line = htp_parse_response_line_generic; + cfg->process_response_header = htp_process_response_header_generic; + + htp_config_set_backslash_convert_slashes(cfg, HTP_DECODER_URL_PATH, 1); + htp_config_set_path_separators_decode(cfg, HTP_DECODER_URL_PATH, 1); + htp_config_set_path_separators_compress(cfg, HTP_DECODER_URL_PATH, 1); + htp_config_set_u_encoding_decode(cfg, HTP_DECODER_URL_PATH, 1); + + htp_config_set_url_encoding_invalid_handling(cfg, HTP_DECODER_URL_PATH, 
HTP_URL_DECODE_PRESERVE_PERCENT); + htp_config_set_url_encoding_invalid_unwanted(cfg, HTP_DECODER_URL_PATH, HTP_UNWANTED_400); + htp_config_set_control_chars_unwanted(cfg, HTP_DECODER_URL_PATH, HTP_UNWANTED_400); + htp_config_set_requestline_leading_whitespace_unwanted(cfg, HTP_DECODER_DEFAULTS, HTP_UNWANTED_IGNORE); + break; + + default: + return HTP_ERROR; + } + + // Remember the personality + cfg->server_personality = personality; + + return HTP_OK; +} + +void htp_config_set_tmpdir(htp_cfg_t *cfg, char *tmpdir) { + if (cfg == NULL) return; + cfg->tmpdir = tmpdir; +} + +void htp_config_set_tx_auto_destroy(htp_cfg_t *cfg, int tx_auto_destroy) { + if (cfg == NULL) return; + cfg->tx_auto_destroy = tx_auto_destroy; +} + +void htp_config_set_user_data(htp_cfg_t *cfg, void *user_data) { + if (cfg == NULL) return; + cfg->user_data = user_data; +} + + +static int convert_to_0_or_1(int b) { + if (b) return 1; + return 0; +} + +void htp_config_set_bestfit_map(htp_cfg_t *cfg, enum htp_decoder_ctx_t ctx, void *map) { + if (ctx >= HTP_DECODER_CONTEXTS_MAX) return; + + cfg->decoder_cfgs[ctx].bestfit_map = map; + + if (ctx == HTP_DECODER_DEFAULTS) { + for (size_t i = 0; i < HTP_DECODER_CONTEXTS_MAX; i++) { + cfg->decoder_cfgs[i].bestfit_map = map; + } + } +} + +void htp_config_set_bestfit_replacement_byte(htp_cfg_t *cfg, enum htp_decoder_ctx_t ctx, int b) { + if (ctx >= HTP_DECODER_CONTEXTS_MAX) return; + + cfg->decoder_cfgs[ctx].bestfit_replacement_byte = (unsigned char) b; + + if (ctx == HTP_DECODER_DEFAULTS) { + for (size_t i = 0; i < HTP_DECODER_CONTEXTS_MAX; i++) { + cfg->decoder_cfgs[i].bestfit_replacement_byte = (unsigned char) b; + } + } +} + +void htp_config_set_url_encoding_invalid_handling(htp_cfg_t *cfg, enum htp_decoder_ctx_t ctx, enum htp_url_encoding_handling_t handling) { + if (ctx >= HTP_DECODER_CONTEXTS_MAX) return; + + cfg->decoder_cfgs[ctx].url_encoding_invalid_handling = handling; + + if (ctx == HTP_DECODER_DEFAULTS) { + for (size_t i = 0; i < 
HTP_DECODER_CONTEXTS_MAX; i++) { + cfg->decoder_cfgs[i].url_encoding_invalid_handling = handling; + } + } +} + +void htp_config_set_nul_raw_terminates(htp_cfg_t *cfg, enum htp_decoder_ctx_t ctx, int enabled) { + if (ctx >= HTP_DECODER_CONTEXTS_MAX) return; + + cfg->decoder_cfgs[ctx].nul_raw_terminates = convert_to_0_or_1(enabled); + + if (ctx == HTP_DECODER_DEFAULTS) { + for (size_t i = 0; i < HTP_DECODER_CONTEXTS_MAX; i++) { + cfg->decoder_cfgs[i].nul_raw_terminates = convert_to_0_or_1(enabled); + } + } +} + +void htp_config_set_nul_encoded_terminates(htp_cfg_t *cfg, enum htp_decoder_ctx_t ctx, int enabled) { + if (ctx >= HTP_DECODER_CONTEXTS_MAX) return; + + cfg->decoder_cfgs[ctx].nul_encoded_terminates = convert_to_0_or_1(enabled); + + if (ctx == HTP_DECODER_DEFAULTS) { + for (size_t i = 0; i < HTP_DECODER_CONTEXTS_MAX; i++) { + cfg->decoder_cfgs[i].nul_encoded_terminates = convert_to_0_or_1(enabled); + } + } +} + +void htp_config_set_u_encoding_decode(htp_cfg_t *cfg, enum htp_decoder_ctx_t ctx, int enabled) { + if (ctx >= HTP_DECODER_CONTEXTS_MAX) return; + + cfg->decoder_cfgs[ctx].u_encoding_decode = convert_to_0_or_1(enabled); + + if (ctx == HTP_DECODER_DEFAULTS) { + for (size_t i = 0; i < HTP_DECODER_CONTEXTS_MAX; i++) { + cfg->decoder_cfgs[i].u_encoding_decode = convert_to_0_or_1(enabled); + } + } +} + +void htp_config_set_backslash_convert_slashes(htp_cfg_t *cfg, enum htp_decoder_ctx_t ctx, int enabled) { + if (ctx >= HTP_DECODER_CONTEXTS_MAX) return; + + cfg->decoder_cfgs[ctx].backslash_convert_slashes = convert_to_0_or_1(enabled); + + if (ctx == HTP_DECODER_DEFAULTS) { + for (size_t i = 0; i < HTP_DECODER_CONTEXTS_MAX; i++) { + cfg->decoder_cfgs[i].backslash_convert_slashes = convert_to_0_or_1(enabled); + } + } +} + +void htp_config_set_path_separators_decode(htp_cfg_t *cfg, enum htp_decoder_ctx_t ctx, int enabled) { + if (ctx >= HTP_DECODER_CONTEXTS_MAX) return; + + cfg->decoder_cfgs[ctx].path_separators_decode = convert_to_0_or_1(enabled); + + if (ctx 
== HTP_DECODER_DEFAULTS) { + for (size_t i = 0; i < HTP_DECODER_CONTEXTS_MAX; i++) { + cfg->decoder_cfgs[i].path_separators_decode = convert_to_0_or_1(enabled); + } + } +} + +void htp_config_set_path_separators_compress(htp_cfg_t *cfg, enum htp_decoder_ctx_t ctx, int enabled) { + if (ctx >= HTP_DECODER_CONTEXTS_MAX) return; + + cfg->decoder_cfgs[ctx].path_separators_compress = convert_to_0_or_1(enabled); + + if (ctx == HTP_DECODER_DEFAULTS) { + for (size_t i = 0; i < HTP_DECODER_CONTEXTS_MAX; i++) { + cfg->decoder_cfgs[i].path_separators_compress = convert_to_0_or_1(enabled); + } + } +} + +void htp_config_set_plusspace_decode(htp_cfg_t *cfg, enum htp_decoder_ctx_t ctx, int enabled) { + if (ctx >= HTP_DECODER_CONTEXTS_MAX) return; + + cfg->decoder_cfgs[ctx].plusspace_decode = convert_to_0_or_1(enabled); + + if (ctx == HTP_DECODER_DEFAULTS) { + for (size_t i = 0; i < HTP_DECODER_CONTEXTS_MAX; i++) { + cfg->decoder_cfgs[i].plusspace_decode = convert_to_0_or_1(enabled); + } + } +} + +void htp_config_set_convert_lowercase(htp_cfg_t *cfg, enum htp_decoder_ctx_t ctx, int enabled) { + if (ctx >= HTP_DECODER_CONTEXTS_MAX) return; + + cfg->decoder_cfgs[ctx].convert_lowercase = convert_to_0_or_1(enabled); + + if (ctx == HTP_DECODER_DEFAULTS) { + for (size_t i = 0; i < HTP_DECODER_CONTEXTS_MAX; i++) { + cfg->decoder_cfgs[i].convert_lowercase = convert_to_0_or_1(enabled); + } + } +} + +void htp_config_set_utf8_convert_bestfit(htp_cfg_t *cfg, enum htp_decoder_ctx_t ctx, int enabled) { + if (ctx >= HTP_DECODER_CONTEXTS_MAX) return; + + cfg->decoder_cfgs[ctx].utf8_convert_bestfit = convert_to_0_or_1(enabled); + + if (ctx == HTP_DECODER_DEFAULTS) { + for (size_t i = 0; i < HTP_DECODER_CONTEXTS_MAX; i++) { + cfg->decoder_cfgs[i].utf8_convert_bestfit = convert_to_0_or_1(enabled); + } + } +} + +void htp_config_set_u_encoding_unwanted(htp_cfg_t *cfg, enum htp_decoder_ctx_t ctx, enum htp_unwanted_t unwanted) { + if (ctx >= HTP_DECODER_CONTEXTS_MAX) return; + + 
cfg->decoder_cfgs[ctx].u_encoding_unwanted = unwanted; + + if (ctx == HTP_DECODER_DEFAULTS) { + for (size_t i = 0; i < HTP_DECODER_CONTEXTS_MAX; i++) { + cfg->decoder_cfgs[i].u_encoding_unwanted = unwanted; + } + } +} + +void htp_config_set_control_chars_unwanted(htp_cfg_t *cfg, enum htp_decoder_ctx_t ctx, enum htp_unwanted_t unwanted) { + if (ctx >= HTP_DECODER_CONTEXTS_MAX) return; + + cfg->decoder_cfgs[ctx].u_encoding_unwanted = unwanted; + + if (ctx == HTP_DECODER_DEFAULTS) { + for (size_t i = 0; i < HTP_DECODER_CONTEXTS_MAX; i++) { + cfg->decoder_cfgs[i].u_encoding_unwanted = unwanted; + } + } +} + +void htp_config_set_url_encoding_invalid_unwanted(htp_cfg_t *cfg, enum htp_decoder_ctx_t ctx, enum htp_unwanted_t unwanted) { + if (ctx >= HTP_DECODER_CONTEXTS_MAX) return; + + cfg->decoder_cfgs[ctx].url_encoding_invalid_unwanted = unwanted; + + if (ctx == HTP_DECODER_DEFAULTS) { + for (size_t i = 0; i < HTP_DECODER_CONTEXTS_MAX; i++) { + cfg->decoder_cfgs[i].url_encoding_invalid_unwanted = unwanted; + } + } +} + +void htp_config_set_nul_encoded_unwanted(htp_cfg_t *cfg, enum htp_decoder_ctx_t ctx, enum htp_unwanted_t unwanted) { + if (ctx >= HTP_DECODER_CONTEXTS_MAX) return; + + cfg->decoder_cfgs[ctx].nul_encoded_unwanted = unwanted; + + if (ctx == HTP_DECODER_DEFAULTS) { + for (size_t i = 0; i < HTP_DECODER_CONTEXTS_MAX; i++) { + cfg->decoder_cfgs[i].nul_encoded_unwanted = unwanted; + } + } +} + +void htp_config_set_nul_raw_unwanted(htp_cfg_t *cfg, enum htp_decoder_ctx_t ctx, enum htp_unwanted_t unwanted) { + if (ctx >= HTP_DECODER_CONTEXTS_MAX) return; + + cfg->decoder_cfgs[ctx].nul_raw_unwanted = unwanted; + + if (ctx == HTP_DECODER_DEFAULTS) { + for (size_t i = 0; i < HTP_DECODER_CONTEXTS_MAX; i++) { + cfg->decoder_cfgs[i].nul_raw_unwanted = unwanted; + } + } +} + +void htp_config_set_path_separators_encoded_unwanted(htp_cfg_t *cfg, enum htp_decoder_ctx_t ctx, enum htp_unwanted_t unwanted) { + if (ctx >= HTP_DECODER_CONTEXTS_MAX) return; + + 
cfg->decoder_cfgs[ctx].path_separators_encoded_unwanted = unwanted; + + if (ctx == HTP_DECODER_DEFAULTS) { + for (size_t i = 0; i < HTP_DECODER_CONTEXTS_MAX; i++) { + cfg->decoder_cfgs[i].path_separators_encoded_unwanted = unwanted; + } + } +} + +void htp_config_set_utf8_invalid_unwanted(htp_cfg_t *cfg, enum htp_decoder_ctx_t ctx, enum htp_unwanted_t unwanted) { + if (ctx >= HTP_DECODER_CONTEXTS_MAX) return; + + cfg->decoder_cfgs[ctx].utf8_invalid_unwanted = unwanted; + + if (ctx == HTP_DECODER_DEFAULTS) { + for (size_t i = 0; i < HTP_DECODER_CONTEXTS_MAX; i++) { + cfg->decoder_cfgs[i].utf8_invalid_unwanted = unwanted; + } + } +} + +void htp_config_set_requestline_leading_whitespace_unwanted(htp_cfg_t *cfg, enum htp_decoder_ctx_t ctx, enum htp_unwanted_t unwanted) { + if (ctx >= HTP_DECODER_CONTEXTS_MAX) return; + + cfg->requestline_leading_whitespace_unwanted = unwanted; +} + +void htp_config_set_response_decompression_layer_limit(htp_cfg_t *cfg, int limit) { + if (cfg == NULL) return; + cfg->response_decompression_layer_limit = limit; +} diff --git a/htp/htp_config.h b/htp/htp_config.h new file mode 100644 index 0000000..57544f7 --- /dev/null +++ b/htp/htp_config.h @@ -0,0 +1,719 @@ +/*************************************************************************** + * Copyright (c) 2009-2010 Open Information Security Foundation + * Copyright (c) 2010-2013 Qualys, Inc. + * All rights reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions are + * met: + * + * - Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + + * - Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution. + + * - Neither the name of the Qualys, Inc. 
nor the names of its + * contributors may be used to endorse or promote products derived from + * this software without specific prior written permission. + * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS + * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT + * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR + * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT + * HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, + * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT + * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, + * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY + * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT + * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE + * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + ***************************************************************************/ + +/** + * @file + * @author Ivan Ristic <ivanr@webkreator.com> + */ + +#ifndef HTP_CONFIG_H +#define HTP_CONFIG_H + +#ifdef __cplusplus +extern "C" { +#endif + +#include "htp.h" + +/** + * Decoder contexts. + */ +enum htp_decoder_ctx_t { + /** Default settings. Settings applied to this context are propagated to all other contexts. */ + HTP_DECODER_DEFAULTS = 0, + + /** Urlencoded decoder settings. */ + HTP_DECODER_URLENCODED = 1, + + /** URL path decoder settings. */ + HTP_DECODER_URL_PATH = 2 +}; + +/** + * Enumerates the possible server personalities. + */ +enum htp_server_personality_t { + /** + * Minimal personality that performs as little work as possible. All optional + * features are disabled. This personality is a good starting point for customization. + */ + HTP_SERVER_MINIMAL = 0, + + /** A generic personality that aims to work reasonably well for all server types. 
*/ + HTP_SERVER_GENERIC = 1, + + /** The IDS personality tries to perform as much decoding as possible. */ + HTP_SERVER_IDS = 2, + + /** Mimics the behavior of IIS 4.0, as shipped with Windows NT 4.0. Currently rejected with HTP_ERROR by htp_config_set_server_personality(). */ + HTP_SERVER_IIS_4_0 = 3, + + /** Mimics the behavior of IIS 5.0, as shipped with Windows 2000. Currently rejected with HTP_ERROR by htp_config_set_server_personality(). */ + HTP_SERVER_IIS_5_0 = 4, + + /** Mimics the behavior of IIS 5.1, as shipped with Windows XP Professional. */ + HTP_SERVER_IIS_5_1 = 5, + + /** Mimics the behavior of IIS 6.0, as shipped with Windows 2003. */ + HTP_SERVER_IIS_6_0 = 6, + + /** Mimics the behavior of IIS 7.0, as shipped with Windows 2008. */ + HTP_SERVER_IIS_7_0 = 7, + + /** Mimics the behavior of IIS 7.5, as shipped with Windows 7. */ + HTP_SERVER_IIS_7_5 = 8, + + /** Mimics the behavior of Apache 2.x. */ + HTP_SERVER_APACHE_2 = 9 +}; + +/** + * Enumerates the ways in which servers respond to malformed data. + */ +enum htp_unwanted_t { + + /** Ignores the problem. */ + HTP_UNWANTED_IGNORE = 0, + + /** Responds with HTTP 400 status code. */ + HTP_UNWANTED_400 = 400, + + /** Responds with HTTP 404 status code. */ + HTP_UNWANTED_404 = 404 +}; + +/** + * Enumerates the possible approaches to handling invalid URL-encodings. + */ +enum htp_url_encoding_handling_t { + /** Ignore invalid URL encodings and leave the % in the data. */ + HTP_URL_DECODE_PRESERVE_PERCENT = 0, + + /** Ignore invalid URL encodings, but remove the % from the data. */ + HTP_URL_DECODE_REMOVE_PERCENT = 1, + + /** Decode invalid URL encodings. */ + HTP_URL_DECODE_PROCESS_INVALID = 2 +}; + +/** + * Creates a new configuration structure. Configuration structures created at + * configuration time must not be changed afterwards in order to support lock-less + * copying. + * + * @return New configuration structure. + */ +htp_cfg_t *htp_config_create(void); + +/** + * Creates a copy of the supplied configuration structure. 
The idea is to create + * one or more configuration objects at configuration-time, but to use this + * function to create per-connection copies. That way it will be possible to + * adjust per-connection configuration as necessary, without affecting the + * global configuration. Make sure no other thread changes the configuration + * object while this function is operating. + * + * @param[in] cfg + * @return A copy of the configuration structure. + */ +htp_cfg_t *htp_config_copy(htp_cfg_t *cfg); + +/** + * Destroys a configuration structure. + * + * @param[in] cfg + */ +void htp_config_destroy(htp_cfg_t *cfg); + +/** + * Retrieves user data associated with this configuration. + * + * @param[in] cfg + * @return User data pointer, or NULL if not set. + */ +void *htp_config_get_user_data(htp_cfg_t *cfg); + +/** + * Registers a callback that is invoked every time there is a log message with + * severity equal to or higher than the configured log level. + * + * @param[in] cfg + * @param[in] callback_fn + */ +void htp_config_register_log(htp_cfg_t *cfg, int (*callback_fn)(htp_log_t *)); + +/** + * Adds the built-in Multipart parser to the configuration. This parser will extract information + * stored in request bodies, when they are in multipart/form-data format. + * + * @param[in] cfg + */ +void htp_config_register_multipart_parser(htp_cfg_t *cfg); + +/** + * Registers a REQUEST_START callback, which is invoked every time a new + * request begins and before any parsing is done. + * + * @param[in] cfg + * @param[in] callback_fn + */ +void htp_config_register_request_start(htp_cfg_t *cfg, int (*callback_fn)(htp_tx_t *)); + +/** + * Registers a REQUEST_BODY_DATA callback. + * + * @param[in] cfg + * @param[in] callback_fn + */ +void htp_config_register_request_body_data(htp_cfg_t *cfg, int (*callback_fn)(htp_tx_data_t *)); + +/** + * Registers a REQUEST_COMPLETE callback. 
+ * + * @param[in] cfg + * @param[in] callback_fn + */ +void htp_config_register_request_complete(htp_cfg_t *cfg, int (*callback_fn)(htp_tx_t *)); + +/** + * Registers a REQUEST_FILE_DATA callback. + * + * @param[in] cfg + * @param[in] callback_fn + */ +void htp_config_register_request_file_data(htp_cfg_t *cfg, int (*callback_fn)(htp_file_data_t *)); + +/** + * Registers a REQUEST_HEADER_DATA callback. + * + * @param[in] cfg + * @param[in] callback_fn + */ +void htp_config_register_request_header_data(htp_cfg_t *cfg, int (*callback_fn)(htp_tx_data_t *)); + +/** + * Registers a REQUEST_HEADERS callback. + * + * @param[in] cfg + * @param[in] callback_fn + */ +void htp_config_register_request_headers(htp_cfg_t *cfg, int (*callback_fn)(htp_tx_t *)); + +/** + * Registers a REQUEST_LINE callback. + * + * @param[in] cfg + * @param[in] callback_fn + */ +void htp_config_register_request_line(htp_cfg_t *cfg, int (*callback_fn)(htp_tx_t *)); + +/** + * Registers a REQUEST_URI_NORMALIZE callback. + * + * @param[in] cfg + * @param[in] callback_fn + */ +void htp_config_register_request_uri_normalize(htp_cfg_t *cfg, int (*callback_fn)(htp_tx_t *)); + +/** + * Registers a REQUEST_TRAILER callback. + * + * @param[in] cfg + * @param[in] callback_fn + */ +void htp_config_register_request_trailer(htp_cfg_t *cfg, int (*callback_fn)(htp_tx_t *)); + +/** + * Registers a REQUEST_TRAILER_DATA callback. + * + * @param[in] cfg + * @param[in] callback_fn + */ +void htp_config_register_request_trailer_data(htp_cfg_t *cfg, int (*callback_fn)(htp_tx_data_t *d)); + +/** + * Registers a RESPONSE_BODY_DATA callback. + * + * @param[in] cfg + * @param[in] callback_fn + */ +void htp_config_register_response_body_data(htp_cfg_t *cfg, int (*callback_fn)(htp_tx_data_t *)); + +/** + * Registers a RESPONSE_COMPLETE callback. 
+ * + * @param[in] cfg + * @param[in] callback_fn + */ +void htp_config_register_response_complete(htp_cfg_t *cfg, int (*callback_fn)(htp_tx_t *)); + +/** + * Registers a RESPONSE_HEADER_DATA callback. + * + * @param[in] cfg + * @param[in] callback_fn + */ +void htp_config_register_response_header_data(htp_cfg_t *cfg, int (*callback_fn)(htp_tx_data_t *)); + +/** + * Registers a RESPONSE_HEADERS callback. + * + * @param[in] cfg + * @param[in] callback_fn + */ +void htp_config_register_response_headers(htp_cfg_t *cfg, int (*callback_fn)(htp_tx_t *)); + +/** + * Registers a RESPONSE_LINE callback. + * + * @param[in] cfg + * @param[in] callback_fn + */ +void htp_config_register_response_line(htp_cfg_t *cfg, int (*callback_fn)(htp_tx_t *)); + +/** + * Registers a RESPONSE_START callback. + * + * @param[in] cfg + * @param[in] callback_fn + */ +void htp_config_register_response_start(htp_cfg_t *cfg, int (*callback_fn)(htp_tx_t *)); + +/** + * Registers a RESPONSE_TRAILER callback. + * + * @param[in] cfg + * @param[in] callback_fn + */ +void htp_config_register_response_trailer(htp_cfg_t *cfg, int (*callback_fn)(htp_tx_t *)); + +/** + * Registers a RESPONSE_TRAILER_DATA callback. + * + * @param[in] cfg + * @param[in] callback_fn + */ +void htp_config_register_response_trailer_data(htp_cfg_t *cfg, int (*callback_fn)(htp_tx_data_t *d)); + +/** + * Registers a TRANSACTION_COMPLETE callback. + * + * @param[in] cfg + * @param[in] callback_fn + */ +void htp_config_register_transaction_complete(htp_cfg_t *cfg, int (*callback_fn)(htp_tx_t *)); + +/** + * Adds the built-in Urlencoded parser to the configuration. The parser will + * parse query strings and request bodies with the appropriate MIME type. + * Internally, this registers one REQUEST_LINE and one REQUEST_HEADERS callback. + * + * @param[in] cfg + */ +void htp_config_register_urlencoded_parser(htp_cfg_t *cfg); + +/** + * Configures whether backslash characters are treated as path segment separators. They + * are not on Unix systems, but are on Windows systems. 
If this setting is enabled, a path + * such as "/one\two/three" will be converted to "/one/two/three". Implemented only for HTP_DECODER_URL_PATH. + * + * @param[in] cfg + * @param[in] ctx + * @param[in] enabled + */ +void htp_config_set_backslash_convert_slashes(htp_cfg_t *cfg, enum htp_decoder_ctx_t ctx, int enabled); + +/** + * Configures a best-fit map, which is used whenever characters longer than one byte + * need to be converted to a single byte. By default a Windows 1252 best-fit map is used. + * The map is a list of triplets, the first 2 bytes being a UCS-2 character to map from, + * and the third byte being the single byte to map to. Make sure that your map contains + * the mappings to cover the full-width and half-width form characters (U+FF00-FFEF). The + * last triplet in the map must be all zeros (3 NUL bytes). + * + * @param[in] cfg + * @param[in] ctx + * @param[in] map + */ +void htp_config_set_bestfit_map(htp_cfg_t *cfg, enum htp_decoder_ctx_t ctx, void *map); + +/** + * Sets the replacement character that will be used in the lossy best-fit + * mapping from multi-byte to single-byte streams. The question mark character + * is used as the default replacement byte. + * + * @param[in] cfg + * @param[in] ctx + * @param[in] replacement_byte + */ +void htp_config_set_bestfit_replacement_byte(htp_cfg_t *cfg, enum htp_decoder_ctx_t ctx, int replacement_byte); + +/** + * Controls reaction to raw control characters in the data. + * + * @param[in] cfg + * @param[in] ctx + * @param[in] unwanted + */ +void htp_config_set_control_chars_unwanted(htp_cfg_t *cfg, enum htp_decoder_ctx_t ctx, enum htp_unwanted_t unwanted); + +/** + * Configures whether input data will be converted to lowercase. Useful when set on the + * HTP_DECODER_URL_PATH context, in order to handle servers with case-insensitive filesystems. + * Implemented only for HTP_DECODER_URL_PATH. 
+ * + * @param[in] cfg + * @param[in] ctx + * @param[in] enabled + */ +void htp_config_set_convert_lowercase(htp_cfg_t *cfg, enum htp_decoder_ctx_t ctx, int enabled); + +/** + * Enables or disables Multipart file extraction. This function can be invoked only + * after a previous htp_config_set_tmpdir() invocation. Otherwise, the configuration + * change will fail, and extraction will not be enabled. Disabled by default. Please + * note that the built-in file extraction implementation uses synchronous I/O, which + * means that it is not suitable for use in an event-driven container. There's an + * upper limit to how many files can be created on the filesystem during a single + * request. The limit exists in order to mitigate a DoS attack with a + * Multipart payload that contains hundreds or thousands of files (it's cheap for the + * attacker to do this, but costly for the server to support it). The default limit + * may be pretty conservative. + * + * @param[in] cfg + * @param[in] extract_files 1 if you wish extraction to be enabled, 0 otherwise + * @param[in] limit the maximum number of files allowed; use -1 to use the parser default. + * @return HTP_OK on success, HTP_ERROR if cfg is NULL or no temporary directory has been set. + */ +htp_status_t htp_config_set_extract_request_files(htp_cfg_t *cfg, int extract_files, int limit); + +/** + * Configures the maximum size of the buffer LibHTP will use when all data is not available + * in the current buffer (e.g., a very long header line that might span several packets). This + * limit is controlled by the hard_limit parameter. The soft_limit parameter is not implemented. + * + * @param[in] cfg + * @param[in] soft_limit NOT IMPLEMENTED. + * @param[in] hard_limit + */ +void htp_config_set_field_limits(htp_cfg_t *cfg, size_t soft_limit, size_t hard_limit); + +/** + * Configures the maximum memlimit LibHTP will pass to liblzma. 
+ * + * @param[in] cfg + * @param[in] memlimit + */ +void htp_config_set_lzma_memlimit(htp_cfg_t *cfg, size_t memlimit); + +/** + * Configures the maximum layers LibHTP will pass to liblzma. + * + * @param[in] cfg + * @param[in] limit + */ +void htp_config_set_lzma_layers(htp_cfg_t *cfg, int limit); + +/** + * Configures the maximum compression bomb size LibHTP will decompress. + * + * @param[in] cfg + * @param[in] bomblimit values larger than INT32_MAX are clamped to INT32_MAX + */ +void htp_config_set_compression_bomb_limit(htp_cfg_t *cfg, size_t bomblimit); + +/** + * Configures the maximum compression bomb time LibHTP will decompress. + * + * @param[in] cfg + * @param[in] useclimit limit in microseconds; values of 1000000 (one second) or more are clamped to 1000000 + */ +void htp_config_set_compression_time_limit(htp_cfg_t *cfg, size_t useclimit); + +/** + * Configures the maximum number of tx LibHTP will have per connection. + * + * @param[in] cfg + * @param[in] limit + */ +void htp_config_set_max_tx(htp_cfg_t *cfg, uint32_t limit); + +/** + * Configures the desired log level. + * + * @param[in] cfg + * @param[in] log_level + */ +void htp_config_set_log_level(htp_cfg_t *cfg, enum htp_log_level_t log_level); + +/** + * Configures how the server reacts to encoded NUL bytes. Some servers will stop + * at NUL, while some will respond with 400 or 404. When the termination option is not + * used, the NUL byte will remain in the path. + * + * @param[in] cfg + * @param[in] ctx + * @param[in] enabled + */ +void htp_config_set_nul_encoded_terminates(htp_cfg_t *cfg, enum htp_decoder_ctx_t ctx, int enabled); + +/** + * Configures reaction to encoded NUL bytes in input data. + * + * @param[in] cfg + * @param[in] ctx + * @param[in] unwanted + */ +void htp_config_set_nul_encoded_unwanted(htp_cfg_t *cfg, enum htp_decoder_ctx_t ctx, enum htp_unwanted_t unwanted); + +/** + * Configures the handling of raw NUL bytes. If enabled, raw NUL terminates strings. 
+ * + * @param[in] cfg + * @param[in] ctx + * @param[in] enabled + */ +void htp_config_set_nul_raw_terminates(htp_cfg_t *cfg, enum htp_decoder_ctx_t ctx, int enabled); + +/** + * Configures how the server reacts to raw NUL bytes. Some servers will terminate + * path at NUL, while some will respond with 400 or 404. When the termination option + * is not used, the NUL byte will remain in the data. + * + * @param[in] cfg + * @param[in] ctx + * @param[in] unwanted + */ +void htp_config_set_nul_raw_unwanted(htp_cfg_t *cfg, enum htp_decoder_ctx_t ctx, enum htp_unwanted_t unwanted); + +/** + * Enable or disable request HTTP Authentication parsing. Enabled by default. + * + * @param[in] cfg + * @param[in] parse_request_auth + */ +void htp_config_set_parse_request_auth(htp_cfg_t *cfg, int parse_request_auth); + +/** + * Enable or disable request cookie parsing. Enabled by default. + * + * @param[in] cfg + * @param[in] parse_request_cookies + */ +void htp_config_set_parse_request_cookies(htp_cfg_t *cfg, int parse_request_cookies); + +/** + * Enable or disable spaces in URIs. Disabled by default. + * + * @param[in] cfg + * @param[in] allow_space_uri + */ +void htp_config_set_allow_space_uri(htp_cfg_t *cfg, int allow_space_uri); + +/** + * Configures whether consecutive path segment separators will be compressed. When enabled, a path + * such as "/one//two" will be normalized to "/one/two". Backslash conversion and path segment separator + * decoding are carried out before compression. For example, the path "/one\\/two\/%5cthree/%2f//four" + * will be converted to "/one/two/three/four" (assuming all 3 options are enabled). Implemented only for + * HTP_DECODER_URL_PATH. + * + * @param[in] cfg + * @param[in] ctx + * @param[in] enabled + */ +void htp_config_set_path_separators_compress(htp_cfg_t *cfg, enum htp_decoder_ctx_t ctx, int enabled); + +/** + * Configures whether encoded path segment separators will be decoded. Apache does not do + * this by default, but IIS does. 
If enabled, a path such as "/one%2ftwo" will be normalized + * to "/one/two". If the backslash_separators option is also enabled, encoded backslash + * characters will be converted too (and subsequently normalized to forward slashes). Implemented + * only for HTP_DECODER_URL_PATH. + * + * @param[in] cfg + * @param[in] ctx + * @param[in] enabled + */ +void htp_config_set_path_separators_decode(htp_cfg_t *cfg, enum htp_decoder_ctx_t ctx, int enabled); + +/** + * Configures reaction to encoded path separator characters (e.g., %2f). Implemented only for HTP_DECODER_URL_PATH. + * + * @param[in] cfg + * @param[in] ctx + * @param[in] unwanted + */ +void htp_config_set_path_separators_encoded_unwanted(htp_cfg_t *cfg, enum htp_decoder_ctx_t ctx, enum htp_unwanted_t unwanted); + +/** + * Configures whether plus characters are converted to spaces when decoding URL-encoded strings. This + * is appropriate to do for parameters, but not for URLs. Only applies to contexts where decoding + * is taking place. + * + * @param[in] cfg + * @param[in] ctx + * @param[in] enabled + */ +void htp_config_set_plusspace_decode(htp_cfg_t *cfg, enum htp_decoder_ctx_t ctx, int enabled); + +/** + * Controls whether compressed response bodies will be automatically decompressed. + * + * @param[in] cfg + * @param[in] enabled set to 1 to enable decompression, 0 otherwise + */ +void htp_config_set_response_decompression(htp_cfg_t *cfg, int enabled); + +/** + * Controls whether compressed request bodies will be automatically decompressed. + * + * @param[in] cfg + * @param[in] enabled set to 1 to enable decompression, 0 otherwise + */ +void htp_config_set_request_decompression(htp_cfg_t *cfg, int enabled); + +/** + * Configure desired server personality. + * + * @param[in] cfg + * @param[in] personality + * @return HTP_OK if the personality is supported, HTP_ERROR if it isn't. 
+ */ +htp_status_t htp_config_set_server_personality(htp_cfg_t *cfg, enum htp_server_personality_t personality); + +/** + * Configures the path where temporary files should be stored. Must be set + * in order to use the Multipart file extraction functionality. + * + * @param[in] cfg + * @param[in] tmpdir + */ +void htp_config_set_tmpdir(htp_cfg_t *cfg, char *tmpdir); + +/** + * Configures whether transactions will be automatically destroyed once they + * are processed and all callbacks invoked. This option is appropriate for + * programs that process transactions as they are parsed. + * + * @param[in] cfg + * @param[in] tx_auto_destroy + */ +void htp_config_set_tx_auto_destroy(htp_cfg_t *cfg, int tx_auto_destroy); + +/** + * Associates provided opaque user data with the configuration. + * + * @param[in] cfg + * @param[in] user_data + */ +void htp_config_set_user_data(htp_cfg_t *cfg, void *user_data); + +/** + * Configures whether %u-encoded sequences are decoded. Such sequences + * will be treated as invalid URL encoding if decoding is not desirable. + * + * @param[in] cfg + * @param[in] ctx + * @param[in] enabled + */ +void htp_config_set_u_encoding_decode(htp_cfg_t *cfg, enum htp_decoder_ctx_t ctx, int enabled); + +/** + * Configures reaction to %u-encoded sequences in input data. + * + * @param[in] cfg + * @param[in] ctx + * @param[in] unwanted + */ +void htp_config_set_u_encoding_unwanted(htp_cfg_t *cfg, enum htp_decoder_ctx_t ctx, enum htp_unwanted_t unwanted); + +/** + * Configures how the server handles invalid URL encoding. + * + * @param[in] cfg + * @param[in] ctx + * @param[in] handling + */ +void htp_config_set_url_encoding_invalid_handling(htp_cfg_t *cfg, enum htp_decoder_ctx_t ctx, enum htp_url_encoding_handling_t handling); + +/** + * Configures how the server reacts to invalid URL encoding. 
+ * + * @param[in] cfg + * @param[in] ctx + * @param[in] unwanted + */ +void htp_config_set_url_encoding_invalid_unwanted(htp_cfg_t *cfg, enum htp_decoder_ctx_t ctx, enum htp_unwanted_t unwanted); + +/** + * Controls whether the data should be treated as UTF-8 and converted to a single-byte + * stream using best-fit mapping. Implemented only for HTP_DECODER_URL_PATH. + * + * @param[in] cfg + * @param[in] ctx + * @param[in] enabled + */ +void htp_config_set_utf8_convert_bestfit(htp_cfg_t *cfg, enum htp_decoder_ctx_t ctx, int enabled); + +/** + * Configures how the server reacts to invalid UTF-8 characters. This setting does + * not affect path normalization; it only controls what response status will be expected for + * a request that contains invalid UTF-8 characters. Implemented only for HTP_DECODER_URL_PATH. + * + * @param[in] cfg + * @param[in] ctx + * @param[in] unwanted + */ +void htp_config_set_utf8_invalid_unwanted(htp_cfg_t *cfg, enum htp_decoder_ctx_t ctx, enum htp_unwanted_t unwanted); + +/** + * Configures how the server reacts to leading whitespace on the request line. + * + * @param[in] cfg + * @param[in] ctx + * @param[in] unwanted + */ +void htp_config_set_requestline_leading_whitespace_unwanted(htp_cfg_t *cfg, enum htp_decoder_ctx_t ctx, enum htp_unwanted_t unwanted); + +/** + * Configures how many layers of compression we try to decompress. + * + * @param[in] cfg + * @param[in] limit 0 disables limit + */ +void htp_config_set_response_decompression_layer_limit(htp_cfg_t *cfg, int limit); + +#ifdef __cplusplus +} +#endif + +#endif /* HTP_CONFIG_H */ + diff --git a/htp/htp_config_auto.h b/htp/htp_config_auto.h new file mode 100644 index 0000000..7fbed4b --- /dev/null +++ b/htp/htp_config_auto.h @@ -0,0 +1,51 @@ +/*************************************************************************** + * Copyright (c) 2009-2010 Open Information Security Foundation + * All rights reserved. 
+ * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions are + * met: + * + * - Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + + * - Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution. + + * - Neither the name of the Qualys, Inc. nor the names of its + * contributors may be used to endorse or promote products derived from + * this software without specific prior written permission. + * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS + * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT + * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR + * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT + * HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, + * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT + * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, + * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY + * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT + * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE + * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + ***************************************************************************/ + +/** + * @file + * + * This file wraps the generated autoconf header to provide define + * blocks to prevent issue when included more than once. + * + * @warning Only include this in source files. 
+ * + * @author Brian Rectanus <brectanus@qualys.com> + */ + +#ifndef _HTP_CONFIG_AUTO_H +#define _HTP_CONFIG_AUTO_H + +#ifdef HAVE_CONFIG_H +#include "htp_config_auto_gen.h" +#endif + +#endif /* _HTP_CONFIG_AUTO_H */ diff --git a/htp/htp_config_private.h b/htp/htp_config_private.h new file mode 100644 index 0000000..83ff8f6 --- /dev/null +++ b/htp/htp_config_private.h @@ -0,0 +1,373 @@ +/*************************************************************************** + * Copyright (c) 2009-2010 Open Information Security Foundation + * Copyright (c) 2010-2013 Qualys, Inc. + * All rights reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions are + * met: + * + * - Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + + * - Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution. + + * - Neither the name of the Qualys, Inc. nor the names of its + * contributors may be used to endorse or promote products derived from + * this software without specific prior written permission. + * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS + * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT + * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR + * A PARTICULAR PURPOSE ARE DISCLAIMED. 
IN NO EVENT SHALL THE COPYRIGHT + * HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, + * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT + * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, + * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY + * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT + * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE + * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + ***************************************************************************/ + +/** + * @file + * @author Ivan Ristic <ivanr@webkreator.com> + */ + +#ifndef HTP_CONFIG_PRIVATE_H +#define HTP_CONFIG_PRIVATE_H + +#ifdef __cplusplus +extern "C" { +#endif + +#define HTP_DECODER_CONTEXTS_MAX 3 + +typedef struct htp_decoder_cfg_t { + + // Path-specific decoding options. + + /** Convert backslash characters to slashes. */ + int backslash_convert_slashes; + + /** Convert to lowercase. */ + int convert_lowercase; + + /** Compress slash characters. */ + int path_separators_compress; + + /** Should we URL-decode encoded path segment separators? */ + int path_separators_decode; + + /** Should we decode '+' characters to spaces? */ + int plusspace_decode; + + /** Reaction to encoded path separators. */ + enum htp_unwanted_t path_separators_encoded_unwanted; + + + // Special characters options. + + /** Controls how raw NUL bytes are handled. */ + int nul_raw_terminates; + + /** Determines server response to a raw NUL byte in the path. */ + enum htp_unwanted_t nul_raw_unwanted; + + /** Reaction to control characters. */ + enum htp_unwanted_t control_chars_unwanted; + + + // URL encoding options. + + /** Should we decode %u-encoded characters? */ + int u_encoding_decode; + + /** Reaction to %u encoding. */ + enum htp_unwanted_t u_encoding_unwanted; + + /** Handling of invalid URL encodings. 
*/ + enum htp_url_encoding_handling_t url_encoding_invalid_handling; + + /** Reaction to invalid URL encoding. */ + enum htp_unwanted_t url_encoding_invalid_unwanted; + + /** Controls how encoded NUL bytes are handled. */ + int nul_encoded_terminates; + + /** How are we expected to react to an encoded NUL byte? */ + enum htp_unwanted_t nul_encoded_unwanted; + + + // UTF-8 options. + + /** Controls how invalid UTF-8 characters are handled. */ + enum htp_unwanted_t utf8_invalid_unwanted; + + /** Convert UTF-8 characters into bytes using best-fit mapping. */ + int utf8_convert_bestfit; + + + // Best-fit mapping options. + + /** The best-fit map to use to decode %u-encoded characters. */ + unsigned char *bestfit_map; + + /** The replacement byte used when there is no best-fit mapping. */ + unsigned char bestfit_replacement_byte; + +} htp_decoder_cfg_t; + +struct htp_cfg_t { + /** + * The maximum size of the buffer that is used when the current + * input chunk does not contain all the necessary data (e.g., a very long header + * line that spans several packets). + */ + size_t field_limit_hard; + + /** + * Soft field limit length. If this limit is reached the parser will issue + * a warning but continue to run. NOT IMPLEMENTED. + */ + size_t field_limit_soft; + + /** + * Log level, which will be used when deciding whether to store or + * ignore the messages issued by the parser. + */ + enum htp_log_level_t log_level; + + /** + * Whether to delete each transaction after the last hook is invoked. This + * feature should be used when parsing traffic streams in real time. + */ + int tx_auto_destroy; + + /** + * Server personality identifier. + */ + enum htp_server_personality_t server_personality; + + /** The function used for request line parsing. Depends on the personality. */ + int (*parse_request_line)(htp_connp_t *connp); + + /** The function used for response line parsing. Depends on the personality. 
*/ + int (*parse_response_line)(htp_connp_t *connp); + + /** The function used for request header parsing. Depends on the personality. */ + int (*process_request_header)(htp_connp_t *connp, unsigned char *data, size_t len); + + /** The function used for response header parsing. Depends on the personality. */ + int (*process_response_header)(htp_connp_t *connp, unsigned char *data, size_t len); + + /** The function to use to transform parameters after parsing. */ + int (*parameter_processor)(htp_param_t *param); + + /** Decoder configuration array, one per context. */ + htp_decoder_cfg_t decoder_cfgs[HTP_DECODER_CONTEXTS_MAX]; + + /** Whether to generate the request_uri_normalized field. */ + int generate_request_uri_normalized; + + /** Whether to decompress compressed response bodies. */ + int response_decompression_enabled; + + /** Not fully implemented at the moment. */ + char *request_encoding; + + /** Not fully implemented at the moment. */ + char *internal_encoding; + + /** Whether to parse request cookies. */ + int parse_request_cookies; + + /** Whether to parse HTTP Authentication headers. */ + int parse_request_auth; + + /** Whether to extract files from requests using Multipart encoding. */ + int extract_request_files; + + /** How many extracted files are allowed in a single Multipart request? */ + int extract_request_files_limit; + + /** Whether to allow spaces in URI. */ + int allow_space_uri; + + /** The location on disk where temporary files will be created. */ + char *tmpdir; + + // Hooks + + /** + * Request start hook, invoked when the parser receives the first byte of a new + * request. Because in HTTP a transaction always starts with a request, this hook + * doubles as a transaction start hook. + */ + htp_hook_t *hook_request_start; + + /** + * Request line hook, invoked after a request line has been parsed. + */ + htp_hook_t *hook_request_line; + + /** + * Request URI normalization hook, for overriding default normalization of URI. 
+ */ + htp_hook_t *hook_request_uri_normalize; + + /** + * Receives raw request header data, starting immediately after the request line, + * including all headers as they are seen on the TCP connection, and including the + * terminating empty line. Not available on genuine HTTP/0.9 requests (because + * they don't use headers). + */ + htp_hook_t *hook_request_header_data; + + /** + * Request headers hook, invoked after all request headers are seen. + */ + htp_hook_t *hook_request_headers; + + /** + * Request body data hook, invoked every time body data is available. Each + * invocation will provide a htp_tx_data_t instance. Chunked data + * will be dechunked before the data is passed to this hook. Decompression + * is not currently implemented. At the end of the request body + * there will be a call with the data pointer set to NULL. + * NOTE(review): this structure also has a request_decompression_enabled + * option, so the "not currently implemented" remark may be stale - confirm. + */ + htp_hook_t *hook_request_body_data; + + /** + * Request file data hook, which is invoked whenever request file data is + * available. Currently used only by the Multipart parser. + */ + htp_hook_t *hook_request_file_data; + + /** + * Receives raw request trailer data, which can be available on requests that have + * chunked bodies. The data starts immediately after the zero-length chunk + * and includes the terminating empty line. + */ + htp_hook_t *hook_request_trailer_data; + + /** + * Request trailer hook, invoked after all trailer headers are seen, + * and if they are seen (not invoked otherwise). + */ + htp_hook_t *hook_request_trailer; + + /** + * Request hook, invoked after a complete request is seen. + */ + htp_hook_t *hook_request_complete; + + /** + * Response startup hook, invoked when a response transaction is found and + * processing started. + */ + htp_hook_t *hook_response_start; + + /** + * Response line hook, invoked after a response line has been parsed. 
+ */ + htp_hook_t *hook_response_line; + + /** + * Receives raw response header data, starting immediately after the status line + * and including all headers as they are seen on the TCP connection, and including the + * terminating empty line. Not available on genuine HTTP/0.9 responses (because + * they don't have response headers). + */ + htp_hook_t *hook_response_header_data; + + /** + * Response headers hook, invoked after all response headers have been seen. + */ + htp_hook_t *hook_response_headers; + + /** + * Response body data hook, invoked every time body data is available. Each + * invocation will provide a htp_tx_data_t instance. Chunked data + * will be dechunked before the data is passed to this hook. By default, + * compressed data will be decompressed, but decompression can be disabled + * in configuration. At the end of the response body there will be a call + * with the data pointer set to NULL. + */ + htp_hook_t *hook_response_body_data; + + /** + * Receives raw response trailer data, which can be available on responses that have + * chunked bodies. The data starts immediately after the zero-length chunk + * and includes the terminating empty line. + */ + htp_hook_t *hook_response_trailer_data; + + /** + * Response trailer hook, invoked after all trailer headers have been processed, + * and only if the trailer exists. + */ + htp_hook_t *hook_response_trailer; + + /** + * Response hook, invoked after a response has been seen. Because sometimes servers + * respond before receiving complete requests, a response_complete callback may be + * invoked prior to a request_complete callback. + */ + htp_hook_t *hook_response_complete; + + /** + * Transaction complete hook, which is invoked once the entire transaction is + * considered complete (request and response are both complete). This is always + * the last hook to be invoked. + */ + htp_hook_t *hook_transaction_complete; + + /** + * Log hook, invoked every time the library wants to log. 
+ */ + htp_hook_t *hook_log; + + /** + * Opaque user data associated with this configuration structure. + */ + void *user_data; + + // Request Line parsing options. + + // TODO this was added here to maintain a stable ABI, once we can break that + // we may want to move this into htp_decoder_cfg_t (VJ) + + /** Reaction to leading whitespace on the request line */ + enum htp_unwanted_t requestline_leading_whitespace_unwanted; + + /** How many layers of compression we will decompress (0 => no limit). */ + int response_decompression_layer_limit; + + /** Maximum memory used by the lzma decompressor. */ + size_t lzma_memlimit; + + /** Max output size for a compression bomb. NOTE(review): stored as int32_t while htp_config_set_compression_bomb_limit() takes a size_t - confirm the setter clamps the value. */ + int32_t compression_bomb_limit; + + /** Max time for a decompression bomb. NOTE(review): stored as int32_t while htp_config_set_compression_time_limit() takes a size_t - confirm the setter clamps the value. */ + int32_t compression_time_limit; + + /** How many layers of compression we will decompress (0 => no lzma). */ + int response_lzma_layer_limit; + + /** Whether to decompress compressed request bodies. */ + int request_decompression_enabled; + + /** Maximum number of transactions. */ + uint32_t max_tx; +}; + +#ifdef __cplusplus +} +#endif + +#endif /* HTP_CONFIG_PRIVATE_H */ + diff --git a/htp/htp_connection.c b/htp/htp_connection.c new file mode 100644 index 0000000..3fe7c89 --- /dev/null +++ b/htp/htp_connection.c @@ -0,0 +1,168 @@ +/*************************************************************************** + * Copyright (c) 2009-2010 Open Information Security Foundation + * Copyright (c) 2010-2013 Qualys, Inc. + * All rights reserved. 
+ * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions are + * met: + * + * - Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. 
+ + * - Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution. + + * - Neither the name of the Qualys, Inc. nor the names of its + * contributors may be used to endorse or promote products derived from + * this software without specific prior written permission. + * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS + * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT + * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR + * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT + * HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, + * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT + * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, + * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY + * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT + * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE + * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + ***************************************************************************/ + +/** + * @file + * @author Ivan Ristic <ivanr@webkreator.com> + */ + +#include "htp_config_auto.h" + +#include "htp_private.h" + +htp_conn_t *htp_conn_create(void) { + htp_conn_t *conn = calloc(1, sizeof (htp_conn_t)); + if (conn == NULL) return NULL; + + conn->transactions = htp_list_create(16); + if (conn->transactions == NULL) { + free(conn); + return NULL; + } + + conn->messages = htp_list_create(8); + if (conn->messages == NULL) { + htp_list_destroy(conn->transactions); + conn->transactions = NULL; + free(conn); + return NULL; + } + + return conn; +} + +void htp_conn_close(htp_conn_t *conn, const htp_time_t *timestamp) { + if (conn == NULL) return; + + // Update timestamp. 
+ if (timestamp != NULL) { + memcpy(&(conn->close_timestamp), timestamp, sizeof(htp_time_t)); + } +} + +void htp_conn_destroy(htp_conn_t *conn) { + if (conn == NULL) return; + + if (conn->transactions != NULL) { + // Destroy individual transactions. Do note that iterating + // using the iterator does not work here because some of the + // list element may be NULL (and with the iterator it is impossible + // to distinguish a NULL element from the end of the list). + for (size_t i = 0, n = htp_list_size(conn->transactions); i < n; i++) { + htp_tx_t *tx = htp_list_get(conn->transactions, i); + if (tx != NULL) { + htp_tx_destroy_incomplete(tx); + } + } + + htp_list_destroy(conn->transactions); + conn->transactions = NULL; + } + + if (conn->messages != NULL) { + // Destroy individual messages. + for (size_t i = 0, n = htp_list_size(conn->messages); i < n; i++) { + htp_log_t *l = htp_list_get(conn->messages, i); + free((void *) l->msg); + free(l); + } + + htp_list_destroy(conn->messages); + conn->messages = NULL; + } + + if (conn->server_addr != NULL) { + free(conn->server_addr); + } + + if (conn->client_addr != NULL) { + free(conn->client_addr); + } + + free(conn); +} + +htp_status_t htp_conn_open(htp_conn_t *conn, const char *client_addr, int client_port, + const char *server_addr, int server_port, const htp_time_t *timestamp) +{ + if (conn == NULL) return HTP_ERROR; + + if (client_addr != NULL) { + conn->client_addr = strdup(client_addr); + if (conn->client_addr == NULL) return HTP_ERROR; + } + + conn->client_port = client_port; + + if (server_addr != NULL) { + conn->server_addr = strdup(server_addr); + if (conn->server_addr == NULL) { + if (conn->client_addr != NULL) { + free(conn->client_addr); + } + + return HTP_ERROR; + } + } + + conn->server_port = server_port; + + // Remember when the connection was opened. 
+ if (timestamp != NULL) { + memcpy(&(conn->open_timestamp), timestamp, sizeof(*timestamp)); + } + + return HTP_OK; +} + +htp_status_t htp_conn_remove_tx(htp_conn_t *conn, const htp_tx_t *tx) { + if ((tx == NULL) || (conn == NULL)) return HTP_ERROR; + if (conn->transactions == NULL) return HTP_ERROR; + for (size_t i = 0, n = htp_list_size(conn->transactions); i < n; i++) { + htp_tx_t *tx2 = htp_list_get(conn->transactions, i); + if (tx2 == tx) { + return htp_list_replace(conn->transactions, i, NULL); + } + } + return HTP_DECLINED; +} + +void htp_conn_track_inbound_data(htp_conn_t *conn, size_t len, const htp_time_t *timestamp) { + if (conn == NULL) return; + conn->in_data_counter += len; +} + +void htp_conn_track_outbound_data(htp_conn_t *conn, size_t len, const htp_time_t *timestamp) { + if (conn == NULL) return; + conn->out_data_counter += len; +} diff --git a/htp/htp_connection_parser.c b/htp/htp_connection_parser.c new file mode 100644 index 0000000..8fd4ed6 --- /dev/null +++ b/htp/htp_connection_parser.c @@ -0,0 +1,260 @@ +/*************************************************************************** + * Copyright (c) 2009-2010 Open Information Security Foundation + * Copyright (c) 2010-2013 Qualys, Inc. + * All rights reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions are + * met: + * + * - Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + + * - Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution. + + * - Neither the name of the Qualys, Inc. nor the names of its + * contributors may be used to endorse or promote products derived from + * this software without specific prior written permission. 
+ * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS + * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT + * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR + * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT + * HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, + * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT + * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, + * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY + * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT + * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE + * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + ***************************************************************************/ + +/** + * @file + * @author Ivan Ristic <ivanr@webkreator.com> + */ + +#include "htp_config_auto.h" + +#include "htp_private.h" + +void htp_connp_clear_error(htp_connp_t *connp) { + connp->last_error = NULL; +} + +void htp_connp_req_close(htp_connp_t *connp, const htp_time_t *timestamp) { + if (connp == NULL) return; + + // Update internal flags + if (connp->in_status != HTP_STREAM_ERROR) + connp->in_status = HTP_STREAM_CLOSED; + + // Call the parsers one last time, which will allow them + // to process the events that depend on stream closure + htp_connp_req_data(connp, timestamp, NULL, 0); +} + +void htp_connp_close(htp_connp_t *connp, const htp_time_t *timestamp) { + if (connp == NULL) return; + + // Close the underlying connection. 
+ htp_conn_close(connp->conn, timestamp); + + // Update internal flags + if (connp->in_status != HTP_STREAM_ERROR) + connp->in_status = HTP_STREAM_CLOSED; + if (connp->out_status != HTP_STREAM_ERROR) + connp->out_status = HTP_STREAM_CLOSED; + + // Call the parsers one last time, which will allow them + // to process the events that depend on stream closure + htp_connp_req_data(connp, timestamp, NULL, 0); + htp_connp_res_data(connp, timestamp, NULL, 0); +} + +htp_connp_t *htp_connp_create(htp_cfg_t *cfg) { + htp_connp_t *connp = calloc(1, sizeof (htp_connp_t)); + if (connp == NULL) return NULL; + + // Use the supplied configuration structure + connp->cfg = cfg; + + // Create a new connection. + connp->conn = htp_conn_create(); + if (connp->conn == NULL) { + free(connp); + return NULL; + } + + // Request parsing + connp->in_state = htp_connp_REQ_IDLE; + connp->in_status = HTP_STREAM_NEW; + + // Response parsing + connp->out_state = htp_connp_RES_IDLE; + connp->out_status = HTP_STREAM_NEW; + + return connp; +} + +void htp_connp_destroy(htp_connp_t *connp) { + if (connp == NULL) return; + + if (connp->in_buf != NULL) { + free(connp->in_buf); + } + + if (connp->out_buf != NULL) { + free(connp->out_buf); + } + + htp_connp_destroy_decompressors(connp); + + if (connp->put_file != NULL) { + bstr_free(connp->put_file->filename); + free(connp->put_file); + } + + if (connp->in_header) { + bstr_free(connp->in_header); + connp->in_header = NULL; + } + if (connp->out_header) { + bstr_free(connp->out_header); + connp->out_header = NULL; + } + free(connp); +} + +void htp_connp_destroy_all(htp_connp_t *connp) { + if (connp == NULL) return; + + // Destroy connection + htp_conn_destroy(connp->conn); + connp->conn = NULL; + + // Destroy everything else + htp_connp_destroy(connp); +} + +htp_conn_t *htp_connp_get_connection(const htp_connp_t *connp) { + if (connp == NULL) return NULL; + return connp->conn; +} + +htp_tx_t *htp_connp_get_in_tx(const htp_connp_t *connp) { + if (connp == 
NULL) return NULL; + return connp->in_tx; +} + +htp_log_t *htp_connp_get_last_error(const htp_connp_t *connp) { + if (connp == NULL) return NULL; + return connp->last_error; +} + +htp_tx_t *htp_connp_get_out_tx(const htp_connp_t *connp) { + if (connp == NULL) return NULL; + return connp->out_tx; +} + +void *htp_connp_get_user_data(const htp_connp_t *connp) { + if (connp == NULL) return NULL; + return (void *)connp->user_data; +} + +void htp_connp_in_reset(htp_connp_t *connp) { + if (connp == NULL) return; + connp->in_content_length = -1; + connp->in_body_data_left = -1; + connp->in_chunk_request_index = connp->in_chunk_count; +} + +void htp_connp_open(htp_connp_t *connp, const char *client_addr, int client_port, const char *server_addr, + int server_port, htp_time_t *timestamp) +{ + if (connp == NULL) return; + + // Check connection parser state first. + if ((connp->in_status != HTP_STREAM_NEW) || (connp->out_status != HTP_STREAM_NEW)) { + htp_log(connp, HTP_LOG_MARK, HTP_LOG_ERROR, 0, "Connection is already open"); + return; + } + + if (htp_conn_open(connp->conn, client_addr, client_port, server_addr, server_port, timestamp) != HTP_OK) { + return; + } + + connp->in_status = HTP_STREAM_OPEN; + connp->out_status = HTP_STREAM_OPEN; +} + +void htp_connp_set_user_data(htp_connp_t *connp, const void *user_data) { + if (connp == NULL) return; + connp->user_data = user_data; +} + +htp_tx_t *htp_connp_tx_create(htp_connp_t *connp) { + if (connp == NULL) return NULL; + + // Detect pipelining. + if (htp_list_size(connp->conn->transactions) > connp->out_next_tx_index) { + connp->conn->flags |= HTP_CONN_PIPELINED; + } + if (connp->cfg->max_tx > 0 && + htp_list_size(connp->conn->transactions) > connp->cfg->max_tx) { + return NULL; + } + + htp_tx_t *tx = htp_tx_create(connp); + if (tx == NULL) return NULL; + + connp->in_tx = tx; + + htp_connp_in_reset(connp); + + return tx; +} + +/** + * Removes references to the supplied transaction. 
+ * + * @param[in] connp + * @param[in] tx + */ +void htp_connp_tx_remove(htp_connp_t *connp, htp_tx_t *tx) { + if (connp == NULL) return; + + if (connp->in_tx == tx) { + connp->in_tx = NULL; + } + + if (connp->out_tx == tx) { + connp->out_tx = NULL; + } +} + +/** + * Removes all front NULL-ed transactions + * + * @param[in] connp + * @return numbers of removed NULL transactions + */ +size_t htp_connp_tx_freed(htp_connp_t *connp) { + // Transactions first got freed and NULLed + // Now, we can recycle the space that hold them by shifting the list + size_t r = 0; + size_t nb = htp_list_size(connp->conn->transactions); + for (size_t i = 0; i < nb; i++) { + // 0 and not i because at next iteration, we have removed the first + void * tx = htp_list_get(connp->conn->transactions, 0); + if (tx != NULL) { + break; + } + htp_list_shift(connp->conn->transactions); + r++; + connp->out_next_tx_index--; + } + return r; +} diff --git a/htp/htp_connection_parser.h b/htp/htp_connection_parser.h new file mode 100644 index 0000000..b2c8d3a --- /dev/null +++ b/htp/htp_connection_parser.h @@ -0,0 +1,218 @@ +/*************************************************************************** + * Copyright (c) 2009-2010 Open Information Security Foundation + * Copyright (c) 2010-2013 Qualys, Inc. + * All rights reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions are + * met: + * + * - Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + + * - Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution. + + * - Neither the name of the Qualys, Inc. 
 *   nor the names of its
 *   contributors may be used to endorse or promote products derived from
 *   this software without specific prior written permission.
 *
 * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
 * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
 * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
 * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
 * HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
 * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
 * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
 * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
 * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
 * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
 * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
 ***************************************************************************/

/**
 * @file
 * @author Ivan Ristic <ivanr@webkreator.com>
 */

#ifndef HTP_CONNECTION_PARSER_H
#define HTP_CONNECTION_PARSER_H

#ifdef __cplusplus
extern "C" {
#endif

/**
 * Clears the most recent error, if any.
 *
 * @param[in] connp
 */
void htp_connp_clear_error(htp_connp_t *connp);

/**
 * Closes the connection associated with the supplied parser.
 *
 * @param[in] connp
 * @param[in] timestamp Optional.
 */
void htp_connp_close(htp_connp_t *connp, const htp_time_t *timestamp);

/**
 * NOTE(review): the definition is not visible in this part of the file;
 * judging by the name this presumably closes only the request (inbound)
 * side of the connection. Confirm against the implementation.
 *
 * @param[in] connp
 * @param[in] timestamp
 */
void htp_connp_req_close(htp_connp_t *connp, const htp_time_t *timestamp);

/**
 * Creates a new connection parser using the provided configuration. Because
 * the configuration structure is used directly, in a multithreaded environment
 * you are not allowed to change the structure, ever. If you have a need to
 * change configuration on per-connection basis, make a copy of the configuration
 * structure to go along with every connection parser.
 *
 * @param[in] cfg
 * @return New connection parser instance, or NULL on error.
 */
htp_connp_t *htp_connp_create(htp_cfg_t *cfg);

/**
 * Destroys the connection parser and its data structures, leaving
 * all the data (connection, transactions, etc) intact.
 *
 * @param[in] connp
 */
void htp_connp_destroy(htp_connp_t *connp);

/**
 * Destroys the connection parser, its data structures, as well
 * as the connection and its transactions.
 *
 * @param[in] connp
 */
void htp_connp_destroy_all(htp_connp_t *connp);

/**
 * Returns the connection associated with the connection parser.
 *
 * @param[in] connp
 * @return htp_conn_t instance, or NULL if one is not available.
 */
htp_conn_t *htp_connp_get_connection(const htp_connp_t *connp);

/**
 * Retrieves the pointer to the active inbound transaction. In connection
 * parsing mode there can be many open transactions, and up to 2 active
 * transactions at any one time. This is due to HTTP pipelining. Can be NULL.
 *
 * @param[in] connp
 * @return Active inbound transaction, or NULL if there isn't one.
 */
htp_tx_t *htp_connp_get_in_tx(const htp_connp_t *connp);

/**
 * Returns the last error that occurred with this connection parser. Do note, however,
 * that the value in this field will only be valid immediately after an error condition,
 * but it is not guaranteed to remain valid if the parser is invoked again.
 *
 * @param[in] connp
 * @return A pointer to an htp_log_t instance if there is an error, or NULL
 *         if there isn't.
 */
htp_log_t *htp_connp_get_last_error(const htp_connp_t *connp);

/**
 * Retrieves the pointer to the active outbound transaction. In connection
 * parsing mode there can be many open transactions, and up to 2 active
 * transactions at any one time. This is due to HTTP pipelining. Can be NULL.
 *
 * @param[in] connp
 * @return Active outbound transaction, or NULL if there isn't one.
 */
htp_tx_t *htp_connp_get_out_tx(const htp_connp_t *connp);

/**
 * Retrieve the user data associated with this connection parser.
 *
 * @param[in] connp
 * @return User data, or NULL if there isn't any.
 */
void *htp_connp_get_user_data(const htp_connp_t *connp);

/**
 * Opens connection. The call is ignored (and an error is logged) unless
 * both streams are still in the HTP_STREAM_NEW state.
 *
 * @param[in] connp
 * @param[in] client_addr Client address
 * @param[in] client_port Client port
 * @param[in] server_addr Server address
 * @param[in] server_port Server port
 * @param[in] timestamp Optional.
 */
void htp_connp_open(htp_connp_t *connp, const char *client_addr, int client_port, const char *server_addr,
    int server_port, htp_time_t *timestamp);

/**
 * Associate user data with the supplied parser.
 *
 * @param[in] connp
 * @param[in] user_data
 */
void htp_connp_set_user_data(htp_connp_t *connp, const void *user_data);

/**
 * Process a chunk of inbound (client or request) data.
 *
 * @param[in] connp
 * @param[in] timestamp
 * @param[in] data
 * @param[in] len
 * @return HTP_STREAM_DATA, HTP_STREAM_ERROR or HTP_STREAM_DATA_OTHER (see QUICK_START).
 *         HTP_STREAM_CLOSED and HTP_STREAM_TUNNEL are also possible.
 */
int htp_connp_req_data(htp_connp_t *connp, const htp_time_t *timestamp, const void *data, size_t len);

/**
 * Returns the number of bytes consumed from the most recent inbound data chunk. Normally, an invocation
 * of htp_connp_req_data() will consume all data from the supplied buffer, but there are circumstances
 * where only partial consumption is possible. In such cases HTP_STREAM_DATA_OTHER will be returned.
 * Consumed bytes are no longer necessary, but the remainder of the buffer will need to be saved
 * for later.
 *
 * @param[in] connp
 * @return The number of bytes consumed from the last data chunk sent for inbound processing.
 */
size_t htp_connp_req_data_consumed(htp_connp_t *connp);

/**
 * Process a chunk of outbound (server or response) data.
 *
 * @param[in] connp
 * @param[in] timestamp Optional.
 * @param[in] data
 * @param[in] len
 * @return HTP_OK on state change, HTP_ERROR on error, or HTP_DATA when more data is needed
 *
 * NOTE(review): the documentation of htp_connp_res_data_consumed() and of
 * htp_connp_req_data() speaks of HTP_STREAM_* return values; the list above
 * may be stale. Confirm against the implementation.
 */
int htp_connp_res_data(htp_connp_t *connp, const htp_time_t *timestamp, const void *data, size_t len);

/**
 * Returns the number of bytes consumed from the most recent outbound data chunk. Normally, an invocation
 * of htp_connp_res_data() will consume all data from the supplied buffer, but there are circumstances
 * where only partial consumption is possible. In such cases HTP_STREAM_DATA_OTHER will be returned.
 * Consumed bytes are no longer necessary, but the remainder of the buffer will need to be saved
 * for later.
 *
 * @param[in] connp
 * @return The number of bytes consumed from the last data chunk sent for outbound processing.
 */
size_t htp_connp_res_data_consumed(htp_connp_t *connp);

/**
 * Create a new transaction using the connection parser provided.
 *
 * @param[in] connp
 * @return Transaction instance on success, NULL on failure.
 */
htp_tx_t *htp_connp_tx_create(htp_connp_t *connp);

/**
 * Removes the NULL-ed (already freed) transactions found at the front of
 * the connection's transaction list.
 *
 * @param[in] connp
 * @return The number of NULL transactions removed.
 */
size_t htp_connp_tx_freed(htp_connp_t *connp);

#ifdef __cplusplus
}
#endif

#endif /* HTP_CONNECTION_PARSER_H */
diff --git a/htp/htp_connection_parser_private.h b/htp/htp_connection_parser_private.h
new file mode 100644
index 0000000..a055aa8
--- /dev/null
+++ b/htp/htp_connection_parser_private.h
@@ -0,0 +1,275 @@
/***************************************************************************
 * Copyright (c) 2009-2010 Open Information Security Foundation
 * Copyright (c) 2010-2013 Qualys, Inc.
 * All rights reserved.
 *
 * Redistribution and use in source and binary forms, with or without
 * modification, are permitted provided that the following conditions are
 * met:
 *
 * - Redistributions of source code must retain the above copyright
 *   notice, this list of conditions and the following disclaimer.

 * - Redistributions in binary form must reproduce the above copyright
 *   notice, this list of conditions and the following disclaimer in the
 *   documentation and/or other materials provided with the distribution.

 * - Neither the name of the Qualys, Inc. nor the names of its
 *   contributors may be used to endorse or promote products derived from
 *   this software without specific prior written permission.
 *
 * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
 * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
 * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
 * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
 * HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
 * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
 * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
 * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
 * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
 * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
 * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
 ***************************************************************************/

/**
 * @file
 * @author Ivan Ristic <ivanr@webkreator.com>
 */

#ifndef HTP_CONNECTION_PARSER_PRIVATE_H
#define HTP_CONNECTION_PARSER_PRIVATE_H

#ifdef __cplusplus
extern "C" {
#endif

#include "htp_core.h"

/**
 * Connection parser structure.
 */
struct htp_connp_t {

    // General fields

    /** Current parser configuration structure. */
    htp_cfg_t *cfg;

    /** The connection structure associated with this parser. */
    htp_conn_t *conn;

    /** Opaque user data associated with this parser. */
    const void *user_data;

    /**
     * On parser failure, this field will contain the error information. Do note, however,
     * that the value in this field will only be valid immediately after an error condition,
     * but it is not guaranteed to remain valid if the parser is invoked again.
     */
    htp_log_t *last_error;


    // Request parser fields
    // (out_status and out_data_other_at_tx_end below belong to the
    // response side even though they are declared in this group.)

    /**
     * Parser inbound status. Set to HTP_STREAM_NEW by htp_connp_create(),
     * to HTP_STREAM_OPEN by htp_connp_open(), and to HTP_STREAM_CLOSED on
     * close unless it already holds HTP_STREAM_ERROR.
     */
    enum htp_stream_state_t in_status;

    /**
     * Parser outbound status. Follows the same HTP_STREAM_NEW / HTP_STREAM_OPEN /
     * HTP_STREAM_CLOSED progression as in_status, and likewise keeps
     * HTP_STREAM_ERROR once set.
     */
    enum htp_stream_state_t out_status;

    /**
     * When true, this field indicates that there is unprocessed inbound data, and
     * that the response parsing code should stop at the end of the current request
     * in order to allow more requests to be produced.
     */
    unsigned int out_data_other_at_tx_end;

    /**
     * The time when the last request data chunk was received. The value is
     * stored in place (this is not a pointer).
     * NOTE(review): earlier wording said this "can be NULL" when upstream
     * code supplies no timestamps; confirm how a missing timestamp is
     * represented in a by-value field.
     */
    htp_time_t in_timestamp;

    /** Pointer to the current request data chunk. */
    unsigned char *in_current_data;

    /** The length of the current request data chunk. */
    int64_t in_current_len;

    /** The offset of the next byte in the request data chunk to read. */
    int64_t in_current_read_offset;

    /**
     * The starting point of the data waiting to be consumed. This field is used
     * in the states where reading data is not the same as consumption.
     */
    int64_t in_current_consume_offset;

    /**
     * Marks the starting point of raw data within the inbound data chunk. Raw
     * data (e.g., complete headers) is sent to appropriate callbacks (e.g.,
     * REQUEST_HEADER_DATA).
     */
    int64_t in_current_receiver_offset;

    /** How many data chunks does the inbound connection stream consist of? */
    size_t in_chunk_count;

    /**
     * The index of the first chunk used in the current request. Set to
     * in_chunk_count by htp_connp_in_reset().
     */
    size_t in_chunk_request_index;

    /** The offset, in the entire connection stream, of the next request byte. */
    int64_t in_stream_offset;

    /**
     * The value of the request byte currently being processed. This field is
     * populated when the IN_NEXT_* or IN_PEEK_* macros are invoked.
     */
    int in_next_byte;

    /** Used to buffer a line of inbound data when buffering cannot be avoided. */
    unsigned char *in_buf;

    /** Stores the size of the buffer. Valid only when htp_connp_t::in_buf is not NULL. */
    size_t in_buf_size;

    /**
     * Stores the current value of a folded request header. Such headers span
     * multiple lines, and are processed only when all data is available.
     */
    bstr *in_header;

    /** Ongoing inbound transaction. */
    htp_tx_t *in_tx;

    /**
     * The request body length declared in a valid request header. The key here
     * is "valid". This field will not be populated if the request contains both
     * a Transfer-Encoding header and a Content-Length header.
     * Reset to -1 by htp_connp_in_reset().
     */
    int64_t in_content_length;

    /**
     * Holds the remaining request body length that we expect to read. This
     * field will be available only when the length of a request body is known
     * in advance, i.e. when request headers contain a Content-Length header.
     * Reset to -1 by htp_connp_in_reset().
     */
    int64_t in_body_data_left;

    /**
     * Holds the amount of data that needs to be read from the
     * current data chunk. Only used with chunked request bodies.
     */
    int64_t in_chunked_length;

    /** Current request parser state. */
    int (*in_state)(htp_connp_t *);

    /** Previous request parser state. Used to detect state changes. */
    int (*in_state_previous)(htp_connp_t *);

    /** The hook that should be receiving raw connection data. */
    htp_hook_t *in_data_receiver_hook;

    // Response parser fields

    /**
     * Response counter, incremented with every new response. This field is
     * used to match responses to requests. The expectation is that for every
     * response there will already be a transaction (request) waiting.
     */
    size_t out_next_tx_index;

    /**
     * The time when the last response data chunk was received. Stored in
     * place, like in_timestamp.
     * NOTE(review): earlier wording said "can be NULL"; see in_timestamp.
     */
    htp_time_t out_timestamp;

    /** Pointer to the current response data chunk. */
    unsigned char *out_current_data;

    /** The length of the current response data chunk. */
    int64_t out_current_len;

    /** The offset of the next byte in the response data chunk to consume. */
    int64_t out_current_read_offset;

    /**
     * The starting point of the data waiting to be consumed. This field is used
     * in the states where reading data is not the same as consumption.
     */
    int64_t out_current_consume_offset;

    /**
     * Marks the starting point of raw data within the outbound data chunk. Raw
     * data (e.g., complete headers) is sent to appropriate callbacks (e.g.,
     * RESPONSE_HEADER_DATA).
     */
    int64_t out_current_receiver_offset;

    /** The offset, in the entire connection stream, of the next response byte. */
    int64_t out_stream_offset;

    /** The value of the response byte currently being processed. */
    int out_next_byte;

    /** Used to buffer a line of outbound data when buffering cannot be avoided. */
    unsigned char *out_buf;

    /** Stores the size of the buffer. Valid only when htp_connp_t::out_buf is not NULL. */
    size_t out_buf_size;

    /**
     * Stores the current value of a folded response header. Such headers span
     * multiple lines, and are processed only when all data is available.
     */
    bstr *out_header;

    /** Ongoing outbound transaction */
    htp_tx_t *out_tx;

    /**
     * The length of the current response body as presented in the
     * Content-Length response header.
     */
    int64_t out_content_length;

    /** The remaining length of the current response body, if known. Set to -1 otherwise. */
    int64_t out_body_data_left;

    /**
     * Holds the amount of data that needs to be read from the
     * current response data chunk. Only used with chunked response bodies.
     */
    int64_t out_chunked_length;

    /** Current response parser state. */
    int (*out_state)(htp_connp_t *);

    /** Previous response parser state. */
    int (*out_state_previous)(htp_connp_t *);

    /** The hook that should be receiving raw connection data. */
    htp_hook_t *out_data_receiver_hook;

    /** Response decompressor used to decompress response body data. */
    htp_decompressor_t *out_decompressor;

    /**
     * On a PUT request, this field contains additional file data. The
     * structure and its filename are released by htp_connp_destroy().
     */
    htp_file_t *put_file;

    /** Request decompressor used to decompress request body data. */
    htp_decompressor_t *req_decompressor;
};

/**
 * Resets the per-request inbound counters: in_content_length and
 * in_body_data_left become -1 and in_chunk_request_index is set to the
 * current in_chunk_count. Called by htp_connp_tx_create() whenever a new
 * inbound transaction is started.
 *
 * @param[in] connp
 */
void htp_connp_in_reset(htp_connp_t *connp);

#ifdef __cplusplus
}
#endif

#endif /* HTP_CONNECTION_PARSER_PRIVATE_H */

diff --git a/htp/htp_connection_private.h b/htp/htp_connection_private.h
new file mode 100644
index 0000000..e4beccc
--- /dev/null
+++ b/htp/htp_connection_private.h
@@ -0,0 +1,121 @@
/***************************************************************************
 * Copyright (c) 2009-2010 Open Information Security Foundation
 * Copyright (c) 2010-2013 Qualys, Inc.
 * All rights reserved.
 *
 * Redistribution and use in source and binary forms, with or without
 * modification, are permitted provided that the following conditions are
 * met:
 *
 * - Redistributions of source code must retain the above copyright
 *   notice, this list of conditions and the following disclaimer.

 * - Redistributions in binary form must reproduce the above copyright
 *   notice, this list of conditions and the following disclaimer in the
 *   documentation and/or other materials provided with the distribution.

 * - Neither the name of the Qualys, Inc.
 *   nor the names of its
 *   contributors may be used to endorse or promote products derived from
 *   this software without specific prior written permission.
 *
 * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
 * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
 * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
 * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
 * HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
 * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
 * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
 * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
 * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
 * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
 * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
 ***************************************************************************/

/**
 * @file
 * @author Ivan Ristic <ivanr@webkreator.com>
 */

#ifndef HTP_CONNECTION_H
#define HTP_CONNECTION_H

#ifdef __cplusplus
extern "C" {
#endif

/**
 * Creates a new connection structure.
 *
 * @return A new connection structure on success, NULL on memory allocation failure.
 */
htp_conn_t *htp_conn_create(void);

/**
 * Closes the connection.
 *
 * @param[in] conn
 * @param[in] timestamp
 */
void htp_conn_close(htp_conn_t *conn, const htp_time_t *timestamp);

/**
 * Destroys a connection, as well as all the transactions it contains. It is
 * not possible to destroy a connection structure yet leave any of its
 * transactions intact. This is because transactions need their connection, and
 * connection structures hold little data anyway. The opposite is possible,
 * though: a transaction can be deleted while its connection is left alive.
 *
 * @param[in] conn
 */
void htp_conn_destroy(htp_conn_t *conn);

/**
 * Opens a connection. This function will essentially only store the provided data
 * for future reference. The timestamp parameter is optional.
 *
 * @param[in] conn
 * @param[in] remote_addr
 * @param[in] remote_port
 * @param[in] local_addr
 * @param[in] local_port
 * @param[in] timestamp
 * @return HTP_OK on success. htp_connp_open() treats any other value as a
 *         failure and leaves the streams unopened.
 *         NOTE(review): the exact failure codes are not visible here.
 */
htp_status_t htp_conn_open(htp_conn_t *conn, const char *remote_addr, int remote_port,
    const char *local_addr, int local_port, const htp_time_t *timestamp);

/**
 * Removes the given transaction structure, which makes it possible to
 * safely destroy it. It is safe to destroy transactions in this way
 * because the index of the transactions (in a connection) is preserved.
 *
 * @param[in] conn
 * @param[in] tx
 * @return HTP_OK if transaction was removed (replaced with NULL) or HTP_ERROR if it wasn't found.
 */
htp_status_t htp_conn_remove_tx(htp_conn_t *conn, const htp_tx_t *tx);

/**
 * Keeps track of inbound packets and data.
 *
 * @param[in] conn
 * @param[in] len
 * @param[in] timestamp
 */
void htp_conn_track_inbound_data(htp_conn_t *conn, size_t len, const htp_time_t *timestamp);

/**
 * Keeps track of outbound packets and data.
 *
 * @param[in] conn
 * @param[in] len
 * @param[in] timestamp
 */
void htp_conn_track_outbound_data(htp_conn_t *conn, size_t len, const htp_time_t *timestamp);

#ifdef __cplusplus
}
#endif

#endif /* HTP_CONNECTION_H */

diff --git a/htp/htp_content_handlers.c b/htp/htp_content_handlers.c
new file mode 100644
index 0000000..183a0f2
--- /dev/null
+++ b/htp/htp_content_handlers.c
@@ -0,0 +1,299 @@
/***************************************************************************
 * Copyright (c) 2009-2010 Open Information Security Foundation
 * Copyright (c) 2010-2013 Qualys, Inc.
 * All rights reserved.
 *
 * Redistribution and use in source and binary forms, with or without
 * modification, are permitted provided that the following conditions are
 * met:
 *
 * - Redistributions of source code must retain the above copyright
 *   notice, this list of conditions and the following disclaimer.

 * - Redistributions in binary form must reproduce the above copyright
 *   notice, this list of conditions and the following disclaimer in the
 *   documentation and/or other materials provided with the distribution.

 * - Neither the name of the Qualys, Inc. nor the names of its
 *   contributors may be used to endorse or promote products derived from
 *   this software without specific prior written permission.
 *
 * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
 * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
 * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
 * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
 * HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
 * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
 * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
 * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
 * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
 * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
 * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
 ***************************************************************************/

/**
 * @file
 * @author Ivan Ristic <ivanr@webkreator.com>
 */

#include "htp_config_auto.h"

#include "htp_private.h"

/**
 * This callback function feeds request body data to a Urlencoded parser
 * and, later, feeds the parsed parameters to the correct structures.
 *
 * @param[in] d
 * @return HTP_OK on success, HTP_ERROR on failure.
+ */ +htp_status_t htp_ch_urlencoded_callback_request_body_data(htp_tx_data_t *d) { + htp_tx_t *tx = d->tx; + + // Check that we were not invoked again after the finalization. + if (tx->request_urlenp_body->params == NULL) return HTP_ERROR; + + if (d->data != NULL) { + // Process one chunk of data. + htp_urlenp_parse_partial(tx->request_urlenp_body, d->data, d->len); + } else { + // Finalize parsing. + htp_urlenp_finalize(tx->request_urlenp_body); + + // Add all parameters to the transaction. + bstr *name = NULL; + bstr *value = NULL; + + for (size_t i = 0, n = htp_table_size(tx->request_urlenp_body->params); i < n; i++) { + value = htp_table_get_index(tx->request_urlenp_body->params, i, &name); + + htp_param_t *param = calloc(1, sizeof (htp_param_t)); + if (param == NULL) return HTP_ERROR; + + param->name = name; + param->value = value; + param->source = HTP_SOURCE_BODY; + param->parser_id = HTP_PARSER_URLENCODED; + param->parser_data = NULL; + + if (htp_tx_req_add_param(tx, param) != HTP_OK) { + free(param); + return HTP_ERROR; + } + } + + // All the parameter data is now owned by the transaction, and + // the parser table used to store it is no longer needed. The + // line below will destroy just the table, leaving keys intact. + htp_table_destroy_ex(tx->request_urlenp_body->params); + tx->request_urlenp_body->params = NULL; + } + + return HTP_OK; +} + +/** + * Determine if the request has a Urlencoded body, and, if it does, create and + * attach an instance of the Urlencoded parser to the transaction. + * + * @param[in] connp + * @return HTP_OK if a new parser has been setup, HTP_DECLINED if the MIME type + * is not appropriate for this parser, and HTP_ERROR on failure. + */ +htp_status_t htp_ch_urlencoded_callback_request_headers(htp_tx_t *tx) { + // Check the request content type to see if it matches our MIME type. 
+ if ((tx->request_content_type == NULL) || (!bstr_begins_with_c(tx->request_content_type, HTP_URLENCODED_MIME_TYPE))) { + #ifdef HTP_DEBUG + fprintf(stderr, "htp_ch_urlencoded_callback_request_headers: Body not URLENCODED\n"); + #endif + + return HTP_DECLINED; + } + + #ifdef HTP_DEBUG + fprintf(stderr, "htp_ch_urlencoded_callback_request_headers: Parsing URLENCODED body\n"); + #endif + + // Create parser instance. + tx->request_urlenp_body = htp_urlenp_create(tx); + if (tx->request_urlenp_body == NULL) return HTP_ERROR; + + // Register a request body data callback. + htp_tx_register_request_body_data(tx, htp_ch_urlencoded_callback_request_body_data); + + return HTP_OK; +} + +/** + * Parses request query string, if present. + * + * @param[in] connp + * @param[in] raw_data + * @param[in] raw_len + * @return HTP_OK if query string was parsed, HTP_DECLINED if there was no query + * string, and HTP_ERROR on failure. + */ +htp_status_t htp_ch_urlencoded_callback_request_line(htp_tx_t *tx) { + // Proceed only if there's something for us to parse. + if ((tx->parsed_uri->query == NULL) || (bstr_len(tx->parsed_uri->query) == 0)) { + return HTP_DECLINED; + } + + // We have a non-zero length query string. + + tx->request_urlenp_query = htp_urlenp_create(tx); + if (tx->request_urlenp_query == NULL) return HTP_ERROR; + + if (htp_urlenp_parse_complete(tx->request_urlenp_query, bstr_ptr(tx->parsed_uri->query), + bstr_len(tx->parsed_uri->query)) != HTP_OK) { + htp_urlenp_destroy(tx->request_urlenp_query); + return HTP_ERROR; + } + + // Add all parameters to the transaction. 
+ + bstr *name = NULL; + bstr *value = NULL; + for (size_t i = 0, n = htp_table_size(tx->request_urlenp_query->params); i < n; i++) { + value = htp_table_get_index(tx->request_urlenp_query->params, i, &name); + + htp_param_t *param = calloc(1, sizeof (htp_param_t)); + if (param == NULL) return HTP_ERROR; + + param->name = name; + param->value = value; + param->source = HTP_SOURCE_QUERY_STRING; + param->parser_id = HTP_PARSER_URLENCODED; + param->parser_data = NULL; + + if (htp_tx_req_add_param(tx, param) != HTP_OK) { + free(param); + return HTP_ERROR; + } + } + + // All the parameter data is now owned by the transaction, and + // the parser table used to store it is no longer needed. The + // line below will destroy just the table, leaving keys intact. + htp_table_destroy_ex(tx->request_urlenp_query->params); + tx->request_urlenp_query->params = NULL; + + htp_urlenp_destroy(tx->request_urlenp_query); + tx->request_urlenp_query = NULL; + + return HTP_OK; +} + +/** + * Request body data callback for multipart/form-data bodies. Each non-NULL + * chunk is fed to the Multipart parser; a NULL chunk finalizes parsing and + * adds every MULTIPART_PART_TEXT part to the transaction as a request + * parameter. + * + * @param[in] d Body data; d->data == NULL triggers finalization. + * @return HTP_OK on success, HTP_ERROR on failure or when invoked again + * after finalization. + */ +htp_status_t htp_ch_multipart_callback_request_body_data(htp_tx_data_t *d) { + htp_tx_t *tx = d->tx; + + // Check that we were not invoked again after the finalization. + if (tx->request_mpartp->gave_up_data == 1) return HTP_ERROR; + + if (d->data != NULL) { + // Process one chunk of data. + htp_mpartp_parse(tx->request_mpartp, d->data, d->len); + } else { + // Finalize parsing. + htp_mpartp_finalize(tx->request_mpartp); + + htp_multipart_t *body = htp_mpartp_get_multipart(tx->request_mpartp); + + for (size_t i = 0, n = htp_list_size(body->parts); i < n; i++) { + htp_multipart_part_t *part = htp_list_get(body->parts, i); + + // Use text parameters.
+ if (part->type == MULTIPART_PART_TEXT) { + htp_param_t *param = calloc(1, sizeof (htp_param_t)); + if (param == NULL) return HTP_ERROR; + param->name = part->name; + param->value = part->value; + param->source = HTP_SOURCE_BODY; + param->parser_id = HTP_PARSER_MULTIPART; + param->parser_data = part; + + if (htp_tx_req_add_param(tx, param) != HTP_OK) { + free(param); + return HTP_ERROR; + } + } + } + + // Tell the parser that it no longer owns names + // and values of MULTIPART_PART_TEXT parts. + // NOTE(review): the early HTP_ERROR returns above leave gave_up_data + // unset even though earlier parts may already have been handed to the + // transaction; confirm who frees those names and values. + tx->request_mpartp->gave_up_data = 1; + } + + return HTP_OK; +} + +/** + * Inspect request headers and register the Multipart request data hook + * if it contains a multipart/form-data body. + * + * @param[in] tx + * @return HTP_OK if a new parser has been setup, HTP_DECLINED if the MIME type + * is not appropriate for this parser, and HTP_ERROR on failure. + */ +htp_status_t htp_ch_multipart_callback_request_headers(htp_tx_t *tx) { + #ifdef HTP_DEBUG + fprintf(stderr, "htp_ch_multipart_callback_request_headers: Need to determine if multipart body is present\n"); + #endif + + // The field tx->request_content_type does not contain the entire C-T + // value and so we cannot use it to look for a boundary, but we can + // use it for a quick check to determine if the C-T header exists. + if (tx->request_content_type == NULL) { + #ifdef HTP_DEBUG + fprintf(stderr, "htp_ch_multipart_callback_request_headers: Not multipart body (no C-T header)\n"); + #endif + + return HTP_DECLINED; + } + + // Look for a boundary. + + htp_header_t *ct = htp_table_get_c(tx->request_headers, "content-type"); + if (ct == NULL) return HTP_ERROR; + + bstr *boundary = NULL; + uint64_t flags = 0; + + htp_status_t rc = htp_mpartp_find_boundary(ct->value, &boundary, &flags); + if (rc != HTP_OK) { + #ifdef HTP_DEBUG + if (rc == HTP_DECLINED) { + fprintf(stderr, "htp_ch_multipart_callback_request_headers: Not multipart body\n"); + } + #endif + + // No boundary (HTP_DECLINED) or error (HTP_ERROR).
+ return rc; + } + + if (boundary == NULL) return HTP_ERROR; + + // Create a Multipart parser instance. + tx->request_mpartp = htp_mpartp_create(tx->connp->cfg, boundary, flags); + if (tx->request_mpartp == NULL) { + bstr_free(boundary); + return HTP_ERROR; + } + + // Configure file extraction. NOTE(review): the switch is read from + // tx->cfg but the directory from tx->connp->cfg; confirm both refer + // to the same configuration. + if (tx->cfg->extract_request_files) { + tx->request_mpartp->extract_files = 1; + tx->request_mpartp->extract_dir = tx->connp->cfg->tmpdir; + } + + // Register a request body data callback. + htp_tx_register_request_body_data(tx, htp_ch_multipart_callback_request_body_data); + + return HTP_OK; +} diff --git a/htp/htp_cookies.c b/htp/htp_cookies.c new file mode 100644 index 0000000..bb26822 --- /dev/null +++ b/htp/htp_cookies.c @@ -0,0 +1,119 @@ +/*************************************************************************** + * Copyright (c) 2009-2010 Open Information Security Foundation + * Copyright (c) 2010-2013 Qualys, Inc. + * All rights reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions are + * met: + * + * - Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + + * - Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution. + + * - Neither the name of the Qualys, Inc. nor the names of its + * contributors may be used to endorse or promote products derived from + * this software without specific prior written permission. + * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS + * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT + * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR + * A PARTICULAR PURPOSE ARE DISCLAIMED.
IN NO EVENT SHALL THE COPYRIGHT + * HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, + * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT + * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, + * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY + * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT + * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE + * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + ***************************************************************************/ + +/** + * @file + * @author Ivan Ristic <ivanr@webkreator.com> + */ + +#include "htp_config_auto.h" + +#include "htp_private.h" + +/** + * Parses a single v0 request cookie and places the results into tx->request_cookies. + * + * @param[in] connp Connection parser; the cookie is added to + * connp->in_tx->request_cookies. + * @param[in] data Cookie bytes: a name, optionally followed by '=' and a value. + * @param[in] len Number of bytes in data. + * @return HTP_OK on success (empty input and cookies with an empty name are + * ignored), HTP_ERROR if the name or value could not be duplicated. + * + * NOTE(review): the result of htp_table_addn() is not checked; if it can + * fail, name and value would leak. Confirm. + */ +int htp_parse_single_cookie_v0(htp_connp_t *connp, unsigned char *data, size_t len) { + if (len == 0) return HTP_OK; + + size_t pos = 0; + + // Look for '='. + while ((pos < len) && (data[pos] != '=')) pos++; + if (pos == 0) return HTP_OK; // Ignore a nameless cookie. + + bstr *name = bstr_dup_mem(data, pos); + if (name == NULL) return HTP_ERROR; + + bstr *value = NULL; + if (pos == len) { + // No '=' was found: the cookie has a name only, so store an empty value. + value = bstr_dup_c(""); + } else { + // Everything after the first '=' is the value (it may be empty). + value = bstr_dup_mem(data + pos + 1, len - pos - 1); + } + + if (value == NULL) { + bstr_free(name); + return HTP_ERROR; + } + + htp_table_addn(connp->in_tx->request_cookies, name, value); + + return HTP_OK; +} + +/** + * Parses the Cookie request header in v0 format.
+ * + * @param[in] connp + * @return HTP_OK on success, HTP_ERROR on error + */ +htp_status_t htp_parse_cookies_v0(htp_connp_t *connp) { + htp_header_t *cookie_header = htp_table_get_c(connp->in_tx->request_headers, "cookie"); + if (cookie_header == NULL) return HTP_OK; + + // Create a new table to store cookies. NOTE(review): a table already + // stored in request_cookies would be overwritten without being freed; + // confirm this runs at most once per transaction. + connp->in_tx->request_cookies = htp_table_create(4); + if (connp->in_tx->request_cookies == NULL) return HTP_ERROR; + + unsigned char *data = bstr_ptr(cookie_header->value); + size_t len = bstr_len(cookie_header->value); + size_t pos = 0; + + while (pos < len) { + // Ignore whitespace at the beginning; if nothing but whitespace + // is left, we are done. + while ((pos < len) && (isspace((int)data[pos]))) pos++; + if (pos == len) return HTP_OK; + + size_t start = pos; + + // Find the end of the cookie (the next ';' or the end of the header). + while ((pos < len) && (data[pos] != ';')) pos++; + + if (htp_parse_single_cookie_v0(connp, data + start, pos - start) != HTP_OK) { + return HTP_ERROR; + } + + // Go over the semicolon. + if (pos < len) pos++; + } + + return HTP_OK; +} diff --git a/htp/htp_core.h b/htp/htp_core.h new file mode 100644 index 0000000..e4c933e --- /dev/null +++ b/htp/htp_core.h @@ -0,0 +1,353 @@ +/*************************************************************************** + * Copyright (c) 2009-2010 Open Information Security Foundation + * Copyright (c) 2010-2013 Qualys, Inc. + * All rights reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions are + * met: + * + * - Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + + * - Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution. + + * - Neither the name of the Qualys, Inc.
nor the names of its + * contributors may be used to endorse or promote products derived from + * this software without specific prior written permission. + * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS + * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT + * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR + * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT + * HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, + * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT + * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, + * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY + * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT + * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE + * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + ***************************************************************************/ + +/** + * @file + * @author Ivan Ristic <ivanr@webkreator.com> + */ + +#ifndef HTP_CORE_H +#define HTP_CORE_H + +#ifdef __cplusplus +extern "C" { +#endif + +typedef int htp_status_t; + +typedef struct htp_cfg_t htp_cfg_t; +typedef struct htp_conn_t htp_conn_t; +typedef struct htp_connp_t htp_connp_t; +typedef struct htp_file_t htp_file_t; +typedef struct htp_file_data_t htp_file_data_t; +typedef struct htp_header_t htp_header_t; +typedef struct htp_header_line_t htp_header_line_t; +typedef struct htp_log_t htp_log_t; +typedef struct htp_param_t htp_param_t; +typedef struct htp_tx_data_t htp_tx_data_t; +typedef struct htp_tx_t htp_tx_t; +typedef struct htp_uri_t htp_uri_t; +typedef struct timeval htp_time_t; + +// Below are all htp_status_t return codes used by LibHTP. Enum is not +// used here to allow applications to define their own codes. + +/** + * The lowest htp_status_t value LibHTP will use internally. Presumably + * application-defined codes are meant to lie outside the range between + * this value and HTP_STATUS_RESERVED (TODO confirm).
+ */ +#define HTP_ERROR_RESERVED -1000 + +/** General-purpose error code. */ +#define HTP_ERROR -1 + +/** + * No processing or work was done. This is typically used by callbacks + * to indicate that they were not interested in doing any work in the + * given context. + */ +#define HTP_DECLINED 0 + +/** Returned by a function when its work was successfully completed. */ +#define HTP_OK 1 + +/** + * Returned when processing a connection stream, after consuming all + * provided data. The caller should call again with more data. + */ +#define HTP_DATA 2 + +/** + * Returned when processing a connection stream, after encountering + * a situation where processing needs to continue on the alternate + * stream (e.g., the inbound parser needs to observe some outbound + * data). The data provided was not completely consumed. On the next + * invocation the caller should supply only the data that has not + * been processed already. Use htp_connp_req_data_consumed() and + * htp_connp_res_data_consumed() to determine how much of the most + * recent data chunk was consumed. + */ +#define HTP_DATA_OTHER 3 + +/** + * Used by callbacks to indicate that the processing should stop. For example, + * returning HTP_STOP from a connection callback indicates that LibHTP should + * stop following that particular connection. + */ +#define HTP_STOP 4 + +/** + * Same as HTP_DATA, but indicates that any non-consumed part of the + * data chunk should be preserved (buffered) for later. + */ +#define HTP_DATA_BUFFER 5 + +/** + * The highest htp_status_t value LibHTP will use internally. + */ +#define HTP_STATUS_RESERVED 1000 + +/** + * Enumerates the possible values for authentication type. + */ +enum htp_auth_type_t { + /** + * This is the default value that is used before + * the presence of authentication is determined (e.g., + * before request headers are seen). + */ + HTP_AUTH_UNKNOWN = 0, + + /** No authentication. */ + HTP_AUTH_NONE = 1, + + /** HTTP Basic authentication used.
*/ + HTP_AUTH_BASIC = 2, + + /** HTTP Digest authentication used. */ + HTP_AUTH_DIGEST = 3, + + /** HTTP Bearer authentication used. */ + HTP_AUTH_BEARER = 4, + + /** Unrecognized authentication method. */ + HTP_AUTH_UNRECOGNIZED = 9 +}; + +enum htp_content_encoding_t { + /** + * This is the default value, which is used until the presence + * of content encoding is determined (e.g., before request headers + * are seen). + */ + HTP_COMPRESSION_UNKNOWN = 0, + + /** No compression. */ + HTP_COMPRESSION_NONE = 1, + + /** Gzip compression. */ + HTP_COMPRESSION_GZIP = 2, + + /** Deflate compression. */ + HTP_COMPRESSION_DEFLATE = 3, + + /** LZMA compression. */ + HTP_COMPRESSION_LZMA = 4 +}; + +/** + * Enumerates the possible request and response body codings. + */ +enum htp_transfer_coding_t { + /** Body coding not determined yet. */ + HTP_CODING_UNKNOWN = 0, + + /** No body. */ + HTP_CODING_NO_BODY = 1, + + /** Identity coding is used, which means that the body was sent as is. */ + HTP_CODING_IDENTITY = 2, + + /** Chunked encoding. */ + HTP_CODING_CHUNKED = 3, + + /** We could not recognize the encoding. */ + HTP_CODING_INVALID = 4 +}; + +enum htp_file_source_t { + + HTP_FILE_MULTIPART = 1, + + HTP_FILE_PUT = 2 +}; + +// Various flag bits. Even though we have a flag field in several places +// (header, transaction, connection), these fields are all in the same namespace +// because we may want to set the same flag in several locations. For example, we +// may set HTP_FIELD_FOLDED on the actual folded header, but also on the transaction +// that contains the header. Both uses are useful. + +// Connection flags are 8 bits wide. +#define HTP_CONN_PIPELINED 0x000000001ULL +#define HTP_CONN_HTTP_0_9_EXTRA 0x000000002ULL + +// All other flags are 64 bits wide.
+#define HTP_FIELD_UNPARSEABLE 0x000000004ULL +#define HTP_FIELD_INVALID 0x000000008ULL +#define HTP_FIELD_FOLDED 0x000000010ULL +#define HTP_FIELD_REPEATED 0x000000020ULL +#define HTP_FIELD_LONG 0x000000040ULL +#define HTP_FIELD_RAW_NUL 0x000000080ULL +#define HTP_REQUEST_SMUGGLING 0x000000100ULL +#define HTP_INVALID_FOLDING 0x000000200ULL +#define HTP_REQUEST_INVALID_T_E 0x000000400ULL +#define HTP_MULTI_PACKET_HEAD 0x000000800ULL +#define HTP_HOST_MISSING 0x000001000ULL +#define HTP_HOST_AMBIGUOUS 0x000002000ULL +#define HTP_PATH_ENCODED_NUL 0x000004000ULL +#define HTP_PATH_RAW_NUL 0x000008000ULL +#define HTP_PATH_INVALID_ENCODING 0x000010000ULL +#define HTP_PATH_INVALID 0x000020000ULL +#define HTP_PATH_OVERLONG_U 0x000040000ULL +#define HTP_PATH_ENCODED_SEPARATOR 0x000080000ULL +#define HTP_PATH_UTF8_VALID 0x000100000ULL /* At least one valid UTF-8 character and no invalid ones. */ +#define HTP_PATH_UTF8_INVALID 0x000200000ULL +#define HTP_PATH_UTF8_OVERLONG 0x000400000ULL +#define HTP_PATH_HALF_FULL_RANGE 0x000800000ULL /* Range U+FF00 - U+FFEF detected. */ +#define HTP_STATUS_LINE_INVALID 0x001000000ULL +#define HTP_HOSTU_INVALID 0x002000000ULL /* Host in the URI. */ +#define HTP_HOSTH_INVALID 0x004000000ULL /* Host in the Host header. */ +#define HTP_URLEN_ENCODED_NUL 0x008000000ULL +#define HTP_URLEN_INVALID_ENCODING 0x010000000ULL +#define HTP_URLEN_OVERLONG_U 0x020000000ULL +#define HTP_URLEN_HALF_FULL_RANGE 0x040000000ULL /* Range U+FF00 - U+FFEF detected. */ +#define HTP_URLEN_RAW_NUL 0x080000000ULL +#define HTP_REQUEST_INVALID 0x100000000ULL +#define HTP_REQUEST_INVALID_C_L 0x200000000ULL +#define HTP_AUTH_INVALID 0x400000000ULL + +#define HTP_MAX_HEADERS_REPETITIONS 64 + +#define HTP_HOST_INVALID ( HTP_HOSTU_INVALID | HTP_HOSTH_INVALID ) + +// Logging-related constants. HTP_LOG_MARK expands to the current source +// file name and line number (two comma-separated arguments). +#define HTP_LOG_MARK __FILE__,__LINE__ + +/** + * Enumerates all log levels.
+ */ +enum htp_log_level_t { + HTP_LOG_NONE = 0, + HTP_LOG_ERROR = 1, + HTP_LOG_WARNING = 2, + HTP_LOG_NOTICE = 3, + HTP_LOG_INFO = 4, + HTP_LOG_DEBUG = 5, + HTP_LOG_DEBUG2 = 6 +}; + +/** + * HTTP methods. + */ +enum htp_method_t { + /** + * Used by default, until the method is determined (e.g., before + * the request line is processed). + */ + HTP_M_UNKNOWN = 0, + HTP_M_HEAD = 1, + HTP_M_GET = 2, + HTP_M_PUT = 3, + HTP_M_POST = 4, + HTP_M_DELETE = 5, + HTP_M_CONNECT = 6, + HTP_M_OPTIONS = 7, + HTP_M_TRACE = 8, + HTP_M_PATCH = 9, + HTP_M_PROPFIND = 10, + HTP_M_PROPPATCH = 11, + HTP_M_MKCOL = 12, + HTP_M_COPY = 13, + HTP_M_MOVE = 14, + HTP_M_LOCK = 15, + HTP_M_UNLOCK = 16, + HTP_M_VERSION_CONTROL = 17, + HTP_M_CHECKOUT = 18, + HTP_M_UNCHECKOUT = 19, + HTP_M_CHECKIN = 20, + HTP_M_UPDATE = 21, + HTP_M_LABEL = 22, + HTP_M_REPORT = 23, + HTP_M_MKWORKSPACE = 24, + HTP_M_MKACTIVITY = 25, + HTP_M_BASELINE_CONTROL = 26, + HTP_M_MERGE = 27, + HTP_M_INVALID = 28 +}; + +// A collection of unique parser IDs. +enum htp_parser_id_t { + /** application/x-www-form-urlencoded parser. */ + HTP_PARSER_URLENCODED = 0, + + /** multipart/form-data parser. */ + HTP_PARSER_MULTIPART = 1 +}; + +// Protocol version constants; an enum cannot be +// used here because we allow any properly-formatted protocol +// version (e.g., 1.3), even those that do not actually exist. +#define HTP_PROTOCOL_INVALID -2 +#define HTP_PROTOCOL_UNKNOWN -1 +#define HTP_PROTOCOL_0_9 9 +#define HTP_PROTOCOL_1_0 100 +#define HTP_PROTOCOL_1_1 101 + +// A collection of possible data sources. +enum htp_data_source_t { + /** Embedded in the URL. */ + HTP_SOURCE_URL = 0, + + /** Transported in the query string. */ + HTP_SOURCE_QUERY_STRING = 1, + + /** Cookies. */ + HTP_SOURCE_COOKIE = 2, + + /** Transported in the request body. */ + HTP_SOURCE_BODY = 3 +}; + +#define HTP_STATUS_INVALID -1 +#define HTP_STATUS_UNKNOWN 0 + +/** + * Enumerates all stream states.
Each connection has two streams, one + * inbound and one outbound. Their states are tracked separately. + */ +enum htp_stream_state_t { + HTP_STREAM_NEW = 0, + HTP_STREAM_OPEN = 1, + HTP_STREAM_CLOSED = 2, + HTP_STREAM_ERROR = 3, + HTP_STREAM_TUNNEL = 4, + HTP_STREAM_DATA_OTHER = 5, + HTP_STREAM_STOP = 6, + HTP_STREAM_DATA = 9 +}; + +#ifdef __cplusplus +} +#endif + +#endif /* HTP_CORE_H */ diff --git a/htp/htp_decompressors.c b/htp/htp_decompressors.c new file mode 100644 index 0000000..fe12833 --- /dev/null +++ b/htp/htp_decompressors.c @@ -0,0 +1,490 @@ +/*************************************************************************** + * Copyright (c) 2009-2010 Open Information Security Foundation + * Copyright (c) 2010-2013 Qualys, Inc. + * All rights reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions are + * met: + * + * - Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + + * - Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution. + + * - Neither the name of the Qualys, Inc. nor the names of its + * contributors may be used to endorse or promote products derived from + * this software without specific prior written permission. + * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS + * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT + * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR + * A PARTICULAR PURPOSE ARE DISCLAIMED.
IN NO EVENT SHALL THE COPYRIGHT + * HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, + * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT + * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, + * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY + * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT + * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE + * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + ***************************************************************************/ + +/** + * @file + * @author Ivan Ristic <ivanr@webkreator.com> + */ + +#include "htp_config_auto.h" + +#include "htp_private.h" + + +static void *SzAlloc(ISzAllocPtr p, size_t size) { return malloc(size); } +static void SzFree(ISzAllocPtr p, void *address) { free(address); } +const ISzAlloc lzma_Alloc = { SzAlloc, SzFree }; + + +/** + * @brief See if the data starts with a gzip header (bytes 0x1f 0x8b and a + * non-zero flags byte) that carries extensions, and work out how + * many leading bytes to skip. + * @param[in] data start of the compressed data + * @param[in] data_len number of bytes available in data + * @return number of bytes to skip; 0 when data_len is below 4, when no such + * header is found, or when the computed amount exceeds data_len + */ +static size_t htp_gzip_decompressor_probe(const unsigned char *data, + size_t data_len) +{ + if (data_len < 4) + return 0; + + size_t consumed = 0; + + if (data[0] == 0x1f && data[1] == 0x8b && data[3] != 0) { + if (data[3] & (1 << 3) || data[3] & (1 << 4)) { + /* skip past + * - FNAME extension, which is a name ended in a NUL terminator + * or + * - FCOMMENT extension, which is a comment ended in a NUL terminator + * + * NOTE(review): only the first NUL after the 10-byte fixed header is + * looked for, so a header carrying both fields (or other flag bits + * as well) is only partly skipped; confirm this is acceptable. + */ + + size_t len; + for (len = 10; len < data_len && data[len] != '\0'; len++); + consumed = len + 1; + + //printf("skipped %u bytes for FNAME/FCOMMENT header (GZIP)\n", (uint)consumed); + + } else if (data[3] & (1 << 1)) { + consumed = 12; + //printf("skipped %u bytes for FHCRC header (GZIP)\n", 12); + + } else { + //printf("GZIP unknown/unsupported flags %02X\n", data[3]); + consumed = 10; + } + } + + if (consumed > data_len) + return 0; + + return consumed; +} + +/** + * @brief restart the decompressor + * @return 1 if it
restarted, 0 otherwise + */ +static int htp_gzip_decompressor_restart(htp_decompressor_gzip_t *drec, + const unsigned char *data, + size_t data_len, size_t *consumed_back) +{ + size_t consumed = 0; + int rc = 0; + + if (drec->restart < 3) { + + // first retry with the existing type, but now consider the + // extensions (the gzip header bytes that + // htp_gzip_decompressor_probe() tells us to skip); no restart + // is attempted once drec->restart has reached 3 + if (drec->restart == 0) { + consumed = htp_gzip_decompressor_probe(data, data_len); + + if (drec->zlib_initialized == HTP_COMPRESSION_GZIP) { + //printf("GZIP restart, consumed %u\n", (uint)consumed); + rc = inflateInit2(&drec->stream, 15 + 32); + } else { + //printf("DEFLATE restart, consumed %u\n", (uint)consumed); + rc = inflateInit2(&drec->stream, -15); + } + if (rc != Z_OK) + return 0; + + goto restart; + + // if that still fails, try the other method we support + // (gzip <-> raw deflate) + + } else if (drec->zlib_initialized == HTP_COMPRESSION_DEFLATE) { + rc = inflateInit2(&drec->stream, 15 + 32); + if (rc != Z_OK) + return 0; + + drec->zlib_initialized = HTP_COMPRESSION_GZIP; + consumed = htp_gzip_decompressor_probe(data, data_len); +#if 0 + printf("DEFLATE -> GZIP consumed %u\n", (uint)consumed); +#endif + goto restart; + + } else if (drec->zlib_initialized == HTP_COMPRESSION_GZIP) { + rc = inflateInit2(&drec->stream, -15); + if (rc != Z_OK) + return 0; + + drec->zlib_initialized = HTP_COMPRESSION_DEFLATE; + consumed = htp_gzip_decompressor_probe(data, data_len); +#if 0 + printf("GZIP -> DEFLATE consumed %u\n", (uint)consumed); +#endif + goto restart; + } + } + return 0; + +restart: +#if 0 + gz_header y; + gz_headerp x = &y; + int res = inflateGetHeader(&drec->stream, x); + printf("HEADER res %d x.os %d x.done %d\n", res, x->os, x->done); +#endif + *consumed_back = consumed; + drec->restart++; + return 1; +} + +/** + * Ends decompressor. Releases the LZMA or zlib state, if any, and marks + * the decompressor as uninitialized.
+ * + * @param[in] drec + */ +static void htp_gzip_decompressor_end(htp_decompressor_gzip_t *drec) { + if (drec->zlib_initialized == HTP_COMPRESSION_LZMA) { + LzmaDec_Free(&drec->state, &lzma_Alloc); + drec->zlib_initialized = 0; + } else if (drec->zlib_initialized) { + inflateEnd(&drec->stream); + drec->zlib_initialized = 0; + } +} + +/** + * Decompress a chunk of gzip-, deflate- or LZMA-compressed data and pass the + * output on. + * If we have more than one decompressor, call this function recursively; + * otherwise the output goes to the callback. + * + * @param[in] drec1 Decompressor; it is cast to htp_decompressor_gzip_t. + * @param[in] d Chunk to process; d->data == NULL sends out what has been + * buffered so far. + * @return HTP_OK on success; on failure HTP_ERROR, or the non-HTP_OK status + * returned by the callback or by the next decompressor. + * NOTE(review): a failed LzmaDec_Allocate() returns its raw SZ_* code. + */ +htp_status_t htp_gzip_decompressor_decompress(htp_decompressor_t *drec1, htp_tx_data_t *d) { + size_t consumed = 0; + int rc = 0; + htp_status_t callback_rc; + htp_decompressor_gzip_t *drec = (htp_decompressor_gzip_t*) drec1; + + // In pass-through mode every chunk (including the NULL chunk, which + // indicates the end of the stream) goes to the callback unchanged. + + if (drec->super.passthrough) { + htp_tx_data_t d2; + d2.tx = d->tx; + d2.data = d->data; + d2.len = d->len; + d2.is_last = d->is_last; + + callback_rc = drec->super.callback(&d2); + if (callback_rc != HTP_OK) { + return HTP_ERROR; + } + + return HTP_OK; + } + + if (d->data == NULL) { + // Prepare data for callback. + htp_tx_data_t dout; + dout.tx = d->tx; + // This is last call, so output uncompressed data so far + dout.len = GZIP_BUF_SIZE - drec->stream.avail_out; + if (dout.len > 0) { + dout.data = drec->buffer; + } else { + dout.data = NULL; + } + dout.is_last = d->is_last; + if (drec->super.next != NULL && drec->zlib_initialized) { + return htp_gzip_decompressor_decompress(drec->super.next, &dout); + } else { + // Send decompressed data to the callback.
+ callback_rc = drec->super.callback(&dout); + if (callback_rc != HTP_OK) { + htp_gzip_decompressor_end(drec); + return callback_rc; + } + } + + return HTP_OK; + } + +restart: + if (consumed > d->len || d->len > UINT32_MAX ) { + htp_log(d->tx->connp, HTP_LOG_MARK, HTP_LOG_ERROR, 0, "GZip decompressor: consumed > d->len"); + return HTP_ERROR; + } + drec->stream.next_in = (unsigned char *) (d->data + consumed); + drec->stream.avail_in = (uint32_t) (d->len - consumed); + + while (drec->stream.avail_in != 0) { + // If there's no more room left in the output + // buffer, send its contents out. + if (drec->stream.avail_out == 0) { + drec->crc = crc32(drec->crc, drec->buffer, GZIP_BUF_SIZE); + + // Prepare data for callback. + htp_tx_data_t d2; + d2.tx = d->tx; + d2.data = drec->buffer; + d2.len = GZIP_BUF_SIZE; + d2.is_last = d->is_last; + + if (drec->super.next != NULL && drec->zlib_initialized) { + callback_rc = htp_gzip_decompressor_decompress(drec->super.next, &d2); + } else { + // Send decompressed data to callback.
+ callback_rc = drec->super.callback(&d2); + } + if (callback_rc != HTP_OK) { + htp_gzip_decompressor_end(drec); + return callback_rc; + } + + drec->stream.next_out = drec->buffer; + drec->stream.avail_out = GZIP_BUF_SIZE; + } + + if (drec->zlib_initialized == HTP_COMPRESSION_LZMA) { + if (drec->header_len < LZMA_PROPS_SIZE + 8) { + consumed = LZMA_PROPS_SIZE + 8 - drec->header_len; + if (consumed > drec->stream.avail_in) { + consumed = drec->stream.avail_in; + } + memcpy(drec->header + drec->header_len, drec->stream.next_in, consumed); + drec->stream.next_in = (unsigned char *) (d->data + consumed); + drec->stream.avail_in = (uint32_t) (d->len - consumed); + drec->header_len += consumed; + } + if (drec->header_len == LZMA_PROPS_SIZE + 8) { + rc = LzmaDec_Allocate(&drec->state, drec->header, LZMA_PROPS_SIZE, &lzma_Alloc); + if (rc != SZ_OK) + return rc; + LzmaDec_Init(&drec->state); + // hacky to get to next step and retry allocate in case of failure: + // header_len is only bumped past the header size once allocation succeeded + drec->header_len++; + } + if (drec->header_len > LZMA_PROPS_SIZE + 8) { + size_t inprocessed = drec->stream.avail_in; + size_t outprocessed = drec->stream.avail_out; + ELzmaStatus status; + rc = LzmaDec_DecodeToBuf(&drec->state, drec->stream.next_out, &outprocessed, + drec->stream.next_in, &inprocessed, LZMA_FINISH_ANY, &status, d->tx->cfg->lzma_memlimit); + drec->stream.avail_in -= inprocessed; + drec->stream.next_in += inprocessed; + drec->stream.avail_out -= outprocessed; + drec->stream.next_out += outprocessed; + switch (rc) { + case SZ_OK: + rc = Z_OK; + if (status == LZMA_STATUS_FINISHED_WITH_MARK) { + rc = Z_STREAM_END; + } + break; + case SZ_ERROR_MEM: + htp_log(d->tx->connp, HTP_LOG_MARK, HTP_LOG_WARNING, 0, "LZMA decompressor: memory limit reached"); + // fall through + default: + rc = Z_DATA_ERROR; + } + } + } else if (drec->zlib_initialized) { + rc = inflate(&drec->stream, Z_NO_FLUSH); + } else { + // no initialization means previous error on stream + return HTP_ERROR; + } + if (GZIP_BUF_SIZE >
drec->stream.avail_out) { + if (rc == Z_DATA_ERROR) { + // There is data even if there is an error + // So use this data and log a warning + htp_log(d->tx->connp, HTP_LOG_MARK, HTP_LOG_WARNING, 0, "GZip decompressor: inflate failed with %d", rc); + rc = Z_STREAM_END; + } + } + if (rc == Z_STREAM_END) { + // How many bytes do we have? + size_t len = GZIP_BUF_SIZE - drec->stream.avail_out; + + // Update CRC (NOTE(review): nothing is actually updated here) + + // Prepare data for the callback. + htp_tx_data_t d2; + d2.tx = d->tx; + d2.data = drec->buffer; + d2.len = len; + d2.is_last = d->is_last; + + if (drec->super.next != NULL && drec->zlib_initialized) { + callback_rc = htp_gzip_decompressor_decompress(drec->super.next, &d2); + } else { + // Send decompressed data to the callback. + callback_rc = drec->super.callback(&d2); + } + if (callback_rc != HTP_OK) { + htp_gzip_decompressor_end(drec); + return callback_rc; + } + drec->stream.avail_out = GZIP_BUF_SIZE; + drec->stream.next_out = drec->buffer; + // TODO Handle trailer. + + return HTP_OK; + } + else if (rc != Z_OK) { + htp_log(d->tx->connp, HTP_LOG_MARK, HTP_LOG_WARNING, 0, "GZip decompressor: inflate failed with %d", rc); + if (drec->zlib_initialized == HTP_COMPRESSION_LZMA) { + LzmaDec_Free(&drec->state, &lzma_Alloc); + // so as to clean zlib resources after restart + drec->zlib_initialized = HTP_COMPRESSION_NONE; + } else { + inflateEnd(&drec->stream); + } + + // see if we want to restart the decompressor + if (htp_gzip_decompressor_restart(drec, + d->data, d->len, &consumed) == 1) + { + // we'll be restarting the decompressor + goto restart; + } + + drec->zlib_initialized = 0; + + // all our inflate attempts have failed, simply + // pass the raw data on to the callback in case + // it's not compressed at all + + htp_tx_data_t d2; + d2.tx = d->tx; + d2.data = d->data; + d2.len = d->len; + d2.is_last = d->is_last; + + callback_rc = drec->super.callback(&d2); + if (callback_rc != HTP_OK) { + return HTP_ERROR; + } + + drec->stream.avail_out = GZIP_BUF_SIZE; +
drec->stream.next_out = drec->buffer; + + /* successfully passed through, lets continue doing that */ + drec->super.passthrough = 1; + return HTP_OK; + } + } + + return HTP_OK; +} + +/** + * Shut down gzip decompressor: ends it, then frees its output buffer and + * the instance itself. A NULL argument is ignored. + * + * @param[in] drec1 + */ +void htp_gzip_decompressor_destroy(htp_decompressor_t *drec1) { + htp_decompressor_gzip_t *drec = (htp_decompressor_gzip_t*) drec1; + if (drec == NULL) return; + + htp_gzip_decompressor_end(drec); + + free(drec->buffer); + free(drec); +} + +/** + * Create a new decompressor instance. + * + * @param[in] connp Used for logging and, for LZMA, for the configured limits. + * @param[in] format HTP_COMPRESSION_GZIP, HTP_COMPRESSION_DEFLATE or + * HTP_COMPRESSION_LZMA; any other value makes creation fail. + * @return New htp_decompressor_t instance on success, or NULL on failure. + */ +htp_decompressor_t *htp_gzip_decompressor_create(htp_connp_t *connp, enum htp_content_encoding_t format) { + htp_decompressor_gzip_t *drec = calloc(1, sizeof (htp_decompressor_gzip_t)); + if (drec == NULL) return NULL; + + drec->super.decompress = NULL; + drec->super.destroy = NULL; + drec->super.next = NULL; + + drec->buffer = malloc(GZIP_BUF_SIZE); + if (drec->buffer == NULL) { + free(drec); + return NULL; + } + + // Initialize zlib. + int rc; + + switch (format) { + case HTP_COMPRESSION_LZMA: + if (connp->cfg->lzma_memlimit > 0 && + connp->cfg->response_lzma_layer_limit > 0) { + LzmaDec_Construct(&drec->state); + } else { + htp_log(connp, HTP_LOG_MARK, HTP_LOG_WARNING, 0, "LZMA decompression disabled"); + drec->super.passthrough = 1; + } + rc = Z_OK; + break; + case HTP_COMPRESSION_DEFLATE: + // Negative values activate raw processing, + // which is what we need for deflate. + rc = inflateInit2(&drec->stream, -15); + break; + case HTP_COMPRESSION_GZIP: + // Increased windows size activates gzip header processing.
+ rc = inflateInit2(&drec->stream, 15 + 32); + break; + default: + // unsupported format: force the failure path below + rc = Z_DATA_ERROR; + } + + if (rc != Z_OK) { + htp_log(connp, HTP_LOG_MARK, HTP_LOG_ERROR, 0, "GZip decompressor: inflateInit2 failed with code %d", rc); + + if (format == HTP_COMPRESSION_DEFLATE || format == HTP_COMPRESSION_GZIP) { + inflateEnd(&drec->stream); + } + free(drec->buffer); + free(drec); + + return NULL; + } + + drec->zlib_initialized = format; + drec->stream.avail_out = GZIP_BUF_SIZE; + drec->stream.next_out = drec->buffer; + + #if 0 + if (format == COMPRESSION_DEFLATE) { + drec->initialized = 1; + } + #endif + + return (htp_decompressor_t *) drec; +} diff --git a/htp/htp_decompressors.h b/htp/htp_decompressors.h new file mode 100644 index 0000000..a357de1 --- /dev/null +++ b/htp/htp_decompressors.h @@ -0,0 +1,94 @@ +/*************************************************************************** + * Copyright (c) 2009-2010 Open Information Security Foundation + * Copyright (c) 2010-2013 Qualys, Inc. + * All rights reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions are + * met: + * + * - Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + + * - Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution. + + * - Neither the name of the Qualys, Inc. nor the names of its + * contributors may be used to endorse or promote products derived from + * this software without specific prior written permission.
+ * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS + * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT + * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR + * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT + * HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, + * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT + * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, + * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY + * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT + * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE + * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + ***************************************************************************/ + +/** + * @file + * @author Ivan Ristic <ivanr@webkreator.com> + */ + +#ifndef _HTP_DECOMPRESSORS_H +#define _HTP_DECOMPRESSORS_H + +#ifdef __cplusplus +extern "C" { +#endif + +#include <zlib.h> +#include "lzma/LzmaDec.h" + +typedef struct htp_decompressor_gzip_t htp_decompressor_gzip_t; +typedef struct htp_decompressor_t htp_decompressor_t; + +#define GZIP_BUF_SIZE 8192 + +#define DEFLATE_MAGIC_1 0x1f +#define DEFLATE_MAGIC_2 0x8b + +struct htp_decompressor_t { + // no longer used (htp_gzip_decompressor_create() sets it to NULL) + htp_status_t (*decompress)(htp_decompressor_t *, htp_tx_data_t *); + htp_status_t (*callback)(htp_tx_data_t *); + // no longer used (htp_gzip_decompressor_create() sets it to NULL) + void (*destroy)(htp_decompressor_t *); + struct htp_decompressor_t *next; + struct timeval time_before; + int32_t time_spent; + uint32_t nb_callbacks; + uint8_t passthrough; /**< decompression failed or is disabled, pass through raw data */ +}; + +struct htp_decompressor_gzip_t { + htp_decompressor_t super; + #if 0 + int initialized; + #endif + int zlib_initialized; + uint8_t restart; /**< number of restarts attempted so far (deflate restarted to try rfc1950 instead of 1951, etc.) */ + z_stream stream; + uint8_t header[LZMA_PROPS_SIZE + 8]; +
uint8_t header_len; + CLzmaDec state; + unsigned char *buffer; + unsigned long crc; +}; + +htp_decompressor_t *htp_gzip_decompressor_create(htp_connp_t *connp, enum htp_content_encoding_t format); +htp_status_t htp_gzip_decompressor_decompress(htp_decompressor_t *drec, htp_tx_data_t *d); +void htp_gzip_decompressor_destroy(htp_decompressor_t *drec); + +#ifdef __cplusplus +} +#endif + +#endif /* _HTP_DECOMPRESSORS_H */ + diff --git a/htp/htp_hooks.c b/htp/htp_hooks.c new file mode 100644 index 0000000..37d0fd4 --- /dev/null +++ b/htp/htp_hooks.c @@ -0,0 +1,160 @@ +/*************************************************************************** + * Copyright (c) 2009-2010 Open Information Security Foundation + * Copyright (c) 2010-2013 Qualys, Inc. + * All rights reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions are + * met: + * + * - Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + + * - Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution. + + * - Neither the name of the Qualys, Inc. nor the names of its + * contributors may be used to endorse or promote products derived from + * this software without specific prior written permission. + * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS + * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT + * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR + * A PARTICULAR PURPOSE ARE DISCLAIMED.
IN NO EVENT SHALL THE COPYRIGHT + * HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, + * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT + * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, + * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY + * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT + * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE + * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + ***************************************************************************/ + +/** + * @file + * @author Ivan Ristic <ivanr@webkreator.com> + */ + +#include "htp_config_auto.h" + +#include "htp_private.h" + +htp_hook_t *htp_hook_copy(const htp_hook_t *hook) { + if (hook == NULL) return NULL; + + htp_hook_t *copy = htp_hook_create(); + if (copy == NULL) return NULL; + + for (size_t i = 0, n = htp_list_size(hook->callbacks); i < n; i++) { + htp_callback_t *callback = htp_list_get(hook->callbacks, i); + if (htp_hook_register(©, callback->fn) != HTP_OK) { + htp_hook_destroy(copy); + return NULL; + } + } + + return copy; +} + +htp_hook_t *htp_hook_create(void) { + htp_hook_t *hook = calloc(1, sizeof (htp_hook_t)); + if (hook == NULL) return NULL; + + hook->callbacks = (htp_list_array_t *) htp_list_array_create(4); + if (hook->callbacks == NULL) { + free(hook); + return NULL; + } + + return hook; +} + +void htp_hook_destroy(htp_hook_t *hook) { + if (hook == NULL) return; + + for (size_t i = 0, n = htp_list_size(hook->callbacks); i < n; i++) { + free((htp_callback_t *) htp_list_get(hook->callbacks, i)); + } + + htp_list_array_destroy(hook->callbacks); + + free(hook); +} + +htp_status_t htp_hook_register(htp_hook_t **hook, const htp_callback_fn_t callback_fn) { + if (hook == NULL) return HTP_ERROR; + + htp_callback_t *callback = calloc(1, sizeof (htp_callback_t)); + if (callback == NULL) return HTP_ERROR; + + callback->fn = callback_fn; 
+ + // Create a new hook if one does not exist + int hook_created = 0; + + if (*hook == NULL) { + hook_created = 1; + + *hook = htp_hook_create(); + if (*hook == NULL) { + free(callback); + return HTP_ERROR; + } + } + + // Add callback + if (htp_list_array_push((*hook)->callbacks, callback) != HTP_OK) { + if (hook_created) { + free(*hook); + } + + free(callback); + + return HTP_ERROR; + } + + return HTP_OK; +} + +htp_status_t htp_hook_run_all(htp_hook_t *hook, void *user_data) { + if (hook == NULL) return HTP_OK; + + // Loop through the registered callbacks, giving each a chance to run. + for (size_t i = 0, n = htp_list_size(hook->callbacks); i < n; i++) { + htp_callback_t *callback = htp_list_get(hook->callbacks, i); + + htp_status_t rc = callback->fn(user_data); + + // A hook can return HTP_OK to say that it did some work, + // or HTP_DECLINED to say that it did no work. Anything else + // is treated as an error. + if ((rc != HTP_OK) && (rc != HTP_DECLINED)) { + return rc; + } + } + + return HTP_OK; +} + +htp_status_t htp_hook_run_one(htp_hook_t *hook, void *user_data) { + if (hook == NULL) return HTP_DECLINED; + + for (size_t i = 0, n = htp_list_size(hook->callbacks); i < n; i++) { + htp_callback_t *callback = htp_list_get(hook->callbacks, i); + + htp_status_t rc = callback->fn(user_data); + + // A hook can return HTP_DECLINED to say that it did no work, + // and we'll ignore that. If we see HTP_OK or anything else, + // we stop processing (because it was either a successful + // handling or an error). + if (rc != HTP_DECLINED) { + // Return HTP_OK or an error. + return rc; + } + } + + // No hook wanted to process the callback. 
+ return HTP_DECLINED; +} diff --git a/htp/htp_hooks.h b/htp/htp_hooks.h new file mode 100644 index 0000000..902a7d4 --- /dev/null +++ b/htp/htp_hooks.h @@ -0,0 +1,122 @@ +/*************************************************************************** + * Copyright (c) 2009-2010 Open Information Security Foundation + * Copyright (c) 2010-2013 Qualys, Inc. + * All rights reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions are + * met: + * + * - Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + + * - Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution. + + * - Neither the name of the Qualys, Inc. nor the names of its + * contributors may be used to endorse or promote products derived from + * this software without specific prior written permission. + * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS + * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT + * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR + * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT + * HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, + * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT + * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, + * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY + * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT + * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE + * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
+ ***************************************************************************/ + +/** + * @file + * @author Ivan Ristic <ivanr@webkreator.com> + */ + +#ifndef _HOOKS_H +#define _HOOKS_H + +#ifdef __cplusplus +extern "C" { +#endif + +typedef struct htp_hook_t htp_hook_t; +typedef struct htp_callback_t htp_callback_t; +/** + * Signature of a hook callback: it receives the user_data pointer given to + * htp_hook_run_all() or htp_hook_run_one() and returns a status code. + */ +typedef int (*htp_callback_fn_t) (void *); + +#include "htp.h" + +/** + * A hook: the collection of callbacks registered for it. + */ +struct htp_hook_t { + /** List of htp_callback_t pointers, in registration order. */ + htp_list_array_t *callbacks; +}; + +/** + * One registered callback. Records are allocated by htp_hook_register() + * and released by htp_hook_destroy(). + */ +struct htp_callback_t { + /** The function to invoke. */ + htp_callback_fn_t fn; +}; + +/** + * Creates a copy of the provided hook. The hook is allowed to be NULL, + * in which case this function simply returns a NULL. + * + * @param[in] hook + * @return A copy of the hook, or NULL (if the provided hook was NULL + * or, if it wasn't, if there was a memory allocation problem while + * constructing a copy). + */ +htp_hook_t *htp_hook_copy(const htp_hook_t *hook); + +/** + * Creates a new hook. + * + * @return New htp_hook_t structure on success, NULL on failure. + */ +htp_hook_t *htp_hook_create(void); + +/** + * Destroys an existing hook. It is all right to send a NULL + * to this method because it will simply return straight away. + * + * @param[in] hook + */ +void htp_hook_destroy(htp_hook_t *hook); + +/** + * Registers a new callback with the hook. If *hook is NULL, a new hook + * is created first and stored in *hook. + * + * @param[in] hook + * @param[in] callback_fn + * @return HTP_OK on success, HTP_ERROR on memory allocation error. + */ +htp_status_t htp_hook_register(htp_hook_t **hook, const htp_callback_fn_t callback_fn); + +/** + * Runs all the callbacks associated with a given hook. Only stops if + * one of the callbacks returns an error (HTP_ERROR) or stop (HTP_STOP). + * + * @param[in] hook + * @param[in] user_data + * @return HTP_OK if at least one hook ran successfully, HTP_STOP if there was + * no error but processing should stop, and HTP_ERROR or any other value + * less than zero on error.
+ */ +htp_status_t htp_hook_run_all(htp_hook_t *hook, void *user_data); + +/** + * Run callbacks one by one until one of them accepts to service the hook. + * + * @param[in] hook + * @param[in] user_data + * @return HTP_OK if a hook was found to process the callback, HTP_DECLINED if + * no hook could be found, HTP_STOP if a hook signalled the processing + * to stop, and HTP_ERROR or any other value less than zero on error. + */ +htp_status_t htp_hook_run_one(htp_hook_t *hook, void *user_data); + +#ifdef __cplusplus +} +#endif + +#endif /* _HOOKS_H */ diff --git a/htp/htp_list.c b/htp/htp_list.c new file mode 100644 index 0000000..b7c42bf --- /dev/null +++ b/htp/htp_list.c @@ -0,0 +1,360 @@ +/*************************************************************************** + * Copyright (c) 2009-2010 Open Information Security Foundation + * Copyright (c) 2010-2013 Qualys, Inc. + * All rights reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions are + * met: + * + * - Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + + * - Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution. + + * - Neither the name of the Qualys, Inc. nor the names of its + * contributors may be used to endorse or promote products derived from + * this software without specific prior written permission. + * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS + * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT + * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR + * A PARTICULAR PURPOSE ARE DISCLAIMED. 
IN NO EVENT SHALL THE COPYRIGHT + * HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, + * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT + * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, + * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY + * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT + * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE + * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + ***************************************************************************/ + +/** + * @file + * @author Ivan Ristic <ivanr@webkreator.com> + */ + +#include "htp_config_auto.h" + +#include "htp_private.h" + +// Array-backed list + +htp_status_t htp_list_array_init(htp_list_t *l, size_t size) { + // Allocate the initial batch of elements. + l->elements = malloc(size * sizeof (void *)); + if (l->elements == NULL) { + return HTP_ERROR; + } + + // Initialize the structure. + l->first = 0; + l->last = 0; + l->current_size = 0; + l->max_size = size; + + return HTP_OK; +} + +htp_list_t *htp_list_array_create(size_t size) { + // It makes no sense to create a zero-size list. + if (size == 0) return NULL; + + // Allocate the list structure. + htp_list_array_t *l = calloc(1, sizeof (htp_list_array_t)); + if (l == NULL) return NULL; + + if (htp_list_array_init(l, size) == HTP_ERROR) { + free(l); + return NULL; + } + + return (htp_list_t *) l; +} + +void htp_list_array_clear(htp_list_array_t *l) { + if (l == NULL) return; + + // Continue using already allocated memory; just reset the fields. 
+ l->first = 0; + l->last = 0; + l->current_size = 0; +} + +void htp_list_array_destroy(htp_list_array_t *l) { + if (l == NULL) return; + + free(l->elements); + free(l); +} + +void htp_list_array_release(htp_list_array_t *l) { + if (l == NULL) return; + + free(l->elements); +} + +void *htp_list_array_get(const htp_list_array_t *l, size_t idx) { + if (l == NULL) return NULL; + if (idx >= l->current_size) return NULL; + + if (l->first + idx < l->max_size) { + return (void *) l->elements[l->first + idx]; + } else { + return (void *) l->elements[idx - (l->max_size - l->first)]; + } +} + +void *htp_list_array_pop(htp_list_array_t *l) { + if (l == NULL) return NULL; + + const void *r = NULL; + + if (l->current_size == 0) { + return NULL; + } + + size_t pos = l->first + l->current_size - 1; + if (pos > l->max_size - 1) pos -= l->max_size; + + r = l->elements[pos]; + l->last = pos; + + l->current_size--; + + return (void *) r; +} + +htp_status_t htp_list_array_push(htp_list_array_t *l, void *e) { + if (l == NULL) return HTP_ERROR; + + // Check whether we're full + if (l->current_size >= l->max_size) { + size_t new_size = l->max_size * 2; + void *newblock = NULL; + + if (l->first == 0) { + // The simple case of expansion is when the first + // element in the list resides in the first slot. In + // that case we just add some new space to the end, + // adjust the max_size and that's that. + newblock = realloc(l->elements, new_size * sizeof (void *)); + if (newblock == NULL) return HTP_ERROR; + } else { + // When the first element is not in the first + // memory slot, we need to rearrange the order + // of the elements in order to expand the storage area. 
+ /* coverity[suspicious_sizeof] */ + newblock = malloc((size_t) (new_size * sizeof (void *))); + if (newblock == NULL) return HTP_ERROR; + + // Copy the beginning of the list to the beginning of the new memory block + /* coverity[suspicious_sizeof] */ + memcpy(newblock, + (void *) ((char *) l->elements + l->first * sizeof (void *)), + (size_t) ((l->max_size - l->first) * sizeof (void *))); + + // Append the second part of the list to the end + memcpy((void *) ((char *) newblock + (l->max_size - l->first) * sizeof (void *)), + (void *) l->elements, + (size_t) (l->first * sizeof (void *))); + + free(l->elements); + } + + l->first = 0; + l->last = l->current_size; + l->max_size = new_size; + l->elements = newblock; + } + + l->elements[l->last] = e; + l->current_size++; + + l->last++; + if (l->last == l->max_size) { + l->last = 0; + } + + return HTP_OK; +} + +htp_status_t htp_list_array_replace(htp_list_array_t *l, size_t idx, void *e) { + if (l == NULL) return HTP_ERROR; + + if (idx + 1 > l->current_size) return HTP_DECLINED; + + l->elements[(l->first + idx) % l->max_size] = e; + + return HTP_OK; +} + +size_t htp_list_array_size(const htp_list_array_t *l) { + if (l == NULL) return HTP_ERROR; + + return l->current_size; +} + +void *htp_list_array_shift(htp_list_array_t *l) { + if (l == NULL) return NULL; + + void *r = NULL; + + if (l->current_size == 0) { + return NULL; + } + + r = l->elements[l->first]; + l->first++; + if (l->first == l->max_size) { + l->first = 0; + } + + l->current_size--; + + return r; +} + +#if 0 +// Linked list + +htp_list_linked_t *htp_list_linked_create(void) { + htp_list_linked_t *l = calloc(1, sizeof (htp_list_linked_t)); + if (l == NULL) return NULL; + + return l; +} + +void htp_list_linked_destroy(htp_list_linked_t *l) { + if (l == NULL) return; + + // Free the list structures + htp_list_linked_element_t *temp = l->first; + htp_list_linked_element_t *prev = NULL; + while (temp != NULL) { + free(temp->data); + prev = temp; + temp = 
temp->next; + free(prev); + } + + // Free the list itself + free(l); +} + +int htp_list_linked_empty(const htp_list_linked_t *l) { + if (!l->first) { + return 1; + } else { + return 0; + } +} + +void *htp_list_linked_pop(htp_list_linked_t *l) { + void *r = NULL; + + if (!l->first) { + return NULL; + } + + // Find the last element + htp_list_linked_element_t *qprev = NULL; + htp_list_linked_element_t *qe = l->first; + while (qe->next != NULL) { + qprev = qe; + qe = qe->next; + } + + r = qe->data; + free(qe); + + if (qprev != NULL) { + qprev->next = NULL; + l->last = qprev; + } else { + l->first = NULL; + l->last = NULL; + } + + return r; +} + +int htp_list_linked_push(htp_list_linked_t *l, void *e) { + htp_list_linked_element_t *le = calloc(1, sizeof (htp_list_linked_element_t)); + if (le == NULL) return -1; + + // Remember the element + le->data = e; + + // If the queue is empty, make this element first + if (!l->first) { + l->first = le; + } + + if (l->last) { + l->last->next = le; + } + + l->last = le; + + return 1; +} + +void *htp_list_linked_shift(htp_list_linked_t *l) { + void *r = NULL; + + if (!l->first) { + return NULL; + } + + htp_list_linked_element_t *le = l->first; + l->first = le->next; + r = le->data; + + if (!l->first) { + l->last = NULL; + } + + free(le); + + return r; +} +#endif + +#if 0 + +int main(int argc, char **argv) { + htp_list_t *q = htp_list_array_create(4); + + htp_list_push(q, "1"); + htp_list_push(q, "2"); + htp_list_push(q, "3"); + htp_list_push(q, "4"); + + htp_list_shift(q); + htp_list_push(q, "5"); + htp_list_push(q, "6"); + + char *s = NULL; + while ((s = (char *) htp_list_pop(q)) != NULL) { + printf("Got: %s\n", s); + } + + printf("---\n"); + + htp_list_push(q, "1"); + htp_list_push(q, "2"); + htp_list_push(q, "3"); + htp_list_push(q, "4"); + + while ((s = (char *) htp_list_shift(q)) != NULL) { + printf("Got: %s\n", s); + } + + free(q); + + return 0; +} +#endif diff --git a/htp/htp_list.h b/htp/htp_list.h new file mode 100644 
index 0000000..8a2bd63 --- /dev/null +++ b/htp/htp_list.h @@ -0,0 +1,227 @@ +/*************************************************************************** + * Copyright (c) 2009-2010 Open Information Security Foundation + * Copyright (c) 2010-2013 Qualys, Inc. + * All rights reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions are + * met: + * + * - Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + + * - Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution. + + * - Neither the name of the Qualys, Inc. nor the names of its + * contributors may be used to endorse or promote products derived from + * this software without specific prior written permission. + * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS + * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT + * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR + * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT + * HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, + * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT + * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, + * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY + * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT + * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE + * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
+ ***************************************************************************/ + +/** + * @file + * @author Ivan Ristic <ivanr@webkreator.com> + */ + +#ifndef HTP_LIST_H +#define HTP_LIST_H + +#ifdef __cplusplus +extern "C" { +#endif + +// The default list implementation is array-based. The +// linked list version is not fully implemented yet. +#define htp_list_t htp_list_array_t +#define htp_list_add htp_list_array_push +#define htp_list_create htp_list_array_create +#define htp_list_init htp_list_array_init +#define htp_list_clear htp_list_array_clear +#define htp_list_destroy htp_list_array_destroy +#define htp_list_release htp_list_array_release +#define htp_list_get htp_list_array_get +#define htp_list_pop htp_list_array_pop +#define htp_list_push htp_list_array_push +#define htp_list_replace htp_list_array_replace +#define htp_list_size htp_list_array_size +#define htp_list_shift htp_list_array_shift + +// Data structures + +typedef struct htp_list_array_t htp_list_array_t; +typedef struct htp_list_linked_t htp_list_linked_t; + +#include "htp_core.h" +#include "bstr.h" + +// Functions + +/** + * Create new array-backed list. + * + * @param[in] size + * @return Newly created list. + */ +htp_list_array_t *htp_list_array_create(size_t size); + +/** + * Initialize an array-backed list. + * + * @param[in] l + * @param[in] size + * @return HTP_OK or HTP_ERROR if allocation failed + */ +htp_status_t htp_list_array_init(htp_list_array_t *l, size_t size); + +/** + * Remove all elements from the list. It is the responsibility of the caller + * to iterate over list elements and deallocate them if necessary, prior to + * invoking this function. + * + * @param[in] l + */ +void htp_list_array_clear(htp_list_array_t *l); + +/** + * Free the memory occupied by this list. This function assumes + * the elements held by the list were freed beforehand. 
+ * + * @param[in] l + */ +void htp_list_array_destroy(htp_list_array_t *l); + +/** + * Free the memory occupied by this list, except itself. + * This function assumes the elements held by the list + * were freed beforehand. + * + * @param[in] l + */ +void htp_list_array_release(htp_list_array_t *l); + +/** + * Find the element at the given index. + * + * @param[in] l + * @param[in] idx + * @return the desired element, or NULL if the list is too small, or + * if the element at that position carries a NULL + */ +void *htp_list_array_get(const htp_list_array_t *l, size_t idx); + +/** + * Remove one element from the end of the list. + * + * @param[in] l + * @return The removed element, or NULL if the list is empty. + */ +void *htp_list_array_pop(htp_list_array_t *l); + +/** + * Add new element to the end of the list, expanding the list as necessary. + * + * @param[in] l + * @param[in] e + * @return HTP_OK on success or HTP_ERROR on failure. + * + */ +htp_status_t htp_list_array_push(htp_list_array_t *l, void *e); + +/** + * Replace the element at the given index with the provided element. + * + * @param[in] l + * @param[in] idx + * @param[in] e + * + * @return HTP_OK if an element with the given index was replaced; HTP_DECLINED + * if the desired index does not exist; HTP_ERROR if the list is NULL. + */ +htp_status_t htp_list_array_replace(htp_list_array_t *l, size_t idx, void *e); + +/** + * Returns the size of the list. + * + * @param[in] l + * @return List size. + */ +size_t htp_list_array_size(const htp_list_array_t *l); + +/** + * Remove one element from the beginning of the list. + * + * @param[in] l + * @return The removed element, or NULL if the list is empty. + */ +void *htp_list_array_shift(htp_list_array_t *l); + + +// Linked list + +/** + * Create a new linked list. + * + * @return The newly created list, or NULL on memory allocation failure + */ +htp_list_linked_t *htp_list_linked_create(void); + +/** + * Destroy list. This function will not destroy any of the + * data stored in it.
You'll have to do that manually beforehand. + * + * @param[in] l + */ +void htp_list_linked_destroy(htp_list_linked_t *l); + +/** + * Is the list empty? + * + * @param[in] l + * @return 1 if the list is empty, 0 if it is not + */ +int htp_list_linked_empty(const htp_list_linked_t *l); + +/** + * Remove one element from the end of the list. + * + * @param[in] l + * @return Pointer to the removed element, or NULL if the list is empty. + */ +void *htp_list_linked_pop(htp_list_linked_t *l); + +/** + * Add element to list. + * + * @param[in] l + * @param[in] e + * @return HTP_OK on success, HTP_ERROR on error. + */ +htp_status_t htp_list_linked_push(htp_list_linked_t *l, void *e); + +/** + * Remove one element from the beginning of the list. + * + * @param[in] l + * @return Pointer to the removed element, or NULL if the list is empty. + */ +void *htp_list_linked_shift(htp_list_linked_t *l); + +#ifdef __cplusplus +} +#endif + +#endif /* HTP_LIST_H */ + diff --git a/htp/htp_list_private.h b/htp/htp_list_private.h new file mode 100644 index 0000000..6f462c0 --- /dev/null +++ b/htp/htp_list_private.h @@ -0,0 +1,73 @@ +/*************************************************************************** + * Copyright (c) 2009-2010 Open Information Security Foundation + * Copyright (c) 2010-2013 Qualys, Inc. + * All rights reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions are + * met: + * + * - Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + + * - Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution. + + * - Neither the name of the Qualys, Inc. 
nor the names of its + * contributors may be used to endorse or promote products derived from + * this software without specific prior written permission. + * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS + * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT + * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR + * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT + * HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, + * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT + * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, + * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY + * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT + * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE + * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + ***************************************************************************/ + +/** + * @file + * @author Ivan Ristic <ivanr@webkreator.com> + */ + +#ifndef HTP_LIST_PRIVATE_H +#define HTP_LIST_PRIVATE_H + +#ifdef __cplusplus +extern "C" { +#endif + +#include "htp_list.h" + +typedef struct htp_list_linked_element_t htp_list_linked_element_t; + +/** + * Array-backed list. The element pointers are kept in a circular buffer: + * both ends wrap around to slot 0 when they reach max_size. + */ +struct htp_list_array_t { + /** Slot index of the oldest element (the one shift removes). */ + size_t first; + /** Slot index the next pushed element is written to. */ + size_t last; + /** Number of slots allocated in elements. */ + size_t max_size; + /** Number of elements currently stored. */ + size_t current_size; + /** The slot array. */ + void **elements; +}; + +/** + * Node of the singly linked list variant. + */ +struct htp_list_linked_element_t { + /** Element carried by this node. */ + void *data; + /** Following node, or NULL for the last one. */ + htp_list_linked_element_t *next; +}; + +/** + * Linked list variant; its implementation in htp_list.c is compiled out. + */ +struct htp_list_linked_t { + /** Head node, or NULL when the list is empty. */ + htp_list_linked_element_t *first; + /** Tail node, or NULL when the list is empty. */ + htp_list_linked_element_t *last; +}; + +#ifdef __cplusplus +} +#endif + +#endif /* HTP_LIST_PRIVATE_H */ + diff --git a/htp/htp_multipart.c b/htp/htp_multipart.c new file mode 100644 index 0000000..ea73072 --- /dev/null +++ b/htp/htp_multipart.c @@ -0,0 +1,1615 @@ +/*************************************************************************** + * Copyright (c) 2009-2010 Open Information Security Foundation +
* Copyright (c) 2010-2013 Qualys, Inc. + * All rights reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions are + * met: + * + * - Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + + * - Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution. + + * - Neither the name of the Qualys, Inc. nor the names of its + * contributors may be used to endorse or promote products derived from + * this software without specific prior written permission. + * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS + * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT + * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR + * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT + * HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, + * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT + * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, + * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY + * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT + * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE + * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + ***************************************************************************/ + +/** + * @file + * @author Ivan Ristic <ivanr@webkreator.com> + */ + +#include "htp_config_auto.h" + +#include "htp_private.h" +

/**
 * Determines the type of a Content-Disposition parameter from its name.
 * The comparison is exact (case-sensitive).
 *
 * @param[in] data     Buffer holding the header value.
 * @param[in] startpos Offset of the first byte of the parameter name.
 * @param[in] endpos   Offset one past the last byte of the parameter name.
 * @return CD_PARAM_OTHER, CD_PARAM_NAME or CD_PARAM_FILENAME.
 */
static int htp_mpartp_cd_param_type(unsigned char *data, size_t startpos, size_t endpos) {
    if ((endpos - startpos) == 4) {
        if (memcmp(data + startpos, "name", 4) == 0) return CD_PARAM_NAME;
    } else if ((endpos - startpos) == 8) {
        if (memcmp(data + startpos, "filename", 8) == 0) return CD_PARAM_FILENAME;
    }

    return CD_PARAM_OTHER;
}

/**
 * Returns the multipart structure embedded in the parser.
 *
 * @param[in] parser
 * @return Pointer to the parser's multipart structure.
 */
htp_multipart_t *htp_mpartp_get_multipart(htp_mpartp_t *parser) {
    return &(parser->multipart);
}

/**
 * Decodes a C-D header value. This is impossible to do correctly without a
 * parsing personality because most browsers are broken:
 *  - Firefox encodes " as \", and \ is not encoded.
 *  - Chrome encodes " as %22.
 *  - IE encodes " as \", and \ is not encoded.
 *  - Opera encodes " as \" and \ as \\.
 *
 * The implementation drops a backslash that precedes a double quote or
 * another backslash, and shortens the string accordingly.
 *
 * @param[in] b String to decode in place.
 */
static void htp_mpart_decode_quoted_cd_value_inplace(bstr *b) {
    unsigned char *s = bstr_ptr(b);
    unsigned char *d = bstr_ptr(b);
    size_t len = bstr_len(b);
    size_t pos = 0;

    while (pos < len) {
        // Ignore \ when before \ or ".
        if ((*s == '\\') && (pos + 1 < len) && ((*(s + 1) == '"') || (*(s + 1) == '\\'))) {
            s++;
            pos++;
        }

        *d++ = *s++;
        pos++;
    }

    // The read pointer is ahead of the write pointer by the number of
    // backslashes that were dropped.
    bstr_adjust_len(b, len - (s - d));
}

/**
 * Records a Content-Disposition syntax error on the parser.
 *
 * @param[in] part
 * @return Always HTP_DECLINED, so that callers can return the result directly.
 */
static htp_status_t htp_mpart_cd_syntax_invalid(htp_multipart_part_t *part) {
    part->parser->multipart.flags |= HTP_MULTIPART_CD_SYNTAX_INVALID;
    return HTP_DECLINED;
}

/**
 * Parses the Content-Disposition part header. The accepted form is
 * "form-data" followed by one or more ;name="value" parameters, of which
 * only "name" and "filename" are recognized. Problems are also recorded in
 * the parser's multipart flags.
 *
 * @param[in] part
 * @return HTP_OK on success (header found and parsed), HTP_DECLINED if there is no C-D header or if
 *         it could not be processed, and HTP_ERROR on fatal error.
 */
htp_status_t htp_mpart_part_parse_c_d(htp_multipart_part_t *part) {
    // Find the C-D header.
    htp_header_t *h = htp_table_get_c(part->headers, "content-disposition");
    if (h == NULL) {
        part->parser->multipart.flags |= HTP_MULTIPART_PART_UNKNOWN;
        return HTP_DECLINED;
    }

    // Require "form-data" at the beginning of the header.
    if (bstr_index_of_c(h->value, "form-data") != 0) return htp_mpart_cd_syntax_invalid(part);

    // The parsing starts here.
    unsigned char *data = bstr_ptr(h->value);
    size_t len = bstr_len(h->value);
    size_t pos = 9; // Start after "form-data"

    // Main parameter parsing loop (once per parameter).
    while (pos < len) {
        // Ignore whitespace.
        while ((pos < len) && isspace(data[pos])) pos++;
        if (pos == len) return htp_mpart_cd_syntax_invalid(part);

        // Expecting a semicolon.
        if (data[pos] != ';') return htp_mpart_cd_syntax_invalid(part);
        pos++;

        // Go over the whitespace before parameter name.
        while ((pos < len) && isspace(data[pos])) pos++;
        if (pos == len) return htp_mpart_cd_syntax_invalid(part);

        // Found the starting position of the parameter name.
        size_t start = pos;

        // Look for the ending position.
        while ((pos < len) && (!isspace(data[pos]) && (data[pos] != '='))) pos++;
        if (pos == len) return htp_mpart_cd_syntax_invalid(part);

        // Ending position is in "pos" now.

        // Determine parameter type ("name", "filename", or other).
        int param_type = htp_mpartp_cd_param_type(data, start, pos);

        // Ignore whitespace after parameter name, if any.
        while ((pos < len) && isspace(data[pos])) pos++;
        if (pos == len) return htp_mpart_cd_syntax_invalid(part);

        // Equals.
        if (data[pos] != '=') return htp_mpart_cd_syntax_invalid(part);
        pos++;

        // Go over the whitespace before the parameter value.
        while ((pos < len) && isspace(data[pos])) pos++;
        if (pos == len) return htp_mpart_cd_syntax_invalid(part);

        // Expecting a double quote. Anything else is a bare string or
        // non-standard quoting, which we don't like.
        if (data[pos] != '"') return htp_mpart_cd_syntax_invalid(part);

        pos++; // Over the double quote.

        // We have the starting position of the value.
        start = pos;

        // Find the end of the value.
        while ((pos < len) && (data[pos] != '"')) {
            // Check for escaping.
            if (data[pos] == '\\') {
                // A backslash as the last character in the C-D header.
                if (pos + 1 >= len) return htp_mpart_cd_syntax_invalid(part);

                // Allow " and \ to be escaped.
                if ((data[pos + 1] == '"') || (data[pos + 1] == '\\')) {
                    // Go over the quoted character.
                    pos++;
                }
            }

            pos++;
        }

        // If we've reached the end of the string that means the
        // value was not terminated properly (the second double quote is missing).
        if (pos == len) return htp_mpart_cd_syntax_invalid(part);

        // Expecting the terminating double quote.
        if (data[pos] != '"') return htp_mpart_cd_syntax_invalid(part);

        pos++; // Over the terminating double quote.

        // Finally, process the parameter value. Because "pos" is now one
        // past the closing quote, the value is pos - start - 1 bytes long.

        switch (param_type) {
            case CD_PARAM_NAME:
                // Check that we have not seen the name parameter already.
                if (part->name != NULL) {
                    part->parser->multipart.flags |= HTP_MULTIPART_CD_PARAM_REPEATED;
                    return HTP_DECLINED;
                }

                part->name = bstr_dup_mem(data + start, pos - start - 1);
                if (part->name == NULL) return HTP_ERROR;

                htp_mpart_decode_quoted_cd_value_inplace(part->name);

                break;

            case CD_PARAM_FILENAME:
                // Check that we have not seen the filename parameter already.
                if (part->file != NULL) {
                    part->parser->multipart.flags |= HTP_MULTIPART_CD_PARAM_REPEATED;
                    return HTP_DECLINED;
                }

                part->file = calloc(1, sizeof (htp_file_t));
                if (part->file == NULL) return HTP_ERROR;

                part->file->fd = -1;
                part->file->source = HTP_FILE_MULTIPART;

                part->file->filename = bstr_dup_mem(data + start, pos - start - 1);
                if (part->file->filename == NULL) {
                    free(part->file);
                    // Do not leave the part pointing at released memory;
                    // whoever cleans the part up would touch it again.
                    part->file = NULL;
                    return HTP_ERROR;
                }

                htp_mpart_decode_quoted_cd_value_inplace(part->file->filename);

                break;

            default:
                // Unknown parameter.
                part->parser->multipart.flags |= HTP_MULTIPART_CD_PARAM_UNKNOWN;
                return HTP_DECLINED;
                break;
        }

        // Continue to parse the next parameter, if any.
    }

    return HTP_OK;
}

/**
 * Parses the Content-Type part header, if present.
 *
 * @param[in] part
 * @return HTP_OK on success, HTP_DECLINED if the C-T header is not present, and HTP_ERROR on failure.
 */
static htp_status_t htp_mpart_part_parse_c_t(htp_multipart_part_t *part) {
    htp_header_t *h = (htp_header_t *) htp_table_get_c(part->headers, "content-type");
    if (h == NULL) return HTP_DECLINED;
    return htp_parse_ct_header(h->value, &part->content_type);
}

/**
 * Processes part headers: Content-Disposition first, then Content-Type.
 * Only fatal errors are propagated; a declined header is not a failure.
 *
 * @param[in] part
 * @return HTP_OK on success, HTP_ERROR on failure.
 */
htp_status_t htp_mpart_part_process_headers(htp_multipart_part_t *part) {
    if (htp_mpart_part_parse_c_d(part) == HTP_ERROR) return HTP_ERROR;
    if (htp_mpart_part_parse_c_t(part) == HTP_ERROR) return HTP_ERROR;

    return HTP_OK;
}

/** + * Parses one part header. + * + * @param[in] part + * @param[in] data + * @param[in] len + * @return HTP_OK on success, HTP_DECLINED on parsing error, HTP_ERROR on fatal error. + */ +htp_status_t htp_mpartp_parse_header(htp_multipart_part_t *part, const unsigned char *data, size_t len) { + size_t name_start, name_end; + size_t value_start, value_end; + + // We do not allow NUL bytes here.
+ if (memchr(data, '\0', len) != NULL) { + part->parser->multipart.flags |= HTP_MULTIPART_NUL_BYTE; + return HTP_DECLINED; + } + + name_start = 0; + + // Look for the starting position of the name first. + size_t colon_pos = 0; + + while ((colon_pos < len)&&(htp_is_space(data[colon_pos]))) colon_pos++; + if (colon_pos != 0) { + // Whitespace before header name. + part->parser->multipart.flags |= HTP_MULTIPART_PART_HEADER_INVALID; + return HTP_DECLINED; + } + + // Now look for the colon. + while ((colon_pos < len) && (data[colon_pos] != ':')) colon_pos++; + + if (colon_pos == len) { + // Missing colon. + part->parser->multipart.flags |= HTP_MULTIPART_PART_HEADER_INVALID; + return HTP_DECLINED; + } + + if (colon_pos == 0) { + // Empty header name. + part->parser->multipart.flags |= HTP_MULTIPART_PART_HEADER_INVALID; + return HTP_DECLINED; + } + + name_end = colon_pos; + + // Ignore LWS after header name. + size_t prev = name_end; + while ((prev > name_start) && (htp_is_lws(data[prev - 1]))) { + prev--; + name_end--; + + // LWS after field name. Not allowing for now. + part->parser->multipart.flags |= HTP_MULTIPART_PART_HEADER_INVALID; + return HTP_DECLINED; + } + + // Header value. + + value_start = colon_pos + 1; + + // Ignore LWS before value. + while ((value_start < len) && (htp_is_lws(data[value_start]))) value_start++; + + if (value_start == len) { + // No header value. + part->parser->multipart.flags |= HTP_MULTIPART_PART_HEADER_INVALID; + return HTP_DECLINED; + } + + // Assume the value is at the end. + value_end = len; + + // Check that the header name is a token. + size_t i = name_start; + while (i < name_end) { + if (!htp_is_token(data[i])) { + part->parser->multipart.flags |= HTP_MULTIPART_PART_HEADER_INVALID; + return HTP_DECLINED; + } + + i++; + } + + // Now extract the name and the value. 
+ htp_header_t *h = calloc(1, sizeof (htp_header_t)); + if (h == NULL) return HTP_ERROR; + + h->name = bstr_dup_mem(data + name_start, name_end - name_start); + if (h->name == NULL) { + free(h); + return HTP_ERROR; + } + + h->value = bstr_dup_mem(data + value_start, value_end - value_start); + if (h->value == NULL) { + bstr_free(h->name); + free(h); + return HTP_ERROR; + } + + if ((bstr_cmp_c_nocase(h->name, "content-disposition") != 0) && (bstr_cmp_c_nocase(h->name, "content-type") != 0)) { + part->parser->multipart.flags |= HTP_MULTIPART_PART_HEADER_UNKNOWN; + } + + // Check if the header already exists. + htp_header_t * h_existing = htp_table_get(part->headers, h->name); + if (h_existing != NULL) { + // Add to the existing header. + bstr *new_value = bstr_expand(h_existing->value, bstr_len(h_existing->value) + + 2 + bstr_len(h->value)); + if (new_value == NULL) { + bstr_free(h->name); + bstr_free(h->value); + free(h); + return HTP_ERROR; + } + + h_existing->value = new_value; + bstr_add_mem_noex(h_existing->value, ", ", 2); + bstr_add_noex(h_existing->value, h->value); + + // The header is no longer needed. + bstr_free(h->name); + bstr_free(h->value); + free(h); + + // Keep track of same-name headers. + h_existing->flags |= HTP_MULTIPART_PART_HEADER_REPEATED; + part->parser->multipart.flags |= HTP_MULTIPART_PART_HEADER_REPEATED; + } else { + // Add as a new header. + if (htp_table_add(part->headers, h->name, h) != HTP_OK) { + bstr_free(h->value); + bstr_free(h->name); + free(h); + return HTP_ERROR; + } + } + + return HTP_OK; +} + +/** + * Creates a new Multipart part. + * + * @param[in] parser + * @return New part instance, or NULL on memory allocation failure. 
+ */ +htp_multipart_part_t *htp_mpart_part_create(htp_mpartp_t *parser) { + htp_multipart_part_t * part = calloc(1, sizeof (htp_multipart_part_t)); + if (part == NULL) return NULL; + + part->headers = htp_table_create(4); + if (part->headers == NULL) { + free(part); + return NULL; + } + + part->parser = parser; + bstr_builder_clear(parser->part_data_pieces); + bstr_builder_clear(parser->part_header_pieces); + + return part; +} + +/** + * Destroys a part. + * + * @param[in] part + * @param[in] gave_up_data + */ +void htp_mpart_part_destroy(htp_multipart_part_t *part, int gave_up_data) { + if (part == NULL) return; + + if (part->file != NULL) { + bstr_free(part->file->filename); + + if (part->file->tmpname != NULL) { + unlink(part->file->tmpname); + free(part->file->tmpname); + } + + free(part->file); + part->file = NULL; + } + + if ((!gave_up_data) || (part->type != MULTIPART_PART_TEXT)) { + bstr_free(part->name); + bstr_free(part->value); + } + + bstr_free(part->content_type); + + if (part->headers != NULL) { + htp_header_t *h = NULL; + for (size_t i = 0, n = htp_table_size(part->headers); i < n; i++) { + h = htp_table_get_index(part->headers, i, NULL); + bstr_free(h->name); + bstr_free(h->value); + free(h); + } + + htp_table_destroy(part->headers); + } + + free(part); +} + +/** + * Finalizes part processing. + * + * @param[in] part + * @return HTP_OK on success, HTP_ERROR on failure. + */ +htp_status_t htp_mpart_part_finalize_data(htp_multipart_part_t *part) { + // Determine if this part is the epilogue. + + if (part->parser->multipart.flags & HTP_MULTIPART_SEEN_LAST_BOUNDARY) { + if (part->type == MULTIPART_PART_UNKNOWN) { + // Assume that the unknown part after the last boundary is the epilogue. + part->parser->current_part->type = MULTIPART_PART_EPILOGUE; + + // But if we've already seen a part we thought was the epilogue, + // raise HTP_MULTIPART_PART_UNKNOWN. Multiple epilogues are not allowed. 
+ if (part->parser->multipart.flags & HTP_MULTIPART_HAS_EPILOGUE) { + part->parser->multipart.flags |= HTP_MULTIPART_PART_UNKNOWN; + } + + part->parser->multipart.flags |= HTP_MULTIPART_HAS_EPILOGUE; + } else { + part->parser->multipart.flags |= HTP_MULTIPART_PART_AFTER_LAST_BOUNDARY; + } + } + + // Sanity checks. + + // Have we seen complete part headers? If we have not, that means that the part ended prematurely. + if ((part->parser->current_part->type != MULTIPART_PART_EPILOGUE) && (part->parser->current_part_mode != MODE_DATA)) { + part->parser->multipart.flags |= HTP_MULTIPART_PART_INCOMPLETE; + } + + // Have we been able to determine the part type? If not, this means + // that the part did not contain the C-D header. + if (part->type == MULTIPART_PART_UNKNOWN) { + part->parser->multipart.flags |= HTP_MULTIPART_PART_UNKNOWN; + } + + // Finalize part value. + + if (part->type == MULTIPART_PART_FILE) { + // Notify callbacks about the end of the file. + htp_mpartp_run_request_file_data_hook(part, NULL, 0); + + // If we are storing the file to disk, close the file descriptor. + if (part->file->fd != -1) { + close(part->file->fd); + } + } else { + // Combine value pieces into a single buffer. + if (bstr_builder_size(part->parser->part_data_pieces) > 0) { + part->value = bstr_builder_to_str(part->parser->part_data_pieces); + bstr_builder_clear(part->parser->part_data_pieces); + } + } + + return HTP_OK; +} + +htp_status_t htp_mpartp_run_request_file_data_hook(htp_multipart_part_t *part, const unsigned char *data, size_t len) { + if (part->parser->cfg == NULL) return HTP_OK; + + // Keep track of the file length. + part->file->len += len; + + // Package data for the callbacks. 
+ htp_file_data_t file_data; + file_data.file = part->file; + file_data.data = data; + file_data.len = (const size_t) len; + + // Send data to callbacks + htp_status_t rc = htp_hook_run_all(part->parser->cfg->hook_request_file_data, &file_data); + if (rc != HTP_OK) return rc; + + return HTP_OK; +} + +/** + * Handles part data. + * + * @param[in] part + * @param[in] data + * @param[in] len + * @param[in] is_line + * @return HTP_OK on success, HTP_ERROR on failure. + */ +htp_status_t htp_mpart_part_handle_data(htp_multipart_part_t *part, const unsigned char *data, size_t len, int is_line) { + #if HTP_DEBUG + fprintf(stderr, "Part type %d mode %d is_line %d\n", part->type, part->parser->current_part_mode, is_line); + fprint_raw_data(stderr, "htp_mpart_part_handle_data: data chunk", data, len); + #endif + + // Keep track of raw part length. + part->len += len; + + // If we're processing a part that came after the last boundary, then we're not sure if it + // is the epilogue part or some other part (in case of evasion attempt). For that reason we + // will keep all its data in the part_data_pieces structure. If it ends up not being the + // epilogue, this structure will be cleared. + if ((part->parser->multipart.flags & HTP_MULTIPART_SEEN_LAST_BOUNDARY) && (part->type == MULTIPART_PART_UNKNOWN)) { + bstr_builder_append_mem(part->parser->part_data_pieces, data, len); + } + + if (part->parser->current_part_mode == MODE_LINE) { + // Line mode. + + if (is_line) { + // End of the line. + + bstr *line = NULL; + + // If this line came to us in pieces, combine them now into a single buffer. + if (bstr_builder_size(part->parser->part_header_pieces) > 0) { + bstr_builder_append_mem(part->parser->part_header_pieces, data, len); + line = bstr_builder_to_str(part->parser->part_header_pieces); + if (line == NULL) return HTP_ERROR; + bstr_builder_clear(part->parser->part_header_pieces); + + data = bstr_ptr(line); + len = bstr_len(line); + } + + // Ignore the line endings. 
+ if (len > 1) { + if (data[len - 1] == LF) len--; + if (data[len - 1] == CR) len--; + } else if (len > 0) { + if (data[len - 1] == LF) len--; + } + + // Is it an empty line? + if (len == 0) { + // Empty line; process headers and switch to data mode. + + // Process the pending header, if any. + if (part->parser->pending_header_line != NULL) { + if (htp_mpartp_parse_header(part, bstr_ptr(part->parser->pending_header_line), + bstr_len(part->parser->pending_header_line)) == HTP_ERROR) + { + bstr_free(line); + return HTP_ERROR; + } + + bstr_free(part->parser->pending_header_line); + part->parser->pending_header_line = NULL; + } + + if (htp_mpart_part_process_headers(part) == HTP_ERROR) { + bstr_free(line); + return HTP_ERROR; + } + + part->parser->current_part_mode = MODE_DATA; + bstr_builder_clear(part->parser->part_header_pieces); + + if (part->file != NULL) { + // Changing part type because we have a filename. + part->type = MULTIPART_PART_FILE; + + if ((part->parser->extract_files) && (part->parser->file_count < part->parser->extract_limit)) { + char buf[255]; + + strncpy(buf, part->parser->extract_dir, 254); + strncat(buf, "/libhtp-multipart-file-XXXXXX", 254 - strlen(buf)); + + part->file->tmpname = strdup(buf); + if (part->file->tmpname == NULL) { + bstr_free(line); + return HTP_ERROR; + } + + mode_t previous_mask = umask(S_IXUSR | S_IRWXG | S_IRWXO); + part->file->fd = mkstemp(part->file->tmpname); + umask(previous_mask); + + if (part->file->fd < 0) { + bstr_free(line); + return HTP_ERROR; + } + + part->parser->file_count++; + } + } else if (part->name != NULL) { + // Changing part type because we have a name. + part->type = MULTIPART_PART_TEXT; + bstr_builder_clear(part->parser->part_data_pieces); + } else { + // Do nothing; the type stays MULTIPART_PART_UNKNOWN. + } + } else { + // Not an empty line. + + // Is there a pending header? 
+ if (part->parser->pending_header_line == NULL) { + if (line != NULL) { + part->parser->pending_header_line = line; + line = NULL; + } else { + part->parser->pending_header_line = bstr_dup_mem(data, len); + if (part->parser->pending_header_line == NULL) return HTP_ERROR; + } + } else { + // Is this a folded line? + if (isspace(data[0])) { + // Folding; add to the existing line. + part->parser->multipart.flags |= HTP_MULTIPART_PART_HEADER_FOLDING; + part->parser->pending_header_line = bstr_add_mem(part->parser->pending_header_line, data, len); + if (part->parser->pending_header_line == NULL) { + bstr_free(line); + return HTP_ERROR; + } + } else { + // Process the pending header line. + if (htp_mpartp_parse_header(part, bstr_ptr(part->parser->pending_header_line), + bstr_len(part->parser->pending_header_line)) == HTP_ERROR) + { + bstr_free(line); + return HTP_ERROR; + } + + bstr_free(part->parser->pending_header_line); + + if (line != NULL) { + part->parser->pending_header_line = line; + line = NULL; + } else { + part->parser->pending_header_line = bstr_dup_mem(data, len); + if (part->parser->pending_header_line == NULL) return HTP_ERROR; + } + } + } + } + + bstr_free(line); + line = NULL; + } else { + // Not end of line; keep the data chunk for later. + bstr_builder_append_mem(part->parser->part_header_pieces, data, len); + } + } else { + // Data mode; keep the data chunk for later (but not if it is a file). + switch (part->type) { + case MULTIPART_PART_EPILOGUE: + case MULTIPART_PART_PREAMBLE: + case MULTIPART_PART_TEXT: + case MULTIPART_PART_UNKNOWN: + // Make a copy of the data in RAM. + bstr_builder_append_mem(part->parser->part_data_pieces, data, len); + break; + + case MULTIPART_PART_FILE: + // Invoke file data callbacks. + htp_mpartp_run_request_file_data_hook(part, data, len); + + // Optionally, store the data in a file. 
+ if (part->file->fd != -1) { + if (write(part->file->fd, data, len) < 0) { + return HTP_ERROR; + } + } + break; + + default: + // Internal error. + return HTP_ERROR; + break; + } + } + + return HTP_OK; +} + +/** + * Handles data, creating new parts as necessary. + * + * @param[in] mpartp + * @param[in] data + * @param[in] len + * @param[in] is_line + * @return HTP_OK on success, HTP_ERROR on failure. + */ +static htp_status_t htp_mpartp_handle_data(htp_mpartp_t *parser, const unsigned char *data, size_t len, int is_line) { + if (len == 0) return HTP_OK; + + // Do we have a part already? + if (parser->current_part == NULL) { + // Create a new part. + parser->current_part = htp_mpart_part_create(parser); + if (parser->current_part == NULL) return HTP_ERROR; + + if (parser->multipart.boundary_count == 0) { + // We haven't seen a boundary yet, so this must be the preamble part. + parser->current_part->type = MULTIPART_PART_PREAMBLE; + parser->multipart.flags |= HTP_MULTIPART_HAS_PREAMBLE; + parser->current_part_mode = MODE_DATA; + } else { + // Part after preamble. + parser->current_part_mode = MODE_LINE; + } + + // Add part to the list. + htp_list_push(parser->multipart.parts, parser->current_part); + + #ifdef HTP_DEBUG + fprintf(stderr, "Created new part type %d\n", parser->current_part->type); + #endif + } + + // Send data to the part. + return htp_mpart_part_handle_data(parser->current_part, data, len, is_line); +} + +/** + * Handles a boundary event, which means that it will finalize a part if one exists. + * + * @param[in] mpartp + * @return HTP_OK on success, HTP_ERROR on failure. 
+ */ +static htp_status_t htp_mpartp_handle_boundary(htp_mpartp_t *parser) { + #if HTP_DEBUG + fprintf(stderr, "htp_mpartp_handle_boundary\n"); + #endif + + if (parser->current_part != NULL) { + if (htp_mpart_part_finalize_data(parser->current_part) != HTP_OK) { + return HTP_ERROR; + } + + // We're done with this part + parser->current_part = NULL; + + // Revert to line mode + parser->current_part_mode = MODE_LINE; + } + + return HTP_OK; +} + +static htp_status_t htp_mpartp_init_boundary(htp_mpartp_t *parser, unsigned char *data, size_t len) { + if ((parser == NULL) || (data == NULL)) return HTP_ERROR; + + // Copy the boundary and convert it to lowercase. + + parser->multipart.boundary_len = len + 4; + parser->multipart.boundary = malloc(parser->multipart.boundary_len + 1); + if (parser->multipart.boundary == NULL) return HTP_ERROR; + + parser->multipart.boundary[0] = CR; + parser->multipart.boundary[1] = LF; + parser->multipart.boundary[2] = '-'; + parser->multipart.boundary[3] = '-'; + + for (size_t i = 0; i < len; i++) { + parser->multipart.boundary[i + 4] = data[i]; + } + + parser->multipart.boundary[parser->multipart.boundary_len] = '\0'; + + // We're starting in boundary-matching mode. The first boundary can appear without the + // CRLF, and our starting state expects that. If we encounter non-boundary data, the + // state will switch to data mode. Then, if the data is CRLF or LF, we will go back + // to boundary matching. Thus, we handle all the possibilities. 
+ + parser->parser_state = STATE_BOUNDARY; + parser->boundary_match_pos = 2; + + return HTP_OK; +} + +htp_mpartp_t *htp_mpartp_create(htp_cfg_t *cfg, bstr *boundary, uint64_t flags) { + if ((cfg == NULL) || (boundary == NULL)) return NULL; + + htp_mpartp_t *parser = calloc(1, sizeof (htp_mpartp_t)); + if (parser == NULL) return NULL; + + parser->cfg = cfg; + + parser->boundary_pieces = bstr_builder_create(); + if (parser->boundary_pieces == NULL) { + htp_mpartp_destroy(parser); + return NULL; + } + + parser->part_data_pieces = bstr_builder_create(); + if (parser->part_data_pieces == NULL) { + htp_mpartp_destroy(parser); + return NULL; + } + + parser->part_header_pieces = bstr_builder_create(); + if (parser->part_header_pieces == NULL) { + htp_mpartp_destroy(parser); + return NULL; + } + + parser->multipart.parts = htp_list_create(64); + if (parser->multipart.parts == NULL) { + htp_mpartp_destroy(parser); + return NULL; + } + + parser->multipart.flags = flags; + parser->parser_state = STATE_INIT; + parser->extract_files = cfg->extract_request_files; + parser->extract_dir = cfg->tmpdir; + if (cfg->extract_request_files_limit >= 0) { + parser->extract_limit = cfg->extract_request_files_limit; + } else { + parser->extract_limit = DEFAULT_FILE_EXTRACT_LIMIT; + } + parser->handle_data = htp_mpartp_handle_data; + parser->handle_boundary = htp_mpartp_handle_boundary; + + // Initialize the boundary. + htp_status_t rc = htp_mpartp_init_boundary(parser, bstr_ptr(boundary), bstr_len(boundary)); + if (rc != HTP_OK) { + htp_mpartp_destroy(parser); + return NULL; + } + + // On success, the ownership of the boundary parameter + // is transferred to us. We made a copy, and so we + // don't need it any more. 
+ bstr_free(boundary); + + return parser; +} + +void htp_mpartp_destroy(htp_mpartp_t *parser) { + if (parser == NULL) return; + + if (parser->multipart.boundary != NULL) { + free(parser->multipart.boundary); + } + + bstr_builder_destroy(parser->boundary_pieces); + bstr_builder_destroy(parser->part_header_pieces); + bstr_free(parser->pending_header_line); + bstr_builder_destroy(parser->part_data_pieces); + + // Free the parts. + if (parser->multipart.parts != NULL) { + for (size_t i = 0, n = htp_list_size(parser->multipart.parts); i < n; i++) { + htp_multipart_part_t * part = htp_list_get(parser->multipart.parts, i); + htp_mpart_part_destroy(part, parser->gave_up_data); + } + + htp_list_destroy(parser->multipart.parts); + } + + free(parser); +} + +/** + * Processes set-aside data. + * + * @param[in] mpartp + * @param[in] data + * @param[in] pos + * @param[in] startpos + * @param[in] return_pos + * @param[in] matched + * @return HTP_OK on success, HTP_ERROR on failure. + */ +static htp_status_t htp_martp_process_aside(htp_mpartp_t *parser, int matched) { + // The stored data pieces can contain up to one line. If we're in data mode and there + // was no boundary match, things are straightforward -- we process everything as data. + // If there was a match, we need to take care to not send the line ending as data, nor + // anything that follows (because it's going to be a part of the boundary). Similarly, + // when we are in line mode, we need to split the first data chunk, processing the first + // part as line and the second part as data. + + #ifdef HTP_DEBUG + fprintf(stderr, "mpartp_process_aside matched %d current_part_mode %d\n", matched, parser->current_part_mode); + #endif + + // Do we need to do any chunk splitting? + if (matched || (parser->current_part_mode == MODE_LINE)) { + // Line mode or boundary match + + // Process the CR byte, if set aside. + if ((!matched) && (parser->cr_aside)) { + // Treat as part data, when there is not a match. 
+ parser->handle_data(parser, (unsigned char *) &"\r", 1, /* not a line */ 0); + parser->cr_aside = 0; + } else { + // Treat as boundary, when there is a match. + parser->cr_aside = 0; + } + + // We know that we went to match a boundary because + // we saw a new line. Now we have to find that line and + // process it. It's either going to be in the current chunk, + // or in the first stored chunk. + if (bstr_builder_size(parser->boundary_pieces) > 0) { + int first = 1; + for (size_t i = 0, n = htp_list_size(parser->boundary_pieces->pieces); i < n; i++) { + bstr *b = htp_list_get(parser->boundary_pieces->pieces, i); + + if (first) { + first = 0; + + // Split the first chunk. + + if (!matched) { + // In line mode, we are OK with line endings. + parser->handle_data(parser, bstr_ptr(b), parser->boundary_candidate_pos, /* line */ 1); + } else { + // But if there was a match, the line ending belongs to the boundary. + unsigned char *dx = bstr_ptr(b); + size_t lx = parser->boundary_candidate_pos; + + // Remove LF or CRLF. + if ((lx > 0) && (dx[lx - 1] == LF)) { + lx--; + // Remove CR. + if ((lx > 0) && (dx[lx - 1] == CR)) { + lx--; + } + } + + parser->handle_data(parser, dx, lx, /* not a line */ 0); + } + + // The second part of the split chunks belongs to the boundary + // when matched, data otherwise. + if (!matched) { + parser->handle_data(parser, bstr_ptr(b) + parser->boundary_candidate_pos, + bstr_len(b) - parser->boundary_candidate_pos, /* not a line */ 0); + } + } else { + // Do not send data if there was a boundary match. The stored + // data belongs to the boundary. + if (!matched) { + parser->handle_data(parser, bstr_ptr(b), bstr_len(b), /* not a line */ 0); + } + } + } + + bstr_builder_clear(parser->boundary_pieces); + } + } else { + // Data mode and no match. + + // In data mode, we process the lone CR byte as data. 
+ if (parser->cr_aside) { + parser->handle_data(parser, (const unsigned char *)&"\r", 1, /* not a line */ 0); + parser->cr_aside = 0; + } + + // We then process any pieces that we might have stored, also as data. + if (bstr_builder_size(parser->boundary_pieces) > 0) { + for (size_t i = 0, n = htp_list_size(parser->boundary_pieces->pieces); i < n; i++) { + bstr *b = htp_list_get(parser->boundary_pieces->pieces, i); + parser->handle_data(parser, bstr_ptr(b), bstr_len(b), /* not a line */ 0); + } + + bstr_builder_clear(parser->boundary_pieces); + } + } + + return HTP_OK; +} + +htp_status_t htp_mpartp_finalize(htp_mpartp_t *parser) { + if (parser->current_part != NULL) { + // Process buffered data, if any. + htp_martp_process_aside(parser, 0); + + // Finalize the last part. + if (htp_mpart_part_finalize_data(parser->current_part) != HTP_OK) return HTP_ERROR; + + // It is OK to end abruptly in the epilogue part, but not in any other. + if (parser->current_part->type != MULTIPART_PART_EPILOGUE) { + parser->multipart.flags |= HTP_MULTIPART_INCOMPLETE; + } + } + + bstr_builder_clear(parser->boundary_pieces); + + return HTP_OK; +} + +htp_status_t htp_mpartp_parse(htp_mpartp_t *parser, const void *_data, size_t len) { + unsigned char *data = (unsigned char *) _data; + + // The current position in the entire input buffer. + size_t pos = 0; + + // The position of the first unprocessed byte of data. We split the + // input buffer into smaller chunks, according to their purpose. Once + // an entire such smaller chunk is processed, we move to the next + // and update startpos. + size_t startpos = 0; + + // The position of the (possible) boundary. We investigate for possible + // boundaries whenever we encounter CRLF or just LF. If we don't find a + // boundary we need to go back, and this is what data_return_pos helps with. 
+ size_t data_return_pos = 0; + + #if HTP_DEBUG + fprint_raw_data(stderr, "htp_mpartp_parse: data chunk", data, len); + #endif + + // While there's data in the input buffer. + + while (pos < len) { + +STATE_SWITCH: + #if HTP_DEBUG + fprintf(stderr, "htp_mpartp_parse: state %d pos %zd startpos %zd\n", parser->parser_state, pos, startpos); + #endif + + switch (parser->parser_state) { + + case STATE_INIT: + // Incomplete initialization. + return HTP_ERROR; + break; + + case STATE_DATA: // Handle part data. + + // While there's data in the input buffer. + + while (pos < len) { + // Check for a CRLF-terminated line. + if (data[pos] == CR) { + // We have a CR byte. + + // Is this CR the last byte in the input buffer? + if (pos + 1 == len) { + // We have CR as the last byte in input. We are going to process + // what we have in the buffer as data, except for the CR byte, + // which we're going to leave for later. If it happens that a + // CR is followed by a LF and then a boundary, the CR is going + // to be discarded. + pos++; // Advance over CR. + parser->cr_aside = 1; + } else { + // We have CR and at least one more byte in the buffer, so we + // are able to test for the LF byte too. + if (data[pos + 1] == LF) { + pos += 2; // Advance over CR and LF. + + parser->multipart.flags |= HTP_MULTIPART_CRLF_LINE; + + // Prepare to switch to boundary testing. + data_return_pos = pos; + parser->boundary_candidate_pos = pos - startpos; + parser->boundary_match_pos = 2; // After LF; position of the first dash. + parser->parser_state = STATE_BOUNDARY; + + goto STATE_SWITCH; + } else { + // This is not a new line; advance over the + // byte and clear the CR set-aside flag. + pos++; + parser->cr_aside = 0; + } + } + } else if (data[pos] == LF) { // Check for a LF-terminated line. + pos++; // Advance over LF. + + // Did we have a CR in the previous input chunk? 
+ if (parser->cr_aside == 0) { + parser->multipart.flags |= HTP_MULTIPART_LF_LINE; + } else { + parser->multipart.flags |= HTP_MULTIPART_CRLF_LINE; + } + + // Prepare to switch to boundary testing. + data_return_pos = pos; + parser->boundary_candidate_pos = pos - startpos; + parser->boundary_match_pos = 2; // After LF; position of the first dash. + parser->parser_state = STATE_BOUNDARY; + + goto STATE_SWITCH; + } else { + // Take one byte from input + pos++; + + // Earlier we might have set aside a CR byte not knowing if the next + // byte is a LF. Now we know that it is not, and so we can release the CR. + if (parser->cr_aside) { + parser->handle_data(parser, (unsigned char *) &"\r", 1, /* not a line */ 0); + parser->cr_aside = 0; + } + } + } // while + + // No more data in the input buffer; process the data chunk. + parser->handle_data(parser, data + startpos, pos - startpos - parser->cr_aside, /* not a line */ 0); + + break; + + case STATE_BOUNDARY: // Handle a possible boundary. + while (pos < len) { + #ifdef HTP_DEBUG + fprintf(stderr, "boundary (len %zd pos %zd char %d) data char %d\n", parser->multipart.boundary_len, + parser->boundary_match_pos, parser->multipart.boundary[parser->boundary_match_pos], tolower(data[pos])); + #endif + + // Check if the bytes match. + if (!(data[pos] == parser->multipart.boundary[parser->boundary_match_pos])) { + // Boundary mismatch. + + // Process stored (buffered) data. + htp_martp_process_aside(parser, /* no match */ 0); + + // Return back where data parsing left off. + if (parser->current_part_mode == MODE_LINE) { + // In line mode, we process the line. + parser->handle_data(parser, data + startpos, data_return_pos - startpos, /* line */ 1); + startpos = data_return_pos; + } else { + // In data mode, we go back where we left off. 
+ pos = data_return_pos; + } + + parser->parser_state = STATE_DATA; + + goto STATE_SWITCH; + } + + // Consume one matched boundary byte + pos++; + parser->boundary_match_pos++; + + // Have we seen all boundary bytes? + if (parser->boundary_match_pos == parser->multipart.boundary_len) { + // Boundary match! + + // Process stored (buffered) data. + htp_martp_process_aside(parser, /* boundary match */ 1); + + // Process data prior to the boundary in the current input buffer. + // Because we know this is the last chunk before boundary, we can + // remove the line endings. + size_t dlen = data_return_pos - startpos; + if ((dlen > 0) && (data[startpos + dlen - 1] == LF)) dlen--; + if ((dlen > 0) && (data[startpos + dlen - 1] == CR)) dlen--; + parser->handle_data(parser, data + startpos, dlen, /* line */ 1); + + // Keep track of how many boundaries we've seen. + parser->multipart.boundary_count++; + + if (parser->multipart.flags & HTP_MULTIPART_SEEN_LAST_BOUNDARY) { + parser->multipart.flags |= HTP_MULTIPART_PART_AFTER_LAST_BOUNDARY; + } + + // Run boundary match. + parser->handle_boundary(parser); + + // We now need to check if this is the last boundary in the payload + parser->parser_state = STATE_BOUNDARY_IS_LAST2; + + goto STATE_SWITCH; + } + } // while + + // No more data in the input buffer; store (buffer) the unprocessed + // part for later, for after we find out if this is a boundary. + bstr_builder_append_mem(parser->boundary_pieces, data + startpos, len - startpos); + + break; + + case STATE_BOUNDARY_IS_LAST2: + // Examine the first byte after the last boundary character. If it is + // a dash, then we maybe processing the last boundary in the payload. If + // it is not, move to eat all bytes until the end of the line. + + if (data[pos] == '-') { + // Found one dash, now go to check the next position. + pos++; + parser->parser_state = STATE_BOUNDARY_IS_LAST1; + } else { + // This is not the last boundary. 
Change state but + // do not advance the position, allowing the next + // state to process the byte. + parser->parser_state = STATE_BOUNDARY_EAT_LWS; + } + break; + + case STATE_BOUNDARY_IS_LAST1: + // Examine the byte after the first dash; expected to be another dash. + // If not, eat all bytes until the end of the line. + + if (data[pos] == '-') { + // This is indeed the last boundary in the payload. + pos++; + parser->multipart.flags |= HTP_MULTIPART_SEEN_LAST_BOUNDARY; + parser->parser_state = STATE_BOUNDARY_EAT_LWS; + } else { + // The second character is not a dash, and so this is not + // the final boundary. Raise the flag for the first dash, + // and change state to consume the rest of the boundary line. + parser->multipart.flags |= HTP_MULTIPART_BBOUNDARY_NLWS_AFTER; + parser->parser_state = STATE_BOUNDARY_EAT_LWS; + } + break; + + case STATE_BOUNDARY_EAT_LWS: + if (data[pos] == CR) { + // CR byte, which could indicate a CRLF line ending. + pos++; + parser->parser_state = STATE_BOUNDARY_EAT_LWS_CR; + } else if (data[pos] == LF) { + // LF line ending; we're done with boundary processing; data bytes follow. + pos++; + startpos = pos; + parser->multipart.flags |= HTP_MULTIPART_LF_LINE; + parser->parser_state = STATE_DATA; + } else { + if (htp_is_lws(data[pos])) { + // Linear white space is allowed here. + parser->multipart.flags |= HTP_MULTIPART_BBOUNDARY_LWS_AFTER; + pos++; + } else { + // Unexpected byte; consume, but remain in the same state. + parser->multipart.flags |= HTP_MULTIPART_BBOUNDARY_NLWS_AFTER; + pos++; + } + } + break; + + case STATE_BOUNDARY_EAT_LWS_CR: + if (data[pos] == LF) { + // CRLF line ending; we're done with boundary processing; data bytes follow. + pos++; + startpos = pos; + parser->multipart.flags |= HTP_MULTIPART_CRLF_LINE; + parser->parser_state = STATE_DATA; + } else { + // Not a line ending; start again, but do not process this byte. 
+ parser->multipart.flags |= HTP_MULTIPART_BBOUNDARY_NLWS_AFTER; + parser->parser_state = STATE_BOUNDARY_EAT_LWS; + } + break; + } // switch + } + + return HTP_OK; +} + +/** + * Inspects an extracted boundary value and adds header-boundary flags to *flags: + * HTP_MULTIPART_HBOUNDARY_INVALID when the length is 0 or greater than 70, or + * when a byte is neither alphanumeric, a dash, nor one of the punctuation + * characters listed in the switch below; HTP_MULTIPART_HBOUNDARY_UNUSUAL when + * one of those punctuation characters is present. Existing flags are never + * cleared. + * + * @param[in] boundary + * @param[in,out] flags + */ +static void htp_mpartp_validate_boundary(bstr *boundary, uint64_t *flags) { + /* + + RFC 1341: + + The only mandatory parameter for the multipart Content-Type + is the boundary parameter, which consists of 1 to 70 + characters from a set of characters known to be very robust + through email gateways, and NOT ending with white space. + (If a boundary appears to end with white space, the white + space must be presumed to have been added by a gateway, and + should be deleted.) It is formally specified by the + following BNF: + + boundary := 0*69<bchars> bcharsnospace + + bchars := bcharsnospace / " " + + bcharsnospace := DIGIT / ALPHA / "'" / "(" / ")" / "+" / "_" + / "," / "-" / "." / "/" / ":" / "=" / "?" + */ + + /* + Chrome: Content-Type: multipart/form-data; boundary=----WebKitFormBoundaryT4AfwQCOgIxNVwlD + Firefox: Content-Type: multipart/form-data; boundary=---------------------------21071316483088 + MSIE: Content-Type: multipart/form-data; boundary=---------------------------7dd13e11c0452 + Opera: Content-Type: multipart/form-data; boundary=----------2JL5oh7QWEDwyBllIRc7fh + Safari: Content-Type: multipart/form-data; boundary=----WebKitFormBoundaryre6zL3b0BelnTY5S + */ + + unsigned char *data = bstr_ptr(boundary); + size_t len = bstr_len(boundary); + + // The RFC allows up to 70 characters. In real life, + // boundaries tend to be shorter. + if ((len == 0) || (len > 70)) { + *flags |= HTP_MULTIPART_HBOUNDARY_INVALID; + } + + // Check boundary characters. This check is stricter than the + // RFC, which seems to allow many separator characters.
+ size_t pos = 0; + while (pos < len) { + if (!(((data[pos] >= '0') && (data[pos] <= '9')) + || ((data[pos] >= 'a') && (data[pos] <= 'z')) + || ((data[pos] >= 'A') && (data[pos] <= 'Z')) + || (data[pos] == '-'))) { + + switch (data[pos]) { + case '\'': + case '(': + case ')': + case '+': + case '_': + case ',': + case '.': + case '/': + case ':': + case '=': + case '?': + // These characters are allowed by the RFC, but not common. + *flags |= HTP_MULTIPART_HBOUNDARY_UNUSUAL; + break; + + default: + // Invalid character. + *flags |= HTP_MULTIPART_HBOUNDARY_INVALID; + break; + } + } + + pos++; + } +} + +/** + * Scans the raw Content-Type value for every case-insensitive occurrence of + * the word "boundary" that still has an equals sign somewhere after it. Raises + * HTP_MULTIPART_HBOUNDARY_INVALID when such an occurrence is not written + * entirely in lowercase, or when more than one such occurrence is counted. + * + * @param[in] content_type + * @param[in,out] flags + */ +static void htp_mpartp_validate_content_type(bstr *content_type, uint64_t *flags) { + unsigned char *data = bstr_ptr(content_type); + size_t len = bstr_len(content_type); + size_t counter = 0; + + while (len > 0) { + int i = bstr_util_mem_index_of_c_nocase(data, len, "boundary"); + if (i == -1) break; + + data = data + i; + len = len - i; + + // In order to work around the fact that WebKit actually uses + // the word "boundary" in their boundary, we also require one + // equals character to follow the word. + // "multipart/form-data; boundary=----WebKitFormBoundaryT4AfwQCOgIxNVwlD" + if (memchr(data, '=', len) == NULL) break; + + counter++; + + // Check for case variations across the 8 bytes of the word "boundary", + // consuming them so that the next search starts after this match. + for (size_t j = 0; j < 8; j++) { + if (!((*data >= 'a') && (*data <= 'z'))) { + *flags |= HTP_MULTIPART_HBOUNDARY_INVALID; + } + + data++; + len--; + } + } + + // How many boundaries have we seen? + if (counter > 1) { + *flags |= HTP_MULTIPART_HBOUNDARY_INVALID; + } +} + +/** + * Extracts the boundary value from a Content-Type header value. The first + * case-insensitive occurrence of "boundary" is located, the value after the + * equals sign (quoted or bare) is copied into *boundary with bstr_dup_mem(), + * and irregularities are recorded in *flags, which is reset to 0 first. + * + * @param[in] content_type + * @param[out] boundary + * @param[out] flags + * @return HTP_ERROR when any argument is NULL or bstr_dup_mem() returns NULL; + * HTP_DECLINED when "boundary" is absent, has no equals sign, has no + * value, or the value is empty; HTP_OK otherwise. + */ +htp_status_t htp_mpartp_find_boundary(bstr *content_type, bstr **boundary, uint64_t *flags) { + if ((content_type == NULL) || (boundary == NULL) || (flags == NULL)) return HTP_ERROR; + + // Our approach is to ignore the MIME type and instead just look for + // the boundary. This approach is more reliable in the face of various + // evasion techniques that focus on submitting invalid MIME types. + + // Reset flags.
+ *flags = 0; + + // Look for the boundary, case insensitive. + int i = bstr_index_of_c_nocase(content_type, "boundary"); + if (i == -1) return HTP_DECLINED; + + // Skip the 8-byte word "boundary"; data and len describe what follows it. + unsigned char *data = bstr_ptr(content_type) + i + 8; + size_t len = bstr_len(content_type) - i - 8; + + // Look for the boundary value. + size_t pos = 0; + while ((pos < len) && (data[pos] != '=')) { + if (htp_is_space(data[pos])) { + // It is unusual to see whitespace before the equals sign. + *flags |= HTP_MULTIPART_HBOUNDARY_UNUSUAL; + } else { + // But seeing a non-whitespace character may indicate evasion. + *flags |= HTP_MULTIPART_HBOUNDARY_INVALID; + } + + pos++; + } + + if (pos >= len) { + // No equals sign in the header. + *flags |= HTP_MULTIPART_HBOUNDARY_INVALID; + return HTP_DECLINED; + } + + // Go over the '=' character. + pos++; + + // Ignore any whitespace after the equals sign. + // NOTE(review): the inner test repeats the loop condition and is always true. + while ((pos < len) && (htp_is_space(data[pos]))) { + if (htp_is_space(data[pos])) { + // It is unusual to see whitespace after + // the equals sign. + *flags |= HTP_MULTIPART_HBOUNDARY_UNUSUAL; + } + + pos++; + } + + if (pos >= len) { + // No value after the equals sign. + *flags |= HTP_MULTIPART_HBOUNDARY_INVALID; + return HTP_DECLINED; + } + + if (data[pos] == '"') { + // Quoted boundary. + + // Possibly not very unusual, but let's see. + // NOTE(review): only HBOUNDARY_UNUSUAL is raised on this path; + // HTP_MULTIPART_HBOUNDARY_QUOTED is not set here. + *flags |= HTP_MULTIPART_HBOUNDARY_UNUSUAL; + + pos++; // Over the double quote. + size_t startpos = pos; // Starting position of the boundary. + + // Look for the terminating double quote. + while ((pos < len) && (data[pos] != '"')) pos++; + + if (pos >= len) { + // Ran out of space without seeing + // the terminating double quote. + *flags |= HTP_MULTIPART_HBOUNDARY_INVALID; + + // Include the starting double quote in the boundary. + startpos--; + } + + *boundary = bstr_dup_mem(data + startpos, pos - startpos); + if (*boundary == NULL) return HTP_ERROR; + + pos++; // Over the double quote. + } else { + // Boundary not quoted.
+ + size_t startpos = pos; + + // Find the end of the boundary. For the time being, we replicate + // the behavior of PHP 5.4.x. This may result in a boundary that's + // closer to what would be accepted in real life. Our subsequent + // checks of boundary characters will catch irregularities. + while ((pos < len) && (data[pos] != ',') && (data[pos] != ';') && (!htp_is_space(data[pos]))) pos++; + + *boundary = bstr_dup_mem(data + startpos, pos - startpos); + if (*boundary == NULL) return HTP_ERROR; + } + + // Check for a zero-length boundary. + if (bstr_len(*boundary) == 0) { + *flags |= HTP_MULTIPART_HBOUNDARY_INVALID; + bstr_free(*boundary); + *boundary = NULL; + return HTP_DECLINED; + } + + // Allow only whitespace characters after the boundary. + int seen_space = 0, seen_non_space = 0; + + while (pos < len) { + if (!htp_is_space(data[pos])) { + seen_non_space = 1; + } else { + seen_space = 1; + } + + pos++; + } + + // Raise INVALID if we see any non-space characters, + // but raise UNUSUAL if we see _only_ space characters. + if (seen_non_space) { + *flags |= HTP_MULTIPART_HBOUNDARY_INVALID; + } else if (seen_space) { + *flags |= HTP_MULTIPART_HBOUNDARY_UNUSUAL; + } + + #ifdef HTP_DEBUG + fprint_bstr(stderr, "Multipart boundary", *boundary); + #endif + + // Validate boundary characters. + htp_mpartp_validate_boundary(*boundary, flags); + + // Correlate with the MIME type. This might be a tad too + // sensitive because it may catch non-browser access with sloppy + // implementations, but let's go with it for now.
+ if (bstr_begins_with_c(content_type, "multipart/form-data;") == 0) { + *flags |= HTP_MULTIPART_HBOUNDARY_INVALID; + } + + htp_mpartp_validate_content_type(content_type, flags); + + return HTP_OK; +} diff --git a/htp/htp_multipart.h b/htp/htp_multipart.h new file mode 100644 index 0000000..614ef63 --- /dev/null +++ b/htp/htp_multipart.h @@ -0,0 +1,345 @@ +/*************************************************************************** + * Copyright (c) 2009-2010 Open Information Security Foundation + * Copyright (c) 2010-2013 Qualys, Inc. + * All rights reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions are + * met: + * + * - Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + + * - Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution. + + * - Neither the name of the Qualys, Inc. nor the names of its + * contributors may be used to endorse or promote products derived from + * this software without specific prior written permission. + * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS + * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT + * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR + * A PARTICULAR PURPOSE ARE DISCLAIMED.
IN NO EVENT SHALL THE COPYRIGHT + * HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, + * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT + * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, + * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY + * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT + * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE + * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + ***************************************************************************/ + +/** + * @file + * @author Ivan Ristic <ivanr@webkreator.com> + */ + +#ifndef _HTP_MULTIPART_H +#define _HTP_MULTIPART_H + +#ifdef __cplusplus +extern "C" { +#endif + +#include "bstr.h" +#include "htp.h" +#include "htp_table.h" + + +// Constants and enums. Each HTP_MULTIPART_* value below is a distinct bit so that flags can be OR-ed together. + +/** + * Seen an LF line in the payload. LF lines are not allowed, but + * some clients do use them and some backends do accept them. Mixing + * LF and CRLF lines within the same payload might be unusual. + */ +#define HTP_MULTIPART_LF_LINE 0x0001 + +/** Seen a CRLF line in the payload. This is normal and expected. */ +#define HTP_MULTIPART_CRLF_LINE 0x0002 + +/** Seen LWS after a boundary instance in the body. Unusual. */ +#define HTP_MULTIPART_BBOUNDARY_LWS_AFTER 0x0004 + +/** Seen non-LWS content after a boundary instance in the body. Highly unusual. */ +#define HTP_MULTIPART_BBOUNDARY_NLWS_AFTER 0x0008 + +/** + * Payload has a preamble part. Might not be that unusual. + */ +#define HTP_MULTIPART_HAS_PREAMBLE 0x0010 + +/** + * Payload has an epilogue part. Unusual. + */ +#define HTP_MULTIPART_HAS_EPILOGUE 0x0020 + +/** + * The last boundary was seen in the payload. Absence of the last boundary + * may not break parsing with some (most?) backends, but it means that the payload + * is not well formed. Can occur if the client gives up, or if the connection is + * interrupted.
Incomplete payloads should be blocked whenever possible. + */ +#define HTP_MULTIPART_SEEN_LAST_BOUNDARY 0x0040 + +/** + * There was a part after the last boundary. This is highly irregular + * and indicative of evasion. + */ +#define HTP_MULTIPART_PART_AFTER_LAST_BOUNDARY 0x0080 + +/** + * The payload ends abruptly, without proper termination. Can occur if the client gives up, + * or if the connection is interrupted. When this flag is raised, HTP_MULTIPART_PART_INCOMPLETE + * will also be raised for the part that was only partially processed. (But the opposite may not + * always be the case -- there are other ways in which a part can be left incomplete.) + */ +#define HTP_MULTIPART_INCOMPLETE 0x0100 + +/** The boundary in the Content-Type header is invalid. */ +#define HTP_MULTIPART_HBOUNDARY_INVALID 0x0200 + +/** + * The boundary in the Content-Type header is unusual. This may mean that evasion + * is attempted, but it could also mean that we have encountered a client that does + * not do things in the way it should. + */ +#define HTP_MULTIPART_HBOUNDARY_UNUSUAL 0x0400 + +/** + * The boundary in the Content-Type header is quoted. This is very unusual, + * and may be indicative of an evasion attempt. + */ +#define HTP_MULTIPART_HBOUNDARY_QUOTED 0x0800 + +/** Header folding was used in part headers. Very unusual. */ +#define HTP_MULTIPART_PART_HEADER_FOLDING 0x1000 + +/** + * A part of unknown type was encountered, which probably means that the part is lacking + * a Content-Disposition header, or that the header is invalid. Highly unusual. + */ +#define HTP_MULTIPART_PART_UNKNOWN 0x2000 + +/** There was a repeated part header, possibly in an attempt to confuse the parser. Very unusual. */ +#define HTP_MULTIPART_PART_HEADER_REPEATED 0x4000 + +/** Unknown part header encountered. */ +#define HTP_MULTIPART_PART_HEADER_UNKNOWN 0x8000 + +/** Invalid part header encountered.
*/ +#define HTP_MULTIPART_PART_HEADER_INVALID 0x10000 + +/** Part type specified in the C-D header is neither MULTIPART_PART_TEXT nor MULTIPART_PART_FILE. */ +#define HTP_MULTIPART_CD_TYPE_INVALID 0x20000 + +/** Content-Disposition part header with multiple parameters with the same name. */ +#define HTP_MULTIPART_CD_PARAM_REPEATED 0x40000 + +/** Unknown Content-Disposition parameter. */ +#define HTP_MULTIPART_CD_PARAM_UNKNOWN 0x80000 + +/** Invalid Content-Disposition syntax. */ +#define HTP_MULTIPART_CD_SYNTAX_INVALID 0x100000 + +/** + * There is an abruptly terminated part. This can happen when the payload itself is abruptly + * terminated (in which case HTP_MULTIPART_INCOMPLETE will be raised). However, it can also + * happen when a boundary is seen before any part data. + */ +#define HTP_MULTIPART_PART_INCOMPLETE 0x200000 + +/** A NUL byte was seen in a part header area. */ +#define HTP_MULTIPART_NUL_BYTE 0x400000 + +/** A collection of flags that all indicate an invalid C-D header. */ +#define HTP_MULTIPART_CD_INVALID ( \ + HTP_MULTIPART_CD_TYPE_INVALID | \ + HTP_MULTIPART_CD_PARAM_REPEATED | \ + HTP_MULTIPART_CD_PARAM_UNKNOWN | \ + HTP_MULTIPART_CD_SYNTAX_INVALID ) + +/** A collection of flags that all indicate an invalid part. */ +#define HTP_MULTIPART_PART_INVALID ( \ + HTP_MULTIPART_CD_INVALID | \ + HTP_MULTIPART_NUL_BYTE | \ + HTP_MULTIPART_PART_UNKNOWN | \ + HTP_MULTIPART_PART_HEADER_REPEATED | \ + HTP_MULTIPART_PART_INCOMPLETE | \ + HTP_MULTIPART_PART_HEADER_UNKNOWN | \ + HTP_MULTIPART_PART_HEADER_INVALID ) + +/** A collection of flags that all indicate an invalid Multipart payload. */ +#define HTP_MULTIPART_INVALID ( \ + HTP_MULTIPART_PART_INVALID | \ + HTP_MULTIPART_PART_AFTER_LAST_BOUNDARY | \ + HTP_MULTIPART_INCOMPLETE | \ + HTP_MULTIPART_HBOUNDARY_INVALID ) + +/** A collection of flags that all indicate an unusual Multipart payload.
*/ +#define HTP_MULTIPART_UNUSUAL ( \ + HTP_MULTIPART_INVALID | \ + HTP_MULTIPART_PART_HEADER_FOLDING | \ + HTP_MULTIPART_BBOUNDARY_NLWS_AFTER | \ + HTP_MULTIPART_HAS_EPILOGUE | \ + HTP_MULTIPART_HBOUNDARY_UNUSUAL \ + HTP_MULTIPART_HBOUNDARY_QUOTED ) + +/** A collection of flags that all indicate an unusual Multipart payload, with a low sensitivity to irregularities. */ +#define HTP_MULTIPART_UNUSUAL_PARANOID ( \ + HTP_MULTIPART_UNUSUAL | \ + HTP_MULTIPART_LF_LINE | \ + HTP_MULTIPART_BBOUNDARY_LWS_AFTER | \ + HTP_MULTIPART_HAS_PREAMBLE ) + +#define HTP_MULTIPART_MIME_TYPE "multipart/form-data" + +enum htp_multipart_type_t { + + /** Unknown part. */ + MULTIPART_PART_UNKNOWN = 0, + + /** Text (parameter) part. */ + MULTIPART_PART_TEXT = 1, + + /** File part. */ + MULTIPART_PART_FILE = 2, + + /** Free-text part before the first boundary. */ + MULTIPART_PART_PREAMBLE = 3, + + /** Free-text part after the last boundary. */ + MULTIPART_PART_EPILOGUE = 4 +}; + + +// Structures + +/** + * Holds multipart parser configuration and state. Private. + */ +typedef struct htp_mpartp_t htp_mpartp_t; + +/** + * Holds information related to a multipart body. + */ +typedef struct htp_multipart_t { + /** Multipart boundary. */ + char *boundary; + + /** Boundary length. */ + size_t boundary_len; + + /** How many boundaries were there? */ + int boundary_count; + + /** List of parts, in the order in which they appeared in the body. */ + htp_list_t *parts; + + /** Parsing flags. */ + uint64_t flags; +} htp_multipart_t; + +/** + * Holds information related to a part. + */ +typedef struct htp_multipart_part_t { + /** Pointer to the parser. */ + htp_mpartp_t *parser; + + /** Part type; see the MULTIPART_PART_* constants. */ + enum htp_multipart_type_t type; + + /** Raw part length (i.e., headers and data). */ + size_t len; + + /** Part name, from the Content-Disposition header. Can be NULL. 
*/ + bstr *name; + + /** + * Part value; the contents depends on the type of the part: + * 1) NULL for files; 2) contains complete part contents for + * preamble and epilogue parts (they have no headers), and + * 3) data only (headers excluded) for text and unknown parts. + */ + bstr *value; + + /** Part content type, from the Content-Type header. Can be NULL. */ + bstr *content_type; + + /** Part headers (htp_header_t instances), using header name as the key. */ + htp_table_t *headers; + + /** File data, available only for MULTIPART_PART_FILE parts. */ + htp_file_t *file; +} htp_multipart_part_t; + + +// Functions + +/** + * Creates a new multipart/form-data parser. On a successful invocation, + * the ownership of the boundary parameter is transferred to the parser. + * + * @param[in] cfg + * @param[in] boundary + * @param[in] flags + * @return New parser instance, or NULL on memory allocation failure. + */ +htp_mpartp_t *htp_mpartp_create(htp_cfg_t *cfg, bstr *boundary, uint64_t flags); + +/** + * Looks for boundary in the supplied Content-Type request header. The extracted + * boundary will be allocated on the heap. + * + * @param[in] content_type + * @param[out] boundary + * @param[out] multipart_flags Multipart flags, which are not compatible from general LibHTP flags. + * @return HTP_OK on success (boundary found), HTP_DECLINED if boundary was not found, + * and HTP_ERROR on failure. Flags may be set on HTP_OK and HTP_DECLINED. For + * example, if a boundary could not be extracted but there is indication that + * one is present, HTP_MULTIPART_HBOUNDARY_INVALID will be set. + */ +htp_status_t htp_mpartp_find_boundary(bstr *content_type, bstr **boundary, uint64_t *multipart_flags); + +/** + * Returns the multipart structure created by the parser. + * + * @param[in] parser + * @return The main multipart structure. + */ +htp_multipart_t *htp_mpartp_get_multipart(htp_mpartp_t *parser); + +/** + * Destroys the provided parser. 
+ * + * @param[in] parser + */ +void htp_mpartp_destroy(htp_mpartp_t *parser); + +/** + * Finalize parsing. + * + * @param[in] parser + * @returns HTP_OK on success, HTP_ERROR on failure. + */ +htp_status_t htp_mpartp_finalize(htp_mpartp_t *parser); + +/** + * Parses a chunk of multipart/form-data data. This function should be called + * as many times as necessary until all data has been consumed. + * + * @param[in] parser + * @param[in] data + * @param[in] len + * @return HTP_OK on success, HTP_ERROR on failure. + */ +htp_status_t htp_mpartp_parse(htp_mpartp_t *parser, const void *data, size_t len); + +#ifdef __cplusplus +} +#endif + +#endif /* _HTP_MULTIPART_H */ diff --git a/htp/htp_multipart_private.h b/htp/htp_multipart_private.h new file mode 100644 index 0000000..5b8d228 --- /dev/null +++ b/htp/htp_multipart_private.h @@ -0,0 +1,203 @@ +/*************************************************************************** + * Copyright (c) 2009-2010 Open Information Security Foundation + * Copyright (c) 2010-2013 Qualys, Inc. + * All rights reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions are + * met: + * + * - Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + + * - Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution. + + * - Neither the name of the Qualys, Inc. nor the names of its + * contributors may be used to endorse or promote products derived from + * this software without specific prior written permission. 
+ * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS + * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT + * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR + * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT + * HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, + * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT + * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, + * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY + * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT + * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE + * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + ***************************************************************************/ + +/** + * @file + * @author Ivan Ristic <ivanr@webkreator.com> + */ + +#ifndef _HTP_MULTIPART_PRIVATE_H +#define _HTP_MULTIPART_PRIVATE_H + +#ifdef __cplusplus +extern "C" { +#endif + +#include "htp_multipart.h" + +#define CD_PARAM_OTHER 0 +#define CD_PARAM_NAME 1 +#define CD_PARAM_FILENAME 2 + +#define DEFAULT_FILE_EXTRACT_LIMIT 16 + +enum htp_part_mode_t { + /** When in line mode, the parser is handling part headers. */ + MODE_LINE = 0, + + /** When in data mode, the parser is consuming part data. */ + MODE_DATA = 1 +}; + +enum htp_multipart_state_t { + /** Initial state, after the parser has been created but before the boundary initialized. */ + STATE_INIT = 0, + + /** Processing data, waiting for a new line (which might indicate a new boundary). */ + STATE_DATA = 1, + + /** Testing a potential boundary. */ + STATE_BOUNDARY = 2, + + /** Checking the second byte after a boundary; entered from STATE_BOUNDARY_IS_LAST2 once a first dash has been seen. */ + STATE_BOUNDARY_IS_LAST1 = 3, + + /** Checking the first byte after a boundary; the parser enters this state right after a boundary match. */ + STATE_BOUNDARY_IS_LAST2 = 4, + + /** Consuming linear whitespace after a boundary.
*/ + STATE_BOUNDARY_EAT_LWS = 5, + + /** Used after a CR byte is detected in STATE_BOUNDARY_EAT_LWS. */ + STATE_BOUNDARY_EAT_LWS_CR = 6 +}; + +struct htp_mpartp_t { + htp_multipart_t multipart; + + htp_cfg_t *cfg; + + int extract_files; + + int extract_limit; + + char *extract_dir; + + int file_count; + + // Parsing callbacks + + int (*handle_data)(htp_mpartp_t *mpartp, const unsigned char *data, + size_t len, int line_end); + int (*handle_boundary)(htp_mpartp_t *mpartp); + + // Internal parsing fields; move into a private structure + + /** + * Parser state; one of the STATE_* constants (enum htp_multipart_state_t). + */ + enum htp_multipart_state_t parser_state; + + /** + * Keeps track of the current position in the boundary matching progress. + * When this field reaches boundary_len, we have a boundary match. + */ + size_t boundary_match_pos; + + /** + * Pointer to the part that is currently being processed. + */ + htp_multipart_part_t *current_part; + + /** + * This parser consists of two layers: the outer layer is charged with + * finding parts, and the internal layer handles part data. There is an + * interesting interaction between the two parsers. Because the + * outer layer is seeing every line (it has to, in order to test for + * boundaries), it also effectively splits input into lines. The + * inner parser deals with two areas: first is the headers, which are + * line based, followed by binary data. When parsing headers, the inner + * parser can reuse the lines identified by the outer parser. In this + * variable we keep the current parsing mode of the part, which helps + * us process input data more efficiently. The possible values are + * MODE_LINE and MODE_DATA (enum htp_part_mode_t). + */ + enum htp_part_mode_t current_part_mode; + + /** + * Used for buffering when a potential boundary is fragmented + * across many input data buffers. On a match, the data stored here is + * discarded.
When there is no match, the buffer is processed as data + * (belonging to the currently active part). + */ + bstr_builder_t *boundary_pieces; + + bstr_builder_t *part_header_pieces; + + bstr *pending_header_line; + + /** + * Stores text part pieces until the entire part is seen, at which + * point the pieces are assembled into a single buffer, and the + * builder cleared. + */ + bstr_builder_t *part_data_pieces; + + /** + * The offset of the current boundary candidate, relative to the most + * recent data chunk (first unprocessed chunk of data). + */ + size_t boundary_candidate_pos; + + /** + * When we encounter a CR as the last byte in a buffer, we don't know + * if the byte is part of a CRLF combination. If it is, then the CR + * might be a part of a boundary. But if it is not, it is the current + * part's data. Because we know how to handle everything before the + * CR, we do, and we use this flag to indicate that a CR byte is + * effectively being buffered. This is probably a case of premature + * optimization, but I am going to leave it in for now. + */ + int cr_aside; + + /** + * When set, indicates that this parser no longer owns names and + * values of MULTIPART_PART_TEXT parts. It is used to avoid data + * duplication when the parser is used by LibHTP internally.
+ */ + int gave_up_data; +}; + +htp_status_t htp_mpartp_run_request_file_data_hook(htp_multipart_part_t *part, const unsigned char *data, size_t len); + +htp_status_t htp_mpart_part_process_headers(htp_multipart_part_t *part); + +htp_status_t htp_mpartp_parse_header(htp_multipart_part_t *part, const unsigned char *data, size_t len); + +htp_status_t htp_mpart_part_handle_data(htp_multipart_part_t *part, const unsigned char *data, size_t len, int is_line); + +int htp_mpartp_is_boundary_character(int c); + +htp_multipart_part_t *htp_mpart_part_create(htp_mpartp_t *parser); + +htp_status_t htp_mpart_part_finalize_data(htp_multipart_part_t *part); + +void htp_mpart_part_destroy(htp_multipart_part_t *part, int gave_up_data); + +htp_status_t htp_mpart_part_parse_c_d(htp_multipart_part_t *part); + +#ifdef __cplusplus +} +#endif + +#endif /* _HTP_MULTIPART_PRIVATE_H */ diff --git a/htp/htp_parsers.c b/htp/htp_parsers.c new file mode 100644 index 0000000..3f41abb --- /dev/null +++ b/htp/htp_parsers.c @@ -0,0 +1,214 @@ +/*************************************************************************** + * Copyright (c) 2009-2010 Open Information Security Foundation + * Copyright (c) 2010-2013 Qualys, Inc. + * All rights reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions are + * met: + * + * - Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + + * - Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution. + + * - Neither the name of the Qualys, Inc. nor the names of its + * contributors may be used to endorse or promote products derived from + * this software without specific prior written permission.
+ * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS + * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT + * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR + * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT + * HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, + * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT + * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, + * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY + * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT + * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE + * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + ***************************************************************************/ + +/** + * @file + * @author Ivan Ristic <ivanr@webkreator.com> + */ + +#include "htp_config_auto.h" + +#include "htp_private.h" + +/** + * Determines protocol number from a textual representation (i.e., "HTTP/1.1"). This + * function will only understand a properly formatted protocol information. It does + * not try to be flexible. + * + * @param[in] protocol + * @return Protocol version or PROTOCOL_UNKNOWN. + */ +int htp_parse_protocol(bstr *protocol) { + if (protocol == NULL) return HTP_PROTOCOL_INVALID; + + // TODO This function uses a very strict approach to parsing, whereas + // browsers will typically be more flexible, allowing whitespace + // before and after the forward slash, as well as allowing leading + // zeroes in the numbers. We should be able to parse such malformed + // content correctly (but emit a warning). 
+ if (bstr_len(protocol) == 8) { + unsigned char *ptr = bstr_ptr(protocol); + if ((ptr[0] == 'H') && (ptr[1] == 'T') && (ptr[2] == 'T') && (ptr[3] == 'P') + && (ptr[4] == '/') && (ptr[6] == '.')) { + // Check the version numbers + if (ptr[5] == '0') { + if (ptr[7] == '9') { + return HTP_PROTOCOL_0_9; + } + } else if (ptr[5] == '1') { + if (ptr[7] == '0') { + return HTP_PROTOCOL_1_0; + } else if (ptr[7] == '1') { + return HTP_PROTOCOL_1_1; + } + } + } + } + + return HTP_PROTOCOL_INVALID; +} + +/** + * Determines the numerical value of a response status given as a string. + * + * @param[in] status + * @return Status code on success, or HTP_STATUS_INVALID on error. + */ +int htp_parse_status(bstr *status) { + int64_t r = htp_parse_positive_integer_whitespace((unsigned char *) bstr_ptr(status), bstr_len(status), 10); + if (r >= HTP_VALID_STATUS_MIN && r <= HTP_VALID_STATUS_MAX) { + return (int)r; + } else { + return HTP_STATUS_INVALID; + } +} + +/** + * Parses Digest Authorization request header. + * + * @param[in] connp + * @param[in] auth_header + */ +int htp_parse_authorization_digest(htp_connp_t *connp, htp_header_t *auth_header) { + // Extract the username + int i = bstr_index_of_c(auth_header->value, "username="); + if (i == -1) return HTP_DECLINED; + + unsigned char *data = bstr_ptr(auth_header->value); + size_t len = bstr_len(auth_header->value); + size_t pos = i + 9; + + // Ignore whitespace + while ((pos < len) && (isspace((int) data[pos]))) pos++; + if (pos == len) return HTP_DECLINED; + + if (data[pos] != '"') return HTP_DECLINED; + + return htp_extract_quoted_string_as_bstr(data + pos, len - pos, &(connp->in_tx->request_auth_username), NULL); +} + +/** + * Parses Basic Authorization request header. 
+ * + * @param[in] connp + * @param[in] auth_header + */ +int htp_parse_authorization_basic(htp_connp_t *connp, htp_header_t *auth_header) { + unsigned char *data = bstr_ptr(auth_header->value); + size_t len = bstr_len(auth_header->value); + size_t pos = 5; + + // Ignore whitespace + while ((pos < len) && (isspace((int) data[pos]))) pos++; + if (pos == len) return HTP_DECLINED; + + // Decode base64-encoded data + bstr *decoded = htp_base64_decode_mem(data + pos, len - pos); + if (decoded == NULL) return HTP_ERROR; + + // Now extract the username and password + int i = bstr_index_of_c(decoded, ":"); + if (i == -1) { + bstr_free(decoded); + return HTP_DECLINED; + } + + connp->in_tx->request_auth_username = bstr_dup_ex(decoded, 0, i); + if (connp->in_tx->request_auth_username == NULL) { + bstr_free(decoded); + return HTP_ERROR; + } + + connp->in_tx->request_auth_password = bstr_dup_ex(decoded, i + 1, bstr_len(decoded) - i - 1); + if (connp->in_tx->request_auth_password == NULL) { + bstr_free(decoded); + bstr_free(connp->in_tx->request_auth_username); + return HTP_ERROR; + } + + bstr_free(decoded); + + return HTP_OK; +} + +/** + * Parses Bearer Authorization request header. + * + * @param[in] connp + * @param[in] auth_header + */ +int htp_parse_authorization_bearer(htp_connp_t *connp, htp_header_t *auth_header) { + unsigned char *data = bstr_ptr(auth_header->value); + size_t len = bstr_len(auth_header->value); + size_t pos = 6; + + // Ignore whitespace + while ((pos < len) && (isspace((int) data[pos]))) pos++; + if (pos == len) return HTP_DECLINED; + + // There is nothing much else to check with Bearer auth so we just return + return HTP_OK; +} +/** + * Parses Authorization request header. 
+ * + * @param[in] connp + */ +int htp_parse_authorization(htp_connp_t *connp) { + htp_header_t *auth_header = htp_table_get_c(connp->in_tx->request_headers, "authorization"); + if (auth_header == NULL) { + connp->in_tx->request_auth_type = HTP_AUTH_NONE; + return HTP_OK; + } + + // TODO Need a flag to raise when failing to parse authentication headers. + + if (bstr_begins_with_c_nocase(auth_header->value, "basic")) { + // Basic authentication + connp->in_tx->request_auth_type = HTP_AUTH_BASIC; + return htp_parse_authorization_basic(connp, auth_header); + } else if (bstr_begins_with_c_nocase(auth_header->value, "digest")) { + // Digest authentication + connp->in_tx->request_auth_type = HTP_AUTH_DIGEST; + return htp_parse_authorization_digest(connp, auth_header); + } else if (bstr_begins_with_c_nocase(auth_header->value, "bearer")) { + // OAuth Bearer authentication + connp->in_tx->request_auth_type = HTP_AUTH_BEARER; + return htp_parse_authorization_bearer(connp, auth_header); + } else { + // Unrecognized authentication method + connp->in_tx->request_auth_type = HTP_AUTH_UNRECOGNIZED; + } + + return HTP_OK; +} diff --git a/htp/htp_php.c b/htp/htp_php.c new file mode 100644 index 0000000..582d5b3 --- /dev/null +++ b/htp/htp_php.c @@ -0,0 +1,116 @@ +/*************************************************************************** + * Copyright (c) 2009-2010 Open Information Security Foundation + * Copyright (c) 2010-2013 Qualys, Inc. + * All rights reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions are + * met: + * + * - Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + + * - Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution. 
+ + * - Neither the name of the Qualys, Inc. nor the names of its + * contributors may be used to endorse or promote products derived from + * this software without specific prior written permission. + * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS + * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT + * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR + * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT + * HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, + * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT + * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, + * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY + * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT + * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE + * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + ***************************************************************************/ + +/** + * @file + * @author Ivan Ristic <ivanr@webkreator.com> + */ + +#include "htp_config_auto.h" + +#include "htp_private.h" + +/** + * This is a proof-of-concept processor that processes parameter names in + * a way _similar_ to PHP. Whitespace at the beginning is removed, and the + * remaining whitespace characters are converted to underscores. Proper + * research of PHP's behavior is needed before we can claim to be emulating it. + * + * @param[in,out] p + * @return HTP_OK on success, HTP_ERROR on failure. + */ +htp_status_t htp_php_parameter_processor(htp_param_t *p) { + if (p == NULL) return HTP_ERROR; + + // Name transformation + + bstr *new_name = NULL; + + // Ignore whitespace characters at the beginning of parameter name. 
+ + unsigned char *data = bstr_ptr(p->name); + size_t len = bstr_len(p->name); + size_t pos = 0; + + // Advance over any whitespace characters at the beginning of the name. + while ((pos < len) && (isspace(data[pos]))) pos++; + + // Have we seen any whitespace? + if (pos > 0) { + // Make a copy of the name, starting with + // the first non-whitespace character. + new_name = bstr_dup_mem(data + pos, len - pos); + if (new_name == NULL) return HTP_ERROR; + } + + // Replace remaining whitespace characters with underscores. + + size_t offset = pos; + pos = 0; + + // Advance to the end of name or to the first whitespace character. + while ((offset + pos < len)&&(!isspace(data[pos]))) pos++; + + // Are we at the end of the name? + if (offset + pos < len) { + // Seen whitespace within the string. + + // Make a copy of the name if needed (which would be the case + // with a parameter that does not have any whitespace in front). + if (new_name == NULL) { + new_name = bstr_dup(p->name); + if (new_name == NULL) return HTP_ERROR; + } + + // Change the pointers to the new name and ditch the offset. + data = bstr_ptr(new_name); + len = bstr_len(new_name); + + // Replace any whitespace characters in the copy with underscores. + while (pos < len) { + if (isspace(data[pos])) { + data[pos] = '_'; + } + + pos++; + } + } + + // If we made any changes, free the old parameter name and put the new one in. + if (new_name != NULL) { + bstr_free(p->name); + p->name = new_name; + } + + return HTP_OK; +} diff --git a/htp/htp_private.h b/htp/htp_private.h new file mode 100644 index 0000000..9bcf19d --- /dev/null +++ b/htp/htp_private.h @@ -0,0 +1,269 @@ +/*************************************************************************** + * Copyright (c) 2009-2010 Open Information Security Foundation + * Copyright (c) 2010-2013 Qualys, Inc. + * All rights reserved. 
+ * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions are + * met: + * + * - Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + + * - Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution. + + * - Neither the name of the Qualys, Inc. nor the names of its + * contributors may be used to endorse or promote products derived from + * this software without specific prior written permission. + * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS + * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT + * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR + * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT + * HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, + * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT + * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, + * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY + * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT + * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE + * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
+ ***************************************************************************/
+
+/**
+ * @file
+ * @author Ivan Ristic <ivanr@webkreator.com>
+ */
+
+// Include guard for this header.
+#ifndef _HTP_PRIVATE_H
+#define _HTP_PRIVATE_H
+
+// Give the declarations below C linkage when compiled as C++. Note that the
+// #include directives that follow are inside this block as well.
+#ifdef __cplusplus
+extern "C" {
+#endif
+
+#if defined(__cplusplus) && !defined(__STDC_FORMAT_MACROS)
+/* C99 requires that inttypes.h only exposes PRI* macros
+ * for C++ implementations if this is defined: */
+#define __STDC_FORMAT_MACROS
+#endif
+
+#include <ctype.h>
+#include <errno.h>
+#include <iconv.h>
+#include <inttypes.h>
+#include <stdarg.h>
+#include <stdio.h>
+#include <stdlib.h>
+#include <unistd.h>
+#include <sys/types.h>
+#include <sys/stat.h>
+#include <stdint.h>
+
+#include "htp_config_auto_gen.h"
+#include "htp.h"
+#include "htp_config_private.h"
+#include "htp_connection_parser_private.h"
+#include "htp_connection_private.h"
+#include "htp_list_private.h"
+#include "htp_multipart_private.h"
+#include "htp_table_private.h"
+
+// Line-ending bytes. Each one is defined only when no earlier definition exists.
+#ifndef CR
+#define CR '\r'
+#endif
+
+#ifndef LF
+#define LF '\n'
+#endif
+
+// 1048576 is 1 Mbyte
+#define HTP_LZMA_MEMLIMIT 1048576
+//deflate max ratio is about 1000
+#define HTP_COMPRESSION_BOMB_RATIO 2048
+// NOTE(review): same value as HTP_LZMA_MEMLIMIT above; the unit is not stated
+// here - confirm against the decompressor code.
+#define HTP_COMPRESSION_BOMB_LIMIT 1048576
+// 0.1 second
+#define HTP_COMPRESSION_TIME_LIMIT_USEC 100000
+// test time for compression every 256 callbacks
+#define HTP_COMPRESSION_TIME_FREQ_TEST 256
+
+// NOTE(review): presumably the default hard and soft limits on field length -
+// confirm where these are consumed.
+#define HTP_FIELD_LIMIT_HARD 18000
+#define HTP_FIELD_LIMIT_SOFT 9000
+
+// Inclusive bounds that htp_parse_status() accepts for a response status code.
+#define HTP_VALID_STATUS_MIN 100
+#define HTP_VALID_STATUS_MAX 999
+
+// Parser states, in the order in which they are
+// used as a single transaction is processed.
+
+// States carrying the REQ_ prefix. Every state function takes the connection
+// parser and returns an htp_status_t.
+htp_status_t htp_connp_REQ_IDLE(htp_connp_t *connp);
+htp_status_t htp_connp_REQ_LINE(htp_connp_t *connp);
+htp_status_t htp_connp_REQ_LINE_complete(htp_connp_t *connp);
+htp_status_t htp_connp_REQ_PROTOCOL(htp_connp_t *connp);
+htp_status_t htp_connp_REQ_HEADERS(htp_connp_t *connp);
+htp_status_t htp_connp_REQ_CONNECT_CHECK(htp_connp_t *connp);
+htp_status_t htp_connp_REQ_CONNECT_WAIT_RESPONSE(htp_connp_t *connp);
+htp_status_t htp_connp_REQ_CONNECT_PROBE_DATA(htp_connp_t *connp);
+htp_status_t htp_connp_REQ_BODY_DETERMINE(htp_connp_t *connp);
+htp_status_t htp_connp_REQ_BODY_IDENTITY(htp_connp_t *connp);
+htp_status_t htp_connp_REQ_BODY_CHUNKED_LENGTH(htp_connp_t *connp);
+htp_status_t htp_connp_REQ_BODY_CHUNKED_DATA(htp_connp_t *connp);
+htp_status_t htp_connp_REQ_BODY_CHUNKED_DATA_END(htp_connp_t *connp);
+htp_status_t htp_connp_REQ_FINALIZE(htp_connp_t *connp);
+htp_status_t htp_connp_REQ_IGNORE_DATA_AFTER_HTTP_0_9(htp_connp_t *connp);
+
+// States carrying the RES_ prefix.
+htp_status_t htp_connp_RES_IDLE(htp_connp_t *connp);
+htp_status_t htp_connp_RES_LINE(htp_connp_t *connp);
+htp_status_t htp_connp_RES_HEADERS(htp_connp_t *connp);
+htp_status_t htp_connp_RES_BODY_DETERMINE(htp_connp_t *connp);
+htp_status_t htp_connp_RES_BODY_IDENTITY_CL_KNOWN(htp_connp_t *connp);
+htp_status_t htp_connp_RES_BODY_IDENTITY_STREAM_CLOSE(htp_connp_t *connp);
+htp_status_t htp_connp_RES_BODY_CHUNKED_LENGTH(htp_connp_t *connp);
+htp_status_t htp_connp_RES_BODY_CHUNKED_DATA(htp_connp_t *connp);
+htp_status_t htp_connp_RES_BODY_CHUNKED_DATA_END(htp_connp_t *connp);
+htp_status_t htp_connp_RES_FINALIZE(htp_connp_t *connp);
+
+// Parsing functions
+
+// Request line and request header handling, "generic" variants.
+htp_status_t htp_parse_request_line_generic(htp_connp_t *connp);
+htp_status_t htp_parse_request_line_generic_ex(htp_connp_t *connp, int nul_terminates);
+htp_status_t htp_parse_request_header_generic(htp_connp_t *connp, htp_header_t *h, unsigned char *data, size_t len);
+htp_status_t htp_process_request_header_generic(htp_connp_t *, unsigned char *data, size_t len);
+
+// Request line and request header handling, "apache_2_2" variants.
+htp_status_t htp_parse_request_line_apache_2_2(htp_connp_t *connp);
+htp_status_t htp_process_request_header_apache_2_2(htp_connp_t *, unsigned char *data, size_t len);
+
+// Response line and response header handling, "generic" variants.
+htp_status_t htp_parse_response_line_generic(htp_connp_t *connp);
+htp_status_t htp_parse_response_header_generic(htp_connp_t *connp, htp_header_t *h, unsigned char *data, size_t len);
+htp_status_t htp_process_response_header_generic(htp_connp_t *connp, unsigned char *data, size_t len);
+
+
+// Private transaction functions
+
+htp_status_t htp_tx_state_response_complete_ex(htp_tx_t *tx, int hybrid_mode);
+
+
+// Utility functions
+
+int htp_convert_method_to_number(bstr *);
+int htp_is_lws(int c);
+int htp_is_separator(int c);
+int htp_is_text(int c);
+int htp_is_token(int c);
+int htp_chomp(unsigned char *data, size_t *len);
+int htp_is_space(int c);
+
+// Returns one of the HTP_PROTOCOL_* values; HTP_PROTOCOL_INVALID on failure.
+int htp_parse_protocol(bstr *protocol);
+
+int htp_is_line_empty(unsigned char *data, size_t len);
+int htp_is_line_whitespace(unsigned char *data, size_t len);
+
+int htp_connp_is_line_folded(unsigned char *data, size_t len);
+int htp_is_folding_char(int c);
+int htp_connp_is_line_terminator(htp_connp_t *connp, unsigned char *data, size_t len, int next_no_lf);
+int htp_connp_is_line_ignorable(htp_connp_t *connp, unsigned char *data, size_t len);
+
+// URI, authority and hostname handling.
+int htp_parse_uri(bstr *input, htp_uri_t **uri);
+htp_status_t htp_parse_hostport(bstr *authority, bstr **hostname, bstr **port, int *port_number, int *invalid);
+htp_status_t htp_parse_header_hostport(bstr *authority, bstr **hostname, bstr **port, int *port_number, uint64_t *flags);
+int htp_validate_hostname(bstr *hostname);
+int htp_parse_uri_hostport(htp_connp_t *connp, bstr *input, htp_uri_t *uri);
+int htp_normalize_parsed_uri(htp_tx_t *tx, htp_uri_t *parsed_uri_incomplete, htp_uri_t *parsed_uri);
+bstr *htp_normalize_hostname_inplace(bstr *input);
+
+int htp_decode_path_inplace(htp_tx_t *tx, bstr *path);
+
+int htp_prenormalize_uri_path_inplace(bstr *s, int *flags, int case_insensitive, int backslash, int decode_separators, int remove_consecutive);
+void htp_normalize_uri_path_inplace(bstr *s);
+
+void htp_utf8_decode_path_inplace(htp_cfg_t *cfg, htp_tx_t *tx, bstr *path);
+void htp_utf8_validate_path(htp_tx_t *tx, bstr *path);
+
+// Numeric field parsers.
+int64_t htp_parse_content_length(bstr *b, htp_connp_t *connp);
+int64_t htp_parse_chunked_length(unsigned char *data, size_t len, int *extension);
+int64_t htp_parse_positive_integer_whitespace(unsigned char *data, size_t len, int base);
+// Returns the status code, or HTP_STATUS_INVALID when it cannot be parsed
+// or is out of range.
+int htp_parse_status(bstr *status);
+// Scheme-specific Authorization header parsers; htp_parse_authorization()
+// selects one of them based on the start of the header value.
+int htp_parse_authorization_digest(htp_connp_t *connp, htp_header_t *auth_header);
+int htp_parse_authorization_basic(htp_connp_t *connp, htp_header_t *auth_header);
+int htp_parse_authorization_bearer(htp_connp_t *connp, htp_header_t *auth_header);
+
+void htp_print_log(FILE *stream, htp_log_t *log);
+
+void fprint_bstr(FILE *stream, const char *name, bstr *b);
+void fprint_raw_data(FILE *stream, const char *name, const void *data, size_t len);
+void fprint_raw_data_ex(FILE *stream, const char *name, const void *data, size_t offset, size_t len);
+
+char *htp_connp_in_state_as_string(htp_connp_t *connp);
+char *htp_connp_out_state_as_string(htp_connp_t *connp);
+char *htp_tx_request_progress_as_string(htp_tx_t *tx);
+char *htp_tx_response_progress_as_string(htp_tx_t *tx);
+
+bstr *htp_unparse_uri_noencode(htp_uri_t *uri);
+
+int htp_treat_response_line_as_body(const uint8_t *data, size_t len);
+
+htp_status_t htp_req_run_hook_body_data(htp_connp_t *connp, htp_tx_data_t *d);
+htp_status_t htp_res_run_hook_body_data(htp_connp_t *connp, htp_tx_data_t *d);
+
+htp_status_t htp_ch_urlencoded_callback_request_body_data(htp_tx_data_t *d);
+htp_status_t htp_ch_urlencoded_callback_request_headers(htp_tx_t *tx);
+htp_status_t htp_ch_urlencoded_callback_request_line(htp_tx_t *tx);
+htp_status_t htp_ch_multipart_callback_request_body_data(htp_tx_data_t *d);
+htp_status_t htp_ch_multipart_callback_request_headers(htp_tx_t *tx);
+
+// Defined in htp_php.c; rewrites p->name in a PHP-like manner.
+htp_status_t htp_php_parameter_processor(htp_param_t *p);
+
+int htp_transcode_params(htp_connp_t *connp, htp_table_t **params, int destroy_old);
+int htp_transcode_bstr(iconv_t cd, bstr *input, bstr **output);
+
+int htp_parse_single_cookie_v0(htp_connp_t *connp, unsigned char *data, size_t len);
+int htp_parse_cookies_v0(htp_connp_t *connp);
+// Entry point for Authorization header processing; sets request_auth_type on
+// the inbound transaction.
+int htp_parse_authorization(htp_connp_t *connp);
+
+htp_status_t htp_extract_quoted_string_as_bstr(unsigned char *data, size_t len, bstr **out, size_t *endoffset);
+
+htp_header_t *htp_connp_header_parse(htp_connp_t *, unsigned char *, size_t);
+
+htp_status_t htp_parse_ct_header(bstr *header, bstr **ct);
+
+htp_status_t htp_connp_req_receiver_finalize_clear(htp_connp_t *connp);
+htp_status_t htp_connp_res_receiver_finalize_clear(htp_connp_t *connp);
+
+htp_status_t htp_tx_finalize(htp_tx_t *tx);
+
+int htp_tx_is_complete(htp_tx_t *tx);
+
+htp_status_t htp_tx_state_request_complete_partial(htp_tx_t *tx);
+
+void htp_connp_tx_remove(htp_connp_t *connp, htp_tx_t *tx);
+
+void htp_tx_destroy_incomplete(htp_tx_t *tx);
+
+htp_status_t htp_tx_req_process_body_data_ex(htp_tx_t *tx, const void *data, size_t len);
+htp_status_t htp_tx_res_process_body_data_ex(htp_tx_t *tx, const void *data, size_t len);
+
+htp_status_t htp_tx_urldecode_uri_inplace(htp_tx_t *tx, bstr *input);
+htp_status_t htp_tx_urldecode_params_inplace(htp_tx_t *tx, bstr *input);
+
+void htp_connp_destroy_decompressors(htp_connp_t *connp);
+
+htp_status_t htp_header_has_token(const unsigned char *hvp, size_t hvlen, const unsigned char *value);
+
+// Fallback declarations, visible only when the corresponding HAVE_* macro
+// is not defined.
+#ifndef HAVE_STRLCAT
+size_t strlcat(char *dst, const char *src, size_t size);
+#endif
+
+#ifndef HAVE_STRLCPY
+size_t strlcpy(char *dst, const char *src, size_t size);
+#endif
+
+// Closes the extern "C" block opened near the top of this header.
+#ifdef __cplusplus
+}
+#endif
+
+// as CURL_MAX_HTTP_HEADER
+#define HTP_MAX_HEADER_FOLDED 102400
+
+#endif /* _HTP_PRIVATE_H */
+
diff --git a/htp/htp_request.c b/htp/htp_request.c
new file mode 100644
index 0000000..9fddbd8
--- /dev/null
+++
b/htp/htp_request.c @@ -0,0 +1,1173 @@ +/*************************************************************************** + * Copyright (c) 2009-2010 Open Information Security Foundation + * Copyright (c) 2010-2013 Qualys, Inc. + * All rights reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions are + * met: + * + * - Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + + * - Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution. + + * - Neither the name of the Qualys, Inc. nor the names of its + * contributors may be used to endorse or promote products derived from + * this software without specific prior written permission. + * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS + * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT + * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR + * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT + * HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, + * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT + * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, + * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY + * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT + * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE + * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
+ ***************************************************************************/ + +/** + * @file + * @author Ivan Ristic <ivanr@webkreator.com> + */ + +#include "htp_config_auto.h" + +#include "htp_private.h" + +#define IN_TEST_NEXT_BYTE_OR_RETURN(X) \ +if ((X)->in_current_read_offset >= (X)->in_current_len) { \ + return HTP_DATA; \ +} + +#define IN_PEEK_NEXT(X) \ +if ((X)->in_current_read_offset >= (X)->in_current_len) { \ + (X)->in_next_byte = -1; \ +} else { \ + (X)->in_next_byte = (X)->in_current_data[(X)->in_current_read_offset]; \ +} + +#define IN_NEXT_BYTE(X) \ +if ((X)->in_current_read_offset < (X)->in_current_len) { \ + (X)->in_next_byte = (X)->in_current_data[(X)->in_current_read_offset]; \ + (X)->in_current_read_offset++; \ + (X)->in_current_consume_offset++; \ + (X)->in_stream_offset++; \ +} else { \ + (X)->in_next_byte = -1; \ +} + +#define IN_NEXT_BYTE_OR_RETURN(X) \ +if ((X)->in_current_read_offset < (X)->in_current_len) { \ + (X)->in_next_byte = (X)->in_current_data[(X)->in_current_read_offset]; \ + (X)->in_current_read_offset++; \ + (X)->in_current_consume_offset++; \ + (X)->in_stream_offset++; \ +} else { \ + return HTP_DATA; \ +} + +#define IN_COPY_BYTE_OR_RETURN(X) \ +if ((X)->in_current_read_offset < (X)->in_current_len) { \ + (X)->in_next_byte = (X)->in_current_data[(X)->in_current_read_offset]; \ + (X)->in_current_read_offset++; \ + (X)->in_stream_offset++; \ +} else { \ + return HTP_DATA_BUFFER; \ +} + +/** + * Sends outstanding connection data to the currently active data receiver hook. + * + * @param[in] connp + * @param[in] is_last + * @return HTP_OK, or a value returned from a callback. 
+ */ +static htp_status_t htp_connp_req_receiver_send_data(htp_connp_t *connp, int is_last) { + if (connp->in_data_receiver_hook == NULL) return HTP_OK; + + htp_tx_data_t d; + d.tx = connp->in_tx; + d.data = connp->in_current_data + connp->in_current_receiver_offset; + d.len = connp->in_current_read_offset - connp->in_current_receiver_offset; + d.is_last = is_last; + + htp_status_t rc = htp_hook_run_all(connp->in_data_receiver_hook, &d); + if (rc != HTP_OK) return rc; + + connp->in_current_receiver_offset = connp->in_current_read_offset; + + return HTP_OK; +} + +/** + * Configures the data receiver hook. If there is a previous hook, it will be finalized and cleared. + * + * @param[in] connp + * @param[in] data_receiver_hook + * @return HTP_OK, or a value returned from a callback. + */ +static htp_status_t htp_connp_req_receiver_set(htp_connp_t *connp, htp_hook_t *data_receiver_hook) { + htp_status_t rc = htp_connp_req_receiver_finalize_clear(connp); + + connp->in_data_receiver_hook = data_receiver_hook; + connp->in_current_receiver_offset = connp->in_current_read_offset; + + return rc; +} + +/** + * Finalizes an existing data receiver hook by sending any outstanding data to it. The + * hook is then removed so that it receives no more data. + * + * @param[in] connp + * @return HTP_OK, or a value returned from a callback. + */ +htp_status_t htp_connp_req_receiver_finalize_clear(htp_connp_t *connp) { + if (connp->in_data_receiver_hook == NULL) return HTP_OK; + + htp_status_t rc = htp_connp_req_receiver_send_data(connp, 1 /* last */); + + connp->in_data_receiver_hook = NULL; + + return rc; +} + +/** + * Handles request parser state changes. At the moment, this function is used only + * to configure data receivers, which are sent raw connection data. + * + * @param[in] connp + * @return HTP_OK, or a value returned from a callback. 
+ */ +static htp_status_t htp_req_handle_state_change(htp_connp_t *connp) { + if (connp->in_state_previous == connp->in_state) return HTP_OK; + + if (connp->in_state == htp_connp_REQ_HEADERS) { + htp_status_t rc = HTP_OK; + + switch (connp->in_tx->request_progress) { + case HTP_REQUEST_HEADERS: + rc = htp_connp_req_receiver_set(connp, connp->in_tx->cfg->hook_request_header_data); + break; + + case HTP_REQUEST_TRAILER: + rc = htp_connp_req_receiver_set(connp, connp->in_tx->cfg->hook_request_trailer_data); + break; + + default: + // Do nothing; receivers are currently used only for header blocks. + break; + } + + if (rc != HTP_OK) return rc; + } + + // Initially, I had the finalization of raw data sending here, but that + // caused the last REQUEST_HEADER_DATA hook to be invoked after the + // REQUEST_HEADERS hook -- which I thought made no sense. For that reason, + // the finalization is now initiated from the request header processing code, + // which is less elegant but provides a better user experience. Having some + // (or all) hooks to be invoked on state change might work better. + + connp->in_state_previous = connp->in_state; + + return HTP_OK; +} + +/** + * If there is any data left in the inbound data chunk, this function will preserve + * it for later consumption. The maximum amount accepted for buffering is controlled + * by htp_config_t::field_limit_hard. + * + * @param[in] connp + * @return HTP_OK, or HTP_ERROR on fatal failure. + */ +static htp_status_t htp_connp_req_buffer(htp_connp_t *connp) { + if (connp->in_current_data == NULL) return HTP_OK; + + unsigned char *data = connp->in_current_data + connp->in_current_consume_offset; + size_t len = connp->in_current_read_offset - connp->in_current_consume_offset; + + if (len == 0) + return HTP_OK; + + // Check the hard (buffering) limit. + + size_t newlen = connp->in_buf_size + len; + + // When calculating the size of the buffer, take into account the + // space we're using for the request header buffer. 
+ if (connp->in_header != NULL) { + newlen += bstr_len(connp->in_header); + } + + if (newlen > connp->in_tx->cfg->field_limit_hard) { + htp_log(connp, HTP_LOG_MARK, HTP_LOG_ERROR, 0, "Request buffer over the limit: size %zd limit %zd.", + newlen, connp->in_tx->cfg->field_limit_hard); + return HTP_ERROR; + } + + // Copy the data remaining in the buffer. + + if (connp->in_buf == NULL) { + connp->in_buf = malloc(len); + if (connp->in_buf == NULL) return HTP_ERROR; + memcpy(connp->in_buf, data, len); + connp->in_buf_size = len; + } else { + size_t newsize = connp->in_buf_size + len; + unsigned char *newbuf = realloc(connp->in_buf, newsize); + if (newbuf == NULL) return HTP_ERROR; + connp->in_buf = newbuf; + memcpy(connp->in_buf + connp->in_buf_size, data, len); + connp->in_buf_size = newsize; + } + + // Reset the consumer position. + connp->in_current_consume_offset = connp->in_current_read_offset; + + return HTP_OK; +} + +/** + * Returns to the caller the memory region that should be processed next. This function + * hides away the buffering process from the rest of the code, allowing it to work with + * non-buffered data that's in the inbound chunk, or buffered data that's in our structures. + * + * @param[in] connp + * @param[out] data + * @param[out] len + * @return HTP_OK + */ +static htp_status_t htp_connp_req_consolidate_data(htp_connp_t *connp, unsigned char **data, size_t *len) { + if (connp->in_buf == NULL) { + // We do not have any data buffered; point to the current data chunk. + *data = connp->in_current_data + connp->in_current_consume_offset; + *len = connp->in_current_read_offset - connp->in_current_consume_offset; + } else { + // We already have some data in the buffer. Add the data from the current + // chunk to it, and point to the consolidated buffer. 
+ if (htp_connp_req_buffer(connp) != HTP_OK) { + return HTP_ERROR; + } + + *data = connp->in_buf; + *len = connp->in_buf_size; + } + + return HTP_OK; +} + +/** + * Clears buffered inbound data and resets the consumer position to the reader position. + * + * @param[in] connp + */ +static void htp_connp_req_clear_buffer(htp_connp_t *connp) { + connp->in_current_consume_offset = connp->in_current_read_offset; + + if (connp->in_buf != NULL) { + free(connp->in_buf); + connp->in_buf = NULL; + connp->in_buf_size = 0; + } +} + +/** + * Performs a check for a CONNECT transaction to decide whether inbound + * parsing needs to be suspended. + * + * @param[in] connp + * @return HTP_OK if the request does not use CONNECT, HTP_DATA_OTHER if + * inbound parsing needs to be suspended until we hear from the + * other side + */ +htp_status_t htp_connp_REQ_CONNECT_CHECK(htp_connp_t *connp) { + // If the request uses the CONNECT method, then there will + // not be a request body, but first we need to wait to see the + // response in order to determine if the tunneling request + // was a success. + if (connp->in_tx->request_method_number == HTP_M_CONNECT) { + connp->in_state = htp_connp_REQ_CONNECT_WAIT_RESPONSE; + connp->in_status = HTP_STREAM_DATA_OTHER; + return HTP_DATA_OTHER; + } + + // Continue to the next step to determine + // the presence of request body + connp->in_state = htp_connp_REQ_BODY_DETERMINE; + + return HTP_OK; +} + +/** + * Determines whether inbound parsing needs to continue or stop. In + * case the data appears to be plain text HTTP, we try to continue. + * + * @param[in] connp + * @return HTP_OK if the parser can resume parsing, HTP_DATA_BUFFER if + * we need more data. + */ +htp_status_t htp_connp_REQ_CONNECT_PROBE_DATA(htp_connp_t *connp) { + for (;;) {//;i < max_read; i++) { + IN_PEEK_NEXT(connp); + // Have we reached the end of the line? 
For some reason + // we can't test after IN_COPY_BYTE_OR_RETURN */ + if (connp->in_next_byte == LF || connp->in_next_byte == 0x00) + break; + + IN_COPY_BYTE_OR_RETURN(connp); + + } + + unsigned char *data; + size_t len; + if (htp_connp_req_consolidate_data(connp, &data, &len) != HTP_OK) { + return HTP_ERROR; + } +#ifdef HTP_DEBUG + fprint_raw_data(stderr, "PROBING", data, len); +#endif + + size_t pos = 0; + size_t mstart = 0; + // skip past leading whitespace. IIS allows this + while ((pos < len) && htp_is_space(data[pos])) + pos++; + if (pos) + mstart = pos; + // The request method starts at the beginning of the + // line and ends with the first whitespace character. + while ((pos < len) && (!htp_is_space(data[pos]))) + pos++; + + int methodi = HTP_M_UNKNOWN; + bstr *method = bstr_dup_mem(data + mstart, pos - mstart); + if (method) { + methodi = htp_convert_method_to_number(method); + bstr_free(method); + } + if (methodi != HTP_M_UNKNOWN) { +#ifdef HTP_DEBUG + fprint_raw_data(stderr, "htp_connp_REQ_CONNECT_PROBE_DATA: tunnel contains plain text HTTP", data, len); +#endif + return htp_tx_state_request_complete(connp->in_tx); + } else { +#ifdef HTP_DEBUG + fprint_raw_data(stderr, "htp_connp_REQ_CONNECT_PROBE_DATA: tunnel is not HTTP", data, len); +#endif + connp->in_status = HTP_STREAM_TUNNEL; + connp->out_status = HTP_STREAM_TUNNEL; + } + + // not calling htp_connp_req_clear_buffer, we're not consuming the data + + return HTP_OK; +} + +/** + * Determines whether inbound parsing, which was suspended after + * encountering a CONNECT transaction, can proceed (after receiving + * the response). + * + * @param[in] connp + * @return HTP_OK if the parser can resume parsing, HTP_DATA_OTHER if + * it needs to continue waiting. + */ +htp_status_t htp_connp_REQ_CONNECT_WAIT_RESPONSE(htp_connp_t *connp) { + // Check that we saw the response line of the current inbound transaction. 
+ if (connp->in_tx->response_progress <= HTP_RESPONSE_LINE) { + return HTP_DATA_OTHER; + } + + // A 2xx response means a tunnel was established. Anything + // else means we continue to follow the HTTP stream. + if ((connp->in_tx->response_status_number >= 200) && (connp->in_tx->response_status_number <= 299)) { + // TODO Check that the server did not accept a connection to itself. + + // The requested tunnel was established: we are going + // to probe the remaining data on this stream to see + // if we need to ignore it or parse it + connp->in_state = htp_connp_REQ_CONNECT_PROBE_DATA; + } else { + // No tunnel; continue to the next transaction + connp->in_state = htp_connp_REQ_FINALIZE; + } + + return HTP_OK; +} + +/** + * Consumes bytes until the end of the current line. + * + * @param[in] connp + * @returns HTP_OK on state change, HTP_ERROR on error, or HTP_DATA when more data is needed. + */ +htp_status_t htp_connp_REQ_BODY_CHUNKED_DATA_END(htp_connp_t *connp) { + // TODO We shouldn't really see anything apart from CR and LF, + // so we should warn about anything else. + + for (;;) { + IN_NEXT_BYTE_OR_RETURN(connp); + + connp->in_tx->request_message_len++; + + if (connp->in_next_byte == LF) { + connp->in_state = htp_connp_REQ_BODY_CHUNKED_LENGTH; + return HTP_OK; + } + } + + return HTP_ERROR; +} + +/** + * Processes a chunk of data. + * + * @param[in] connp + * @returns HTP_OK on state change, HTP_ERROR on error, or HTP_DATA when more data is needed. + */ +htp_status_t htp_connp_REQ_BODY_CHUNKED_DATA(htp_connp_t *connp) { + // Determine how many bytes we can consume. + size_t bytes_to_consume; + if (connp->in_current_len - connp->in_current_read_offset >= connp->in_chunked_length) { + // Entire chunk available in the buffer; read all of it. + bytes_to_consume = connp->in_chunked_length; + } else { + // Partial chunk available in the buffer; read as much as we can. 
+ bytes_to_consume = connp->in_current_len - connp->in_current_read_offset; + } + + #ifdef HTP_DEBUG + fprintf(stderr, "htp_connp_REQ_BODY_CHUNKED_DATA Consuming %zd bytes\n", bytes_to_consume); + #endif + + // If the input buffer is empty, ask for more data. + if (bytes_to_consume == 0) return HTP_DATA; + + // Consume the data. + htp_status_t rc = htp_tx_req_process_body_data_ex(connp->in_tx, connp->in_current_data + connp->in_current_read_offset, bytes_to_consume); + if (rc != HTP_OK) return rc; + + // Adjust counters. + connp->in_current_read_offset += bytes_to_consume; + connp->in_current_consume_offset += bytes_to_consume; + connp->in_stream_offset += bytes_to_consume; + connp->in_tx->request_message_len += bytes_to_consume; + connp->in_chunked_length -= bytes_to_consume; + + if (connp->in_chunked_length == 0) { + // End of the chunk. + connp->in_state = htp_connp_REQ_BODY_CHUNKED_DATA_END; + return HTP_OK; + } + + // Ask for more data. + return HTP_DATA; +} + +/** + * Extracts chunk length. + * + * @param[in] connp + * @returns HTP_OK on state change, HTP_ERROR on error, or HTP_DATA when more data is needed. + */ +htp_status_t htp_connp_REQ_BODY_CHUNKED_LENGTH(htp_connp_t *connp) { + for (;;) { + IN_COPY_BYTE_OR_RETURN(connp); + + // Have we reached the end of the line? + if (connp->in_next_byte == LF) { + unsigned char *data; + size_t len; + + if (htp_connp_req_consolidate_data(connp, &data, &len) != HTP_OK) { + return HTP_ERROR; + } + + connp->in_tx->request_message_len += len; + + #ifdef HTP_DEBUG + fprint_raw_data(stderr, "Chunk length line", data, len); + #endif + + htp_chomp(data, &len); + + int chunk_ext = 0; + connp->in_chunked_length = htp_parse_chunked_length(data, len, &chunk_ext); + if (chunk_ext == 1) { + htp_log(connp, HTP_LOG_MARK, HTP_LOG_WARNING, 0, "Request chunk extension"); + } + + htp_connp_req_clear_buffer(connp); + + // Handle chunk length. + if (connp->in_chunked_length > 0) { + // More data available. 
+ connp->in_state = htp_connp_REQ_BODY_CHUNKED_DATA; + } else if (connp->in_chunked_length == 0) { + // End of data. + connp->in_state = htp_connp_REQ_HEADERS; + connp->in_tx->request_progress = HTP_REQUEST_TRAILER; + } else { + // Invalid chunk length. + htp_log(connp, HTP_LOG_MARK, HTP_LOG_ERROR, 0, "Request chunk encoding: Invalid chunk length"); + return HTP_ERROR; + } + + return HTP_OK; + } + } + + return HTP_ERROR; +} + +/** + * Processes identity request body. + * + * @param[in] connp + * @returns HTP_OK on state change, HTP_ERROR on error, or HTP_DATA when more data is needed. + */ +htp_status_t htp_connp_REQ_BODY_IDENTITY(htp_connp_t *connp) { + // Determine how many bytes we can consume. + size_t bytes_to_consume; + if (connp->in_current_len - connp->in_current_read_offset >= connp->in_body_data_left) { + bytes_to_consume = connp->in_body_data_left; + } else { + bytes_to_consume = connp->in_current_len - connp->in_current_read_offset; + } + + // If the input buffer is empty, ask for more data. + if (bytes_to_consume == 0) return HTP_DATA; + + // Consume data. + int rc = htp_tx_req_process_body_data_ex(connp->in_tx, connp->in_current_data + connp->in_current_read_offset, bytes_to_consume); + if (rc != HTP_OK) return rc; + + // Adjust counters. + connp->in_current_read_offset += bytes_to_consume; + connp->in_current_consume_offset += bytes_to_consume; + connp->in_stream_offset += bytes_to_consume; + connp->in_tx->request_message_len += bytes_to_consume; + connp->in_body_data_left -= bytes_to_consume; + + if (connp->in_body_data_left == 0) { + // End of request body. + connp->in_state = htp_connp_REQ_FINALIZE; + return HTP_OK; + } + + // Ask for more data. + return HTP_DATA; +} + +/** + * Determines presence (and encoding) of a request body. + * + * @param[in] connp + * @returns HTP_OK on state change, HTP_ERROR on error, or HTP_DATA when more data is needed. 
+ */ +htp_status_t htp_connp_REQ_BODY_DETERMINE(htp_connp_t *connp) { + // Determine the next state based on the presence of the request + // body, and the coding used. + switch (connp->in_tx->request_transfer_coding) { + + case HTP_CODING_CHUNKED: + connp->in_state = htp_connp_REQ_BODY_CHUNKED_LENGTH; + connp->in_tx->request_progress = HTP_REQUEST_BODY; + break; + + case HTP_CODING_IDENTITY: + connp->in_content_length = connp->in_tx->request_content_length; + connp->in_body_data_left = connp->in_content_length; + + if (connp->in_content_length != 0) { + connp->in_state = htp_connp_REQ_BODY_IDENTITY; + connp->in_tx->request_progress = HTP_REQUEST_BODY; + } else { + connp->in_tx->connp->in_state = htp_connp_REQ_FINALIZE; + } + break; + + case HTP_CODING_NO_BODY: + // This request does not have a body, which + // means that we're done with it + connp->in_state = htp_connp_REQ_FINALIZE; + break; + + default: + // Should not be here + return HTP_ERROR; + break; + } + + return HTP_OK; +} + +/** + * Parses request headers. + * + * @param[in] connp + * @returns HTP_OK on state change, HTP_ERROR on error, or HTP_DATA when more data is needed. + */ +htp_status_t htp_connp_REQ_HEADERS(htp_connp_t *connp) { + for (;;) { + if (connp->in_status == HTP_STREAM_CLOSED) { + // Parse previous header, if any. + if (connp->in_header != NULL) { + if (connp->cfg->process_request_header(connp, bstr_ptr(connp->in_header), + bstr_len(connp->in_header)) != HTP_OK) + return HTP_ERROR; + bstr_free(connp->in_header); + connp->in_header = NULL; + } + + htp_connp_req_clear_buffer(connp); + + connp->in_tx->request_progress = HTP_REQUEST_TRAILER; + + // We've seen all the request headers. + return htp_tx_state_request_headers(connp->in_tx); + } + IN_COPY_BYTE_OR_RETURN(connp); + + // Have we reached the end of the line? 
+ if (connp->in_next_byte == LF) { + unsigned char *data; + size_t len; + + if (htp_connp_req_consolidate_data(connp, &data, &len) != HTP_OK) { + return HTP_ERROR; + } + + #ifdef HTP_DEBUG + fprint_raw_data(stderr, __func__, data, len); + #endif + + // Should we terminate headers? + if (htp_connp_is_line_terminator(connp, data, len, 0)) { + // Parse previous header, if any. + if (connp->in_header != NULL) { + if (connp->cfg->process_request_header(connp, bstr_ptr(connp->in_header), + bstr_len(connp->in_header)) != HTP_OK) return HTP_ERROR; + + bstr_free(connp->in_header); + connp->in_header = NULL; + } + + htp_connp_req_clear_buffer(connp); + + // We've seen all the request headers. + return htp_tx_state_request_headers(connp->in_tx); + } + + htp_chomp(data, &len); + + // Check for header folding. + if (htp_connp_is_line_folded(data, len) == 0) { + // New header line. + + // Parse previous header, if any. + if (connp->in_header != NULL) { + if (connp->cfg->process_request_header(connp, bstr_ptr(connp->in_header), + bstr_len(connp->in_header)) != HTP_OK) return HTP_ERROR; + + bstr_free(connp->in_header); + connp->in_header = NULL; + } + + IN_PEEK_NEXT(connp); + + if (connp->in_next_byte != -1 && htp_is_folding_char(connp->in_next_byte) == 0) { + // Because we know this header is not folded, we can process the buffer straight away. + if (connp->cfg->process_request_header(connp, data, len) != HTP_OK) return HTP_ERROR; + } else { + // Keep the partial header data for parsing later. + connp->in_header = bstr_dup_mem(data, len); + if (connp->in_header == NULL) return HTP_ERROR; + } + } else { + // Folding; check that there's a previous header line to add to. + if (connp->in_header == NULL) { + // Invalid folding. + + // Warn only once per transaction. 
+ if (!(connp->in_tx->flags & HTP_INVALID_FOLDING)) { + connp->in_tx->flags |= HTP_INVALID_FOLDING; + htp_log(connp, HTP_LOG_MARK, HTP_LOG_WARNING, 0, "Invalid request field folding"); + } + + // Keep the header data for parsing later. + size_t trim = 0; + while(trim < len) { + if (!htp_is_folding_char(data[trim])) { + break; + } + trim++; + } + connp->in_header = bstr_dup_mem(data + trim, len - trim); + if (connp->in_header == NULL) return HTP_ERROR; + } else { + // Add to the existing header. + if (bstr_len(connp->in_header) < HTP_MAX_HEADER_FOLDED) { + bstr *new_in_header = bstr_add_mem(connp->in_header, data, len); + if (new_in_header == NULL) return HTP_ERROR; + connp->in_header = new_in_header; + } else { + htp_log(connp, HTP_LOG_MARK, HTP_LOG_WARNING, 0, "Request field length exceeds folded maximum"); + } + } + } + + htp_connp_req_clear_buffer(connp); + } + } + + return HTP_ERROR; +} + +// HTTP/0.9 is supposed to be only a request line without protocol. +// Libhtp will still consider the request to be HTTP/0.9 if there +// are some junk whitespaces after that request line. +// Libhtp allows the small value of 16 extra bytes/whitespaces, +// otherwise we consider it to be a HTTP/1.x request with missing protocol. +// It is unlikely to meet HTTP/0.9, and we want to limit probing. +#define HTTP09_MAX_JUNK_LEN 16 + +/** + * Determines request protocol. + * + * @param[in] connp + * @returns HTP_OK on state change, HTP_ERROR on error, or HTP_DATA when more data is needed. + */ +htp_status_t htp_connp_REQ_PROTOCOL(htp_connp_t *connp) { + // Is this a short-style HTTP/0.9 request? If it is, + // we will not want to parse request headers. + if (connp->in_tx->is_protocol_0_9 == 0) { + // Switch to request header parsing. 
+ connp->in_state = htp_connp_REQ_HEADERS; + connp->in_tx->request_progress = HTP_REQUEST_HEADERS; + } else { + // Let's check if the protocol was simply missing + int64_t pos = connp->in_current_read_offset; + // Probe if data looks like a header line + if (connp->in_current_len > connp->in_current_read_offset + HTTP09_MAX_JUNK_LEN) { + htp_log(connp, HTP_LOG_MARK, HTP_LOG_WARNING, 0, "Request line: missing protocol"); + connp->in_tx->is_protocol_0_9 = 0; + // Switch to request header parsing. + connp->in_state = htp_connp_REQ_HEADERS; + connp->in_tx->request_progress = HTP_REQUEST_HEADERS; + return HTP_OK; + } + while (pos < connp->in_current_len) { + if (!htp_is_space(connp->in_current_data[pos])) { + htp_log(connp, HTP_LOG_MARK, HTP_LOG_WARNING, 0, "Request line: missing protocol"); + connp->in_tx->is_protocol_0_9 = 0; + // Switch to request header parsing. + connp->in_state = htp_connp_REQ_HEADERS; + connp->in_tx->request_progress = HTP_REQUEST_HEADERS; + return HTP_OK; + } + pos++; + } + // We're done with this request. + connp->in_state = htp_connp_REQ_FINALIZE; + } + + return HTP_OK; +} + +/** + * Parse the request line. + * + * @param[in] connp + * @returns HTP_OK on succesful parse, HTP_ERROR on error. + */ +htp_status_t htp_connp_REQ_LINE_complete(htp_connp_t *connp) { + unsigned char *data; + size_t len; + + if (htp_connp_req_consolidate_data(connp, &data, &len) != HTP_OK) { + return HTP_ERROR; + } + + #ifdef HTP_DEBUG + fprint_raw_data(stderr, __func__, data, len); + #endif + if (len == 0) { + htp_connp_req_clear_buffer(connp); + return HTP_DATA; + } + + // Is this a line that should be ignored? + if (htp_connp_is_line_ignorable(connp, data, len)) { + // We have an empty/whitespace line, which we'll note, ignore and move on. + connp->in_tx->request_ignored_lines++; + + htp_connp_req_clear_buffer(connp); + + return HTP_OK; + } + + // Process request line. 
+ + htp_chomp(data, &len); + + connp->in_tx->request_line = bstr_dup_mem(data, len); + if (connp->in_tx->request_line == NULL) + return HTP_ERROR; + + if (connp->cfg->parse_request_line(connp) != HTP_OK) + return HTP_ERROR; + + // Finalize request line parsing. + + if (htp_tx_state_request_line(connp->in_tx) != HTP_OK) + return HTP_ERROR; + + htp_connp_req_clear_buffer(connp); + + return HTP_OK; +} + +/** + * Parses request line. + * + * @param[in] connp + * @returns HTP_OK on state change, HTP_ERROR on error, or HTP_DATA when more data is needed. + */ +htp_status_t htp_connp_REQ_LINE(htp_connp_t *connp) { + for (;;) { + // Get one byte + IN_PEEK_NEXT(connp); + if (connp->in_status == HTP_STREAM_CLOSED && connp->in_next_byte == -1) { + return htp_connp_REQ_LINE_complete(connp); + } + IN_COPY_BYTE_OR_RETURN(connp); + + // Have we reached the end of the line? + if (connp->in_next_byte == LF) { + return htp_connp_REQ_LINE_complete(connp); + } + } + + return HTP_ERROR; +} + +htp_status_t htp_connp_REQ_FINALIZE(htp_connp_t *connp) { + if (connp->in_status != HTP_STREAM_CLOSED) { + IN_PEEK_NEXT(connp); + if (connp->in_next_byte == -1) { + return htp_tx_state_request_complete(connp->in_tx); + } + if (connp->in_next_byte != LF || connp->in_current_consume_offset >= connp->in_current_read_offset) { + for (;;) {//;i < max_read; i++) { + // peek until LF but do not mark it read so that REQ_LINE works + IN_PEEK_NEXT(connp); + if (connp->in_next_byte == LF) + break; + IN_COPY_BYTE_OR_RETURN(connp); + } + } + } + + unsigned char *data; + size_t len; + if (htp_connp_req_consolidate_data(connp, &data, &len) != HTP_OK) { + return HTP_ERROR; + } +#ifdef HTP_DEBUG + fprint_raw_data(stderr, "PROBING request finalize", data, len); +#endif + if (len == 0) { + //closing + return htp_tx_state_request_complete(connp->in_tx); + } + + size_t pos = 0; + size_t mstart = 0; + // skip past leading whitespace. 
IIS allows this + while ((pos < len) && htp_is_space(data[pos])) + pos++; + if (pos) + mstart = pos; + // The request method starts at the beginning of the + // line and ends with the first whitespace character. + while ((pos < len) && (!htp_is_space(data[pos]))) + pos++; + + if (pos > mstart) { + //non empty whitespace line + int methodi = HTP_M_UNKNOWN; + bstr *method = bstr_dup_mem(data + mstart, pos - mstart); + if (method) { + methodi = htp_convert_method_to_number(method); + bstr_free(method); + } + if (methodi != HTP_M_UNKNOWN) { + connp->in_body_data_left = -1; + return htp_tx_state_request_complete(connp->in_tx); + } // else continue + if (connp->in_body_data_left <= 0) { + // log only once per transaction + htp_log(connp, HTP_LOG_MARK, HTP_LOG_WARNING, 0, "Unexpected request body"); + } else { + connp->in_body_data_left = 1; + } + } + //Adds linefeed to the buffer if there was one + if (connp->in_next_byte == LF) { + IN_COPY_BYTE_OR_RETURN(connp); + htp_connp_req_consolidate_data(connp, &data, &len); + } + // Interpret remaining bytes as body data + htp_status_t rc = htp_tx_req_process_body_data_ex(connp->in_tx, data, len); + htp_connp_req_clear_buffer(connp); + return rc; +} + +htp_status_t htp_connp_REQ_IGNORE_DATA_AFTER_HTTP_0_9(htp_connp_t *connp) { + // Consume whatever is left in the buffer. + + size_t bytes_left = connp->in_current_len - connp->in_current_read_offset; + + if (bytes_left > 0) { + connp->conn->flags |= HTP_CONN_HTTP_0_9_EXTRA; + } + + connp->in_current_read_offset += bytes_left; + connp->in_current_consume_offset += bytes_left; + connp->in_stream_offset += bytes_left; + + return HTP_DATA; +} + +/** + * The idle state is where the parser will end up after a transaction is processed. + * If there is more data available, a new request will be started. + * + * @param[in] connp + * @returns HTP_OK on state change, HTP_ERROR on error, or HTP_DATA when more data is needed. 
+ */
+htp_status_t htp_connp_REQ_IDLE(htp_connp_t * connp) {
+    // We want to start parsing the next request (and change
+    // the state from IDLE) only if there's at least one
+    // byte of data available. Otherwise we could be creating
+    // new structures even if there's no more data on the
+    // connection.
+    IN_TEST_NEXT_BYTE_OR_RETURN(connp);
+
+    connp->in_tx = htp_connp_tx_create(connp);
+    if (connp->in_tx == NULL) return HTP_ERROR;
+
+    // Change state to TRANSACTION_START
+    htp_tx_state_request_start(connp->in_tx);
+
+    return HTP_OK;
+}
+
+/**
+ * Returns how many bytes from the current data chunks were consumed so far.
+ *
+ * @param[in] connp
+ * @return The number of bytes consumed.
+ */
+size_t htp_connp_req_data_consumed(htp_connp_t *connp) {
+    return connp->in_current_read_offset;
+}
+
+/**
+ * Feeds one chunk of inbound data to the request parser and runs the
+ * current state handler in a loop until it asks for more data, suspends,
+ * stops or fails.
+ *
+ * @param[in] connp
+ * @param[in] timestamp Copied into connp->in_timestamp when not NULL.
+ * @param[in] data Chunk bytes. NULL together with a non-zero len denotes a
+ *                 gap, which only some states accept.
+ * @param[in] len Chunk length. Zero is accepted only when the stream status
+ *                is HTP_STREAM_CLOSED.
+ * @return One of the HTP_STREAM_* codes. Note that a rejected zero-length
+ *         chunk and a rejected gap both yield HTP_STREAM_CLOSED.
+ */
+int htp_connp_req_data(htp_connp_t *connp, const htp_time_t *timestamp, const void *data, size_t len) {
+    #ifdef HTP_DEBUG
+    fprintf(stderr, "htp_connp_req_data(connp->in_status %x)\n", connp->in_status);
+    fprint_raw_data(stderr, __func__, data, len);
+    #endif
+
+    // Return if the connection is in stop state.
+    if (connp->in_status == HTP_STREAM_STOP) {
+        htp_log(connp, HTP_LOG_MARK, HTP_LOG_INFO, 0, "Inbound parser is in HTP_STREAM_STOP");
+        return HTP_STREAM_STOP;
+    }
+
+    // Return if the connection had a fatal error earlier
+    if (connp->in_status == HTP_STREAM_ERROR) {
+        htp_log(connp, HTP_LOG_MARK, HTP_LOG_ERROR, 0, "Inbound parser is in HTP_STREAM_ERROR");
+
+        #ifdef HTP_DEBUG
+        fprintf(stderr, "htp_connp_req_data: returning HTP_STREAM_ERROR (previous error)\n");
+        #endif
+
+        return HTP_STREAM_ERROR;
+    }
+
+    // Sanity check: we must have a transaction pointer if the state is not IDLE (no inbound transaction)
+    if ((connp->in_tx == NULL)&&(connp->in_state != htp_connp_REQ_IDLE)) {
+        connp->in_status = HTP_STREAM_ERROR;
+
+        htp_log(connp, HTP_LOG_MARK, HTP_LOG_ERROR, 0, "Missing inbound transaction data");
+
+        return HTP_STREAM_ERROR;
+    }
+
+    // If the length of the supplied data chunk is zero, proceed
+    // only if the stream has been closed. We do not allow zero-sized
+    // chunks in the API, but we use them internally to force the parsers
+    // to finalize parsing.
+    if (len == 0 && connp->in_status != HTP_STREAM_CLOSED) {
+        htp_log(connp, HTP_LOG_MARK, HTP_LOG_ERROR, 0, "Zero-length data chunks are not allowed");
+
+        #ifdef HTP_DEBUG
+        fprintf(stderr, "htp_connp_req_data: returning HTP_STREAM_CLOSED (zero-length chunk)\n");
+        #endif
+
+        return HTP_STREAM_CLOSED;
+    }
+
+    // Remember the timestamp of the current request data chunk
+    if (timestamp != NULL) {
+        memcpy(&connp->in_timestamp, timestamp, sizeof (*timestamp));
+    }
+
+    // Store the current chunk information
+    connp->in_current_data = (unsigned char *) data;
+    connp->in_current_len = len;
+    connp->in_current_read_offset = 0;
+    connp->in_current_consume_offset = 0;
+    connp->in_current_receiver_offset = 0;
+    connp->in_chunk_count++;
+
+    htp_conn_track_inbound_data(connp->conn, len, timestamp);
+
+
+    // Return without processing any data if the stream is in tunneling
+    // mode (which it would be after an initial CONNECT transaction).
+    if (connp->in_status == HTP_STREAM_TUNNEL) {
+        #ifdef HTP_DEBUG
+        fprintf(stderr, "htp_connp_req_data: returning HTP_STREAM_TUNNEL\n");
+        #endif
+
+        return HTP_STREAM_TUNNEL;
+    }
+
+    if (connp->out_status == HTP_STREAM_DATA_OTHER) {
+        connp->out_status = HTP_STREAM_DATA;
+    }
+
+    // Invoke a processor, in a loop, until an error
+    // occurs or until we run out of data. Many processors
+    // will process a request, each pointing to the next
+    // processor that needs to run.
+    for (;;) {
+        #ifdef HTP_DEBUG
+        fprintf(stderr, "htp_connp_req_data: in state=%s, progress=%s\n",
+                htp_connp_in_state_as_string(connp),
+                htp_tx_request_progress_as_string(connp->in_tx));
+        #endif
+
+        // Return if there's been an error or if we've run out of data. We are relying
+        // on processors to supply error messages, so we'll keep quiet here.
+
+        htp_status_t rc;
+        //handle gap
+        if (data == NULL && len > 0) {
+            //cannot switch over a function pointer in C
+            if (connp->in_state == htp_connp_REQ_BODY_IDENTITY ||
+                connp->in_state == htp_connp_REQ_IGNORE_DATA_AFTER_HTTP_0_9) {
+                rc = connp->in_state(connp);
+            } else if (connp->in_state == htp_connp_REQ_FINALIZE) {
+                //simple version without probing
+                rc = htp_tx_state_request_complete(connp->in_tx);
+            } else {
+                // go to htp_connp_REQ_CONNECT_PROBE_DATA ?
+                htp_log(connp, HTP_LOG_MARK, HTP_LOG_ERROR, 0, "Gaps are not allowed during this state");
+                return HTP_STREAM_CLOSED;
+            }
+        } else {
+            rc = connp->in_state(connp);
+        }
+        if (rc == HTP_OK) {
+            if (connp->in_status == HTP_STREAM_TUNNEL) {
+                #ifdef HTP_DEBUG
+                fprintf(stderr, "htp_connp_req_data: returning HTP_STREAM_TUNNEL\n");
+                #endif
+
+                return HTP_STREAM_TUNNEL;
+            }
+
+            rc = htp_req_handle_state_change(connp);
+        }
+
+        if (rc != HTP_OK) {
+            // Do we need more data?
+            if ((rc == HTP_DATA) || (rc == HTP_DATA_BUFFER)) {
+                htp_connp_req_receiver_send_data(connp, 0 /* not last */);
+
+                if (rc == HTP_DATA_BUFFER) {
+                    if (htp_connp_req_buffer(connp) != HTP_OK) {
+                        connp->in_status = HTP_STREAM_ERROR;
+                        return HTP_STREAM_ERROR;
+                    }
+                }
+
+                #ifdef HTP_DEBUG
+                fprintf(stderr, "htp_connp_req_data: returning HTP_STREAM_DATA\n");
+                #endif
+
+                connp->in_status = HTP_STREAM_DATA;
+
+                return HTP_STREAM_DATA;
+            }
+
+            // Check for suspended parsing.
+            if (rc == HTP_DATA_OTHER) {
+                // We might have actually consumed the entire data chunk?
+                if (connp->in_current_read_offset >= connp->in_current_len) {
+                    // Do not send HTP_STREAM_DATA_OTHER if we've consumed the entire chunk.
+
+                    #ifdef HTP_DEBUG
+                    fprintf(stderr, "htp_connp_req_data: returning HTP_STREAM_DATA (suspended parsing)\n");
+                    #endif
+
+                    connp->in_status = HTP_STREAM_DATA;
+
+                    return HTP_STREAM_DATA;
+                } else {
+                    // Partial chunk consumption.
+
+                    #ifdef HTP_DEBUG
+                    fprintf(stderr, "htp_connp_req_data: returning HTP_STREAM_DATA_OTHER\n");
+                    #endif
+
+                    connp->in_status = HTP_STREAM_DATA_OTHER;
+
+                    return HTP_STREAM_DATA_OTHER;
+                }
+            }
+
+            // Check for the stop signal.
+            if (rc == HTP_STOP) {
+                #ifdef HTP_DEBUG
+                fprintf(stderr, "htp_connp_req_data: returning HTP_STREAM_STOP\n");
+                #endif
+
+                connp->in_status = HTP_STREAM_STOP;
+
+                return HTP_STREAM_STOP;
+            }
+
+            #ifdef HTP_DEBUG
+            fprintf(stderr, "htp_connp_req_data: returning HTP_STREAM_ERROR\n");
+            #endif
+
+            // Permanent stream error.
+            connp->in_status = HTP_STREAM_ERROR;
+
+            return HTP_STREAM_ERROR;
+        }
+    }
+}
diff --git a/htp/htp_request_apache_2_2.c b/htp/htp_request_apache_2_2.c
new file mode 100644
index 0000000..6a56e4e
--- /dev/null
+++ b/htp/htp_request_apache_2_2.c
@@ -0,0 +1,64 @@
+/***************************************************************************
+ * Copyright (c) 2009-2010 Open Information Security Foundation
+ * Copyright (c) 2010-2013 Qualys, Inc.
+ * All rights reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions are
+ * met:
+ *
+ * - Redistributions of source code must retain the above copyright
+ *   notice, this list of conditions and the following disclaimer.
+
+ * - Redistributions in binary form must reproduce the above copyright
+ *   notice, this list of conditions and the following disclaimer in the
+ *   documentation and/or other materials provided with the distribution.
+
+ * - Neither the name of the Qualys, Inc. nor the names of its
+ *   contributors may be used to endorse or promote products derived from
+ *   this software without specific prior written permission.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+ * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+ * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+ * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
+ * HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+ * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
+ * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+ * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
+ * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+ * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+ * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+ ***************************************************************************/
+
+/**
+ * @file
+ * @author Ivan Ristic <ivanr@webkreator.com>
+ */
+
+#include "htp_config_auto.h"
+
+#include "htp_private.h"
+
+/**
+ * Extract one request header. A header can span multiple lines, in
+ * which case they will be folded into one before parsing is attempted.
+ *
+ * This variant adds no handling of its own: all three arguments are passed
+ * unchanged to htp_process_request_header_generic() and its status is
+ * returned as is.
+ *
+ * @param[in] connp Forwarded to the generic implementation.
+ * @param[in] data Header bytes, forwarded to the generic implementation.
+ * @param[in] len Length of data, forwarded to the generic implementation.
+ * @return HTP_OK or HTP_ERROR
+ */
+htp_status_t htp_process_request_header_apache_2_2(htp_connp_t *connp, unsigned char *data, size_t len) {
+    return htp_process_request_header_generic(connp, data, len);
+}
+
+/**
+ * Parse request line as Apache 2.2 does.
+ *
+ * Delegates to htp_parse_request_line_generic_ex() with nul_terminates set
+ * to 1, so the request line is cut at its first NUL byte before parsing.
+ *
+ * @param[in] connp Forwarded to the generic implementation.
+ * @return HTP_OK or HTP_ERROR
+ */
+htp_status_t htp_parse_request_line_apache_2_2(htp_connp_t *connp) {
+    return htp_parse_request_line_generic_ex(connp, 1 /* NUL terminates line */);
+}
diff --git a/htp/htp_request_generic.c b/htp/htp_request_generic.c
new file mode 100644
index 0000000..435cf0a
--- /dev/null
+++ b/htp/htp_request_generic.c
@@ -0,0 +1,462 @@
+/***************************************************************************
+ * Copyright (c) 2009-2010 Open Information Security Foundation
+ * Copyright (c) 2010-2013 Qualys, Inc.
+ * All rights reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions are
+ * met:
+ *
+ * - Redistributions of source code must retain the above copyright
+ *   notice, this list of conditions and the following disclaimer.
+
+ * - Redistributions in binary form must reproduce the above copyright
+ *   notice, this list of conditions and the following disclaimer in the
+ *   documentation and/or other materials provided with the distribution.
+
+ * - Neither the name of the Qualys, Inc. nor the names of its
+ *   contributors may be used to endorse or promote products derived from
+ *   this software without specific prior written permission.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+ * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+ * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+ * A PARTICULAR PURPOSE ARE DISCLAIMED.
IN NO EVENT SHALL THE COPYRIGHT
+ * HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+ * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
+ * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+ * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
+ * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+ * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+ * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+ ***************************************************************************/
+
+/**
+ * @file
+ * @author Ivan Ristic <ivanr@webkreator.com>
+ */
+
+#include "htp_config_auto.h"
+
+#include "htp_private.h"
+
+/**
+ * Extract one request header. A header can span multiple lines, in
+ * which case they will be folded into one before parsing is attempted.
+ *
+ * The parsed header is either stored as a new entry in the transaction's
+ * request_headers table or, when a header with the same name exists, merged
+ * into it: values are joined with ", ", except for Content-Length, whose
+ * repeated value is only compared and never appended. Once the transaction
+ * has counted HTP_MAX_HEADERS_REPETITIONS repetitions, further repeated
+ * headers are dropped and HTP_OK is returned.
+ *
+ * @param[in] connp
+ * @param[in] data
+ * @param[in] len
+ * @return HTP_OK, or HTP_ERROR when parsing fails, memory cannot be
+ *         obtained, or the header cannot be added to the table.
+ */
+htp_status_t htp_process_request_header_generic(htp_connp_t *connp, unsigned char *data, size_t len) {
+    // Create a new header structure.
+    htp_header_t *h = calloc(1, sizeof (htp_header_t));
+    if (h == NULL) return HTP_ERROR;
+
+    // Now try to parse the header.
+    if (htp_parse_request_header_generic(connp, h, data, len) != HTP_OK) {
+        free(h);
+        return HTP_ERROR;
+    }
+
+    #ifdef HTP_DEBUG
+    fprint_bstr(stderr, "Header name", h->name);
+    fprint_bstr(stderr, "Header value", h->value);
+    #endif
+
+    // Do we already have a header with the same name?
+    htp_header_t *h_existing = htp_table_get(connp->in_tx->request_headers, h->name);
+    if (h_existing != NULL) {
+        // TODO Do we want to have a list of the headers that are
+        //      allowed to be combined in this way?
+        if ((h_existing->flags & HTP_FIELD_REPEATED) == 0) {
+            // This is the second occurrence for this header.
+            htp_log(connp, HTP_LOG_MARK, HTP_LOG_WARNING, 0, "Repetition for header");
+        } else {
+            // For simplicity reasons, we count the repetitions of all headers
+            if (connp->in_tx->req_header_repetitions < HTP_MAX_HEADERS_REPETITIONS) {
+                connp->in_tx->req_header_repetitions++;
+            } else {
+                // Repetition budget used up: drop this header silently.
+                bstr_free(h->name);
+                bstr_free(h->value);
+                free(h);
+                return HTP_OK;
+            }
+        }
+        // Keep track of repeated same-name headers.
+        h_existing->flags |= HTP_FIELD_REPEATED;
+
+        // Having multiple C-L headers is against the RFC but
+        // servers may ignore the subsequent headers if the values are the same.
+        if (bstr_cmp_c_nocase(h->name, "Content-Length") == 0) {
+            // Don't use string comparison here because we want to
+            // ignore small formatting differences.
+
+            int64_t existing_cl = htp_parse_content_length(h_existing->value, NULL);
+            int64_t new_cl = htp_parse_content_length(h->value, NULL);
+            // Ambiguous request C-L value.
+            if ((existing_cl == -1) || (new_cl == -1) || (existing_cl != new_cl)) {
+                htp_log(connp, HTP_LOG_MARK, HTP_LOG_WARNING, 0, "Ambiguous request C-L value");
+            }
+            // Ignoring the new C-L header that has the same value as the previous ones.
+        } else {
+            // Add to the existing header.
+            bstr *new_value = bstr_expand(h_existing->value, bstr_len(h_existing->value) + 2 + bstr_len(h->value));
+            if (new_value == NULL) {
+                bstr_free(h->name);
+                bstr_free(h->value);
+                free(h);
+                return HTP_ERROR;
+            }
+
+            h_existing->value = new_value;
+            bstr_add_mem_noex(h_existing->value, ", ", 2);
+            bstr_add_noex(h_existing->value, h->value);
+        }
+
+        // The new header structure is no longer needed.
+        bstr_free(h->name);
+        bstr_free(h->value);
+        free(h);
+    } else {
+        // Add as a new header.
+        if (htp_table_add(connp->in_tx->request_headers, h->name, h) != HTP_OK) {
+            bstr_free(h->name);
+            bstr_free(h->value);
+            free(h);
+            // Report the failure like every other failure in this function
+            // instead of pretending the header was stored.
+            return HTP_ERROR;
+        }
+    }
+
+    return HTP_OK;
+}
+
+/**
+ * Generic request header parser.
+ *
+ * Splits one header line at its first colon into a name and a value, with
+ * LWS trimmed on both sides of each. A line without a colon (or with a NUL
+ * byte before it) becomes a header with an empty name and the whole line as
+ * its value. Anomalies are recorded in h->flags and logged once per
+ * transaction.
+ *
+ * @param[in] connp
+ * @param[in] h
+ * @param[in] data
+ * @param[in] len
+ * @return HTP_OK or HTP_ERROR
+ */
+htp_status_t htp_parse_request_header_generic(htp_connp_t *connp, htp_header_t *h, unsigned char *data, size_t len) {
+    htp_chomp(data, &len);
+
+    // Find the separator; a NUL byte ends the search too.
+    size_t colon = 0;
+    for (; colon < len; colon++) {
+        if ((data[colon] == '\0') || (data[colon] == ':')) break;
+    }
+
+    if ((colon == len) || (data[colon] == '\0')) {
+        // No colon on this line.
+        h->flags |= HTP_FIELD_UNPARSEABLE;
+
+        // The transaction-level flag doubles as a "logged already" marker.
+        if (!(connp->in_tx->flags & HTP_FIELD_UNPARSEABLE)) {
+            connp->in_tx->flags |= HTP_FIELD_UNPARSEABLE;
+            htp_log(connp, HTP_LOG_MARK, HTP_LOG_WARNING, 0, "Request field invalid: colon missing");
+        }
+
+        // Treated as a header with an empty name whose value is the
+        // complete input.
+
+        // TODO Apache will respond to this problem with a 400.
+
+        h->name = bstr_dup_c("");
+        if (h->name == NULL) return HTP_ERROR;
+
+        h->value = bstr_dup_mem(data, len);
+        if (h->value == NULL) {
+            bstr_free(h->name);
+            return HTP_ERROR;
+        }
+
+        return HTP_OK;
+    }
+
+    if (colon == 0) {
+        // The line starts with the colon: empty name.
+        h->flags |= HTP_FIELD_INVALID;
+
+        if (!(connp->in_tx->flags & HTP_FIELD_INVALID)) {
+            connp->in_tx->flags |= HTP_FIELD_INVALID;
+            htp_log(connp, HTP_LOG_MARK, HTP_LOG_WARNING, 0, "Request field invalid: empty name");
+        }
+    }
+
+    // The name runs from offset 0 up to the colon, minus trailing LWS.
+    size_t name_len = colon;
+    while ((name_len > 0) && (htp_is_lws(data[name_len - 1]))) {
+        name_len--;
+
+        h->flags |= HTP_FIELD_INVALID;
+
+        if (!(connp->in_tx->flags & HTP_FIELD_INVALID)) {
+            connp->in_tx->flags |= HTP_FIELD_INVALID;
+            htp_log(connp, HTP_LOG_MARK, HTP_LOG_WARNING, 0, "Request field invalid: LWS after name");
+        }
+    }
+
+    // The value starts after the colon and any LWS that follows it.
+    size_t val_begin = colon;
+    if (val_begin < len) {
+        val_begin++;
+    }
+    for (; val_begin < len; val_begin++) {
+        if (!htp_is_lws(data[val_begin])) break;
+    }
+
+    // Drop LWS from the end of the value.
+    size_t val_end = len;
+    for (size_t last = val_end - 1; (last > val_begin) && (htp_is_lws(data[last])); last--) {
+        val_end--;
+    }
+
+    // Flag the first byte of the name that is not a token character.
+    for (size_t k = 0; k < name_len; k++) {
+        if (htp_is_token(data[k])) continue;
+
+        h->flags |= HTP_FIELD_INVALID;
+
+        if (!(connp->in_tx->flags & HTP_FIELD_INVALID)) {
+            connp->in_tx->flags |= HTP_FIELD_INVALID;
+            htp_log(connp, HTP_LOG_MARK, HTP_LOG_WARNING, 0, "Request header name is not a token");
+        }
+
+        break;
+    }
+
+    // Copy both parts out of the input.
+    h->name = bstr_dup_mem(data, name_len);
+    if (h->name == NULL) return HTP_ERROR;
+
+    h->value = bstr_dup_mem(data + val_begin, val_end - val_begin);
+    if (h->value == NULL) {
+        bstr_free(h->name);
+        return HTP_ERROR;
+    }
+
+    return HTP_OK;
+}
+
+/**
+ * Generic request line parser.
+ *
+ * Calls htp_parse_request_line_generic_ex() with nul_terminates set to 0,
+ * so a NUL byte does not shorten the request line.
+ *
+ * @param[in] connp
+ * @return HTP_OK or HTP_ERROR
+ */
+htp_status_t htp_parse_request_line_generic(htp_connp_t *connp) {
+    return htp_parse_request_line_generic_ex(connp, 0 /* NUL does not terminate line */);
+}
+
+/**
+ * Splits connp->in_tx->request_line into method, URI and protocol and
+ * stores them in the transaction, together with the method and protocol
+ * numbers. A line that ends after the method or after the URI is recorded
+ * as HTTP/0.9. Unusual delimiters are logged as warnings.
+ *
+ * @param[in] connp
+ * @param[in] nul_terminates When non-zero, the line is cut at its first NUL byte.
+ * @return HTP_OK, or HTP_ERROR when a copy of one of the parts cannot be allocated.
+ */
+htp_status_t htp_parse_request_line_generic_ex(htp_connp_t *connp, int nul_terminates) {
+    htp_tx_t *tx = connp->in_tx;
+    unsigned char *data = bstr_ptr(tx->request_line);
+    size_t len = bstr_len(tx->request_line);
+    size_t pos = 0;
+    size_t mstart = 0;
+    size_t start;
+    size_t bad_delim;
+
+    if (nul_terminates) {
+        // The line ends with the first NUL byte.
+
+        size_t newlen = 0;
+        while ((pos < len) && (data[pos] != '\0')) {
+            pos++;
+            newlen++;
+        }
+
+        // Start again, with the new length.
+        len = newlen;
+        pos = 0;
+    }
+
+    // skip past leading whitespace. IIS allows this
+    while ((pos < len) && htp_is_space(data[pos])) pos++;
+    if (pos) {
+        htp_log(connp, HTP_LOG_MARK, HTP_LOG_WARNING, 0, "Request line: leading whitespace");
+        mstart = pos;
+
+        if (connp->cfg->requestline_leading_whitespace_unwanted != HTP_UNWANTED_IGNORE) {
+            // reset mstart so that we copy the whitespace into the method
+            mstart = 0;
+            // set expected response code to this anomaly
+            tx->response_status_expected_number = connp->cfg->requestline_leading_whitespace_unwanted;
+        }
+    }
+
+    // The request method starts at the beginning of the
+    // line and ends with the first whitespace character.
+    while ((pos < len) && (!htp_is_space(data[pos]))) pos++;
+
+    // No, we don't care if the method is empty.
+
+    tx->request_method = bstr_dup_mem(data + mstart, pos - mstart);
+    if (tx->request_method == NULL) return HTP_ERROR;
+
+    #ifdef HTP_DEBUG
+    fprint_raw_data(stderr, __func__, bstr_ptr(tx->request_method), bstr_len(tx->request_method));
+    #endif
+
+    tx->request_method_number = htp_convert_method_to_number(tx->request_method);
+
+    bad_delim = 0;
+    // Ignore whitespace after request method. The RFC allows
+    // for only one SP, but then suggests any number of SP and HT
+    // should be permitted. Apache uses isspace(), which is even
+    // more permitting, so that's what we use here.
+    while ((pos < len) && (isspace(data[pos]))) {
+        if (!bad_delim && data[pos] != 0x20) {
+            bad_delim++;
+        }
+        pos++;
+    }
+// Too much performance overhead for fuzzing
+#ifndef FUZZING_BUILD_MODE_UNSAFE_FOR_PRODUCTION
+    if (bad_delim) {
+        htp_log(connp, HTP_LOG_MARK, HTP_LOG_WARNING, 0, "Request line: non-compliant delimiter between Method and URI");
+    }
+#endif
+
+    // Is there anything after the request method?
+    if (pos == len) {
+        // No, this looks like a HTTP/0.9 request.
+
+        tx->is_protocol_0_9 = 1;
+        tx->request_protocol_number = HTP_PROTOCOL_0_9;
+        if (tx->request_method_number == HTP_M_UNKNOWN)
+            htp_log(connp, HTP_LOG_MARK, HTP_LOG_WARNING, 0, "Request line: unknown method only");
+
+        return HTP_OK;
+    }
+
+    // From here on, start marks the first byte of the URI and pos is moved
+    // to the byte just past its end.
+    start = pos;
+    bad_delim = 0;
+    if (tx->connp->cfg->allow_space_uri) {
+        // Scan backwards from the end of the line, so that spaces inside
+        // the URI stay part of it.
+        pos = len - 1;
+        // Skips the spaces at the end of line (after protocol)
+        while (pos > start && htp_is_space(data[pos])) pos--;
+        // The URI ends with the last whitespace.
+        while ((pos > start) && (data[pos] != 0x20)) {
+            if (!bad_delim && htp_is_space(data[pos])) {
+                bad_delim++;
+            }
+            pos--;
+        }
+        /* if we've seen some 'bad' delimiters, we retry with those */
+        if (bad_delim && pos == start) {
+            // special case: even though RFC's allow only SP (0x20), many
+            // implementations allow other delimiters, like tab or other
+            // characters that isspace() accepts.
+            pos = len - 1;
+            while ((pos > start) && (!htp_is_space(data[pos]))) pos--;
+        } else {
+            // reset bad_delim found in protocol part
+            bad_delim = 0;
+            for (size_t i = start; i < pos; i++) {
+                if (data[i] != 0x20 && htp_is_space(data[i])) {
+                    bad_delim = 1;
+                    break;
+                }
+            }
+        }
+        if (bad_delim) {
+#ifndef FUZZING_BUILD_MODE_UNSAFE_FOR_PRODUCTION
+            // warn regardless if we've seen non-compliant chars
+            htp_log(connp, HTP_LOG_MARK, HTP_LOG_WARNING, 0, "Request line: URI contains non-compliant delimiter");
+#endif
+        } else if (pos == start) {
+            // No delimiter was found: the URI extends to the end of the line.
+            pos = len;
+        }
+    } else {
+        // The URI ends with the first whitespace.
+        while ((pos < len) && (data[pos] != 0x20)) {
+            if (!bad_delim && htp_is_space(data[pos])) {
+                bad_delim++;
+            }
+            pos++;
+        }
+        /* if we've seen some 'bad' delimiters, we retry with those */
+        if (bad_delim && pos == len) {
+            // special case: even though RFC's allow only SP (0x20), many
+            // implementations allow other delimiters, like tab or other
+            // characters that isspace() accepts.
+            pos = start;
+            while ((pos < len) && (!htp_is_space(data[pos]))) pos++;
+        }
+#ifndef FUZZING_BUILD_MODE_UNSAFE_FOR_PRODUCTION
+        if (bad_delim) {
+            // warn regardless if we've seen non-compliant chars
+            htp_log(connp, HTP_LOG_MARK, HTP_LOG_WARNING, 0, "Request line: URI contains non-compliant delimiter");
+        }
+#endif
+    }
+
+    tx->request_uri = bstr_dup_mem(data + start, pos - start);
+    if (tx->request_uri == NULL) return HTP_ERROR;
+
+    #ifdef HTP_DEBUG
+    fprint_raw_data(stderr, __func__, bstr_ptr(tx->request_uri), bstr_len(tx->request_uri));
+    #endif
+
+    // Ignore whitespace after URI.
+    while ((pos < len) && (htp_is_space(data[pos]))) pos++;
+
+    // Is there protocol information available?
+    if (pos == len) {
+        // No, this looks like a HTTP/0.9 request.
+
+        tx->is_protocol_0_9 = 1;
+        tx->request_protocol_number = HTP_PROTOCOL_0_9;
+        if (tx->request_method_number == HTP_M_UNKNOWN)
+            htp_log(connp, HTP_LOG_MARK, HTP_LOG_WARNING, 0, "Request line: unknown method and no protocol");
+
+        return HTP_OK;
+    }
+
+    // The protocol information continues until the end of the line.
+    tx->request_protocol = bstr_dup_mem(data + pos, len - pos);
+    if (tx->request_protocol == NULL) return HTP_ERROR;
+
+    tx->request_protocol_number = htp_parse_protocol(tx->request_protocol);
+    if (tx->request_method_number == HTP_M_UNKNOWN && tx->request_protocol_number == HTP_PROTOCOL_INVALID)
+        htp_log(connp, HTP_LOG_MARK, HTP_LOG_WARNING, 0, "Request line: unknown method and invalid protocol");
+
+    #ifdef HTP_DEBUG
+    fprint_raw_data(stderr, __func__, bstr_ptr(tx->request_protocol), bstr_len(tx->request_protocol));
+    #endif
+
+    return HTP_OK;
+}
+
diff --git a/htp/htp_request_parsers.c b/htp/htp_request_parsers.c
new file mode 100644
index 0000000..448ed4c
--- /dev/null
+++ b/htp/htp_request_parsers.c
@@ -0,0 +1,149 @@
+/***************************************************************************
+ * Copyright (c) 2009-2010 Open Information Security Foundation
+ * Copyright (c) 2010-2013 Qualys, Inc.
+ * All rights reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions are
+ * met:
+ *
+ * - Redistributions of source code must retain the above copyright
+ *   notice, this list of conditions and the following disclaimer.
+
+ * - Redistributions in binary form must reproduce the above copyright
+ *   notice, this list of conditions and the following disclaimer in the
+ *   documentation and/or other materials provided with the distribution.
+
+ * - Neither the name of the Qualys, Inc. nor the names of its
+ *   contributors may be used to endorse or promote products derived from
+ *   this software without specific prior written permission.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+ * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+ * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+ * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
+ * HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+ * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
+ * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+ * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
+ * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+ * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+ * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+ ***************************************************************************/
+
+/**
+ * @file
+ * @author Ivan Ristic <ivanr@webkreator.com>
+ */
+
+#include "htp_config_auto.h"
+
+#include "htp_private.h"
+
+#if 0
+
+/**
+ * Disabled code: everything up to the matching #endif is excluded from
+ * compilation.
+ *
+ * Strict header line parser. Locates the name (ending at the first LWS
+ * byte or colon) and the value (starting after the colon and any LWS,
+ * ending at the next LWS byte) and stores their offsets and lengths in h.
+ *
+ * @param[in] data
+ * @param[in] len
+ * @param[out] h Receives name_offset, name_len, value_offset and value_len.
+ * @return 1 on success; -1 when the name is empty, the colon is missing or
+ *         nothing follows the colon.
+ */
+int htp_header_parse_internal_strict(unsigned char *data, size_t len, htp_header_t *h) {
+    size_t name_start, name_end;
+    size_t value_start, value_end;
+
+    // Deal with the name first
+    name_start = name_end = 0;
+
+    // Find where the header name ends
+    while (name_end < len) {
+        if (htp_is_lws(data[name_end]) || data[name_end] == ':') break;
+        name_end++;
+    }
+
+    if (name_end == 0) {
+        // Empty header name
+        return -1;
+    }
+
+    if (name_end == len) {
+        // TODO
+        return -1;
+    }
+
+    // Is there any LWS before colon?
+    size_t pos = name_end;
+    while (pos < len) {
+        if (!htp_is_lws(data[pos])) break;
+        pos++;
+        // TODO
+        // return -1;
+    }
+
+    if (pos == len) {
+        // TODO
+        return -1;
+    }
+
+    // The next character must be a colon
+    if (data[pos] != ':') {
+        // TODO
+        return -1;
+    }
+
+    // Move over the colon
+    pos++;
+
+    // Again, ignore any LWS
+    while (pos < len) {
+        if (!htp_is_lws(data[pos])) break;
+        pos++;
+    }
+
+    if (pos == len) {
+        // TODO
+        return -1;
+    }
+
+    value_start = value_end = pos;
+
+    // The value stops at the first LWS byte, not at the end of the line.
+    while (value_end < len) {
+        if (htp_is_lws(data[value_end])) break;
+        value_end++;
+    }
+
+    h->name_offset = name_start;
+    h->name_len = name_end - name_start;
+    h->value_offset = value_start;
+    h->value_len = value_end - value_start;
+
+    return 1;
+}
+ */
+
+/**
+ * Disabled code (inside the same #if 0 region).
+ *
+ * Allocates a header and fills it using reqp->impl_header_parse(). When
+ * that parser reports a negative result, the whole line is stored as the
+ * name and is_parsed is left at 0.
+ *
+ * @param[in] reqp
+ * @param[in] data
+ * @param[in] len
+ * @return The new header, or NULL when the structure cannot be allocated.
+ *         The results of the bstr copies are not checked.
+ */
+htp_header_t *htp_connp_header_parse(htp_connp_t *reqp, unsigned char *data, size_t len) {
+    htp_header_t *h = calloc(1, sizeof (htp_header_t));
+    if (h == NULL) return NULL;
+
+    // Parse the header line
+    if (reqp->impl_header_parse(data, len, h) < 0) {
+        // Invalid header line
+        h->is_parsed = 0;
+        h->name = bstr_dup_mem(data, len);
+
+        return h;
+    }
+
+    // Now extract the name and the value
+    h->name = bstr_dup_mem(data + h->name_offset, h->name_len);
+    h->value = bstr_dup_mem(data + h->value_offset, h->value_len);
+    h->is_parsed = 1;
+
+    // Because header names are case-insensitive, we will convert
+    // the name to lowercase to use it as a lookup key.
+    h->name_lowercase = bstr_to_lowercase(h->name);
+
+    return h;
+}
+
+#endif
diff --git a/htp/htp_response.c b/htp/htp_response.c
new file mode 100644
index 0000000..121004c
--- /dev/null
+++ b/htp/htp_response.c
@@ -0,0 +1,1436 @@
+/***************************************************************************
+ * Copyright (c) 2009-2010 Open Information Security Foundation
+ * Copyright (c) 2010-2013 Qualys, Inc.
+ * All rights reserved.
+ * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions are + * met: + * + * - Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + + * - Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution. + + * - Neither the name of the Qualys, Inc. nor the names of its + * contributors may be used to endorse or promote products derived from + * this software without specific prior written permission. + * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS + * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT + * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR + * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT + * HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, + * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT + * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, + * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY + * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT + * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE + * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
+ ***************************************************************************/
+
+/**
+ * @file
+ * @author Ivan Ristic <ivanr@webkreator.com>
+ */
+
+#include "htp_config_auto.h"
+
+#include "htp_private.h"
+
+// NOTE(review): the OUT_* macros below expand to a bare if statement (no
+// do { } while (0) wrapper) and three of them return from the calling
+// function; take care when using them next to an unbraced if/else.
+
+// Returns HTP_DATA from the calling function when the current outbound
+// chunk has no unread byte left; otherwise does nothing.
+#define OUT_TEST_NEXT_BYTE_OR_RETURN(X) \
+if ((X)->out_current_read_offset >= (X)->out_current_len) { \
+    return HTP_DATA; \
+}
+
+// Stores the next unread byte in out_next_byte without advancing any
+// offset; stores -1 when the chunk is exhausted.
+#define OUT_PEEK_NEXT(X) \
+if ((X)->out_current_read_offset >= (X)->out_current_len) { \
+    (X)->out_next_byte = -1; \
+} else { \
+    (X)->out_next_byte = (X)->out_current_data[(X)->out_current_read_offset]; \
+}
+
+// Reads the next byte into out_next_byte and advances the read, consume
+// and stream offsets; stores -1 when the chunk is exhausted.
+#define OUT_NEXT_BYTE(X) \
+if ((X)->out_current_read_offset < (X)->out_current_len) { \
+    (X)->out_next_byte = (X)->out_current_data[(X)->out_current_read_offset]; \
+    (X)->out_current_read_offset++; \
+    (X)->out_current_consume_offset++; \
+    (X)->out_stream_offset++; \
+} else { \
+    (X)->out_next_byte = -1; \
+}
+
+// Same as OUT_NEXT_BYTE, except that an exhausted chunk makes the calling
+// function return HTP_DATA.
+#define OUT_NEXT_BYTE_OR_RETURN(X) \
+if ((X)->out_current_read_offset < (X)->out_current_len) { \
+    (X)->out_next_byte = (X)->out_current_data[(X)->out_current_read_offset]; \
+    (X)->out_current_read_offset++; \
+    (X)->out_current_consume_offset++; \
+    (X)->out_stream_offset++; \
+} else { \
+    return HTP_DATA; \
+}
+
+// Reads the next byte and advances the read and stream offsets but not the
+// consume offset, so the byte stays available between the two offsets. An
+// exhausted chunk makes the calling function return HTP_DATA_BUFFER.
+#define OUT_COPY_BYTE_OR_RETURN(X) \
+if ((X)->out_current_read_offset < (X)->out_current_len) { \
+    (X)->out_next_byte = (X)->out_current_data[(X)->out_current_read_offset]; \
+    (X)->out_current_read_offset++; \
+    (X)->out_stream_offset++; \
+} else { \
+    return HTP_DATA_BUFFER; \
+}
+
+// NOTE(review): presumably a placeholder URI for responses seen without a
+// request; its use is outside this excerpt - confirm.
+#define REQUEST_URI_NOT_SEEN "/libhtp::request_uri_not_seen"
+
+/**
+ * Sends outstanding connection data to the currently active data receiver hook.
+ *
+ * @param[in] connp
+ * @param[in] is_last
+ * @return HTP_OK, or a value returned from a callback.
+ */
+static htp_status_t htp_connp_res_receiver_send_data(htp_connp_t *connp, int is_last) {
+    htp_status_t status = HTP_OK;
+
+    if (connp->out_data_receiver_hook != NULL) {
+        // Describe the bytes read since the receiver was last served.
+        htp_tx_data_t chunk;
+        chunk.tx = connp->out_tx;
+        chunk.data = connp->out_current_data + connp->out_current_receiver_offset;
+        chunk.len = connp->out_current_read_offset - connp->out_current_receiver_offset;
+        chunk.is_last = is_last;
+
+        status = htp_hook_run_all(connp->out_data_receiver_hook, &chunk);
+        if (status == HTP_OK) {
+            // Only a successful delivery moves the receiver position.
+            connp->out_current_receiver_offset = connp->out_current_read_offset;
+        }
+    }
+
+    return status;
+}
+
+/**
+ * Flushes whatever is still pending to the active data receiver hook,
+ * marking it as the last piece, and then detaches the hook. Does nothing
+ * when no hook is active.
+ *
+ * @param[in] connp
+ * @return HTP_OK, or a value returned from a callback.
+ */
+htp_status_t htp_connp_res_receiver_finalize_clear(htp_connp_t *connp) {
+    htp_status_t status = HTP_OK;
+
+    if (connp->out_data_receiver_hook != NULL) {
+        status = htp_connp_res_receiver_send_data(connp, 1 /* last */);
+
+        // The hook is detached whatever the delivery status was.
+        connp->out_data_receiver_hook = NULL;
+    }
+
+    return status;
+}
+
+/**
+ * Installs a new data receiver hook. A hook that is already active is
+ * finalized and cleared first; the new hook starts receiving from the
+ * current read position.
+ *
+ * @param[in] connp
+ * @param[in] data_receiver_hook
+ * @return HTP_OK, or a value returned from a callback.
+ */
+static htp_status_t htp_connp_res_receiver_set(htp_connp_t *connp, htp_hook_t *data_receiver_hook) {
+    htp_status_t finalize_status = htp_connp_res_receiver_finalize_clear(connp);
+
+    connp->out_current_receiver_offset = connp->out_current_read_offset;
+    connp->out_data_receiver_hook = data_receiver_hook;
+
+    return finalize_status;
+}
+
+/**
+ * Handles response parser state changes. At the moment, this function is used only
+ * to configure data receivers, which are sent raw connection data.
+ *
+ * @param[in] connp
+ * @return HTP_OK, or a value returned from a callback.
+ */ +static htp_status_t htp_res_handle_state_change(htp_connp_t *connp) { + if (connp->out_state_previous == connp->out_state) return HTP_OK; + + if (connp->out_state == htp_connp_RES_HEADERS) { + htp_status_t rc = HTP_OK; + + switch (connp->out_tx->response_progress) { + case HTP_RESPONSE_HEADERS: + rc = htp_connp_res_receiver_set(connp, connp->out_tx->cfg->hook_response_header_data); + break; + + case HTP_RESPONSE_TRAILER: + rc = htp_connp_res_receiver_set(connp, connp->out_tx->cfg->hook_response_trailer_data); + break; + + default: + // Do nothing; receivers are currently used only for header blocks. + break; + } + + if (rc != HTP_OK) return rc; + } + + // Same comment as in htp_req_handle_state_change(). Below is a copy. + + // Initially, I had the finalization of raw data sending here, but that + // caused the last REQUEST_HEADER_DATA hook to be invoked after the + // REQUEST_HEADERS hook -- which I thought made no sense. For that reason, + // the finalization is now initiated from the request header processing code, + // which is less elegant but provides a better user experience. Having some + // (or all) hooks to be invoked on state change might work better. + + connp->out_state_previous = connp->out_state; + + return HTP_OK; +} + +/** + * If there is any data left in the outbound data chunk, this function will preserve + * it for later consumption. The maximum amount accepted for buffering is controlled + * by htp_config_t::field_limit_hard. + * + * @param[in] connp + * @return HTP_OK, or HTP_ERROR on fatal failure. + */ +static htp_status_t htp_connp_res_buffer(htp_connp_t *connp) { + if (connp->out_current_data == NULL) return HTP_OK; + + unsigned char *data = connp->out_current_data + connp->out_current_consume_offset; + size_t len = connp->out_current_read_offset - connp->out_current_consume_offset; + + // Check the hard (buffering) limit. 
+ + size_t newlen = connp->out_buf_size + len; + + // When calculating the size of the buffer, take into account the + // space we're using for the response header buffer. + if (connp->out_header != NULL) { + newlen += bstr_len(connp->out_header); + } + + if (newlen > connp->out_tx->cfg->field_limit_hard) { + htp_log(connp, HTP_LOG_MARK, HTP_LOG_ERROR, 0, "Response the buffer limit: size %zd limit %zd.", + newlen, connp->out_tx->cfg->field_limit_hard); + return HTP_ERROR; + } + + // Copy the data remaining in the buffer. + + if (connp->out_buf == NULL) { + connp->out_buf = malloc(len); + if (connp->out_buf == NULL) return HTP_ERROR; + memcpy(connp->out_buf, data, len); + connp->out_buf_size = len; + } else { + size_t newsize = connp->out_buf_size + len; + unsigned char *newbuf = realloc(connp->out_buf, newsize); + if (newbuf == NULL) return HTP_ERROR; + connp->out_buf = newbuf; + memcpy(connp->out_buf + connp->out_buf_size, data, len); + connp->out_buf_size = newsize; + } + + // Reset the consumer position. + connp->out_current_consume_offset = connp->out_current_read_offset; + + return HTP_OK; +} + +/** + * Returns to the caller the memory region that should be processed next. This function + * hides away the buffering process from the rest of the code, allowing it to work with + * non-buffered data that's in the outbound chunk, or buffered data that's in our structures. + * + * @param[in] connp + * @param[out] data + * @param[out] len + * @return HTP_OK + */ +static htp_status_t htp_connp_res_consolidate_data(htp_connp_t *connp, unsigned char **data, size_t *len) { + if (connp->out_buf == NULL) { + // We do not have any data buffered; point to the current data chunk. + *data = connp->out_current_data + connp->out_current_consume_offset; + *len = connp->out_current_read_offset - connp->out_current_consume_offset; + } else { + // We do have data in the buffer. Add data from the current + // chunk, and point to the consolidated buffer. 
+ if (htp_connp_res_buffer(connp) != HTP_OK) { + return HTP_ERROR; + } + + *data = connp->out_buf; + *len = connp->out_buf_size; + } + + return HTP_OK; +} + +/** + * Clears buffered outbound data and resets the consumer position to the reader position. + * + * @param[in] connp + */ +static void htp_connp_res_clear_buffer(htp_connp_t *connp) { + connp->out_current_consume_offset = connp->out_current_read_offset; + + if (connp->out_buf != NULL) { + free(connp->out_buf); + connp->out_buf = NULL; + connp->out_buf_size = 0; + } +} + +/** + * Consumes bytes until the end of the current line. + * + * @param[in] connp + * @returns HTP_OK on state change, HTP_ERROR on error, or HTP_DATA when more data is needed. + */ +htp_status_t htp_connp_RES_BODY_CHUNKED_DATA_END(htp_connp_t *connp) { + // TODO We shouldn't really see anything apart from CR and LF, + // so we should warn about anything else. + + for (;;) { + OUT_NEXT_BYTE_OR_RETURN(connp); + + connp->out_tx->response_message_len++; + + if (connp->out_next_byte == LF) { + connp->out_state = htp_connp_RES_BODY_CHUNKED_LENGTH; + + return HTP_OK; + } + } + + return HTP_ERROR; +} + +/** + * Processes a chunk of data. + * + * @param[in] connp + * @returns HTP_OK on state change, HTP_ERROR on error, or HTP_DATA when more data is needed. + */ +htp_status_t htp_connp_RES_BODY_CHUNKED_DATA(htp_connp_t *connp) { + size_t bytes_to_consume; + + // Determine how many bytes we can consume. + if (connp->out_current_len - connp->out_current_read_offset >= connp->out_chunked_length) { + bytes_to_consume = connp->out_chunked_length; + } else { + bytes_to_consume = connp->out_current_len - connp->out_current_read_offset; + } + + if (bytes_to_consume == 0) return HTP_DATA; + + // Consume the data. + htp_status_t rc = htp_tx_res_process_body_data_ex(connp->out_tx, connp->out_current_data + connp->out_current_read_offset, bytes_to_consume); + if (rc != HTP_OK) return rc; + + // Adjust the counters. 
+ connp->out_current_read_offset += bytes_to_consume; + connp->out_current_consume_offset += bytes_to_consume; + connp->out_stream_offset += bytes_to_consume; + connp->out_chunked_length -= bytes_to_consume; + + // Have we seen the entire chunk? + if (connp->out_chunked_length == 0) { + connp->out_state = htp_connp_RES_BODY_CHUNKED_DATA_END; + return HTP_OK; + } + + return HTP_DATA; +} + +static inline int is_chunked_ctl_char(const unsigned char c) { + switch (c) { + case 0x0d: + case 0x0a: + case 0x20: + case 0x09: + case 0x0b: + case 0x0c: + return 1; + default: + return 0; + } +} + +/** + * Peeks ahead into the data to try to see if it starts with a valid Chunked + * length field. + * + * @returns 1 if it looks valid, 0 if it looks invalid + */ +static inline int data_probe_chunk_length(htp_connp_t *connp) { + if (connp->out_current_read_offset - connp->out_current_consume_offset < 8) { + // not enough data so far, consider valid still + return 1; + } + + unsigned char *data = connp->out_current_data + connp->out_current_consume_offset; + size_t len = connp->out_current_read_offset - connp->out_current_consume_offset; + + size_t i = 0; + while (i < len) { + unsigned char c = data[i]; + + if (is_chunked_ctl_char(c)) { + // ctl char, still good. + } else if (isdigit(c) || (c >= 'a' && c <= 'f') || (c >= 'A' && c <= 'F')) { + // real chunklen char + return 1; + } else { + // leading junk, bad + return 0; + } + i++; + } + return 1; +} + +/** + * Extracts chunk length. + * + * @param[in] connp + * @returns HTP_OK on state change, HTP_ERROR on error, or HTP_DATA when more data is needed. + */ +htp_status_t htp_connp_RES_BODY_CHUNKED_LENGTH(htp_connp_t *connp) { + for (;;) { + OUT_COPY_BYTE_OR_RETURN(connp); + + // Have we reached the end of the line? Or is this not chunked after all? 
+ if (connp->out_next_byte == LF || + (!is_chunked_ctl_char((unsigned char) connp->out_next_byte) && !data_probe_chunk_length(connp))) { + unsigned char *data; + size_t len; + + if (htp_connp_res_consolidate_data(connp, &data, &len) != HTP_OK) { + return HTP_ERROR; + } + + connp->out_tx->response_message_len += len; + + #ifdef HTP_DEBUG + fprint_raw_data(stderr, "Chunk length line", data, len); + #endif + + int chunk_ext = 0; + connp->out_chunked_length = htp_parse_chunked_length(data, len, &chunk_ext); + if (chunk_ext == 1) { + htp_log(connp, HTP_LOG_MARK, HTP_LOG_WARNING, 0, "Request chunk extension"); + } + // empty chunk length line, lets try to continue + if (connp->out_chunked_length == -1004) { + connp->out_current_consume_offset = connp->out_current_read_offset; + continue; + } + if (connp->out_chunked_length < 0) { + // reset out_current_read_offset so htp_connp_RES_BODY_IDENTITY_STREAM_CLOSE + // doesn't miss the first bytes + + if (len > (size_t)connp->out_current_read_offset) { + connp->out_current_read_offset = 0; + } else { + connp->out_current_read_offset -= len; + } + + connp->out_state = htp_connp_RES_BODY_IDENTITY_STREAM_CLOSE; + connp->out_tx->response_transfer_coding = HTP_CODING_IDENTITY; + + htp_log(connp, HTP_LOG_MARK, HTP_LOG_ERROR, 0, + "Response chunk encoding: Invalid chunk length: %"PRId64"", + connp->out_chunked_length); + return HTP_OK; + } + htp_connp_res_clear_buffer(connp); + + // Handle chunk length + if (connp->out_chunked_length > 0) { + // More data available + connp->out_state = htp_connp_RES_BODY_CHUNKED_DATA; + } else if (connp->out_chunked_length == 0) { + // End of data + connp->out_state = htp_connp_RES_HEADERS; + connp->out_tx->response_progress = HTP_RESPONSE_TRAILER; + } + + return HTP_OK; + } + } + + return HTP_ERROR; +} + +/** + * Processes an identity response body of known length. + * + * @param[in] connp + * @returns HTP_OK on state change, HTP_ERROR on error, or HTP_DATA when more data is needed. 
 */
htp_status_t htp_connp_RES_BODY_IDENTITY_CL_KNOWN(htp_connp_t *connp) {
    size_t bytes_to_consume;

    // Determine how many bytes we can consume.
    if (connp->out_current_len - connp->out_current_read_offset >= connp->out_body_data_left) {
        bytes_to_consume = connp->out_body_data_left;
    } else {
        bytes_to_consume = connp->out_current_len - connp->out_current_read_offset;
    }

    // A closed stream ends the body even if fewer bytes than announced arrived.
    if (connp->out_status == HTP_STREAM_CLOSED) {
        connp->out_state = htp_connp_RES_FINALIZE;
        // Sends close signal to decompressors
        htp_status_t rc = htp_tx_res_process_body_data_ex(connp->out_tx, NULL, 0);
        return rc;
    }
    if (bytes_to_consume == 0) return HTP_DATA;

    // Consume the data.
    htp_status_t rc = htp_tx_res_process_body_data_ex(connp->out_tx, connp->out_current_data + connp->out_current_read_offset, bytes_to_consume);
    if (rc != HTP_OK) return rc;

    // Adjust the counters.
    connp->out_current_read_offset += bytes_to_consume;
    connp->out_current_consume_offset += bytes_to_consume;
    connp->out_stream_offset += bytes_to_consume;
    connp->out_body_data_left -= bytes_to_consume;

    // Have we seen the entire response body?
    if (connp->out_body_data_left == 0) {
        connp->out_state = htp_connp_RES_FINALIZE;
        // Tells decompressors to output partially decompressed data
        rc = htp_tx_res_process_body_data_ex(connp->out_tx, NULL, 0);
        return rc;
    }

    return HTP_DATA;
}

/**
 * Processes identity response body of unknown length. In this case, we assume the
 * response body consumes all data until the end of the stream.
 *
 * @param[in] connp
 * @returns HTP_OK on state change, HTP_ERROR on error, or HTP_DATA when more data is needed.
 */
htp_status_t htp_connp_RES_BODY_IDENTITY_STREAM_CLOSE(htp_connp_t *connp) {
    // Consume all data from the input buffer.
    size_t bytes_to_consume = connp->out_current_len - connp->out_current_read_offset;

    #ifdef HTP_DEBUG
    fprintf(stderr, "bytes_to_consume %"PRIuMAX, (uintmax_t)bytes_to_consume);
    #endif
    if (bytes_to_consume != 0) {
        htp_status_t rc = htp_tx_res_process_body_data_ex(connp->out_tx, connp->out_current_data + connp->out_current_read_offset, bytes_to_consume);
        if (rc != HTP_OK) return rc;

        // Adjust the counters.
        connp->out_current_read_offset += bytes_to_consume;
        connp->out_current_consume_offset += bytes_to_consume;
        connp->out_stream_offset += bytes_to_consume;
    }

    // Have we seen the entire response body?
    if (connp->out_status == HTP_STREAM_CLOSED) {
        connp->out_state = htp_connp_RES_FINALIZE;
        return HTP_OK;
    }

    return HTP_DATA;
}

/**
 * Determines presence (and encoding) of a response body.
 *
 * Looks at the request method, the response status and the Content-Length /
 * Transfer-Encoding / Content-Type response headers, then selects the next
 * parser state (chunked body, body of known length, body until stream close,
 * finalization, or back to the response line after an interim 100 response).
 *
 * @param[in] connp
 * @returns HTP_OK on state change, HTP_ERROR on error, or HTP_DATA when more data is needed.
 */
htp_status_t htp_connp_RES_BODY_DETERMINE(htp_connp_t *connp) {
    // If the request uses the CONNECT method, then not only are we
    // to assume there's no body, but we need to ignore all
    // subsequent data in the stream.
    if (connp->out_tx->request_method_number == HTP_M_CONNECT) {
        if ((connp->out_tx->response_status_number >= 200)
                && (connp->out_tx->response_status_number <= 299)) {
            // This is a successful CONNECT stream, which means
            // we need to switch into tunneling mode: on the
            // request side we'll now probe the tunnel data to see
            // if we need to parse or ignore it. So on the response
            // side we wrap up the tx and wait.
            connp->out_state = htp_connp_RES_FINALIZE;

            // we may have response headers
            htp_status_t rc = htp_tx_state_response_headers(connp->out_tx);
            return rc;
        } else if (connp->out_tx->response_status_number == 407) {
            // proxy telling us to auth
            if (connp->in_status != HTP_STREAM_ERROR)
                connp->in_status = HTP_STREAM_DATA;
        } else {
            // This is a failed CONNECT stream, which means that
            // we can unblock request parsing
            if (connp->in_status != HTP_STREAM_ERROR)
                connp->in_status = HTP_STREAM_DATA;

            // We are going to continue processing this transaction,
            // adding a note for ourselves to stop at the end (because
            // we don't want to see the beginning of a new transaction).
            connp->out_data_other_at_tx_end = 1;
        }
    }

    htp_header_t *cl = htp_table_get_c(connp->out_tx->response_headers, "content-length");
    htp_header_t *te = htp_table_get_c(connp->out_tx->response_headers, "transfer-encoding");

    // Check for "101 Switching Protocol" response.
    // If it's seen, it means that traffic after empty line following headers
    // is no longer HTTP. We can treat it similarly to CONNECT.
    // Unlike CONNECT, however, upgrades from HTTP to HTTP seem
    // rather unlikely, so don't try to probe tunnel for nested HTTP,
    // and switch to tunnel mode right away.
    if (connp->out_tx->response_status_number == 101) {
        if (te == NULL && cl == NULL) {
            connp->out_state = htp_connp_RES_FINALIZE;

            if (connp->in_status != HTP_STREAM_ERROR)
                connp->in_status = HTP_STREAM_TUNNEL;
            connp->out_status = HTP_STREAM_TUNNEL;

            // we may have response headers
            htp_status_t rc = htp_tx_state_response_headers(connp->out_tx);
            return rc;
        } else {
            // Note: this branch is also taken when only Transfer-Encoding is present.
            htp_log(connp, HTP_LOG_MARK, HTP_LOG_WARNING, 0, "Switching Protocol with Content-Length");
        }
    }

    // Check for an interim "100 Continue" response. Ignore it if found, and revert back to RES_LINE.
    if (connp->out_tx->response_status_number == 100 && te == NULL) {
        int is100continue = 1;
        // A 100 response announcing a non-empty body is not treated as interim.
        if (cl != NULL){
            if (htp_parse_content_length(cl->value, connp) > 0) {
                is100continue = 0;
            }
        }
        if (is100continue) {
            if (connp->out_tx->seen_100continue != 0) {
                htp_log(connp, HTP_LOG_MARK, HTP_LOG_ERROR, 0, "Already seen 100-Continue.");
            }

            // Ignore any response headers seen so far.
            htp_header_t *h = NULL;
            for (size_t i = 0, n = htp_table_size(connp->out_tx->response_headers); i < n; i++) {
                h = htp_table_get_index(connp->out_tx->response_headers, i, NULL);
                bstr_free(h->name);
                bstr_free(h->value);
                free(h);
            }

            htp_table_clear(connp->out_tx->response_headers);

            // Expecting to see another response line next.
            connp->out_state = htp_connp_RES_LINE;
            connp->out_tx->response_progress = HTP_RESPONSE_LINE;
            connp->out_tx->seen_100continue++;

            return HTP_OK;
        }
    }

    // A request can indicate it waits for headers validation
    // before sending its body cf
    // https://developer.mozilla.org/en-US/docs/Web/HTTP/Headers/Expect
    // On a 4xx answer to such a request whose body has not started yet
    // (in_body_data_left still equals in_content_length), the request side
    // is moved straight to REQ_FINALIZE.
    if (connp->out_tx->response_status_number >= 400 &&
        connp->out_tx->response_status_number <= 499 &&
        connp->in_content_length > 0 &&
        connp->in_body_data_left == connp->in_content_length) {
        htp_header_t *exp = htp_table_get_c(connp->out_tx->request_headers, "expect");
        if ((exp != NULL) && (bstr_cmp_c_nocase(exp->value, "100-continue") == 0)) {
            connp->in_state = htp_connp_REQ_FINALIZE;
        }
    }

    // 1. Any response message which MUST NOT include a message-body
    //  (such as the 1xx, 204, and 304 responses and any response to a HEAD
    //  request) is always terminated by the first empty line after the
    //  header fields, regardless of the entity-header fields present in the
    //  message.
    if (connp->out_tx->request_method_number == HTP_M_HEAD) {
        // There's no response body whatsoever
        connp->out_tx->response_transfer_coding = HTP_CODING_NO_BODY;
        connp->out_state = htp_connp_RES_FINALIZE;
    }
    else if (((connp->out_tx->response_status_number >= 100) && (connp->out_tx->response_status_number <= 199))
            || (connp->out_tx->response_status_number == 204) || (connp->out_tx->response_status_number == 304)) {
        // There should be no response body
        // but browsers interpret content sent by the server as such
        if (te == NULL && cl == NULL) {
            connp->out_tx->response_transfer_coding = HTP_CODING_NO_BODY;
            connp->out_state = htp_connp_RES_FINALIZE;
        } else {
            htp_log(connp, HTP_LOG_MARK, HTP_LOG_WARNING, 0, "Unexpected Response body");
        }
    }
    // Hack condition to check that we do not assume "no body"
    if (connp->out_state != htp_connp_RES_FINALIZE) {
        // We have a response body
        htp_header_t *ct = htp_table_get_c(connp->out_tx->response_headers, "content-type");
        if (ct != NULL) {
            connp->out_tx->response_content_type = bstr_dup_lower(ct->value);
            if (connp->out_tx->response_content_type == NULL) return HTP_ERROR;

            // Ignore parameters: cut the lowercased copy at the first
            // whitespace or ';'.
            unsigned char *data = bstr_ptr(connp->out_tx->response_content_type);
            size_t len = bstr_len(ct->value);
            size_t newlen = 0;
            while (newlen < len) {
                // TODO Some platforms may do things differently here.
                if (htp_is_space(data[newlen]) || (data[newlen] == ';')) {
                    bstr_adjust_len(connp->out_tx->response_content_type, newlen);
                    break;
                }

                newlen++;
            }
        }

        // 2. If a Transfer-Encoding header field (section 14.40) is present and
        //   indicates that the "chunked" transfer coding has been applied, then
        //   the length is defined by the chunked encoding (section 3.6).
        if ((te != NULL) && (bstr_index_of_c_nocasenorzero(te->value, "chunked") != -1)) {
            if (bstr_cmp_c_nocase(te->value, "chunked") != 0) {
                htp_log(connp, HTP_LOG_MARK, HTP_LOG_WARNING, 0,
                        "Transfer-encoding has abnormal chunked value");
            }

            // spec says chunked is HTTP/1.1 only, but some browsers accept it
            // with 1.0 as well
            if (connp->out_tx->response_protocol_number < HTP_PROTOCOL_1_1) {
                htp_log(connp, HTP_LOG_MARK, HTP_LOG_WARNING, 0,
                        "Chunked transfer-encoding on HTTP/0.9 or HTTP/1.0");
            }

            // If the T-E header is present we are going to use it.
            connp->out_tx->response_transfer_coding = HTP_CODING_CHUNKED;

            // We are still going to check for the presence of C-L
            if (cl != NULL) {
                // This is a violation of the RFC
                connp->out_tx->flags |= HTP_REQUEST_SMUGGLING;
            }

            connp->out_state = htp_connp_RES_BODY_CHUNKED_LENGTH;
            connp->out_tx->response_progress = HTP_RESPONSE_BODY;
        }// 3. If a Content-Length header field (section 14.14) is present, its
            //   value in bytes represents the length of the message-body.
        else if (cl != NULL) {
            // We know the exact length
            connp->out_tx->response_transfer_coding = HTP_CODING_IDENTITY;

            // Check for multiple C-L headers
            if (cl->flags & HTP_FIELD_REPEATED) {
                connp->out_tx->flags |= HTP_REQUEST_SMUGGLING;
            }

            // Get body length
            connp->out_tx->response_content_length = htp_parse_content_length(cl->value, connp);
            if (connp->out_tx->response_content_length < 0) {
                htp_log(connp, HTP_LOG_MARK, HTP_LOG_ERROR, 0, "Invalid C-L field in response: %"PRId64"",
                        connp->out_tx->response_content_length);
                return HTP_ERROR;
            } else {
                connp->out_content_length = connp->out_tx->response_content_length;
                connp->out_body_data_left = connp->out_content_length;

                if (connp->out_content_length != 0) {
                    connp->out_state = htp_connp_RES_BODY_IDENTITY_CL_KNOWN;
                    connp->out_tx->response_progress = HTP_RESPONSE_BODY;
                } else {
                    // An announced length of zero means there is no body to read.
                    connp->out_state = htp_connp_RES_FINALIZE;
                }
            }
        } else {
            // 4. If the message uses the media type "multipart/byteranges", which is
            //   self-delimiting, then that defines the length. This media type MUST
            //   NOT be used unless the sender knows that the recipient can parse it;
            //   the presence in a request of a Range header with multiple byte-range
            //   specifiers implies that the client can parse multipart/byteranges
            //   responses.
            if (ct != NULL) {
                // TODO Handle multipart/byteranges
                if (bstr_index_of_c_nocase(ct->value, "multipart/byteranges") != -1) {
                    htp_log(connp, HTP_LOG_MARK, HTP_LOG_ERROR, 0,
                            "C-T multipart/byteranges in responses not supported");
                    return HTP_ERROR;
                }
            }

            // 5. By the server closing the connection. (Closing the connection
            //   cannot be used to indicate the end of a request body, since that
            //   would leave no possibility for the server to send back a response.)
            connp->out_state = htp_connp_RES_BODY_IDENTITY_STREAM_CLOSE;
            connp->out_tx->response_transfer_coding = HTP_CODING_IDENTITY;
            connp->out_tx->response_progress = HTP_RESPONSE_BODY;
            connp->out_body_data_left = -1;
        }
    }

    // NOTE We do not need to check for short-style HTTP/0.9 requests here because
    //      that is done earlier, before response line parsing begins

    htp_status_t rc = htp_tx_state_response_headers(connp->out_tx);
    if (rc != HTP_OK) return rc;

    return HTP_OK;
}

/**
 * Parses response headers. The same state is used for the trailer block that
 * follows a chunked body; response_progress tells the two apart.
 *
 * Lines are read byte by byte. A completed line either terminates the block,
 * starts a new header, or is a folded continuation of the pending header kept
 * in connp->out_header.
 *
 * @param[in] connp
 * @returns HTP_OK on state change, HTP_ERROR on error, or HTP_DATA_BUFFER when more data is needed.
 */
htp_status_t htp_connp_RES_HEADERS(htp_connp_t *connp) {
    // Set when the current line ended with a CR-initiated sequence; assigned
    // before every use inside the end-of-line branch below.
    int endwithcr;
    // Set when the previous line ended with LF CR.
    int lfcrending = 0;

    for (;;) {
        if (connp->out_status == HTP_STREAM_CLOSED) {
            // Finalize sending raw trailer data.
            htp_status_t rc = htp_connp_res_receiver_finalize_clear(connp);
            if (rc != HTP_OK) return rc;

            // Run hook response_TRAILER.
            rc = htp_hook_run_all(connp->cfg->hook_response_trailer, connp->out_tx);
            if (rc != HTP_OK) return rc;

            connp->out_state = htp_connp_RES_FINALIZE;
            return HTP_OK;
        }
        OUT_COPY_BYTE_OR_RETURN(connp);

        // Have we reached the end of the line?
        if (connp->out_next_byte != LF && connp->out_next_byte != CR) {
            lfcrending = 0;
        } else {
            endwithcr = 0;
            if (connp->out_next_byte == CR) {
                OUT_PEEK_NEXT(connp);
                if (connp->out_next_byte == -1) {
                    // The byte after CR is not available yet; ask for more data.
                    return HTP_DATA_BUFFER;
                } else if (connp->out_next_byte == LF) {
                    OUT_COPY_BYTE_OR_RETURN(connp);
                    if (lfcrending) {
                        // Handling LFCRCRLFCRLF
                        // These 6 characters mean only 2 end of lines
                        OUT_PEEK_NEXT(connp);
                        if (connp->out_next_byte == CR) {
                            OUT_COPY_BYTE_OR_RETURN(connp);
                            connp->out_current_consume_offset++;
                            OUT_PEEK_NEXT(connp);
                            if (connp->out_next_byte == LF) {
                                OUT_COPY_BYTE_OR_RETURN(connp);
                                connp->out_current_consume_offset++;
                                htp_log(connp, HTP_LOG_MARK, HTP_LOG_WARNING, 0,
                                        "Weird response end of lines mix");
                            }
                        }
                    }
                } else if (connp->out_next_byte == CR) {
                    // CR followed by CR: keep reading.
                    continue;
                }
                lfcrending = 0;
                endwithcr = 1;
            } else {
                // connp->out_next_byte == LF
                OUT_PEEK_NEXT(connp);
                lfcrending = 0;
                if (connp->out_next_byte == CR) {
                    // handles LF-CR sequence as end of line
                    OUT_COPY_BYTE_OR_RETURN(connp);
                    lfcrending = 1;
                }
            }

            unsigned char *data;
            size_t len;

            if (htp_connp_res_consolidate_data(connp, &data, &len) != HTP_OK) {
                return HTP_ERROR;
            }

            // CRCRLF is not an empty line
            if (endwithcr && len < 2) {
                continue;
            }

            #ifdef HTP_DEBUG
            fprint_raw_data(stderr, __func__, data, len);
            #endif

            // Tell the terminator check whether the next available byte is
            // something other than LF.
            int next_no_lf = 0;
            if (connp->out_current_read_offset < connp->out_current_len &&
                connp->out_current_data[connp->out_current_read_offset] != LF) {
                next_no_lf = 1;
            }
            // Should we terminate headers?
            if (htp_connp_is_line_terminator(connp, data, len, next_no_lf)) {
                // Parse previous header, if any.
                if (connp->out_header != NULL) {
                    if (connp->cfg->process_response_header(connp, bstr_ptr(connp->out_header),
                            bstr_len(connp->out_header)) != HTP_OK) return HTP_ERROR;

                    bstr_free(connp->out_header);
                    connp->out_header = NULL;
                }

                htp_connp_res_clear_buffer(connp);

                // We've seen all response headers.
                if (connp->out_tx->response_progress == HTP_RESPONSE_HEADERS) {
                    // Response headers.

                    // The next step is to determine if this response has a body.
                    connp->out_state = htp_connp_RES_BODY_DETERMINE;
                } else {
                    // Response trailer.

                    // Finalize sending raw trailer data.
                    htp_status_t rc = htp_connp_res_receiver_finalize_clear(connp);
                    if (rc != HTP_OK) return rc;

                    // Run hook response_TRAILER.
                    rc = htp_hook_run_all(connp->cfg->hook_response_trailer, connp->out_tx);
                    if (rc != HTP_OK) return rc;

                    // The next step is to finalize this response.
                    connp->out_state = htp_connp_RES_FINALIZE;
                }

                return HTP_OK;
            }

            htp_chomp(data, &len);

            // Check for header folding.
            if (htp_connp_is_line_folded(data, len) == 0) {
                // New header line.

                // Parse previous header, if any.
                if (connp->out_header != NULL) {
                    if (connp->cfg->process_response_header(connp, bstr_ptr(connp->out_header),
                            bstr_len(connp->out_header)) != HTP_OK) return HTP_ERROR;

                    bstr_free(connp->out_header);
                    connp->out_header = NULL;
                }

                OUT_PEEK_NEXT(connp);

                if (htp_is_folding_char(connp->out_next_byte) == 0) {
                    // Because we know this header is not folded, we can process the buffer straight away.
                    if (connp->cfg->process_response_header(connp, data, len) != HTP_OK) return HTP_ERROR;
                } else {
                    // Keep the partial header data for parsing later.
                    connp->out_header = bstr_dup_mem(data, len);
                    if (connp->out_header == NULL) return HTP_ERROR;
                }
            } else {
                // Folding; check that there's a previous header line to add to.
                if (connp->out_header == NULL) {
                    // Invalid folding.

                    // Warn only once per transaction.
                    if (!(connp->out_tx->flags & HTP_INVALID_FOLDING)) {
                        connp->out_tx->flags |= HTP_INVALID_FOLDING;
                        htp_log(connp, HTP_LOG_MARK, HTP_LOG_WARNING, 0, "Invalid response field folding");
                    }

                    // Keep the header data for parsing later, without its
                    // leading folding characters.
                    size_t trim = 0;
                    while(trim < len) {
                        if (!htp_is_folding_char(data[trim])) {
                            break;
                        }
                        trim++;
                    }
                    connp->out_header = bstr_dup_mem(data + trim, len - trim);
                    if (connp->out_header == NULL) return HTP_ERROR;
                } else {
                    size_t colon_pos = 0;
                    while ((colon_pos < len) && (data[colon_pos] != ':')) colon_pos++;

                    // A "folded" line that itself contains a colon, following a
                    // pending header that already has one, is handled as a new
                    // header on HTTP/1.1 rather than as a continuation.
                    if (colon_pos < len &&
                        bstr_chr(connp->out_header, ':') >= 0 &&
                        connp->out_tx->response_protocol_number == HTP_PROTOCOL_1_1) {
                        // Warn only once per transaction.
                        if (!(connp->out_tx->flags & HTP_INVALID_FOLDING)) {
                            connp->out_tx->flags |= HTP_INVALID_FOLDING;
                            htp_log(connp, HTP_LOG_MARK, HTP_LOG_WARNING, 0, "Invalid response field folding");
                        }
                        if (connp->cfg->process_response_header(connp, bstr_ptr(connp->out_header),
                            bstr_len(connp->out_header)) != HTP_OK)
                            return HTP_ERROR;
                        bstr_free(connp->out_header);
                        // Skip the first (folding) character of the line.
                        connp->out_header = bstr_dup_mem(data+1, len-1);
                        if (connp->out_header == NULL)
                            return HTP_ERROR;
                    } else {
                        // Add to the existing header.
                        if (bstr_len(connp->out_header) < HTP_MAX_HEADER_FOLDED) {
                            bstr *new_out_header = bstr_add_mem(connp->out_header, data, len);
                            if (new_out_header == NULL)
                                return HTP_ERROR;
                            connp->out_header = new_out_header;
                        } else {
                            // The continuation is dropped once the pending header is too long.
                            htp_log(connp, HTP_LOG_MARK, HTP_LOG_WARNING, 0, "Response field length exceeds folded maximum");
                        }
                    }
                }
            }

            htp_connp_res_clear_buffer(connp);
        }
    }

    return HTP_ERROR;
}

/**
 * Parses response line. A first line that does not look like a response line
 * is handed to the body processor instead.
 *
 * @param[in] connp
 * @returns HTP_OK on state change, HTP_ERROR on error, or HTP_DATA_BUFFER when more data is needed.
 */
htp_status_t htp_connp_RES_LINE(htp_connp_t *connp) {
    for (;;) {
        // Don't try to get more data if the stream is closed. If we do, we'll return, asking for more data.
        if (connp->out_status != HTP_STREAM_CLOSED) {
            // Get one byte
            OUT_COPY_BYTE_OR_RETURN(connp);
        }

        // Have we reached the end of the line? We treat stream closure as end of line in
        // order to handle the case when the first line of the response is actually response body
        // (and we wish it processed as such).
        if (connp->out_next_byte == CR) {
            OUT_PEEK_NEXT(connp);
            if (connp->out_next_byte == -1) {
                return HTP_DATA_BUFFER;
            } else if (connp->out_next_byte == LF) {
                // CR LF: let the next iteration read the LF.
                continue;
            }
            // A lone CR is treated as the end of the line.
            connp->out_next_byte = LF;
        }
        if ((connp->out_next_byte == LF)||(connp->out_status == HTP_STREAM_CLOSED)) {
            unsigned char *data;
            size_t len;

            if (htp_connp_res_consolidate_data(connp, &data, &len) != HTP_OK) {
                return HTP_ERROR;
            }

            #ifdef HTP_DEBUG
            fprint_raw_data(stderr, __func__, data, len);
            #endif

            // Is this a line that should be ignored?
            if (htp_connp_is_line_ignorable(connp, data, len)) {
                if (connp->out_status == HTP_STREAM_CLOSED) {
                    connp->out_state = htp_connp_RES_FINALIZE;
                }
                // We have an empty/whitespace line, which we'll note, ignore and move on
                connp->out_tx->response_ignored_lines++;

                // TODO How many lines are we willing to accept?

                // Start again
                htp_connp_res_clear_buffer(connp);

                return HTP_OK;
            }

            // Deallocate previous response line allocations, which we would have on a 100 response.

            if (connp->out_tx->response_line != NULL) {
                bstr_free(connp->out_tx->response_line);
                connp->out_tx->response_line = NULL;
            }

            if (connp->out_tx->response_protocol != NULL) {
                bstr_free(connp->out_tx->response_protocol);
                connp->out_tx->response_protocol = NULL;
            }

            if (connp->out_tx->response_status != NULL) {
                bstr_free(connp->out_tx->response_status);
                connp->out_tx->response_status = NULL;
            }

            if (connp->out_tx->response_message != NULL) {
                bstr_free(connp->out_tx->response_message);
                connp->out_tx->response_message = NULL;
            }

            // Process response line.

            // chomp_result is added back to len below so that the body
            // processor receives the line together with its terminator.
            int chomp_result = htp_chomp(data, &len);

            // If the response line is invalid, determine if it _looks_ like
            // a response line. If it does not look like a line, process the
            // data as a response body because that is what browsers do.

            if (htp_treat_response_line_as_body(data, len)) {
                // if we have a next line beginning with H, skip this one
                if (connp->out_current_read_offset+1 < connp->out_current_len && (connp->out_current_data[connp->out_current_read_offset] == 'H' || len <= 2)) {
                    connp->out_tx->response_ignored_lines++;
                    htp_connp_res_clear_buffer(connp);
                    return HTP_OK;
                }
                connp->out_tx->response_content_encoding_processing = HTP_COMPRESSION_NONE;

                connp->out_current_consume_offset = connp->out_current_read_offset;
                htp_status_t rc = htp_tx_res_process_body_data_ex(connp->out_tx, data, len + chomp_result);
                htp_connp_res_clear_buffer(connp);
                if (rc != HTP_OK) return rc;

                // Continue to process response body. Because we don't have
                // any headers to parse, we assume the body continues until
                // the end of the stream.

                // Have we seen the entire response body?
                if (connp->out_current_len <= connp->out_current_read_offset) {
                    connp->out_tx->response_transfer_coding = HTP_CODING_IDENTITY;
                    connp->out_tx->response_progress = HTP_RESPONSE_BODY;
                    connp->out_body_data_left = -1;
                    connp->out_state = htp_connp_RES_FINALIZE;
                }

                return HTP_OK;
            }

            connp->out_tx->response_line = bstr_dup_mem(data, len);
            if (connp->out_tx->response_line == NULL) return HTP_ERROR;

            if (connp->cfg->parse_response_line(connp) != HTP_OK) return HTP_ERROR;

            htp_status_t rc = htp_tx_state_response_line(connp->out_tx);
            if (rc != HTP_OK) return rc;

            htp_connp_res_clear_buffer(connp);

            // Move on to the next phase.
            connp->out_state = htp_connp_RES_HEADERS;
            connp->out_tx->response_progress = HTP_RESPONSE_HEADERS;

            return HTP_OK;
        }
    }

    return HTP_ERROR;
}

/**
 * Returns the read offset within the current outbound data chunk.
 *
 * @param[in] connp
 * @return The value of connp->out_current_read_offset.
 */
size_t htp_connp_res_data_consumed(htp_connp_t *connp) {
    return connp->out_current_read_offset;
}

/**
 * Finalizes the response. Unless the stream is closed, the next line of data
 * is probed first: if it does not look like a response line it is processed as
 * unexpected body data; otherwise it is "unread" so that RES_LINE can parse it
 * for the next transaction, and the current transaction is completed.
 *
 * @param[in] connp
 * @returns HTP_ERROR on error, HTP_DATA_BUFFER when more data is needed to complete
 *          the probed line, or the value returned by the body processor or by
 *          htp_tx_state_response_complete_ex().
 */
htp_status_t htp_connp_RES_FINALIZE(htp_connp_t *connp) {
    if (connp->out_status != HTP_STREAM_CLOSED) {
        OUT_PEEK_NEXT(connp);
        if (connp->out_next_byte == -1) {
            // No more data in this chunk: complete the transaction.
            return htp_tx_state_response_complete_ex(connp->out_tx, 0);
        }
        if (connp->out_next_byte != LF || connp->out_current_consume_offset >= connp->out_current_read_offset) {
            for (;;) {//;i < max_read; i++) {
                OUT_COPY_BYTE_OR_RETURN(connp);
                // Have we reached the end of the line? For some reason
                // we can't test after IN_COPY_BYTE_OR_RETURN */
                if (connp->out_next_byte == LF)
                    break;
            }
        }
    }
    size_t bytes_left;
    unsigned char * data;

    if (htp_connp_res_consolidate_data(connp, &data, &bytes_left) != HTP_OK) {
        return HTP_ERROR;
    }
#ifdef HTP_DEBUG
    fprint_raw_data(stderr, "PROBING response finalize", data, bytes_left);
#endif
    if (bytes_left == 0) {
        //closing
        return htp_tx_state_response_complete_ex(connp->out_tx, 0);
    }

    if (htp_treat_response_line_as_body(data, bytes_left)) {
        // Interpret remaining bytes as body data
        htp_log(connp, HTP_LOG_MARK, HTP_LOG_WARNING, 0, "Unexpected response body");
        htp_status_t rc = htp_tx_res_process_body_data_ex(connp->out_tx, data, bytes_left);
        htp_connp_res_clear_buffer(connp);
        return rc;
    }

    //unread last end of line so that RES_LINE works
    if (connp->out_current_read_offset < (int64_t)bytes_left) {
        connp->out_current_read_offset=0;
    } else {
        connp->out_current_read_offset-=bytes_left;
    }
    // Keep the consume offset from pointing past the rewound read offset.
    if (connp->out_current_read_offset < connp->out_current_consume_offset) {
        connp->out_current_consume_offset=connp->out_current_read_offset;
    }
    return htp_tx_state_response_complete_ex(connp->out_tx, 0 /* not hybrid mode */);
}

/**
 * The response idle state will
initialize response processing, as well as
 * finalize each transaction after we are done with it.
 *
 * When no transaction exists at connp->out_next_tx_index, a new one is
 * created with a placeholder request URI (REQUEST_URI_NOT_SEEN) so that a
 * response without a matching request can still be processed.
 *
 * @param[in] connp
 * @returns HTP_OK on state change, HTP_ERROR on error, or HTP_DATA when more data is needed.
 */
htp_status_t htp_connp_RES_IDLE(htp_connp_t *connp) {

    // We want to start parsing the next response (and change
    // the state from IDLE) only if there's at least one
    // byte of data available. Otherwise we could be creating
    // new structures even if there's no more data on the
    // connection.
    // NOTE(review): the macro is defined outside this view; presumably it
    // returns from this function when no byte is available -- confirm.
    OUT_TEST_NEXT_BYTE_OR_RETURN(connp);

    // Parsing a new response

    // Find the next outgoing transaction
    // If there is none, we just create one so that responses without
    // request can still be processed.
    connp->out_tx = htp_list_get(connp->conn->transactions, connp->out_next_tx_index);
    if (connp->out_tx == NULL) {
        htp_log(connp, HTP_LOG_MARK, HTP_LOG_ERROR, 0, "Unable to match response to request");
        // finalize dangling request waiting for next request or body
        if (connp->in_state == htp_connp_REQ_FINALIZE) {
            htp_tx_state_request_complete(connp->in_tx);
        }
        connp->out_tx = htp_connp_tx_create(connp);
        if (connp->out_tx == NULL) {
            return HTP_ERROR;
        }
        // Give the synthetic transaction a placeholder URI.
        connp->out_tx->parsed_uri = htp_uri_alloc();
        if (connp->out_tx->parsed_uri == NULL) {
            return HTP_ERROR;
        }
        connp->out_tx->parsed_uri->path = bstr_dup_c(REQUEST_URI_NOT_SEEN);
        if (connp->out_tx->parsed_uri->path == NULL) {
            return HTP_ERROR;
        }
        connp->out_tx->request_uri = bstr_dup_c(REQUEST_URI_NOT_SEEN);
        if (connp->out_tx->request_uri == NULL) {
            return HTP_ERROR;
        }

        connp->in_state = htp_connp_REQ_FINALIZE;
#ifdef HTP_DEBUG
        fprintf(stderr, "picked up response w/o request");
#endif
        // We've used one transaction
        connp->out_next_tx_index++;
    } else {
        // We've used one transaction
        connp->out_next_tx_index++;

        // TODO Detect state mismatch

        // NOTE(review): these two fields are reset only on this branch, not
        // when a transaction is created above -- confirm this is intended.
        connp->out_content_length = -1;
        connp->out_body_data_left = -1;
    }

    htp_status_t rc = htp_tx_state_response_start(connp->out_tx);
    if (rc != HTP_OK) return rc;

    return HTP_OK;
}

/**
 * Feeds one chunk of outbound (response) data to the parser and runs the
 * response state machine until it needs more data, stops, or fails.
 *
 * @param[in] connp
 * @param[in] timestamp Time of the chunk; may be NULL, in which case the
 *                      stored timestamp is left unchanged.
 * @param[in] data      Chunk data. A NULL pointer together with len > 0 is
 *                      treated as a gap in the stream.
 * @param[in] len       Chunk length. Zero is accepted only when the stream
 *                      status is HTP_STREAM_CLOSED.
 * @return One of HTP_STREAM_STOP, HTP_STREAM_ERROR, HTP_STREAM_CLOSED,
 *         HTP_STREAM_TUNNEL, HTP_STREAM_DATA or HTP_STREAM_DATA_OTHER.
 */
int htp_connp_res_data(htp_connp_t *connp, const htp_time_t *timestamp, const void *data, size_t len) {
    #ifdef HTP_DEBUG
    fprintf(stderr, "htp_connp_res_data(connp->out_status %x)\n", connp->out_status);
    fprint_raw_data(stderr, __func__, data, len);
    #endif

    // Return if the connection is in stop state
    if (connp->out_status == HTP_STREAM_STOP) {
        htp_log(connp, HTP_LOG_MARK, HTP_LOG_INFO, 0, "Outbound parser is in HTP_STREAM_STOP");

        return HTP_STREAM_STOP;
    }

    // Return if the connection has had a fatal error
    if (connp->out_status == HTP_STREAM_ERROR) {
        htp_log(connp, HTP_LOG_MARK, HTP_LOG_ERROR, 0, "Outbound parser is in HTP_STREAM_ERROR");

        // NOTE(review): the debug text says HTP_STREAM_DATA, but the value
        // returned below is HTP_STREAM_ERROR.
        #ifdef HTP_DEBUG
        fprintf(stderr, "htp_connp_res_data: returning HTP_STREAM_DATA (previous error)\n");
        #endif

        return HTP_STREAM_ERROR;
    }

    // Sanity check: we must have a transaction pointer if the state is not IDLE (no outbound transaction)
    if ((connp->out_tx == NULL)&&(connp->out_state != htp_connp_RES_IDLE)) {
        connp->out_status = HTP_STREAM_ERROR;

        htp_log(connp, HTP_LOG_MARK, HTP_LOG_ERROR, 0, "Missing outbound transaction data");

        return HTP_STREAM_ERROR;
    }

    // If the length of the supplied data chunk is zero, proceed
    // only if the stream has been closed. We do not allow zero-sized
    // chunks in the API, but we use it internally to force the parsers
    // to finalize parsing.
    if (len == 0 && connp->out_status != HTP_STREAM_CLOSED) {
        htp_log(connp, HTP_LOG_MARK, HTP_LOG_ERROR, 0, "Zero-length data chunks are not allowed");

        // NOTE(review): the debug text says HTP_STREAM_DATA, but the value
        // returned below is HTP_STREAM_CLOSED.
        #ifdef HTP_DEBUG
        fprintf(stderr, "htp_connp_res_data: returning HTP_STREAM_DATA (zero-length chunk)\n");
        #endif

        return HTP_STREAM_CLOSED;
    }

    // Remember the timestamp of the current response data chunk
    if (timestamp != NULL) {
        memcpy(&connp->out_timestamp, timestamp, sizeof (*timestamp));
    }

    // Store the current chunk information
    connp->out_current_data = (unsigned char *) data;
    connp->out_current_len = len;
    connp->out_current_read_offset = 0;
    connp->out_current_consume_offset = 0;
    connp->out_current_receiver_offset = 0;

    htp_conn_track_outbound_data(connp->conn, len, timestamp);

    // Return without processing any data if the stream is in tunneling
    // mode (which it would be after an initial CONNECT transaction).
    if (connp->out_status == HTP_STREAM_TUNNEL) {
        #ifdef HTP_DEBUG
        fprintf(stderr, "htp_connp_res_data: returning HTP_STREAM_TUNNEL\n");
        #endif

        return HTP_STREAM_TUNNEL;
    }

    // Invoke a processor, in a loop, until an error
    // occurs or until we run out of data. Many processors
    // will process a request, each pointing to the next
    // processor that needs to run.
    for (;;) {
        #ifdef HTP_DEBUG
        fprintf(stderr, "htp_connp_res_data: out state=%s, progress=%s\n",
                htp_connp_out_state_as_string(connp),
                htp_tx_response_progress_as_string(connp->out_tx));
        #endif

        // Return if there's been an error
        // or if we've run out of data. We are relying
        // on processors to add error messages, so we'll
        // keep quiet here.
        htp_status_t rc;

        // Handle a gap (no data pointer, but a non-zero length). Only the
        // two identity body states run as usual; in the finalize state the
        // response is completed directly; any other state rejects the gap.
        if (data == NULL && len > 0) {
            if (connp->out_state == htp_connp_RES_BODY_IDENTITY_CL_KNOWN ||
                    connp->out_state == htp_connp_RES_BODY_IDENTITY_STREAM_CLOSE) {
                rc = connp->out_state(connp);
            } else if (connp->out_state == htp_connp_RES_FINALIZE) {
                rc = htp_tx_state_response_complete_ex(connp->out_tx, 0);
            } else {
                htp_log(connp, HTP_LOG_MARK, HTP_LOG_ERROR, 0, "Gaps are not allowed during this state");
                return HTP_STREAM_CLOSED;
            }
        } else {
            rc = connp->out_state(connp);
        }
        if (rc == HTP_OK) {
            if (connp->out_status == HTP_STREAM_TUNNEL) {
                #ifdef HTP_DEBUG
                fprintf(stderr, "htp_connp_res_data: returning HTP_STREAM_TUNNEL\n");
                #endif

                return HTP_STREAM_TUNNEL;
            }

            rc = htp_res_handle_state_change(connp);
        }

        if (rc != HTP_OK) {
            // Do we need more data?
            if ((rc == HTP_DATA) || (rc == HTP_DATA_BUFFER)) {
                htp_connp_res_receiver_send_data(connp, 0 /* not last */);

                if (rc == HTP_DATA_BUFFER) {
                    if (htp_connp_res_buffer(connp) != HTP_OK) {
                        connp->out_status = HTP_STREAM_ERROR;
                        return HTP_STREAM_ERROR;
                    }
                }

                #ifdef HTP_DEBUG
                fprintf(stderr, "htp_connp_res_data: returning HTP_STREAM_DATA\n");
                #endif

                connp->out_status = HTP_STREAM_DATA;

                return HTP_STREAM_DATA;
            }

            // Check for stop
            if (rc == HTP_STOP) {
                #ifdef HTP_DEBUG
                fprintf(stderr, "htp_connp_res_data: returning HTP_STREAM_STOP\n");
                #endif

                connp->out_status = HTP_STREAM_STOP;

                return HTP_STREAM_STOP;
            }

            // Check for suspended parsing
            if (rc == HTP_DATA_OTHER) {
                // We might have actually consumed the entire data chunk?
                if (connp->out_current_read_offset >= connp->out_current_len) {
                    #ifdef HTP_DEBUG
                    fprintf(stderr, "htp_connp_res_data: returning HTP_STREAM_DATA (suspended parsing)\n");
                    #endif

                    connp->out_status = HTP_STREAM_DATA;

                    // Do not send HTP_STREAM_DATA_OTHER if we've
                    // consumed the entire chunk
                    return HTP_STREAM_DATA;
                } else {
                    #ifdef HTP_DEBUG
                    fprintf(stderr, "htp_connp_res_data: returning HTP_STREAM_DATA_OTHER\n");
                    #endif

                    connp->out_status = HTP_STREAM_DATA_OTHER;

                    // Partial chunk consumption
                    return HTP_STREAM_DATA_OTHER;
                }
            }

            #ifdef HTP_DEBUG
            fprintf(stderr, "htp_connp_res_data: returning HTP_STREAM_ERROR\n");
            #endif

            // Permanent stream error.
            connp->out_status = HTP_STREAM_ERROR;

            return HTP_STREAM_ERROR;
        }
    }
}
diff --git a/htp/htp_response_generic.c b/htp/htp_response_generic.c new file mode 100644 index 0000000..f5fa59e --- /dev/null +++ b/htp/htp_response_generic.c @@ -0,0 +1,334 @@ +/*************************************************************************** + * Copyright (c) 2009-2010 Open Information Security Foundation + * Copyright (c) 2010-2013 Qualys, Inc. + * All rights reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions are + * met: + * + * - Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + + * - Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution. + + * - Neither the name of the Qualys, Inc. nor the names of its + * contributors may be used to endorse or promote products derived from + * this software without specific prior written permission. 
+ * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS + * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT + * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR + * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT + * HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, + * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT + * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, + * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY + * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT + * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE + * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + ***************************************************************************/ + +/** + * @file + * @author Ivan Ristic <ivanr@webkreator.com> + */ + +#include "htp_config_auto.h" + +#include "htp_private.h" + +/** + * Generic response line parser. + * + * @param[in] connp + * @return HTP status + */ +htp_status_t htp_parse_response_line_generic(htp_connp_t *connp) { + htp_tx_t *tx = connp->out_tx; + unsigned char *data = bstr_ptr(tx->response_line); + size_t len = bstr_len(tx->response_line); + size_t pos = 0; + + tx->response_protocol = NULL; + tx->response_protocol_number = HTP_PROTOCOL_INVALID; + tx->response_status = NULL; + tx->response_status_number = HTP_STATUS_INVALID; + tx->response_message = NULL; + + // Ignore whitespace at the beginning of the line. + while ((pos < len) && (htp_is_space(data[pos]))) pos++; + + size_t start = pos; + + // Find the end of the protocol string. 
+ while ((pos < len) && (!htp_is_space(data[pos]))) pos++; + if (pos - start == 0) return HTP_OK; + + tx->response_protocol = bstr_dup_mem(data + start, pos - start); + if (tx->response_protocol == NULL) return HTP_ERROR; + + tx->response_protocol_number = htp_parse_protocol(tx->response_protocol); + + #ifdef HTP_DEBUG + fprint_raw_data(stderr, "Response protocol", bstr_ptr(tx->response_protocol), bstr_len(tx->response_protocol)); + fprintf(stderr, "Response protocol number: %d\n", tx->response_protocol_number); + #endif + + // Ignore whitespace after the response protocol. + while ((pos < len) && (htp_is_space(data[pos]))) pos++; + if (pos == len) return HTP_OK; + + start = pos; + + // Find the next whitespace character. + while ((pos < len) && (!htp_is_space(data[pos]))) pos++; + if (pos - start == 0) return HTP_OK; + + tx->response_status = bstr_dup_mem(data + start, pos - start); + if (tx->response_status == NULL) return HTP_ERROR; + + tx->response_status_number = htp_parse_status(tx->response_status); + + #ifdef HTP_DEBUG + fprint_raw_data(stderr, "Response status (as text)", bstr_ptr(tx->response_status), bstr_len(tx->response_status)); + fprintf(stderr, "Response status number: %d\n", tx->response_status_number); + #endif + + // Ignore whitespace that follows the status code. + while ((pos < len) && (isspace(data[pos]))) pos++; + if (pos == len) return HTP_OK; + + // Assume the message stretches until the end of the line. + tx->response_message = bstr_dup_mem(data + pos, len - pos); + if (tx->response_message == NULL) return HTP_ERROR; + + #ifdef HTP_DEBUG + fprint_raw_data(stderr, "Response status message", bstr_ptr(tx->response_message), bstr_len(tx->response_message)); + #endif + + return HTP_OK; +} + +/** + * Generic response header parser. 
+ * + * @param[in] connp + * @param[in] h + * @param[in] data + * @param[in] len + * @return HTP status + */ +htp_status_t htp_parse_response_header_generic(htp_connp_t *connp, htp_header_t *h, unsigned char *data, size_t len) { + size_t name_start, name_end; + size_t value_start, value_end; + size_t prev; + + htp_chomp(data, &len); + + name_start = 0; + + // Look for the first colon. + size_t colon_pos = 0; + while ((colon_pos < len) && (data[colon_pos] != ':')) colon_pos++; + + if (colon_pos == len) { + // Header line with a missing colon. + + h->flags |= HTP_FIELD_UNPARSEABLE; + h->flags |= HTP_FIELD_INVALID; + + if (!(connp->out_tx->flags & HTP_FIELD_UNPARSEABLE)) { + // Only once per transaction. + connp->out_tx->flags |= HTP_FIELD_UNPARSEABLE; + connp->out_tx->flags |= HTP_FIELD_INVALID; + htp_log(connp, HTP_LOG_MARK, HTP_LOG_WARNING, 0, "Response field invalid: missing colon."); + } + + // Reset the position. We're going to treat this invalid header + // as a header with an empty name. That will increase the probability + // that the content will be inspected. + colon_pos = 0; + (void)colon_pos; // suppress scan-build warning + name_end = 0; + value_start = 0; + } else { + // Header line with a colon. + + if (colon_pos == 0) { + // Empty header name. + + h->flags |= HTP_FIELD_INVALID; + + if (!(connp->out_tx->flags & HTP_FIELD_INVALID)) { + // Only once per transaction. + connp->out_tx->flags |= HTP_FIELD_INVALID; + htp_log(connp, HTP_LOG_MARK, HTP_LOG_WARNING, 0, "Response field invalid: empty name."); + } + } + + name_end = colon_pos; + + // Ignore unprintable after field-name. + prev = name_end; + while ((prev > name_start) && htp_is_space(data[prev - 1])) { + prev--; + name_end--; + + h->flags |= HTP_FIELD_INVALID; + + if (!(connp->out_tx->flags & HTP_FIELD_INVALID)) { + // Only once per transaction. 
+ connp->out_tx->flags |= HTP_FIELD_INVALID; + htp_log(connp, HTP_LOG_MARK, HTP_LOG_WARNING, 0, "Response field invalid: LWS after name."); + } + } + + value_start = colon_pos + 1; + } + + // Header value. + + // Ignore LWS before field-content. + while ((value_start < len) && (htp_is_lws(data[value_start]))) { + value_start++; + } + + // Look for the end of field-content. + value_end = len; + + // Check that the header name is a token. + size_t i = name_start; + while (i < name_end) { + if (!htp_is_token(data[i])) { + h->flags |= HTP_FIELD_INVALID; + + if (!(connp->out_tx->flags & HTP_FIELD_INVALID)) { + connp->out_tx->flags |= HTP_FIELD_INVALID; + htp_log(connp, HTP_LOG_MARK, HTP_LOG_WARNING, 0, "Response header name is not a token."); + } + + break; + } + + i++; + } + for (i = value_start; i < value_end; i++) { + if (data[i] == 0) { + htp_log(connp, HTP_LOG_MARK, HTP_LOG_WARNING, 0, "Response header value contains null."); + break; + } + } + // Ignore LWS after field-content. + prev = value_end - 1; + while ((prev > value_start) && (htp_is_lws(data[prev]))) { + prev--; + value_end--; + } + + // Now extract the name and the value. + h->name = bstr_dup_mem(data + name_start, name_end - name_start); + h->value = bstr_dup_mem(data + value_start, value_end - value_start); + if ((h->name == NULL) || (h->value == NULL)) { + bstr_free(h->name); + bstr_free(h->value); + return HTP_ERROR; + } + + return HTP_OK; +} + +/** + * Generic response header line(s) processor, which assembles folded lines + * into a single buffer before invoking the parsing function. + * + * @param[in] connp + * @param[in] data + * @param[in] len + * @return HTP status + */ +htp_status_t htp_process_response_header_generic(htp_connp_t *connp, unsigned char *data, size_t len) { + // Create a new header structure. 
+ htp_header_t *h = calloc(1, sizeof (htp_header_t)); + if (h == NULL) return HTP_ERROR; + + if (htp_parse_response_header_generic(connp, h, data, len) != HTP_OK) { + free(h); + return HTP_ERROR; + } + + #ifdef HTP_DEBUG + fprint_bstr(stderr, "Header name", h->name); + fprint_bstr(stderr, "Header value", h->value); + #endif + + // Do we already have a header with the same name? + htp_header_t *h_existing = htp_table_get(connp->out_tx->response_headers, h->name); + if (h_existing != NULL) { + // Keep track of repeated same-name headers. + if ((h_existing->flags & HTP_FIELD_REPEATED) == 0) { + // This is the second occurence for this header. + htp_log(connp, HTP_LOG_MARK, HTP_LOG_WARNING, 0, "Repetition for header"); + } else { + // For simplicity reasons, we count the repetitions of all headers + if (connp->out_tx->res_header_repetitions < HTP_MAX_HEADERS_REPETITIONS) { + connp->out_tx->res_header_repetitions++; + } else { + bstr_free(h->name); + bstr_free(h->value); + free(h); + return HTP_OK; + } + } + h_existing->flags |= HTP_FIELD_REPEATED; + + // Having multiple C-L headers is against the RFC but many + // browsers ignore the subsequent headers if the values are the same. + if (bstr_cmp_c_nocase(h->name, "Content-Length") == 0) { + // Don't use string comparison here because we want to + // ignore small formatting differences. + + int64_t existing_cl, new_cl; + + existing_cl = htp_parse_content_length(h_existing->value, NULL); + new_cl = htp_parse_content_length(h->value, NULL); + if ((existing_cl == -1) || (new_cl == -1) || (existing_cl != new_cl)) { + // Ambiguous response C-L value. + htp_log(connp, HTP_LOG_MARK, HTP_LOG_WARNING, 0, "Ambiguous response C-L value"); + } + + // Ignoring the new C-L header that has the same value as the previous ones. + } else { + // Add to the existing header. 
+ + bstr *new_value = bstr_expand(h_existing->value, bstr_len(h_existing->value) + 2 + bstr_len(h->value)); + if (new_value == NULL) { + bstr_free(h->name); + bstr_free(h->value); + free(h); + return HTP_ERROR; + } + + h_existing->value = new_value; + bstr_add_mem_noex(h_existing->value, (unsigned char *) ", ", 2); + bstr_add_noex(h_existing->value, h->value); + } + + // The new header structure is no longer needed. + bstr_free(h->name); + bstr_free(h->value); + free(h); + } else { + // Add as a new header. + if (htp_table_add(connp->out_tx->response_headers, h->name, h) != HTP_OK) { + bstr_free(h->name); + bstr_free(h->value); + free(h); + return HTP_ERROR; + } + } + + return HTP_OK; +} diff --git a/htp/htp_table.c b/htp/htp_table.c new file mode 100644 index 0000000..535b961 --- /dev/null +++ b/htp/htp_table.c @@ -0,0 +1,250 @@ +/*************************************************************************** + * Copyright (c) 2009-2010 Open Information Security Foundation + * Copyright (c) 2010-2013 Qualys, Inc. + * All rights reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions are + * met: + * + * - Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + + * - Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution. + + * - Neither the name of the Qualys, Inc. nor the names of its + * contributors may be used to endorse or promote products derived from + * this software without specific prior written permission. 
+ * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS + * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT + * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR + * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT + * HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, + * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT + * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, + * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY + * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT + * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE + * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + ***************************************************************************/ + +/** + * @file + * @author Ivan Ristic <ivanr@webkreator.com> + */ + +#include "htp_config_auto.h" + +#include "htp_private.h" + +static htp_status_t _htp_table_add(htp_table_t *table, const bstr *key, const void *element) { + // Add key. + if (htp_list_add(&table->list, (void *)key) != HTP_OK) return HTP_ERROR; + + // Add element. + if (htp_list_add(&table->list, (void *)element) != HTP_OK) { + htp_list_pop(&table->list); + return HTP_ERROR; + } + + return HTP_OK; +} + +htp_status_t htp_table_add(htp_table_t *table, const bstr *key, const void *element) { + if ((table == NULL)||(key == NULL)) return HTP_ERROR; + + // Keep track of how keys are allocated, and + // ensure that all invocations are consistent. + if (table->alloc_type == HTP_TABLE_KEYS_ALLOC_UKNOWN) { + table->alloc_type = HTP_TABLE_KEYS_COPIED; + } else { + if (table->alloc_type != HTP_TABLE_KEYS_COPIED) { + #ifdef HTP_DEBUG + fprintf(stderr, "# Inconsistent key management strategy. Actual %d. 
Attempted %d.\n", + table->alloc_type, HTP_TABLE_KEYS_COPIED); + #endif + + return HTP_ERROR; + } + } + + bstr *dupkey = bstr_dup(key); + if (dupkey == NULL) return HTP_ERROR; + + if (_htp_table_add(table, dupkey, element) != HTP_OK) { + bstr_free(dupkey); + return HTP_ERROR; + } + + return HTP_OK; +} + +htp_status_t htp_table_addn(htp_table_t *table, const bstr *key, const void *element) { + if ((table == NULL)||(key == NULL)) return HTP_ERROR; + + // Keep track of how keys are allocated, and + // ensure that all invocations are consistent. + if (table->alloc_type == HTP_TABLE_KEYS_ALLOC_UKNOWN) { + table->alloc_type = HTP_TABLE_KEYS_ADOPTED; + } else { + if (table->alloc_type != HTP_TABLE_KEYS_ADOPTED) { + #ifdef HTP_DEBUG + fprintf(stderr, "# Inconsistent key management strategy. Actual %d. Attempted %d.\n", + table->alloc_type, HTP_TABLE_KEYS_ADOPTED); + #endif + + return HTP_ERROR; + } + } + + return _htp_table_add(table, key, element); +} + +htp_status_t htp_table_addk(htp_table_t *table, const bstr *key, const void *element) { + if ((table == NULL)||(key == NULL)) return HTP_ERROR; + + // Keep track of how keys are allocated, and + // ensure that all invocations are consistent. + if (table->alloc_type == HTP_TABLE_KEYS_ALLOC_UKNOWN) { + table->alloc_type = HTP_TABLE_KEYS_REFERENCED; + } else { + if (table->alloc_type != HTP_TABLE_KEYS_REFERENCED) { + #ifdef HTP_DEBUG + fprintf(stderr, "# Inconsistent key management strategy. Actual %d. Attempted %d.\n", + table->alloc_type, HTP_TABLE_KEYS_REFERENCED); + #endif + + return HTP_ERROR; + } + } + + return _htp_table_add(table, key, element); +} + +void htp_table_clear(htp_table_t *table) { + if (table == NULL) return; + + // Free the table keys, but only if we're managing them. 
+ if ((table->alloc_type == HTP_TABLE_KEYS_COPIED)||(table->alloc_type == HTP_TABLE_KEYS_ADOPTED)) { + bstr *key = NULL; + for (size_t i = 0, n = htp_list_size(&table->list); i < n; i += 2) { + key = htp_list_get(&table->list, i); + bstr_free(key); + } + } + + htp_list_clear(&table->list); +} + +void htp_table_clear_ex(htp_table_t *table) { + if (table == NULL) return; + + // This function does not free table keys. + + htp_list_clear(&table->list); +} + +htp_table_t *htp_table_create(size_t size) { + if (size == 0) return NULL; + + htp_table_t *table = calloc(1, sizeof (htp_table_t)); + if (table == NULL) return NULL; + + table->alloc_type = HTP_TABLE_KEYS_ALLOC_UKNOWN; + + // Use a list behind the scenes. + if (htp_list_init(&table->list, size * 2) == HTP_ERROR) { + free(table); + return NULL; + } + + return table; +} + +void htp_table_destroy(htp_table_t *table) { + if (table == NULL) return; + + htp_table_clear(table); + + htp_list_array_release(&table->list); + + free(table); +} + +void htp_table_destroy_ex(htp_table_t *table) { + if (table == NULL) return; + + // Change allocation strategy in order to + // prevent the keys from being freed. + table->alloc_type = HTP_TABLE_KEYS_REFERENCED; + + htp_table_destroy(table); +} + +void *htp_table_get(const htp_table_t *table, const bstr *key) { + if ((table == NULL)||(key == NULL)) return NULL; + + // Iterate through the list, comparing + // keys with the parameter, return data if found. + for (size_t i = 0, n = htp_list_size(&table->list); i < n; i += 2) { + bstr *key_candidate = htp_list_get(&table->list, i); + void *element = htp_list_get(&table->list, i + 1); + if (bstr_cmp_nocase(key_candidate, key) == 0) { + return element; + } + } + + return NULL; +} + +void *htp_table_get_c(const htp_table_t *table, const char *ckey) { + if ((table == NULL)||(ckey == NULL)) return NULL; + + // Iterate through the list, comparing + // keys with the parameter, return data if found. 
+ for (size_t i = 0, n = htp_list_size(&table->list); i < n; i += 2) { + bstr *key_candidate = htp_list_get(&table->list, i); + void *element = htp_list_get(&table->list, i + 1); + if (bstr_cmp_c_nocasenorzero(key_candidate, ckey) == 0) { + return element; + } + } + + return NULL; +} + +void *htp_table_get_index(const htp_table_t *table, size_t idx, bstr **key) { + if (table == NULL) return NULL; + + if (idx >= htp_list_size(&table->list)) return NULL; + + if (key != NULL) { + *key = htp_list_get(&table->list, idx * 2); + } + + return htp_list_get(&table->list, (idx * 2) + 1); +} + +void *htp_table_get_mem(const htp_table_t *table, const void *key, size_t key_len) { + if ((table == NULL)||(key == NULL)) return NULL; + + // Iterate through the list, comparing + // keys with the parameter, return data if found. + for (size_t i = 0, n = htp_list_size(&table->list); i < n; i += 2) { + bstr *key_candidate = htp_list_get(&table->list, i); + void *element = htp_list_get(&table->list, i + 1); + if (bstr_cmp_mem_nocase(key_candidate, key, key_len) == 0) { + return element; + } + } + + return NULL; +} + +size_t htp_table_size(const htp_table_t *table) { + if (table == NULL) return 0; + return htp_list_size(&table->list) / 2; +} diff --git a/htp/htp_table.h b/htp/htp_table.h new file mode 100644 index 0000000..70a1e9b --- /dev/null +++ b/htp/htp_table.h @@ -0,0 +1,184 @@ +/*************************************************************************** + * Copyright (c) 2009-2010 Open Information Security Foundation + * Copyright (c) 2010-2013 Qualys, Inc. + * All rights reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions are + * met: + * + * - Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. 
+ + * - Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution. + + * - Neither the name of the Qualys, Inc. nor the names of its + * contributors may be used to endorse or promote products derived from + * this software without specific prior written permission. + * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS + * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT + * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR + * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT + * HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, + * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT + * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, + * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY + * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT + * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE + * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + ***************************************************************************/

/**
 * @file
 * @author Ivan Ristic <ivanr@webkreator.com>
 */

#ifndef HTP_TABLE_H
#define HTP_TABLE_H

#ifdef __cplusplus
extern "C" {
#endif

/** Opaque table type; the structure is defined in a private header. */
typedef struct htp_table_t htp_table_t;

/**
 * Add a new element to the table. The key will be copied, and the copy
 * managed by the table. The table keeps a pointer to the element. It is the
 * caller's responsibility to ensure the pointer remains valid.
 *
 * @param[in] table
 * @param[in] key
 * @param[in] element
 * @return HTP_OK on success, HTP_ERROR on failure.
 */
htp_status_t htp_table_add(htp_table_t *table, const bstr *key, const void *element);

/**
 * Add a new element to the table. The key provided will be adopted and managed
 * by the table. You should not keep a copy of the pointer to the key unless you're
 * certain that the table will live longer than the copy. The table keeps a pointer
 * to the element. It is the caller's responsibility to ensure the pointer remains
 * valid.
 *
 * @param[in] table
 * @param[in] key
 * @param[in] element
 * @return HTP_OK on success, HTP_ERROR on failure.
 */
htp_status_t htp_table_addn(htp_table_t *table, const bstr *key, const void *element);

/**
 * Add a new element to the table. The key provided will be only referenced and the
 * caller remains responsible for keeping it alive until after the table is destroyed. The
 * table keeps a pointer to the element. It is the caller's responsibility to ensure
 * the pointer remains valid.
 *
 * @param[in] table
 * @param[in] key
 * @param[in] element
 * @return HTP_OK on success, HTP_ERROR on failure.
 */
htp_status_t htp_table_addk(htp_table_t *table, const bstr *key, const void *element);

/**
 * Remove all elements from the table. This function handles keys
 * according to the active allocation strategy. If the elements need freeing,
 * you need to free them before invoking this function.
 *
 * @param[in] table
 */
void htp_table_clear(htp_table_t *table);

/**
 * Remove all elements from the table without freeing any of the keys, even
 * if the table is using an allocation strategy where keys belong to it. This
 * function is useful if all the keys have been adopted by some other structure.
 *
 * @param[in] table
 */
void htp_table_clear_ex(htp_table_t *table);

/**
 * Create a new table structure. The table will grow automatically as needed,
 * but you are required to provide a starting size.
 *
 * @param[in] size The starting size; must not be zero.
 * @return Newly created table instance, or NULL on failure.
 */
htp_table_t *htp_table_create(size_t size);

/**
 * Destroy a table. This function handles the keys according to the active
 * allocation strategy. If the elements need freeing, you need to free them
 * before invoking this function. The table is passed by value, so the
 * caller's pointer is not modified and must not be used after this call.
 *
 * @param[in] table
 */
void htp_table_destroy(htp_table_t *table);

/**
 * Destroy the given table, but don't free the keys, even if they are managed by
 * the table. Use this method when the responsibility for the keys has been transferred
 * elsewhere. The table is passed by value, so the caller's pointer is not
 * modified and must not be used after this call.
 *
 * @param[in] table
 */
void htp_table_destroy_ex(htp_table_t *table);

/**
 * Retrieve the first element that matches the given bstr key.
 *
 * @param[in] table
 * @param[in] key
 * @return Matched element, or NULL if no elements match the key.
 */
void *htp_table_get(const htp_table_t *table, const bstr *key);

/**
 * Retrieve the first element that matches the given NUL-terminated key.
 *
 * @param[in] table
 * @param[in] ckey
 * @return Matched element, or NULL if no elements match the key.
 */
void *htp_table_get_c(const htp_table_t *table, const char *ckey);

/**
 * Retrieve key and element at the given index.
 *
 * @param[in] table
 * @param[in] idx
 * @param[in,out] key Pointer in which the key will be returned. Can be NULL.
 * @return The element stored at the given index, or NULL on failure.
 */
void *htp_table_get_index(const htp_table_t *table, size_t idx, bstr **key);

/**
 * Retrieve the first element whose key matches the memory region defined by
 * the provided pointer and length.
 *
 * @param[in] table
 * @param[in] key
 * @param[in] key_len
 * @return Matched element, or NULL if no elements match the key.
 */
void *htp_table_get_mem(const htp_table_t *table, const void *key, size_t key_len);

/**
 * Return the size of the table.
 *
 * @param[in] table
 * @return Number of key/element pairs in the table.
 */
size_t htp_table_size(const htp_table_t *table);

#ifdef __cplusplus
}
#endif

#endif /* HTP_TABLE_H */

diff --git a/htp/htp_table_private.h b/htp/htp_table_private.h new file mode 100644 index 0000000..273ec99 --- /dev/null +++ b/htp/htp_table_private.h @@ -0,0 +1,78 @@ +/*************************************************************************** + * Copyright (c) 2009-2010 Open Information Security Foundation + * Copyright (c) 2010-2013 Qualys, Inc. + * All rights reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions are + * met: + * + * - Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + + * - Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution. + + * - Neither the name of the Qualys, Inc. nor the names of its + * contributors may be used to endorse or promote products derived from + * this software without specific prior written permission. + * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS + * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT + * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR + * A PARTICULAR PURPOSE ARE DISCLAIMED. 
IN NO EVENT SHALL THE COPYRIGHT + * HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, + * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT + * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, + * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY + * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT + * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE + * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + ***************************************************************************/ + +/** + * @file + * @author Ivan Ristic <ivanr@webkreator.com> + */ + +#ifndef HTP_TABLE_PRIVATE_H +#define HTP_TABLE_PRIVATE_H + +#ifdef __cplusplus +extern "C" { +#endif + +#include "htp_list.h" +#include "htp_table.h" + +enum htp_table_alloc_t { + /** This is the default value, used only until the first element is added. */ + HTP_TABLE_KEYS_ALLOC_UKNOWN = 0, + + /** Keys are copied.*/ + HTP_TABLE_KEYS_COPIED = 1, + + /** Keys are adopted and freed when the table is destroyed. */ + HTP_TABLE_KEYS_ADOPTED = 2, + + /** Keys are only referenced; the caller is still responsible for freeing them after the table is destroyed. */ + HTP_TABLE_KEYS_REFERENCED = 3 +}; + +struct htp_table_t { + /** Table key and value pairs are stored in this list; name first, then value. */ + htp_list_t list; + + /** + * Key management strategy. Initially set to HTP_TABLE_KEYS_ALLOC_UKNOWN. The + * actual strategy is determined by the first allocation. 
+ */ + enum htp_table_alloc_t alloc_type; +}; + +#ifdef __cplusplus +} +#endif + +#endif /* HTP_TABLE_PRIVATE_H */ diff --git a/htp/htp_transaction.c b/htp/htp_transaction.c new file mode 100644 index 0000000..7220459 --- /dev/null +++ b/htp/htp_transaction.c @@ -0,0 +1,1558 @@ +/*************************************************************************** + * Copyright (c) 2009-2010 Open Information Security Foundation + * Copyright (c) 2010-2013 Qualys, Inc. + * All rights reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions are + * met: + * + * - Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + + * - Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution. + + * - Neither the name of the Qualys, Inc. nor the names of its + * contributors may be used to endorse or promote products derived from + * this software without specific prior written permission. + * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS + * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT + * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR + * A PARTICULAR PURPOSE ARE DISCLAIMED. 
IN NO EVENT SHALL THE COPYRIGHT + * HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, + * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT + * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, + * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY + * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT + * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE + * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + ***************************************************************************/ + +/** + * @file + * @author Ivan Ristic <ivanr@webkreator.com> + */ + +#include "htp_config_auto.h" + +#include "htp_private.h" + +static void htp_tx_req_destroy_decompressors(htp_connp_t *connp); +static htp_status_t htp_tx_req_process_body_data_decompressor_callback(htp_tx_data_t *d); + +static bstr *copy_or_wrap_mem(const void *data, size_t len, enum htp_alloc_strategy_t alloc) { + if (data == NULL) return NULL; + + if (alloc == HTP_ALLOC_REUSE) { + return bstr_wrap_mem(data, len); + } else { + return bstr_dup_mem(data, len); + } +} + +htp_tx_t *htp_tx_create(htp_connp_t *connp) { + if (connp == NULL) return NULL; + + htp_tx_t *tx = calloc(1, sizeof (htp_tx_t)); + if (tx == NULL) return NULL; + + tx->connp = connp; + tx->conn = connp->conn; + tx->index = htp_list_size(tx->conn->transactions); + tx->cfg = connp->cfg; + tx->is_config_shared = HTP_CONFIG_SHARED; + + // Request fields. 
+ + tx->request_progress = HTP_REQUEST_NOT_STARTED; + tx->request_protocol_number = HTP_PROTOCOL_UNKNOWN; + tx->request_content_length = -1; + + tx->parsed_uri_raw = htp_uri_alloc(); + if (tx->parsed_uri_raw == NULL) { + htp_tx_destroy_incomplete(tx); + return NULL; + } + + tx->request_headers = htp_table_create(32); + if (tx->request_headers == NULL) { + htp_tx_destroy_incomplete(tx); + return NULL; + } + + tx->request_params = htp_table_create(32); + if (tx->request_params == NULL) { + htp_tx_destroy_incomplete(tx); + return NULL; + } + + // Response fields. + + tx->response_progress = HTP_RESPONSE_NOT_STARTED; + tx->response_status = NULL; + tx->response_status_number = HTP_STATUS_UNKNOWN; + tx->response_protocol_number = HTP_PROTOCOL_UNKNOWN; + tx->response_content_length = -1; + + tx->response_headers = htp_table_create(32); + if (tx->response_headers == NULL) { + htp_tx_destroy_incomplete(tx); + return NULL; + } + + htp_list_add(tx->conn->transactions, tx); + + return tx; +} + +htp_status_t htp_tx_destroy(htp_tx_t *tx) { + if (tx == NULL) return HTP_ERROR; + + if (!htp_tx_is_complete(tx)) return HTP_ERROR; + + htp_tx_destroy_incomplete(tx); + + return HTP_OK; +} + +void htp_tx_destroy_incomplete(htp_tx_t *tx) { + if (tx == NULL) return; + + // Disconnect transaction from other structures. + htp_conn_remove_tx(tx->conn, tx); + htp_connp_tx_remove(tx->connp, tx); + + // Request fields. + + bstr_free(tx->request_line); + bstr_free(tx->request_method); + bstr_free(tx->request_uri); + bstr_free(tx->request_protocol); + bstr_free(tx->request_content_type); + bstr_free(tx->request_hostname); + htp_uri_free(tx->parsed_uri_raw); + htp_uri_free(tx->parsed_uri); + bstr_free(tx->request_auth_username); + bstr_free(tx->request_auth_password); + + // Request_headers. 
+ if (tx->request_headers != NULL) { + htp_header_t *h = NULL; + for (size_t i = 0, n = htp_table_size(tx->request_headers); i < n; i++) { + h = htp_table_get_index(tx->request_headers, i, NULL); + bstr_free(h->name); + bstr_free(h->value); + free(h); + } + + htp_table_destroy(tx->request_headers); + } + + // Request parsers. + + htp_urlenp_destroy(tx->request_urlenp_query); + htp_urlenp_destroy(tx->request_urlenp_body); + htp_mpartp_destroy(tx->request_mpartp); + + // Request parameters. + + htp_param_t *param = NULL; + for (size_t i = 0, n = htp_table_size(tx->request_params); i < n; i++) { + param = htp_table_get_index(tx->request_params, i, NULL); + bstr_free(param->name); + bstr_free(param->value); + free(param); + } + + htp_table_destroy(tx->request_params); + + // Request cookies. + + if (tx->request_cookies != NULL) { + bstr *b = NULL; + for (size_t i = 0, n = htp_table_size(tx->request_cookies); i < n; i++) { + b = htp_table_get_index(tx->request_cookies, i, NULL); + bstr_free(b); + } + + htp_table_destroy(tx->request_cookies); + } + + htp_hook_destroy(tx->hook_request_body_data); + + // Response fields. + + bstr_free(tx->response_line); + bstr_free(tx->response_protocol); + bstr_free(tx->response_status); + bstr_free(tx->response_message); + bstr_free(tx->response_content_type); + + // Destroy response headers. + if (tx->response_headers != NULL) { + htp_header_t *h = NULL; + for (size_t i = 0, n = htp_table_size(tx->response_headers); i < n; i++) { + h = htp_table_get_index(tx->response_headers, i, NULL); + bstr_free(h->name); + bstr_free(h->value); + free(h); + } + + htp_table_destroy(tx->response_headers); + } + + // If we're using a private configuration structure, destroy it. 
+ if (tx->is_config_shared == HTP_CONFIG_PRIVATE) { + htp_config_destroy(tx->cfg); + } + + free(tx); +} + +int htp_tx_get_is_config_shared(const htp_tx_t *tx) { + if (tx == NULL) return -1; + return tx->is_config_shared; +} + +void *htp_tx_get_user_data(const htp_tx_t *tx) { + if (tx == NULL) return NULL; + return tx->user_data; +} + +void htp_tx_set_config(htp_tx_t *tx, htp_cfg_t *cfg, int is_cfg_shared) { + if ((tx == NULL) || (cfg == NULL)) return; + + if ((is_cfg_shared != HTP_CONFIG_PRIVATE) && (is_cfg_shared != HTP_CONFIG_SHARED)) return; + + // If we're using a private configuration, destroy it. + if (tx->is_config_shared == HTP_CONFIG_PRIVATE) { + htp_config_destroy(tx->cfg); + } + + tx->cfg = cfg; + tx->is_config_shared = is_cfg_shared; +} + +void htp_tx_set_user_data(htp_tx_t *tx, void *user_data) { + if (tx == NULL) return; + tx->user_data = user_data; +} + +htp_status_t htp_tx_req_add_param(htp_tx_t *tx, htp_param_t *param) { + if ((tx == NULL) || (param == NULL)) return HTP_ERROR; + + if (tx->cfg->parameter_processor != NULL) { + if (tx->cfg->parameter_processor(param) != HTP_OK) return HTP_ERROR; + } + + return htp_table_addk(tx->request_params, param->name, param); +} + +htp_param_t *htp_tx_req_get_param(htp_tx_t *tx, const char *name, size_t name_len) { + if ((tx == NULL) || (name == NULL)) return NULL; + return htp_table_get_mem(tx->request_params, name, name_len); +} + +htp_param_t *htp_tx_req_get_param_ex(htp_tx_t *tx, enum htp_data_source_t source, const char *name, size_t name_len) { + if ((tx == NULL) || (name == NULL)) return NULL; + + htp_param_t *p = NULL; + + for (size_t i = 0, n = htp_table_size(tx->request_params); i < n; i++) { + p = htp_table_get_index(tx->request_params, i, NULL); + if (p->source != source) continue; + + if (bstr_cmp_mem_nocase(p->name, name, name_len) == 0) return p; + } + + return NULL; +} + +int htp_tx_req_has_body(const htp_tx_t *tx) { + if (tx == NULL) return -1; + + if ((tx->request_transfer_coding == 
HTP_CODING_IDENTITY) || (tx->request_transfer_coding == HTP_CODING_CHUNKED)) { + return 1; + } + + return 0; +} + +htp_status_t htp_tx_req_set_header(htp_tx_t *tx, const char *name, size_t name_len, + const char *value, size_t value_len, enum htp_alloc_strategy_t alloc) { + if ((tx == NULL) || (name == NULL) || (value == NULL)) return HTP_ERROR; + + htp_header_t *h = calloc(1, sizeof (htp_header_t)); + if (h == NULL) return HTP_ERROR; + + h->name = copy_or_wrap_mem(name, name_len, alloc); + if (h->name == NULL) { + free(h); + return HTP_ERROR; + } + + h->value = copy_or_wrap_mem(value, value_len, alloc); + if (h->value == NULL) { + bstr_free(h->name); + free(h); + return HTP_ERROR; + } + + if (htp_table_add(tx->request_headers, h->name, h) != HTP_OK) { + bstr_free(h->name); + bstr_free(h->value); + free(h); + return HTP_ERROR; + } + + return HTP_OK; +} + +htp_status_t htp_tx_req_set_method(htp_tx_t *tx, const char *method, size_t method_len, enum htp_alloc_strategy_t alloc) { + if ((tx == NULL) || (method == NULL)) return HTP_ERROR; + + tx->request_method = copy_or_wrap_mem(method, method_len, alloc); + if (tx->request_method == NULL) return HTP_ERROR; + + return HTP_OK; +} + +void htp_tx_req_set_method_number(htp_tx_t *tx, enum htp_method_t method_number) { + if (tx == NULL) return; + tx->request_method_number = method_number; +} + +htp_status_t htp_tx_req_set_uri(htp_tx_t *tx, const char *uri, size_t uri_len, enum htp_alloc_strategy_t alloc) { + if ((tx == NULL) || (uri == NULL)) return HTP_ERROR; + + tx->request_uri = copy_or_wrap_mem(uri, uri_len, alloc); + if (tx->request_uri == NULL) return HTP_ERROR; + + return HTP_OK; +} + +htp_status_t htp_tx_req_set_protocol(htp_tx_t *tx, const char *protocol, size_t protocol_len, enum htp_alloc_strategy_t alloc) { + if ((tx == NULL) || (protocol == NULL)) return HTP_ERROR; + + tx->request_protocol = copy_or_wrap_mem(protocol, protocol_len, alloc); + if (tx->request_protocol == NULL) return HTP_ERROR; + + return HTP_OK; 
+} + +void htp_tx_req_set_protocol_number(htp_tx_t *tx, int protocol_number) { + if (tx == NULL) return; + tx->request_protocol_number = protocol_number; +} + +void htp_tx_req_set_protocol_0_9(htp_tx_t *tx, int is_protocol_0_9) { + if (tx == NULL) return; + + if (is_protocol_0_9) { + tx->is_protocol_0_9 = 1; + } else { + tx->is_protocol_0_9 = 0; + } +} + +static htp_status_t htp_tx_process_request_headers(htp_tx_t *tx) { + if (tx == NULL) return HTP_ERROR; + + // Determine if we have a request body, and how it is packaged. + + htp_status_t rc = HTP_OK; + + if (tx->connp->cfg->request_decompression_enabled) { + tx->request_content_encoding = HTP_COMPRESSION_NONE; + htp_header_t *ce = htp_table_get_c(tx->request_headers, "content-encoding"); + if (ce != NULL) { + /* fast paths: regular gzip and friends */ + if ((bstr_cmp_c_nocasenorzero(ce->value, "gzip") == 0) || + (bstr_cmp_c_nocasenorzero(ce->value, "x-gzip") == 0)) { + tx->request_content_encoding = HTP_COMPRESSION_GZIP; + } else if ((bstr_cmp_c_nocasenorzero(ce->value, "deflate") == 0) || + (bstr_cmp_c_nocasenorzero(ce->value, "x-deflate") == 0)) { + tx->request_content_encoding = HTP_COMPRESSION_DEFLATE; + } else if (bstr_cmp_c_nocasenorzero(ce->value, "lzma") == 0) { + tx->request_content_encoding = HTP_COMPRESSION_LZMA; + } + //ignore other cases such as inflate, ot multiple layers + if ((tx->request_content_encoding != HTP_COMPRESSION_NONE)) + { + if (tx->connp->req_decompressor != NULL) { + htp_tx_req_destroy_decompressors(tx->connp); + } + tx->connp->req_decompressor = htp_gzip_decompressor_create(tx->connp, tx->request_content_encoding); + if (tx->connp->req_decompressor == NULL) + return HTP_ERROR; + + tx->connp->req_decompressor->callback = htp_tx_req_process_body_data_decompressor_callback; + } + } + } + + htp_header_t *cl = htp_table_get_c(tx->request_headers, "content-length"); + htp_header_t *te = htp_table_get_c(tx->request_headers, "transfer-encoding"); + + // Check for the Transfer-Encoding 
header, which would indicate a chunked request body. + if (te != NULL) { + // Make sure it contains "chunked" only. + // TODO The HTTP/1.1 RFC also allows the T-E header to contain "identity", which + // presumably should have the same effect as T-E header absence. However, Apache + // (2.2.22 on Ubuntu 12.04 LTS) instead errors out with "Unknown Transfer-Encoding: identity". + // And it behaves strangely, too, sending a 501 and proceeding to process the request + // (e.g., PHP is run), but without the body. It then closes the connection. + if (htp_header_has_token(bstr_ptr(te->value), bstr_len(te->value), (unsigned char*) "chunked") != HTP_OK) { + // Invalid T-E header value. + tx->request_transfer_coding = HTP_CODING_INVALID; + tx->flags |= HTP_REQUEST_INVALID_T_E; + tx->flags |= HTP_REQUEST_INVALID; + } else { + // Chunked encoding is a HTTP/1.1 feature, so check that an earlier protocol + // version is not used. The flag will also be set if the protocol could not be parsed. + // + // TODO IIS 7.0, for example, would ignore the T-E header when it + // it is used with a protocol below HTTP 1.1. This should be a + // personality trait. + if (tx->request_protocol_number < HTP_PROTOCOL_1_1) { + tx->flags |= HTP_REQUEST_INVALID_T_E; + tx->flags |= HTP_REQUEST_SMUGGLING; + } + + // If the T-E header is present we are going to use it. + tx->request_transfer_coding = HTP_CODING_CHUNKED; + + // We are still going to check for the presence of C-L. + if (cl != NULL) { + // According to the HTTP/1.1 RFC (section 4.4): + // + // "The Content-Length header field MUST NOT be sent + // if these two lengths are different (i.e., if a Transfer-Encoding + // header field is present). If a message is received with both a + // Transfer-Encoding header field and a Content-Length header field, + // the latter MUST be ignored." + // + tx->flags |= HTP_REQUEST_SMUGGLING; + } + } + } else if (cl != NULL) { + // Check for a folded C-L header. 
+ if (cl->flags & HTP_FIELD_FOLDED) { + tx->flags |= HTP_REQUEST_SMUGGLING; + } + + // Check for multiple C-L headers. + if (cl->flags & HTP_FIELD_REPEATED) { + tx->flags |= HTP_REQUEST_SMUGGLING; + // TODO Personality trait to determine which C-L header to parse. + // At the moment we're parsing the combination of all instances, + // which is bound to fail (because it will contain commas). + } + + // Get the body length. + tx->request_content_length = htp_parse_content_length(cl->value, tx->connp); + if (tx->request_content_length < 0) { + tx->request_transfer_coding = HTP_CODING_INVALID; + tx->flags |= HTP_REQUEST_INVALID_C_L; + tx->flags |= HTP_REQUEST_INVALID; + } else { + // We have a request body of known length. + tx->request_transfer_coding = HTP_CODING_IDENTITY; + } + } else { + // No body. + tx->request_transfer_coding = HTP_CODING_NO_BODY; + } + + // If we could not determine the correct body handling, + // consider the request invalid. + if (tx->request_transfer_coding == HTP_CODING_UNKNOWN) { + tx->request_transfer_coding = HTP_CODING_INVALID; + tx->flags |= HTP_REQUEST_INVALID; + } + + // Check for PUT requests, which we need to treat as file uploads. + if (tx->request_method_number == HTP_M_PUT) { + if (htp_tx_req_has_body(tx)) { + // Prepare to treat PUT request body as a file. + + tx->connp->put_file = calloc(1, sizeof (htp_file_t)); + if (tx->connp->put_file == NULL) return HTP_ERROR; + + tx->connp->put_file->fd = -1; + tx->connp->put_file->source = HTP_FILE_PUT; + } else { + // TODO Warn about PUT request without a body. + } + } + + // Determine hostname. + + // Use the hostname from the URI, when available. + if (tx->parsed_uri->hostname != NULL) { + tx->request_hostname = bstr_dup(tx->parsed_uri->hostname); + if (tx->request_hostname == NULL) return HTP_ERROR; + } + + tx->request_port_number = tx->parsed_uri->port_number; + + // Examine the Host header. 
+ + htp_header_t *h = htp_table_get_c(tx->request_headers, "host"); + if (h == NULL) { + // No host information in the headers. + + // HTTP/1.1 requires host information in the headers. + if (tx->request_protocol_number >= HTP_PROTOCOL_1_1) { + tx->flags |= HTP_HOST_MISSING; + } + } else { + // Host information available in the headers. + + bstr *hostname; + int port; + + rc = htp_parse_header_hostport(h->value, &hostname, NULL, &port, &(tx->flags)); + if (rc != HTP_OK) return rc; + + if (hostname != NULL) { + // The host information in the headers is valid. + + // Is there host information in the URI? + if (tx->request_hostname == NULL) { + // There is no host information in the URI. Place the + // hostname from the headers into the parsed_uri structure. + tx->request_hostname = hostname; + tx->request_port_number = port; + } else { + // The host information appears in the URI and in the headers. The + // HTTP RFC states that we should ignore the header copy. + + // Check for different hostnames. + if (bstr_cmp_nocase(hostname, tx->request_hostname) != 0) { + tx->flags |= HTP_HOST_AMBIGUOUS; + } + + // Check for different ports. + if (((tx->request_port_number != -1)&&(port != -1))&&(tx->request_port_number != port)) { + tx->flags |= HTP_HOST_AMBIGUOUS; + } + + bstr_free(hostname); + } + } else { + // Invalid host information in the headers. + + if (tx->request_hostname != NULL) { + // Raise the flag, even though the host information in the headers is invalid. + tx->flags |= HTP_HOST_AMBIGUOUS; + } + } + } + + // Determine Content-Type. + htp_header_t *ct = htp_table_get_c(tx->request_headers, "content-type"); + if (ct != NULL) { + rc = htp_parse_ct_header(ct->value, &tx->request_content_type); + if (rc != HTP_OK) return rc; + } + + // Parse cookies. + if (tx->connp->cfg->parse_request_cookies) { + rc = htp_parse_cookies_v0(tx->connp); + if (rc != HTP_OK) return rc; + } + + // Parse authentication information. 
+ if (tx->connp->cfg->parse_request_auth) { + rc = htp_parse_authorization(tx->connp); + if (rc == HTP_DECLINED) { + // Don't fail the stream if an authorization header is invalid, just set a flag. + tx->flags |= HTP_AUTH_INVALID; + } else { + if (rc != HTP_OK) return rc; + } + } + + // Finalize sending raw header data. + rc = htp_connp_req_receiver_finalize_clear(tx->connp); + if (rc != HTP_OK) return rc; + + // Run hook REQUEST_HEADERS. + rc = htp_hook_run_all(tx->connp->cfg->hook_request_headers, tx); + if (rc != HTP_OK) return rc; + + // We still proceed if the request is invalid. + + return HTP_OK; +} + +htp_status_t htp_tx_req_process_body_data(htp_tx_t *tx, const void *data, size_t len) { + if ((tx == NULL) || (data == NULL)) return HTP_ERROR; + if (len == 0) return HTP_OK; + + return htp_tx_req_process_body_data_ex(tx, data, len); +} + +htp_status_t htp_tx_req_process_body_data_ex(htp_tx_t *tx, const void *data, size_t len) { + if (tx == NULL) return HTP_ERROR; + + // NULL data is allowed in this private function; it's + // used to indicate the end of request body. + + // Send data to the callbacks. + + htp_tx_data_t d; + d.tx = tx; + d.data = (unsigned char *) data; + d.len = len; + d.is_last = (data == NULL && len == 0); + + switch(tx->request_content_encoding) { + case HTP_COMPRESSION_UNKNOWN: + case HTP_COMPRESSION_NONE: + // When there's no decompression, request_entity_len. + // is identical to request_message_len. + tx->request_entity_len += d.len; + htp_status_t rc = htp_req_run_hook_body_data(tx->connp, &d); + if (rc != HTP_OK) { + htp_log(tx->connp, HTP_LOG_MARK, HTP_LOG_ERROR, 0, "Request body data callback returned error (%d)", rc); + return HTP_ERROR; + } + break; + + case HTP_COMPRESSION_GZIP: + case HTP_COMPRESSION_DEFLATE: + case HTP_COMPRESSION_LZMA: + // In severe memory stress these could be NULL + if (tx->connp->req_decompressor == NULL) + return HTP_ERROR; + + // Send data buffer to the decompressor. 
+ htp_gzip_decompressor_decompress(tx->connp->req_decompressor, &d); + + if (data == NULL) { + // Shut down the decompressor, if we used one. + htp_tx_req_destroy_decompressors(tx->connp); + } + break; + + default: + // Internal error. + htp_log(tx->connp, HTP_LOG_MARK, HTP_LOG_ERROR, 0, + "[Internal Error] Invalid tx->request_content_encoding value: %d", + tx->request_content_encoding); + return HTP_ERROR; + } + + return HTP_OK; +} + +htp_status_t htp_tx_req_set_headers_clear(htp_tx_t *tx) { + if ((tx == NULL) || (tx->request_headers == NULL)) return HTP_ERROR; + + htp_header_t *h = NULL; + for (size_t i = 0, n = htp_table_size(tx->request_headers); i < n; i++) { + h = htp_table_get_index(tx->request_headers, i, NULL); + bstr_free(h->name); + bstr_free(h->value); + free(h); + } + + htp_table_destroy(tx->request_headers); + + tx->request_headers = htp_table_create(32); + if (tx->request_headers == NULL) return HTP_ERROR; + + return HTP_OK; +} + +htp_status_t htp_tx_req_set_line(htp_tx_t *tx, const char *line, size_t line_len, enum htp_alloc_strategy_t alloc) { + if ((tx == NULL) || (line == NULL) || (line_len == 0)) return HTP_ERROR; + + tx->request_line = copy_or_wrap_mem(line, line_len, alloc); + if (tx->request_line == NULL) return HTP_ERROR; + + if (tx->connp->cfg->parse_request_line(tx->connp) != HTP_OK) return HTP_ERROR; + + return HTP_OK; +} + +void htp_tx_req_set_parsed_uri(htp_tx_t *tx, htp_uri_t *parsed_uri) { + if ((tx == NULL) || (parsed_uri == NULL)) return; + + if (tx->parsed_uri != NULL) { + htp_uri_free(tx->parsed_uri); + } + + tx->parsed_uri = parsed_uri; +} + +htp_status_t htp_tx_res_set_status_line(htp_tx_t *tx, const char *line, size_t line_len, enum htp_alloc_strategy_t alloc) { + if ((tx == NULL) || (line == NULL) || (line_len == 0)) return HTP_ERROR; + + tx->response_line = copy_or_wrap_mem(line, line_len, alloc); + if (tx->response_line == NULL) return HTP_ERROR; + + if (tx->connp->cfg->parse_response_line(tx->connp) != HTP_OK) return 
HTP_ERROR; + + return HTP_OK; +} + +void htp_tx_res_set_protocol_number(htp_tx_t *tx, int protocol_number) { + if (tx == NULL) return; + tx->response_protocol_number = protocol_number; +} + +void htp_tx_res_set_status_code(htp_tx_t *tx, int status_code) { + if (tx == NULL) return; + tx->response_status_number = status_code; +} + +htp_status_t htp_tx_res_set_status_message(htp_tx_t *tx, const char *msg, size_t msg_len, enum htp_alloc_strategy_t alloc) { + if ((tx == NULL) || (msg == NULL)) return HTP_ERROR; + + if (tx->response_message != NULL) { + bstr_free(tx->response_message); + } + + tx->response_message = copy_or_wrap_mem(msg, msg_len, alloc); + if (tx->response_message == NULL) return HTP_ERROR; + + return HTP_OK; +} + +htp_status_t htp_tx_state_response_line(htp_tx_t *tx) { + if (tx == NULL) return HTP_ERROR; + + #if 0 + // Commented-out until we determine which fields can be + // unavailable in real-life. + + // Unless we're dealing with HTTP/0.9, check that + // the minimum amount of data has been provided. + if (tx->is_protocol_0_9 != 0) { + if ((tx->response_protocol == NULL) || (tx->response_status_number == -1) || (tx->response_message == NULL)) { + return HTP_ERROR; + } + } + #endif + + // Is the response line valid? 
+ if (tx->response_protocol_number == HTP_PROTOCOL_INVALID) { + htp_log(tx->connp, HTP_LOG_MARK, HTP_LOG_WARNING, 0, + "Invalid response line: invalid protocol"); + tx->flags |= HTP_STATUS_LINE_INVALID; + } + if ((tx->response_status_number == HTP_STATUS_INVALID) + || (tx->response_status_number < HTP_VALID_STATUS_MIN) + || (tx->response_status_number > HTP_VALID_STATUS_MAX)) { + htp_log(tx->connp, HTP_LOG_MARK, HTP_LOG_WARNING, 0, + "Invalid response line: invalid response status %d.", + tx->response_status_number); + tx->response_status_number = HTP_STATUS_INVALID; + tx->flags |= HTP_STATUS_LINE_INVALID; + } + + // Run hook HTP_RESPONSE_LINE + htp_status_t rc = htp_hook_run_all(tx->connp->cfg->hook_response_line, tx); + if (rc != HTP_OK) return rc; + + return HTP_OK; +} + +htp_status_t htp_tx_res_set_header(htp_tx_t *tx, const char *name, size_t name_len, + const char *value, size_t value_len, enum htp_alloc_strategy_t alloc) { + if ((tx == NULL) || (name == NULL) || (value == NULL)) return HTP_ERROR; + + + htp_header_t *h = calloc(1, sizeof (htp_header_t)); + if (h == NULL) return HTP_ERROR; + + h->name = copy_or_wrap_mem(name, name_len, alloc); + if (h->name == NULL) { + free(h); + return HTP_ERROR; + } + + h->value = copy_or_wrap_mem(value, value_len, alloc); + if (h->value == NULL) { + bstr_free(h->name); + free(h); + return HTP_ERROR; + } + + if (htp_table_add(tx->response_headers, h->name, h) != HTP_OK) { + bstr_free(h->name); + bstr_free(h->value); + free(h); + return HTP_ERROR; + } + + return HTP_OK; +} + +htp_status_t htp_tx_res_set_headers_clear(htp_tx_t *tx) { + if ((tx == NULL) || (tx->response_headers == NULL)) return HTP_ERROR; + + htp_header_t *h = NULL; + for (size_t i = 0, n = htp_table_size(tx->response_headers); i < n; i++) { + h = htp_table_get_index(tx->response_headers, i, NULL); + bstr_free(h->name); + bstr_free(h->value); + free(h); + } + + htp_table_destroy(tx->response_headers); + + tx->response_headers = htp_table_create(32); + if 
(tx->response_headers == NULL) return HTP_ERROR; + + return HTP_OK; +} + +/** \internal + * + * Clean up decompressor(s). + * + * @param[in] tx + */ +static void htp_tx_res_destroy_decompressors(htp_connp_t *connp) { + htp_decompressor_t *comp = connp->out_decompressor; + while (comp) { + htp_decompressor_t *next = comp->next; + htp_gzip_decompressor_destroy(comp); + comp = next; + } + connp->out_decompressor = NULL; +} + +static void htp_tx_req_destroy_decompressors(htp_connp_t *connp) { + htp_decompressor_t *comp = connp->req_decompressor; + while (comp) { + htp_decompressor_t *next = comp->next; + htp_gzip_decompressor_destroy(comp); + comp = next; + } + connp->req_decompressor = NULL; +} + +void htp_connp_destroy_decompressors(htp_connp_t *connp) { + htp_tx_res_destroy_decompressors(connp); + htp_tx_req_destroy_decompressors(connp); +} + +static htp_status_t htp_timer_track(int32_t *time_spent, struct timeval * after, struct timeval *before) { + if (after->tv_sec < before->tv_sec) { + return HTP_ERROR; + } else if (after->tv_sec == before->tv_sec) { + if (after->tv_usec < before->tv_usec) { + return HTP_ERROR; + } + *time_spent += after->tv_usec - before->tv_usec; + } else { + *time_spent += (after->tv_sec - before->tv_sec) * 1000000 + after->tv_usec - before->tv_usec; + } + return HTP_OK; +} + +static htp_status_t htp_tx_req_process_body_data_decompressor_callback(htp_tx_data_t *d) { + if (d == NULL) return HTP_ERROR; + + #if HTP_DEBUG + fprint_raw_data(stderr, __func__, d->data, d->len); + #endif + + // Keep track of actual request body length. + d->tx->request_entity_len += d->len; + + // Invoke all callbacks. 
+ htp_status_t rc = htp_req_run_hook_body_data(d->tx->connp, d); + if (rc != HTP_OK) return HTP_ERROR; + d->tx->connp->req_decompressor->nb_callbacks++; + if (d->tx->connp->req_decompressor->nb_callbacks % HTP_COMPRESSION_TIME_FREQ_TEST == 0) { + struct timeval after; + gettimeofday(&after, NULL); + // sanity check for race condition if system time changed + if ( htp_timer_track(&d->tx->connp->req_decompressor->time_spent, &after, &d->tx->connp->req_decompressor->time_before) == HTP_OK) { + // updates last tracked time + d->tx->connp->req_decompressor->time_before = after; + if (d->tx->connp->req_decompressor->time_spent > d->tx->connp->cfg->compression_time_limit ) { + htp_log(d->tx->connp, HTP_LOG_MARK, HTP_LOG_ERROR, 0, + "Compression bomb: spent %"PRId32" us decompressing", + d->tx->connp->req_decompressor->time_spent); + d->tx->connp->req_decompressor->passthrough = 1; + } + } + + } + if (d->tx->request_entity_len > d->tx->connp->cfg->compression_bomb_limit && + d->tx->request_entity_len > HTP_COMPRESSION_BOMB_RATIO * d->tx->request_message_len) { + htp_log(d->tx->connp, HTP_LOG_MARK, HTP_LOG_ERROR, 0, + "Compression bomb: decompressed %"PRId64" bytes out of %"PRId64, + d->tx->request_entity_len, d->tx->request_message_len); + return HTP_ERROR; + } + + return HTP_OK; +} + +static htp_status_t htp_tx_res_process_body_data_decompressor_callback(htp_tx_data_t *d) { + if (d == NULL) return HTP_ERROR; + + #if HTP_DEBUG + fprint_raw_data(stderr, __func__, d->data, d->len); + #endif + + // Keep track of actual response body length. + d->tx->response_entity_len += d->len; + + // Invoke all callbacks. 
+ htp_status_t rc = htp_res_run_hook_body_data(d->tx->connp, d); + if (rc != HTP_OK) return HTP_ERROR; + d->tx->connp->out_decompressor->nb_callbacks++; + if (d->tx->connp->out_decompressor->nb_callbacks % HTP_COMPRESSION_TIME_FREQ_TEST == 0) { + struct timeval after; + gettimeofday(&after, NULL); + // sanity check for race condition if system time changed + if ( htp_timer_track(&d->tx->connp->out_decompressor->time_spent, &after, &d->tx->connp->out_decompressor->time_before) == HTP_OK) { + // updates last tracked time + d->tx->connp->out_decompressor->time_before = after; + if (d->tx->connp->out_decompressor->time_spent > d->tx->connp->cfg->compression_time_limit ) { + htp_log(d->tx->connp, HTP_LOG_MARK, HTP_LOG_ERROR, 0, + "Compression bomb: spent %"PRId32" us decompressing", + d->tx->connp->out_decompressor->time_spent); + d->tx->connp->out_decompressor->passthrough = 1; + } + } + + } + if (d->tx->response_entity_len > d->tx->connp->cfg->compression_bomb_limit && + d->tx->response_entity_len > HTP_COMPRESSION_BOMB_RATIO * d->tx->response_message_len) { + htp_log(d->tx->connp, HTP_LOG_MARK, HTP_LOG_ERROR, 0, + "Compression bomb: decompressed %"PRId64" bytes out of %"PRId64, + d->tx->response_entity_len, d->tx->response_message_len); + return HTP_ERROR; + } + + return HTP_OK; +} + +htp_status_t htp_tx_res_process_body_data(htp_tx_t *tx, const void *data, size_t len) { + if ((tx == NULL) || (data == NULL)) return HTP_ERROR; + if (len == 0) return HTP_OK; + return htp_tx_res_process_body_data_ex(tx, data, len); +} + +htp_status_t htp_tx_res_process_body_data_ex(htp_tx_t *tx, const void *data, size_t len) { + if (tx == NULL) return HTP_ERROR; + + // NULL data is allowed in this private function; it's + // used to indicate the end of response body. 
+ + #ifdef HTP_DEBUG + fprint_raw_data(stderr, __func__, data, len); + #endif + + htp_tx_data_t d; + + d.tx = tx; + d.data = (unsigned char *) data; + d.len = len; + d.is_last = 0; + + // Keep track of body size before decompression. + tx->response_message_len += d.len; + + switch (tx->response_content_encoding_processing) { + case HTP_COMPRESSION_GZIP: + case HTP_COMPRESSION_DEFLATE: + case HTP_COMPRESSION_LZMA: + // In severe memory stress the decompressor could be NULL + if (tx->connp->out_decompressor == NULL) + return HTP_ERROR; + + struct timeval after; + gettimeofday(&tx->connp->out_decompressor->time_before, NULL); + // Send data buffer to the decompressor. The callback counter is reset + // first, so the periodic time check in the callback counts per chunk. + // NOTE(review): the result of htp_gzip_decompressor_decompress(), if any, + // is not examined here. + tx->connp->out_decompressor->nb_callbacks=0; + htp_gzip_decompressor_decompress(tx->connp->out_decompressor, &d); + gettimeofday(&after, NULL); + // sanity check for race condition if system time changed + if ( htp_timer_track(&tx->connp->out_decompressor->time_spent, &after, &tx->connp->out_decompressor->time_before) == HTP_OK) { + if ( tx->connp->out_decompressor->time_spent > tx->connp->cfg->compression_time_limit ) { + htp_log(tx->connp, HTP_LOG_MARK, HTP_LOG_ERROR, 0, + "Compression bomb: spent %"PRId32" us decompressing", + tx->connp->out_decompressor->time_spent); + tx->connp->out_decompressor->passthrough = 1; + } + } + + if (data == NULL) { + // Shut down the decompressor, if we used one. + htp_tx_res_destroy_decompressors(tx->connp); + } + break; + + case HTP_COMPRESSION_NONE: + // When there's no decompression, response_entity_len + // is identical to response_message_len. + tx->response_entity_len += d.len; + + htp_status_t rc = htp_res_run_hook_body_data(tx->connp, &d); + if (rc != HTP_OK) return HTP_ERROR; + break; + + default: + // Internal error. 
+ htp_log(tx->connp, HTP_LOG_MARK, HTP_LOG_ERROR, 0, + "[Internal Error] Invalid tx->response_content_encoding_processing value: %d", + tx->response_content_encoding_processing); + return HTP_ERROR; + break; + } + + return HTP_OK; +} + +htp_status_t htp_tx_state_request_complete_partial(htp_tx_t *tx) { /* Finalizes the request body (if any), marks the request complete, runs the REQUEST_COMPLETE hook and releases connp->put_file. */ + if (tx == NULL) return HTP_ERROR; + + // Finalize request body (NULL data, zero length). + if (htp_tx_req_has_body(tx)) { + htp_status_t rc = htp_tx_req_process_body_data_ex(tx, NULL, 0); + if (rc != HTP_OK) return rc; + } + + tx->request_progress = HTP_REQUEST_COMPLETE; + + // Run hook REQUEST_COMPLETE. + htp_status_t rc = htp_hook_run_all(tx->connp->cfg->hook_request_complete, tx); + if (rc != HTP_OK) return rc; + rc = htp_connp_req_receiver_finalize_clear(tx->connp); + if (rc != HTP_OK) return rc; + + // Clean-up: release the PUT file record, if one was allocated. + if (tx->connp->put_file != NULL) { + bstr_free(tx->connp->put_file->filename); + free(tx->connp->put_file); + tx->connp->put_file = NULL; + } + + return HTP_OK; +} + +/** + * Completes request processing. Runs htp_tx_state_request_complete_partial() + * unless request_progress is already HTP_REQUEST_COMPLETE, selects the next + * inbound parser state, finalizes the transaction and detaches it from the + * inbound side of the parser (connp->in_tx). + * + * @param[in] tx Transaction pointer. May be destroyed by this call if + * auto-destroy is enabled. + * @return HTP_OK on success; HTP_ERROR if tx is NULL, or the status + * returned by htp_tx_state_request_complete_partial() on failure. + */ +htp_status_t htp_tx_state_request_complete(htp_tx_t *tx) { + if (tx == NULL) return HTP_ERROR; + + if (tx->request_progress != HTP_REQUEST_COMPLETE) { + htp_status_t rc = htp_tx_state_request_complete_partial(tx); + if (rc != HTP_OK) return rc; + } + + // Make a copy of the connection parser pointer, so that + // we don't have to reference it via tx, which may be + // destroyed later. + htp_connp_t *connp = tx->connp; + + // Determine what happens next, and remove this transaction from the parser. + if (tx->is_protocol_0_9) { + connp->in_state = htp_connp_REQ_IGNORE_DATA_AFTER_HTTP_0_9; + } else { + connp->in_state = htp_connp_REQ_IDLE; + } + + // Check if the entire transaction is complete. This call may + // destroy the transaction, if auto-destroy is enabled. + // NOTE(review): the status returned by htp_tx_finalize() is ignored here. + htp_tx_finalize(tx); + + // At this point, tx may no longer be valid. + + connp->in_tx = NULL; + + return HTP_OK; +} + +/** + * Starts request processing: runs the REQUEST_START hook and switches the + * inbound parser to request line parsing. + * + * @param[in] tx Transaction pointer. + * @return HTP_OK on success, HTP_ERROR if tx is NULL, or the non-HTP_OK + * status returned by htp_hook_run_all(). + */ +htp_status_t htp_tx_state_request_start(htp_tx_t *tx) { + if (tx == NULL) return HTP_ERROR; + + // Run hook REQUEST_START. 
+ htp_status_t rc = htp_hook_run_all(tx->connp->cfg->hook_request_start, tx); + if (rc != HTP_OK) return rc; + + // Change state into request line parsing. + // NOTE(review): progress is written through connp->in_tx rather than tx; + // presumably both refer to the same transaction here -- confirm. + tx->connp->in_state = htp_connp_REQ_LINE; + tx->connp->in_tx->request_progress = HTP_REQUEST_LINE; + + return HTP_OK; +} + +/** + * Handles the end of a block of request headers. Depending on request_progress + * this is either the regular header block or the trailer. + * + * @param[in] tx Transaction pointer. + * @return HTP_OK on success; HTP_ERROR if tx is NULL or request_progress is + * below HTP_REQUEST_LINE; otherwise the failing status of the hook, + * receiver finalization, or header processing step. + */ +htp_status_t htp_tx_state_request_headers(htp_tx_t *tx) { + if (tx == NULL) return HTP_ERROR; + + // If request_progress is not past HTP_REQUEST_HEADERS that means that this is the + // first time we're processing headers in a request. Otherwise, + // we're dealing with trailing headers. + if (tx->request_progress > HTP_REQUEST_HEADERS) { + // Request trailers. + + // Run hook REQUEST_TRAILER. + htp_status_t rc = htp_hook_run_all(tx->connp->cfg->hook_request_trailer, tx); + if (rc != HTP_OK) return rc; + + // Finalize sending raw header data. + rc = htp_connp_req_receiver_finalize_clear(tx->connp); + if (rc != HTP_OK) return rc; + + // Completed parsing this request; finalize it now. + tx->connp->in_state = htp_connp_REQ_FINALIZE; + } else if (tx->request_progress >= HTP_REQUEST_LINE) { + // Request headers. + + // Did this request arrive in multiple data chunks? + if (tx->connp->in_chunk_count != tx->connp->in_chunk_request_index) { + tx->flags |= HTP_MULTI_PACKET_HEAD; + } + + htp_status_t rc = htp_tx_process_request_headers(tx); + if (rc != HTP_OK) return rc; + + tx->connp->in_state = htp_connp_REQ_CONNECT_CHECK; + } else { + htp_log(tx->connp, HTP_LOG_MARK, HTP_LOG_WARNING, 0, "[Internal Error] Invalid tx progress: %d", tx->request_progress); + + return HTP_ERROR; + } + + return HTP_OK; +} + +/** + * Processes a complete request line: parses the request URI, builds the + * normalized parsed_uri (unless it was already set), flags an invalid + * hostname, and runs the REQUEST_URI_NORMALIZE and REQUEST_LINE hooks. + * + * @param[in] tx Transaction pointer. + * @return HTP_OK on success, HTP_ERROR or a non-HTP_OK hook status on failure. + */ +htp_status_t htp_tx_state_request_line(htp_tx_t *tx) { + if (tx == NULL) return HTP_ERROR; + + // Determine how to process the request URI. + + if (tx->request_method_number == HTP_M_CONNECT) { + // When CONNECT is used, the request URI contains an authority string. 
+ if (htp_parse_uri_hostport(tx->connp, tx->request_uri, tx->parsed_uri_raw) != HTP_OK) { + return HTP_ERROR; + } + } else { + // Parse the request URI into htp_tx_t::parsed_uri_raw. + if (htp_parse_uri(tx->request_uri, &(tx->parsed_uri_raw)) != HTP_OK) { + return HTP_ERROR; + } + } + + // Build htp_tx_t::parsed_uri, but only if it was not explicitly set already. + if (tx->parsed_uri == NULL) { + tx->parsed_uri = htp_uri_alloc(); + if (tx->parsed_uri == NULL) return HTP_ERROR; + + // Keep the original URI components, but create a copy which we can normalize and use internally. + if (htp_normalize_parsed_uri(tx, tx->parsed_uri_raw, tx->parsed_uri) != HTP_OK) { + return HTP_ERROR; + } + } + + // Check parsed_uri hostname; an invalid one is only flagged, not rejected. + if (tx->parsed_uri->hostname != NULL) { + if (htp_validate_hostname(tx->parsed_uri->hostname) == 0) { + tx->flags |= HTP_HOSTU_INVALID; + } + } + + // Run hook REQUEST_URI_NORMALIZE. + htp_status_t rc = htp_hook_run_all(tx->connp->cfg->hook_request_uri_normalize, tx); + if (rc != HTP_OK) return rc; + + + // Run hook REQUEST_LINE. + rc = htp_hook_run_all(tx->connp->cfg->hook_request_line, tx); + if (rc != HTP_OK) return rc; + + // Move on to the next phase. + tx->connp->in_state = htp_connp_REQ_PROTOCOL; + + return HTP_OK; +} + +/** + * Completes response processing; equivalent to calling + * htp_tx_state_response_complete_ex() with hybrid_mode set to 1. + * + * @param[in] tx Transaction pointer. + * @return HTP_ERROR if tx is NULL, otherwise the result of + * htp_tx_state_response_complete_ex(). + */ +htp_status_t htp_tx_state_response_complete(htp_tx_t *tx) { + if (tx == NULL) return HTP_ERROR; + return htp_tx_state_response_complete_ex(tx, 1 /* hybrid mode */); +} + +/** + * Finalizes a transaction once both request and response are complete: runs + * the TRANSACTION_COMPLETE hook and, if tx_auto_destroy is set in the + * configuration, destroys the transaction. Returns HTP_OK without doing + * anything while the transaction is still incomplete. + * + * @param[in] tx Transaction pointer. May no longer be valid after the call. + * @return HTP_OK on success, HTP_ERROR if tx is NULL, or the non-HTP_OK + * status returned by the hook. + */ +htp_status_t htp_tx_finalize(htp_tx_t *tx) { + if (tx == NULL) return HTP_ERROR; + + if (!htp_tx_is_complete(tx)) return HTP_OK; + + // Run hook TRANSACTION_COMPLETE. + htp_status_t rc = htp_hook_run_all(tx->connp->cfg->hook_transaction_complete, tx); + if (rc != HTP_OK) return rc; + + // In streaming processing, we destroy the transaction because it will not be needed any more. 
+ if (tx->connp->cfg->tx_auto_destroy) { + htp_tx_destroy(tx); + } + + return HTP_OK; +} + +htp_status_t htp_tx_state_response_complete_ex(htp_tx_t *tx, int hybrid_mode) { /* Marks the response complete (finishing the body and running the RESPONSE_COMPLETE hook the first time), then finalizes tx and resets the outbound parser; when hybrid_mode is 0 it may instead return HTP_DATA_OTHER to yield to inbound processing. */ + if (tx == NULL) return HTP_ERROR; + + if (tx->response_progress != HTP_RESPONSE_COMPLETE) { + tx->response_progress = HTP_RESPONSE_COMPLETE; + + // Run the last RESPONSE_BODY_DATA HOOK, but only if there was a response body present. + // NULL data marks the end of the body. NOTE(review): the status returned by + // this call is not checked. + if (tx->response_transfer_coding != HTP_CODING_NO_BODY) { + htp_tx_res_process_body_data_ex(tx, NULL, 0); + } + + // Run hook RESPONSE_COMPLETE. + htp_status_t rc = htp_hook_run_all(tx->connp->cfg->hook_response_complete, tx); + if (rc != HTP_OK) return rc; + + // Clear the data receiver hook, if any. + rc = htp_connp_res_receiver_finalize_clear(tx->connp); + if (rc != HTP_OK) return rc; + } + + if (!hybrid_mode) { + // Check if the inbound parser is waiting on us. If it is, that means that + // there might be request data that the inbound parser hasn't consumed yet. + // If we don't stop parsing we might encounter a response without a request, + // which is why we want to return straight away before processing any data. + // + // This situation will occur any time the parser needs to see the server + // respond to a particular situation before it can decide how to proceed. For + // example, when a CONNECT is sent, different paths are used when it is accepted + // and when it is not accepted. + // + // It is not enough to check only in_status here. Because of pipelining, it's possible + // that many inbound transactions have been processed, and that the parser is + // waiting on a response that we have not seen yet. + if ((tx->connp->in_status == HTP_STREAM_DATA_OTHER) && (tx->connp->in_tx == tx->connp->out_tx)) { + return HTP_DATA_OTHER; + } + + // Do we have a signal to yield to inbound processing at + // the end of the next transaction? + if (tx->connp->out_data_other_at_tx_end) { + // We do. Let's yield then. 
+ tx->connp->out_data_other_at_tx_end = 0; + return HTP_DATA_OTHER; + } + } + + // Make a copy of the connection parser pointer, so that + // we don't have to reference it via tx, which may be destroyed later. + htp_connp_t *connp = tx->connp; + + // Finalize the transaction. This call may destroy the transaction, if auto-destroy is enabled. + htp_status_t rc = htp_tx_finalize(tx); + if (rc != HTP_OK) return rc; + + // Disconnect transaction from the parser. + connp->out_tx = NULL; + + connp->out_state = htp_connp_RES_IDLE; + + return HTP_OK; +} + +/** + * @internal + * @brief Find the first token in the input, where tokens are separated by "seps". + * + * Leading separator bytes are skipped. The token is not copied and not + * NUL-terminated: *ret_tok_ptr points into the input buffer. + * + * @param in input buffer + * @param in_len number of bytes in the input buffer + * @param seps nul-terminated string: each character is a separator + * @param ret_tok_ptr set to the start of the token when 1 is returned + * @param ret_tok_len set to the token length in bytes when 1 is returned + * @return 1 if a token was found, 0 if the input is empty or holds only separators + */ +static int get_token(const unsigned char *in, size_t in_len, const char *seps, + unsigned char **ret_tok_ptr, size_t *ret_tok_len) +{ + #if HTP_DEBUG + fprintf(stderr, "INPUT %"PRIuMAX, (uintmax_t)in_len); + fprint_raw_data(stderr, __func__, in, in_len); + #endif + + size_t i = 0; + + /* skip leading 'separators' */ + while (i < in_len) + { + int match = 0; + for (const char *s = seps; *s != '\0'; s++) { + if (in[i] == *s) { + match++; + break; + } + } + if (!match) + break; + + i++; + } + if (i >= in_len) + return 0; + + in += i; + in_len -= i; + + #if HTP_DEBUG + fprintf(stderr, "INPUT (POST SEP STRIP) %"PRIuMAX, (uintmax_t)in_len); + fprint_raw_data(stderr, __func__, in, in_len); + #endif + + for (i = 0; i < in_len; i++) + { + for (const char *s = seps; *s != '\0'; s++) { + if (in[i] == *s) { + *ret_tok_ptr = (unsigned char *)in; + *ret_tok_len = i; + return 1; + } + } + } + + *ret_tok_ptr = (unsigned char *)in; + *ret_tok_len = in_len; + return 1; +} + +/** + * Handles the end of the response headers: determines the content encoding + * from the Content-Encoding header, runs the RESPONSE_HEADERS hook, and sets + * up the response decompressor(s) when decompression applies. + * + * @param[in] tx Transaction pointer. + * @return HTP_OK on success, HTP_ERROR or a non-HTP_OK hook status on failure. + */ +htp_status_t htp_tx_state_response_headers(htp_tx_t *tx) { + if (tx == NULL) return HTP_ERROR; + + // Check for compression. + + // Determine content encoding. 
+ + int ce_multi_comp = 0; + tx->response_content_encoding = HTP_COMPRESSION_NONE; + htp_header_t *ce = htp_table_get_c(tx->response_headers, "content-encoding"); + if (ce != NULL) { + /* fast paths: regular gzip and friends */ + if ((bstr_cmp_c_nocasenorzero(ce->value, "gzip") == 0) || + (bstr_cmp_c_nocasenorzero(ce->value, "x-gzip") == 0)) { + tx->response_content_encoding = HTP_COMPRESSION_GZIP; + } else if ((bstr_cmp_c_nocasenorzero(ce->value, "deflate") == 0) || + (bstr_cmp_c_nocasenorzero(ce->value, "x-deflate") == 0)) { + tx->response_content_encoding = HTP_COMPRESSION_DEFLATE; + } else if (bstr_cmp_c_nocasenorzero(ce->value, "lzma") == 0) { + tx->response_content_encoding = HTP_COMPRESSION_LZMA; + } else if (bstr_cmp_c_nocasenorzero(ce->value, "inflate") == 0) { + // ignore: response_content_encoding stays HTP_COMPRESSION_NONE + } else { + /* exceptional cases: enter slow path (the header value is tokenized below) */ + ce_multi_comp = 1; + } + } + + // Configure decompression, if enabled in the configuration. + if (tx->connp->cfg->response_decompression_enabled) { + tx->response_content_encoding_processing = tx->response_content_encoding; + } else { + tx->response_content_encoding_processing = HTP_COMPRESSION_NONE; + ce_multi_comp = 0; + } + + // Finalize sending raw header data. + htp_status_t rc = htp_connp_res_receiver_finalize_clear(tx->connp); + if (rc != HTP_OK) return rc; + + // Run hook RESPONSE_HEADERS. + rc = htp_hook_run_all(tx->connp->cfg->hook_response_headers, tx); + if (rc != HTP_OK) return rc; + + // Initialize the decompression engine as necessary. We can deal with three + // scenarios: + // + // 1. Decompression is enabled, compression indicated in headers, and we decompress. + // + // 2. As above, but the user disables decompression by setting response_content_encoding + // to COMPRESSION_NONE. + // + // 3. Decompression is disabled and we do not attempt to enable it, but the user + // forces decompression by setting response_content_encoding to one of the + // supported algorithms. + // + // NOTE(review): the check that follows reads response_content_encoding_processing, + // so that is the field a RESPONSE_HEADERS callback has to change for 2. and 3. 
+ if ((tx->response_content_encoding_processing == HTP_COMPRESSION_GZIP) || + (tx->response_content_encoding_processing == HTP_COMPRESSION_DEFLATE) || + (tx->response_content_encoding_processing == HTP_COMPRESSION_LZMA) || + ce_multi_comp) + { + if (tx->connp->out_decompressor != NULL) { + htp_tx_res_destroy_decompressors(tx->connp); + } + + /* normal case: a single decompressor for the one recognized encoding */ + if (!ce_multi_comp) { + tx->connp->out_decompressor = htp_gzip_decompressor_create(tx->connp, tx->response_content_encoding_processing); + if (tx->connp->out_decompressor == NULL) return HTP_ERROR; + + tx->connp->out_decompressor->callback = htp_tx_res_process_body_data_decompressor_callback; + + /* multiple ce value case: build a chain of decompressors, one per recognized token */ + } else { + int layers = 0; + htp_decompressor_t *comp = NULL; + int nblzma = 0; + + uint8_t *tok = NULL; + size_t tok_len = 0; + + uint8_t *input = bstr_ptr(ce->value); + size_t input_len = bstr_len(ce->value); + + #if HTP_DEBUG + fprintf(stderr, "INPUT %"PRIuMAX, (uintmax_t)input_len); + fprint_raw_data(stderr, __func__, input, input_len); + #endif + + while (input_len > 0 && + get_token(input, input_len, ", ", &tok, &tok_len)) + { + #if HTP_DEBUG + fprintf(stderr, "TOKEN %"PRIuMAX, (uintmax_t)tok_len); + fprint_raw_data(stderr, __func__, tok, tok_len); + #endif + enum htp_content_encoding_t cetype = HTP_COMPRESSION_NONE; + + /* check depth limit (0 means no limit) */ + if ((tx->connp->cfg->response_decompression_layer_limit != 0) && + ((++layers) > tx->connp->cfg->response_decompression_layer_limit)) + { + htp_log(tx->connp, HTP_LOG_MARK, HTP_LOG_WARNING, 0, + "Too many response content encoding layers"); + break; + } + + nblzma++; /* incremented for every token, so this is the 1-based position of the current token */ + if (bstr_util_mem_index_of_c_nocase(tok, tok_len, "gzip") != -1) { + if (!(bstr_util_cmp_mem(tok, tok_len, "gzip", 4) == 0 || + bstr_util_cmp_mem(tok, tok_len, "x-gzip", 6) == 0)) { + htp_log(tx->connp, HTP_LOG_MARK, HTP_LOG_WARNING, 0, + "C-E gzip has abnormal value"); + } + cetype = HTP_COMPRESSION_GZIP; + } else if 
(bstr_util_mem_index_of_c_nocase(tok, tok_len, "deflate") != -1) { + if (!(bstr_util_cmp_mem(tok, tok_len, "deflate", 7) == 0 || + bstr_util_cmp_mem(tok, tok_len, "x-deflate", 9) == 0)) { + htp_log(tx->connp, HTP_LOG_MARK, HTP_LOG_WARNING, 0, + "C-E deflate has abnormal value"); + } + cetype = HTP_COMPRESSION_DEFLATE; + } else if (bstr_util_cmp_mem(tok, tok_len, "lzma", 4) == 0) { + cetype = HTP_COMPRESSION_LZMA; + if (nblzma > tx->connp->cfg->response_lzma_layer_limit) { /* lzma token found at a position beyond the configured limit */ + htp_log(tx->connp, HTP_LOG_MARK, HTP_LOG_ERROR, 0, + "Compression bomb: multiple encoding with lzma"); + break; + } + } else if (bstr_util_cmp_mem(tok, tok_len, "inflate", 7) == 0 || bstr_util_cmp_mem(tok, tok_len, "none", 4) == 0) { + cetype = HTP_COMPRESSION_NONE; + } else { + // unknown token: warn and continue with the next one + htp_log(tx->connp, HTP_LOG_MARK, HTP_LOG_WARNING, 0, + "C-E unknown setting"); + } + + if (cetype != HTP_COMPRESSION_NONE) { + if (comp == NULL) { + tx->response_content_encoding_processing = cetype; + tx->connp->out_decompressor = htp_gzip_decompressor_create(tx->connp, tx->response_content_encoding_processing); + if (tx->connp->out_decompressor == NULL) { + return HTP_ERROR; + } + tx->connp->out_decompressor->callback = htp_tx_res_process_body_data_decompressor_callback; + comp = tx->connp->out_decompressor; + } else { + comp->next = htp_gzip_decompressor_create(tx->connp, cetype); + if (comp->next == NULL) { + return HTP_ERROR; + } + comp->next->callback = htp_tx_res_process_body_data_decompressor_callback; + comp = comp->next; + } + } + + // Advance past this token and one separator byte. + // NOTE(review): the advance is relative to input, while get_token() returns + // tok after skipping leading separators; the skipped bytes are not accounted + // for, so the tail of a token can be tokenized again -- confirm and fix. + if ((tok_len + 1) >= input_len) + break; + input += (tok_len + 1); + input_len -= (tok_len + 1); + } + } + } else if (tx->response_content_encoding_processing != HTP_COMPRESSION_NONE) { + return HTP_ERROR; + } + + return HTP_OK; +} + +/** + * Starts response processing: attaches tx as the current outbound transaction, + * runs the RESPONSE_START hook, and selects the next outbound parser state. + * + * @param[in] tx Transaction pointer. + * @return HTP_OK on success, HTP_ERROR if tx is NULL, or the non-HTP_OK + * status returned by the hook. + */ +htp_status_t htp_tx_state_response_start(htp_tx_t *tx) { + if (tx == NULL) return HTP_ERROR; + + tx->connp->out_tx = tx; + + // Run hook RESPONSE_START. 
+ htp_status_t rc = htp_hook_run_all(tx->connp->cfg->hook_response_start, tx); + if (rc != HTP_OK) return rc; + + // Change state into response line parsing, except if we're following + // a HTTP/0.9 request (no status line or response headers). + if (tx->is_protocol_0_9) { + tx->response_transfer_coding = HTP_CODING_IDENTITY; + tx->response_content_encoding_processing = HTP_COMPRESSION_NONE; + tx->response_progress = HTP_RESPONSE_BODY; + tx->connp->out_state = htp_connp_RES_BODY_IDENTITY_STREAM_CLOSE; + tx->connp->out_body_data_left = -1; + } else { + tx->connp->out_state = htp_connp_RES_LINE; + tx->response_progress = HTP_RESPONSE_LINE; + } + + /* If at this point we have no method and no uri and our status + * is still htp_connp_REQ_LINE, we likely have a timed-out request + * or an overly long request */ + if (tx->request_method == HTP_M_UNKNOWN && tx->request_uri == NULL && tx->connp->in_state == htp_connp_REQ_LINE) { + htp_log(tx->connp, HTP_LOG_MARK, HTP_LOG_WARNING, 0, "Request line incomplete"); + } + + return HTP_OK; +} + +/** + * Register callback for the transaction-specific REQUEST_BODY_DATA hook. + * + * @param[in] tx Transaction pointer; nothing is registered if it is NULL. + * @param[in] callback_fn Callback function pointer; nothing is registered if it is NULL. + */ +void htp_tx_register_request_body_data(htp_tx_t *tx, int (*callback_fn)(htp_tx_data_t *)) { + if ((tx == NULL) || (callback_fn == NULL)) return; + htp_hook_register(&tx->hook_request_body_data, (htp_callback_fn_t) callback_fn); +} + +/** + * Register callback for the transaction-specific RESPONSE_BODY_DATA hook. + * + * @param[in] tx Transaction pointer; nothing is registered if it is NULL. + * @param[in] callback_fn Callback function pointer; nothing is registered if it is NULL. + */ +void htp_tx_register_response_body_data(htp_tx_t *tx, int (*callback_fn)(htp_tx_data_t *)) { + if ((tx == NULL) || (callback_fn == NULL)) return; + htp_hook_register(&tx->hook_response_body_data, (htp_callback_fn_t) callback_fn); +} + +/** + * Determines whether both the request and the response of a transaction + * have completed. + * + * @param[in] tx Transaction pointer. + * @return 1 if the transaction is complete, 0 if it is not, -1 if tx is NULL. + */ +int htp_tx_is_complete(htp_tx_t *tx) { + if (tx == NULL) return -1; + + // A transaction is considered complete only when both the request and + // response are complete. 
(Sometimes a complete response can be seen + // even while the request is ongoing.) + if ((tx->request_progress != HTP_REQUEST_COMPLETE) || (tx->response_progress != HTP_RESPONSE_COMPLETE)) { + return 0; + } else { + return 1; + } +} diff --git a/htp/htp_transaction.h b/htp/htp_transaction.h new file mode 100644 index 0000000..32d6773 --- /dev/null +++ b/htp/htp_transaction.h @@ -0,0 +1,529 @@ +/*************************************************************************** + * Copyright (c) 2009-2010 Open Information Security Foundation + * Copyright (c) 2010-2013 Qualys, Inc. + * All rights reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions are + * met: + * + * - Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + + * - Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution. + + * - Neither the name of the Qualys, Inc. nor the names of its + * contributors may be used to endorse or promote products derived from + * this software without specific prior written permission. + * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS + * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT + * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR + * A PARTICULAR PURPOSE ARE DISCLAIMED. 
IN NO EVENT SHALL THE COPYRIGHT + * HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, + * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT + * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, + * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY + * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT + * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE + * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + ***************************************************************************/ + +/* + * @file + * @author Ivan Ristic <ivanr@webkreator.com> + */ + +#ifndef HTP_TRANSACTION_H +#define HTP_TRANSACTION_H + +#ifdef __cplusplus +extern "C" { +#endif + +#include "htp.h" + +/** + * Enumerate possible data handling strategies in hybrid parsing + * mode. The two possibilities are to make copies of all data and + * use bstr instances to wrap already available data. + */ +enum htp_alloc_strategy_t { + /** + * Make copies of all data. This strategy should be used when + * the supplied buffers are transient and will go away after + * the invoked function returns. + */ + HTP_ALLOC_COPY = 1, + + /** + * Reuse buffers, without a change of ownership. We assume the + * buffers will continue to be available until the transaction + * is deleted by the container. + */ + HTP_ALLOC_REUSE = 2 +}; + +/** + * Possible states of a progressing transaction. Internally, progress will change + * to the next state when the processing activities associated with that state + * begin. 
For example, when we start to process request line bytes, the request + * state will change from HTP_REQUEST_NOT_STARTED to HTP_REQUEST_LINE.* + */ +enum htp_tx_req_progress_t { + HTP_REQUEST_NOT_STARTED = 0, + HTP_REQUEST_LINE = 1, + HTP_REQUEST_HEADERS = 2, + HTP_REQUEST_BODY = 3, + HTP_REQUEST_TRAILER = 4, + HTP_REQUEST_COMPLETE = 5 +}; + +enum htp_tx_res_progress_t { + HTP_RESPONSE_NOT_STARTED = 0, + HTP_RESPONSE_LINE = 1, + HTP_RESPONSE_HEADERS = 2, + HTP_RESPONSE_BODY = 3, + HTP_RESPONSE_TRAILER = 4, + HTP_RESPONSE_COMPLETE = 5 +}; + +#define HTP_CONFIG_PRIVATE 0 +#define HTP_CONFIG_SHARED 1 + +/** + * Creates a new transaction structure. + * + * @param[in] connp Connection parser pointer. Must not be NULL. + * @return The newly created transaction, or NULL on memory allocation failure. + */ +htp_tx_t *htp_tx_create(htp_connp_t *connp); + +/** + * Destroys the supplied transaction. + * + * @param[in] tx Transaction pointer. Must not be NULL. + */ +htp_status_t htp_tx_destroy(htp_tx_t *tx); + +/** + * Determines if the transaction used a shared configuration structure. See the + * documentation for htp_tx_set_config() for more information why you might want + * to know that. + * + * @param[in] tx Transaction pointer. Must not be NULL. + * @return HTP_CONFIG_SHARED or HTP_CONFIG_PRIVATE. + */ +int htp_tx_get_is_config_shared(const htp_tx_t *tx); + +/** + * Returns the user data associated with this transaction. + * + * @param[in] tx Transaction pointer. Must not be NULL. + * @return A pointer to user data or NULL. + */ +void *htp_tx_get_user_data(const htp_tx_t *tx); + +/** + * Registers a callback that will be invoked to process the transaction's request body data. + * + * @param[in] tx Transaction pointer. Must not be NULL. + * @param[in] callback_fn Callback function pointer. Must not be NULL. 
+ */ +void htp_tx_register_request_body_data(htp_tx_t *tx, int (*callback_fn)(htp_tx_data_t *)); + +/** + * Registers a callback that will be invoked to process the transaction's response body data. + * + * @param[in] tx Transaction pointer. Must not be NULL. + * @param[in] callback_fn Callback function pointer. Must not be NULL. + */ +void htp_tx_register_response_body_data(htp_tx_t *tx, int (*callback_fn)(htp_tx_data_t *)); + +/** + * Adds one parameter to the request. This function will take over the + * responsibility for the provided htp_param_t structure. + * + * @param[in] tx Transaction pointer. Must not be NULL. + * @param[in] param Parameter pointer. Must not be NULL. + * @return HTP_OK on success, HTP_ERROR on failure. + */ +htp_status_t htp_tx_req_add_param(htp_tx_t *tx, htp_param_t *param); + +/** + * Returns the first request parameter that matches the given name, using case-insensitive matching. + * + * @param[in] tx Transaction pointer. Must not be NULL. + * @param[in] name Name data pointer. Must not be NULL. + * @param[in] name_len Name data length. + * @return htp_param_t instance, or NULL if parameter not found. + */ +htp_param_t *htp_tx_req_get_param(htp_tx_t *tx, const char *name, size_t name_len); + +/** + * Returns the first request parameter from the given source that matches the given name, + * using case-insensitive matching. + * + * @param[in] tx Transaction pointer. Must not be NULL. + * @param[in] source Parameter source (where in request the parameter was located). + * @param[in] name Name data pointer. Must not be NULL. + * @param[in] name_len Name data length. + * @return htp_param_t instance, or NULL if parameter not found. + */ +htp_param_t *htp_tx_req_get_param_ex(htp_tx_t *tx, enum htp_data_source_t source, const char *name, size_t name_len); + +/** + * Determine if the request has a body. + * + * @param[in] tx Transaction pointer. Must not be NULL. + * @return 1 if there is a body, 0 otherwise. 
+ */ +int htp_tx_req_has_body(const htp_tx_t *tx); + +/** + * Process a chunk of request body data. This function assumes that + * handling of chunked encoding is implemented by the container. When + * you're done submitting body data, invoke a state change (to REQUEST) + * to finalize any processing that might be pending. The supplied data is + * fully consumed and there is no expectation that it will be available + * afterwards. The protocol parsing code makes no copies of the data, + * but some parsers might. + * + * @param[in] tx Transaction pointer. Must not be NULL. + * @param[in] data Data pointer. Must not be NULL. + * @param[in] len Data length. + * @return HTP_OK on success, HTP_ERROR on failure. + */ +htp_status_t htp_tx_req_process_body_data(htp_tx_t *tx, const void *data, size_t len); + +/** + * Set one request header. This function should be invoked once for + * each available header, and in the order in which headers were + * seen in the request. + * + * @param[in] tx Transaction pointer. Must not be NULL. + * @param[in] name Name data pointer. Must not be NULL. + * @param[in] name_len Name data length. + * @param[in] value Value data pointer. Must not be NULL. + * @param[in] value_len Value data length. + * @param[in] alloc Desired allocation strategy. + * @return HTP_OK on success, HTP_ERROR on failure. + */ +htp_status_t htp_tx_req_set_header(htp_tx_t *tx, const char *name, size_t name_len, + const char *value, size_t value_len, enum htp_alloc_strategy_t alloc); + +/** + * Removes all request headers associated with this transaction. This + * function is needed because in some cases the container does not + * differentiate between standard and trailing headers. In that case, + * you set request headers once at the beginning of the transaction, + * read the body (at this point the request headers should contain the + * mix of regular and trailing headers), clear all headers, and then set + * them all again. 
+ * + * @param[in] tx Transaction pointer. Must not be NULL. + * @return HTP_OK on success, HTP_ERROR on failure. + */ +htp_status_t htp_tx_req_set_headers_clear(htp_tx_t *tx); + +/** + * Set request line. When used, this function should always be called first, + * with more specific functions following. Must not contain line terminators. + * + * @param[in] tx Transaction pointer. Must not be NULL. + * @param[in] line Line data pointer. Must not be NULL. + * @param[in] line_len Line data length. + * @param[in] alloc Desired allocation strategy. + * @return HTP_OK on success, HTP_ERROR on failure. + */ +htp_status_t htp_tx_req_set_line(htp_tx_t *tx, const char *line, size_t line_len, enum htp_alloc_strategy_t alloc); + +/** + * Set transaction request method. This function will enable you to keep + * track of the text representation of the method. + * + * @param[in] tx Transaction pointer. Must not be NULL. + * @param[in] method Method data pointer. Must not be NULL. + * @param[in] method_len Method data length. + * @param[in] alloc Desired allocation strategy. + * @return HTP_OK on success, HTP_ERROR on failure. + */ +htp_status_t htp_tx_req_set_method(htp_tx_t *tx, const char *method, size_t method_len, enum htp_alloc_strategy_t alloc); + +/** + * Set transaction request method number. This function enables you to + * keep track how a particular method string is interpreted. This function + * is useful with web servers that ignore invalid methods; for example, some + * web servers will treat them as a GET. + * + * @param[in] tx Transaction pointer. Must not be NULL. + * @param[in] method_number Method number. + */ +void htp_tx_req_set_method_number(htp_tx_t *tx, enum htp_method_t method_number); + +/** + * Set parsed request URI. You don't need to use this function if you are already providing + * the request line or request URI. But if your container already has this data available, + * feeding it to LibHTP will minimize any potential data differences. 
This function assumes + * management of the data provided in parsed_uri. This function will not change htp_tx_t::parsed_uri_raw + * (which may have data in it from the parsing of the request URI). + * + * @param[in] tx Transaction pointer. Must not be NULL. + * @param[in] parsed_uri URI pointer. Must not be NULL. + */ +void htp_tx_req_set_parsed_uri(htp_tx_t *tx, htp_uri_t *parsed_uri); + +/** + * Forces HTTP/0.9 as the transaction protocol. This method exists to ensure + * that both LibHTP and the container treat the transaction as HTTP/0.9, despite + * potential differences in how the protocol version is determined. + * + * @param[in] tx Transaction pointer. Must not be NULL. + * @param[in] is_protocol_0_9 Zero if protocol is not HTTP/0.9, or 1 if it is. + */ +void htp_tx_req_set_protocol_0_9(htp_tx_t *tx, int is_protocol_0_9); + +/** + * Sets the request protocol string (e.g., "HTTP/1.0"). The information provided + * is only stored, not parsed. Use htp_tx_req_set_protocol_number() to set the + * actual protocol number, as interpreted by the container. + * + * @param[in] tx Transaction pointer. Must not be NULL. + * @param[in] protocol Protocol data pointer. Must not be NULL. + * @param[in] protocol_len Protocol data length. + * @param[in] alloc Desired allocation strategy. + * @return HTP_OK on success, HTP_ERROR on failure. + */ +htp_status_t htp_tx_req_set_protocol(htp_tx_t *tx, const char *protocol, size_t protocol_len, enum htp_alloc_strategy_t alloc); + +/** + * Set request protocol version number. Must be invoked after + * htp_tx_req_set_protocol(), because it will overwrite the previously + * extracted version number. Convert the protocol version number to an integer + * by multiplying it with 100. For example, 1.1 becomes 110. Alternatively, + * use the HTP_PROTOCOL_0_9, HTP_PROTOCOL_1_0, and HTP_PROTOCOL_1_1 constants. + * Note: setting protocol to HTP_PROTOCOL_0_9 alone will _not_ get the library to + * treat the transaction as HTTP/0.9. 
You need to also invoke htp_tx_req_set_protocol_0_9(). + * This is because HTTP 0.9 is used only when protocol information is absent from the + * request line, and not when it is explicitly stated (as "HTTP/0.9"). This behavior is + * consistent with that of Apache httpd. + * + * @param[in] tx Transaction pointer. Must not be NULL. + * @param[in] protocol_number Protocol number. + */ +void htp_tx_req_set_protocol_number(htp_tx_t *tx, int protocol_number); + +/** + * Set transaction request URI. The value provided here will be stored in htp_tx_t::request_uri + * and subsequently parsed. If htp_tx_req_set_line() was previously used, the uri provided + * when calling this function will overwrite any previously parsed value. + * + * @param[in] tx Transaction pointer. Must not be NULL. + * @param[in] uri URI data pointer. Must not be NULL. + * @param[in] uri_len URI data length. + * @param[in] alloc Desired allocation strategy. + * @return HTP_OK on success, HTP_ERROR on failure. + */ +htp_status_t htp_tx_req_set_uri(htp_tx_t *tx, const char *uri, size_t uri_len, enum htp_alloc_strategy_t alloc); + +/** + * Process a chunk of response body data. This function assumes that + * handling of chunked encoding is implemented by the container. When + * you're done submitting body data, invoking a state change (to RESPONSE) + * will finalize any processing that might be pending. + * + * The response body data will be decompressed if two conditions are met: one, + * decompression is enabled in configuration and two, if the response headers + * indicate compression. Alternatively, you can control decompression from + * a RESPONSE_HEADERS callback, by setting tx->response_content_encoding either + * to COMPRESSION_NONE (to disable compression), or to one of the supported + * decompression algorithms. + * + * @param[in] tx Transaction pointer. Must not be NULL. + * @param[in] data Data pointer. Must not be NULL. + * @param[in] len Data length. 
+ * @return HTP_OK on success, HTP_ERROR on failure. + */ +htp_status_t htp_tx_res_process_body_data(htp_tx_t *tx, const void *data, size_t len); + +/** + * Set one response header. This function should be invoked once for + * each available header, and in the order in which headers were + * seen in the response. + * + * @param[in] tx Transaction pointer. Must not be NULL. + * @param[in] name Name data pointer. Must not be NULL. + * @param[in] name_len Name data length. + * @param[in] value Value data pointer. Must not be NULL. + * @param[in] value_len Value length. + * @param[in] alloc Desired allocation strategy. + * @return HTP_OK on success, HTP_ERROR on failure. + */ +htp_status_t htp_tx_res_set_header(htp_tx_t *tx, const char *name, size_t name_len, + const char *value, size_t value_len, enum htp_alloc_strategy_t alloc); + +/** + * Removes all response headers associated with this transaction. This + * function is needed because in some cases the container does not + * differentiate between standard and trailing headers. In that case, + * you set response headers once at the beginning of the transaction, + * read the body, clear all headers, and then set them all again. After + * the headers are set for the second time, they will potentially contain + * a mixture of standard and trailing headers. + * + * @param[in] tx Transaction pointer. Must not be NULL. + * @return HTP_OK on success, HTP_ERROR on failure. + */ +htp_status_t htp_tx_res_set_headers_clear(htp_tx_t *tx); + +/** + * Set response protocol number. See htp_tx_req_set_protocol_number() for more information + * about the correct format of the protocol_number parameter. + * + * @param[in] tx Transaction pointer. Must not be NULL. + * @param[in] protocol_number Protocol number. + */ +void htp_tx_res_set_protocol_number(htp_tx_t *tx, int protocol_number); + +/** + * Set response line. 
Use this function if you have a single buffer containing + * the entire line. If you have individual response line pieces, use the other + * available functions. + * + * @param[in] tx Transaction pointer. Must not be NULL. + * @param[in] line Line data pointer. Must not be NULL. + * @param[in] line_len Line data length. + * @param[in] alloc Desired allocation strategy. + * @return HTP_OK on success, HTP_ERROR on failure. + */ +htp_status_t htp_tx_res_set_status_line(htp_tx_t *tx, const char *line, size_t line_len, enum htp_alloc_strategy_t alloc); + +/** + * Set response status code. + * + * @param[in] tx Transaction pointer. Must not be NULL. + * @param[in] status_code Response status code. + */ +void htp_tx_res_set_status_code(htp_tx_t *tx, int status_code); + +/** + * Set response status message, which is the part of the response + * line that comes after the status code. + * + * @param[in] tx Transaction pointer. Must not be NULL. + * @param[in] msg Message data pointer. Must not be NULL. + * @param[in] msg_len Message data length. + * @param[in] alloc Desired allocation strategy. + * @return HTP_OK on success, HTP_ERROR on failure. + */ +htp_status_t htp_tx_res_set_status_message(htp_tx_t *tx, const char *msg, size_t msg_len, enum htp_alloc_strategy_t alloc); + +/** + * Sets the configuration that is to be used for this transaction. If the + * second parameter is set to HTP_CFG_PRIVATE, the transaction will adopt + * the configuration structure and destroy it when appropriate. This function is + * useful if you need to make changes to configuration on a per-transaction basis. + * Initially, all transactions will share the configuration with that of the + * connection; if you were to make changes on it, they would affect all + * current and future connections. 
To work around that, you make a copy of the + * configuration object, call this function with the second parameter set to + * HTP_CFG_PRIVATE, and modify configuration at will. + * + * @param[in] tx Transaction pointer. Must not be NULL. + * @param[in] cfg Configuration pointer. Must not be NULL. + * @param[in] is_cfg_shared HTP_CFG_SHARED or HTP_CFG_PRIVATE + */ +void htp_tx_set_config(htp_tx_t *tx, htp_cfg_t *cfg, int is_cfg_shared); + +/** + * Associates user data with this transaction. + * + * @param[in] tx Transaction pointer. Must not be NULL. + * @param[in] user_data Opaque user data pointer. + */ +void htp_tx_set_user_data(htp_tx_t *tx, void *user_data); + +/** + * Change transaction state to REQUEST and invoke registered callbacks. + * + * @param[in] tx Transaction pointer. Must not be NULL. + * @return HTP_OK on success; HTP_ERROR on error, HTP_STOP if one of the + * callbacks does not want to follow the transaction any more. + */ +htp_status_t htp_tx_state_request_complete(htp_tx_t *tx); + +/** + * Change transaction state to REQUEST_HEADERS and invoke all + * registered callbacks. + * + * @param[in] tx Transaction pointer. Must not be NULL. + * @return HTP_OK on success; HTP_ERROR on error, HTP_STOP if one of the + * callbacks does not want to follow the transaction any more. + */ +htp_status_t htp_tx_state_request_headers(htp_tx_t *tx); + +/** + * Change transaction state to REQUEST_LINE and invoke all + * registered callbacks. + * + * @param[in] tx Transaction pointer. Must not be NULL. + * @return HTP_OK on success; HTP_ERROR on error, HTP_STOP if one of the + * callbacks does not want to follow the transaction any more. + */ +htp_status_t htp_tx_state_request_line(htp_tx_t *tx); + +/** + * Initialize hybrid parsing mode, change state to TRANSACTION_START, + * and invoke all registered callbacks. + * + * @param[in] tx Transaction pointer. Must not be NULL. 
+ * @return HTP_OK on success; HTP_ERROR on error, HTP_STOP if one of the + * callbacks does not want to follow the transaction any more. + */ +htp_status_t htp_tx_state_request_start(htp_tx_t *tx); + +/** + * Change transaction state to RESPONSE and invoke registered callbacks. + * + * @param[in] tx Transaction pointer. Must not be NULL. + * @return HTP_OK on success; HTP_ERROR on error, HTP_STOP if one of the + * callbacks does not want to follow the transaction any more. + */ +htp_status_t htp_tx_state_response_complete(htp_tx_t *tx); + +/** + * Change transaction state to RESPONSE_HEADERS and invoke registered callbacks. + * + * @param[in] tx Transaction pointer. Must not be NULL. + * @return HTP_OK on success; HTP_ERROR on error, HTP_STOP if one of the + * callbacks does not want to follow the transaction any more. + */ +htp_status_t htp_tx_state_response_headers(htp_tx_t *tx); + +/** + * Change transaction state to HTP_RESPONSE_LINE and invoke registered callbacks. + * + * @param[in] tx Transaction pointer. Must not be NULL. + * @return HTP_OK on success; HTP_ERROR on error, HTP_STOP if one of the + * callbacks does not want to follow the transaction any more. + */ +htp_status_t htp_tx_state_response_line(htp_tx_t *tx); + +/** + * Change transaction state to RESPONSE_START and invoke registered callbacks. + * + * @param[in] tx Transaction pointer. Must not be NULL. + * @return HTP_OK on success; HTP_ERROR on error, HTP_STOP if one of the + * callbacks does not want to follow the transaction any more. + */ +htp_status_t htp_tx_state_response_start(htp_tx_t *tx); + +#ifdef __cplusplus +} +#endif + +#endif /* HTP_HYBRID_H */ diff --git a/htp/htp_transcoder.c b/htp/htp_transcoder.c new file mode 100644 index 0000000..03d49ed --- /dev/null +++ b/htp/htp_transcoder.c @@ -0,0 +1,211 @@ +/*************************************************************************** + * Copyright (c) 2009-2010 Open Information Security Foundation + * Copyright (c) 2010-2013 Qualys, Inc. 
+ * All rights reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions are + * met: + * + * - Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + + * - Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution. + + * - Neither the name of the Qualys, Inc. nor the names of its + * contributors may be used to endorse or promote products derived from + * this software without specific prior written permission. + * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS + * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT + * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR + * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT + * HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, + * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT + * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, + * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY + * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT + * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE + * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + ***************************************************************************/ + +/** + * @file + * @author Ivan Ristic <ivanr@webkreator.com> + */ + +#include "htp_config_auto.h" + +#include "htp_private.h" + +/** + * Transcode all parameters supplied in the table. 
+ * + * @param[in] connp + * @param[in] params + * @param[in] destroy_old + */ +int htp_transcode_params(htp_connp_t *connp, htp_table_t **params, int destroy_old) { + htp_table_t *input_params = *params; + + // No transcoding unless necessary + if ((connp->cfg->internal_encoding == NULL)||(connp->cfg->request_encoding == NULL)) return HTP_OK; + + // Create a new table that will hold transcoded parameters + htp_table_t *output_params = htp_table_create(htp_table_size(input_params)); + if (output_params == NULL) return HTP_ERROR; + + // Initialize iconv + iconv_t cd = iconv_open(connp->cfg->internal_encoding, connp->cfg->request_encoding); + if (cd == (iconv_t) -1) { + htp_table_destroy(output_params); + return HTP_ERROR; + } + + #if (_LIBICONV_VERSION >= 0x0108 && HAVE_ICONVCTL) + int iconv_param = 0; + iconvctl(cd, ICONV_SET_TRANSLITERATE, &iconv_param); + iconv_param = 1; + iconvctl(cd, ICONV_SET_DISCARD_ILSEQ, &iconv_param); + #endif + + // Convert the parameters, one by one + bstr *name = NULL; + bstr *value = NULL; + for (size_t i = 0, n = htp_table_size(input_params); i < n; i++) { + value = htp_table_get_index(input_params, i, &name); + + bstr *new_name = NULL, *new_value = NULL; + + // Convert name + htp_transcode_bstr(cd, name, &new_name); + if (new_name == NULL) { + iconv_close(cd); + + bstr *b = NULL; + for (size_t j = 0, k = htp_table_size(output_params); j < k; j++) { + b = htp_table_get_index(output_params, j, NULL); + bstr_free(b); + } + + htp_table_destroy(output_params); + return HTP_ERROR; + } + + // Convert value + htp_transcode_bstr(cd, value, &new_value); + if (new_value == NULL) { + bstr_free(new_name); + iconv_close(cd); + + bstr *b = NULL; + for (size_t j = 0, k = htp_table_size(output_params); j < k; j++) { + b = htp_table_get_index(output_params, j, NULL); + bstr_free(b); + } + + htp_table_destroy(output_params); + return HTP_ERROR; + } + + // Add to new table + htp_table_addn(output_params, new_name, new_value); + } + + // Replace the old 
parameter table + *params = output_params; + + // Destroy the old parameter table if necessary + if (destroy_old) { + bstr *b = NULL; + for (size_t i = 0, n = htp_table_size(input_params); i < n; i++) { + b = htp_table_get_index(input_params, i, NULL); + bstr_free(b); + } + + htp_table_destroy(input_params); + } + + iconv_close(cd); + + return HTP_OK; +} + +/** + * Transcode one bstr. + * + * @param[in] cd + * @param[in] input + * @param[in] output + */ +int htp_transcode_bstr(iconv_t cd, bstr *input, bstr **output) { + // Reset conversion state for every new string + iconv(cd, NULL, 0, NULL, 0); + + bstr_builder_t *bb = NULL; + + const size_t buflen = 10; + unsigned char *buf = malloc(buflen); + if (buf == NULL) { + return HTP_ERROR; + } + + const char *inbuf = (const char *)bstr_ptr(input); + size_t inleft = bstr_len(input); + char *outbuf = (char *)buf; + size_t outleft = buflen; + + int loop = 1; + while (loop) { + loop = 0; + + if (iconv(cd, (ICONV_CONST char **)&inbuf, &inleft, (char **)&outbuf, &outleft) == (size_t) - 1) { + if (errno == E2BIG) { + // Create bstr builder on-demand + if (bb == NULL) { + bb = bstr_builder_create(); + if (bb == NULL) { + free(buf); + return HTP_ERROR; + } + } + + // The output buffer is full + bstr_builder_append_mem(bb, buf, buflen - outleft); + + outbuf = (char *)buf; + outleft = buflen; + + // Continue in the loop, as there's more work to do + loop = 1; + } else { + // Error + if (bb != NULL) bstr_builder_destroy(bb); + free(buf); + return HTP_ERROR; + } + } + } + + if (bb != NULL) { + bstr_builder_append_mem(bb, buf, buflen - outleft); + *output = bstr_builder_to_str(bb); + bstr_builder_destroy(bb); + if (*output == NULL) { + free(buf); + return HTP_ERROR; + } + } else { + *output = bstr_dup_mem(buf, buflen - outleft); + if (*output == NULL) { + free(buf); + return HTP_ERROR; + } + } + + free(buf); + + return HTP_OK; +} diff --git a/htp/htp_urlencoded.c b/htp/htp_urlencoded.c new file mode 100644 index 0000000..5ad3886 --- 
/dev/null +++ b/htp/htp_urlencoded.c @@ -0,0 +1,332 @@ +/*************************************************************************** + * Copyright (c) 2009-2010 Open Information Security Foundation + * Copyright (c) 2010-2013 Qualys, Inc. + * All rights reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions are + * met: + * + * - Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + + * - Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution. + + * - Neither the name of the Qualys, Inc. nor the names of its + * contributors may be used to endorse or promote products derived from + * this software without specific prior written permission. + * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS + * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT + * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR + * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT + * HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, + * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT + * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, + * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY + * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT + * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE + * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
+ ***************************************************************************/ + +/** + * @file + * @author Ivan Ristic <ivanr@webkreator.com> + */ + +#include "htp_config_auto.h" + +#include "htp_private.h" + +/** + * This method is invoked whenever a piece of data, belonging to a single field (name or value) + * becomes available. It will either create a new parameter or store the transient information + * until a parameter can be created. + * + * @param[in] urlenp + * @param[in] data + * @param[in] startpos + * @param[in] endpos + * @param[in] c Should contain -1 if the reason this function is called is because the end of + * the current data chunk is reached. + */ +static void htp_urlenp_add_field_piece(htp_urlenp_t *urlenp, const unsigned char *data, size_t startpos, size_t endpos, int last_char) { + // Add field if we know it ended (last_char is something other than -1) + // or if we know that there won't be any more input data (urlenp->_complete is true). + if ((last_char != -1) || (urlenp->_complete)) { + // Prepare the field value, assembling from multiple pieces as necessary. + + bstr *field = NULL; + + // Did we use the string builder for this field? + if (bstr_builder_size(urlenp->_bb) > 0) { + // The current field consists of more than once piece, we have to use the string builder. + + // Add current piece to string builder. + if ((data != NULL) && (endpos - startpos > 0)) { + bstr_builder_append_mem(urlenp->_bb, data + startpos, endpos - startpos); + } + + // Generate the field and clear the string builder. + field = bstr_builder_to_str(urlenp->_bb); + if (field == NULL) return; + + bstr_builder_clear(urlenp->_bb); + } else { + // We only have the current piece to work with, so no need to involve the string builder. + if ((data != NULL) && (endpos - startpos > 0)) { + field = bstr_dup_mem(data + startpos, endpos - startpos); + if (field == NULL) return; + } + } + + // Process field as key or value, as appropriate. 
+ + if (urlenp->_state == HTP_URLENP_STATE_KEY) { + // Key. + + // If there is no more work left to do, then we have a single key. Add it. + if ((urlenp->_complete)||(last_char == urlenp->argument_separator)) { + + // Handling empty pairs is tricky. We don't want to create a pair for + // an entirely empty input, but in some cases it may be appropriate + // (e.g., /index.php?&q=2). + if ((field != NULL)||(last_char == urlenp->argument_separator)) { + // Add one pair, with an empty value and possibly empty key too. + + bstr *name = field; + if (name == NULL) { + name = bstr_dup_c(""); + if (name == NULL) return; + } + + bstr *value = bstr_dup_c(""); + if (value == NULL) { + bstr_free(name); + return; + } + + if (urlenp->decode_url_encoding) { + htp_tx_urldecode_params_inplace(urlenp->tx, name); + } + + htp_table_addn(urlenp->params, name, value); + + urlenp->_name = NULL; + + #ifdef HTP_DEBUG + fprint_raw_data(stderr, "NAME", bstr_ptr(name), bstr_len(name)); + fprint_raw_data(stderr, "VALUE", bstr_ptr(value), bstr_len(value)); + #endif + } + } else { + // This key will possibly be followed by a value, so keep it for later. + urlenp->_name = field; + } + } else { + // Value (with a key remembered from before). + + bstr *name = urlenp->_name; + urlenp->_name = NULL; + + if (name == NULL) { + name = bstr_dup_c(""); + if (name == NULL) { + bstr_free(field); + return; + } + } + + bstr *value = field; + if (value == NULL) { + value = bstr_dup_c(""); + if (value == NULL) { + bstr_free(name); + return; + } + } + + if (urlenp->decode_url_encoding) { + htp_tx_urldecode_params_inplace(urlenp->tx, name); + htp_tx_urldecode_params_inplace(urlenp->tx, value); + } + + htp_table_addn(urlenp->params, name, value); + + #ifdef HTP_DEBUG + fprint_raw_data(stderr, "NAME", bstr_ptr(name), bstr_len(name)); + fprint_raw_data(stderr, "VALUE", bstr_ptr(value), bstr_len(value)); + #endif + } + } else { + // The field has not ended. We'll make a copy of of the available data for later. 
+ if ((data != NULL) && (endpos - startpos > 0)) { + bstr_builder_append_mem(urlenp->_bb, data + startpos, endpos - startpos); + } + } +} + +/** + * Creates a new URLENCODED parser. + * + * @return New parser, or NULL on memory allocation failure. + */ +htp_urlenp_t *htp_urlenp_create(htp_tx_t *tx) { + htp_urlenp_t *urlenp = calloc(1, sizeof (htp_urlenp_t)); + if (urlenp == NULL) return NULL; + + urlenp->tx = tx; + + urlenp->params = htp_table_create(HTP_URLENP_DEFAULT_PARAMS_SIZE); + if (urlenp->params == NULL) { + free(urlenp); + return NULL; + } + + urlenp->_bb = bstr_builder_create(); + if (urlenp->_bb == NULL) { + htp_table_destroy(urlenp->params); + free(urlenp); + return NULL; + } + + urlenp->argument_separator = '&'; + urlenp->decode_url_encoding = 1; + urlenp->_state = HTP_URLENP_STATE_KEY; + + return urlenp; +} + +/** + * Destroys an existing URLENCODED parser. + * + * @param[in] urlenp + */ +void htp_urlenp_destroy(htp_urlenp_t *urlenp) { + if (urlenp == NULL) return; + + if (urlenp->_name != NULL) { + bstr_free(urlenp->_name); + } + + bstr_builder_destroy(urlenp->_bb); + + if (urlenp->params != NULL) { + // Destroy parameters. + for (size_t i = 0, n = htp_table_size(urlenp->params); i < n; i++) { + bstr *b = htp_table_get_index(urlenp->params, i, NULL); + // Parameter name will be freed by the table code. + bstr_free(b); + } + + htp_table_destroy(urlenp->params); + } + + free(urlenp); +} + +/** + * Finalizes parsing, forcing the parser to convert any outstanding + * data into parameters. This method should be invoked at the end + * of a parsing operation that used htp_urlenp_parse_partial(). + * + * @param[in] urlenp + * @return Success indication + */ +htp_status_t htp_urlenp_finalize(htp_urlenp_t *urlenp) { + urlenp->_complete = 1; + return htp_urlenp_parse_partial(urlenp, NULL, 0); +} + +/** + * Parses the provided data chunk under the assumption + * that it contains all the data that will be parsed. 
When this + * method is used for parsing the finalization method should not + * be invoked. + * + * @param[in] urlenp + * @param[in] data + * @param[in] len + * @return + */ +htp_status_t htp_urlenp_parse_complete(htp_urlenp_t *urlenp, const void *data, size_t len) { + htp_urlenp_parse_partial(urlenp, data, len); + return htp_urlenp_finalize(urlenp); +} + +/** + * Parses the provided data chunk, keeping state to allow streaming parsing, i.e., the + * parsing where only partial information is available at any one time. The method + * htp_urlenp_finalize() must be invoked at the end to finalize parsing. + * + * @param[in] urlenp + * @param[in] _data + * @param[in] len + * @return + */ +htp_status_t htp_urlenp_parse_partial(htp_urlenp_t *urlenp, const void *_data, size_t len) { + unsigned char *data = (unsigned char *) _data; + size_t startpos = 0; + size_t pos = 0; + int c; + + if (data == NULL) len = 0; + + do { + // Get the next character, or use -1 to indicate end of input. + if (pos < len) c = data[pos]; + else c = -1; + + switch (urlenp->_state) { + + case HTP_URLENP_STATE_KEY: + // Look for =, argument separator, or end of input. + if ((c == '=') || (c == urlenp->argument_separator) || (c == -1)) { + // Data from startpos to pos. + htp_urlenp_add_field_piece(urlenp, data, startpos, pos, c); + + // If it's not the end of input, then it must be the end of this field. + if (c != -1) { + // Next state. + startpos = pos + 1; + + if (c == urlenp->argument_separator) { + urlenp->_state = HTP_URLENP_STATE_KEY; + } else { + urlenp->_state = HTP_URLENP_STATE_VALUE; + } + } + } + + pos++; + + break; + + case HTP_URLENP_STATE_VALUE: + // Look for argument separator or end of input. + if ((c == urlenp->argument_separator) || (c == -1)) { + // Data from startpos to pos. + htp_urlenp_add_field_piece(urlenp, data, startpos, pos, c); + + // If it's not the end of input, then it must be the end of this field. + if (c != -1) { + // Next state. 
+ startpos = pos + 1; + urlenp->_state = HTP_URLENP_STATE_KEY; + } + } + + pos++; + + break; + + default: + // Invalid state. + return HTP_ERROR; + } + } while (c != -1); + + return HTP_OK; +} diff --git a/htp/htp_urlencoded.h b/htp/htp_urlencoded.h new file mode 100644 index 0000000..bc4697c --- /dev/null +++ b/htp/htp_urlencoded.h @@ -0,0 +1,111 @@ +/*************************************************************************** + * Copyright (c) 2009-2010 Open Information Security Foundation + * Copyright (c) 2010-2013 Qualys, Inc. + * All rights reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions are + * met: + * + * - Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + + * - Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution. + + * - Neither the name of the Qualys, Inc. nor the names of its + * contributors may be used to endorse or promote products derived from + * this software without specific prior written permission. + * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS + * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT + * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR + * A PARTICULAR PURPOSE ARE DISCLAIMED. 
IN NO EVENT SHALL THE COPYRIGHT
 * HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
 * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
 * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
 * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
 * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
 * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
 * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
 ***************************************************************************/

/**
 * @file
 * @author Ivan Ristic <ivanr@webkreator.com>
 */

#ifndef _HTP_URLENCODED_H
#define _HTP_URLENCODED_H

#ifdef __cplusplus
extern "C" {
#endif

typedef struct htp_urlenp_t htp_urlenp_t;
typedef struct htp_urlen_param_t htp_urlen_param_t;

// Initial size requested for the parameter table of a new parser.
#define HTP_URLENP_DEFAULT_PARAMS_SIZE 32

// Parser states: reading a parameter name, or reading a parameter value.
#define HTP_URLENP_STATE_KEY 1
#define HTP_URLENP_STATE_VALUE 2

// The MIME type that triggers the parser. Must be lowercase.
#define HTP_URLENCODED_MIME_TYPE "application/x-www-form-urlencoded"

#include "htp.h"

/**
 * This is the main URLENCODED parser structure. It is used to store
 * parser configuration, temporary parsing data, as well as the parameters.
 */
struct htp_urlenp_t {
    /** The transaction this parser belongs to. */
    htp_tx_t *tx;

    /** The character used to separate parameters. Defaults to & and should
     *  not be changed without good reason.
     */
    unsigned char argument_separator;

    /** Whether to perform URL-decoding on parameters. */
    int decode_url_encoding;

    /** This table contains the list of parameters, indexed by name. */
    htp_table_t *params;

    // Private fields; these are used during the parsing process only

    /** Current parser state: HTP_URLENP_STATE_KEY or HTP_URLENP_STATE_VALUE. */
    int _state;

    /** Set to 1 by htp_urlenp_finalize() to signal that no more input will follow. */
    int _complete;

    /** Key that has been read and is waiting for its value; NULL when there is none. */
    bstr *_name;

    /** String builder that collects the pieces of a field spanning several data chunks. */
    bstr_builder_t *_bb;
};

/**
 * Holds one application/x-www-form-urlencoded parameter.
 */
struct htp_urlen_param_t {
    /** Parameter name. */
    bstr *name;

    /** Parameter value. */
    bstr *value;
};

/** Creates a parser for the given transaction; returns NULL on allocation failure. */
htp_urlenp_t *htp_urlenp_create(htp_tx_t *tx);

/** Destroys a parser and the parameters it holds; a NULL parser is ignored. */
void htp_urlenp_destroy(htp_urlenp_t *urlenp);

// NOTE(review): presumably these two set the argument_separator and
// decode_url_encoding fields -- confirm against their definitions.
void htp_urlenp_set_argument_separator(htp_urlenp_t *urlenp, unsigned char argument_separator);
void htp_urlenp_set_decode_url_encoding(htp_urlenp_t *urlenp, int decode_url_encoding);

/** Parses one chunk of a stream; htp_urlenp_finalize() must be called after the last chunk. */
htp_status_t htp_urlenp_parse_partial(htp_urlenp_t *urlenp, const void *data, size_t len);

/** Parses data that is known to be complete; finalization is performed internally. */
htp_status_t htp_urlenp_parse_complete(htp_urlenp_t *urlenp, const void *data, size_t len);

/** Converts any outstanding data into parameters at the end of streaming parsing. */
htp_status_t htp_urlenp_finalize(htp_urlenp_t *urlenp);

#ifdef __cplusplus
}
#endif

#endif /* _HTP_URLENCODED_H */

diff --git a/htp/htp_utf8_decoder.c b/htp/htp_utf8_decoder.c new file mode 100644 index 0000000..6017a18 --- /dev/null +++ b/htp/htp_utf8_decoder.c @@ -0,0 +1,118 @@
/***************************************************************************
 * Copyright (c) 2009-2010 Open Information Security Foundation
 * Copyright (c) 2010-2013 Qualys, Inc.
 * All rights reserved.
 *
 * Redistribution and use in source and binary forms, with or without
 * modification, are permitted provided that the following conditions are
 * met:
 *
 * - Redistributions of source code must retain the above copyright
 *   notice, this list of conditions and the following disclaimer.

 * - Redistributions in binary form must reproduce the above copyright
 *   notice, this list of conditions and the following disclaimer in the
 *   documentation and/or other materials provided with the distribution.

 * - Neither the name of the Qualys, Inc. nor the names of its
 *   contributors may be used to endorse or promote products derived from
 *   this software without specific prior written permission.
+ * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS + * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT + * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR + * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT + * HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, + * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT + * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, + * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY + * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT + * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE + * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + ***************************************************************************/ + +/** + * @file + * @author Ivan Ristic <ivanr@webkreator.com> + */ + +/* +Copyright (c) 2008-2009 Bjoern Hoehrmann <bjoern@hoehrmann.de> + +Permission is hereby granted, free of charge, to any person obtaining a copy of this software +and associated documentation files (the "Software"), to deal in the Software without restriction, +including without limitation the rights to use, copy, modify, merge, publish, distribute, +sublicense, and/or sell copies of the Software, and to permit persons to whom the Software is +furnished to do so, subject to the following conditions: + +The above copyright notice and this permission notice shall be included in all copies or +substantial portions of the Software. + +THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR IMPLIED, INCLUDING BUT +NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND +NONINFRINGEMENT. 
IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, +DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, +OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. +*/ + +// Copyright (c) 2008-2009 Bjoern Hoehrmann <bjoern@hoehrmann.de> +// See http://bjoern.hoehrmann.de/utf-8/decoder/dfa/ for details. + +#include "htp_config_auto.h" + +#include "htp_private.h" + +static const uint8_t utf8d[] = { + 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, // 00..1f + 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, // 20..3f + 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, // 40..5f + 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, // 60..7f + 1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,9,9,9,9,9,9,9,9,9,9,9,9,9,9,9,9, // 80..9f + 7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7, // a0..bf + 8,8,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2, // c0..df + 0xa,0x3,0x3,0x3,0x3,0x3,0x3,0x3,0x3,0x3,0x3,0x3,0x3,0x4,0x3,0x3, // e0..ef + 0xb,0x6,0x6,0x6,0x5,0x8,0x8,0x8,0x8,0x8,0x8,0x8,0x8,0x8,0x8,0x8, // f0..ff + 0x0,0x1,0x2,0x3,0x5,0x8,0x7,0x1,0x1,0x1,0x4,0x6,0x1,0x1,0x1,0x1, // s0..s0 + 1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,0,1,1,1,1,1,0,1,0,1,1,1,1,1,1, // s1..s2 + 1,2,1,1,1,1,1,2,1,2,1,1,1,1,1,1,1,1,1,1,1,1,1,2,1,1,1,1,1,1,1,1, // s3..s4 + 1,2,1,1,1,1,1,1,1,2,1,1,1,1,1,1,1,1,1,1,1,1,1,3,1,3,1,1,1,1,1,1, // s5..s6 + 1,3,1,1,1,1,1,3,1,3,1,1,1,1,1,1,1,3,1,1,1,1,1,1,1,1,1,1,1,1,1,1, // s7..s8 +}; + +static const uint8_t utf8d_allow_overlong[] = { + 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, // 00..1f + 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, // 20..3f + 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, // 40..5f + 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, // 60..7f + 
1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,9,9,9,9,9,9,9,9,9,9,9,9,9,9,9,9, // 80..9f + 7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7, // a0..bf + 2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2, // c0..df; changed c0 and c1 + 0x3,0x3,0x3,0x3,0x3,0x3,0x3,0x3,0x3,0x3,0x3,0x3,0x3,0x4,0x3,0x3, // e0..ef; changed e0 + 0x6,0x6,0x6,0x6,0x5,0x8,0x8,0x8,0x8,0x8,0x8,0x8,0x8,0x8,0x8,0x8, // f0..ff; changed f0 + 0x0,0x1,0x2,0x3,0x5,0x8,0x7,0x1,0x1,0x1,0x4,0x6,0x1,0x1,0x1,0x1, // s0..s0 + 1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,0,1,1,1,1,1,0,1,0,1,1,1,1,1,1, // s1..s2 + 1,2,1,1,1,1,1,2,1,2,1,1,1,1,1,1,1,1,1,1,1,1,1,2,1,1,1,1,1,1,1,1, // s3..s4 + 1,2,1,1,1,1,1,1,1,2,1,1,1,1,1,1,1,1,1,1,1,1,1,3,1,3,1,1,1,1,1,1, // s5..s6 + 1,3,1,1,1,1,1,3,1,3,1,1,1,1,1,1,1,3,1,1,1,1,1,1,1,1,1,1,1,1,1,1, // s7..s8 +}; + +/** + * Process one byte of UTF-8 data and return a code point if one is available. Allows + * overlong characters in input. + * + * @param[in] state + * @param[in] codep + * @param[in] byte + * @return HTP_UTF8_ACCEPT for a valid character, HTP_UTF8_REJECT for an invalid character, + * or something else if the character has not yet been formed + */ +uint32_t htp_utf8_decode_allow_overlong(uint32_t* state, uint32_t* codep, uint32_t byte) { + uint32_t type = utf8d_allow_overlong[byte]; + + *codep = (*state != HTP_UTF8_ACCEPT) ? + (byte & 0x3fu) | (*codep << 6) : + (0xff >> type) & (byte); + + *state = utf8d[256 + *state*16 + type]; + return *state; +} diff --git a/htp/htp_utf8_decoder.h b/htp/htp_utf8_decoder.h new file mode 100644 index 0000000..b39abdd --- /dev/null +++ b/htp/htp_utf8_decoder.h @@ -0,0 +1,85 @@ +/*************************************************************************** + * Copyright (c) 2009-2010 Open Information Security Foundation + * Copyright (c) 2010-2013 Qualys, Inc. + * All rights reserved. 
+ * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions are + * met: + * + * - Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + + * - Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution. + + * - Neither the name of the Qualys, Inc. nor the names of its + * contributors may be used to endorse or promote products derived from + * this software without specific prior written permission. + * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS + * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT + * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR + * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT + * HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, + * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT + * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, + * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY + * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT + * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE + * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + ***************************************************************************/ + +/** + * @file + * @author Ivan Ristic <ivanr@webkreator.com> + */ + +/* LibHTP changes: + * + * - Changed the name of the function from "decode" to "utf8_decode" + * - Created a separate header file + * - Copied the license from the web page + * - Created a copy of the data and function "utf8_decode_allow_overlong", which + * does not treat overlong characters as invalid. 
+ */ + +/* +Copyright (c) 2008-2009 Bjoern Hoehrmann <bjoern@hoehrmann.de> + +Permission is hereby granted, free of charge, to any person obtaining a copy of this software +and associated documentation files (the "Software"), to deal in the Software without restriction, +including without limitation the rights to use, copy, modify, merge, publish, distribute, +sublicense, and/or sell copies of the Software, and to permit persons to whom the Software is +furnished to do so, subject to the following conditions: + +The above copyright notice and this permission notice shall be included in all copies or +substantial portions of the Software. + +THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR IMPLIED, INCLUDING BUT +NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND +NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, +DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, +OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. +*/ + +#ifndef _UTF8_DECODER_H +#define _UTF8_DECODER_H + +#ifdef __cplusplus +extern "C" { +#endif + +#include <stdint.h> + +#define HTP_UTF8_ACCEPT 0 +#define HTP_UTF8_REJECT 1 + +uint32_t htp_utf8_decode_allow_overlong(uint32_t* state, uint32_t* codep, uint32_t byte); + +#ifdef __cplusplus +} +#endif + +#endif /* _UTF8_DECODER_H */ diff --git a/htp/htp_util.c b/htp/htp_util.c new file mode 100644 index 0000000..936e22b --- /dev/null +++ b/htp/htp_util.c @@ -0,0 +1,2602 @@ +/*************************************************************************** + * Copyright (c) 2009-2010 Open Information Security Foundation + * Copyright (c) 2010-2013 Qualys, Inc. + * All rights reserved. 
+ * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions are + * met: + * + * - Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + + * - Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution. + + * - Neither the name of the Qualys, Inc. nor the names of its + * contributors may be used to endorse or promote products derived from + * this software without specific prior written permission. + * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS + * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT + * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR + * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT + * HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, + * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT + * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, + * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY + * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT + * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE + * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + ***************************************************************************/ + +/** + * @file + * @author Ivan Ristic <ivanr@webkreator.com> + */ + +#include "htp_config_auto.h" + +//inet_pton +#if _WIN32 +#include <ws2tcpip.h> +#else // mac, linux, freebsd +#include <sys/types.h> +#include <sys/socket.h> +#include <netinet/in.h> +#include <arpa/inet.h> +#endif + +#include "htp_private.h" + +/** + * Is character a linear white space character? 
+ * + * @param[in] c + * @return 0 or 1 + */ +int htp_is_lws(int c) { + if ((c == ' ') || (c == '\t')) return 1; + else return 0; +} + +/** + * Is character a separator character? + * + * @param[in] c + * @return 0 or 1 + */ +int htp_is_separator(int c) { + /* separators = "(" | ")" | "<" | ">" | "@" + | "," | ";" | ":" | "\" | <"> + | "/" | "[" | "]" | "?" | "=" + | "{" | "}" | SP | HT */ + switch (c) { + case '(': + case ')': + case '<': + case '>': + case '@': + case ',': + case ';': + case ':': + case '\\': + case '"': + case '/': + case '[': + case ']': + case '?': + case '=': + case '{': + case '}': + case ' ': + case '\t': + return 1; + break; + default: + return 0; + } +} + +/** + * Is character a text character? + * + * @param[in] c + * @return 0 or 1 + */ +int htp_is_text(int c) { + if (c == '\t') return 1; + if (c < 32) return 0; + return 1; +} + +/** + * Is character a token character? + * + * @param[in] c + * @return 0 or 1 + */ +int htp_is_token(int c) { + /* token = 1*<any CHAR except CTLs or separators> */ + /* CHAR = <any US-ASCII character (octets 0 - 127)> */ + if ((c < 32) || (c > 126)) return 0; + if (htp_is_separator(c)) return 0; + return 1; +} + +/** + * Remove all line terminators (LF, CR or CRLF) from + * the end of the line provided as input. + * + * @return 0 if nothing was removed, 1 if one or more LF characters were removed, or + * 2 if one or more CR and/or LF characters were removed. + */ +int htp_chomp(unsigned char *data, size_t *len) { + int r = 0; + + // Loop until there's no more stuff in the buffer + while (*len > 0) { + // Try one LF first + if (data[*len - 1] == LF) { + (*len)--; + r = 1; + + if (*len == 0) return r; + + // A CR is allowed before LF + if (data[*len - 1] == CR) { + (*len)--; + r = 2; + } + } else if (data[*len - 1] == CR) { + (*len)--; + r = 1; + } else return r; + } + + return r; +} + +/** + * Is character a white space character? 
+ * + * @param[in] c + * @return 0 or 1 + */ +int htp_is_space(int c) { + switch (c) { + case ' ': + case '\f': + case '\v': + case '\t': + case '\r': + case '\n': + return 1; + default: + return 0; + } +} + +/** + * Converts request method, given as a string, into a number. + * + * @param[in] method + * @return Method number of M_UNKNOWN + */ +int htp_convert_method_to_number(bstr *method) { + if (method == NULL) return HTP_M_UNKNOWN; + + // TODO Optimize using parallel matching, or something similar. + + if (bstr_cmp_c(method, "GET") == 0) return HTP_M_GET; + if (bstr_cmp_c(method, "PUT") == 0) return HTP_M_PUT; + if (bstr_cmp_c(method, "POST") == 0) return HTP_M_POST; + if (bstr_cmp_c(method, "DELETE") == 0) return HTP_M_DELETE; + if (bstr_cmp_c(method, "CONNECT") == 0) return HTP_M_CONNECT; + if (bstr_cmp_c(method, "OPTIONS") == 0) return HTP_M_OPTIONS; + if (bstr_cmp_c(method, "TRACE") == 0) return HTP_M_TRACE; + if (bstr_cmp_c(method, "PATCH") == 0) return HTP_M_PATCH; + if (bstr_cmp_c(method, "PROPFIND") == 0) return HTP_M_PROPFIND; + if (bstr_cmp_c(method, "PROPPATCH") == 0) return HTP_M_PROPPATCH; + if (bstr_cmp_c(method, "MKCOL") == 0) return HTP_M_MKCOL; + if (bstr_cmp_c(method, "COPY") == 0) return HTP_M_COPY; + if (bstr_cmp_c(method, "MOVE") == 0) return HTP_M_MOVE; + if (bstr_cmp_c(method, "LOCK") == 0) return HTP_M_LOCK; + if (bstr_cmp_c(method, "UNLOCK") == 0) return HTP_M_UNLOCK; + if (bstr_cmp_c(method, "VERSION-CONTROL") == 0) return HTP_M_VERSION_CONTROL; + if (bstr_cmp_c(method, "CHECKOUT") == 0) return HTP_M_CHECKOUT; + if (bstr_cmp_c(method, "UNCHECKOUT") == 0) return HTP_M_UNCHECKOUT; + if (bstr_cmp_c(method, "CHECKIN") == 0) return HTP_M_CHECKIN; + if (bstr_cmp_c(method, "UPDATE") == 0) return HTP_M_UPDATE; + if (bstr_cmp_c(method, "LABEL") == 0) return HTP_M_LABEL; + if (bstr_cmp_c(method, "REPORT") == 0) return HTP_M_REPORT; + if (bstr_cmp_c(method, "MKWORKSPACE") == 0) return HTP_M_MKWORKSPACE; + if (bstr_cmp_c(method, "MKACTIVITY") == 
0) return HTP_M_MKACTIVITY; + if (bstr_cmp_c(method, "BASELINE-CONTROL") == 0) return HTP_M_BASELINE_CONTROL; + if (bstr_cmp_c(method, "MERGE") == 0) return HTP_M_MERGE; + if (bstr_cmp_c(method, "INVALID") == 0) return HTP_M_INVALID; + if (bstr_cmp_c(method, "HEAD") == 0) return HTP_M_HEAD; + + return HTP_M_UNKNOWN; +} + +/** + * Is the given line empty? + * + * @param[in] data + * @param[in] len + * @return 0 or 1 + */ +int htp_is_line_empty(unsigned char *data, size_t len) { + if (((len == 1) && ((data[0] == CR) || (data[0] == LF))) || + ((len == 2) && (data[0] == CR) && (data[1] == LF))) { + return 1; + } + + return 0; +} + +/** + * Does line consist entirely of whitespace characters? + * + * @param[in] data + * @param[in] len + * @return 0 or 1 + */ +int htp_is_line_whitespace(unsigned char *data, size_t len) { + size_t i; + + for (i = 0; i < len; i++) { + if (!isspace(data[i])) { + return 0; + } + } + + return 1; +} + +/** + * Parses Content-Length string (positive decimal number). + * White space is allowed before and after the number. + * + * @param[in] b + * @return Content-Length as a number, or -1 on error. + */ +int64_t htp_parse_content_length(bstr *b, htp_connp_t *connp) { + size_t len = bstr_len(b); + unsigned char * data = (unsigned char *) bstr_ptr(b); + size_t pos = 0; + int64_t r = 0; + + if (len == 0) return -1003; + + // Ignore junk before + while ((pos < len) && (data[pos] < '0' || data[pos] > '9')) { + if (!htp_is_lws(data[pos]) && connp != NULL && r == 0) { + htp_log(connp, HTP_LOG_MARK, HTP_LOG_WARNING, 0, + "C-L value with extra data in the beginning"); + r = -1; + } + pos++; + } + if (pos == len) return -1001; + + r = bstr_util_mem_to_pint(data + pos, len - pos, 10, &pos); + // Ok to have junk afterwards + if (pos < len && connp != NULL) { + htp_log(connp, HTP_LOG_MARK, HTP_LOG_WARNING, 0, + "C-L value with extra data in the end"); + } + return r; +} + +/** + * Parses chunk length (positive hexadecimal number). 
White space is allowed before + * and after the number. An error will be returned if the chunk length is greater than + * INT32_MAX. + * + * @param[in] data + * @param[in] len + * @return Chunk length, or a negative number on error. + */ +int64_t htp_parse_chunked_length(unsigned char *data, size_t len, int *extension) { + // skip leading line feeds and other control chars + while (len) { + unsigned char c = *data; + if (!(c == 0x0d || c == 0x0a || c == 0x20 || c == 0x09 || c == 0x0b || c == 0x0c)) + break; + data++; + len--; + } + if (len == 0) + return -1004; + + // find how much of the data is correctly formatted + size_t i = 0; + while (i < len) { + unsigned char c = data[i]; + if (!(isdigit(c) || + (c >= 'a' && c <= 'f') || (c >= 'A' && c <= 'F'))) + break; + i++; + } + // cut off trailing junk + if (i != len) { + if (extension) { + size_t j = i; + while (j < len) { + if (data[j] == ';') { + *extension = 1; + break; + } + j++; + } + } + len = i; + } + + int64_t chunk_len = htp_parse_positive_integer_whitespace(data, len, 16); + if (chunk_len < 0) return chunk_len; + if (chunk_len > INT32_MAX) return -1; + return chunk_len; +} + +/** + * A somewhat forgiving parser for a positive integer in a given base. + * Only LWS is allowed before and after the number. + * + * @param[in] data + * @param[in] len + * @param[in] base + * @return The parsed number on success; a negative number on error. 
+ */ +int64_t htp_parse_positive_integer_whitespace(unsigned char *data, size_t len, int base) { + if (len == 0) return -1003; + + size_t last_pos; + size_t pos = 0; + + // Ignore LWS before + while ((pos < len) && (htp_is_lws(data[pos]))) pos++; + if (pos == len) return -1001; + + int64_t r = bstr_util_mem_to_pint(data + pos, len - pos, base, &last_pos); + if (r < 0) return r; + + // Move after the last digit + pos += last_pos; + + // Ignore LWS after + while (pos < len) { + if (!htp_is_lws(data[pos])) { + return -1002; + } + + pos++; + } + + return r; +} + +#ifdef HTP_DEBUG + +/** + * Prints one log message to stderr. + * + * @param[in] stream + * @param[in] log + */ +void htp_print_log(FILE *stream, htp_log_t *log) { + if (log->code != 0) { + fprintf(stream, "[%d][code %d][file %s][line %d] %s\n", log->level, + log->code, log->file, log->line, log->msg); + } else { + fprintf(stream, "[%d][file %s][line %d] %s\n", log->level, + log->file, log->line, log->msg); + } +} +#endif + +/** + * Records one log message. + * + * @param[in] connp + * @param[in] file + * @param[in] line + * @param[in] level + * @param[in] code + * @param[in] fmt + */ +void htp_log(htp_connp_t *connp, const char *file, int line, enum htp_log_level_t level, int code, const char *fmt, ...) { + if (connp == NULL) return; + + char buf[1024]; + va_list args; + + // Ignore messages below our log level. + if (connp->cfg->log_level < level) { + return; + } + + va_start(args, fmt); + + int r = vsnprintf(buf, 1024, fmt, args); + + va_end(args); + + if (r < 0) { + snprintf(buf, 1024, "[vnsprintf returned error %d]", r); + } else if (r >= 1024) { + // Indicate overflow with a '+' at the end. + buf[1022] = '+'; + buf[1023] = '\0'; + } + + // Create a new log entry. 
+ + htp_log_t *log = calloc(1, sizeof (htp_log_t)); + if (log == NULL) return; + + log->connp = connp; + log->file = file; + log->line = line; + log->level = level; + log->code = code; + log->msg = strdup(buf); + + if (htp_list_add(connp->conn->messages, log) != HTP_OK) { + free((void *) log->msg); + free(log); + return; + } + + if (level == HTP_LOG_ERROR) { + connp->last_error = log; + } + + #ifdef HTP_DEBUG + fprintf(stderr, "[LOG] %s\n", log->msg); + #endif + + /* coverity[check_return] */ + htp_hook_run_all(connp->cfg->hook_log, log); +} + +/** + * Determines if the given line is a continuation (of some previous line). + * + * @param[in] data + * @param[in] len + * @return 0 or 1 for false and true, respectively. Returns -1 on error (NULL pointer or length zero). + */ +int htp_connp_is_line_folded(unsigned char *data, size_t len) { + if ((data == NULL) || (len == 0)) return -1; + return htp_is_folding_char(data[0]); +} + +int htp_is_folding_char(int c) { + if (htp_is_lws(c) || c == 0) return 1; + else return 0; +} + +/** + * Determines if the given line is a request terminator. + * + * @param[in] connp + * @param[in] data + * @param[in] len + * @return 0 or 1 + */ +int htp_connp_is_line_terminator(htp_connp_t *connp, unsigned char *data, size_t len, int next_no_lf) { + // Is this the end of request headers? + switch (connp->cfg->server_personality) { + case HTP_SERVER_IIS_5_1: + // IIS 5 will accept a whitespace line as a terminator + if (htp_is_line_whitespace(data, len)) { + return 1; + } + + // Fall through + default: + // Treat an empty line as terminator + if (htp_is_line_empty(data, len)) { + return 1; + } + // Only space is terminator if terminator does not follow right away + if (len == 2 && htp_is_lws(data[0]) && data[1] == LF) { + return next_no_lf; + } + break; + } + + return 0; +} + +/** + * Determines if the given line can be ignored when it appears before a request. 
+ * + * @param[in] connp + * @param[in] data + * @param[in] len + * @return 0 or 1 + */ +int htp_connp_is_line_ignorable(htp_connp_t *connp, unsigned char *data, size_t len) { + return htp_connp_is_line_terminator(connp, data, len, 0); +} + +static htp_status_t htp_parse_port(unsigned char *data, size_t len, int *port, int *invalid) { + if (len == 0) { + *port = -1; + *invalid = 1; + return HTP_OK; + } + + int64_t port_parsed = htp_parse_positive_integer_whitespace(data, len, 10); + + if (port_parsed < 0) { + // Failed to parse the port number. + *port = -1; + *invalid = 1; + } else if ((port_parsed > 0) && (port_parsed < 65536)) { + // Valid port number. + *port = (int) port_parsed; + } else { + // Port number out of range. + *port = -1; + *invalid = 1; + } + + return HTP_OK; +} + +/** + * Parses an authority string, which consists of a hostname with an optional port number; username + * and password are not allowed and will not be handled. + * + * @param[in] hostport + * @param[out] hostname A bstring containing the hostname, or NULL if the hostname is invalid. If this value + * is not NULL, the caller assumes responsibility for memory management. + * @param[out] port Port as text, or NULL if not provided. + * @param[out] port_number Port number, or -1 if the port is not present or invalid. + * @param[out] invalid Set to 1 if any part of the authority is invalid. + * @return HTP_OK on success, HTP_ERROR on memory allocation failure. + */ +htp_status_t htp_parse_hostport(bstr *hostport, bstr **hostname, bstr **port, int *port_number, int *invalid) { + if ((hostport == NULL) || (hostname == NULL) || (port_number == NULL) || (invalid == NULL)) return HTP_ERROR; + + *hostname = NULL; + if (port != NULL) { + *port = NULL; + } + *port_number = -1; + *invalid = 0; + + unsigned char *data = bstr_ptr(hostport); + size_t len = bstr_len(hostport); + + bstr_util_mem_trim(&data, &len); + + if (len == 0) { + *invalid = 1; + return HTP_OK; + } + + // Check for an IPv6 address. 
+ if (data[0] == '[') { + // IPv6 host. + + // Find the end of the IPv6 address. + size_t pos = 0; + while ((pos < len) && (data[pos] != ']')) pos++; + if (pos == len) { + *invalid = 1; + return HTP_OK; + } + + *hostname = bstr_dup_mem(data, pos + 1); + if (*hostname == NULL) return HTP_ERROR; + + // Over the ']'. + pos++; + if (pos == len) return HTP_OK; + + // Handle port. + if (data[pos] == ':') { + if (port != NULL) { + *port = bstr_dup_mem(data + pos + 1, len - pos - 1); + if (*port == NULL) { + bstr_free(*hostname); + return HTP_ERROR; + } + } + + return htp_parse_port(data + pos + 1, len - pos - 1, port_number, invalid); + } else { + *invalid = 1; + return HTP_OK; + } + } else { + // Not IPv6 host. + + // Is there a colon? + unsigned char *colon = memchr(data, ':', len); + if (colon == NULL) { + // Hostname alone, no port. + + *hostname = bstr_dup_mem(data, len); + if (*hostname == NULL) return HTP_ERROR; + + bstr_to_lowercase(*hostname); + } else { + // Hostname and port. + + // Ignore whitespace at the end of hostname. + unsigned char *hostend = colon; + while ((hostend > data) && (isspace(*(hostend - 1)))) hostend--; + + *hostname = bstr_dup_mem(data, hostend - data); + if (*hostname == NULL) return HTP_ERROR; + + if (port != NULL) { + *port = bstr_dup_mem(colon + 1, len - (colon + 1 - data)); + if (*port == NULL) { + bstr_free(*hostname); + return HTP_ERROR; + } + } + + return htp_parse_port(colon + 1, len - (colon + 1 - data), port_number, invalid); + } + } + + return HTP_OK; +} + +/** + * Parses hostport provided in the URI. + * + * @param[in] connp + * @param[in] hostport + * @param[in] uri + * @return HTP_OK on success or HTP_ERROR error. 
+ */ +int htp_parse_uri_hostport(htp_connp_t *connp, bstr *hostport, htp_uri_t *uri) { + int invalid; + + htp_status_t rc = htp_parse_hostport(hostport, &(uri->hostname), &(uri->port), &(uri->port_number), &invalid); + if (rc != HTP_OK) return rc; + + if (invalid) { + connp->in_tx->flags |= HTP_HOSTU_INVALID; + } + + if (uri->hostname != NULL) { + if (htp_validate_hostname(uri->hostname) == 0) { + connp->in_tx->flags |= HTP_HOSTU_INVALID; + } + } + + return HTP_OK; +} + +/** + * Parses hostport provided in the Host header. + * + * @param[in] hostport + * @param[out] hostname + * @param[out] port + * @param[out] port_number + * @param[out] flags + * @return HTP_OK on success or HTP_ERROR error. + */ +htp_status_t htp_parse_header_hostport(bstr *hostport, bstr **hostname, bstr **port, int *port_number, uint64_t *flags) { + int invalid; + + htp_status_t rc = htp_parse_hostport(hostport, hostname, port, port_number, &invalid); + if (rc != HTP_OK) return rc; + + if (invalid) { + *flags |= HTP_HOSTH_INVALID; + } + + if (*hostname != NULL) { + if (htp_validate_hostname(*hostname) == 0) { + *flags |= HTP_HOSTH_INVALID; + } + } + + return HTP_OK; +} + +/** + * Parses request URI, making no attempt to validate the contents. + * + * @param[in] input + * @param[in] uri + * @return HTP_ERROR on memory allocation failure, HTP_OK otherwise + */ +int htp_parse_uri(bstr *input, htp_uri_t **uri) { + // Allow a htp_uri_t structure to be provided on input, + // but allocate a new one if the structure is NULL. + if (*uri == NULL) { + *uri = calloc(1, sizeof (htp_uri_t)); + if (*uri == NULL) return HTP_ERROR; + } + + if (input == NULL) { + // The input might be NULL on requests that don't actually + // contain the URI. We allow that. + return HTP_OK; + } + + unsigned char *data = bstr_ptr(input); + size_t len = bstr_len(input); + // remove trailing spaces + while (len > 0) { + if (data[len-1] != ' ') { + break; + } + len--; + } + size_t start, pos; + + if (len == 0) { + // Empty string. 
+ return HTP_OK; + } + + pos = 0; + + // Scheme test: if it doesn't start with a forward slash character (which it must + // for the contents to be a path or an authority, then it must be the scheme part + if (data[0] != '/') { + // Parse scheme + + // Find the colon, which marks the end of the scheme part + start = pos; + while ((pos < len) && (data[pos] != ':')) pos++; + + if (pos >= len) { + // We haven't found a colon, which means that the URI + // is invalid. Apache will ignore this problem and assume + // the URI contains an invalid path so, for the time being, + // we are going to do the same. + pos = 0; + } else { + // Make a copy of the scheme + (*uri)->scheme = bstr_dup_mem(data + start, pos - start); + if ((*uri)->scheme == NULL) return HTP_ERROR; + + // Go over the colon + pos++; + } + } + + // Authority test: two forward slash characters and it's an authority. + // One, three or more slash characters, and it's a path. We, however, + // only attempt to parse authority if we've seen a scheme. + if ((*uri)->scheme != NULL) + if ((pos + 2 < len) && (data[pos] == '/') && (data[pos + 1] == '/') && (data[pos + 2] != '/')) { + // Parse authority + + // Go over the two slash characters + start = pos = pos + 2; + + // Authority ends with a question mark, forward slash or hash + while ((pos < len) && (data[pos] != '?') && (data[pos] != '/') && (data[pos] != '#')) pos++; + + unsigned char *hostname_start; + size_t hostname_len; + + // Are the credentials included in the authority? 
+ unsigned char *m = memchr(data + start, '@', pos - start); + if (m != NULL) { + // Credentials present + unsigned char *credentials_start = data + start; + size_t credentials_len = m - data - start; + + // Figure out just the hostname part + hostname_start = data + start + credentials_len + 1; + hostname_len = pos - start - credentials_len - 1; + + // Extract the username and the password + m = memchr(credentials_start, ':', credentials_len); + if (m != NULL) { + // Username and password + (*uri)->username = bstr_dup_mem(credentials_start, m - credentials_start); + if ((*uri)->username == NULL) return HTP_ERROR; + (*uri)->password = bstr_dup_mem(m + 1, credentials_len - (m - credentials_start) - 1); + if ((*uri)->password == NULL) return HTP_ERROR; + } else { + // Username alone + (*uri)->username = bstr_dup_mem(credentials_start, credentials_len); + if ((*uri)->username == NULL) return HTP_ERROR; + } + } else { + // No credentials + hostname_start = data + start; + hostname_len = pos - start; + } + + // Parsing authority without credentials. + if ((hostname_len > 0) && (hostname_start[0] == '[')) { + // IPv6 address. + + m = memchr(hostname_start, ']', hostname_len); + if (m == NULL) { + // Invalid IPv6 address; use the entire string as hostname. + (*uri)->hostname = bstr_dup_mem(hostname_start, hostname_len); + if ((*uri)->hostname == NULL) return HTP_ERROR; + } else { + (*uri)->hostname = bstr_dup_mem(hostname_start, m - hostname_start + 1); + if ((*uri)->hostname == NULL) return HTP_ERROR; + + // Is there a port? + hostname_len = hostname_len - (m - hostname_start + 1); + hostname_start = m + 1; + + // Port string + m = memchr(hostname_start, ':', hostname_len); + if (m != NULL) { + size_t port_len = hostname_len - (m - hostname_start) - 1; + (*uri)->port = bstr_dup_mem(m + 1, port_len); + if ((*uri)->port == NULL) return HTP_ERROR; + } + } + } else { + // Not IPv6 address. 
+ + m = memchr(hostname_start, ':', hostname_len); + if (m != NULL) { + size_t port_len = hostname_len - (m - hostname_start) - 1; + hostname_len = hostname_len - port_len - 1; + + // Port string + (*uri)->port = bstr_dup_mem(m + 1, port_len); + if ((*uri)->port == NULL) return HTP_ERROR; + } + + // Hostname + (*uri)->hostname = bstr_dup_mem(hostname_start, hostname_len); + if ((*uri)->hostname == NULL) return HTP_ERROR; + } + } + + // Path + start = pos; + + // The path part will end with a question mark or a hash character, which + // mark the beginning of the query part or the fragment part, respectively. + while ((pos < len) && (data[pos] != '?') && (data[pos] != '#')) pos++; + + // Path + (*uri)->path = bstr_dup_mem(data + start, pos - start); + if ((*uri)->path == NULL) return HTP_ERROR; + + if (pos == len) return HTP_OK; + + // Query + if (data[pos] == '?') { + // Step over the question mark + start = pos + 1; + + // The query part will end with the end of the input + // or the beginning of the fragment part + while ((pos < len) && (data[pos] != '#')) pos++; + + // Query string + (*uri)->query = bstr_dup_mem(data + start, pos - start); + if ((*uri)->query == NULL) return HTP_ERROR; + + if (pos == len) return HTP_OK; + } + + // Fragment + if (data[pos] == '#') { + // Step over the hash character + start = pos + 1; + + // Fragment; ends with the end of the input + (*uri)->fragment = bstr_dup_mem(data + start, len - start); + if ((*uri)->fragment == NULL) return HTP_ERROR; + } + + return HTP_OK; +} + +/** + * Convert two input bytes, pointed to by the pointer parameter, + * into a single byte by assuming the input consists of hexadecimal + * characters. This function will happily convert invalid input. + * + * @param[in] what + * @return hex-decoded byte + */ +static unsigned char x2c(unsigned char *what) { + register unsigned char digit; + + digit = (what[0] >= 'A' ? ((what[0] & 0xdf) - 'A') + 10 : (what[0] - '0')); + digit *= 16; + digit += (what[1] >= 'A' ? 
((what[1] & 0xdf) - 'A') + 10 : (what[1] - '0')); + + return digit; +} + +/** + * Convert a Unicode codepoint into a single-byte, using best-fit + * mapping (as specified in the provided configuration structure). + * + * @param[in] cfg + * @param[in] codepoint + * @return converted single byte + */ +static uint8_t bestfit_codepoint(htp_cfg_t *cfg, enum htp_decoder_ctx_t ctx, uint32_t codepoint) { + // Is it a single-byte codepoint? + if (codepoint < 0x100) { + return (uint8_t) codepoint; + } + + // Our current implementation converts only the 2-byte codepoints. + if (codepoint > 0xffff) { + return cfg->decoder_cfgs[ctx].bestfit_replacement_byte; + } + + uint8_t *p = cfg->decoder_cfgs[ctx].bestfit_map; + + // TODO Optimize lookup. + + for (;;) { + uint32_t x = (p[0] << 8) + p[1]; + + if (x == 0) { + return cfg->decoder_cfgs[ctx].bestfit_replacement_byte; + } + + if (x == codepoint) { + return p[2]; + } + + // Move to the next triplet + p += 3; + } +} + +/** + * Decode a UTF-8 encoded path. Overlong characters will be decoded, invalid + * characters will be left as-is. Best-fit mapping will be used to convert + * UTF-8 into a single-byte stream. + * + * @param[in] cfg + * @param[in] tx + * @param[in] path + */ +void htp_utf8_decode_path_inplace(htp_cfg_t *cfg, htp_tx_t *tx, bstr *path) { + if (path == NULL) return; + + uint8_t *data = bstr_ptr(path); + if (data == NULL) return; + + size_t len = bstr_len(path); + size_t rpos = 0; + size_t wpos = 0; + uint32_t codepoint = 0; + uint32_t state = HTP_UTF8_ACCEPT; + uint32_t counter = 0; + uint8_t seen_valid = 0; + + while ((rpos < len)&&(wpos < len)) { + counter++; + + switch (htp_utf8_decode_allow_overlong(&state, &codepoint, data[rpos])) { + case HTP_UTF8_ACCEPT: + if (counter == 1) { + // ASCII character, which we just copy. + data[wpos++] = (uint8_t) codepoint; + } else { + // A valid UTF-8 character, which we need to convert. + + seen_valid = 1; + + // Check for overlong characters and set the flag accordingly. 
+ switch (counter) { + case 2: + if (codepoint < 0x80) { + tx->flags |= HTP_PATH_UTF8_OVERLONG; + } + break; + case 3: + if (codepoint < 0x800) { + tx->flags |= HTP_PATH_UTF8_OVERLONG; + } + break; + case 4: + if (codepoint < 0x10000) { + tx->flags |= HTP_PATH_UTF8_OVERLONG; + } + break; + } + + // Special flag for half-width/full-width evasion. + if ((codepoint >= 0xff00) && (codepoint <= 0xffef)) { + tx->flags |= HTP_PATH_HALF_FULL_RANGE; + } + + // Use best-fit mapping to convert to a single byte. + data[wpos++] = bestfit_codepoint(cfg, HTP_DECODER_URL_PATH, codepoint); + } + + // Advance over the consumed byte and reset the byte counter. + rpos++; + counter = 0; + + break; + + case HTP_UTF8_REJECT: + // Invalid UTF-8 character. + + tx->flags |= HTP_PATH_UTF8_INVALID; + + // Is the server expected to respond with 400? + if (cfg->decoder_cfgs[HTP_DECODER_URL_PATH].utf8_invalid_unwanted != HTP_UNWANTED_IGNORE) { + tx->response_status_expected_number = cfg->decoder_cfgs[HTP_DECODER_URL_PATH].utf8_invalid_unwanted; + } + + // Output the replacement byte, replacing one or more invalid bytes. + data[wpos++] = cfg->decoder_cfgs[HTP_DECODER_URL_PATH].bestfit_replacement_byte; + + // If the invalid byte was first in a sequence, consume it. Otherwise, + // assume it's the starting byte of the next character. + if (counter == 1) { + rpos++; + } + + // Reset the decoder state and continue decoding. + state = HTP_UTF8_ACCEPT; + codepoint = 0; + counter = 0; + + break; + + default: + // Keep going; the character is not yet formed. + rpos++; + break; + } + } + + // Did the input stream seem like a valid UTF-8 string? + if ((seen_valid) && (!(tx->flags & HTP_PATH_UTF8_INVALID))) { + tx->flags |= HTP_PATH_UTF8_VALID; + } + + // Adjust the length of the string, because + // we're doing in-place decoding. + bstr_adjust_len(path, wpos); +} + +/** + * Validate a path that is quite possibly UTF-8 encoded. 
+ * + * @param[in] tx + * @param[in] path + */ +void htp_utf8_validate_path(htp_tx_t *tx, bstr *path) { + unsigned char *data = bstr_ptr(path); + size_t len = bstr_len(path); + size_t rpos = 0; + uint32_t codepoint = 0; + uint32_t state = HTP_UTF8_ACCEPT; + uint32_t counter = 0; // How many bytes used by a UTF-8 character. + uint8_t seen_valid = 0; + + while (rpos < len) { + counter++; + + switch (htp_utf8_decode_allow_overlong(&state, &codepoint, data[rpos])) { + case HTP_UTF8_ACCEPT: + // We have a valid character. + + if (counter > 1) { + // A valid UTF-8 character, consisting of 2 or more bytes. + + seen_valid = 1; + + // Check for overlong characters and set the flag accordingly. + switch (counter) { + case 2: + if (codepoint < 0x80) { + tx->flags |= HTP_PATH_UTF8_OVERLONG; + } + break; + case 3: + if (codepoint < 0x800) { + tx->flags |= HTP_PATH_UTF8_OVERLONG; + } + break; + case 4: + if (codepoint < 0x10000) { + tx->flags |= HTP_PATH_UTF8_OVERLONG; + } + break; + } + } + + // Special flag for half-width/full-width evasion. + if ((codepoint > 0xfeff) && (codepoint < 0x010000)) { + tx->flags |= HTP_PATH_HALF_FULL_RANGE; + } + + // Advance over the consumed byte and reset the byte counter. + rpos++; + counter = 0; + + break; + + case HTP_UTF8_REJECT: + // Invalid UTF-8 character. + + tx->flags |= HTP_PATH_UTF8_INVALID; + + // Override the decoder state because we want to continue decoding. + state = HTP_UTF8_ACCEPT; + + // Advance over the consumed byte and reset the byte counter. + rpos++; + counter = 0; + + break; + + default: + // Keep going; the character is not yet formed. + rpos++; + break; + } + } + + // Did the input stream seem like a valid UTF-8 string? + if ((seen_valid) && (!(tx->flags & HTP_PATH_UTF8_INVALID))) { + tx->flags |= HTP_PATH_UTF8_VALID; + } +} + +/** + * Decode a %u-encoded character, using best-fit mapping as necessary. Path version. 
+ * + * @param[in] cfg + * @param[in] tx + * @param[in] data + * @return decoded byte + */ +static uint8_t decode_u_encoding_path(htp_cfg_t *cfg, htp_tx_t *tx, unsigned char *data) { + uint8_t c1 = x2c(data); + uint8_t c2 = x2c(data + 2); + uint8_t r = cfg->decoder_cfgs[HTP_DECODER_URL_PATH].bestfit_replacement_byte; + + if (c1 == 0x00) { + r = c2; + tx->flags |= HTP_PATH_OVERLONG_U; + } else { + // Check for fullwidth form evasion + if (c1 == 0xff) { + tx->flags |= HTP_PATH_HALF_FULL_RANGE; + } + + if (cfg->decoder_cfgs[HTP_DECODER_URL_PATH].u_encoding_unwanted != HTP_UNWANTED_IGNORE) { + tx->response_status_expected_number = cfg->decoder_cfgs[HTP_DECODER_URL_PATH].u_encoding_unwanted; + } + + // Use best-fit mapping + unsigned char *p = cfg->decoder_cfgs[HTP_DECODER_URL_PATH].bestfit_map; + + // TODO Optimize lookup. + + for (;;) { + // Have we reached the end of the map? + if ((p[0] == 0) && (p[1] == 0)) { + break; + } + + // Have we found the mapping we're looking for? + if ((p[0] == c1) && (p[1] == c2)) { + r = p[2]; + break; + } + + // Move to the next triplet + p += 3; + } + } + + // Check for encoded path separators + if ((r == '/') || ((cfg->decoder_cfgs[HTP_DECODER_URL_PATH].backslash_convert_slashes) && (r == '\\'))) { + tx->flags |= HTP_PATH_ENCODED_SEPARATOR; + } + + return r; +} + +/** + * Decode a %u-encoded character, using best-fit mapping as necessary. Params version. + * + * @param[in] cfg + * @param[in] tx + * @param[in] data + * @return decoded byte + */ +static uint8_t decode_u_encoding_params(htp_cfg_t *cfg, enum htp_decoder_ctx_t ctx, unsigned char *data, uint64_t *flags) { + uint8_t c1 = x2c(data); + uint8_t c2 = x2c(data + 2); + + // Check for overlong usage first. + if (c1 == 0) { + (*flags) |= HTP_URLEN_OVERLONG_U; + return c2; + } + + // Both bytes were used. + + // Detect half-width and full-width range. + if ((c1 == 0xff) && (c2 <= 0xef)) { + (*flags) |= HTP_URLEN_HALF_FULL_RANGE; + } + + // Use best-fit mapping. 
+ unsigned char *p = cfg->decoder_cfgs[ctx].bestfit_map; + uint8_t r = cfg->decoder_cfgs[ctx].bestfit_replacement_byte; + + // TODO Optimize lookup. + + for (;;) { + // Have we reached the end of the map? + if ((p[0] == 0) && (p[1] == 0)) { + break; + } + + // Have we found the mapping we're looking for? + if ((p[0] == c1) && (p[1] == c2)) { + r = p[2]; + break; + } + + // Move to the next triplet + p += 3; + } + + return r; +} + +/** + * Decode a request path according to the settings in the + * provided configuration structure. + * + * @param[in] cfg + * @param[in] tx + * @param[in] path + */ +htp_status_t htp_decode_path_inplace(htp_tx_t *tx, bstr *path) { + if (path == NULL) return HTP_ERROR; + unsigned char *data = bstr_ptr(path); + if (data == NULL) return HTP_ERROR; + + size_t len = bstr_len(path); + + htp_cfg_t *cfg = tx->cfg; + + size_t rpos = 0; + size_t wpos = 0; + int previous_was_separator = 0; + + while ((rpos < len) && (wpos < len)) { + uint8_t c = data[rpos]; + + // Decode encoded characters + if (c == '%') { + if (rpos + 2 < len) { + int handled = 0; + + if (cfg->decoder_cfgs[HTP_DECODER_URL_PATH].u_encoding_decode) { + // Check for the %u encoding + if ((data[rpos + 1] == 'u') || (data[rpos + 1] == 'U')) { + handled = 1; + + if (cfg->decoder_cfgs[HTP_DECODER_URL_PATH].u_encoding_unwanted != HTP_UNWANTED_IGNORE) { + tx->response_status_expected_number = cfg->decoder_cfgs[HTP_DECODER_URL_PATH].u_encoding_unwanted; + } + + if (rpos + 5 < len) { + if (isxdigit(data[rpos + 2]) && (isxdigit(data[rpos + 3])) + && isxdigit(data[rpos + 4]) && (isxdigit(data[rpos + 5]))) { + // Decode a valid %u encoding + c = decode_u_encoding_path(cfg, tx, &data[rpos + 2]); + rpos += 6; + + if (c == 0) { + tx->flags |= HTP_PATH_ENCODED_NUL; + + if (cfg->decoder_cfgs[HTP_DECODER_URL_PATH].nul_encoded_unwanted != HTP_UNWANTED_IGNORE) { + tx->response_status_expected_number = cfg->decoder_cfgs[HTP_DECODER_URL_PATH].nul_encoded_unwanted; + } + } + } else { + // Invalid %u 
encoding + tx->flags |= HTP_PATH_INVALID_ENCODING; + + if (cfg->decoder_cfgs[HTP_DECODER_URL_PATH].url_encoding_invalid_unwanted != HTP_UNWANTED_IGNORE) { + tx->response_status_expected_number = cfg->decoder_cfgs[HTP_DECODER_URL_PATH].url_encoding_invalid_unwanted; + } + + switch (cfg->decoder_cfgs[HTP_DECODER_URL_PATH].url_encoding_invalid_handling) { + case HTP_URL_DECODE_REMOVE_PERCENT: + // Do not place anything in output; eat + // the percent character + rpos++; + continue; + break; + case HTP_URL_DECODE_PRESERVE_PERCENT: + // Leave the percent character in output + rpos++; + break; + case HTP_URL_DECODE_PROCESS_INVALID: + // Decode invalid %u encoding + c = decode_u_encoding_path(cfg, tx, &data[rpos + 2]); + rpos += 6; + break; + } + } + } else { + // Invalid %u encoding (not enough data) + tx->flags |= HTP_PATH_INVALID_ENCODING; + + if (cfg->decoder_cfgs[HTP_DECODER_URL_PATH].url_encoding_invalid_unwanted != HTP_UNWANTED_IGNORE) { + tx->response_status_expected_number = cfg->decoder_cfgs[HTP_DECODER_URL_PATH].url_encoding_invalid_unwanted; + } + + switch (cfg->decoder_cfgs[HTP_DECODER_URL_PATH].url_encoding_invalid_handling) { + case HTP_URL_DECODE_REMOVE_PERCENT: + // Do not place anything in output; eat + // the percent character + rpos++; + continue; + break; + case HTP_URL_DECODE_PRESERVE_PERCENT: + // Leave the percent character in output + rpos++; + break; + case HTP_URL_DECODE_PROCESS_INVALID: + // Cannot decode, because there's not enough data. + // Leave the percent character in output + rpos++; + // TODO Configurable handling. 
+ break; + } + } + } + } + + // Handle standard URL encoding + if (!handled) { + if ((isxdigit(data[rpos + 1])) && (isxdigit(data[rpos + 2]))) { + c = x2c(&data[rpos + 1]); + + if (c == 0) { + tx->flags |= HTP_PATH_ENCODED_NUL; + + if (cfg->decoder_cfgs[HTP_DECODER_URL_PATH].nul_encoded_unwanted != HTP_UNWANTED_IGNORE) { + tx->response_status_expected_number = cfg->decoder_cfgs[HTP_DECODER_URL_PATH].nul_encoded_unwanted; + } + + if (cfg->decoder_cfgs[HTP_DECODER_URL_PATH].nul_encoded_terminates) { + bstr_adjust_len(path, wpos); + return HTP_OK; + } + } + + if ((c == '/') || ((cfg->decoder_cfgs[HTP_DECODER_URL_PATH].backslash_convert_slashes) && (c == '\\'))) { + tx->flags |= HTP_PATH_ENCODED_SEPARATOR; + + if (cfg->decoder_cfgs[HTP_DECODER_URL_PATH].path_separators_encoded_unwanted != HTP_UNWANTED_IGNORE) { + tx->response_status_expected_number = cfg->decoder_cfgs[HTP_DECODER_URL_PATH].path_separators_encoded_unwanted; + } + + if (cfg->decoder_cfgs[HTP_DECODER_URL_PATH].path_separators_decode) { + // Decode + rpos += 3; + } else { + // Leave encoded + c = '%'; + rpos++; + } + } else { + // Decode + rpos += 3; + } + } else { + // Invalid encoding + tx->flags |= HTP_PATH_INVALID_ENCODING; + + if (cfg->decoder_cfgs[HTP_DECODER_URL_PATH].url_encoding_invalid_unwanted != HTP_UNWANTED_IGNORE) { + tx->response_status_expected_number = cfg->decoder_cfgs[HTP_DECODER_URL_PATH].url_encoding_invalid_unwanted; + } + + switch (cfg->decoder_cfgs[HTP_DECODER_URL_PATH].url_encoding_invalid_handling) { + case HTP_URL_DECODE_REMOVE_PERCENT: + // Do not place anything in output; eat + // the percent character + rpos++; + continue; + break; + case HTP_URL_DECODE_PRESERVE_PERCENT: + // Leave the percent character in output + rpos++; + break; + case HTP_URL_DECODE_PROCESS_INVALID: + // Decode + c = x2c(&data[rpos + 1]); + rpos += 3; + // Note: What if an invalid encoding decodes into a path + // separator? 
This is theoretical at the moment, because + // the only platform we know doesn't convert separators is + // Apache, who will also respond with 400 if invalid encoding + // is encountered. Thus no check for a separator here. + break; + default: + // Unknown setting + return HTP_ERROR; + break; + } + } + } + } else { + // Invalid URL encoding (not enough data) + tx->flags |= HTP_PATH_INVALID_ENCODING; + + if (cfg->decoder_cfgs[HTP_DECODER_URL_PATH].url_encoding_invalid_unwanted != HTP_UNWANTED_IGNORE) { + tx->response_status_expected_number = cfg->decoder_cfgs[HTP_DECODER_URL_PATH].url_encoding_invalid_unwanted; + } + + switch (cfg->decoder_cfgs[HTP_DECODER_URL_PATH].url_encoding_invalid_handling) { + case HTP_URL_DECODE_REMOVE_PERCENT: + // Do not place anything in output; eat + // the percent character + rpos++; + continue; + break; + case HTP_URL_DECODE_PRESERVE_PERCENT: + // Leave the percent character in output + rpos++; + break; + case HTP_URL_DECODE_PROCESS_INVALID: + // Cannot decode, because there's not enough data. + // Leave the percent character in output. + // TODO Configurable handling. + rpos++; + break; + } + } + } else { + // One non-encoded character + + // Is it a NUL byte? 
+ if (c == 0) { + if (cfg->decoder_cfgs[HTP_DECODER_URL_PATH].nul_raw_unwanted != HTP_UNWANTED_IGNORE) { + tx->response_status_expected_number = cfg->decoder_cfgs[HTP_DECODER_URL_PATH].nul_raw_unwanted; + } + + if (cfg->decoder_cfgs[HTP_DECODER_URL_PATH].nul_raw_terminates) { + // Terminate path with a raw NUL byte + bstr_adjust_len(path, wpos); + return HTP_OK; + break; + } + } + + rpos++; + } + + // Place the character into output + + // Check for control characters + if (c < 0x20) { + if (cfg->decoder_cfgs[HTP_DECODER_URL_PATH].control_chars_unwanted != HTP_UNWANTED_IGNORE) { + tx->response_status_expected_number = cfg->decoder_cfgs[HTP_DECODER_URL_PATH].control_chars_unwanted; + } + } + + // Convert backslashes to forward slashes, if necessary + if ((c == '\\') && (cfg->decoder_cfgs[HTP_DECODER_URL_PATH].backslash_convert_slashes)) { + c = '/'; + } + + // Lowercase characters, if necessary + if (cfg->decoder_cfgs[HTP_DECODER_URL_PATH].convert_lowercase) { + c = (uint8_t) tolower(c); + } + + // If we're compressing separators then we need + // to track if the previous character was a separator + if (cfg->decoder_cfgs[HTP_DECODER_URL_PATH].path_separators_compress) { + if (c == '/') { + if (!previous_was_separator) { + data[wpos++] = c; + previous_was_separator = 1; + } else { + // Do nothing; we don't want + // another separator in output + } + } else { + data[wpos++] = c; + previous_was_separator = 0; + } + } else { + data[wpos++] = c; + } + } + + bstr_adjust_len(path, wpos); + + return HTP_OK; +} + +htp_status_t htp_tx_urldecode_uri_inplace(htp_tx_t *tx, bstr *input) { + uint64_t flags = 0; + + htp_status_t rc = htp_urldecode_inplace_ex(tx->cfg, HTP_DECODER_URL_PATH, input, &flags, &(tx->response_status_expected_number)); + + if (flags & HTP_URLEN_INVALID_ENCODING) { + tx->flags |= HTP_PATH_INVALID_ENCODING; + } + + if (flags & HTP_URLEN_ENCODED_NUL) { + tx->flags |= HTP_PATH_ENCODED_NUL; + } + + if (flags & HTP_URLEN_RAW_NUL) { + tx->flags |= 
HTP_PATH_RAW_NUL; + } + + return rc; +} + +htp_status_t htp_tx_urldecode_params_inplace(htp_tx_t *tx, bstr *input) { + return htp_urldecode_inplace_ex(tx->cfg, HTP_DECODER_URLENCODED, input, &(tx->flags), &(tx->response_status_expected_number)); +} + +htp_status_t htp_urldecode_inplace(htp_cfg_t *cfg, enum htp_decoder_ctx_t ctx, bstr *input, uint64_t *flags) { + int expected_status_code = 0; + return htp_urldecode_inplace_ex(cfg, ctx, input, flags, &expected_status_code); +} + +htp_status_t htp_urldecode_inplace_ex(htp_cfg_t *cfg, enum htp_decoder_ctx_t ctx, bstr *input, uint64_t *flags, int *expected_status_code) { + if (input == NULL) return HTP_ERROR; + + unsigned char *data = bstr_ptr(input); + if (data == NULL) return HTP_ERROR; + size_t len = bstr_len(input); + + size_t rpos = 0; + size_t wpos = 0; + + while ((rpos < len) && (wpos < len)) { + uint8_t c = data[rpos]; + + // Decode encoded characters. + if (c == '%') { + // Need at least 2 additional bytes for %HH. + if (rpos + 2 < len) { + int handled = 0; + + // Decode %uHHHH encoding, but only if allowed in configuration. + if (cfg->decoder_cfgs[ctx].u_encoding_decode) { + // The next character must be a case-insensitive u. + if ((data[rpos + 1] == 'u') || (data[rpos + 1] == 'U')) { + handled = 1; + + if (cfg->decoder_cfgs[ctx].u_encoding_unwanted != HTP_UNWANTED_IGNORE) { + (*expected_status_code) = cfg->decoder_cfgs[ctx].u_encoding_unwanted; + } + + // Need at least 5 additional bytes for %uHHHH. + if (rpos + 5 < len) { + if (isxdigit(data[rpos + 2]) && (isxdigit(data[rpos + 3])) + && isxdigit(data[rpos + 4]) && (isxdigit(data[rpos + 5]))) { + // Decode a valid %u encoding. + c = decode_u_encoding_params(cfg, ctx, &(data[rpos + 2]), flags); + rpos += 6; + } else { + // Invalid %u encoding (could not find 4 xdigits). 
+ (*flags) |= HTP_URLEN_INVALID_ENCODING; + + if (cfg->decoder_cfgs[ctx].url_encoding_invalid_unwanted != HTP_UNWANTED_IGNORE) { + (*expected_status_code) = cfg->decoder_cfgs[ctx].url_encoding_invalid_unwanted; + } + + switch (cfg->decoder_cfgs[ctx].url_encoding_invalid_handling) { + case HTP_URL_DECODE_REMOVE_PERCENT: + // Do not place anything in output; consume the %. + rpos++; + continue; + break; + case HTP_URL_DECODE_PRESERVE_PERCENT: + // Leave the % in output. + rpos++; + break; + case HTP_URL_DECODE_PROCESS_INVALID: + // Decode invalid %u encoding. + c = decode_u_encoding_params(cfg, ctx, &(data[rpos + 2]), flags); + rpos += 6; + break; + } + } + } else { + // Invalid %u encoding; not enough data. + (*flags) |= HTP_URLEN_INVALID_ENCODING; + + if (cfg->decoder_cfgs[ctx].url_encoding_invalid_unwanted != HTP_UNWANTED_IGNORE) { + (*expected_status_code) = cfg->decoder_cfgs[ctx].url_encoding_invalid_unwanted; + } + + switch (cfg->decoder_cfgs[ctx].url_encoding_invalid_handling) { + case HTP_URL_DECODE_REMOVE_PERCENT: + // Do not place anything in output; consume the %. + rpos++; + continue; + break; + case HTP_URL_DECODE_PRESERVE_PERCENT: + // Leave the % in output. + rpos++; + break; + case HTP_URL_DECODE_PROCESS_INVALID: + // Cannot decode because there's not enough data. + // Leave the % in output. + // TODO Configurable handling of %, u, etc. + rpos++; + break; + } + } + } + } + + // Handle standard URL encoding. + if (!handled) { + // Need 2 hexadecimal digits. + if ((isxdigit(data[rpos + 1])) && (isxdigit(data[rpos + 2]))) { + // Decode %HH encoding. + c = x2c(&(data[rpos + 1])); + rpos += 3; + } else { + // Invalid encoding (enough bytes, but not hexadecimal digits). 
+ (*flags) |= HTP_URLEN_INVALID_ENCODING; + + if (cfg->decoder_cfgs[ctx].url_encoding_invalid_unwanted != HTP_UNWANTED_IGNORE) { + (*expected_status_code) = cfg->decoder_cfgs[ctx].url_encoding_invalid_unwanted; + } + + switch (cfg->decoder_cfgs[ctx].url_encoding_invalid_handling) { + case HTP_URL_DECODE_REMOVE_PERCENT: + // Do not place anything in output; consume the %. + rpos++; + continue; + break; + case HTP_URL_DECODE_PRESERVE_PERCENT: + // Leave the % in output. + rpos++; + break; + case HTP_URL_DECODE_PROCESS_INVALID: + // Decode. + c = x2c(&(data[rpos + 1])); + rpos += 3; + break; + } + } + } + } else { + // Invalid encoding; not enough data (at least 2 bytes required). + (*flags) |= HTP_URLEN_INVALID_ENCODING; + + if (cfg->decoder_cfgs[ctx].url_encoding_invalid_unwanted != HTP_UNWANTED_IGNORE) { + (*expected_status_code) = cfg->decoder_cfgs[ctx].url_encoding_invalid_unwanted; + } + + switch (cfg->decoder_cfgs[ctx].url_encoding_invalid_handling) { + case HTP_URL_DECODE_REMOVE_PERCENT: + // Do not place anything in output; consume the %. + rpos++; + continue; + break; + case HTP_URL_DECODE_PRESERVE_PERCENT: + // Leave the % in output. + rpos++; + break; + case HTP_URL_DECODE_PROCESS_INVALID: + // Cannot decode because there's not enough data. + // Leave the % in output. + // TODO Configurable handling of %, etc. + rpos++; + break; + } + } + + // Did we get an encoded NUL byte? + if (c == 0) { + if (cfg->decoder_cfgs[ctx].nul_encoded_unwanted != HTP_UNWANTED_IGNORE) { + (*expected_status_code) = cfg->decoder_cfgs[ctx].nul_encoded_unwanted; + } + + (*flags) |= HTP_URLEN_ENCODED_NUL; + + if (cfg->decoder_cfgs[ctx].nul_encoded_terminates) { + // Terminate the path at the raw NUL byte. + bstr_adjust_len(input, wpos); + return 1; + } + } + + data[wpos++] = c; + } else if (c == '+') { + // Decoding of the plus character is conditional on the configuration. 
+ + if (cfg->decoder_cfgs[ctx].plusspace_decode) { + c = 0x20; + } + + rpos++; + data[wpos++] = c; + } else { + // One non-encoded byte. + + // Did we get a raw NUL byte? + if (c == 0) { + if (cfg->decoder_cfgs[ctx].nul_raw_unwanted != HTP_UNWANTED_IGNORE) { + (*expected_status_code) = cfg->decoder_cfgs[ctx].nul_raw_unwanted; + } + + (*flags) |= HTP_URLEN_RAW_NUL; + + if (cfg->decoder_cfgs[ctx].nul_raw_terminates) { + // Terminate the path at the encoded NUL byte. + bstr_adjust_len(input, wpos); + return HTP_OK; + } + } + + rpos++; + data[wpos++] = c; + } + } + + bstr_adjust_len(input, wpos); + + return HTP_OK; +} + +/** + * Normalize a previously-parsed request URI. + * + * @param[in] connp + * @param[in] incomplete + * @param[in] normalized + * @return HTP_OK or HTP_ERROR + */ +int htp_normalize_parsed_uri(htp_tx_t *tx, htp_uri_t *incomplete, htp_uri_t *normalized) { + // Scheme. + if (incomplete->scheme != NULL) { + // Duplicate and convert to lowercase. + normalized->scheme = bstr_dup_lower(incomplete->scheme); + if (normalized->scheme == NULL) return HTP_ERROR; + } + + // Username. + if (incomplete->username != NULL) { + normalized->username = bstr_dup(incomplete->username); + if (normalized->username == NULL) return HTP_ERROR; + htp_tx_urldecode_uri_inplace(tx, normalized->username); + } + + // Password. + if (incomplete->password != NULL) { + normalized->password = bstr_dup(incomplete->password); + if (normalized->password == NULL) return HTP_ERROR; + htp_tx_urldecode_uri_inplace(tx, normalized->password); + } + + // Hostname. + if (incomplete->hostname != NULL) { + // We know that incomplete->hostname does not contain + // port information, so no need to check for it here. + normalized->hostname = bstr_dup(incomplete->hostname); + if (normalized->hostname == NULL) return HTP_ERROR; + htp_tx_urldecode_uri_inplace(tx, normalized->hostname); + htp_normalize_hostname_inplace(normalized->hostname); + } + + // Port. 
+ if (incomplete->port != NULL) { + int64_t port_parsed = htp_parse_positive_integer_whitespace( + bstr_ptr(incomplete->port), bstr_len(incomplete->port), 10); + + if (port_parsed < 0) { + // Failed to parse the port number. + normalized->port_number = -1; + tx->flags |= HTP_HOSTU_INVALID; + } else if ((port_parsed > 0) && (port_parsed < 65536)) { + // Valid port number. + normalized->port_number = (int) port_parsed; + } else { + // Port number out of range. + normalized->port_number = -1; + tx->flags |= HTP_HOSTU_INVALID; + } + } else { + normalized->port_number = -1; + } + + // Path. + if (incomplete->path != NULL) { + // Make a copy of the path, so that we can work on it. + normalized->path = bstr_dup(incomplete->path); + if (normalized->path == NULL) return HTP_ERROR; + + // Decode URL-encoded (and %u-encoded) characters, as well as lowercase, + // compress separators and convert backslashes. + htp_decode_path_inplace(tx, normalized->path); + + // Handle UTF-8 in the path. + if (tx->cfg->decoder_cfgs[HTP_DECODER_URL_PATH].utf8_convert_bestfit) { + // Decode Unicode characters into a single-byte stream, using best-fit mapping. + htp_utf8_decode_path_inplace(tx->cfg, tx, normalized->path); + } else { + // No decoding, but try to validate the path as a UTF-8 stream. + htp_utf8_validate_path(tx, normalized->path); + } + + // RFC normalization. + htp_normalize_uri_path_inplace(normalized->path); + } + + // Query string. + if (incomplete->query != NULL) { + normalized->query = bstr_dup(incomplete->query); + if (normalized->query == NULL) return HTP_ERROR; + } + + // Fragment. + if (incomplete->fragment != NULL) { + normalized->fragment = bstr_dup(incomplete->fragment); + if (normalized->fragment == NULL) return HTP_ERROR; + htp_tx_urldecode_uri_inplace(tx, normalized->fragment); + } + + return HTP_OK; +} + +/** + * Normalize request hostname. Convert all characters to lowercase and + * remove trailing dots from the end, if present. 
+ * + * @param[in] hostname + * @return Normalized hostname. + */ +bstr *htp_normalize_hostname_inplace(bstr *hostname) { + if (hostname == NULL) return NULL; + + bstr_to_lowercase(hostname); + + // Remove dots from the end of the string. + while (bstr_char_at_end(hostname, 0) == '.') bstr_chop(hostname); + + return hostname; +} + +/** + * Normalize URL path. This function implements the remove dot segments algorithm + * specified in RFC 3986, section 5.2.4. + * + * @param[in] s + */ +void htp_normalize_uri_path_inplace(bstr *s) { + if (s == NULL) return; + + unsigned char *data = bstr_ptr(s); + if (data == NULL) return; + size_t len = bstr_len(s); + + size_t rpos = 0; + size_t wpos = 0; + + int c = -1; + while ((rpos < len)&&(wpos < len)) { + if (c == -1) { + c = data[rpos++]; + } + + // A. If the input buffer begins with a prefix of "../" or "./", + // then remove that prefix from the input buffer; otherwise, + if (c == '.') { + if ((rpos + 1 < len) && (data[rpos] == '.') && (data[rpos + 1] == '/')) { + c = -1; + rpos += 2; + continue; + } else if ((rpos < len) && (data[rpos] == '/')) { + c = -1; + rpos += 1; + continue; + } + } + + if (c == '/') { + // B. if the input buffer begins with a prefix of "/./" or "/.", + // where "." is a complete path segment, then replace that + // prefix with "/" in the input buffer; otherwise, + if ((rpos + 1 < len) && (data[rpos] == '.') && (data[rpos + 1] == '/')) { + c = '/'; + rpos += 2; + continue; + } else if ((rpos + 1 == len) && (data[rpos] == '.')) { + c = '/'; + rpos += 1; + continue; + } + + // C. if the input buffer begins with a prefix of "/../" or "/..", + // where ".." 
is a complete path segment, then replace that + // prefix with "/" in the input buffer and remove the last + // segment and its preceding "/" (if any) from the output + // buffer; otherwise, + if ((rpos + 2 < len) && (data[rpos] == '.') && (data[rpos + 1] == '.') && (data[rpos + 2] == '/')) { + c = '/'; + rpos += 3; + + // Remove the last segment + while ((wpos > 0) && (data[wpos - 1] != '/')) wpos--; + if (wpos > 0) wpos--; + continue; + } else if ((rpos + 2 == len) && (data[rpos] == '.') && (data[rpos + 1] == '.')) { + c = '/'; + rpos += 2; + + // Remove the last segment + while ((wpos > 0) && (data[wpos - 1] != '/')) wpos--; + if (wpos > 0) wpos--; + continue; + } + } + + // D. if the input buffer consists only of "." or "..", then remove + // that from the input buffer; otherwise, + if ((c == '.') && (rpos == len)) { + rpos++; + continue; + } + + if ((c == '.') && (rpos + 1 == len) && (data[rpos] == '.')) { + rpos += 2; + continue; + } + + // E. move the first path segment in the input buffer to the end of + // the output buffer, including the initial "/" character (if + // any) and any subsequent characters up to, but not including, + // the next "/" character or the end of the input buffer. 
+ data[wpos++] = (uint8_t) c; + + while ((rpos < len) && (data[rpos] != '/') && (wpos < len)) { + data[wpos++] = data[rpos++]; + } + + c = -1; + } + + bstr_adjust_len(s, wpos); +} + +/** + * + */ +void fprint_bstr(FILE *stream, const char *name, bstr *b) { + if (b == NULL) { + fprint_raw_data_ex(stream, name, "(null)", 0, 6); + return; + } + + fprint_raw_data_ex(stream, name, bstr_ptr(b), 0, bstr_len(b)); +} + +/** + * + */ +void fprint_raw_data(FILE *stream, const char *name, const void *data, size_t len) { + // may happen for gaps + if (data == NULL) { + fprintf(stream, "\n%s: ptr NULL len %u\n", name, (unsigned int)len); + } else { + fprint_raw_data_ex(stream, name, data, 0, len); + } +} + +/** + * + */ +void fprint_raw_data_ex(FILE *stream, const char *name, const void *_data, size_t offset, size_t printlen) { + const unsigned char *data = (const unsigned char *) _data; + char buf[160]; + size_t len = offset + printlen; + + fprintf(stream, "\n%s: ptr %p offset %u len %u\n", name, (void*) data, (unsigned int)offset, (unsigned int)len); + + while (offset < len) { + size_t i; + + snprintf(buf, sizeof(buf), "%x" PRIx64, (unsigned int) offset); + strlcat(buf, " ", sizeof(buf)); + + i = 0; + while (i < 8) { + if (offset + i < len) { + char step[4]; + snprintf(step, sizeof(step), "%02x ", data[offset + i]); + strlcat(buf, step, sizeof(buf)); + } else { + strlcat(buf, " ", sizeof(buf)); + } + + i++; + } + + strlcat(buf, " ", sizeof(buf)); + + i = 8; + while (i < 16) { + if (offset + i < len) { + char step[4]; + snprintf(step, sizeof(step), "%02x ", data[offset + i]); + strlcat(buf, step, sizeof(buf)); + } else { + strlcat(buf, " ", sizeof(buf)); + } + + i++; + } + + strlcat(buf, " |", sizeof(buf)); + + i = 0; + char *p = buf + strlen(buf); + while ((offset + i < len) && (i < 16)) { + uint8_t c = data[offset + i]; + + if (isprint(c)) { + *p++ = c; + } else { + *p++ = '.'; + } + + i++; + } + + *p++ = '|'; + *p++ = '\n'; + *p = '\0'; + + fprintf(stream, "%s", buf); + 
offset += 16; + } + + fprintf(stream, "\n"); +} + +/** + * + */ +char *htp_connp_in_state_as_string(htp_connp_t *connp) { + if (connp == NULL) return "NULL"; + + if (connp->in_state == htp_connp_REQ_IDLE) return "REQ_IDLE"; + if (connp->in_state == htp_connp_REQ_LINE) return "REQ_LINE"; + if (connp->in_state == htp_connp_REQ_PROTOCOL) return "REQ_PROTOCOL"; + if (connp->in_state == htp_connp_REQ_HEADERS) return "REQ_HEADERS"; + if (connp->in_state == htp_connp_REQ_CONNECT_CHECK) return "REQ_CONNECT_CHECK"; + if (connp->in_state == htp_connp_REQ_CONNECT_WAIT_RESPONSE) return "REQ_CONNECT_WAIT_RESPONSE"; + if (connp->in_state == htp_connp_REQ_BODY_DETERMINE) return "REQ_BODY_DETERMINE"; + if (connp->in_state == htp_connp_REQ_BODY_IDENTITY) return "REQ_BODY_IDENTITY"; + if (connp->in_state == htp_connp_REQ_BODY_CHUNKED_LENGTH) return "REQ_BODY_CHUNKED_LENGTH"; + if (connp->in_state == htp_connp_REQ_BODY_CHUNKED_DATA) return "REQ_BODY_CHUNKED_DATA"; + if (connp->in_state == htp_connp_REQ_BODY_CHUNKED_DATA_END) return "REQ_BODY_CHUNKED_DATA_END"; + if (connp->in_state == htp_connp_REQ_FINALIZE) return "REQ_FINALIZE"; + if (connp->in_state == htp_connp_REQ_IGNORE_DATA_AFTER_HTTP_0_9) return "REQ_IGNORE_DATA_AFTER_HTTP_0_9"; + + return "UNKNOWN"; +} + +/** + * + */ +char *htp_connp_out_state_as_string(htp_connp_t *connp) { + if (connp == NULL) return "NULL"; + + if (connp->out_state == htp_connp_RES_IDLE) return "RES_IDLE"; + if (connp->out_state == htp_connp_RES_LINE) return "RES_LINE"; + if (connp->out_state == htp_connp_RES_HEADERS) return "RES_HEADERS"; + if (connp->out_state == htp_connp_RES_BODY_DETERMINE) return "RES_BODY_DETERMINE"; + if (connp->out_state == htp_connp_RES_BODY_IDENTITY_CL_KNOWN) return "RES_BODY_IDENTITY_CL_KNOWN"; + if (connp->out_state == htp_connp_RES_BODY_IDENTITY_STREAM_CLOSE) return "RES_BODY_IDENTITY_STREAM_CLOSE"; + if (connp->out_state == htp_connp_RES_BODY_CHUNKED_LENGTH) return "RES_BODY_CHUNKED_LENGTH"; + if (connp->out_state == 
htp_connp_RES_BODY_CHUNKED_DATA) return "RES_BODY_CHUNKED_DATA"; + if (connp->out_state == htp_connp_RES_BODY_CHUNKED_DATA_END) return "RES_BODY_CHUNKED_DATA_END"; + if (connp->out_state == htp_connp_RES_FINALIZE) return "RES_BODY_FINALIZE"; + + return "UNKNOWN"; +} + +/** + * + */ +char *htp_tx_request_progress_as_string(htp_tx_t *tx) { + if (tx == NULL) return "NULL"; + + switch (tx->request_progress) { + case HTP_REQUEST_NOT_STARTED: + return "NOT_STARTED"; + case HTP_REQUEST_LINE: + return "REQ_LINE"; + case HTP_REQUEST_HEADERS: + return "REQ_HEADERS"; + case HTP_REQUEST_BODY: + return "REQ_BODY"; + case HTP_REQUEST_TRAILER: + return "REQ_TRAILER"; + case HTP_REQUEST_COMPLETE: + return "COMPLETE"; + } + + return "INVALID"; +} + +/** + * + */ +char *htp_tx_response_progress_as_string(htp_tx_t *tx) { + if (tx == NULL) return "NULL"; + + switch (tx->response_progress) { + case HTP_RESPONSE_NOT_STARTED: + return "NOT_STARTED"; + case HTP_RESPONSE_LINE: + return "RES_LINE"; + case HTP_RESPONSE_HEADERS: + return "RES_HEADERS"; + case HTP_RESPONSE_BODY: + return "RES_BODY"; + case HTP_RESPONSE_TRAILER: + return "RES_TRAILER"; + case HTP_RESPONSE_COMPLETE: + return "COMPLETE"; + } + + return "INVALID"; +} + +bstr *htp_unparse_uri_noencode(htp_uri_t *uri) { + if (uri == NULL) return NULL; + + // On the first pass determine the length of the final string + size_t len = 0; + + if (uri->scheme != NULL) { + len += bstr_len(uri->scheme); + len += 3; // "://" + } + + if ((uri->username != NULL) || (uri->password != NULL)) { + if (uri->username != NULL) { + len += bstr_len(uri->username); + } + + len += 1; // ":" + + if (uri->password != NULL) { + len += bstr_len(uri->password); + } + + len += 1; // "@" + } + + if (uri->hostname != NULL) { + len += bstr_len(uri->hostname); + } + + if (uri->port != NULL) { + len += 1; // ":" + len += bstr_len(uri->port); + } + + if (uri->path != NULL) { + len += bstr_len(uri->path); + } + + if (uri->query != NULL) { + len += 1; // "?" 
+ len += bstr_len(uri->query); + } + + if (uri->fragment != NULL) { + len += 1; // "#" + len += bstr_len(uri->fragment); + } + + // On the second pass construct the string + bstr *r = bstr_alloc(len); + if (r == NULL) return NULL; + + if (uri->scheme != NULL) { + bstr_add_noex(r, uri->scheme); + bstr_add_c_noex(r, "://"); + } + + if ((uri->username != NULL) || (uri->password != NULL)) { + if (uri->username != NULL) { + bstr_add_noex(r, uri->username); + } + + bstr_add_c_noex(r, ":"); + + if (uri->password != NULL) { + bstr_add_noex(r, uri->password); + } + + bstr_add_c_noex(r, "@"); + } + + if (uri->hostname != NULL) { + bstr_add_noex(r, uri->hostname); + } + + if (uri->port != NULL) { + bstr_add_c_noex(r, ":"); + bstr_add_noex(r, uri->port); + } + + if (uri->path != NULL) { + bstr_add_noex(r, uri->path); + } + + if (uri->query != NULL) { + bstr_add_c_noex(r, "?"); + bstr_add_noex(r, uri->query); + } + + if (uri->fragment != NULL) { + bstr_add_c_noex(r, "#"); + bstr_add_noex(r, uri->fragment); + } + + return r; +} + +/** + * Determine if the information provided on the response line + * is good enough. Browsers are lax when it comes to response + * line parsing. In most cases they will only look for the + * words "http" at the beginning. 
+ *
+ * @param[in] data pointer to bytearray
+ * @param[in] len length in bytes of data
+ * @return 1 if the line does not start (after leading whitespace/NUL bytes)
+ *         with "http" in any letter case, or if data is NULL, i.e. it should
+ *         be treated as body data; 0 if it looks like a response line.
+ */
+int htp_treat_response_line_as_body(const uint8_t *data, size_t len) {
+    // Browser behavior:
+    //      Firefox 3.5.x: (?i)^\s*http
+    //      IE: (?i)^\s*http\s*/
+    //      Safari: ^HTTP/\d+\.\d+\s+\d{3}
+    size_t pos = 0;
+
+    if (data == NULL) return 1;
+    while ((pos < len) && (htp_is_space(data[pos]) || data[pos] == 0)) pos++;
+
+    if (len < pos + 4) return 1;
+
+    if ((data[pos] != 'H') && (data[pos] != 'h')) return 1;
+    if ((data[pos+1] != 'T') && (data[pos+1] != 't')) return 1;
+    if ((data[pos+2] != 'T') && (data[pos+2] != 't')) return 1;
+    if ((data[pos+3] != 'P') && (data[pos+3] != 'p')) return 1;
+
+    return 0;
+}
+
+/**
+ * Run the REQUEST_BODY_DATA hook.
+ *
+ * @param[in] connp
+ * @param[in] d
+ * @return HTP_OK on success (also when there is nothing to do), or the first
+ *         non-HTP_OK status returned by a hook.
+ */
+htp_status_t htp_req_run_hook_body_data(htp_connp_t *connp, htp_tx_data_t *d) {
+    // Do not invoke callbacks with an empty data chunk
+    if ((d->data != NULL) && (d->len == 0)) return HTP_OK;
+
+    // Do not invoke callbacks without a transaction.
+    if (connp->in_tx == NULL) return HTP_OK;
+
+    // Run transaction hooks first
+    htp_status_t rc = htp_hook_run_all(connp->in_tx->hook_request_body_data, d);
+    if (rc != HTP_OK) return rc;
+
+    // Run configuration hooks second
+    rc = htp_hook_run_all(connp->cfg->hook_request_body_data, d);
+    if (rc != HTP_OK) return rc;
+
+    // On PUT requests, treat request body as file
+    if (connp->put_file != NULL) {
+        htp_file_data_t file_data;
+
+        file_data.data = d->data;
+        file_data.len = d->len;
+        file_data.file = connp->put_file;
+        file_data.file->len += d->len;
+
+        rc = htp_hook_run_all(connp->cfg->hook_request_file_data, &file_data);
+        if (rc != HTP_OK) return rc;
+    }
+
+    return HTP_OK;
+}
+
+/**
+ * Run the RESPONSE_BODY_DATA hook.
+ *
+ * @param[in] connp
+ * @param[in] d
+ * @return HTP_OK on success (also when there is nothing to do), or the first
+ *         non-HTP_OK status returned by a hook.
+ */
+htp_status_t htp_res_run_hook_body_data(htp_connp_t *connp, htp_tx_data_t *d) {
+    // Do not invoke callbacks with an empty data chunk.
+    if ((d->data != NULL) && (d->len == 0)) return HTP_OK;
+
+    // Do not invoke callbacks without a transaction; this mirrors the
+    // request-side hook runner and avoids dereferencing a NULL out_tx.
+    if (connp->out_tx == NULL) return HTP_OK;
+
+    // Run transaction hooks first
+    htp_status_t rc = htp_hook_run_all(connp->out_tx->hook_response_body_data, d);
+    if (rc != HTP_OK) return rc;
+
+    // Run configuration hooks second
+    rc = htp_hook_run_all(connp->cfg->hook_response_body_data, d);
+    if (rc != HTP_OK) return rc;
+
+    return HTP_OK;
+}
+
+/**
+ * Parses the provided memory region, extracting the double-quoted string.
+ *
+ * @param[in] data
+ * @param[in] len
+ * @param[out] out
+ * @param[out] endoffset optional; receives the offset of the closing double quote
+ * @return HTP_OK on success, HTP_DECLINED if the input is not well formed, and HTP_ERROR on fatal errors.
+ */
+htp_status_t htp_extract_quoted_string_as_bstr(unsigned char *data, size_t len, bstr **out, size_t *endoffset) {
+    if ((data == NULL) || (out == NULL)) return HTP_ERROR;
+
+    if (len == 0) return HTP_DECLINED;
+
+    size_t pos = 0;
+
+    // Check that the first character is a double quote.
+    if (data[pos] != '"') return HTP_DECLINED;
+
+    // Step over the double quote.
+    pos++;
+    if (pos == len) return HTP_DECLINED;
+
+    // Calculate the length of the resulting string.
+    size_t escaped_chars = 0;
+    while (pos < len) {
+        if (data[pos] == '\\') {
+            if (pos + 1 < len) {
+                escaped_chars++;
+                pos += 2;
+                continue;
+            }
+        } else if (data[pos] == '"') {
+            break;
+        }
+
+        pos++;
+    }
+
+    // Have we reached the end of input without seeing the terminating double quote?
+    if (pos == len) return HTP_DECLINED;
+
+    // Copy the data and unescape it as necessary.
+    // The output is everything between the quotes, minus one byte per escape.
+    size_t outlen = pos - 1 - escaped_chars;
+    *out = bstr_alloc(outlen);
+    if (*out == NULL) return HTP_ERROR;
+    unsigned char *outptr = bstr_ptr(*out);
+    size_t outpos = 0;
+
+    pos = 1;
+    while ((pos < len) && (outpos < outlen)) {
+        // TODO We are not properly unescaping text here, we're only
+        //      handling escaped double quotes.
+        if (data[pos] == '\\') {
+            if (pos + 1 < len) {
+                outptr[outpos++] = data[pos + 1];
+                pos += 2;
+                continue;
+            }
+        } else if (data[pos] == '"') {
+            break;
+        }
+
+        outptr[outpos++] = data[pos++];
+    }
+
+    bstr_adjust_len(*out, outlen);
+
+    if (endoffset != NULL) {
+        *endoffset = pos;
+    }
+
+    return HTP_OK;
+}
+
+/**
+ * Extracts the media type from a Content-Type header value: everything up to
+ * the first ';', ',' or space, converted to lowercase.
+ *
+ * @param[in] header
+ * @param[out] ct receives a newly allocated bstr
+ * @return HTP_OK on success, HTP_ERROR on NULL arguments or allocation failure.
+ */
+htp_status_t htp_parse_ct_header(bstr *header, bstr **ct) {
+    if ((header == NULL) || (ct == NULL)) return HTP_ERROR;
+
+    unsigned char *data = bstr_ptr(header);
+    size_t len = bstr_len(header);
+
+    // The assumption here is that the header value we receive
+    // here has been left-trimmed, which means the starting position
+    // is on the media type. On some platforms that may not be the
+    // case, and we may need to do the left-trim ourselves.
+
+    // Find the end of the MIME type, using the same approach PHP 5.4.3 uses.
+    size_t pos = 0;
+    while ((pos < len) && (data[pos] != ';') && (data[pos] != ',') && (data[pos] != ' ')) pos++;
+
+    *ct = bstr_dup_ex(header, 0, pos);
+    if (*ct == NULL) return HTP_ERROR;
+
+    bstr_to_lowercase(*ct);
+
+    return HTP_OK;
+}
+
+/**
+ * Implements relaxed (not strictly RFC) hostname validation.
+ *
+ * A value starting with '[' is validated as a bracketed IPv6 literal; any
+ * other value is validated as dot-separated labels of 1-63 characters.
+ *
+ * @param[in] hostname
+ * @return 1 if the supplied hostname is valid; 0 if it is not (including NULL).
+ */
+int htp_validate_hostname(bstr *hostname) {
+    if (hostname == NULL) return 0;
+
+    unsigned char *data = bstr_ptr(hostname);
+    size_t len = bstr_len(hostname);
+    size_t startpos = 0;
+    size_t pos = 0;
+
+    if ((len == 0) || (len > 255)) return 0;
+
+    if (data[0] == '[') {
+        // only ipv6 possible
+        if (len < 2 || len - 2 >= INET6_ADDRSTRLEN) {
+            return 0;
+        }
+
+        // The literal must be terminated by the closing bracket; without this
+        // check the last byte was silently dropped, whatever it was.
+        if (data[len - 1] != ']') return 0;
+
+        char dst[sizeof(struct in6_addr)];
+        char str[INET6_ADDRSTRLEN];
+        memcpy(str, data+1, len-2);
+        str[len-2] = 0;
+
+        // inet_pton() may also return -1; only 1 means a valid address.
+        return inet_pton(AF_INET6, str, dst) == 1;
+    }
+    while (pos < len) {
+        // Validate label characters.
+        startpos = pos;
+        while ((pos < len) && (data[pos] != '.')) {
+            unsigned char c = data[pos];
+            // According to the RFC, the underscore is not allowed in a label, but
+            // we allow it here because we think it's often seen in practice.
+            if (!(((c >= 'a') && (c <= 'z')) || ((c >= 'A') && (c <= 'Z')) ||
+                    ((c >= '0') && (c <= '9')) ||
+                    (c == '-') || (c == '_')))
+            {
+                return 0;
+            }
+
+            pos++;
+        }
+
+        // Validate label length.
+        if ((pos - startpos == 0) || (pos - startpos > 63)) return 0;
+
+        if (pos >= len) return 1; // No more data after label.
+
+        // How many dots are there?
+        startpos = pos;
+        while ((pos < len) && (data[pos] == '.')) pos++;
+
+        if (pos - startpos != 1) return 0; // Exactly one dot expected.
+    }
+
+    return 1;
+}
+
+/**
+ * Frees every component string of the URI and then the structure itself.
+ *
+ * @param[in] uri may be NULL, in which case nothing happens
+ */
+void htp_uri_free(htp_uri_t *uri) {
+    if (uri == NULL) return;
+
+    bstr_free(uri->scheme);
+    bstr_free(uri->username);
+    bstr_free(uri->password);
+    bstr_free(uri->hostname);
+    bstr_free(uri->port);
+    bstr_free(uri->path);
+    bstr_free(uri->query);
+    bstr_free(uri->fragment);
+
+    free(uri);
+}
+
+/**
+ * Allocates a zero-initialized URI structure with port_number set to -1.
+ *
+ * @return the new structure, or NULL on allocation failure.
+ */
+htp_uri_t *htp_uri_alloc(void) {
+    htp_uri_t *u = calloc(1, sizeof (htp_uri_t));
+    if (u == NULL) return NULL;
+
+    u->port_number = -1;
+
+    return u;
+}
+
+/**
+ * Returns the library version string (HTP_VERSION_STRING_FULL).
+ *
+ * @return pointer to a string literal; the caller must not modify or free it.
+ */
+char *htp_get_version(void) {
+    return HTP_VERSION_STRING_FULL;
+}
+
+/**
+ * Tells if a header value (haystack) contains a token (needle)
+ * This is done with a caseless comparison
+ *
+ * @param[in] hvp header value pointer
+ * @param[in] hvlen length of header value buffer
+ * @param[in] value token to look for (null-terminated string), should be a lowercase constant
+ * @return HTP_OK if the header has the token; HTP_ERROR if it has not.
+ */
+htp_status_t htp_header_has_token(const unsigned char *hvp, size_t hvlen, const unsigned char *value) {
+    // States: 0 = matching the token, 1 = skipping to the next comma,
+    // 2 = token fully matched, checking that only spaces follow it.
+    int state = 0;
+    // offset to compare in value
+    size_t v_off = 0;
+    // The header value is a list of comma-separated tokens (with additional spaces)
+    for (size_t i = 0; i < hvlen; i++) {
+        switch (state) {
+            case 0:
+                if (v_off == 0 && htp_is_space(hvp[i])) {
+                    // skip leading space
+                    continue;
+                }
+                if (tolower(hvp[i]) == value[v_off]) {
+                    v_off++;
+                    if (value[v_off] == 0) {
+                        // finish validation if end of token
+                        state = 2;
+                    }
+                    continue;
+                } else {
+                    // wait for a new token
+                    v_off = 0;
+                    state = 1;
+                }
+                // fallthrough
+            case 1:
+                if (hvp[i] == ',') {
+                    // start of next token
+                    state = 0;
+                }
+                break;
+            case 2:
+                if (hvp[i] == ',') {
+                    return HTP_OK;
+                }
+                if (!htp_is_space(hvp[i])) {
+                    // trailing junk in token, wait for a next one
+                    v_off = 0;
+                    state = 1;
+                }
+                break;
+        }
+    }
+    if (state == 2) {
+        return HTP_OK;
+    }
+    return HTP_ERROR;
+}
diff --git a/htp/htp_version.h.in b/htp/htp_version.h.in new file mode 100644 index 0000000..89a503f --- 
/dev/null +++ b/htp/htp_version.h.in @@ -0,0 +1,53 @@ +/***************************************************************************
+ * Copyright (c) 2009-2010 Open Information Security Foundation
+ * Copyright (c) 2010-2013 Qualys, Inc.
+ * All rights reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions are
+ * met:
+ *
+ * - Redistributions of source code must retain the above copyright
+ * notice, this list of conditions and the following disclaimer.
+
+ * - Redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in the
+ * documentation and/or other materials provided with the distribution.
+
+ * - Neither the name of the Qualys, Inc. nor the names of its
+ * contributors may be used to endorse or promote products derived from
+ * this software without specific prior written permission.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+ * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+ * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+ * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
+ * HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+ * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
+ * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+ * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
+ * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+ * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+ * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+ ***************************************************************************/
+
+/**
+ * @file
+ * @author Ivan Ristic <ivanr@webkreator.com>
+ */
+
+#ifndef HTP_VERSION_H
+#define HTP_VERSION_H
+
+#ifdef __cplusplus
+extern "C" {
+#endif
+
+/* Bare version number. The @PACKAGE_VERSION@ placeholder is presumably replaced
+ * when the real header is generated from this .in template -- TODO confirm. */
+#define HTP_VERSION_STRING "@PACKAGE_VERSION@"
+/* "LibHTP v" followed by HTP_VERSION_STRING (adjacent string literals concatenate). */
+#define HTP_VERSION_STRING_FULL "LibHTP v" HTP_VERSION_STRING
+
+#ifdef __cplusplus
+}
+#endif
+
+#endif /* HTP_VERSION_H */
diff --git a/htp/lzma/7zTypes.h b/htp/lzma/7zTypes.h new file mode 100644 index 0000000..f5d7505 --- /dev/null +++ b/htp/lzma/7zTypes.h @@ -0,0 +1,375 @@ +/* 7zTypes.h -- Basic types
+2018-08-04 : Igor Pavlov : Public domain */
+
+#ifndef __7Z_TYPES_H
+#define __7Z_TYPES_H
+
+#ifdef _WIN32
+/* #include <windows.h> */
+#endif
+
+#include <stddef.h>
+#include <zconf.h>
+
+#ifndef EXTERN_C_BEGIN
+#ifdef __cplusplus
+#define EXTERN_C_BEGIN extern "C" {
+#define EXTERN_C_END }
+#else
+#define EXTERN_C_BEGIN
+#define EXTERN_C_END
+#endif
+#endif
+
+EXTERN_C_BEGIN
+
+/* Status codes carried in an SRes. SZ_OK is zero; every SZ_ERROR_* is non-zero. */
+#define SZ_OK 0
+
+#define SZ_ERROR_DATA 1
+#define SZ_ERROR_MEM 2
+#define SZ_ERROR_CRC 3
+#define SZ_ERROR_UNSUPPORTED 4
+#define SZ_ERROR_PARAM 5
+#define SZ_ERROR_INPUT_EOF 6
+#define SZ_ERROR_OUTPUT_EOF 7
+#define SZ_ERROR_READ 8
+#define SZ_ERROR_WRITE 9
+#define SZ_ERROR_PROGRESS 10
+#define SZ_ERROR_FAIL 11
+#define SZ_ERROR_THREAD 12
+
+#define SZ_ERROR_ARCHIVE 16
+#define SZ_ERROR_NO_ARCHIVE 17
+
+/* Result type used by the stream and allocator interfaces declared in this header. */
+typedef int SRes;
+
+
+/* WRes: result type with a platform-dependent definition, plus a macro that
+ * maps a WRes onto an HRESULT-style value. */
+#ifdef _WIN32
+
+/* typedef DWORD WRes; */
+typedef unsigned WRes;
+#define MY_SRes_HRESULT_FROM_WRes(x) HRESULT_FROM_WIN32(x)
+
+#else
+
+typedef int WRes;
+#define MY__FACILITY_WIN32 7
+#define MY__FACILITY__WRes MY__FACILITY_WIN32
+/* Values <= 0 pass through unchanged; positive values keep their low 16 bits
+ * and get the facility code and the top bit set. */
+#define MY_SRes_HRESULT_FROM_WRes(x) ((HRESULT)(x) <= 0 ? ((HRESULT)(x)) : ((HRESULT) (((x) & 0x0000FFFF) | (MY__FACILITY__WRes << 16) | 0x80000000)))
+
+#endif
+
+
+/* RINOK(x): evaluate x once and return its value from the enclosing function
+ * if it is non-zero. Can be overridden by defining RINOK before inclusion. */
+#ifndef RINOK
+#define RINOK(x) { int __result__ = (x); if (__result__ != 0) return __result__; }
+#endif
+
+/* Integer type aliases used throughout the LZMA sources. */
+typedef short Int16;
+typedef unsigned short UInt16;
+
+/* Define _LZMA_UINT32_IS_ULONG to map the 32-bit aliases onto long instead of int. */
+#ifdef _LZMA_UINT32_IS_ULONG
+typedef long Int32;
+typedef unsigned long UInt32;
+#else
+typedef int Int32;
+typedef unsigned int UInt32;
+#endif
+
+#ifdef _SZ_NO_INT_64
+
+/* define _SZ_NO_INT_64, if your compiler doesn't support 64-bit integers.
+ NOTES: Some code will work incorrectly in that case! */
+
+typedef long Int64;
+typedef unsigned long UInt64;
+
+#else
+
+/* UINT64_CONST(n) spells a 64-bit unsigned constant for the compiler in use. */
+#if defined(_MSC_VER) || defined(__BORLANDC__)
+typedef __int64 Int64;
+typedef unsigned __int64 UInt64;
+#define UINT64_CONST(n) n
+#else
+typedef long long int Int64;
+typedef unsigned long long int UInt64;
+#define UINT64_CONST(n) n ## ULL
+#endif
+
+#endif
+
+/* SizeT is size_t unless _LZMA_NO_SYSTEM_SIZE_T is defined, in which case it is UInt32. */
+#ifdef _LZMA_NO_SYSTEM_SIZE_T
+typedef UInt32 SizeT;
+#else
+typedef size_t SizeT;
+#endif
+
+/* Boolean stored in an int, with True/False as 1/0. */
+typedef int BoolInt;
+/* typedef BoolInt Bool; */
+#define True 1
+#define False 0
+
+
+/* Calling-convention and inlining decorations. They expand to compiler
+ * keywords under _WIN32 / _MSC_VER and to nothing elsewhere. */
+#ifdef _WIN32
+#define MY_STD_CALL __stdcall
+#else
+#define MY_STD_CALL
+#endif
+
+#ifdef _MSC_VER
+
+#if _MSC_VER >= 1300
+#define MY_NO_INLINE __declspec(noinline)
+#else
+#define MY_NO_INLINE
+#endif
+
+#define MY_FORCE_INLINE __forceinline
+
+#define MY_CDECL __cdecl
+#define MY_FAST_CALL __fastcall
+
+#else
+
+/* Non-MSVC compilers: all four decorations are empty. */
+#define MY_NO_INLINE
+#define MY_FORCE_INLINE
+#define MY_CDECL
+#define MY_FAST_CALL
+
+/* inline keyword : for C++ / C99 */
+
+/* GCC, clang: */
+/*
+#if defined (__GNUC__) && (__GNUC__ >= 4)
+#define MY_FORCE_INLINE __attribute__((always_inline))
+#define MY_NO_INLINE __attribute__((noinline))
+#endif
+*/
+
+#endif
+
+
+/* The following interfaces use first parameter as pointer to structure */
+
+/* Single-byte input interface: one function pointer, invoked through IByteIn_Read. */
+typedef struct IByteIn IByteIn;
+struct IByteIn
+{
+ Byte (*Read)(const IByteIn *p); /* reads one byte, returns 0 in case of EOF or error */
+};
+#define IByteIn_Read(p) (p)->Read(p)
+
+
+/* Single-byte output interface: one function pointer, invoked through IByteOut_Write. */
+typedef struct IByteOut IByteOut;
+struct IByteOut
+{
+ void (*Write)(const IByteOut *p, Byte b);
+};
+#define IByteOut_Write(p, b) (p)->Write(p, b)
+
+
+/* Sequential input stream. *size is in/out: requested byte count on entry,
+ * byte count actually delivered on return. */
+typedef struct ISeqInStream ISeqInStream;
+struct ISeqInStream
+{
+ SRes (*Read)(const ISeqInStream *p, void *buf, size_t *size);
+ /* if (input(*size) != 0 && output(*size) == 0) means end_of_stream.
+ (output(*size) < input(*size)) is allowed */
+};
+#define ISeqInStream_Read(p, buf, size) (p)->Read(p, buf, size)
+
+/* Helpers declared here and defined elsewhere; they take the size by value. */
+/* it can return SZ_ERROR_INPUT_EOF */
+SRes SeqInStream_Read(const ISeqInStream *stream, void *buf, size_t size);
+SRes SeqInStream_Read2(const ISeqInStream *stream, void *buf, size_t size, SRes errorType);
+SRes SeqInStream_ReadByte(const ISeqInStream *stream, Byte *buf);
+
+
+/* Sequential output stream. */
+typedef struct ISeqOutStream ISeqOutStream;
+struct ISeqOutStream
+{
+ size_t (*Write)(const ISeqOutStream *p, const void *buf, size_t size);
+ /* Returns: result - the number of actually written bytes.
+ (result < size) means error */
+};
+#define ISeqOutStream_Write(p, buf, size) (p)->Write(p, buf, size)
+
+/* Seek origin selector passed to the Seek callbacks. */
+typedef enum
+{
+ SZ_SEEK_SET = 0,
+ SZ_SEEK_CUR = 1,
+ SZ_SEEK_END = 2
+} ESzSeek;
+
+
+/* Input stream that supports Read plus Seek; pos is passed by pointer. */
+typedef struct ISeekInStream ISeekInStream;
+struct ISeekInStream
+{
+ SRes (*Read)(const ISeekInStream *p, void *buf, size_t *size); /* same as ISeqInStream::Read */
+ SRes (*Seek)(const ISeekInStream *p, Int64 *pos, ESzSeek origin);
+};
+#define ISeekInStream_Read(p, buf, size) (p)->Read(p, buf, size)
+#define ISeekInStream_Seek(p, pos, origin) (p)->Seek(p, pos, origin)
+
+
+/* Input stream with look-ahead: Look hands back a pointer to available data
+ * through *buf, and Skip then advances past what was consumed. */
+typedef struct ILookInStream ILookInStream;
+struct ILookInStream
+{
+ SRes (*Look)(const ILookInStream *p, const void **buf, size_t *size);
+ /* if (input(*size) != 0 && output(*size) == 0) means end_of_stream.
+ (output(*size) > input(*size)) is not allowed
+ (output(*size) < input(*size)) is allowed */
+ SRes (*Skip)(const ILookInStream *p, size_t offset);
+ /* offset must be <= output(*size) of Look */
+
+ SRes (*Read)(const ILookInStream *p, void *buf, size_t *size);
+ /* reads directly (without buffer). It's same as ISeqInStream::Read */
+ SRes (*Seek)(const ILookInStream *p, Int64 *pos, ESzSeek origin);
+};
+
+/* Call-through wrappers for the four callbacks. */
+#define ILookInStream_Look(p, buf, size) (p)->Look(p, buf, size)
+#define ILookInStream_Skip(p, offset) (p)->Skip(p, offset)
+#define ILookInStream_Read(p, buf, size) (p)->Read(p, buf, size)
+#define ILookInStream_Seek(p, pos, origin) (p)->Seek(p, pos, origin)
+
+
+/* Helpers declared here and defined elsewhere. */
+SRes LookInStream_LookRead(const ILookInStream *stream, void *buf, size_t *size);
+SRes LookInStream_SeekTo(const ILookInStream *stream, UInt64 offset);
+
+/* reads via ILookInStream::Read */
+SRes LookInStream_Read2(const ILookInStream *stream, void *buf, size_t size, SRes errorType);
+SRes LookInStream_Read(const ILookInStream *stream, void *buf, size_t size);
+
+
+
+/* ILookInStream implementation that wraps an ISeekInStream. The vtable is the
+ * first member; buf and bufSize must be supplied by the owner. */
+typedef struct
+{
+ ILookInStream vt;
+ const ISeekInStream *realStream;
+
+ size_t pos;
+ size_t size; /* it's data size */
+
+ /* the following variables must be set outside */
+ Byte *buf;
+ size_t bufSize;
+} CLookToRead2;
+
+void LookToRead2_CreateVTable(CLookToRead2 *p, int lookahead);
+
+/* Resets pos and size to zero; buf and bufSize are left untouched. */
+#define LookToRead2_Init(p) { (p)->pos = (p)->size = 0; }
+
+
+/* ISeqInStream implementation that wraps an ILookInStream. */
+typedef struct
+{
+ ISeqInStream vt;
+ const ILookInStream *realStream;
+} CSecToLook;
+
+void SecToLook_CreateVTable(CSecToLook *p);
+
+
+
+/* Second ISeqInStream implementation wrapping an ILookInStream, with the same
+ * layout as CSecToLook; the difference lies in the vtable installed by
+ * SecToRead_CreateVTable, which is defined elsewhere. */
+typedef struct
+{
+ ISeqInStream vt;
+ const ILookInStream *realStream;
+} CSecToRead;
+
+void SecToRead_CreateVTable(CSecToRead *p);
+
+
+/* Progress callback interface. */
+typedef struct ICompressProgress ICompressProgress;
+
+struct ICompressProgress
+{
+ SRes (*Progress)(const ICompressProgress *p, UInt64 inSize, UInt64 outSize);
+ /* Returns: result. (result != SZ_OK) means break.
+ Value (UInt64)(Int64)-1 for size means unknown value. */
+};
+#define ICompressProgress_Progress(p, inSize, outSize) (p)->Progress(p, inSize, outSize)
+
+
+
+/* Allocator interface. ISzAllocPtr is a pointer to a const ISzAlloc, and each
+ * callback receives the allocator itself as its first argument. */
+typedef struct ISzAlloc ISzAlloc;
+typedef const ISzAlloc * ISzAllocPtr;
+
+struct ISzAlloc
+{
+ void *(*Alloc)(ISzAllocPtr p, size_t size);
+ void (*Free)(ISzAllocPtr p, void *address); /* address can be 0 */
+};
+
+#define ISzAlloc_Alloc(p, size) (p)->Alloc(p, size)
+#define ISzAlloc_Free(p, a) (p)->Free(p, a)
+
+/* deprecated */
+#define IAlloc_Alloc(p, size) ISzAlloc_Alloc(p, size)
+#define IAlloc_Free(p, a) ISzAlloc_Free(p, a)
+
+
+
+
+
+/* MY_offsetof(type, m): byte offset of member m. Uses offsetof when that macro
+ * is defined, otherwise falls back to the null-pointer idiom. */
+#ifndef MY_offsetof
+ #ifdef offsetof
+ #define MY_offsetof(type, m) offsetof(type, m)
+ /*
+ #define MY_offsetof(type, m) FIELD_OFFSET(type, m)
+ */
+ #else
+ #define MY_offsetof(type, m) ((size_t)&(((type *)0)->m))
+ #endif
+#endif
+
+
+
+#ifndef MY_container_of
+
+/*
+#define MY_container_of(ptr, type, m) container_of(ptr, type, m)
+#define MY_container_of(ptr, type, m) CONTAINING_RECORD(ptr, type, m)
+#define MY_container_of(ptr, type, m) ((type *)((char *)(ptr) - offsetof(type, m)))
+#define MY_container_of(ptr, type, m) (&((type *)0)->m == (ptr), ((type *)(((char *)(ptr)) - MY_offsetof(type, m))))
+*/
+
+/*
+ GCC shows warning: "perhaps the 'offsetof' macro was used incorrectly"
+ GCC 3.4.4 : classes with constructor
+ GCC 4.8.1 : classes with non-public variable members"
+*/
+
+/* Recovers a pointer to the enclosing `type` from a pointer to its member m by
+ * subtracting the member offset. The `1 ? (ptr) : &((type *)0)->m` conditional
+ * always yields ptr; its second arm is never evaluated. */
+#define MY_container_of(ptr, type, m) ((type *)((char *)(1 ? (ptr) : &((type *)0)->m) - MY_offsetof(type, m)))
+
+
+#endif
+
+/* Plain cast; m is ignored, so this is only equivalent to MY_container_of
+ * when m sits at offset 0 of type. */
+#define CONTAINER_FROM_VTBL_SIMPLE(ptr, type, m) ((type *)(ptr))
+
+/*
+#define CONTAINER_FROM_VTBL(ptr, type, m) CONTAINER_FROM_VTBL_SIMPLE(ptr, type, m)
+*/
+#define CONTAINER_FROM_VTBL(ptr, type, m) MY_container_of(ptr, type, m)
+
+#define CONTAINER_FROM_VTBL_CLS(ptr, type, m) CONTAINER_FROM_VTBL_SIMPLE(ptr, type, m)
+/*
+#define CONTAINER_FROM_VTBL_CLS(ptr, type, m) CONTAINER_FROM_VTBL(ptr, type, m)
+*/
+
+
+
+/* Path separator as char, wide char, string and wide string:
+ * backslash under _WIN32, forward slash otherwise. */
+#ifdef _WIN32
+
+#define CHAR_PATH_SEPARATOR '\\'
+#define WCHAR_PATH_SEPARATOR L'\\'
+#define STRING_PATH_SEPARATOR "\\"
+#define WSTRING_PATH_SEPARATOR L"\\"
+
+#else
+
+#define CHAR_PATH_SEPARATOR '/'
+#define WCHAR_PATH_SEPARATOR L'/'
+#define STRING_PATH_SEPARATOR "/"
+#define WSTRING_PATH_SEPARATOR L"/"
+
+#endif
+
+EXTERN_C_END
+
+#endif
diff --git a/htp/lzma/Compiler.h b/htp/lzma/Compiler.h new file mode 100644 index 0000000..c788648 --- /dev/null +++ b/htp/lzma/Compiler.h @@ -0,0 +1,33 @@ +/* Compiler.h
+2017-04-03 : Igor Pavlov : Public domain */
+
+#ifndef __7Z_COMPILER_H
+#define __7Z_COMPILER_H
+
+/* MSVC-only warning suppressions; other compilers skip this whole section. */
+#ifdef _MSC_VER
+
+ #ifdef UNDER_CE
+ #define RPC_NO_WINDOWS_H
+ /* #pragma warning(disable : 4115) // '_RPC_ASYNC_STATE' : named type definition in parentheses */
+ #pragma warning(disable : 4201) // nonstandard extension used : nameless struct/union
+ #pragma warning(disable : 4214) // nonstandard extension used : bit field types other than int
+ #endif
+
+ #if _MSC_VER >= 1300
+ #pragma warning(disable : 4996) // This function or variable may be unsafe
+ #else
+ #pragma warning(disable : 4511) // copy constructor could not be generated
+ #pragma warning(disable : 4512) // assignment operator could not be generated
+ #pragma warning(disable : 4514) // unreferenced inline function has been removed
+ #pragma warning(disable : 4702) // unreachable code
+ #pragma warning(disable : 4710) // not inlined
+ #pragma warning(disable : 4714) // function marked as __forceinline not inlined
+ #pragma warning(disable : 4786) // identifier was truncated to '255' characters in the debug information
+ #endif
+
+#endif
+
+/* Marks a variable as intentionally unused. The expansion already ends in ';',
+ * and x is not parenthesized, so pass a plain identifier. */
+#define UNUSED_VAR(x) (void)x;
+/* #define UNUSED_VAR(x) x=x; */
+
+#endif
diff --git a/htp/lzma/LzFind.c b/htp/lzma/LzFind.c new file mode 100644 index 0000000..ee5375c --- /dev/null +++ b/htp/lzma/LzFind.c @@ -0,0 +1,1127 @@ +/* LzFind.c -- Match finder for LZ algorithms
+2018-07-08 : Igor Pavlov : Public domain */
+
+#include "Precomp.h"
+
+#include <string.h>
+
+#include "LzFind.h"
+#include "LzHash.h"
+
+/* Value written into hash/son slots that hold no position. */
+#define kEmptyHashValue 0
+/* When p->pos reaches this value, MatchFinder_CheckLimits normalizes all positions. */
+#define kMaxValForNormalize ((UInt32)0xFFFFFFFF)
+#define kNormalizeStepMin (1 << 10) /* it must be power of 2 */
+/* Clears the low bits so the normalization amount is a multiple of kNormalizeStepMin. */
+#define kNormalizeMask (~(UInt32)(kNormalizeStepMin - 1))
+/* MatchFinder_Create rejects any historySize above this. */
+#define kMaxHistorySize ((UInt32)7 << 29)
+
+/* NOTE(review): not referenced in the visible part of this file. */
+#define kStartMaxLen 3
+
+/* Frees the window buffer and clears the pointer. Does nothing when
+   p->directInput is set. */
+static void LzInWindow_Free(CMatchFinder *p, ISzAllocPtr alloc)
+{
+  if (p->directInput)
+    return;
+
+  ISzAlloc_Free(alloc, p->bufferBase);
+  p->bufferBase = NULL;
+}
+
+/* Ensures the window buffer has the required block size.
+   (keepSizeBefore + keepSizeAfter + keepSizeReserv) must be < 4G, because the
+   sum is computed in a UInt32.
+   Returns non-zero on success, 0 if the buffer could not be allocated. */
+static int LzInWindow_Create(CMatchFinder *p, UInt32 keepSizeReserv, ISzAllocPtr alloc)
+{
+  UInt32 wanted = p->keepSizeBefore + p->keepSizeAfter + keepSizeReserv;
+
+  /* With direct input nothing is allocated here; only the size is recorded. */
+  if (p->directInput)
+  {
+    p->blockSize = wanted;
+    return 1;
+  }
+
+  /* Keep the current buffer only if it exists and already has the wanted size. */
+  if (p->bufferBase == NULL || p->blockSize != wanted)
+  {
+    LzInWindow_Free(p, alloc);
+    p->blockSize = wanted;
+    p->bufferBase = (Byte *)ISzAlloc_Alloc(alloc, (size_t)wanted);
+  }
+
+  return (p->bufferBase != NULL);
+}
+
+/* Returns p->buffer. */
+Byte *MatchFinder_GetPointerToCurrentPos(CMatchFinder *p)
+{
+  return p->buffer;
+}
+
+/* Returns (streamPos - pos), computed in UInt32 arithmetic. */
+static UInt32 MatchFinder_GetNumAvailableBytes(CMatchFinder *p)
+{
+  UInt32 avail = p->streamPos - p->pos;
+  return avail;
+}
+
+/* Subtracts subValue from the three position counters: pos, streamPos, posLimit. */
+void MatchFinder_ReduceOffsets(CMatchFinder *p, UInt32 subValue)
+{
+  p->pos -= subValue;
+  p->streamPos -= subValue;
+  p->posLimit -= subValue;
+}
+
+/* Pulls more input into the window and advances streamPos.
+   Does nothing once the stream end was reached or an error was recorded in
+   p->result. Sets p->streamEndWasReached when the input is exhausted. */
+static void MatchFinder_ReadBlock(CMatchFinder *p)
+{
+  if (p->result != SZ_OK || p->streamEndWasReached)
+    return;
+
+  /* The (p->streamPos - p->pos) value is used below.
+     (p->streamPos < p->pos) is allowed. */
+
+  if (p->directInput)
+  {
+    /* Nothing is copied in direct mode; only the counters move. */
+    UInt32 take = 0xFFFFFFFF - (p->streamPos - p->pos);
+    if (take > p->directInputRem)
+      take = (UInt32)p->directInputRem;
+    p->streamPos += take;
+    p->directInputRem -= take;
+    if (p->directInputRem == 0)
+      p->streamEndWasReached = 1;
+    return;
+  }
+
+  /* Keep reading until more than keepSizeAfter bytes lie ahead of pos, the
+     block is full, the stream ends, or the reader reports an error. */
+  do
+  {
+    Byte *dest = p->buffer + (p->streamPos - p->pos);
+    size_t size = (size_t)(p->bufferBase + p->blockSize - dest);
+    if (size == 0)
+      return;
+
+    p->result = ISeqInStream_Read(p->stream, dest, &size);
+    if (p->result != SZ_OK)
+      return;
+
+    if (size == 0)
+    {
+      p->streamEndWasReached = 1;
+      return;
+    }
+
+    p->streamPos += (UInt32)size;
+  }
+  while (p->streamPos - p->pos <= p->keepSizeAfter);
+}
+
+/* Moves the live part of the window (keepSizeBefore bytes before the current
+   position plus everything read ahead of it) to the start of the block, then
+   re-bases p->buffer. memmove is used because the regions may overlap. */
+void MatchFinder_MoveBlock(CMatchFinder *p)
+{
+  const Byte *src = p->buffer - p->keepSizeBefore;
+  size_t numBytes = (size_t)(p->streamPos - p->pos) + p->keepSizeBefore;
+
+  memmove(p->bufferBase, src, numBytes);
+  p->buffer = p->bufferBase + p->keepSizeBefore;
+}
+
+/* Returns 1 when the space between p->buffer and the end of the block is no
+   larger than keepSizeAfter, 0 otherwise. Always 0 with direct input. */
+int MatchFinder_NeedMove(CMatchFinder *p)
+{
+  size_t tail;
+
+  if (p->directInput)
+    return 0;
+
+  tail = (size_t)(p->bufferBase + p->blockSize - p->buffer);
+  return (tail <= p->keepSizeAfter) ? 1 : 0;
+}
+
+/* Reads another block when the stream has not ended and no more than
+   keepSizeAfter bytes lie between pos and streamPos. */
+void MatchFinder_ReadIfRequired(CMatchFinder *p)
+{
+  if (!p->streamEndWasReached && p->streamPos - p->pos <= p->keepSizeAfter)
+    MatchFinder_ReadBlock(p);
+}
+
+/* Moves the window first if MatchFinder_NeedMove says so, then reads more input. */
+static void MatchFinder_CheckAndMoveAndRead(CMatchFinder *p)
+{
+  int mustMove = MatchFinder_NeedMove(p);
+
+  if (mustMove)
+    MatchFinder_MoveBlock(p);
+
+  MatchFinder_ReadBlock(p);
+}
+
+/* Default tuning: btMode on, 4 hash bytes, cutValue 32, bigHash off. */
+static void MatchFinder_SetDefaultSettings(CMatchFinder *p)
+{
+  p->btMode = 1;
+  p->numHashBytes = 4;
+  p->cutValue = 32;
+  p->bigHash = 0;
+}
+
+#define kCrcPoly 0xEDB88320
+
+/* Puts a CMatchFinder into its initial state: no buffers, direct input off,
+   expected data size unknown (all bits set), default settings, and the
+   256-entry CRC table p->crc[] built from kCrcPoly. */
+void MatchFinder_Construct(CMatchFinder *p)
+{
+  unsigned n;
+
+  p->hash = NULL;
+  p->bufferBase = NULL;
+  p->directInput = 0;
+  p->expectedDataSize = (UInt64)(Int64)-1;
+  MatchFinder_SetDefaultSettings(p);
+
+  for (n = 0; n < 256; n++)
+  {
+    UInt32 crc = (UInt32)n;
+    unsigned bits = 8;
+
+    /* One shift/xor step per bit; the mask is all ones when the low bit is set. */
+    while (bits-- != 0)
+      crc = (crc >> 1) ^ (kCrcPoly & ((UInt32)0 - (crc & 1)));
+
+    p->crc[n] = crc;
+  }
+}
+
+/* Frees the hash array and clears the pointer; the window buffer is not touched. */
+static void MatchFinder_FreeThisClassMemory(CMatchFinder *p, ISzAllocPtr alloc)
+{
+  CLzRef *refs = p->hash;
+
+  p->hash = NULL;
+  ISzAlloc_Free(alloc, refs);
+}
+
+/* Frees everything the match finder owns: the hash array, then the window buffer. */
+void MatchFinder_Free(CMatchFinder *p, ISzAllocPtr alloc)
+{
+  MatchFinder_FreeThisClassMemory(p, alloc);
+
+  LzInWindow_Free(p, alloc);
+}
+
+/* Allocates an array of num CLzRef items through alloc.
+   Returns NULL if the byte count overflows size_t; otherwise returns whatever
+   the allocator returns. */
+static CLzRef* AllocRefs(size_t num, ISzAllocPtr alloc)
+{
+  size_t bytes = num * sizeof(CLzRef);
+
+  /* Dividing back exposes a wrapped multiplication. */
+  if (bytes / sizeof(CLzRef) == num)
+    return (CLzRef *)ISzAlloc_Alloc(alloc, bytes);
+
+  return NULL;
+}
+
+/* Sizes and allocates the window buffer and the hash/son reference array for
+ * the given parameters.
+ *
+ * Returns 1 on success. Returns 0 when historySize exceeds kMaxHistorySize or
+ * an allocation fails; in both cases everything owned by p is freed first.
+ * An existing reference array is reused when its size (p->numRefs) already
+ * matches the newly computed one. */
+int MatchFinder_Create(CMatchFinder *p, UInt32 historySize,
+ UInt32 keepAddBufferBefore, UInt32 matchMaxLen, UInt32 keepAddBufferAfter,
+ ISzAllocPtr alloc)
+{
+ UInt32 sizeReserv;
+
+ if (historySize > kMaxHistorySize)
+ {
+ MatchFinder_Free(p, alloc);
+ return 0;
+ }
+
+ /* Reserve a fraction of the history size; the fraction shrinks for very
+ large histories. */
+ sizeReserv = historySize >> 1;
+ if (historySize >= ((UInt32)3 << 30)) sizeReserv = historySize >> 3;
+ else if (historySize >= ((UInt32)2 << 30)) sizeReserv = historySize >> 2;
+
+ sizeReserv += (keepAddBufferBefore + matchMaxLen + keepAddBufferAfter) / 2 + (1 << 19);
+
+ p->keepSizeBefore = historySize + keepAddBufferBefore + 1;
+ p->keepSizeAfter = matchMaxLen + keepAddBufferAfter;
+
+ /* we need one additional byte, since we use MoveBlock after pos++ and before dictionary using */
+
+ if (LzInWindow_Create(p, sizeReserv, alloc))
+ {
+ UInt32 newCyclicBufferSize = historySize + 1;
+ UInt32 hs;
+ p->matchMaxLen = matchMaxLen;
+ {
+ /* Compute the main hash mask hs, which always has the form 2^k - 1. */
+ p->fixedHashSize = 0;
+ if (p->numHashBytes == 2)
+ hs = (1 << 16) - 1;
+ else
+ {
+ hs = historySize;
+ if (hs > p->expectedDataSize)
+ hs = (UInt32)p->expectedDataSize;
+ if (hs != 0)
+ hs--;
+ /* Smear the high bits downwards, then halve and force at least 16 low bits. */
+ hs |= (hs >> 1);
+ hs |= (hs >> 2);
+ hs |= (hs >> 4);
+ hs |= (hs >> 8);
+ hs >>= 1;
+ hs |= 0xFFFF; /* don't change it! It's required for Deflate */
+ if (hs > (1 << 24))
+ {
+ if (p->numHashBytes == 3)
+ hs = (1 << 24) - 1;
+ else
+ hs >>= 1;
+ /* if (bigHash) mode, GetHeads4b() in LzFindMt.c needs (hs >= ((1 << 24) - 1))) */
+ }
+ }
+ p->hashMask = hs;
+ hs++;
+ /* The fixed-size tables for the shorter hashes precede the main table. */
+ if (p->numHashBytes > 2) p->fixedHashSize += kHash2Size;
+ if (p->numHashBytes > 3) p->fixedHashSize += kHash3Size;
+ if (p->numHashBytes > 4) p->fixedHashSize += kHash4Size;
+ hs += p->fixedHashSize;
+ }
+
+ {
+ size_t newSize;
+ size_t numSons;
+ p->historySize = historySize;
+ p->hashSizeSum = hs;
+ p->cyclicBufferSize = newCyclicBufferSize;
+
+ /* btMode stores two references per cyclic-buffer position. */
+ numSons = newCyclicBufferSize;
+ if (p->btMode)
+ numSons <<= 1;
+ newSize = hs + numSons;
+
+ if (p->hash && p->numRefs == newSize)
+ return 1;
+
+ MatchFinder_FreeThisClassMemory(p, alloc);
+ p->numRefs = newSize;
+ p->hash = AllocRefs(newSize, alloc);
+
+ if (p->hash)
+ {
+ /* The son array shares the allocation and starts right after the hash tables. */
+ p->son = p->hash + p->hashSizeSum;
+ return 1;
+ }
+ }
+ }
+
+ MatchFinder_Free(p, alloc);
+ return 0;
+}
+
+/* Recomputes p->posLimit (the position at which MatchFinder_CheckLimits must
+   run next) and p->lenLimit (the match length cap at the current position). */
+static void MatchFinder_SetLimits(CMatchFinder *p)
+{
+  UInt32 avail = p->streamPos - p->pos;
+  UInt32 limit = kMaxValForNormalize - p->pos;
+  UInt32 cand = p->cyclicBufferSize - p->cyclicBufferPos;
+
+  /* Stop no later than the wrap point of the cyclic buffer. */
+  if (cand < limit)
+    limit = cand;
+
+  /* Bytes that can be consumed while keeping keepSizeAfter bytes of
+     look-ahead; one byte at a time once the look-ahead is that short. */
+  if (avail > p->keepSizeAfter)
+    cand = avail - p->keepSizeAfter;
+  else
+    cand = (avail > 0) ? 1 : 0;
+
+  if (cand < limit)
+    limit = cand;
+
+  p->lenLimit = (avail > p->matchMaxLen) ? p->matchMaxLen : avail;
+  p->posLimit = p->pos + limit;
+}
+
+
+/* Clears the first p->fixedHashSize entries of p->hash to kEmptyHashValue. */
+void MatchFinder_Init_LowHash(CMatchFinder *p)
+{
+  CLzRef *table = p->hash;
+  size_t left = p->fixedHashSize;
+
+  while (left != 0)
+    table[--left] = kEmptyHashValue;
+}
+
+
+/* Clears the (hashMask + 1) entries that follow the fixed-size part of
+   p->hash to kEmptyHashValue. */
+void MatchFinder_Init_HighHash(CMatchFinder *p)
+{
+  CLzRef *table = p->hash + p->fixedHashSize;
+  size_t left = (size_t)p->hashMask + 1;
+
+  while (left != 0)
+    table[--left] = kEmptyHashValue;
+}
+
+
+/* Resets positions and status. pos and streamPos both start at
+   cyclicBufferSize, and buffer is rewound to bufferBase. When readData is
+   non-zero the first block is read before the limits are computed. */
+void MatchFinder_Init_3(CMatchFinder *p, int readData)
+{
+  p->buffer = p->bufferBase;
+  p->cyclicBufferPos = 0;
+  p->streamPos = p->cyclicBufferSize;
+  p->pos = p->streamPos;
+  p->streamEndWasReached = 0;
+  p->result = SZ_OK;
+
+  if (readData)
+    MatchFinder_ReadBlock(p);
+
+  MatchFinder_SetLimits(p);
+}
+
+
+/* Full initialization: clear both hash regions, then reset positions and read
+   the first block of data. */
+void MatchFinder_Init(CMatchFinder *p)
+{
+  MatchFinder_Init_HighHash(p);
+  MatchFinder_Init_LowHash(p);
+
+  MatchFinder_Init_3(p, True);
+}
+
+
+/* Amount to subtract from all positions when normalizing:
+   (pos - historySize - 1) with the low bits cleared by kNormalizeMask. */
+static UInt32 MatchFinder_GetSubValue(CMatchFinder *p)
+{
+  UInt32 base = p->pos - p->historySize - 1;
+
+  return base & kNormalizeMask;
+}
+
+/* Rebases numItems stored positions by subValue. Entries not greater than
+   subValue become kEmptyHashValue; all others are reduced by subValue. */
+void MatchFinder_Normalize3(UInt32 subValue, CLzRef *items, size_t numItems)
+{
+  size_t k = 0;
+
+  while (k < numItems)
+  {
+    UInt32 cur = items[k];
+    items[k] = (cur <= subValue) ? kEmptyHashValue : (cur - subValue);
+    k++;
+  }
+}
+
+/* Rebases every stored reference (all p->numRefs entries starting at p->hash)
+   and then the finder's own position counters by the same amount. */
+static void MatchFinder_Normalize(CMatchFinder *p)
+{
+  const UInt32 shift = MatchFinder_GetSubValue(p);
+
+  MatchFinder_Normalize3(shift, p->hash, p->numRefs);
+  MatchFinder_ReduceOffsets(p, shift);
+}
+
+
+/* Called when pos reaches posLimit. Normalizes positions at the maximum pos
+   value, refills the window when exactly keepSizeAfter bytes remain, wraps
+   the cyclic buffer position, and finally recomputes the limits. */
+MY_NO_INLINE
+static void MatchFinder_CheckLimits(CMatchFinder *p)
+{
+  if (p->pos == kMaxValForNormalize)
+    MatchFinder_Normalize(p);
+
+  if (!p->streamEndWasReached)
+  {
+    if (p->streamPos - p->pos == p->keepSizeAfter)
+      MatchFinder_CheckAndMoveAndRead(p);
+  }
+
+  if (p->cyclicBufferPos == p->cyclicBufferSize)
+    p->cyclicBufferPos = 0;
+
+  MatchFinder_SetLimits(p);
+}
+
+
+/*
+ Hash-chain match search.
+
+ Records curMatch as the chain link for the current cyclic position, then
+ walks the chain through son[]. The walk stops after cutValue candidates or
+ at the first candidate whose distance (pos - curMatch) is not below
+ _cyclicBufferSize. Every candidate longer than the best length so far is
+ appended to distances as a (length, distance - 1) pair. A match that
+ reaches lenLimit is appended and ends the search at once.
+
+ Returns the pointer just past the last pair written.
+
+ Precondition:
+ (lenLimit > maxLen)
+*/
+MY_FORCE_INLINE
+static UInt32 * Hc_GetMatchesSpec(unsigned lenLimit, UInt32 curMatch, UInt32 pos, const Byte *cur, CLzRef *son,
+ UInt32 _cyclicBufferPos, UInt32 _cyclicBufferSize, UInt32 cutValue,
+ UInt32 *distances, unsigned maxLen)
+{
+ /* Earlier formulation of the same search, kept by the original author for reference: */
+ /*
+ son[_cyclicBufferPos] = curMatch;
+ for (;;)
+ {
+ UInt32 delta = pos - curMatch;
+ if (cutValue-- == 0 || delta >= _cyclicBufferSize)
+ return distances;
+ {
+ const Byte *pb = cur - delta;
+ curMatch = son[_cyclicBufferPos - delta + ((delta > _cyclicBufferPos) ? _cyclicBufferSize : 0)];
+ if (pb[maxLen] == cur[maxLen] && *pb == *cur)
+ {
+ UInt32 len = 0;
+ while (++len != lenLimit)
+ if (pb[len] != cur[len])
+ break;
+ if (maxLen < len)
+ {
+ maxLen = len;
+ *distances++ = len;
+ *distances++ = delta - 1;
+ if (len == lenLimit)
+ return distances;
+ }
+ }
+ }
+ }
+ */
+
+ const Byte *lim = cur + lenLimit;
+ son[_cyclicBufferPos] = curMatch;
+ do
+ {
+ UInt32 delta = pos - curMatch;
+ if (delta >= _cyclicBufferSize)
+ break;
+ {
+ ptrdiff_t diff;
+ /* Fetch the next chain link, wrapping the index inside the cyclic buffer. */
+ curMatch = son[_cyclicBufferPos - delta + ((delta > _cyclicBufferPos) ? _cyclicBufferSize : 0)];
+ /* diff is the negative offset from cur to the candidate. */
+ diff = (ptrdiff_t)0 - delta;
+ /* Cheap filter: the candidate can only beat maxLen if the byte at offset maxLen matches. */
+ if (cur[maxLen] == cur[maxLen + diff])
+ {
+ const Byte *c = cur;
+ while (*c == c[diff])
+ {
+ if (++c == lim)
+ {
+ distances[0] = (UInt32)(lim - cur);
+ distances[1] = delta - 1;
+ return distances + 2;
+ }
+ }
+ {
+ unsigned len = (unsigned)(c - cur);
+ if (maxLen < len)
+ {
+ maxLen = len;
+ distances[0] = (UInt32)len;
+ distances[1] = delta - 1;
+ distances += 2;
+ }
+ }
+ }
+ }
+ }
+ while (--cutValue);
+
+ return distances;
+}
+
+
+/* Binary-tree match search that also inserts the current position.
+ *
+ * son[] holds two references per cyclic position (a pair). Starting at
+ * curMatch, the tree is descended by comparing the first differing byte of
+ * the candidate and the current string. ptr0/ptr1 are the two pending link
+ * slots, re-pointed on every step so that the tree ends up re-linked around
+ * the current position. Each candidate longer than maxLen is appended to
+ * distances as a (length, distance - 1) pair.
+ *
+ * The search ends when cutValue candidates were visited or the candidate is
+ * at distance >= _cyclicBufferSize (both pending slots are then set to
+ * kEmptyHashValue), or when a match reaches lenLimit (the candidate's own
+ * pair is then copied into the pending slots).
+ *
+ * Returns the pointer just past the last pair written. */
+MY_FORCE_INLINE
+UInt32 * GetMatchesSpec1(UInt32 lenLimit, UInt32 curMatch, UInt32 pos, const Byte *cur, CLzRef *son,
+ UInt32 _cyclicBufferPos, UInt32 _cyclicBufferSize, UInt32 cutValue,
+ UInt32 *distances, UInt32 maxLen)
+{
+ CLzRef *ptr0 = son + ((size_t)_cyclicBufferPos << 1) + 1;
+ CLzRef *ptr1 = son + ((size_t)_cyclicBufferPos << 1);
+ unsigned len0 = 0, len1 = 0;
+ for (;;)
+ {
+ UInt32 delta = pos - curMatch;
+ if (cutValue-- == 0 || delta >= _cyclicBufferSize)
+ {
+ *ptr0 = *ptr1 = kEmptyHashValue;
+ return distances;
+ }
+ {
+ /* The candidate's pair, with its index wrapped inside the cyclic buffer. */
+ CLzRef *pair = son + ((size_t)(_cyclicBufferPos - delta + ((delta > _cyclicBufferPos) ? _cyclicBufferSize : 0)) << 1);
+ const Byte *pb = cur - delta;
+ /* Bytes below min(len0, len1) are already known to match on this path. */
+ unsigned len = (len0 < len1 ? len0 : len1);
+ UInt32 pair0 = pair[0];
+ if (pb[len] == cur[len])
+ {
+ if (++len != lenLimit && pb[len] == cur[len])
+ while (++len != lenLimit)
+ if (pb[len] != cur[len])
+ break;
+ if (maxLen < len)
+ {
+ maxLen = (UInt32)len;
+ *distances++ = (UInt32)len;
+ *distances++ = delta - 1;
+ if (len == lenLimit)
+ {
+ *ptr1 = pair0;
+ *ptr0 = pair[1];
+ return distances;
+ }
+ }
+ }
+ /* Branch on the first differing byte. */
+ if (pb[len] < cur[len])
+ {
+ *ptr1 = curMatch;
+ ptr1 = pair + 1;
+ curMatch = *ptr1;
+ len1 = len;
+ }
+ else
+ {
+ *ptr0 = curMatch;
+ ptr0 = pair;
+ curMatch = *ptr0;
+ len0 = len;
+ }
+ }
+ }
+}
+
+/* Inserts the current position into the binary tree without reporting any
+ * matches. The descent and re-linking are the same as in GetMatchesSpec1,
+ * minus the distances output: it stops after cutValue candidates, at a
+ * candidate whose distance is >= _cyclicBufferSize, or when a candidate
+ * matches for the full lenLimit bytes. */
+static void SkipMatchesSpec(UInt32 lenLimit, UInt32 curMatch, UInt32 pos, const Byte *cur, CLzRef *son,
+ UInt32 _cyclicBufferPos, UInt32 _cyclicBufferSize, UInt32 cutValue)
+{
+ CLzRef *ptr0 = son + ((size_t)_cyclicBufferPos << 1) + 1;
+ CLzRef *ptr1 = son + ((size_t)_cyclicBufferPos << 1);
+ unsigned len0 = 0, len1 = 0;
+ for (;;)
+ {
+ UInt32 delta = pos - curMatch;
+ if (cutValue-- == 0 || delta >= _cyclicBufferSize)
+ {
+ *ptr0 = *ptr1 = kEmptyHashValue;
+ return;
+ }
+ {
+ /* The candidate's pair, with its index wrapped inside the cyclic buffer. */
+ CLzRef *pair = son + ((size_t)(_cyclicBufferPos - delta + ((delta > _cyclicBufferPos) ? _cyclicBufferSize : 0)) << 1);
+ const Byte *pb = cur - delta;
+ unsigned len = (len0 < len1 ? len0 : len1);
+ if (pb[len] == cur[len])
+ {
+ while (++len != lenLimit)
+ if (pb[len] != cur[len])
+ break;
+ {
+ if (len == lenLimit)
+ {
+ /* Full-length match: take over the candidate's pair and stop. */
+ *ptr1 = pair[0];
+ *ptr0 = pair[1];
+ return;
+ }
+ }
+ }
+ if (pb[len] < cur[len])
+ {
+ *ptr1 = curMatch;
+ ptr1 = pair + 1;
+ curMatch = *ptr1;
+ len1 = len;
+ }
+ else
+ {
+ *ptr0 = curMatch;
+ ptr0 = pair;
+ curMatch = *ptr0;
+ len0 = len;
+ }
+ }
+ }
+}
+/* MOVE_POS: advance the finder `p` by one byte (cyclicBufferPos, buffer, pos) and call MatchFinder_CheckLimits(p) when pos reaches posLimit. Expands to several statements ending in an `if`; it is not wrapped in do/while(0). */
+#define MOVE_POS \
+ ++p->cyclicBufferPos; \
+ p->buffer++; \
+ if (++p->pos == p->posLimit) MatchFinder_CheckLimits(p);
+/* MOVE_POS_RET: MOVE_POS, then return the caller's local `offset` as UInt32. */
+#define MOVE_POS_RET MOVE_POS return (UInt32)offset;
+/* Function form of MOVE_POS, used by GET_MATCHES_HEADER2 on the early-exit path. */
+static void MatchFinder_MovePos(CMatchFinder *p) { MOVE_POS; }
+/* GET_MATCHES_HEADER2: declares the locals lenLimit, hv, cur and curMatch. When p->lenLimit is below minLen it advances one byte and executes ret_op; otherwise it leaves cur pointing at p->buffer. */
+#define GET_MATCHES_HEADER2(minLen, ret_op) \
+ unsigned lenLimit; UInt32 hv; const Byte *cur; UInt32 curMatch; \
+ lenLimit = (unsigned)p->lenLimit; { if (lenLimit < minLen) { MatchFinder_MovePos(p); ret_op; }} \
+ cur = p->buffer;
+/* GetMatches variants return 0 on the short-input path; Skip variants `continue` their enclosing do/while loop. */
+#define GET_MATCHES_HEADER(minLen) GET_MATCHES_HEADER2(minLen, return 0)
+#define SKIP_HEADER(minLen) GET_MATCHES_HEADER2(minLen, continue)
+/* MF_PARAMS: the pos, cur, son, _cyclicBufferPos, _cyclicBufferSize and cutValue arguments of the *Spec functions, taken from p. */
+#define MF_PARAMS(p) p->pos, p->buffer, p->son, p->cyclicBufferPos, p->cyclicBufferSize, p->cutValue
+/* GET_MATCHES_FOOTER: run GetMatchesSpec1 writing after the `offset` entries already in distances[], set offset to the new entry count, advance one byte and return offset. */
+#define GET_MATCHES_FOOTER(offset, maxLen) \
+ offset = (unsigned)(GetMatchesSpec1((UInt32)lenLimit, curMatch, MF_PARAMS(p), \
+ distances + offset, (UInt32)maxLen) - distances); MOVE_POS_RET;
+/* SKIP_FOOTER: insert the current position with SkipMatchesSpec, then advance one byte. */
+#define SKIP_FOOTER \
+ SkipMatchesSpec((UInt32)lenLimit, curMatch, MF_PARAMS(p)); MOVE_POS;
+/* UPDATE_maxLen: starting at the caller's maxLen, extend it while the bytes d2 positions back equal the current bytes, stopping at lenLimit. Uses the caller's locals cur, d2, maxLen and lenLimit. */
+#define UPDATE_maxLen { \
+ ptrdiff_t diff = (ptrdiff_t)0 - d2; \
+ const Byte *c = cur + maxLen; \
+ const Byte *lim = cur + lenLimit; \
+ for (; c != lim; c++) if (*(c + diff) != *c) break; \
+ maxLen = (unsigned)(c - cur); }
+/* Tree-mode match search keyed on a 2-byte hash. Writes (length, distance - 1) pairs to distances[] and returns the number of UInt32 entries written; returns 0 when fewer than 2 bytes are available. Always advances the finder by one byte. */
+static UInt32 Bt2_MatchFinder_GetMatches(CMatchFinder *p, UInt32 *distances)
+{
+ unsigned offset;
+ GET_MATCHES_HEADER(2) /* declares lenLimit, hv, cur, curMatch; may return 0 */
+ HASH2_CALC;
+ curMatch = p->hash[hv]; /* previous position stored in this hash slot */
+ p->hash[hv] = p->pos; /* the current position becomes the new slot head */
+ offset = 0;
+ GET_MATCHES_FOOTER(offset, 1) /* only matches longer than 1 byte are reported; returns */
+}
+/* Tree-mode match search keyed on HASH_ZIP_CALC (a 16-bit hash of 3 bytes). Returns the number of UInt32 entries written to distances[], or 0 when fewer than 3 bytes are available. Always advances the finder by one byte. */
+UInt32 Bt3Zip_MatchFinder_GetMatches(CMatchFinder *p, UInt32 *distances)
+{
+ unsigned offset;
+ GET_MATCHES_HEADER(3) /* declares lenLimit, hv, cur, curMatch; may return 0 */
+ HASH_ZIP_CALC;
+ curMatch = p->hash[hv]; /* previous position stored in this hash slot */
+ p->hash[hv] = p->pos;
+ offset = 0;
+ GET_MATCHES_FOOTER(offset, 2) /* only matches longer than 2 bytes are reported; returns */
+}
+/* Tree-mode match search with a 2-byte hash table at p->hash and the main 3-byte table at p->hash + kFix3HashSize. The 2-byte table supplies a quick first candidate; the tree search then looks for longer matches. Returns the number of UInt32 entries written to distances[]. */
+static UInt32 Bt3_MatchFinder_GetMatches(CMatchFinder *p, UInt32 *distances)
+{
+ UInt32 h2, d2, pos;
+ unsigned maxLen, offset;
+ UInt32 *hash;
+ GET_MATCHES_HEADER(3) /* declares lenLimit, hv, cur, curMatch; may return 0 */
+
+ HASH3_CALC;
+
+ hash = p->hash;
+ pos = p->pos;
+
+ d2 = pos - hash[h2]; /* distance back to the last position with the same 2-byte hash */
+
+ curMatch = (hash + kFix3HashSize)[hv];
+
+ hash[h2] = pos;
+ (hash + kFix3HashSize)[hv] = pos;
+
+ maxLen = 2;
+ offset = 0;
+
+ if (d2 < p->cyclicBufferSize && *(cur - d2) == *cur)
+ { /* 2-byte candidate accepted after checking its first byte; extend it byte by byte */
+ UPDATE_maxLen
+ distances[0] = (UInt32)maxLen;
+ distances[1] = d2 - 1;
+ offset = 2;
+ if (maxLen == lenLimit)
+ { /* already the longest possible match: insert the position without searching further */
+ SkipMatchesSpec((UInt32)lenLimit, curMatch, MF_PARAMS(p));
+ MOVE_POS_RET;
+ }
+ }
+
+ GET_MATCHES_FOOTER(offset, maxLen)
+}
+/* Tree-mode match search with 2-byte and 3-byte helper hash tables (at p->hash and p->hash + kFix3HashSize) and the main table at p->hash + kFix4HashSize. The helper tables supply short candidates first; the tree search then looks for matches longer than 3 bytes. Returns the number of UInt32 entries written to distances[]. */
+static UInt32 Bt4_MatchFinder_GetMatches(CMatchFinder *p, UInt32 *distances)
+{
+ UInt32 h2, h3, d2, d3, pos;
+ unsigned maxLen, offset;
+ UInt32 *hash;
+ GET_MATCHES_HEADER(4) /* declares lenLimit, hv, cur, curMatch; may return 0 */
+
+ HASH4_CALC;
+
+ hash = p->hash;
+ pos = p->pos;
+
+ d2 = pos - hash [h2]; /* distance to the last position with the same 2-byte hash */
+ d3 = pos - (hash + kFix3HashSize)[h3]; /* distance to the last position with the same 3-byte hash */
+
+ curMatch = (hash + kFix4HashSize)[hv];
+
+ hash [h2] = pos;
+ (hash + kFix3HashSize)[h3] = pos;
+ (hash + kFix4HashSize)[hv] = pos;
+
+ maxLen = 0;
+ offset = 0;
+
+ if (d2 < p->cyclicBufferSize && *(cur - d2) == *cur)
+ { /* provisional 2-byte match */
+ maxLen = 2;
+ distances[0] = 2;
+ distances[1] = d2 - 1;
+ offset = 2;
+ }
+
+ if (d2 != d3 && d3 < p->cyclicBufferSize && *(cur - d3) == *cur)
+ { /* a different candidate from the 3-byte table; its length slot is filled in below */
+ maxLen = 3;
+ distances[(size_t)offset + 1] = d3 - 1;
+ offset += 2;
+ d2 = d3; /* UPDATE_maxLen extends along d2, so make it the latest candidate */
+ }
+
+ if (offset != 0)
+ {
+ UPDATE_maxLen
+ distances[(size_t)offset - 2] = (UInt32)maxLen; /* length of the most recently added pair */
+ if (maxLen == lenLimit)
+ { /* already the longest possible match: insert the position without searching further */
+ SkipMatchesSpec((UInt32)lenLimit, curMatch, MF_PARAMS(p));
+ MOVE_POS_RET;
+ }
+ }
+
+ if (maxLen < 3)
+ maxLen = 3; /* the tree search only reports matches longer than this */
+
+ GET_MATCHES_FOOTER(offset, maxLen)
+}
+
+/*
+static UInt32 Bt5_MatchFinder_GetMatches(CMatchFinder *p, UInt32 *distances)
+{
+ UInt32 h2, h3, h4, d2, d3, d4, maxLen, offset, pos;
+ UInt32 *hash;
+ GET_MATCHES_HEADER(5)
+
+ HASH5_CALC;
+
+ hash = p->hash;
+ pos = p->pos;
+
+ d2 = pos - hash [h2];
+ d3 = pos - (hash + kFix3HashSize)[h3];
+ d4 = pos - (hash + kFix4HashSize)[h4];
+
+ curMatch = (hash + kFix5HashSize)[hv];
+
+ hash [h2] = pos;
+ (hash + kFix3HashSize)[h3] = pos;
+ (hash + kFix4HashSize)[h4] = pos;
+ (hash + kFix5HashSize)[hv] = pos;
+
+ maxLen = 0;
+ offset = 0;
+
+ if (d2 < p->cyclicBufferSize && *(cur - d2) == *cur)
+ {
+ distances[0] = maxLen = 2;
+ distances[1] = d2 - 1;
+ offset = 2;
+ if (*(cur - d2 + 2) == cur[2])
+ distances[0] = maxLen = 3;
+ else if (d3 < p->cyclicBufferSize && *(cur - d3) == *cur)
+ {
+ distances[2] = maxLen = 3;
+ distances[3] = d3 - 1;
+ offset = 4;
+ d2 = d3;
+ }
+ }
+ else if (d3 < p->cyclicBufferSize && *(cur - d3) == *cur)
+ {
+ distances[0] = maxLen = 3;
+ distances[1] = d3 - 1;
+ offset = 2;
+ d2 = d3;
+ }
+
+ if (d2 != d4 && d4 < p->cyclicBufferSize
+ && *(cur - d4) == *cur
+ && *(cur - d4 + 3) == *(cur + 3))
+ {
+ maxLen = 4;
+ distances[(size_t)offset + 1] = d4 - 1;
+ offset += 2;
+ d2 = d4;
+ }
+
+ if (offset != 0)
+ {
+ UPDATE_maxLen
+ distances[(size_t)offset - 2] = maxLen;
+ if (maxLen == lenLimit)
+ {
+ SkipMatchesSpec(lenLimit, curMatch, MF_PARAMS(p));
+ MOVE_POS_RET;
+ }
+ }
+
+ if (maxLen < 4)
+ maxLen = 4;
+
+ GET_MATCHES_FOOTER(offset, maxLen)
+}
+*/
+/* Hash-chain counterpart of Bt4_MatchFinder_GetMatches: the same 2-, 3- and 4-byte hash tables, but son[] holds one link per position and the search is delegated to Hc_GetMatchesSpec (defined elsewhere in this file). Returns the number of UInt32 entries written to distances[]. */
+static UInt32 Hc4_MatchFinder_GetMatches(CMatchFinder *p, UInt32 *distances)
+{
+ UInt32 h2, h3, d2, d3, pos;
+ unsigned maxLen, offset;
+ UInt32 *hash;
+ GET_MATCHES_HEADER(4) /* declares lenLimit, hv, cur, curMatch; may return 0 */
+
+ HASH4_CALC;
+
+ hash = p->hash;
+ pos = p->pos;
+
+ d2 = pos - hash [h2]; /* distance to the last position with the same 2-byte hash */
+ d3 = pos - (hash + kFix3HashSize)[h3]; /* distance to the last position with the same 3-byte hash */
+ curMatch = (hash + kFix4HashSize)[hv];
+
+ hash [h2] = pos;
+ (hash + kFix3HashSize)[h3] = pos;
+ (hash + kFix4HashSize)[hv] = pos;
+
+ maxLen = 0;
+ offset = 0;
+
+ if (d2 < p->cyclicBufferSize && *(cur - d2) == *cur)
+ { /* provisional 2-byte match */
+ maxLen = 2;
+ distances[0] = 2;
+ distances[1] = d2 - 1;
+ offset = 2;
+ }
+
+ if (d2 != d3 && d3 < p->cyclicBufferSize && *(cur - d3) == *cur)
+ { /* a different candidate from the 3-byte table; its length slot is filled in below */
+ maxLen = 3;
+ distances[(size_t)offset + 1] = d3 - 1;
+ offset += 2;
+ d2 = d3; /* UPDATE_maxLen extends along d2 */
+ }
+
+ if (offset != 0)
+ {
+ UPDATE_maxLen
+ distances[(size_t)offset - 2] = (UInt32)maxLen; /* length of the most recently added pair */
+ if (maxLen == lenLimit)
+ { /* longest possible match found: just link the position into the chain */
+ p->son[p->cyclicBufferPos] = curMatch;
+ MOVE_POS_RET;
+ }
+ }
+
+ if (maxLen < 3)
+ maxLen = 3;
+
+ offset = (unsigned)(Hc_GetMatchesSpec(lenLimit, curMatch, MF_PARAMS(p),
+ distances + offset, maxLen) - (distances)); /* new entry count, measured from the start of distances[] */
+ MOVE_POS_RET
+}
+
+/*
+static UInt32 Hc5_MatchFinder_GetMatches(CMatchFinder *p, UInt32 *distances)
+{
+ UInt32 h2, h3, h4, d2, d3, d4, maxLen, offset, pos;
+ UInt32 *hash;
+ GET_MATCHES_HEADER(5)
+
+ HASH5_CALC;
+
+ hash = p->hash;
+ pos = p->pos;
+
+ d2 = pos - hash [h2];
+ d3 = pos - (hash + kFix3HashSize)[h3];
+ d4 = pos - (hash + kFix4HashSize)[h4];
+
+ curMatch = (hash + kFix5HashSize)[hv];
+
+ hash [h2] = pos;
+ (hash + kFix3HashSize)[h3] = pos;
+ (hash + kFix4HashSize)[h4] = pos;
+ (hash + kFix5HashSize)[hv] = pos;
+
+ maxLen = 0;
+ offset = 0;
+
+ if (d2 < p->cyclicBufferSize && *(cur - d2) == *cur)
+ {
+ distances[0] = maxLen = 2;
+ distances[1] = d2 - 1;
+ offset = 2;
+ if (*(cur - d2 + 2) == cur[2])
+ distances[0] = maxLen = 3;
+ else if (d3 < p->cyclicBufferSize && *(cur - d3) == *cur)
+ {
+ distances[2] = maxLen = 3;
+ distances[3] = d3 - 1;
+ offset = 4;
+ d2 = d3;
+ }
+ }
+ else if (d3 < p->cyclicBufferSize && *(cur - d3) == *cur)
+ {
+ distances[0] = maxLen = 3;
+ distances[1] = d3 - 1;
+ offset = 2;
+ d2 = d3;
+ }
+
+ if (d2 != d4 && d4 < p->cyclicBufferSize
+ && *(cur - d4) == *cur
+ && *(cur - d4 + 3) == *(cur + 3))
+ {
+ maxLen = 4;
+ distances[(size_t)offset + 1] = d4 - 1;
+ offset += 2;
+ d2 = d4;
+ }
+
+ if (offset != 0)
+ {
+ UPDATE_maxLen
+ distances[(size_t)offset - 2] = maxLen;
+ if (maxLen == lenLimit)
+ {
+ p->son[p->cyclicBufferPos] = curMatch;
+ MOVE_POS_RET;
+ }
+ }
+
+ if (maxLen < 4)
+ maxLen = 4;
+
+ offset = (UInt32)(Hc_GetMatchesSpec(lenLimit, curMatch, MF_PARAMS(p),
+ distances + offset, maxLen) - (distances));
+ MOVE_POS_RET
+}
+*/
+/* Hash-chain match search keyed on HASH_ZIP_CALC. Delegates the search to Hc_GetMatchesSpec (defined elsewhere in this file) with an initial maxLen of 2. Returns the number of UInt32 entries written to distances[], or 0 when fewer than 3 bytes are available. */
+UInt32 Hc3Zip_MatchFinder_GetMatches(CMatchFinder *p, UInt32 *distances)
+{
+ unsigned offset;
+ GET_MATCHES_HEADER(3) /* declares lenLimit, hv, cur, curMatch; may return 0 */
+ HASH_ZIP_CALC;
+ curMatch = p->hash[hv]; /* previous position stored in this hash slot */
+ p->hash[hv] = p->pos;
+ offset = (unsigned)(Hc_GetMatchesSpec(lenLimit, curMatch, MF_PARAMS(p),
+ distances, 2) - (distances));
+ MOVE_POS_RET
+}
+/* Inserts the next `num` positions into the 2-byte-hash tree without reporting matches. The do/while body runs before `num` is tested, so at least one position is always consumed. */
+static void Bt2_MatchFinder_Skip(CMatchFinder *p, UInt32 num)
+{
+ do
+ {
+ SKIP_HEADER(2) /* too few bytes: advance one byte and `continue` to the loop test */
+ HASH2_CALC;
+ curMatch = p->hash[hv];
+ p->hash[hv] = p->pos;
+ SKIP_FOOTER
+ }
+ while (--num != 0);
+}
+/* Inserts the next `num` positions into the HASH_ZIP_CALC-keyed tree without reporting matches. The do/while body runs before `num` is tested, so at least one position is always consumed. */
+void Bt3Zip_MatchFinder_Skip(CMatchFinder *p, UInt32 num)
+{
+ do
+ {
+ SKIP_HEADER(3) /* too few bytes: advance one byte and `continue` to the loop test */
+ HASH_ZIP_CALC;
+ curMatch = p->hash[hv];
+ p->hash[hv] = p->pos;
+ SKIP_FOOTER
+ }
+ while (--num != 0);
+}
+/* Inserts the next `num` positions into the 3-byte-hash tree, updating both the 2-byte and the 3-byte hash tables, without reporting matches. At least one position is always consumed. */
+static void Bt3_MatchFinder_Skip(CMatchFinder *p, UInt32 num)
+{
+ do
+ {
+ UInt32 h2;
+ UInt32 *hash;
+ SKIP_HEADER(3) /* too few bytes: advance one byte and `continue` to the loop test */
+ HASH3_CALC;
+ hash = p->hash;
+ curMatch = (hash + kFix3HashSize)[hv]; /* read the old head before it is overwritten below */
+ hash[h2] =
+ (hash + kFix3HashSize)[hv] = p->pos;
+ SKIP_FOOTER
+ }
+ while (--num != 0);
+}
+/* Inserts the next `num` positions into the 4-byte-hash tree, updating the 2-, 3- and 4-byte hash tables, without reporting matches. At least one position is always consumed. */
+static void Bt4_MatchFinder_Skip(CMatchFinder *p, UInt32 num)
+{
+ do
+ {
+ UInt32 h2, h3;
+ UInt32 *hash;
+ SKIP_HEADER(4) /* too few bytes: advance one byte and `continue` to the loop test */
+ HASH4_CALC;
+ hash = p->hash;
+ curMatch = (hash + kFix4HashSize)[hv]; /* read the old head before it is overwritten below */
+ hash [h2] =
+ (hash + kFix3HashSize)[h3] =
+ (hash + kFix4HashSize)[hv] = p->pos;
+ SKIP_FOOTER
+ }
+ while (--num != 0);
+}
+
+/*
+static void Bt5_MatchFinder_Skip(CMatchFinder *p, UInt32 num)
+{
+ do
+ {
+ UInt32 h2, h3, h4;
+ UInt32 *hash;
+ SKIP_HEADER(5)
+ HASH5_CALC;
+ hash = p->hash;
+ curMatch = (hash + kFix5HashSize)[hv];
+ hash [h2] =
+ (hash + kFix3HashSize)[h3] =
+ (hash + kFix4HashSize)[h4] =
+ (hash + kFix5HashSize)[hv] = p->pos;
+ SKIP_FOOTER
+ }
+ while (--num != 0);
+}
+*/
+/* Hash-chain skip: for each of the next `num` positions, update the 2-, 3- and 4-byte hash tables and link the position to the previous head of its 4-byte slot. At least one position is always consumed. */
+static void Hc4_MatchFinder_Skip(CMatchFinder *p, UInt32 num)
+{
+ do
+ {
+ UInt32 h2, h3;
+ UInt32 *hash;
+ SKIP_HEADER(4) /* too few bytes: advance one byte and `continue` to the loop test */
+ HASH4_CALC;
+ hash = p->hash;
+ curMatch = (hash + kFix4HashSize)[hv]; /* read the old head before it is overwritten below */
+ hash [h2] =
+ (hash + kFix3HashSize)[h3] =
+ (hash + kFix4HashSize)[hv] = p->pos;
+ p->son[p->cyclicBufferPos] = curMatch; /* one chain link per position */
+ MOVE_POS
+ }
+ while (--num != 0);
+}
+
+/*
+static void Hc5_MatchFinder_Skip(CMatchFinder *p, UInt32 num)
+{
+ do
+ {
+ UInt32 h2, h3, h4;
+ UInt32 *hash;
+ SKIP_HEADER(5)
+ HASH5_CALC;
+ hash = p->hash;
+ curMatch = (hash + kFix5HashSize)[hv];
+ hash [h2] =
+ (hash + kFix3HashSize)[h3] =
+ (hash + kFix4HashSize)[h4] =
+ (hash + kFix5HashSize)[hv] = p->pos;
+ p->son[p->cyclicBufferPos] = curMatch;
+ MOVE_POS
+ }
+ while (--num != 0);
+}
+*/
+/* Hash-chain skip keyed on HASH_ZIP_CALC: for each of the next `num` positions, replace the hash slot head and link the position to the previous head. At least one position is always consumed. */
+void Hc3Zip_MatchFinder_Skip(CMatchFinder *p, UInt32 num)
+{
+ do
+ {
+ SKIP_HEADER(3) /* too few bytes: advance one byte and `continue` to the loop test */
+ HASH_ZIP_CALC;
+ curMatch = p->hash[hv];
+ p->hash[hv] = p->pos;
+ p->son[p->cyclicBufferPos] = curMatch; /* one chain link per position */
+ MOVE_POS
+ }
+ while (--num != 0);
+}
+
+/* Populates vTable with the callbacks that match the configuration of p.
+ *
+ * Init, GetNumAvailableBytes and GetPointerToCurrentPos are always the generic
+ * MatchFinder_* routines. The GetMatches / Skip pair is chosen as follows:
+ *   - btMode == 0          : Hc4 (hash chain)
+ *   - btMode, 2 hash bytes : Bt2
+ *   - btMode, 3 hash bytes : Bt3
+ *   - btMode, anything else: Bt4
+ * The 5-byte variants (Hc5 / Bt5) are commented out in this file and are
+ * therefore never selected.
+ */
+void MatchFinder_CreateVTable(CMatchFinder *p, IMatchFinder *vTable)
+{
+  Mf_GetMatches_Func findMatches;
+  Mf_Skip_Func skipBytes;
+
+  if (p->btMode)
+  {
+    switch (p->numHashBytes)
+    {
+      case 2:
+        findMatches = (Mf_GetMatches_Func)Bt2_MatchFinder_GetMatches;
+        skipBytes = (Mf_Skip_Func)Bt2_MatchFinder_Skip;
+        break;
+      case 3:
+        findMatches = (Mf_GetMatches_Func)Bt3_MatchFinder_GetMatches;
+        skipBytes = (Mf_Skip_Func)Bt3_MatchFinder_Skip;
+        break;
+      default: /* treated as numHashBytes == 4 */
+        findMatches = (Mf_GetMatches_Func)Bt4_MatchFinder_GetMatches;
+        skipBytes = (Mf_Skip_Func)Bt4_MatchFinder_Skip;
+        break;
+    }
+  }
+  else
+  {
+    /* hash-chain mode has a single implementation regardless of numHashBytes */
+    findMatches = (Mf_GetMatches_Func)Hc4_MatchFinder_GetMatches;
+    skipBytes = (Mf_Skip_Func)Hc4_MatchFinder_Skip;
+  }
+
+  vTable->Init = (Mf_Init_Func)MatchFinder_Init;
+  vTable->GetNumAvailableBytes = (Mf_GetNumAvailableBytes_Func)MatchFinder_GetNumAvailableBytes;
+  vTable->GetPointerToCurrentPos = (Mf_GetPointerToCurrentPos_Func)MatchFinder_GetPointerToCurrentPos;
+  vTable->GetMatches = findMatches;
+  vTable->Skip = skipBytes;
+}
diff --git a/htp/lzma/LzFind.h b/htp/lzma/LzFind.h new file mode 100644 index 0000000..c77adde --- /dev/null +++ b/htp/lzma/LzFind.h @@ -0,0 +1,121 @@ +/* LzFind.h -- Match finder for LZ algorithms
+2017-06-10 : Igor Pavlov : Public domain */
+
+#ifndef __LZ_FIND_H
+#define __LZ_FIND_H
+
+#include "7zTypes.h"
+
+EXTERN_C_BEGIN
+
+typedef UInt32 CLzRef;
+/* Type of the position values stored in the hash and son arrays. */
+typedef UInt32 CLzRef;
+/* State of one match finder instance. Field notes below describe only what the code in LzFind.c visibly does with each field. */
+typedef struct _CMatchFinder
+{
+ Byte *buffer; /* pointer to the byte at the current position; advanced by one on every step */
+ UInt32 pos; /* current position value; this is what gets stored in hash[] / son[] */
+ UInt32 posLimit; /* when pos reaches this value MatchFinder_CheckLimits() is called */
+ UInt32 streamPos; /* streamPos - pos is the number of available bytes */
+ UInt32 lenLimit; /* longest match that may be reported at the current position */
+
+ UInt32 cyclicBufferPos; /* index of the current position's entry in son[]; advanced on every step */
+ UInt32 cyclicBufferSize; /* it must be = (historySize + 1) */
+
+ Byte streamEndWasReached;
+ Byte btMode; /* nonzero selects the Bt* functions, zero selects Hc4 (see MatchFinder_CreateVTable) */
+ Byte bigHash;
+ Byte directInput;
+
+ UInt32 matchMaxLen;
+ CLzRef *hash; /* hash tables: the most recent position for each hash value */
+ CLzRef *son; /* per-position links: two slots per position in Bt* mode, one in Hc* mode */
+ UInt32 hashMask; /* mask applied to the main hash value by HASH3_CALC / HASH4_CALC / HASH5_CALC */
+ UInt32 cutValue; /* maximum number of candidates examined per search */
+
+ Byte *bufferBase;
+ ISeqInStream *stream;
+
+ UInt32 blockSize;
+ UInt32 keepSizeBefore;
+ UInt32 keepSizeAfter;
+
+ UInt32 numHashBytes; /* 2, 3 or other: picks Bt2 / Bt3 / Bt4 in tree mode */
+ size_t directInputRem;
+ UInt32 historySize;
+ UInt32 fixedHashSize;
+ UInt32 hashSizeSum;
+ SRes result;
+ UInt32 crc[256]; /* lookup table indexed by input bytes in the HASH*_CALC macros */
+ size_t numRefs;
+
+ UInt64 expectedDataSize;
+} CMatchFinder;
+/* Macro accessors for the current-position pointer and the available byte count. */
+#define Inline_MatchFinder_GetPointerToCurrentPos(p) ((p)->buffer)
+
+#define Inline_MatchFinder_GetNumAvailableBytes(p) ((p)->streamPos - (p)->pos)
+/* True when the end of the stream was reached, no bytes remain between pos and streamPos, and (for direct input) no direct input remains. */
+#define Inline_MatchFinder_IsFinishedOK(p) \
+ ((p)->streamEndWasReached \
+ && (p)->streamPos == (p)->pos \
+ && (!(p)->directInput || (p)->directInputRem == 0))
+
+int MatchFinder_NeedMove(CMatchFinder *p);
+Byte *MatchFinder_GetPointerToCurrentPos(CMatchFinder *p);
+void MatchFinder_MoveBlock(CMatchFinder *p);
+void MatchFinder_ReadIfRequired(CMatchFinder *p);
+
+void MatchFinder_Construct(CMatchFinder *p);
+
+/* Conditions:
+ historySize <= 3 GB
+ keepAddBufferBefore + matchMaxLen + keepAddBufferAfter < 511MB
+*/
+int MatchFinder_Create(CMatchFinder *p, UInt32 historySize,
+ UInt32 keepAddBufferBefore, UInt32 matchMaxLen, UInt32 keepAddBufferAfter,
+ ISzAllocPtr alloc);
+void MatchFinder_Free(CMatchFinder *p, ISzAllocPtr alloc);
+void MatchFinder_Normalize3(UInt32 subValue, CLzRef *items, size_t numItems);
+void MatchFinder_ReduceOffsets(CMatchFinder *p, UInt32 subValue);
+
+UInt32 * GetMatchesSpec1(UInt32 lenLimit, UInt32 curMatch, UInt32 pos, const Byte *buffer, CLzRef *son,
+ UInt32 _cyclicBufferPos, UInt32 _cyclicBufferSize, UInt32 _cutValue,
+ UInt32 *distances, UInt32 maxLen);
+
+/*
+Conditions:
+ Mf_GetNumAvailableBytes_Func must be called before each Mf_GetMatchLen_Func.
+ Mf_GetPointerToCurrentPos_Func's result must be used only before any other function
+*/
+/* Callback types of the match finder interface. `object` is passed as void *; MatchFinder_CreateVTable() fills these with functions taking CMatchFinder *, cast to these types. */
+typedef void (*Mf_Init_Func)(void *object);
+typedef UInt32 (*Mf_GetNumAvailableBytes_Func)(void *object);
+typedef const Byte * (*Mf_GetPointerToCurrentPos_Func)(void *object);
+typedef UInt32 (*Mf_GetMatches_Func)(void *object, UInt32 *distances); /* returns the number of UInt32 entries written to distances */
+typedef void (*Mf_Skip_Func)(void *object, UInt32); /* second argument: number of positions to skip */
+/* Table of match finder callbacks, populated by MatchFinder_CreateVTable(). */
+typedef struct _IMatchFinder
+{
+ Mf_Init_Func Init;
+ Mf_GetNumAvailableBytes_Func GetNumAvailableBytes;
+ Mf_GetPointerToCurrentPos_Func GetPointerToCurrentPos;
+ Mf_GetMatches_Func GetMatches;
+ Mf_Skip_Func Skip;
+} IMatchFinder;
+
+void MatchFinder_CreateVTable(CMatchFinder *p, IMatchFinder *vTable);
+
+void MatchFinder_Init_LowHash(CMatchFinder *p);
+void MatchFinder_Init_HighHash(CMatchFinder *p);
+void MatchFinder_Init_3(CMatchFinder *p, int readData);
+void MatchFinder_Init(CMatchFinder *p);
+
+UInt32 Bt3Zip_MatchFinder_GetMatches(CMatchFinder *p, UInt32 *distances);
+UInt32 Hc3Zip_MatchFinder_GetMatches(CMatchFinder *p, UInt32 *distances);
+
+void Bt3Zip_MatchFinder_Skip(CMatchFinder *p, UInt32 num);
+void Hc3Zip_MatchFinder_Skip(CMatchFinder *p, UInt32 num);
+
+EXTERN_C_END
+
+#endif
diff --git a/htp/lzma/LzHash.h b/htp/lzma/LzHash.h new file mode 100644 index 0000000..2191444 --- /dev/null +++ b/htp/lzma/LzHash.h @@ -0,0 +1,57 @@ +/* LzHash.h -- HASH functions for LZ algorithms
+2015-04-12 : Igor Pavlov : Public domain */
+/* Hash macros for the match finders. They expect the caller's locals `cur` (current bytes) and `p` (object with crc[] and hashMask), and assign to the caller's locals hv / h2 / h3 / h4. */
+#ifndef __LZ_HASH_H
+#define __LZ_HASH_H
+/* Entry counts of the 2-, 3- and 4-byte helper hash tables. */
+#define kHash2Size (1 << 10)
+#define kHash3Size (1 << 16)
+#define kHash4Size (1 << 20)
+/* Offsets inside p->hash at which the 3-byte table, the 4-byte table and the table after it begin. */
+#define kFix3HashSize (kHash2Size)
+#define kFix4HashSize (kHash2Size + kHash3Size)
+#define kFix5HashSize (kHash2Size + kHash3Size + kHash4Size)
+/* hv = the first two bytes combined into a 16-bit value (no CRC table involved). */
+#define HASH2_CALC hv = cur[0] | ((UInt32)cur[1] << 8);
+/* h2 = 10-bit hash of 2 bytes; hv = hash of 3 bytes masked with p->hashMask. */
+#define HASH3_CALC { \
+ UInt32 temp = p->crc[cur[0]] ^ cur[1]; \
+ h2 = temp & (kHash2Size - 1); \
+ hv = (temp ^ ((UInt32)cur[2] << 8)) & p->hashMask; }
+/* h2 = 10-bit hash of 2 bytes; h3 = 16-bit hash of 3 bytes; hv = hash of 4 bytes masked with p->hashMask. */
+#define HASH4_CALC { \
+ UInt32 temp = p->crc[cur[0]] ^ cur[1]; \
+ h2 = temp & (kHash2Size - 1); \
+ temp ^= ((UInt32)cur[2] << 8); \
+ h3 = temp & (kHash3Size - 1); \
+ hv = (temp ^ (p->crc[cur[3]] << 5)) & p->hashMask; }
+/* As HASH4_CALC, plus h4 = 20-bit hash of 4 bytes; hv = hash of 5 bytes masked with p->hashMask. */
+#define HASH5_CALC { \
+ UInt32 temp = p->crc[cur[0]] ^ cur[1]; \
+ h2 = temp & (kHash2Size - 1); \
+ temp ^= ((UInt32)cur[2] << 8); \
+ h3 = temp & (kHash3Size - 1); \
+ temp ^= (p->crc[cur[3]] << 5); \
+ h4 = temp & (kHash4Size - 1); \
+ hv = (temp ^ (p->crc[cur[4]] << 3)) & p->hashMask; }
+/* hv = 16-bit hash of 3 bytes, used by the *Zip match finders. */
+/* #define HASH_ZIP_CALC hv = ((cur[0] | ((UInt32)cur[1] << 8)) ^ p->crc[cur[2]]) & 0xFFFF; */
+#define HASH_ZIP_CALC hv = ((cur[2] | ((UInt32)cur[0] << 8)) ^ p->crc[cur[1]]) & 0xFFFF;
+
+/* MT_ variants: same mixing as above, but each result is masked with its fixed table size and no hv / p->hashMask is involved. */
+#define MT_HASH2_CALC \
+ h2 = (p->crc[cur[0]] ^ cur[1]) & (kHash2Size - 1);
+
+#define MT_HASH3_CALC { \
+ UInt32 temp = p->crc[cur[0]] ^ cur[1]; \
+ h2 = temp & (kHash2Size - 1); \
+ h3 = (temp ^ ((UInt32)cur[2] << 8)) & (kHash3Size - 1); }
+
+#define MT_HASH4_CALC { \
+ UInt32 temp = p->crc[cur[0]] ^ cur[1]; \
+ h2 = temp & (kHash2Size - 1); \
+ temp ^= ((UInt32)cur[2] << 8); \
+ h3 = temp & (kHash3Size - 1); \
+ h4 = (temp ^ (p->crc[cur[3]] << 5)) & (kHash4Size - 1); }
+
+#endif
diff --git a/htp/lzma/LzmaDec.c b/htp/lzma/LzmaDec.c new file mode 100644 index 0000000..138c36c --- /dev/null +++ b/htp/lzma/LzmaDec.c @@ -0,0 +1,1223 @@ +/* LzmaDec.c -- LZMA Decoder
+2018-07-04 : Igor Pavlov : Public domain */
+
+#include "Precomp.h"
+
+#include <string.h>
+#include <stdlib.h>
+#include <stdint.h>
+
+/* #include "CpuArch.h" */
+#include "LzmaDec.h"
+/* Range decoder macros. They operate on the caller's locals range, code, buf, bound and ttt. */
+#define kNumTopBits 24
+#define kTopValue ((UInt32)1 << kNumTopBits)
+
+#define kNumBitModelTotalBits 11
+#define kBitModelTotal (1 << kNumBitModelTotalBits)
+#define kNumMoveBits 5
+
+#define RC_INIT_SIZE 5
+/* When range has dropped below 2^24, shift range and code left by 8 bits and pull the next input byte into code. No bounds check on buf. */
+#define NORMALIZE if (range < kTopValue) { range <<= 8; code = (code << 8) | (*buf++); }
+/* IF_BIT_0 loads the probability *p into ttt, normalizes, computes bound and opens `if (code < bound)`: the true branch is bit 0, the else branch is bit 1. UPDATE_0 / UPDATE_1 narrow the range and move the probability by 1/32 of the remaining distance. */
+#define IF_BIT_0(p) ttt = *(p); NORMALIZE; bound = (range >> kNumBitModelTotalBits) * (UInt32)ttt; if (code < bound)
+#define UPDATE_0(p) range = bound; *(p) = (CLzmaProb)(ttt + ((kBitModelTotal - ttt) >> kNumMoveBits));
+#define UPDATE_1(p) range -= bound; code -= bound; *(p) = (CLzmaProb)(ttt - (ttt >> kNumMoveBits));
+#define GET_BIT2(p, i, A0, A1) IF_BIT_0(p) \
+ { UPDATE_0(p); i = (i + i); A0; } else \
+ { UPDATE_1(p); i = (i + i) + 1; A1; }
+/* Decode one bit with probability probs[i] and shift it into i (i becomes 2i or 2i + 1). */
+#define TREE_GET_BIT(probs, i) { GET_BIT2(probs + i, i, ;, ;); }
+/* REV_BIT*: decode one bit with probability p[i]; the caller-supplied statements A0 / A1 update i (and the step m). */
+#define REV_BIT(p, i, A0, A1) IF_BIT_0(p + i) \
+ { UPDATE_0(p + i); A0; } else \
+ { UPDATE_1(p + i); A1; }
+#define REV_BIT_VAR( p, i, m) REV_BIT(p, i, i += m; m += m, m += m; i += m; )
+#define REV_BIT_CONST(p, i, m) REV_BIT(p, i, i += m; , i += m * 2; )
+#define REV_BIT_LAST( p, i, m) REV_BIT(p, i, i -= m , ; )
+/* Decode a symbol from a bit tree with `limit` leaves; i ends up in the range [0, limit). */
+#define TREE_DECODE(probs, limit, i) \
+ { i = 1; do { TREE_GET_BIT(probs, i); } while (i < limit); i -= limit; }
+
+/* #define _LZMA_SIZE_OPT */
+/* TREE_6_DECODE: 6-bit tree decode; unrolled unless _LZMA_SIZE_OPT is defined. */
+#ifdef _LZMA_SIZE_OPT
+#define TREE_6_DECODE(probs, i) TREE_DECODE(probs, (1 << 6), i)
+#else
+#define TREE_6_DECODE(probs, i) \
+ { i = 1; \
+ TREE_GET_BIT(probs, i); \
+ TREE_GET_BIT(probs, i); \
+ TREE_GET_BIT(probs, i); \
+ TREE_GET_BIT(probs, i); \
+ TREE_GET_BIT(probs, i); \
+ TREE_GET_BIT(probs, i); \
+ i -= 0x40; }
+#endif
+/* One bit of a literal. MATCHED_LITER_DEC additionally selects the probability using the corresponding bit of matchByte while offs is still nonzero; it uses the caller's locals matchByte, offs, bit, probLit, prob and symbol. */
+#define NORMAL_LITER_DEC TREE_GET_BIT(prob, symbol)
+#define MATCHED_LITER_DEC \
+ matchByte += matchByte; \
+ bit = offs; \
+ offs &= matchByte; \
+ probLit = prob + (offs + bit + symbol); \
+ GET_BIT2(probLit, symbol, offs ^= bit; , ;)
+
+
+/* *_CHECK variants: same decoding steps, but probabilities are not written back and the enclosing function returns DUMMY_ERROR when buf has reached bufLimit. */
+#define NORMALIZE_CHECK if (range < kTopValue) { if (buf >= bufLimit) return DUMMY_ERROR; range <<= 8; code = (code << 8) | (*buf++); }
+
+#define IF_BIT_0_CHECK(p) ttt = *(p); NORMALIZE_CHECK; bound = (range >> kNumBitModelTotalBits) * (UInt32)ttt; if (code < bound)
+#define UPDATE_0_CHECK range = bound;
+#define UPDATE_1_CHECK range -= bound; code -= bound;
+#define GET_BIT2_CHECK(p, i, A0, A1) IF_BIT_0_CHECK(p) \
+ { UPDATE_0_CHECK; i = (i + i); A0; } else \
+ { UPDATE_1_CHECK; i = (i + i) + 1; A1; }
+#define GET_BIT_CHECK(p, i) GET_BIT2_CHECK(p, i, ; , ;)
+#define TREE_DECODE_CHECK(probs, limit, i) \
+ { i = 1; do { GET_BIT_CHECK(probs + i, i) } while (i < limit); i -= limit; }
+
+
+#define REV_BIT_CHECK(p, i, m) IF_BIT_0_CHECK(p + i) \
+ { UPDATE_0_CHECK; i += m; m += m; } else \
+ { UPDATE_1_CHECK; m += m; i += m; }
+
+/* Sizes of the LZMA probability model and the offsets of its sub-tables inside the probs array. */
+#define kNumPosBitsMax 4
+#define kNumPosStatesMax (1 << kNumPosBitsMax)
+
+#define kLenNumLowBits 3
+#define kLenNumLowSymbols (1 << kLenNumLowBits)
+#define kLenNumHighBits 8
+#define kLenNumHighSymbols (1 << kLenNumHighBits)
+/* Layout of one length coder: low/mid trees per position state, then the shared high tree. */
+#define LenLow 0
+#define LenHigh (LenLow + 2 * (kNumPosStatesMax << kLenNumLowBits))
+#define kNumLenProbs (LenHigh + kLenNumHighSymbols)
+/* The two choice probabilities occupy slots inside the LenLow area. */
+#define LenChoice LenLow
+#define LenChoice2 (LenLow + (1 << kLenNumLowBits))
+
+#define kNumStates 12
+#define kNumStates2 16
+#define kNumLitStates 7
+
+#define kStartPosModelIndex 4
+#define kEndPosModelIndex 14
+#define kNumFullDistances (1 << (kEndPosModelIndex >> 1))
+
+#define kNumPosSlotBits 6
+#define kNumLenToPosStates 4
+
+#define kNumAlignBits 4
+#define kAlignTableSize (1 << kNumAlignBits)
+/* kMatchSpecLenStart = 2 + 2 * 8 + 256 = 274: values at or above it in remainLen are status codes, not lengths (see the remainLen comment below). */
+#define kMatchMinLen 2
+#define kMatchSpecLenStart (kMatchMinLen + kLenNumLowSymbols * 2 + kLenNumHighSymbols)
+
+/* External ASM code needs same CLzmaProb array layout. So don't change it. */
+
+/* (probs_1664) is faster and better for code size at some platforms */
+/*
+#ifdef MY_CPU_X86_OR_AMD64
+*/
+#define kStartOffset 1664
+#define GET_PROBS p->probs_1664
+/*
+#define GET_PROBS p->probs + kStartOffset
+#else
+#define kStartOffset 0
+#define GET_PROBS p->probs
+#endif
+*/
+/* Sub-table offsets relative to GET_PROBS; tables before Align get negative offsets because the base pointer is shifted by kStartOffset. */
+#define SpecPos (-kStartOffset)
+#define IsRep0Long (SpecPos + kNumFullDistances)
+#define RepLenCoder (IsRep0Long + (kNumStates2 << kNumPosBitsMax))
+#define LenCoder (RepLenCoder + kNumLenProbs)
+#define IsMatch (LenCoder + kNumLenProbs)
+#define Align (IsMatch + (kNumStates2 << kNumPosBitsMax))
+#define IsRep (Align + kAlignTableSize)
+#define IsRepG0 (IsRep + kNumStates)
+#define IsRepG1 (IsRepG0 + kNumStates)
+#define IsRepG2 (IsRepG1 + kNumStates)
+#define PosSlot (IsRepG2 + kNumStates)
+#define Literal (PosSlot + (kNumLenToPosStates << kNumPosSlotBits))
+#define NUM_BASE_PROBS (Literal + kStartOffset)
+/* Compile-time checks: with a nonzero kStartOffset the Align table must sit at offset 0, and the base table must hold exactly 1984 entries. */
+#if Align != 0 && kStartOffset != 0
+ #error Stop_Compiling_Bad_LZMA_kAlign
+#endif
+
+#if NUM_BASE_PROBS != 1984
+ #error Stop_Compiling_Bad_LZMA_PROBS
+#endif
+
+/* 0x300 probabilities per literal coder context; (lc + lp) bits of context select the coder. */
+#define LZMA_LIT_SIZE 0x300
+
+#define LzmaProps_GetNumProbs(p) (NUM_BASE_PROBS + ((UInt32)LZMA_LIT_SIZE << ((p)->lc + (p)->lp)))
+
+/* posState is pre-shifted by 4 so it can be added directly to a state index or used as a length-coder offset. */
+#define CALC_POS_STATE(processedPos, pbMask) (((processedPos) & (pbMask)) << 4)
+#define COMBINED_PS_STATE (posState + state)
+#define GET_LEN_STATE (posState)
+
+#define LZMA_DIC_MIN (1 << 12)
+
+/*
+p->remainLen : shows status of LZMA decoder:
+ < kMatchSpecLenStart : normal remain
+ = kMatchSpecLenStart : finished
+ = kMatchSpecLenStart + 1 : need init range coder
+ = kMatchSpecLenStart + 2 : need init range coder and state
+*/
+
+/* ---------- LZMA_DECODE_REAL ---------- */
+/*
+LzmaDec_DecodeReal_3() can be implemented in external ASM file.
+3 - is the code compatibility version of that function for check at link time.
+*/
+
+#define LZMA_DECODE_REAL LzmaDec_DecodeReal_3
+
+/*
+LZMA_DECODE_REAL()
+In:
+ RangeCoder is normalized
+ if (p->dicPos == limit)
+ {
+ LzmaDec_TryDummy() was called before to exclude LITERAL and MATCH-REP cases.
+ So first symbol can be only MATCH-NON-REP. And if that MATCH-NON-REP symbol
+ is not END_OF_PAYLOAD_MARKER, then function returns error code.
+ }
+
+Processing:
+ first LZMA symbol will be decoded in any case
+ All checks for limits are at the end of main loop,
+ It will decode new LZMA-symbols while (p->buf < bufLimit && dicPos < limit),
+ RangeCoder is still without last normalization when (p->buf < bufLimit) is being checked.
+
+Out:
+ RangeCoder is normalized
+ Result:
+ SZ_OK - OK
+ SZ_ERROR_DATA - Error
+ p->remainLen:
+ < kMatchSpecLenStart : normal remain
+ = kMatchSpecLenStart : finished
+*/
+
+/* C implementation of the main decode loop, compiled unless _LZMA_DEC_OPT is defined; in that case only a prototype is emitted here. Decoder state is copied into locals, symbols are decoded while (dicPos < limit && buf < bufLimit), and the state is written back at the end. */
+#ifdef _LZMA_DEC_OPT
+
+int MY_FAST_CALL LZMA_DECODE_REAL(CLzmaDec *p, SizeT limit, const Byte *bufLimit);
+
+#else
+
+static
+int MY_FAST_CALL LZMA_DECODE_REAL(CLzmaDec *p, SizeT limit, const Byte *bufLimit)
+{
+ CLzmaProb *probs = GET_PROBS;
+ unsigned state = (unsigned)p->state;
+ UInt32 rep0 = p->reps[0], rep1 = p->reps[1], rep2 = p->reps[2], rep3 = p->reps[3]; /* four most recent distances, each stored as distance + 1 */
+ unsigned pbMask = ((unsigned)1 << (p->prop.pb)) - 1; /* keeps the low pb bits of the position */
+ unsigned lc = p->prop.lc;
+ unsigned lpMask = ((unsigned)0x100 << p->prop.lp) - ((unsigned)0x100 >> lc); /* keeps lp position bits and the top lc bits of the previous byte */
+
+ Byte *dic = p->dic;
+ SizeT dicBufSize = p->dicBufSize;
+ SizeT dicPos = p->dicPos;
+
+ UInt32 processedPos = p->processedPos;
+ UInt32 checkDicSize = p->checkDicSize;
+ unsigned len = 0; /* after a match: the part of the match that did not fit before limit */
+
+ const Byte *buf = p->buf;
+ UInt32 range = p->range;
+ UInt32 code = p->code;
+
+ do
+ {
+ CLzmaProb *prob;
+ UInt32 bound;
+ unsigned ttt;
+ unsigned posState = CALC_POS_STATE(processedPos, pbMask);
+
+ prob = probs + IsMatch + COMBINED_PS_STATE;
+ IF_BIT_0(prob)
+ { /* IsMatch bit 0: a literal follows */
+ unsigned symbol;
+ UPDATE_0(prob);
+ prob = probs + Literal;
+ if (processedPos != 0 || checkDicSize != 0) /* skipped for the very first byte, which has no predecessor */
+ prob += (UInt32)3 * ((((processedPos << 8) + dic[(dicPos == 0 ? dicBufSize : dicPos) - 1]) & lpMask) << lc); /* pick the literal coder from the position and the previous byte */
+ processedPos++;
+
+ if (state < kNumLitStates)
+ { /* previous symbol was a literal: plain 8-bit tree decode */
+ state -= (state < 4) ? state : 3;
+ symbol = 1;
+ #ifdef _LZMA_SIZE_OPT
+ do { NORMAL_LITER_DEC } while (symbol < 0x100);
+ #else
+ NORMAL_LITER_DEC
+ NORMAL_LITER_DEC
+ NORMAL_LITER_DEC
+ NORMAL_LITER_DEC
+ NORMAL_LITER_DEC
+ NORMAL_LITER_DEC
+ NORMAL_LITER_DEC
+ NORMAL_LITER_DEC
+ #endif
+ }
+ else
+ { /* previous symbol was a match: decode using the byte at distance rep0 as context */
+ unsigned matchByte = dic[dicPos - rep0 + (dicPos < rep0 ? dicBufSize : 0)];
+ unsigned offs = 0x100;
+ state -= (state < 10) ? 3 : 6;
+ symbol = 1;
+ #ifdef _LZMA_SIZE_OPT
+ do
+ {
+ unsigned bit;
+ CLzmaProb *probLit;
+ MATCHED_LITER_DEC
+ }
+ while (symbol < 0x100);
+ #else
+ {
+ unsigned bit;
+ CLzmaProb *probLit;
+ MATCHED_LITER_DEC
+ MATCHED_LITER_DEC
+ MATCHED_LITER_DEC
+ MATCHED_LITER_DEC
+ MATCHED_LITER_DEC
+ MATCHED_LITER_DEC
+ MATCHED_LITER_DEC
+ MATCHED_LITER_DEC
+ }
+ #endif
+ }
+
+ dic[dicPos++] = (Byte)symbol; /* symbol is 0x100 | byte here; the cast keeps the byte */
+ continue;
+ }
+
+ { /* IsMatch bit 1: a match or a repeated match follows */
+ UPDATE_1(prob);
+ prob = probs + IsRep + state;
+ IF_BIT_0(prob)
+ { /* new match: the distance is decoded after the length */
+ UPDATE_0(prob);
+ state += kNumStates; /* marks "distance still to be decoded" for the test below */
+ prob = probs + LenCoder;
+ }
+ else
+ { /* repeated match: reuse one of rep0..rep3 */
+ UPDATE_1(prob);
+ /*
+ // that case was checked before with kBadRepCode
+ if (checkDicSize == 0 && processedPos == 0)
+ return SZ_ERROR_DATA;
+ */
+ prob = probs + IsRepG0 + state;
+ IF_BIT_0(prob)
+ {
+ UPDATE_0(prob);
+ prob = probs + IsRep0Long + COMBINED_PS_STATE;
+ IF_BIT_0(prob)
+ { /* single byte copied from distance rep0 */
+ UPDATE_0(prob);
+ dic[dicPos] = dic[dicPos - rep0 + (dicPos < rep0 ? dicBufSize : 0)];
+ dicPos++;
+ processedPos++;
+ state = state < kNumLitStates ? 9 : 11;
+ continue;
+ }
+ UPDATE_1(prob);
+ }
+ else
+ { /* pick rep1, rep2 or rep3 and move it to the front of the rep list */
+ UInt32 distance;
+ UPDATE_1(prob);
+ prob = probs + IsRepG1 + state;
+ IF_BIT_0(prob)
+ {
+ UPDATE_0(prob);
+ distance = rep1;
+ }
+ else
+ {
+ UPDATE_1(prob);
+ prob = probs + IsRepG2 + state;
+ IF_BIT_0(prob)
+ {
+ UPDATE_0(prob);
+ distance = rep2;
+ }
+ else
+ {
+ UPDATE_1(prob);
+ distance = rep3;
+ rep3 = rep2;
+ }
+ rep2 = rep1;
+ }
+ rep1 = rep0;
+ rep0 = distance;
+ }
+ state = state < kNumLitStates ? 8 : 11;
+ prob = probs + RepLenCoder;
+ }
+
+ #ifdef _LZMA_SIZE_OPT
+ { /* length decode, compact form: choose the low / mid / high tree, then one generic tree decode */
+ unsigned lim, offset;
+ CLzmaProb *probLen = prob + LenChoice;
+ IF_BIT_0(probLen)
+ {
+ UPDATE_0(probLen);
+ probLen = prob + LenLow + GET_LEN_STATE;
+ offset = 0;
+ lim = (1 << kLenNumLowBits);
+ }
+ else
+ {
+ UPDATE_1(probLen);
+ probLen = prob + LenChoice2;
+ IF_BIT_0(probLen)
+ {
+ UPDATE_0(probLen);
+ probLen = prob + LenLow + GET_LEN_STATE + (1 << kLenNumLowBits);
+ offset = kLenNumLowSymbols;
+ lim = (1 << kLenNumLowBits);
+ }
+ else
+ {
+ UPDATE_1(probLen);
+ probLen = prob + LenHigh;
+ offset = kLenNumLowSymbols * 2;
+ lim = (1 << kLenNumHighBits);
+ }
+ }
+ TREE_DECODE(probLen, lim, len);
+ len += offset;
+ }
+ #else
+ { /* length decode, unrolled form */
+ CLzmaProb *probLen = prob + LenChoice;
+ IF_BIT_0(probLen)
+ { /* low tree: len in 0..7 */
+ UPDATE_0(probLen);
+ probLen = prob + LenLow + GET_LEN_STATE;
+ len = 1;
+ TREE_GET_BIT(probLen, len);
+ TREE_GET_BIT(probLen, len);
+ TREE_GET_BIT(probLen, len);
+ len -= 8;
+ }
+ else
+ {
+ UPDATE_1(probLen);
+ probLen = prob + LenChoice2;
+ IF_BIT_0(probLen)
+ { /* mid tree: len stays in 8..15 because the leading 1 bit is not subtracted */
+ UPDATE_0(probLen);
+ probLen = prob + LenLow + GET_LEN_STATE + (1 << kLenNumLowBits);
+ len = 1;
+ TREE_GET_BIT(probLen, len);
+ TREE_GET_BIT(probLen, len);
+ TREE_GET_BIT(probLen, len);
+ }
+ else
+ { /* high tree: len in 16..271 */
+ UPDATE_1(probLen);
+ probLen = prob + LenHigh;
+ TREE_DECODE(probLen, (1 << kLenNumHighBits), len);
+ len += kLenNumLowSymbols * 2;
+ }
+ }
+ }
+ #endif
+
+ if (state >= kNumStates)
+ { /* new match: decode the distance */
+ UInt32 distance;
+ prob = probs + PosSlot +
+ ((len < kNumLenToPosStates ? len : kNumLenToPosStates - 1) << kNumPosSlotBits);
+ TREE_6_DECODE(prob, distance); /* 6-bit position slot; slots below kStartPosModelIndex are the distance itself */
+ if (distance >= kStartPosModelIndex)
+ {
+ unsigned posSlot = (unsigned)distance;
+ unsigned numDirectBits = (unsigned)(((distance >> 1) - 1));
+ distance = (2 | (distance & 1));
+ if (posSlot < kEndPosModelIndex)
+ { /* all low bits come from the SpecPos probability tables */
+ distance <<= numDirectBits;
+ prob = probs + SpecPos;
+ {
+ UInt32 m = 1;
+ distance++;
+ do
+ {
+ REV_BIT_VAR(prob, distance, m);
+ }
+ while (--numDirectBits);
+ distance -= m;
+ }
+ }
+ else
+ { /* middle bits are read without a probability model, the last kNumAlignBits via the Align table */
+ numDirectBits -= kNumAlignBits;
+ do
+ {
+ NORMALIZE
+ range >>= 1;
+
+ {
+ UInt32 t;
+ code -= range;
+ t = (0 - ((UInt32)code >> 31)); /* (UInt32)((Int32)code >> 31) */
+ distance = (distance << 1) + (t + 1);
+ code += range & t;
+ }
+ /*
+ distance <<= 1;
+ if (code >= range)
+ {
+ code -= range;
+ distance |= 1;
+ }
+ */
+ }
+ while (--numDirectBits);
+ prob = probs + Align;
+ distance <<= kNumAlignBits;
+ {
+ unsigned i = 1;
+ REV_BIT_CONST(prob, i, 1);
+ REV_BIT_CONST(prob, i, 2);
+ REV_BIT_CONST(prob, i, 4);
+ REV_BIT_LAST (prob, i, 8);
+ distance |= i;
+ }
+ if (distance == (UInt32)0xFFFFFFFF)
+ { /* end marker: report "finished" through remainLen and leave the loop */
+ len = kMatchSpecLenStart;
+ state -= kNumStates;
+ break;
+ }
+ }
+ }
+
+ rep3 = rep2;
+ rep2 = rep1;
+ rep1 = rep0;
+ rep0 = distance + 1;
+ state = (state < kNumStates + kNumLitStates) ? kNumLitStates : kNumLitStates + 3;
+ if (distance >= (checkDicSize == 0 ? processedPos: checkDicSize))
+ { /* distance reaches back before the first decoded byte or beyond the dictionary */
+ p->dicPos = dicPos;
+ return SZ_ERROR_DATA;
+ }
+ }
+
+ len += kMatchMinLen;
+
+ {
+ SizeT rem;
+ unsigned curLen;
+ SizeT pos;
+
+ if ((rem = limit - dicPos) == 0)
+ { /* no room left before limit, yet the symbol was a match, not the end marker */
+ p->dicPos = dicPos;
+ return SZ_ERROR_DATA;
+ }
+
+ curLen = ((rem < len) ? (unsigned)rem : len); /* copy only what fits before limit */
+ pos = dicPos - rep0 + (dicPos < rep0 ? dicBufSize : 0); /* source index, wrapped around the dictionary buffer */
+
+ processedPos += (UInt32)curLen;
+
+ len -= curLen; /* the remainder is saved to p->remainLen after the loop */
+ if (curLen <= dicBufSize - pos)
+ { /* source run does not cross the end of the buffer: byte-by-byte forward copy */
+ Byte *dest = dic + dicPos;
+ ptrdiff_t src = (ptrdiff_t)pos - (ptrdiff_t)dicPos;
+ const Byte *lim = dest + curLen;
+ dicPos += (SizeT)curLen;
+ do
+ *(dest) = (Byte)*(dest + src);
+ while (++dest != lim);
+ }
+ else
+ { /* source run wraps: copy with an explicit wrap of pos */
+ do
+ {
+ dic[dicPos++] = dic[pos];
+ if (++pos == dicBufSize)
+ pos = 0;
+ }
+ while (--curLen != 0);
+ }
+ }
+ }
+ }
+ while (dicPos < limit && buf < bufLimit);
+
+ NORMALIZE;
+
+ p->buf = buf;
+ p->range = range;
+ p->code = code;
+ p->remainLen = (UInt32)len;
+ p->dicPos = dicPos;
+ p->processedPos = processedPos;
+ p->reps[0] = rep0;
+ p->reps[1] = rep1;
+ p->reps[2] = rep2;
+ p->reps[3] = rep3;
+ p->state = (UInt32)state;
+
+ return SZ_OK;
+}
+#endif
+
+/* Emits the pending tail of the last match (p->remainLen bytes at distance
+ * p->reps[0]) into the dictionary, stopping at `limit`.
+ *
+ * Does nothing when remainLen is 0 or is one of the status codes at or above
+ * kMatchSpecLenStart. Updates processedPos, remainLen and dicPos, and sets
+ * checkDicSize once the bytes about to be written reach prop.dicSize.
+ */
+static void MY_FAST_CALL LzmaDec_WriteRem(CLzmaDec *p, SizeT limit)
+{
+  Byte *window;
+  SizeT writeAt, windowSize, back, space;
+  unsigned todo, i;
+
+  if (p->remainLen == 0 || p->remainLen >= kMatchSpecLenStart)
+    return;
+
+  window = p->dic;
+  writeAt = p->dicPos;
+  windowSize = p->dicBufSize;
+  todo = (unsigned)p->remainLen;
+  back = p->reps[0]; /* we use SizeT to avoid the BUG of VC14 for AMD64 */
+
+  /* never write past limit */
+  space = limit - writeAt;
+  if (space < todo)
+    todo = (unsigned)(space);
+
+  if (p->checkDicSize == 0 && p->prop.dicSize - p->processedPos <= todo)
+    p->checkDicSize = p->prop.dicSize;
+
+  p->processedPos += (UInt32)todo;
+  p->remainLen -= (UInt32)todo;
+
+  for (i = 0; i != todo; i++)
+  {
+    /* source index wraps around the dictionary buffer */
+    SizeT from = writeAt - back + (writeAt < back ? windowSize : 0);
+    window[writeAt] = window[from];
+    writeAt++;
+  }
+
+  p->dicPos = writeAt;
+}
+
+
+#define kRange0 0xFFFFFFFF /* same value LzmaDec_DecodeToDic stores in p->range when it (re)starts the range coder */
+#define kBound0 ((kRange0 >> kNumBitModelTotalBits) << (kNumBitModelTotalBits - 1)) /* presumably the 0/1 split point of the first decoded bit while its probability is still at the initial value - TODO confirm against the bit-decoding macros */
+#define kBadRepCode (kBound0 + (((kRange0 - kBound0) >> kNumBitModelTotalBits) << (kNumBitModelTotalBits - 1))) /* LzmaDec_DecodeReal2 rejects a stream whose code is >= this value while processedPos is still 0 */
+#if kBadRepCode != (0xC0000000 - 0x400) /* compile-time sanity check of the derived constant */
+ #error Stop_Compiling_Bad_LZMA_Check
+#endif
+
+/* LzmaDec_DecodeReal2
+
+   Drives LZMA_DECODE_REAL until the output limit or the input limit is
+   reached, or until remainLen takes a marker value (>= kMatchSpecLenStart).
+
+   While the dictionary has not been filled once (checkDicSize == 0) each
+   pass is limited to the bytes that remain before processedPos reaches
+   prop.dicSize. If the caller's limit lies beyond that point and the
+   dictionary buffer is still smaller than prop.dicSize, the buffer is first
+   enlarged to prop.dicSize with realloc().
+
+   p         decoder state
+   limit     dictionary position at which decoding must stop
+   bufLimit  input position at which decoding must stop
+   memlimit  largest dictionary buffer, in bytes, this call may allocate
+
+   Returns 0 on success, SZ_ERROR_MEM if the dictionary would exceed memlimit
+   or realloc() fails, SZ_ERROR_DATA if the stream starts with a code value
+   >= kBadRepCode, or the non-zero result of LZMA_DECODE_REAL.
+
+   p->dic and p->dicBufSize are updated together and only after realloc()
+   succeeded, so on SZ_ERROR_MEM they still describe the old, valid buffer.
+
+   NOTE(review): the buffer is grown with realloc() while LzmaDec_Allocate /
+   LzmaDec_FreeDict go through the ISzAlloc callbacks; that is only consistent
+   if those callbacks are malloc/free compatible - confirm against callers. */
+static int MY_FAST_CALL LzmaDec_DecodeReal2(CLzmaDec *p, SizeT limit, const Byte *bufLimit, SizeT memlimit)
+{
+  do
+  {
+    SizeT limit2 = limit;
+    if (p->checkDicSize == 0)
+    {
+      UInt32 rem = p->prop.dicSize - p->processedPos;
+      if (limit - p->dicPos > rem)
+      {
+        if (p->dicBufSize < p->prop.dicSize)
+        {
+          SizeT newSize = p->prop.dicSize;
+          Byte *tmp;
+
+          if (newSize > memlimit)
+            return SZ_ERROR_MEM;
+          tmp = realloc(p->dic, newSize);
+          if (!tmp)
+            return SZ_ERROR_MEM;
+
+          /* Commit pointer and capacity together, after success only. */
+          p->dic = tmp;
+          p->dicBufSize = newSize;
+        }
+        limit2 = p->dicPos + rem;
+      }
+
+      if (p->processedPos == 0)
+        if (p->code >= kBadRepCode)
+          return SZ_ERROR_DATA;
+    }
+
+    RINOK(LZMA_DECODE_REAL(p, limit2, bufLimit));
+
+    if (p->checkDicSize == 0 && p->processedPos >= p->prop.dicSize)
+      p->checkDicSize = p->prop.dicSize;
+
+    /* A match may have been cut at limit2; continue it up to limit. */
+    LzmaDec_WriteRem(p, limit);
+  }
+  while (p->dicPos < limit && p->buf < bufLimit && p->remainLen < kMatchSpecLenStart);
+
+  return 0;
+}
+
+typedef enum /* result of LzmaDec_TryDummy: kind of the next symbol in the input */
+{
+ DUMMY_ERROR, /* unexpected end of input stream */
+ DUMMY_LIT, /* next symbol is a literal */
+ DUMMY_MATCH, /* next symbol is a match (IsRep bit is 0) */
+ DUMMY_REP /* next symbol is a repeated match (IsRep bit is 1) */
+} ELzmaDummy;
+
+/* LzmaDec_TryDummy
+
+   Trial-decodes the next symbol from buf to find out what kind of symbol it
+   is. The decoder is not modified: p is const, range/code are copied into
+   locals and the probability table is only read.
+
+   p       decoder state (read only)
+   buf     first available input byte
+   inSize  number of bytes available at buf
+
+   Returns DUMMY_LIT, DUMMY_MATCH or DUMMY_REP. No statement visible in this
+   function returns DUMMY_ERROR; presumably the *_CHECK macros do so when buf
+   reaches bufLimit (the caller treats DUMMY_ERROR as "needs more input") -
+   TODO confirm against the macro definitions.
+ */ static ELzmaDummy LzmaDec_TryDummy(const CLzmaDec *p, const Byte *buf, SizeT inSize)
+{
+ UInt32 range = p->range;
+ UInt32 code = p->code;
+ const Byte *bufLimit = buf + inSize;
+ const CLzmaProb *probs = GET_PROBS;
+ unsigned state = (unsigned)p->state;
+ ELzmaDummy res;
+
+ {
+ const CLzmaProb *prob;
+ UInt32 bound;
+ unsigned ttt;
+ unsigned posState = CALC_POS_STATE(p->processedPos, (1 << p->prop.pb) - 1);
+
+ prob = probs + IsMatch + COMBINED_PS_STATE;
+ IF_BIT_0_CHECK(prob) /* IsMatch bit is 0: the symbol is a literal */
+ {
+ UPDATE_0_CHECK
+
+ /* if (bufLimit - buf >= 7) return DUMMY_LIT; */
+
+ prob = probs + Literal;
+ if (p->checkDicSize != 0 || p->processedPos != 0) /* a previous byte exists only after some output */
+ prob += ((UInt32)LZMA_LIT_SIZE *
+ ((((p->processedPos) & ((1 << (p->prop.lp)) - 1)) << p->prop.lc) +
+ (p->dic[(p->dicPos == 0 ? p->dicBufSize : p->dicPos) - 1] >> (8 - p->prop.lc)))); /* previous byte; wraps to the end of dic when dicPos is 0 */
+
+ if (state < kNumLitStates)
+ {
+ unsigned symbol = 1;
+ do { GET_BIT_CHECK(prob + symbol, symbol) } while (symbol < 0x100);
+ }
+ else
+ {
+ unsigned matchByte = p->dic[p->dicPos - p->reps[0] +
+ (p->dicPos < p->reps[0] ? p->dicBufSize : 0)]; /* byte at distance reps[0], with wrap-around */
+ unsigned offs = 0x100;
+ unsigned symbol = 1;
+ do
+ {
+ unsigned bit;
+ const CLzmaProb *probLit;
+ matchByte += matchByte;
+ bit = offs;
+ offs &= matchByte;
+ probLit = prob + (offs + bit + symbol);
+ GET_BIT2_CHECK(probLit, symbol, offs ^= bit; , ; )
+ }
+ while (symbol < 0x100);
+ }
+ res = DUMMY_LIT;
+ }
+ else
+ {
+ unsigned len;
+ UPDATE_1_CHECK;
+
+ prob = probs + IsRep + state;
+ IF_BIT_0_CHECK(prob) /* IsRep bit is 0: match with a newly coded distance */
+ {
+ UPDATE_0_CHECK;
+ state = 0; /* local copy only; 0 makes the "state < 4" test below decode a distance */
+ prob = probs + LenCoder;
+ res = DUMMY_MATCH;
+ }
+ else
+ {
+ UPDATE_1_CHECK;
+ res = DUMMY_REP;
+ prob = probs + IsRepG0 + state;
+ IF_BIT_0_CHECK(prob)
+ {
+ UPDATE_0_CHECK;
+ prob = probs + IsRep0Long + COMBINED_PS_STATE;
+ IF_BIT_0_CHECK(prob)
+ {
+ UPDATE_0_CHECK;
+ NORMALIZE_CHECK;
+ return DUMMY_REP; /* this form carries no length field, so the symbol ends here */
+ }
+ else
+ {
+ UPDATE_1_CHECK;
+ }
+ }
+ else
+ {
+ UPDATE_1_CHECK;
+ prob = probs + IsRepG1 + state;
+ IF_BIT_0_CHECK(prob)
+ {
+ UPDATE_0_CHECK;
+ }
+ else
+ {
+ UPDATE_1_CHECK;
+ prob = probs + IsRepG2 + state;
+ IF_BIT_0_CHECK(prob)
+ {
+ UPDATE_0_CHECK;
+ }
+ else
+ {
+ UPDATE_1_CHECK;
+ }
+ }
+ }
+ state = kNumStates; /* local copy only; presumably >= 4 so that no distance is decoded below - TODO confirm */
+ prob = probs + RepLenCoder;
+ }
+ { /* length field: choose one of three trees, then decode the symbol */
+ unsigned limit, offset;
+ const CLzmaProb *probLen = prob + LenChoice;
+ IF_BIT_0_CHECK(probLen)
+ {
+ UPDATE_0_CHECK;
+ probLen = prob + LenLow + GET_LEN_STATE;
+ offset = 0;
+ limit = 1 << kLenNumLowBits;
+ }
+ else
+ {
+ UPDATE_1_CHECK;
+ probLen = prob + LenChoice2;
+ IF_BIT_0_CHECK(probLen)
+ {
+ UPDATE_0_CHECK;
+ probLen = prob + LenLow + GET_LEN_STATE + (1 << kLenNumLowBits);
+ offset = kLenNumLowSymbols;
+ limit = 1 << kLenNumLowBits;
+ }
+ else
+ {
+ UPDATE_1_CHECK;
+ probLen = prob + LenHigh;
+ offset = kLenNumLowSymbols * 2;
+ limit = 1 << kLenNumHighBits;
+ }
+ }
+ TREE_DECODE_CHECK(probLen, limit, len);
+ len += offset;
+ }
+
+ if (state < 4) /* reached with state == 0, i.e. for DUMMY_MATCH: a distance follows */
+ {
+ unsigned posSlot;
+ prob = probs + PosSlot +
+ ((len < kNumLenToPosStates - 1 ? len : kNumLenToPosStates - 1) <<
+ kNumPosSlotBits);
+ TREE_DECODE_CHECK(prob, 1 << kNumPosSlotBits, posSlot);
+ if (posSlot >= kStartPosModelIndex)
+ {
+ unsigned numDirectBits = ((posSlot >> 1) - 1);
+
+ /* if (bufLimit - buf >= 8) return DUMMY_MATCH; */
+
+ if (posSlot < kEndPosModelIndex)
+ {
+ prob = probs + SpecPos + ((2 | (posSlot & 1)) << numDirectBits);
+ }
+ else
+ {
+ numDirectBits -= kNumAlignBits;
+ do /* bits decoded without a probability model: halve range, conditionally subtract it from code */
+ {
+ NORMALIZE_CHECK
+ range >>= 1;
+ code -= range & (((code - range) >> 31) - 1);
+ /* if (code >= range) code -= range; */
+ }
+ while (--numDirectBits);
+ prob = probs + Align;
+ numDirectBits = kNumAlignBits;
+ }
+ {
+ unsigned i = 1;
+ unsigned m = 1;
+ do
+ {
+ REV_BIT_CHECK(prob, i, m);
+ }
+ while (--numDirectBits);
+ }
+ }
+ }
+ }
+ }
+ NORMALIZE_CHECK;
+ return res;
+}
+
+
+/* LzmaDec_InitDicAndState
+
+   Schedules a restart of the range coder and, on request, of the dictionary
+   bookkeeping and/or the decoder state. The request is encoded in
+   p->remainLen and carried out by the next LzmaDec_DecodeToDic call:
+     kMatchSpecLenStart + 1  - re-read the range coder header only
+     kMatchSpecLenStart + 2  - additionally reset probabilities, reps and state
+   Any input bytes buffered in tempBuf are dropped.
+
+   initDic    non-zero: also clear processedPos and checkDicSize
+   initState  non-zero: request the full reset */
+static void LzmaDec_InitDicAndState(CLzmaDec *p, BoolInt initDic, BoolInt initState)
+{
+  p->tempBufSize = 0;
+
+  if (initDic)
+  {
+    p->processedPos = 0;
+    p->checkDicSize = 0;
+  }
+
+  /* A dictionary reset implies the full state reset as well. */
+  if (initDic || initState)
+    p->remainLen = kMatchSpecLenStart + 2;
+  else
+    p->remainLen = kMatchSpecLenStart + 1;
+}
+
+/* LzmaDec_Init
+
+   Prepares p for a new stream: requests a full dictionary and state reset
+   and moves the dictionary write position back to 0. */
+void LzmaDec_Init(CLzmaDec *p)
+{
+  LzmaDec_InitDicAndState(p, True, True);
+  p->dicPos = 0;
+}
+
+
+/* LzmaDec_DecodeToDic
+
+   Decodes input from src into the dictionary buffer p->dic, never writing at
+   or beyond position dicLimit.
+
+   p           decoder state
+   dicLimit    dictionary position at which output must stop
+   src         input bytes
+   srcLen      in: bytes available at src; out: bytes consumed
+   finishMode  only consulted once p->dicPos has reached dicLimit
+   status      out: LZMA_STATUS_* value describing why decoding stopped
+   memlimit    passed through to LzmaDec_DecodeReal2
+
+   Returns SZ_OK, SZ_ERROR_DATA, or SZ_ERROR_FAIL (internal inconsistency
+   while replaying buffered input).
+
+   NOTE(review): every non-zero result of LzmaDec_DecodeReal2, including
+   SZ_ERROR_MEM, is reported to the caller as SZ_ERROR_DATA - confirm that
+   this is intended.
+ */ SRes LzmaDec_DecodeToDic(CLzmaDec *p, SizeT dicLimit, const Byte *src, SizeT *srcLen,
+ ELzmaFinishMode finishMode, ELzmaStatus *status, SizeT memlimit)
+{
+ SizeT inSize = *srcLen;
+ (*srcLen) = 0;
+
+ *status = LZMA_STATUS_NOT_SPECIFIED;
+
+ if (p->remainLen > kMatchSpecLenStart) /* a (re)start requested by LzmaDec_InitDicAndState is pending */
+ {
+ for (; inSize > 0 && p->tempBufSize < RC_INIT_SIZE; (*srcLen)++, inSize--) /* collect the range coder header, possibly across calls */
+ p->tempBuf[p->tempBufSize++] = *src++;
+ if (p->tempBufSize != 0 && p->tempBuf[0] != 0) /* the first header byte must be 0 */
+ return SZ_ERROR_DATA;
+ if (p->tempBufSize < RC_INIT_SIZE)
+ {
+ *status = LZMA_STATUS_NEEDS_MORE_INPUT;
+ return SZ_OK;
+ }
+ p->code = /* header bytes 1..4, most significant byte first */
+ ((UInt32)p->tempBuf[1] << 24)
+ | ((UInt32)p->tempBuf[2] << 16)
+ | ((UInt32)p->tempBuf[3] << 8)
+ | ((UInt32)p->tempBuf[4]);
+ p->range = 0xFFFFFFFF;
+ p->tempBufSize = 0;
+
+ if (p->remainLen > kMatchSpecLenStart + 1) /* full reset requested: probabilities, reps and state */
+ {
+ SizeT numProbs = LzmaProps_GetNumProbs(&p->prop);
+ SizeT i;
+ CLzmaProb *probs = p->probs;
+ for (i = 0; i < numProbs; i++)
+ probs[i] = kBitModelTotal >> 1;
+ p->reps[0] = p->reps[1] = p->reps[2] = p->reps[3] = 1;
+ p->state = 0;
+ }
+
+ p->remainLen = 0;
+ }
+
+ LzmaDec_WriteRem(p, dicLimit); /* finish a match left over from the previous call */
+
+ while (p->remainLen != kMatchSpecLenStart) /* loop ends normally once remainLen == kMatchSpecLenStart; the tail returns FINISHED_WITH_MARK */
+ {
+ int checkEndMarkNow = 0;
+
+ if (p->dicPos >= dicLimit) /* output limit reached */
+ {
+ if (p->remainLen == 0 && p->code == 0)
+ {
+ *status = LZMA_STATUS_MAYBE_FINISHED_WITHOUT_MARK;
+ return SZ_OK;
+ }
+ if (finishMode == LZMA_FINISH_ANY)
+ {
+ *status = LZMA_STATUS_NOT_FINISHED;
+ return SZ_OK;
+ }
+ if (p->remainLen != 0)
+ {
+ *status = LZMA_STATUS_NOT_FINISHED;
+ return SZ_ERROR_DATA;
+ }
+ checkEndMarkNow = 1; /* LZMA_FINISH_END: the next symbol has to be a DUMMY_MATCH, see the checks below */
+ }
+
+ if (p->tempBufSize == 0) /* no carried-over input: decode straight from src */
+ {
+ SizeT processed;
+ const Byte *bufLimit;
+ if (inSize < LZMA_REQUIRED_INPUT_MAX || checkEndMarkNow)
+ {
+ int dummyRes = LzmaDec_TryDummy(p, src, inSize); /* probe first: fewer than the worst-case bytes for one symbol are available */
+ if (dummyRes == DUMMY_ERROR)
+ {
+ memcpy(p->tempBuf, src, inSize); /* keep the partial symbol for the next call */
+ p->tempBufSize = (unsigned)inSize;
+ (*srcLen) += inSize;
+ *status = LZMA_STATUS_NEEDS_MORE_INPUT;
+ return SZ_OK;
+ }
+ if (checkEndMarkNow && dummyRes != DUMMY_MATCH)
+ {
+ *status = LZMA_STATUS_NOT_FINISHED;
+ return SZ_ERROR_DATA;
+ }
+ bufLimit = src; /* input limit equal to the start: presumably restricts the decoder to the single probed symbol - TODO confirm */
+ }
+ else
+ bufLimit = src + inSize - LZMA_REQUIRED_INPUT_MAX;
+ p->buf = src;
+ if (LzmaDec_DecodeReal2(p, dicLimit, bufLimit, memlimit) != 0)
+ return SZ_ERROR_DATA;
+ processed = (SizeT)(p->buf - src);
+ (*srcLen) += processed;
+ src += processed;
+ inSize -= processed;
+ }
+ else /* continue a symbol whose first bytes were saved in tempBuf */
+ {
+ unsigned rem = p->tempBufSize, lookAhead = 0;
+ while (rem < LZMA_REQUIRED_INPUT_MAX && lookAhead < inSize) /* top tempBuf up with new input */
+ p->tempBuf[rem++] = src[lookAhead++];
+ p->tempBufSize = rem;
+ if (rem < LZMA_REQUIRED_INPUT_MAX || checkEndMarkNow)
+ {
+ int dummyRes = LzmaDec_TryDummy(p, p->tempBuf, (SizeT)rem);
+ if (dummyRes == DUMMY_ERROR)
+ {
+ (*srcLen) += (SizeT)lookAhead;
+ *status = LZMA_STATUS_NEEDS_MORE_INPUT;
+ return SZ_OK;
+ }
+ if (checkEndMarkNow && dummyRes != DUMMY_MATCH)
+ {
+ *status = LZMA_STATUS_NOT_FINISHED;
+ return SZ_ERROR_DATA;
+ }
+ }
+ p->buf = p->tempBuf;
+ if (LzmaDec_DecodeReal2(p, dicLimit, p->buf, memlimit) != 0)
+ return SZ_ERROR_DATA;
+
+ { /* work out how many of the lookAhead bytes taken from src were really consumed */
+ unsigned kkk = (unsigned)(p->buf - p->tempBuf);
+ if (rem < kkk)
+ return SZ_ERROR_FAIL; /* some internal error */
+ rem -= kkk;
+ if (lookAhead < rem)
+ return SZ_ERROR_FAIL; /* some internal error */
+ lookAhead -= rem;
+ }
+ (*srcLen) += (SizeT)lookAhead;
+ src += lookAhead;
+ inSize -= (SizeT)lookAhead;
+ p->tempBufSize = 0;
+ }
+ }
+
+ if (p->code != 0) /* non-zero code once the loop has ended is treated as a data error */
+ return SZ_ERROR_DATA;
+ *status = LZMA_STATUS_FINISHED_WITH_MARK;
+ return SZ_OK;
+}
+
+
+/* LzmaDec_DecodeToBuf
+
+   Decodes from src into dest, using p->dic as the sliding window and copying
+   every newly decoded run out of it.
+
+   dest, destLen  output buffer; *destLen in: capacity, out: bytes written
+   src, srcLen    input buffer;  *srcLen  in: bytes available, out: bytes consumed
+   finishMode     applied only to the pass that can fill the rest of dest
+   status         out: LZMA_STATUS_* value of the last decode pass
+   memlimit       largest dictionary buffer, in bytes, that may be allocated
+
+   The dictionary buffer is grown on demand: whenever it is full and still
+   smaller than prop.dicSize its size is multiplied by four, capped by
+   memlimit and prop.dicSize. Once it has reached prop.dicSize the write
+   position wraps to 0 instead.
+
+   Returns SZ_ERROR_MEM when the buffer is full but already at memlimit, or
+   when realloc() fails; otherwise the result of LzmaDec_DecodeToDic.
+   p->dic and p->dicBufSize are updated together and only after realloc()
+   succeeded, so on SZ_ERROR_MEM they still describe the old, valid buffer.
+
+   NOTE(review): the buffer is grown with realloc() while LzmaDec_Allocate /
+   LzmaDec_FreeDict go through the ISzAlloc callbacks; that is only consistent
+   if those callbacks are malloc/free compatible - confirm against callers. */
+SRes LzmaDec_DecodeToBuf(CLzmaDec *p, Byte *dest, SizeT *destLen, const Byte *src, SizeT *srcLen, ELzmaFinishMode finishMode, ELzmaStatus *status, SizeT memlimit)
+{
+  SizeT outSize = *destLen;
+  SizeT inSize = *srcLen;
+  *srcLen = *destLen = 0;
+  for (;;)
+  {
+    SizeT inSizeCur = inSize, outSizeCur, dicPos;
+    ELzmaFinishMode curFinishMode;
+    SRes res;
+
+    if (p->dicPos == p->dicBufSize)
+    {
+      if (p->dicBufSize < p->prop.dicSize)
+      {
+        SizeT newSize;
+        Byte *tmp;
+
+        if (p->dicBufSize >= memlimit)
+          return SZ_ERROR_MEM;
+
+        /* Quadruple the size; saturate instead of wrapping when SizeT is
+           too narrow for the shifted value. */
+        if (p->dicBufSize > (((SizeT)-1) >> 2))
+          newSize = (SizeT)-1;
+        else
+          newSize = p->dicBufSize << 2;
+        if (newSize > memlimit)
+          newSize = memlimit;
+        if (newSize > p->prop.dicSize)
+          newSize = p->prop.dicSize;
+
+        tmp = realloc(p->dic, newSize);
+        if (!tmp)
+          return SZ_ERROR_MEM;
+
+        /* Commit pointer and capacity together, after success only. */
+        p->dic = tmp;
+        p->dicBufSize = newSize;
+      }
+      else
+      {
+        /* Full-size dictionary: wrap around. */
+        p->dicPos = 0;
+      }
+    }
+
+    dicPos = p->dicPos;
+    if (outSize > p->dicBufSize - dicPos)
+    {
+      /* dest can take more than the window has room for: fill the window
+         and come back for another pass. */
+      outSizeCur = p->dicBufSize;
+      curFinishMode = LZMA_FINISH_ANY;
+    }
+    else
+    {
+      outSizeCur = dicPos + outSize;
+      curFinishMode = finishMode;
+    }
+
+    res = LzmaDec_DecodeToDic(p, outSizeCur, src, &inSizeCur, curFinishMode, status, memlimit);
+    src += inSizeCur;
+    inSize -= inSizeCur;
+    *srcLen += inSizeCur;
+
+    /* Hand the freshly decoded bytes to the caller. */
+    outSizeCur = p->dicPos - dicPos;
+    memcpy(dest, p->dic + dicPos, outSizeCur);
+    dest += outSizeCur;
+    outSize -= outSizeCur;
+    *destLen += outSizeCur;
+
+    if (res != 0)
+      return res;
+    if (outSizeCur == 0 || outSize == 0)
+      return SZ_OK;
+  }
+}
+
+/* LzmaDec_FreeProbs
+
+   Releases the probability table through alloc and clears p->probs. */
+void LzmaDec_FreeProbs(CLzmaDec *p, ISzAllocPtr alloc)
+{
+  CLzmaProb *table = p->probs;
+
+  ISzAlloc_Free(alloc, table);
+  p->probs = NULL;
+}
+
+/* LzmaDec_FreeDict
+
+   Releases the dictionary buffer through alloc and clears p->dic.
+   p->dicBufSize is left unchanged. */
+static void LzmaDec_FreeDict(CLzmaDec *p, ISzAllocPtr alloc)
+{
+  Byte *window = p->dic;
+
+  ISzAlloc_Free(alloc, window);
+  p->dic = NULL;
+}
+
+/* LzmaDec_Free
+
+   Releases everything LzmaDec_Allocate obtained for p: first the
+   probability table, then the dictionary buffer. */
+void LzmaDec_Free(CLzmaDec *p, ISzAllocPtr alloc)
+{
+  LzmaDec_FreeProbs(p, alloc);
+  LzmaDec_FreeDict(p, alloc);
+}
+
+/* LzmaProps_Decode
+
+   Unpacks the LZMA properties header.
+     data[0]     packed value (pb * 5 + lp) * 9 + lc, must be below 9 * 5 * 5
+     data[1..4]  dictionary size, least significant byte first
+
+   A dictionary size below LZMA_DIC_MIN is raised to LZMA_DIC_MIN.
+
+   Returns SZ_OK, or SZ_ERROR_UNSUPPORTED when fewer than LZMA_PROPS_SIZE
+   bytes are supplied or data[0] is out of range. In the latter case
+   p->dicSize has already been written. */
+SRes LzmaProps_Decode(CLzmaProps *p, const Byte *data, unsigned size)
+{
+  UInt32 dict = 0;
+  unsigned k;
+  Byte packed, rest;
+
+  if (size < LZMA_PROPS_SIZE)
+    return SZ_ERROR_UNSUPPORTED;
+
+  /* Assemble bytes 4, 3, 2, 1 from the most significant end down. */
+  for (k = 4; k >= 1; k--)
+    dict = (dict << 8) | data[k];
+
+  p->dicSize = (dict < LZMA_DIC_MIN) ? LZMA_DIC_MIN : dict;
+
+  packed = data[0];
+  if (packed >= (9 * 5 * 5))
+    return SZ_ERROR_UNSUPPORTED;
+
+  rest = (Byte)(packed / 9);
+  p->lc = (Byte)(packed % 9);
+  p->lp = (Byte)(rest % 5);
+  p->pb = (Byte)(rest / 5);
+  p->_pad_ = 0;
+
+  return SZ_OK;
+}
+
+/* LzmaDec_AllocateProbs2
+
+   Makes sure p owns a probability table with exactly the number of entries
+   that propNew requires. An existing table of the right size is reused;
+   otherwise it is freed and a new one is allocated through alloc.
+
+   Returns SZ_OK, or SZ_ERROR_MEM if the allocation fails (p->probs is NULL
+   in that case). */
+static SRes LzmaDec_AllocateProbs2(CLzmaDec *p, const CLzmaProps *propNew, ISzAllocPtr alloc)
+{
+  UInt32 wanted = LzmaProps_GetNumProbs(propNew);
+
+  if (p->probs && wanted == p->numProbs)
+    return SZ_OK;
+
+  LzmaDec_FreeProbs(p, alloc);
+  p->probs = (CLzmaProb *)ISzAlloc_Alloc(alloc, wanted * sizeof(CLzmaProb));
+  if (!p->probs)
+    return SZ_ERROR_MEM;
+
+  p->probs_1664 = p->probs + 1664;
+  p->numProbs = wanted;
+  return SZ_OK;
+}
+
+/* LzmaDec_AllocateProbs
+
+   Decodes the properties header in props and (re)allocates the probability
+   table for it. The dictionary buffer is not touched. p->prop is replaced
+   only after both steps succeeded.
+
+   Returns SZ_OK, or the error of LzmaProps_Decode / LzmaDec_AllocateProbs2. */
+SRes LzmaDec_AllocateProbs(CLzmaDec *p, const Byte *props, unsigned propsSize, ISzAllocPtr alloc)
+{
+  CLzmaProps decoded;
+
+  RINOK(LzmaProps_Decode(&decoded, props, propsSize));
+  RINOK(LzmaDec_AllocateProbs2(p, &decoded, alloc));
+
+  p->prop = decoded;
+  return SZ_OK;
+}
+
+/* LzmaDec_Allocate
+
+   Decodes the properties header in props and (re)allocates both the
+   probability table and the dictionary buffer through alloc.
+
+   The dictionary size from the header is rounded up to a block boundary
+   (4 KiB, 1 MiB or 4 MiB depending on its magnitude) and the result is then
+   capped at LZMA_DIC_MIN, so the buffer allocated here never exceeds
+   LZMA_DIC_MIN bytes.
+
+   Returns SZ_OK, SZ_ERROR_UNSUPPORTED for a bad header, or SZ_ERROR_MEM.
+   If the dictionary allocation fails the probability table is released
+   again. */
+SRes LzmaDec_Allocate(CLzmaDec *p, const Byte *props, unsigned propsSize, ISzAllocPtr alloc)
+{
+  CLzmaProps decoded;
+  SizeT wanted;
+  SizeT roundMask;
+
+  RINOK(LzmaProps_Decode(&decoded, props, propsSize));
+  RINOK(LzmaDec_AllocateProbs2(p, &decoded, alloc));
+
+  if (decoded.dicSize >= ((UInt32)1 << 30))
+    roundMask = ((UInt32)1 << 22) - 1;
+  else if (decoded.dicSize >= ((UInt32)1 << 22))
+    roundMask = ((UInt32)1 << 20) - 1;
+  else
+    roundMask = ((UInt32)1 << 12) - 1;
+
+  wanted = ((SizeT)decoded.dicSize + roundMask) & ~roundMask;
+  if (wanted < decoded.dicSize) /* rounding wrapped around */
+    wanted = decoded.dicSize;
+
+  if (wanted > LZMA_DIC_MIN)
+    wanted = LZMA_DIC_MIN;
+
+  if (p->dic == NULL || wanted != p->dicBufSize)
+  {
+    LzmaDec_FreeDict(p, alloc);
+    p->dic = (Byte *)ISzAlloc_Alloc(alloc, wanted);
+    if (p->dic == NULL)
+    {
+      LzmaDec_FreeProbs(p, alloc);
+      return SZ_ERROR_MEM;
+    }
+  }
+
+  p->dicBufSize = wanted;
+  p->prop = decoded;
+  return SZ_OK;
+}
+
+/* LzmaDecode
+
+   One-call decoder: decodes src into dest, using dest itself as the
+   dictionary buffer. Only the probability table is allocated (through
+   alloc) and it is released again before returning.
+
+   destLen   in: capacity of dest;        out: bytes written
+   srcLen    in: bytes available at src;  out: bytes consumed
+   propData  properties header, propSize bytes long
+
+   Returns SZ_ERROR_INPUT_EOF when src holds fewer than RC_INIT_SIZE bytes or
+   when the decoder stops asking for more input; otherwise the result of
+   LzmaDec_AllocateProbs / LzmaDec_DecodeToDic. */
+SRes LzmaDecode(Byte *dest, SizeT *destLen, const Byte *src, SizeT *srcLen,
+    const Byte *propData, unsigned propSize, ELzmaFinishMode finishMode,
+    ELzmaStatus *status, ISzAllocPtr alloc)
+{
+  CLzmaDec dec;
+  SRes rc;
+  SizeT outCap = *destLen;
+  SizeT inAvail = *srcLen;
+
+  *destLen = *srcLen = 0;
+  *status = LZMA_STATUS_NOT_SPECIFIED;
+
+  if (inAvail < RC_INIT_SIZE)
+    return SZ_ERROR_INPUT_EOF;
+
+  LzmaDec_Construct(&dec);
+  RINOK(LzmaDec_AllocateProbs(&dec, propData, propSize, alloc));
+
+  /* The caller's output buffer doubles as the dictionary. */
+  dec.dic = dest;
+  dec.dicBufSize = outCap;
+  LzmaDec_Init(&dec);
+
+  *srcLen = inAvail;
+  rc = LzmaDec_DecodeToDic(&dec, outCap, src, srcLen, finishMode, status, SIZE_MAX);
+  *destLen = dec.dicPos;
+
+  if (rc == SZ_OK && *status == LZMA_STATUS_NEEDS_MORE_INPUT)
+    rc = SZ_ERROR_INPUT_EOF;
+
+  LzmaDec_FreeProbs(&dec, alloc);
+  return rc;
+}
diff --git a/htp/lzma/LzmaDec.h b/htp/lzma/LzmaDec.h new file mode 100644 index 0000000..20b5228 --- /dev/null +++ b/htp/lzma/LzmaDec.h @@ -0,0 +1,234 @@ +/* LzmaDec.h -- LZMA Decoder
+2018-04-21 : Igor Pavlov : Public domain */
+
+#ifndef __LZMA_DEC_H
+#define __LZMA_DEC_H
+
+#include "7zTypes.h"
+
+EXTERN_C_BEGIN
+
+/* #define _LZMA_PROB32 */
+/* _LZMA_PROB32 can increase the speed on some CPUs,
+ but memory usage for CLzmaDec::probs will be doubled in that case.
+
+ CLzmaProb is the element type of the CLzmaDec::probs table:
+ UInt16 by default, UInt32 when _LZMA_PROB32 is defined. */
+
+typedef
+#ifdef _LZMA_PROB32
+ UInt32
+#else
+ UInt16
+#endif
+ CLzmaProb;
+
+
+/* ---------- LZMA Properties ---------- */
+
+#define LZMA_PROPS_SIZE 5 /* bytes in the properties header: 1 packed lc/lp/pb byte + 4 dictionary size bytes */
+
+typedef struct _CLzmaProps /* decoded form of the properties header, filled in by LzmaProps_Decode */
+{
+ Byte lc; /* header byte % 9; number of high bits of the previous byte used to select the literal probabilities */
+ Byte lp; /* (header byte / 9) % 5; number of low bits of processedPos used to select the literal probabilities */
+ Byte pb; /* (header byte / 9) / 5; number of low bits of processedPos used for posState */
+ Byte _pad_; /* always set to 0 by LzmaProps_Decode */
+ UInt32 dicSize; /* dictionary size from the header, raised to LZMA_DIC_MIN if smaller */
+} CLzmaProps;
+
+/* LzmaProps_Decode - decodes properties
+ data - properties header, size - number of bytes available at data
+ (at least LZMA_PROPS_SIZE are required)
+Returns:
+ SZ_OK
+ SZ_ERROR_UNSUPPORTED - Unsupported properties, or size < LZMA_PROPS_SIZE
+*/
+
+SRes LzmaProps_Decode(CLzmaProps *p, const Byte *data, unsigned size);
+
+
+/* ---------- LZMA Decoder state ---------- */
+
+/* LZMA_REQUIRED_INPUT_MAX = number of required input bytes for worst case.
+ Num bits = log2((2^11 / 31) ^ 22) + 26 < 134 + 26 = 160; */
+
+#define LZMA_REQUIRED_INPUT_MAX 20
+
+typedef struct
+{
+ /* Don't change this structure. ASM code can use it. */
+ CLzmaProps prop; /* properties of the stream being decoded */
+ CLzmaProb *probs; /* probability table, numProbs entries */
+ CLzmaProb *probs_1664; /* probs + 1664 */
+ Byte *dic; /* dictionary (output window) buffer */
+ SizeT dicBufSize; /* capacity of dic in bytes */
+ SizeT dicPos; /* next write position in dic */
+ const Byte *buf; /* current read position in the input */
+ UInt32 range; /* range coder: current range */
+ UInt32 code; /* range coder: current code value */
+ UInt32 processedPos; /* number of bytes decoded since the last dictionary reset (32-bit counter) */
+ UInt32 checkDicSize; /* 0 until processedPos has reached prop.dicSize, then prop.dicSize */
+ UInt32 reps[4]; /* most recent match distances; reps[0] is the latest */
+ UInt32 state; /* decoder state index used to select probabilities */
+ UInt32 remainLen; /* bytes of an unfinished match still to be copied, or a marker value >= kMatchSpecLenStart */
+
+ UInt32 numProbs; /* number of entries allocated in probs */
+ unsigned tempBufSize; /* number of valid bytes in tempBuf */
+ Byte tempBuf[LZMA_REQUIRED_INPUT_MAX]; /* input bytes carried over between calls */
+} CLzmaDec;
+
+#define LzmaDec_Construct(p) { (p)->dic = NULL; (p)->probs = NULL; } /* marks both buffers as not allocated; no other field is initialized */
+
+void LzmaDec_Init(CLzmaDec *p); /* prepares p for a new stream: dicPos = 0 and a full dictionary/state reset */
+
+/* There are two types of LZMA streams:
+ - Stream with an end mark. The end mark adds about 6 bytes to the compressed size.
+ - Stream without an end mark. You must know the exact uncompressed size to decompress such a stream. */
+
+typedef enum
+{
+ LZMA_FINISH_ANY, /* finish at any point */
+ LZMA_FINISH_END /* block must be finished at the end */
+} ELzmaFinishMode;
+
+/* ELzmaFinishMode has meaning only if the decoding reaches the output limit !!!
+
+ You must use LZMA_FINISH_END when you know that the current output buffer
+ covers the last bytes of the block. In all other cases you must use LZMA_FINISH_ANY.
+
+ If the LZMA decoder sees the end marker before reaching the output limit, it returns SZ_OK,
+ and the output value of destLen will be less than the output buffer size limit.
+ You can also check the status result.
+
+ You can use multiple checks to test data integrity after full decompression:
+ 1) Check the result code and the "status" variable.
+ 2) Check that output(destLen) = uncompressedSize, if you know the real uncompressedSize.
+ 3) Check that output(srcLen) = compressedSize, if you know the real compressedSize.
+ You must use the correct finish mode in that case. */
+
+typedef enum
+{
+ LZMA_STATUS_NOT_SPECIFIED, /* use main error code instead */
+ LZMA_STATUS_FINISHED_WITH_MARK, /* stream was finished with end mark. */
+ LZMA_STATUS_NOT_FINISHED, /* stream was not finished */
+ LZMA_STATUS_NEEDS_MORE_INPUT, /* you must provide more input bytes */
+ LZMA_STATUS_MAYBE_FINISHED_WITHOUT_MARK /* the stream may have finished without an end mark */
+} ELzmaStatus;
+
+/* ELzmaStatus is used only as an output value of a function call */
+
+
+/* ---------- Interfaces ---------- */
+
+/* There are 3 levels of interfaces:
+ 1) Dictionary Interface
+ 2) Buffer Interface
+ 3) One Call Interface
+ You can select any of these interfaces, but don't mix functions from different
+ groups for the same object. */
+
+
+/* There are two variants to allocate state for Dictionary Interface:
+ 1) LzmaDec_Allocate / LzmaDec_Free
+ 2) LzmaDec_AllocateProbs / LzmaDec_FreeProbs
+ You can use variant 2, if you set dictionary buffer manually.
+ For Buffer Interface you must always use variant 1.
+
+ props / propsSize: the properties header (see LzmaProps_Decode).
+
+LzmaDec_Allocate* can return:
+ SZ_OK
+ SZ_ERROR_MEM - Memory allocation error
+ SZ_ERROR_UNSUPPORTED - Unsupported properties
+*/
+
+SRes LzmaDec_AllocateProbs(CLzmaDec *p, const Byte *props, unsigned propsSize, ISzAllocPtr alloc);
+void LzmaDec_FreeProbs(CLzmaDec *p, ISzAllocPtr alloc);
+
+SRes LzmaDec_Allocate(CLzmaDec *p, const Byte *props, unsigned propsSize, ISzAllocPtr alloc);
+void LzmaDec_Free(CLzmaDec *p, ISzAllocPtr alloc);
+
+/* ---------- Dictionary Interface ---------- */
+
+/* You can use it, if you want to eliminate the overhead for data copying from
+ dictionary to some other external buffer.
+ You must work with CLzmaDec variables directly in this interface.
+
+ STEPS:
+ LzmaDec_Construct()
+ LzmaDec_Allocate()
+ for (each new stream)
+ {
+ LzmaDec_Init()
+ while (it needs more decompression)
+ {
+ LzmaDec_DecodeToDic()
+ use data from CLzmaDec::dic and update CLzmaDec::dicPos
+ }
+ }
+ LzmaDec_Free()
+*/
+
+/* LzmaDec_DecodeToDic
+
+ The decoding to internal dictionary buffer (CLzmaDec::dic).
+ You must manually update CLzmaDec::dicPos, if it reaches CLzmaDec::dicBufSize !!!
+
+finishMode:
+ It has meaning only if the decoding reaches output limit (dicLimit).
+ LZMA_FINISH_ANY - Decode just dicLimit bytes.
+ LZMA_FINISH_END - Stream must be finished after dicLimit.
+
+srcLen:
+ In: number of bytes available at src. Out: number of bytes consumed.
+
+memlimit:
+ Upper bound, in bytes, for the dictionary buffer when the decoder has to
+ enlarge it to CLzmaProps::dicSize during decoding.
+
+Returns:
+ SZ_OK
+ status:
+ LZMA_STATUS_FINISHED_WITH_MARK
+ LZMA_STATUS_NOT_FINISHED
+ LZMA_STATUS_NEEDS_MORE_INPUT
+ LZMA_STATUS_MAYBE_FINISHED_WITHOUT_MARK
+ SZ_ERROR_DATA - Data error
+ NOTE(review): a failure to enlarge the dictionary (memlimit exceeded or
+ allocation failure) is currently also reported as SZ_ERROR_DATA.
+ SZ_ERROR_FAIL - Internal inconsistency while replaying buffered input
+*/
+
+SRes LzmaDec_DecodeToDic(CLzmaDec *p, SizeT dicLimit,
+ const Byte *src, SizeT *srcLen, ELzmaFinishMode finishMode, ELzmaStatus *status, SizeT memlimit);
+
+
+/* ---------- Buffer Interface ---------- */
+
+/* It's zlib-like interface.
+ See LzmaDec_DecodeToDic description for information about STEPS and return results,
+ but you must use LzmaDec_DecodeToBuf instead of LzmaDec_DecodeToDic and you don't need
+ to work with CLzmaDec variables manually.
+
+finishMode:
+ It has meaning only if the decoding reaches output limit (*destLen).
+ LZMA_FINISH_ANY - Decode just destLen bytes.
+ LZMA_FINISH_END - Stream must be finished after (*destLen).
+
+destLen / srcLen:
+ In: capacity of dest / number of bytes available at src.
+ Out: number of bytes written to dest / consumed from src.
+
+memlimit:
+ Upper bound, in bytes, for the dictionary buffer, which this function
+ enlarges on demand. In addition to the LzmaDec_DecodeToDic results,
+ SZ_ERROR_MEM is returned when the buffer is full but already at memlimit,
+ or when enlarging it fails.
+*/
+
+SRes LzmaDec_DecodeToBuf(CLzmaDec *p, Byte *dest, SizeT *destLen,
+ const Byte *src, SizeT *srcLen, ELzmaFinishMode finishMode, ELzmaStatus *status, SizeT memlimit);
+
+
+/* ---------- One Call Interface ---------- */
+
+/* LzmaDecode
+
+ Decodes src into dest in a single call; dest itself serves as the dictionary buffer.
+
+destLen / srcLen:
+ In: capacity of dest / number of bytes available at src.
+ Out: number of bytes written to dest / consumed from src.
+
+finishMode:
+ It has meaning only if the decoding reaches output limit (*destLen).
+ LZMA_FINISH_ANY - Decode just destLen bytes.
+ LZMA_FINISH_END - Stream must be finished after (*destLen).
+
+Returns:
+ SZ_OK
+ status:
+ LZMA_STATUS_FINISHED_WITH_MARK
+ LZMA_STATUS_NOT_FINISHED
+ LZMA_STATUS_MAYBE_FINISHED_WITHOUT_MARK
+ SZ_ERROR_DATA - Data error
+ SZ_ERROR_MEM - Memory allocation error
+ SZ_ERROR_UNSUPPORTED - Unsupported properties
+ SZ_ERROR_INPUT_EOF - It needs more bytes in input buffer (src).
+*/
+
+SRes LzmaDecode(Byte *dest, SizeT *destLen, const Byte *src, SizeT *srcLen,
+ const Byte *propData, unsigned propSize, ELzmaFinishMode finishMode,
+ ELzmaStatus *status, ISzAllocPtr alloc);
+
+EXTERN_C_END
+
+#endif
diff --git a/htp/lzma/Makefile.am b/htp/lzma/Makefile.am new file mode 100644 index 0000000..5fa3ec5 --- /dev/null +++ b/htp/lzma/Makefile.am @@ -0,0 +1,16 @@ + +h_sources = LzmaDec.h 7zTypes.h + +h_sources_private = LzFind.h LzHash.h Compiler.h Precomp.h + +c_sources = LzFind.c LzmaDec.c + +AM_CFLAGS = -I$(top_srcdir) -D_GNU_SOURCE -g -Wall -Wextra -std=gnu99 -pedantic \ + -Wextra -Wno-missing-field-initializers -Wshadow -Wpointer-arith \ + -Wstrict-prototypes -Wmissing-prototypes -Wno-unused-parameter + +library_includedir = $(includedir)/$(GENERIC_LIBRARY_NAME)/lzma +library_include_HEADERS = $(h_sources) + +noinst_LTLIBRARIES = liblzma-c.la +liblzma_c_la_SOURCES = $(h_sources) $(h_sources_private) $(c_sources) diff --git a/htp/lzma/Precomp.h b/htp/lzma/Precomp.h new file mode 100644 index 0000000..edb5814 --- /dev/null +++ b/htp/lzma/Precomp.h @@ -0,0 +1,10 @@ +/* Precomp.h -- StdAfx
+2013-11-12 : Igor Pavlov : Public domain */
+
+#ifndef __7Z_PRECOMP_H
+#define __7Z_PRECOMP_H
+
+#include "Compiler.h"
+/* #include "7zTypes.h" */
+
+#endif /* __7Z_PRECOMP_H */
diff --git a/htp/strlcat.c b/htp/strlcat.c new file mode 100644 index 0000000..fc1776d --- /dev/null +++ b/htp/strlcat.c @@ -0,0 +1,76 @@ +/* + * Copyright (c) 1998 Todd C. Miller <Todd.Miller@courtesan.com> + * All rights reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * 1. Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * 2. Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution. + * 3. The name of the author may not be used to endorse or promote products + * derived from this software without specific prior written permission. + * + * THIS SOFTWARE IS PROVIDED ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, + * INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY + * AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL + * THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, + * EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, + * PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; + * OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, + * WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR + * OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF + * ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
+ */ + +/* $Id: strlcatu.c,v 1.4 2003/10/20 15:03:27 chrisgreen Exp $ */ + +#include "htp_config_auto.h" + +#include "htp_private.h" + +#ifndef HAVE_STRLCAT + +#if defined(LIBC_SCCS) && !defined(lint) +static char *rcsid = "$OpenBSD: strlcat.c,v 1.5 2001/01/13 16:17:24 millert Exp $"; +#endif /* LIBC_SCCS and not lint */ + +#include <sys/types.h> +#include <string.h> + +/* + * Appends src to string dst of size siz (unlike strncat, siz is the + * full size of dst, not space left). At most siz-1 characters + * will be copied. Always NUL terminates (unless siz <= strlen(dst)). + * Returns strlen(initial dst) + strlen(src); if retval >= siz, + * truncation occurred. + */ +size_t strlcat(char *dst, const char *src, size_t siz) +{ + register char *d = dst; + register const char *s = src; + register size_t n = siz; + size_t dlen; + + /* Find the end of dst and adjust bytes left but don't go past end */ + while (n-- != 0 && *d != '\0') + d++; + dlen = d - dst; + n = siz - dlen; + + if (n == 0) + return(dlen + strlen(s)); + while (*s != '\0') { + if (n != 1) { + *d++ = *s; + n--; + } + s++; + } + *d = '\0'; + + return(dlen + (s - src)); /* count does not include NUL */ +} +#endif diff --git a/htp/strlcpy.c b/htp/strlcpy.c new file mode 100644 index 0000000..227f52a --- /dev/null +++ b/htp/strlcpy.c @@ -0,0 +1,72 @@ +/* + * Copyright (c) 1998 Todd C. Miller <Todd.Miller@courtesan.com> + * All rights reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * 1. Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * 2. Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution. + * 3. 
The name of the author may not be used to endorse or promote products + * derived from this software without specific prior written permission. + * + * THIS SOFTWARE IS PROVIDED ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, + * INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY + * AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL + * THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, + * EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, + * PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; + * OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, + * WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR + * OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF + * ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + */ + +/* $Id: strlcpyu.c,v 1.4 2003/10/20 15:03:27 chrisgreen Exp $ */ + +#include "htp_config_auto.h" + +#include "htp_private.h" + +#ifndef HAVE_STRLCPY + +#if defined(LIBC_SCCS) && !defined(lint) +static char *rcsid = "$OpenBSD: strlcpy.c,v 1.4 1999/05/01 18:56:41 millert Exp $"; +#endif /* LIBC_SCCS and not lint */ + +#include <sys/types.h> +#include <string.h> + +/* + * Copy src to string dst of size siz. At most siz-1 characters + * will be copied. Always NUL terminates (unless siz == 0). + * Returns strlen(src); if retval >= siz, truncation occurred. + */ +size_t strlcpy(char *dst, const char *src, size_t siz) +{ + register char *d = dst; + register const char *s = src; + register size_t n = siz; + + /* Copy as many bytes as will fit */ + if (n != 0 && --n != 0) { + do { + if ((*d++ = *s++) == 0) + break; + } while (--n != 0); + } + + /* Not enough room in dst, add NUL and traverse rest of src */ + if (n == 0) { + if (siz != 0) + *d = '\0'; /* NUL-terminate dst */ + while (*s++) + ; + } + + return(s - src - 1); /* count does not include NUL */ +} +#endif |