diff options
Diffstat (limited to 'htp/htp.h')
-rw-r--r-- | htp/htp.h | 678 |
1 files changed, 678 insertions, 0 deletions
diff --git a/htp/htp.h b/htp/htp.h new file mode 100644 index 0000000..36209ad --- /dev/null +++ b/htp/htp.h @@ -0,0 +1,678 @@ +/*************************************************************************** + * Copyright (c) 2009-2010 Open Information Security Foundation + * Copyright (c) 2010-2013 Qualys, Inc. + * All rights reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions are + * met: + * + * - Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + + * - Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution. + + * - Neither the name of the Qualys, Inc. nor the names of its + * contributors may be used to endorse or promote products derived from + * this software without specific prior written permission. + * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS + * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT + * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR + * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT + * HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, + * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT + * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, + * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY + * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT + * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE + * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + ***************************************************************************/ + +/** + * @file + * @author Ivan Ristic <ivanr@webkreator.com> + */ + +#ifndef _HTP_H +#define _HTP_H + +#ifdef __cplusplus +extern "C" { +#endif + +#include <sys/time.h> + +#include "htp_version.h" +#include "htp_core.h" + +#include "bstr.h" +#include "htp_base64.h" +#include "htp_config.h" +#include "htp_connection_parser.h" +#include "htp_decompressors.h" +#include "htp_hooks.h" +#include "htp_list.h" +#include "htp_multipart.h" +#include "htp_table.h" +#include "htp_transaction.h" +#include "htp_urlencoded.h" +#include "htp_utf8_decoder.h" + +/** + * Represents a single TCP connection. + */ +struct htp_conn_t { + /** Client IP address. */ + char *client_addr; + + /** Client port. */ + int client_port; + + /** Server IP address. */ + char *server_addr; + + /** Server port. */ + int server_port; + + /** + * Transactions carried out on this connection. The list may contain + * NULL elements when some of the transactions are deleted (and then + * removed from a connection by calling htp_conn_remove_tx(). + */ + htp_list_t *transactions; + + /** Log messages associated with this connection. */ + htp_list_t *messages; + + /** Parsing flags: HTP_CONN_PIPELINED. */ + uint8_t flags; + + /** When was this connection opened? Can be NULL. */ + htp_time_t open_timestamp; + + /** When was this connection closed? Can be NULL. */ + htp_time_t close_timestamp; + + /** Inbound data counter. */ + int64_t in_data_counter; + + /** Outbound data counter. */ + int64_t out_data_counter; +}; + +/** + * Used to represent files that are seen during the processing of HTTP traffic. Most + * commonly this refers to files seen in multipart/form-data payloads. In addition, PUT + * request bodies can be treated as files. + */ +struct htp_file_t { + /** Where did this file come from? Possible values: HTP_FILE_MULTIPART and HTP_FILE_PUT. */ + enum htp_file_source_t source; + + /** File name, as provided (e.g., in the Content-Disposition multipart part header. */ + bstr *filename; + + /** File length. */ + int64_t len; + + /** The unique filename in which this file is stored on the filesystem, when applicable.*/ + char *tmpname; + + /** The file descriptor used for external storage, or -1 if unused. */ + int fd; +}; + +/** + * Represents a chunk of file data. + */ +struct htp_file_data_t { + /** File information. */ + htp_file_t *file; + + /** Pointer to the data buffer. */ + const unsigned char *data; + + /** Buffer length. */ + size_t len; +}; + +/** + * Represents a single log entry. + */ +struct htp_log_t { + /** The connection parser associated with this log message. */ + htp_connp_t *connp; + + /** The transaction associated with this log message, if any. */ + htp_tx_t *tx; + + /** Log message. */ + const char *msg; + + /** Message level. */ + enum htp_log_level_t level; + + /** Message code. */ + int code; + + /** File in which the code that emitted the message resides. */ + const char *file; + + /** Line number on which the code that emitted the message resides. */ + unsigned int line; +}; + +/** + * Represents a single request or response header. + */ +struct htp_header_t { + /** Header name. */ + bstr *name; + + /** Header value. */ + bstr *value; + + /** Parsing flags; a combination of: HTP_FIELD_INVALID, HTP_FIELD_FOLDED, HTP_FIELD_REPEATED. */ + uint64_t flags; +}; + +/** + * Represents a single request parameter. + */ +struct htp_param_t { + /** Parameter name. */ + bstr *name; + + /** Parameter value. */ + bstr *value; + + /** Source of the parameter, for example HTP_SOURCE_QUERY_STRING. */ + enum htp_data_source_t source; + + /** Type of the data structure referenced below. */ + enum htp_parser_id_t parser_id; + + /** + * Pointer to the parser data structure that contains + * complete information about the parameter. Can be NULL. + */ + void *parser_data; +}; + +/** + * Represents a single HTTP transaction, which is a combination of a request and a response. + */ +struct htp_tx_t { + /** The connection parser associated with this transaction. */ + htp_connp_t *connp; + + /** The connection to which this transaction belongs. */ + htp_conn_t *conn; + + /** The configuration structure associated with this transaction. */ + htp_cfg_t *cfg; + + /** + * Is the configuration structure shared with other transactions or connections? If + * this field is set to HTP_CONFIG_PRIVATE, the transaction owns the configuration. + */ + int is_config_shared; + + /** The user data associated with this transaction. */ + void *user_data; + + + // Request fields + + /** Contains a count of how many empty lines were skipped before the request line. */ + unsigned int request_ignored_lines; + + /** The first line of this request. */ + bstr *request_line; + + /** Request method. */ + bstr *request_method; + + /** Request method, as number. Available only if we were able to recognize the request method. */ + enum htp_method_t request_method_number; + + /** + * Request URI, raw, as given to us on the request line. This field can take different forms, + * for example authority for CONNECT methods, absolute URIs for proxy requests, and the query + * string when one is provided. Use htp_tx_t::parsed_uri if you need to access to specific + * URI elements. Can be NULL if the request line contains only a request method (which is + * an extreme case of HTTP/0.9, but passes in practice. + */ + bstr *request_uri; + + /** Request protocol, as text. Can be NULL if no protocol was specified. */ + bstr *request_protocol; + + /** + * Protocol version as a number. Multiply the high version number by 100, then add the low + * version number. You should prefer to work the pre-defined HTP_PROTOCOL_* constants. + */ + int request_protocol_number; + + /** + * Is this request using HTTP/0.9? We need a separate field for this purpose because + * the protocol version alone is not sufficient to determine if HTTP/0.9 is used. For + * example, if you submit "GET / HTTP/0.9" to Apache, it will not treat the request + * as HTTP/0.9. + */ + int is_protocol_0_9; + + /** + * This structure holds the individual components parsed out of the request URI, with + * appropriate normalization and transformation applied, per configuration. No information + * is added. In extreme cases when no URI is provided on the request line, all fields + * will be NULL. (Well, except for port_number, which will be -1.) To inspect raw data, use + * htp_tx_t::request_uri or htp_tx_t::parsed_uri_raw. + */ + htp_uri_t *parsed_uri; + + /** + * This structure holds the individual components parsed out of the request URI, but + * without any modification. The purpose of this field is to allow you to look at the data as it + * was supplied on the request line. Fields can be NULL, depending on what data was supplied. + * The port_number field is always -1. + */ + htp_uri_t *parsed_uri_raw; + + /* HTTP 1.1 RFC + * + * 4.3 Message Body + * + * The message-body (if any) of an HTTP message is used to carry the + * entity-body associated with the request or response. The message-body + * differs from the entity-body only when a transfer-coding has been + * applied, as indicated by the Transfer-Encoding header field (section + * 14.41). + * + * message-body = entity-body + * | <entity-body encoded as per Transfer-Encoding> + */ + + /** + * The length of the request message-body. In most cases, this value + * will be the same as request_entity_len. The values will be different + * if request compression or chunking were applied. In that case, + * request_message_len contains the length of the request body as it + * has been seen over TCP; request_entity_len contains length after + * de-chunking and decompression. + */ + int64_t request_message_len; + + /** + * The length of the request entity-body. In most cases, this value + * will be the same as request_message_len. The values will be different + * if request compression or chunking were applied. In that case, + * request_message_len contains the length of the request body as it + * has been seen over TCP; request_entity_len contains length after + * de-chunking and decompression. + */ + int64_t request_entity_len; + + /** Parsed request headers. */ + htp_table_t *request_headers; + + /** + * Request transfer coding. Can be one of HTP_CODING_UNKNOWN (body presence not + * determined yet), HTP_CODING_IDENTITY, HTP_CODING_CHUNKED, HTP_CODING_NO_BODY, + * and HTP_CODING_UNRECOGNIZED. + */ + enum htp_transfer_coding_t request_transfer_coding; + + /** Request body compression. */ + enum htp_content_encoding_t request_content_encoding; + + /** + * This field contain the request content type when that information is + * available in request headers. The contents of the field will be converted + * to lowercase and any parameters (e.g., character set information) removed. + */ + bstr *request_content_type; + + /** + * Contains the value specified in the Content-Length header. The value of this + * field will be -1 from the beginning of the transaction and until request + * headers are processed. It will stay -1 if the C-L header was not provided, + * or if the value in it cannot be parsed. + */ + int64_t request_content_length; + + /** + * Transaction-specific REQUEST_BODY_DATA hook. Behaves as + * the configuration hook with the same name. + */ + htp_hook_t *hook_request_body_data; + + /** + * Transaction-specific RESPONSE_BODY_DATA hook. Behaves as + * the configuration hook with the same name. + */ + htp_hook_t *hook_response_body_data; + + /** + * Query string URLENCODED parser. Available only + * when the query string is not NULL and not empty. + */ + htp_urlenp_t *request_urlenp_query; + + /** + * Request body URLENCODED parser. Available only when the request body is in the + * application/x-www-form-urlencoded format and the parser was configured to run. + */ + htp_urlenp_t *request_urlenp_body; + + /** + * Request body MULTIPART parser. Available only when the body is in the + * multipart/form-data format and the parser was configured to run. + */ + htp_mpartp_t *request_mpartp; + + /** Request parameters. */ + htp_table_t *request_params; + + /** Request cookies */ + htp_table_t *request_cookies; + + /** Authentication type used in the request. */ + enum htp_auth_type_t request_auth_type; + + /** Authentication username. */ + bstr *request_auth_username; + + /** Authentication password. Available only when htp_tx_t::request_auth_type is HTP_AUTH_BASIC. */ + bstr *request_auth_password; + + /** + * Request hostname. Per the RFC, the hostname will be taken from the Host header + * when available. If the host information is also available in the URI, it is used + * instead of whatever might be in the Host header. Can be NULL. This field does + * not contain port information. + */ + bstr *request_hostname; + + /** + * Request port number, if presented. The rules for htp_tx_t::request_host apply. Set to + * -1 by default. + */ + int request_port_number; + + + // Response fields + + /** How many empty lines did we ignore before reaching the status line? */ + unsigned int response_ignored_lines; + + /** Response line. */ + bstr *response_line; + + /** Response protocol, as text. Can be NULL. */ + bstr *response_protocol; + + /** + * Response protocol as number. Available only if we were able to parse the protocol version, + * HTP_PROTOCOL_INVALID otherwise. HTP_PROTOCOL_UNKNOWN until parsing is attempted. + */ + int response_protocol_number; + + /** + * Response status code, as text. Starts as NULL and can remain NULL on + * an invalid response that does not specify status code. + */ + bstr *response_status; + + /** + * Response status code, available only if we were able to parse it, HTP_STATUS_INVALID + * otherwise. HTP_STATUS_UNKNOWN until parsing is attempted. + */ + int response_status_number; + + /** + * This field is set by the protocol decoder with it thinks that the + * backend server will reject a request with a particular status code. + */ + int response_status_expected_number; + + /** The message associated with the response status code. Can be NULL. */ + bstr *response_message; + + /** Have we seen the server respond with a 100 response? */ + int seen_100continue; + + /** Parsed response headers. Contains instances of htp_header_t. */ + htp_table_t *response_headers; + + /* HTTP 1.1 RFC + * + * 4.3 Message Body + * + * The message-body (if any) of an HTTP message is used to carry the + * entity-body associated with the request or response. The message-body + * differs from the entity-body only when a transfer-coding has been + * applied, as indicated by the Transfer-Encoding header field (section + * 14.41). + * + * message-body = entity-body + * | <entity-body encoded as per Transfer-Encoding> + */ + + /** + * The length of the response message-body. In most cases, this value + * will be the same as response_entity_len. The values will be different + * if response compression or chunking were applied. In that case, + * response_message_len contains the length of the response body as it + * has been seen over TCP; response_entity_len contains the length after + * de-chunking and decompression. + */ + int64_t response_message_len; + + /** + * The length of the response entity-body. In most cases, this value + * will be the same as response_message_len. The values will be different + * if request compression or chunking were applied. In that case, + * response_message_len contains the length of the response body as it + * has been seen over TCP; response_entity_len contains length after + * de-chunking and decompression. + */ + int64_t response_entity_len; + + /** + * Contains the value specified in the Content-Length header. The value of this + * field will be -1 from the beginning of the transaction and until response + * headers are processed. It will stay -1 if the C-L header was not provided, + * or if the value in it cannot be parsed. + */ + int64_t response_content_length; + + /** + * Response transfer coding, which indicates if there is a response body, + * and how it is transported (e.g., as-is, or chunked). + */ + enum htp_transfer_coding_t response_transfer_coding; + + /** + * Response body compression, which indicates if compression is used + * for the response body. This field is an interpretation of the information + * available in response headers. + */ + enum htp_content_encoding_t response_content_encoding; + + /** + * Response body compression processing information, which is related to how + * the library is going to process (or has processed) a response body. Changing + * this field mid-processing can influence library actions. For example, setting + * this field to HTP_COMPRESSION_NONE in a RESPONSE_HEADERS callback will prevent + * decompression. + */ + enum htp_content_encoding_t response_content_encoding_processing; + + /** + * This field will contain the response content type when that information + * is available in response headers. The contents of the field will be converted + * to lowercase and any parameters (e.g., character set information) removed. + */ + bstr *response_content_type; + + + // Common fields + + /** + * Parsing flags; a combination of: HTP_REQUEST_INVALID_T_E, HTP_INVALID_FOLDING, + * HTP_REQUEST_SMUGGLING, HTP_MULTI_PACKET_HEAD, and HTP_FIELD_UNPARSEABLE. + */ + uint64_t flags; + + /** Request progress. */ + enum htp_tx_req_progress_t request_progress; + + /** Response progress. */ + enum htp_tx_res_progress_t response_progress; + + /** Transaction index on the connection. */ + size_t index; + + /** Total repetitions for headers in request. */ + uint16_t req_header_repetitions; + + /** Total repetitions for headers in response. */ + uint16_t res_header_repetitions; +}; + +/** + * This structure is used to pass transaction data (for example + * request and response body buffers) to callbacks. + */ +struct htp_tx_data_t { + /** Transaction pointer. */ + htp_tx_t *tx; + + /** Pointer to the data buffer. */ + const unsigned char *data; + + /** Buffer length. */ + size_t len; + + /** + * Indicator if this chunk of data is the last in the series. Currently + * used only by REQUEST_HEADER_DATA, REQUEST_TRAILER_DATA, RESPONSE_HEADER_DATA, + * and RESPONSE_TRAILER_DATA callbacks. + */ + int is_last; +}; + +/** + * URI structure. Each of the fields provides access to a single + * URI element. Where an element is not present in a URI, the + * corresponding field will be set to NULL or -1, depending on the + * field type. + */ +struct htp_uri_t { + /** Scheme, e.g., "http". */ + bstr *scheme; + + /** Username. */ + bstr *username; + + /** Password. */ + bstr *password; + + /** Hostname. */ + bstr *hostname; + + /** Port, as string. */ + bstr *port; + + /** + * Port, as number. This field will contain HTP_PORT_NONE if there was + * no port information in the URI and HTP_PORT_INVALID if the port information + * was invalid (e.g., it's not a number or it falls out of range. + */ + int port_number; + + /** The path part of this URI. */ + bstr *path; + + /** Query string. */ + bstr *query; + + /** + * Fragment identifier. This field will rarely be available in a server-side + * setting, but it's not impossible to see it. */ + bstr *fragment; +}; + +/** + * Frees all data contained in the uri, and then the uri itself. + * + * @param[in] uri + */ +void htp_uri_free(htp_uri_t *uri); + +/** + * Allocates and initializes a new htp_uri_t structure. + * + * @return New structure, or NULL on memory allocation failure. + */ +htp_uri_t *htp_uri_alloc(void); + +/** + * Creates a new log entry and stores it with the connection. The file and line + * parameters are typically auto-generated using the HTP_LOG_MARK macro. +* + * @param[in] connp + * @param[in] file + * @param[in] line + * @param[in] level + * @param[in] code + * @param[in] fmt + * @param[in] ... + */ +void htp_log(htp_connp_t *connp, const char *file, int line, enum htp_log_level_t level, int code, const char *fmt, ...); + +/** + * Performs in-place decoding of the input string, according to the configuration specified + * by cfg and ctx. On output, various flags (HTP_URLEN_*) might be set. + * + * @param[in] cfg + * @param[in] ctx + * @param[in] input + * @param[out] flags + * + * @return HTP_OK on success, HTP_ERROR on failure. + */ +htp_status_t htp_urldecode_inplace(htp_cfg_t *cfg, enum htp_decoder_ctx_t ctx, bstr *input, uint64_t *flags); + +/** + * Performs in-place decoding of the input string, according to the configuration specified + * by cfg and ctx. On output, various flags (HTP_URLEN_*) might be set. If something in the + * input would cause a particular server to respond with an error, the appropriate status + * code will be set. + * + * @param[in] cfg + * @param[in] ctx + * @param[in] input + * @param[out] flags + * @param[out] expected_status_code 0 by default, or status code as necessary + * + * @return HTP_OK on success, HTP_ERROR on failure. + */ +htp_status_t htp_urldecode_inplace_ex(htp_cfg_t *cfg, enum htp_decoder_ctx_t ctx, bstr *input, uint64_t *flags, int *expected_status_code); + +/** + * Returns the LibHTP version string. + * + * @return LibHTP version, for example "LibHTP v0.5.x". + */ +char *htp_get_version(void); + +#ifdef __cplusplus +} +#endif + +#endif /* _HTP_H */ |