summaryrefslogtreecommitdiffstats
path: root/htp/htp_request_generic.c
blob: 435cf0a738095699faf9e1b75132c3c86cc92f93 (plain)
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
323
324
325
326
327
328
329
330
331
332
333
334
335
336
337
338
339
340
341
342
343
344
345
346
347
348
349
350
351
352
353
354
355
356
357
358
359
360
361
362
363
364
365
366
367
368
369
370
371
372
373
374
375
376
377
378
379
380
381
382
383
384
385
386
387
388
389
390
391
392
393
394
395
396
397
398
399
400
401
402
403
404
405
406
407
408
409
410
411
412
413
414
415
416
417
418
419
420
421
422
423
424
425
426
427
428
429
430
431
432
433
434
435
436
437
438
439
440
441
442
443
444
445
446
447
448
449
450
451
452
453
454
455
456
457
458
459
460
461
462
/***************************************************************************
 * Copyright (c) 2009-2010 Open Information Security Foundation
 * Copyright (c) 2010-2013 Qualys, Inc.
 * All rights reserved.
 * 
 * Redistribution and use in source and binary forms, with or without
 * modification, are permitted provided that the following conditions are
 * met:
 * 
 * - Redistributions of source code must retain the above copyright
 *   notice, this list of conditions and the following disclaimer.

 * - Redistributions in binary form must reproduce the above copyright
 *   notice, this list of conditions and the following disclaimer in the
 *   documentation and/or other materials provided with the distribution.

 * - Neither the name of the Qualys, Inc. nor the names of its
 *   contributors may be used to endorse or promote products derived from
 *   this software without specific prior written permission.
 * 
 * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
 * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
 * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
 * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
 * HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
 * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
 * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
 * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
 * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
 * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
 * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
 ***************************************************************************/

/**
 * @file
 * @author Ivan Ristic <ivanr@webkreator.com>
 */

#include "htp_config_auto.h"

#include "htp_private.h"

/**
 * Extract one request header. A header can span multiple lines, in
 * which case they will be folded into one before parsing is attempted.
 *
 * @param[in] connp
 * @param[in] data
 * @param[in] len
 * @return HTP_OK or HTP_ERROR
 */
htp_status_t htp_process_request_header_generic(htp_connp_t *connp, unsigned char *data, size_t len) {
    // Create a new header structure.
    htp_header_t *h = calloc(1, sizeof (htp_header_t));
    if (h == NULL) return HTP_ERROR;

    // Now try to parse the header.
    if (htp_parse_request_header_generic(connp, h, data, len) != HTP_OK) {
        free(h);
        return HTP_ERROR;
    }

    #ifdef HTP_DEBUG
    fprint_bstr(stderr, "Header name", h->name);
    fprint_bstr(stderr, "Header value", h->value);
    #endif

    // Do we already have a header with the same name?
    htp_header_t *h_existing = htp_table_get(connp->in_tx->request_headers, h->name);
    if (h_existing != NULL) {
        // TODO Do we want to have a list of the headers that are
        //      allowed to be combined in this way?
        if ((h_existing->flags & HTP_FIELD_REPEATED) == 0) {
            // This is the second occurence for this header.
            htp_log(connp, HTP_LOG_MARK, HTP_LOG_WARNING, 0, "Repetition for header");
        } else {
            // For simplicity reasons, we count the repetitions of all headers
            if (connp->in_tx->req_header_repetitions < HTP_MAX_HEADERS_REPETITIONS) {
                connp->in_tx->req_header_repetitions++;
            } else {
                bstr_free(h->name);
                bstr_free(h->value);
                free(h);
                return HTP_OK;
            }
        }
        // Keep track of repeated same-name headers.
        h_existing->flags |= HTP_FIELD_REPEATED;

        // Having multiple C-L headers is against the RFC but
        // servers may ignore the subsequent headers if the values are the same.
        if (bstr_cmp_c_nocase(h->name, "Content-Length") == 0) {
            // Don't use string comparison here because we want to
            // ignore small formatting differences.

            int64_t existing_cl = htp_parse_content_length(h_existing->value, NULL);
            int64_t new_cl = htp_parse_content_length(h->value, NULL);
            // Ambiguous response C-L value.
            if ((existing_cl == -1) || (new_cl == -1) || (existing_cl != new_cl)) {
                htp_log(connp, HTP_LOG_MARK, HTP_LOG_WARNING, 0, "Ambiguous request C-L value");
            }
            // Ignoring the new C-L header that has the same value as the previous ones.
        } else {
            // Add to the existing header.
            bstr *new_value = bstr_expand(h_existing->value, bstr_len(h_existing->value) + 2 + bstr_len(h->value));
            if (new_value == NULL) {
                bstr_free(h->name);
                bstr_free(h->value);
                free(h);
                return HTP_ERROR;
            }

            h_existing->value = new_value;
            bstr_add_mem_noex(h_existing->value, ", ", 2);
            bstr_add_noex(h_existing->value, h->value);
        }

        // The new header structure is no longer needed.
        bstr_free(h->name);
        bstr_free(h->value);
        free(h);
    } else {
        // Add as a new header.
        if (htp_table_add(connp->in_tx->request_headers, h->name, h) != HTP_OK) {
            bstr_free(h->name);
            bstr_free(h->value);
            free(h);
        }
    }

    return HTP_OK;
}

/**
 * Generic request header parser.
 *
 * @param[in] connp
 * @param[in] h
 * @param[in] data
 * @param[in] len
 * @return HTP_OK or HTP_ERROR
 */
htp_status_t htp_parse_request_header_generic(htp_connp_t *connp, htp_header_t *h, unsigned char *data, size_t len) {
    size_t name_start, name_end;
    size_t value_start, value_end;

    htp_chomp(data, &len);

    name_start = 0;

    // Look for the colon.
    size_t colon_pos = 0;
    while ((colon_pos < len) && (data[colon_pos] != '\0') && (data[colon_pos] != ':')) colon_pos++;

    if ((colon_pos == len) || (data[colon_pos] == '\0')) {
        // Missing colon.

        h->flags |= HTP_FIELD_UNPARSEABLE;

        // Log only once per transaction.
        if (!(connp->in_tx->flags & HTP_FIELD_UNPARSEABLE)) {
            connp->in_tx->flags |= HTP_FIELD_UNPARSEABLE;
            htp_log(connp, HTP_LOG_MARK, HTP_LOG_WARNING, 0, "Request field invalid: colon missing");
        }

        // We handle this case as a header with an empty name, with the value equal
        // to the entire input string.

        // TODO Apache will respond to this problem with a 400.

        // Now extract the name and the value
        h->name = bstr_dup_c("");
        if (h->name == NULL) return HTP_ERROR;

        h->value = bstr_dup_mem(data, len);
        if (h->value == NULL) {
            bstr_free(h->name);
            return HTP_ERROR;
        }

        return HTP_OK;
    }

    if (colon_pos == 0) {
        // Empty header name.

        h->flags |= HTP_FIELD_INVALID;

        // Log only once per transaction.
        if (!(connp->in_tx->flags & HTP_FIELD_INVALID)) {
            connp->in_tx->flags |= HTP_FIELD_INVALID;
            htp_log(connp, HTP_LOG_MARK, HTP_LOG_WARNING, 0, "Request field invalid: empty name");
        }
    }

    name_end = colon_pos;

    // Ignore LWS after field-name.
    size_t prev = name_end;
    while ((prev > name_start) && (htp_is_lws(data[prev - 1]))) {
        // LWS after header name.

        prev--;
        name_end--;

        h->flags |= HTP_FIELD_INVALID;

        // Log only once per transaction.
        if (!(connp->in_tx->flags & HTP_FIELD_INVALID)) {
            connp->in_tx->flags |= HTP_FIELD_INVALID;
            htp_log(connp, HTP_LOG_MARK, HTP_LOG_WARNING, 0, "Request field invalid: LWS after name");
        }
    }

    // Header value.

    value_start = colon_pos;

    // Go over the colon.
    if (value_start < len) {
        value_start++;
    }

    // Ignore LWS before field-content.
    while ((value_start < len) && (htp_is_lws(data[value_start]))) {
        value_start++;
    }

    // Look for the end of field-content.
    value_end = len;

    // Ignore LWS after field-content.
    prev = value_end - 1;
    while ((prev > value_start) && (htp_is_lws(data[prev]))) {
        prev--;
        value_end--;
    }

    // Check that the header name is a token.
    size_t i = name_start;
    while (i < name_end) {
        if (!htp_is_token(data[i])) {
            // Incorrectly formed header name.

            h->flags |= HTP_FIELD_INVALID;

            // Log only once per transaction.
            if (!(connp->in_tx->flags & HTP_FIELD_INVALID)) {
                connp->in_tx->flags |= HTP_FIELD_INVALID;
                htp_log(connp, HTP_LOG_MARK, HTP_LOG_WARNING, 0, "Request header name is not a token");
            }

            break;
        }

        i++;
    }

    // Now extract the name and the value
    h->name = bstr_dup_mem(data + name_start, name_end - name_start);
    if (h->name == NULL) return HTP_ERROR;

    h->value = bstr_dup_mem(data + value_start, value_end - value_start);
    if (h->value == NULL) {
        bstr_free(h->name);
        return HTP_ERROR;
    }

    return HTP_OK;
}

/**
 * Generic request line parser.
 *
 * @param[in] connp
 * @return HTP_OK or HTP_ERROR
 */
htp_status_t htp_parse_request_line_generic(htp_connp_t *connp) {
    return htp_parse_request_line_generic_ex(connp, 0 /* NUL does not terminates line */);
}

htp_status_t htp_parse_request_line_generic_ex(htp_connp_t *connp, int nul_terminates) {
    htp_tx_t *tx = connp->in_tx;
    unsigned char *data = bstr_ptr(tx->request_line);
    size_t len = bstr_len(tx->request_line);
    size_t pos = 0;
    size_t mstart = 0;
    size_t start;
    size_t bad_delim;

    if (nul_terminates) {
        // The line ends with the first NUL byte.
        
        size_t newlen = 0;
        while ((pos < len) && (data[pos] != '\0')) {
            pos++;
            newlen++;
        }

        // Start again, with the new length.
        len = newlen;
        pos = 0;
    }

    // skip past leading whitespace. IIS allows this
    while ((pos < len) && htp_is_space(data[pos])) pos++;
    if (pos) {
        htp_log(connp, HTP_LOG_MARK, HTP_LOG_WARNING, 0, "Request line: leading whitespace");
        mstart = pos;

        if (connp->cfg->requestline_leading_whitespace_unwanted != HTP_UNWANTED_IGNORE) {
            // reset mstart so that we copy the whitespace into the method
            mstart = 0;
            // set expected response code to this anomaly
            tx->response_status_expected_number = connp->cfg->requestline_leading_whitespace_unwanted;
        }
    }

    // The request method starts at the beginning of the
    // line and ends with the first whitespace character.
    while ((pos < len) && (!htp_is_space(data[pos]))) pos++;

    // No, we don't care if the method is empty.

    tx->request_method = bstr_dup_mem(data + mstart, pos - mstart);
    if (tx->request_method == NULL) return HTP_ERROR;

    #ifdef HTP_DEBUG
    fprint_raw_data(stderr, __func__, bstr_ptr(tx->request_method), bstr_len(tx->request_method));
    #endif

    tx->request_method_number = htp_convert_method_to_number(tx->request_method);

    bad_delim = 0;
    // Ignore whitespace after request method. The RFC allows
    // for only one SP, but then suggests any number of SP and HT
    // should be permitted. Apache uses isspace(), which is even
    // more permitting, so that's what we use here.
    while ((pos < len) && (isspace(data[pos]))) {
        if (!bad_delim && data[pos] != 0x20) {
            bad_delim++;
        }
        pos++;
    }
// Too much performance overhead for fuzzing
#ifndef FUZZING_BUILD_MODE_UNSAFE_FOR_PRODUCTION
    if (bad_delim) {
        htp_log(connp, HTP_LOG_MARK, HTP_LOG_WARNING, 0, "Request line: non-compliant delimiter between Method and URI");
    }
#endif

    // Is there anything after the request method?
    if (pos == len) {
        // No, this looks like a HTTP/0.9 request.

        tx->is_protocol_0_9 = 1;
        tx->request_protocol_number = HTP_PROTOCOL_0_9;
        if (tx->request_method_number == HTP_M_UNKNOWN)
            htp_log(connp, HTP_LOG_MARK, HTP_LOG_WARNING, 0, "Request line: unknown method only");

        return HTP_OK;
    }

    start = pos;
    bad_delim = 0;
    if (tx->connp->cfg->allow_space_uri) {
        pos = len - 1;
        // Skips the spaces at the end of line (after protocol)
        while (pos > start && htp_is_space(data[pos])) pos--;
        // The URI ends with the last whitespace.
        while ((pos > start) && (data[pos] != 0x20)) {
            if (!bad_delim && htp_is_space(data[pos])) {
                bad_delim++;
            }
            pos--;
        }
        /* if we've seen some 'bad' delimiters, we retry with those */
        if (bad_delim && pos == start) {
            // special case: even though RFC's allow only SP (0x20), many
            // implementations allow other delimiters, like tab or other
            // characters that isspace() accepts.
            pos = len - 1;
            while ((pos > start) && (!htp_is_space(data[pos]))) pos--;
        } else {
            // reset bad_delim found in protocol part
            bad_delim = 0;
            for (size_t i = start; i < pos; i++) {
                if (data[i] != 0x20 && htp_is_space(data[i])) {
                    bad_delim = 1;
                    break;
                }
            }
        }
        if (bad_delim) {
#ifndef FUZZING_BUILD_MODE_UNSAFE_FOR_PRODUCTION
            // warn regardless if we've seen non-compliant chars
            htp_log(connp, HTP_LOG_MARK, HTP_LOG_WARNING, 0, "Request line: URI contains non-compliant delimiter");
#endif
        } else if (pos == start) {
            pos = len;
        }
    } else {
        // The URI ends with the first whitespace.
        while ((pos < len) && (data[pos] != 0x20)) {
            if (!bad_delim && htp_is_space(data[pos])) {
                bad_delim++;
            }
            pos++;
        }
        /* if we've seen some 'bad' delimiters, we retry with those */
        if (bad_delim && pos == len) {
            // special case: even though RFC's allow only SP (0x20), many
            // implementations allow other delimiters, like tab or other
            // characters that isspace() accepts.
            pos = start;
            while ((pos < len) && (!htp_is_space(data[pos]))) pos++;
        }
#ifndef FUZZING_BUILD_MODE_UNSAFE_FOR_PRODUCTION
        if (bad_delim) {
            // warn regardless if we've seen non-compliant chars
            htp_log(connp, HTP_LOG_MARK, HTP_LOG_WARNING, 0, "Request line: URI contains non-compliant delimiter");
        }
#endif
    }

    tx->request_uri = bstr_dup_mem(data + start, pos - start);
    if (tx->request_uri == NULL) return HTP_ERROR;

    #ifdef HTP_DEBUG
    fprint_raw_data(stderr, __func__, bstr_ptr(tx->request_uri), bstr_len(tx->request_uri));
    #endif

    // Ignore whitespace after URI.
    while ((pos < len) && (htp_is_space(data[pos]))) pos++;

    // Is there protocol information available?
    if (pos == len) {
        // No, this looks like a HTTP/0.9 request.

        tx->is_protocol_0_9 = 1;
        tx->request_protocol_number = HTP_PROTOCOL_0_9;
        if (tx->request_method_number == HTP_M_UNKNOWN)
            htp_log(connp, HTP_LOG_MARK, HTP_LOG_WARNING, 0, "Request line: unknown method and no protocol");

        return HTP_OK;
    }

    // The protocol information continues until the end of the line.
    tx->request_protocol = bstr_dup_mem(data + pos, len - pos);
    if (tx->request_protocol == NULL) return HTP_ERROR;

    tx->request_protocol_number = htp_parse_protocol(tx->request_protocol);
    if (tx->request_method_number == HTP_M_UNKNOWN && tx->request_protocol_number == HTP_PROTOCOL_INVALID)
        htp_log(connp, HTP_LOG_MARK, HTP_LOG_WARNING, 0, "Request line: unknown method and invalid protocol");

    #ifdef HTP_DEBUG
    fprint_raw_data(stderr, __func__, bstr_ptr(tx->request_protocol), bstr_len(tx->request_protocol));
    #endif

    return HTP_OK;
}