include/haproxy/h1.h


1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
323
324
325
326
327
328
329
330
331
332
333
334
335
336
337
338
339
340
341
342
343
344
345
346
347
348
349
350
351
352
353
354
355
356
357
358
359
360
361
362
363
364
365
366
367
368
369
370
371
372
373
374
375
376
377

/*
 * include/haproxy/h1.h
 * This file contains HTTP/1 protocol definitions.
 *
 * Copyright (C) 2000-2020 Willy Tarreau - w@1wt.eu
 *
 * This library is free software; you can redistribute it and/or
 * modify it under the terms of the GNU Lesser General Public
 * License as published by the Free Software Foundation, version 2.1
 * exclusively.
 *
 * This library is distributed in the hope that it will be useful,
 * but WITHOUT ANY WARRANTY; without even the implied warranty of
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
 * Lesser General Public License for more details.
 *
 * You should have received a copy of the GNU Lesser General Public
 * License along with this library; if not, write to the Free Software
 * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA  02110-1301  USA
 */

#ifndef _HAPROXY_H1_H
#define _HAPROXY_H1_H

#include <import/ist.h>
#include <haproxy/api.h>
#include <haproxy/buf.h>
#include <haproxy/http.h>
#include <haproxy/http-hdr-t.h>
#include <haproxy/intops.h>


/* States an HTTP/1 message parser may be in, for requests as well as for
 * responses. Every enumerator carries an explicit value and the type is
 * declared packed.
 */
enum h1m_state {
	/* request: what may precede the start line */
	H1_MSG_RQBEFORE     =  0, /* request: leading LF, before start line */
	H1_MSG_RQBEFORE_CR  =  1, /* request: leading CRLF, before start line */

	/* request start line */
	H1_MSG_RQMETH       =  2, /* parsing the Method */
	H1_MSG_RQMETH_SP    =  3, /* space(s) after the Method */
	H1_MSG_RQURI        =  4, /* parsing the Request URI */
	H1_MSG_RQURI_SP     =  5, /* space(s) after the Request URI */
	H1_MSG_RQVER        =  6, /* parsing the Request Version */
	H1_MSG_RQLINE_END   =  7, /* end of request line (CR or LF) */

	/* response: what may precede the start line */
	H1_MSG_RPBEFORE     =  8, /* response: leading LF, before start line */
	H1_MSG_RPBEFORE_CR  =  9, /* response: leading CRLF, before start line */

	/* response start line */
	H1_MSG_RPVER        = 10, /* parsing the Response Version */
	H1_MSG_RPVER_SP     = 11, /* space(s) after the Response Version */
	H1_MSG_RPCODE       = 12, /* response code */
	H1_MSG_RPCODE_SP    = 13, /* space(s) after the response code */
	H1_MSG_RPREASON     = 14, /* response reason */
	H1_MSG_RPLINE_END   = 15, /* end of response line (CR or LF) */

	/* header processing, shared by requests and responses */
	H1_MSG_HDR_FIRST    = 16, /* waiting for first header or last CRLF (no LWS possible) */
	H1_MSG_HDR_NAME     = 17, /* parsing header name */
	H1_MSG_HDR_COL      = 18, /* parsing header colon */
	H1_MSG_HDR_L1_SP    = 19, /* parsing header LWS (SP|HT) before value */
	H1_MSG_HDR_L1_LF    = 20, /* parsing header LWS (LF) before value */
	H1_MSG_HDR_L1_LWS   = 21, /* checking whether it's a new header or an LWS */
	H1_MSG_HDR_VAL      = 22, /* parsing header value */
	H1_MSG_HDR_L2_LF    = 23, /* parsing header LWS (LF) inside/after value */
	H1_MSG_HDR_L2_LWS   = 24, /* checking whether it's a new header or an LWS */

	H1_MSG_LAST_LF      = 25, /* parsing last LF, last state for headers */

	/* body processing */
	H1_MSG_CHUNK_SIZE   = 26, /* parsing the chunk size (RFC7230 #4.1) */
	H1_MSG_DATA         = 27, /* skipping data chunk / content-length data */
	H1_MSG_CHUNK_CRLF   = 28, /* skipping CRLF after data chunk */
	H1_MSG_TRAILERS     = 29, /* trailers (post-data entity headers) */

	/* entered once the end of the current message was received */
	H1_MSG_DONE         = 30, /* message end received, waiting for resync or close */
	H1_MSG_TUNNEL       = 31, /* tunneled data after DONE */
} __attribute__((packed));


/* HTTP/1 message flags (32 bit), for use in h1m->flags only. Each flag is a
 * distinct bit so that they can be OR'ed together.
 */
#define H1_MF_NONE              0x00000000
#define H1_MF_CLEN              0x00000001 // content-length present
#define H1_MF_CHNK              0x00000002 // chunked encoding present (as last encoding), exclusive with content-length
#define H1_MF_RESP              0x00000004 // this message is the response message
#define H1_MF_TOLOWER           0x00000008 // turn the header names to lower case
#define H1_MF_VER_11            0x00000010 // message indicates version 1.1 or above
#define H1_MF_CONN_CLO          0x00000020 // message contains "connection: close"
#define H1_MF_CONN_KAL          0x00000040 // message contains "connection: keep-alive"
#define H1_MF_CONN_UPG          0x00000080 // message contains "connection: upgrade"
#define H1_MF_XFER_LEN          0x00000100 // message xfer size can be determined
#define H1_MF_XFER_ENC          0x00000200 // transfer-encoding is present
#define H1_MF_NO_PHDR           0x00000400 // don't add pseudo-headers in the header list
#define H1_MF_HDRS_ONLY         0x00000800 // parse headers only
#define H1_MF_CLEAN_CONN_HDR    0x00001000 // skip close/keep-alive values of connection headers during parsing
#define H1_MF_METH_CONNECT      0x00002000 // set for a response to a CONNECT request
#define H1_MF_METH_HEAD         0x00004000 // set for a response to a HEAD request
#define H1_MF_UPG_WEBSOCKET     0x00008000 // set for a Websocket upgrade handshake
#define H1_MF_TE_CHUNKED        0x00010000 // T-E "chunked"
#define H1_MF_TE_OTHER          0x00020000 // T-E other than supported ones found (only "chunked" is supported for now)

/* Mask of the H1M flags that are kept when headers parsing is restarted; it
 * is meant to be applied to h1m->flags so that every other flag gets reset.
 *
 * WARNING: Don't forget to update it if a new flag must be preserved when
 *          headers parsing is restarted.
 */
#define H1_MF_RESTART_MASK    (H1_MF_RESP|H1_MF_TOLOWER|H1_MF_NO_PHDR|H1_MF_HDRS_ONLY| \
			       H1_MF_CLEAN_CONN_HDR|H1_MF_METH_CONNECT|H1_MF_METH_HEAD)

/* Note: for a connection to be persistent, we need this for the request :
 *   - one of CLEN or CHNK
 *   - version 1.0 and KAL and not CLO
 *   - or version 1.1 and not CLO
 * For the response it's the same except that UPG must not appear either.
 * So in short, for a request it's (CLEN|CHNK) > 0 && !CLO && (VER_11 || KAL)
 * and for a response it's (CLEN|CHNK) > 0 && !(CLO|UPG) && (VER_11 || KAL)
 */


/* Basic HTTP/1 message state for use in parsers.
 *
 * The err_pos field is special: it is pre-set to a negative value (-1 or -2),
 * and once non-negative it contains the relative position in the message of
 * the first parse error. -2 tells the parser that the invalid message must be
 * blocked, while -1 only requests a silent capture.
 */
struct h1m {
	enum h1m_state state;       // H1 message state (H1_MSG_*)
	/* 24 bits available here */
	uint32_t flags;             // H1 message flags (H1_MF_*)
	uint64_t curr_len;          // content-length or last chunk length
	uint64_t body_len;          // total known size of the body length
	uint32_t next;              // next byte to parse, relative to buffer's head
	int err_pos;                // position in the byte stream of the first error (H1 or H2); negative while no error was met
	int err_state;              // state where the first error was met (H1 or H2)
};

/* Basic H1 start line, describing either a request line (<rq>) or a status
 * line (<st>). Both members share the same storage, so only the one matching
 * the message being handled is meaningful.
 */
union h1_sl {                          /* useful start line pointers, relative to ->sol */
	struct {
		struct ist m;          /* METHOD */
		struct ist u;          /* URI */
		struct ist v;          /* VERSION */
		enum http_meth_t meth; /* method, as an enum http_meth_t value */
	} rq;                          /* request line : field, length */
	struct {
		struct ist v;          /* VERSION */
		struct ist c;          /* CODE */
		struct ist r;          /* REASON */
		uint16_t status;       /* status code, as a number */
	} st;                          /* status line : field, length */
};

/* HTTP/1 message parsing helpers. Only the prototypes are visible here, the
 * definitions are not part of this header.
 * NOTE(review): going by its name and arguments, h1_headers_to_hdr_list()
 * presumably parses the bytes between <start> and <stop> into at most
 * <hdr_num> entries of <hdr>, with <h1m> holding the parser state and <slp>
 * receiving the start line -- confirm against the implementation.
 */
int h1_headers_to_hdr_list(char *start, const char *stop,
                           struct http_hdr *hdr, unsigned int hdr_num,
                           struct h1m *h1m, union h1_sl *slp);
int h1_measure_trailers(const struct buffer *buf, unsigned int ofs, unsigned int max);

/* Per-header helpers, all operating on an h1m and a header value. Note that
 * the content-length and connection variants take the value by pointer while
 * the transfer-encoding and upgrade variants take it by value.
 * NOTE(review): presumably the pointer variants may modify the value --
 * confirm against the implementation.
 */
int h1_parse_cont_len_header(struct h1m *h1m, struct ist *value);
int h1_parse_xfer_enc_header(struct h1m *h1m, struct ist value);
void h1_parse_connection_header(struct h1m *h1m, struct ist *value);
void h1_parse_upgrade_header(struct h1m *h1m, struct ist value);

/* Websocket key helpers. <key_out> is declared as a 25-char array; the size
 * expected for <result> is not expressed by the prototype.
 * NOTE(review): check the implementation for the room required in <result>.
 */
void h1_generate_random_ws_input_key(char key_out[25]);
void h1_calculate_ws_output_key(const char *key, char *result);

/* for debugging, reports the HTTP/1 message state name */
static inline const char *h1m_state_str(enum h1m_state msg_state)
{
	switch (msg_state) {
	case H1_MSG_RQBEFORE:    return "MSG_RQBEFORE";
	case H1_MSG_RQBEFORE_CR: return "MSG_RQBEFORE_CR";
	case H1_MSG_RQMETH:      return "MSG_RQMETH";
	case H1_MSG_RQMETH_SP:   return "MSG_RQMETH_SP";
	case H1_MSG_RQURI:       return "MSG_RQURI";
	case H1_MSG_RQURI_SP:    return "MSG_RQURI_SP";
	case H1_MSG_RQVER:       return "MSG_RQVER";
	case H1_MSG_RQLINE_END:  return "MSG_RQLINE_END";
	case H1_MSG_RPBEFORE:    return "MSG_RPBEFORE";
	case H1_MSG_RPBEFORE_CR: return "MSG_RPBEFORE_CR";
	case H1_MSG_RPVER:       return "MSG_RPVER";
	case H1_MSG_RPVER_SP:    return "MSG_RPVER_SP";
	case H1_MSG_RPCODE:      return "MSG_RPCODE";
	case H1_MSG_RPCODE_SP:   return "MSG_RPCODE_SP";
	case H1_MSG_RPREASON:    return "MSG_RPREASON";
	case H1_MSG_RPLINE_END:  return "MSG_RPLINE_END";
	case H1_MSG_HDR_FIRST:   return "MSG_HDR_FIRST";
	case H1_MSG_HDR_NAME:    return "MSG_HDR_NAME";
	case H1_MSG_HDR_COL:     return "MSG_HDR_COL";
	case H1_MSG_HDR_L1_SP:   return "MSG_HDR_L1_SP";
	case H1_MSG_HDR_L1_LF:   return "MSG_HDR_L1_LF";
	case H1_MSG_HDR_L1_LWS:  return "MSG_HDR_L1_LWS";
	case H1_MSG_HDR_VAL:     return "MSG_HDR_VAL";
	case H1_MSG_HDR_L2_LF:   return "MSG_HDR_L2_LF";
	case H1_MSG_HDR_L2_LWS:  return "MSG_HDR_L2_LWS";
	case H1_MSG_LAST_LF:     return "MSG_LAST_LF";
	case H1_MSG_CHUNK_SIZE:  return "MSG_CHUNK_SIZE";
	case H1_MSG_DATA:        return "MSG_DATA";
	case H1_MSG_CHUNK_CRLF:  return "MSG_CHUNK_CRLF";
	case H1_MSG_TRAILERS:    return "MSG_TRAILERS";
	case H1_MSG_DONE:        return "MSG_DONE";
	case H1_MSG_TUNNEL:      return "MSG_TUNNEL";
	default:                 return "MSG_??????";
	}
}

/* This function may be called only in H1_MSG_CHUNK_CRLF. It reads the CRLF
 * expected at the end of a chunk, looking at the bytes located between the
 * offsets <start> (included) and <stop> (excluded), both relative to the
 * buffer's head. The caller must ensure that head+start points to the first
 * byte to parse, and should adjust msg->next in order to include this part
 * into the next forwarding phase.
 *
 * Return value :
 *   > 0 : number of bytes parsed (the CRLF), so the caller can switch the
 *         message state to H1_MSG_CHUNK_SIZE
 *   = 0 : not enough data are available, nothing was changed
 *   < 0 : parse error; the value is the position of the offending byte
 *         relative to <stop>
 *
 * Note: this function is designed to parse wrapped CRLF at the end of the
 * buffer. For this reason the error position is computed from the offsets and
 * not from a difference between pointers: it is thus guaranteed to be negative
 * even when the data located between <start> and <stop> wrap.
 */
static inline int h1_skip_chunk_crlf(const struct buffer *buf, int start, int stop)
{
	const char *ptr = b_peek(buf, start);
	int bytes = 1;

	if (stop <= start)
		return 0;

	/* the first byte must be a CR; it sits at offset <start>, and
	 * stop > start was checked above so the result is negative.
	 */
	if (unlikely(*ptr != '\r'))
		return start - stop;

	/* NB: we'll check data availability at the end. It's not a
	 * problem because whatever we match first will be checked
	 * against the correct length.
	 */
	bytes++;
	ptr++;
	if (ptr >= b_wrap(buf))
		ptr = b_orig(buf);

	if (bytes > stop - start)
		return 0;

	/* the second byte must be an LF; it sits at offset <start>+1, which
	 * is known to be before <stop> thanks to the check above.
	 */
	if (*ptr != '\n')
		return start + 1 - stop;

	return bytes;
}

/* Parses the chunk size starting at buf + start and stopping before
 * buf + stop. The positions are relative to the buffer's head.
 * The chunk size is returned in <res>, and the return value indicates the
 * amount of bytes read this way :
 *   < 0 : error at this position relative to <stop> (<res> is set to 0)
 *   = 0 : not enough bytes to read a complete chunk size (<res> is untouched)
 *   > 0 : number of bytes successfully read that the caller can skip
 * On success, the caller should adjust its msg->next to point to the first
 * byte of data after the chunk size, so that we know we can forward exactly
 * msg->next bytes, and msg->sol to contain the exact number of bytes forming
 * the chunk size. That way it is always possible to differentiate between the
 * start of the body and the start of the data. Note: this function is designed
 * to parse wrapped CRLF at the end of the buffer.
 */
static inline int h1_parse_chunk_size(const struct buffer *buf, int start, int stop, uint64_t *res)
{
	const char *ptr = b_peek(buf, start);
	const char *ptr_old = ptr;
	const char *end = b_wrap(buf);
	uint64_t chunk = 0;

	/* From now on, <stop> counts the bytes not parsed yet and <start>
	 * remembers how many of them were available on entry, so that
	 * start - stop is the number of bytes consumed.
	 */
	stop -= start; // bytes left
	start = stop;  // bytes to transfer

	/* The chunk size is in the following form, though we are only
	 * interested in the size and CRLF :
	 *    1*HEXDIGIT *WSP *[ ';' extensions ] CRLF
	 */
	while (1) {
		int c;
		if (!stop)
			return 0;
		c = hex2i(*ptr);
		if (c < 0) /* not a hex digit anymore */
			break;
		if (unlikely(++ptr >= end))
			ptr = b_orig(buf);
		chunk = (chunk << 4) + c;
		if (unlikely(chunk & 0xF0000000000000ULL)) {
			/* Don't accept more than 13 hexa-digits (2^52 - 1) to never feed
			 * possibly bogus values to languages that use floats for their
			 * integers.
			 */
			goto error;
		}
		stop--;
	}

	/* empty size not allowed */
	if (unlikely(ptr == ptr_old))
		goto error;

	/* skip the optional spaces and tabs following the size */
	while (HTTP_IS_SPHT(*ptr)) {
		if (++ptr >= end)
			ptr = b_orig(buf);
		if (--stop == 0)
			return 0;
	}

	/* Up to there, we know that at least one byte is present at *ptr. Check
	 * for the end of chunk size.
	 */
	while (1) {
		if (likely(*ptr == '\r')) {
			/* we now have a CR, it must be followed by a LF */
			if (++ptr >= end)
				ptr = b_orig(buf);
			if (--stop == 0)
				return 0;

			if (*ptr != '\n')
				goto error;
			if (++ptr >= end)
				ptr = b_orig(buf);
			--stop;
			/* done */
			break;
		}
		else if (likely(*ptr == ';')) {
			/* chunk extension, ends at next CRLF */
			if (++ptr >= end)
				ptr = b_orig(buf);
			if (--stop == 0)
				return 0;

			while (!HTTP_IS_CRLF(*ptr)) {
				if (++ptr >= end)
					ptr = b_orig(buf);
				if (--stop == 0)
					return 0;
			}
			/* we have a CR or an LF now, loop above to validate it */
			continue;
		}
		else
			goto error;
	}

	/* OK we found our CRLF and now <ptr> points to the next byte, which may
	 * or may not be present. Let's return the number of bytes parsed.
	 */
	*res = chunk;
	return start - stop;
 error:
	*res = 0; // just to stop gcc's -Wuninitialized warning :-(
	/* <stop> is the number of unparsed bytes, hence the negated error position */
	return -stop;
}

/* Resets <h1m> so that it is ready to parse a new request: the parser starts
 * before the request line with no flag set. Returns <h1m>.
 */
static inline struct h1m *h1m_init_req(struct h1m *h1m)
{
	/* parser position and mode */
	h1m->state     = H1_MSG_RQBEFORE;
	h1m->flags     = H1_MF_NONE;
	h1m->next      = 0;

	/* nothing is known about the body yet */
	h1m->curr_len  = 0;
	h1m->body_len  = 0;

	/* no error met so far; -2 is the negative preset of err_pos */
	h1m->err_state = 0;
	h1m->err_pos   = -2;

	return h1m;
}

/* Resets <h1m> so that it is ready to parse a new response: the parser starts
 * before the status line with only the H1_MF_RESP flag set. Returns <h1m>.
 */
static inline struct h1m *h1m_init_res(struct h1m *h1m)
{
	/* parser position and mode */
	h1m->state     = H1_MSG_RPBEFORE;
	h1m->flags     = H1_MF_RESP;
	h1m->next      = 0;

	/* nothing is known about the body yet */
	h1m->curr_len  = 0;
	h1m->body_len  = 0;

	/* no error met so far; -2 is the negative preset of err_pos */
	h1m->err_state = 0;
	h1m->err_pos   = -2;

	return h1m;
}

#endif /* _HAPROXY_H1_H */