Imported Upstream version 0.9.0
[deb_shairplay.git] / src / lib / http_parser.c
CommitLineData
15c988f7
JB
1/* Based on src/http/ngx_http_parse.c from NGINX copyright Igor Sysoev
2 *
3 * Additional changes are licensed under the same terms as NGINX and
4 * copyright Joyent, Inc. and other Node contributors. All rights reserved.
5 *
6 * Permission is hereby granted, free of charge, to any person obtaining a copy
7 * of this software and associated documentation files (the "Software"), to
8 * deal in the Software without restriction, including without limitation the
9 * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or
10 * sell copies of the Software, and to permit persons to whom the Software is
11 * furnished to do so, subject to the following conditions:
12 *
13 * The above copyright notice and this permission notice shall be included in
14 * all copies or substantial portions of the Software.
15 *
16 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
17 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
18 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
19 * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
20 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
21 * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
22 * IN THE SOFTWARE.
23 */
24#include "http_parser.h"
25#include <assert.h>
26#include <stddef.h>
27#include <ctype.h>
28#include <stdlib.h>
29#include <string.h>
30#include <limits.h>
31
/* Fallback for toolchains whose <limits.h> lacks ULLONG_MAX: 2^64-1. */
#if !defined(ULLONG_MAX)
# define ULLONG_MAX ((uint64_t) -1)
#endif

/* Smaller of two values. Both arguments may be evaluated more than once. */
#if !defined(MIN)
# define MIN(a,b) ((b) > (a) ? (a) : (b))
#endif

/* Element count of a true array (not of a pointer). */
#if !defined(ARRAY_SIZE)
# define ARRAY_SIZE(a) (sizeof(a) / sizeof((a)[0]))
#endif

/* 1 if bit number `i` of the byte-array bitmap `a` is set, else 0.
 * Byte index is i / 8, bit within the byte is i % 8 (LSB first). */
#if !defined(BIT_AT)
# define BIT_AT(a, i)                                                \
  ((((unsigned int) (a)[(unsigned int) (i) / 8u]                     \
     >> ((unsigned int) (i) % 8u)) & 1u) != 0u)
#endif

/* Bounds-checked array read: a[i] when `i` is in range, otherwise `v`. */
#if !defined(ELEM_AT)
# define ELEM_AT(a, i, v) \
  ((unsigned int) (i) >= ARRAY_SIZE(a) ? (v) : (a)[(i)])
#endif
53
/* Record error code `e` on the parser; debug builds also remember the
 * source line that raised it. Expects a `parser` variable in scope. */
#if !HTTP_PARSER_DEBUG
#define SET_ERRNO(e)                                                 \
do {                                                                 \
  parser->http_errno = (e);                                          \
} while (0)
#else
#define SET_ERRNO(e)                                                 \
do {                                                                 \
  parser->http_errno = (e);                                          \
  parser->error_lineno = __LINE__;                                   \
} while (0)
#endif


/* Invoke the notify callback on_<FOR>, if one is installed. A non-zero
 * result from the callback sets HPE_CB_<FOR>. If the parser errno is
 * anything other than HPE_OK afterwards, the enclosing function returns ER.
 */
#define CALLBACK_NOTIFY_(FOR, ER)                                    \
do {                                                                 \
  assert(HTTP_PARSER_ERRNO(parser) == HPE_OK);                       \
                                                                     \
  if (settings->on_##FOR) {                                          \
    if (settings->on_##FOR(parser) != 0) {                           \
      SET_ERRNO(HPE_CB_##FOR);                                       \
    }                                                                \
                                                                     \
    /* Either the callback failed or errno was set meanwhile */      \
    if (HTTP_PARSER_ERRNO(parser) != HPE_OK) {                       \
      return (ER);                                                   \
    }                                                                \
  }                                                                  \
} while (0)

/* Notify variant whose early return counts the current byte as consumed */
#define CALLBACK_NOTIFY(FOR)            CALLBACK_NOTIFY_(FOR, p - data + 1)

/* Notify variant whose early return leaves the current byte unconsumed */
#define CALLBACK_NOTIFY_NOADVANCE(FOR)  CALLBACK_NOTIFY_(FOR, p - data)

/* If <FOR>_mark is set, hand LEN bytes starting at the mark to the data
 * callback on_<FOR> (when installed), then clear the mark. A non-zero
 * callback result sets HPE_CB_<FOR>; any non-OK errno afterwards makes the
 * enclosing function return ER before the mark is cleared.
 */
#define CALLBACK_DATA_(FOR, LEN, ER)                                 \
do {                                                                 \
  assert(HTTP_PARSER_ERRNO(parser) == HPE_OK);                       \
                                                                     \
  if (FOR##_mark) {                                                  \
    if (settings->on_##FOR) {                                        \
      if (settings->on_##FOR(parser, FOR##_mark, (LEN)) != 0) {      \
        SET_ERRNO(HPE_CB_##FOR);                                     \
      }                                                              \
                                                                     \
      /* Either the callback failed or errno was set meanwhile */    \
      if (HTTP_PARSER_ERRNO(parser) != HPE_OK) {                     \
        return (ER);                                                 \
      }                                                              \
    }                                                                \
    FOR##_mark = NULL;                                               \
  }                                                                  \
} while (0)

/* Data variant: span is mark..p, current byte counted as consumed */
#define CALLBACK_DATA(FOR)                                           \
    CALLBACK_DATA_(FOR, p - FOR##_mark, p - data + 1)

/* Data variant: span is mark..p, current byte left unconsumed */
#define CALLBACK_DATA_NOADVANCE(FOR)                                 \
    CALLBACK_DATA_(FOR, p - FOR##_mark, p - data)

/* Point <FOR>_mark at the current position unless it is already set */
#define MARK(FOR)                                                    \
do {                                                                 \
  if (!FOR##_mark) {                                                 \
    FOR##_mark = p;                                                  \
  }                                                                  \
} while (0)
126
127
/* Lower-case header names the parser matches character by character. */
#define CONNECTION          "connection"
#define PROXY_CONNECTION    "proxy-connection"
#define CONTENT_LENGTH      "content-length"
#define TRANSFER_ENCODING   "transfer-encoding"
#define UPGRADE             "upgrade"

/* Lower-case header values the parser matches character by character. */
#define CHUNKED             "chunked"
#define KEEP_ALIVE          "keep-alive"
#define CLOSE               "close"
136
137
138static const char *method_strings[] =
139 {
140#define XX(num, name, string) #string,
141 HTTP_METHOD_MAP(XX)
142#undef XX
143 };
144
145
/* Token lookup table (rfc 2616), indexed by byte value.
 *
 *        token      = 1*<any CHAR except CTLs or separators>
 *        separators = "(" | ")" | "<" | ">" | "@"
 *                   | "," | ";" | ":" | "\" | <">
 *                   | "/" | "[" | "]" | "?" | "="
 *                   | "{" | "}" | SP | HT
 *
 * An entry is 0 when the byte is not a token character; otherwise it is the
 * byte itself with 'A'-'Z' folded to lower case. Only the first 128 entries
 * are listed; the remaining ones are zero-initialized, i.e. not tokens.
 */
static const char tokens[256] = {
  /* 0x00 - 0x07: nul soh stx etx eot enq ack bel */
  0,    0,    0,    0,    0,    0,    0,    0,
  /* 0x08 - 0x0f: bs  ht  nl  vt  np  cr  so  si */
  0,    0,    0,    0,    0,    0,    0,    0,
  /* 0x10 - 0x17: dle dc1 dc2 dc3 dc4 nak syn etb */
  0,    0,    0,    0,    0,    0,    0,    0,
  /* 0x18 - 0x1f: can em  sub esc fs  gs  rs  us */
  0,    0,    0,    0,    0,    0,    0,    0,
  /* 0x20 - 0x27: sp  !   "   #   $   %   &   ' */
  0,    '!',  0,    '#',  '$',  '%',  '&',  '\'',
  /* 0x28 - 0x2f: (   )   *   +   ,   -   .   / */
  0,    0,    '*',  '+',  0,    '-',  '.',  0,
  /* 0x30 - 0x37: 0   1   2   3   4   5   6   7 */
  '0',  '1',  '2',  '3',  '4',  '5',  '6',  '7',
  /* 0x38 - 0x3f: 8   9   :   ;   <   =   >   ? */
  '8',  '9',  0,    0,    0,    0,    0,    0,
  /* 0x40 - 0x47: @   A   B   C   D   E   F   G */
  0,    'a',  'b',  'c',  'd',  'e',  'f',  'g',
  /* 0x48 - 0x4f: H   I   J   K   L   M   N   O */
  'h',  'i',  'j',  'k',  'l',  'm',  'n',  'o',
  /* 0x50 - 0x57: P   Q   R   S   T   U   V   W */
  'p',  'q',  'r',  's',  't',  'u',  'v',  'w',
  /* 0x58 - 0x5f: X   Y   Z   [   \   ]   ^   _ */
  'x',  'y',  'z',  0,    0,    0,    '^',  '_',
  /* 0x60 - 0x67: `   a   b   c   d   e   f   g */
  '`',  'a',  'b',  'c',  'd',  'e',  'f',  'g',
  /* 0x68 - 0x6f: h   i   j   k   l   m   n   o */
  'h',  'i',  'j',  'k',  'l',  'm',  'n',  'o',
  /* 0x70 - 0x77: p   q   r   s   t   u   v   w */
  'p',  'q',  'r',  's',  't',  'u',  'v',  'w',
  /* 0x78 - 0x7f: x   y   z   {   |   }   ~   del */
  'x',  'y',  'z',  0,    '|',  0,    '~',  0
};
186
187
/* Hexadecimal digit lookup, indexed by byte value: 0-15 for '0'-'9',
 * 'A'-'F' and 'a'-'f', and -1 for every other byte.
 *
 * All 256 entries are spelled out. With only the first 128 listed, the
 * upper half would be zero-initialized and bytes 0x80-0xFF would look like
 * the valid digit 0 instead of being rejected.
 * NOTE(review): presumably consulted via `unhex_val` in
 * http_parser_execute() when reading chunk sizes -- confirm at the use site.
 */
static const int8_t unhex[256] =
  {-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1  /* 0x00 */
  ,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1  /* 0x10 */
  ,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1  /* 0x20 */
  , 0, 1, 2, 3, 4, 5, 6, 7, 8, 9,-1,-1,-1,-1,-1,-1  /* 0x30: '0'-'9' */
  ,-1,10,11,12,13,14,15,-1,-1,-1,-1,-1,-1,-1,-1,-1  /* 0x40: 'A'-'F' */
  ,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1  /* 0x50 */
  ,-1,10,11,12,13,14,15,-1,-1,-1,-1,-1,-1,-1,-1,-1  /* 0x60: 'a'-'f' */
  ,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1  /* 0x70 */
  ,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1  /* 0x80 */
  ,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1  /* 0x90 */
  ,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1  /* 0xa0 */
  ,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1  /* 0xb0 */
  ,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1  /* 0xc0 */
  ,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1  /* 0xd0 */
  ,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1  /* 0xe0 */
  ,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1  /* 0xf0 */
  };
198
199
/* T(v) keeps a bit only in non-strict builds; strict builds drop it. */
#if HTTP_PARSER_STRICT
# define T(v) 0
#else
# define T(v) v
#endif


/* Bitmap of bytes accepted as ordinary URL characters. Byte value `ch` is
 * represented by bit (ch & 7) of element (ch >> 3); a set bit means
 * accepted. Only the first 16 elements (bytes 0-127) are listed; the rest
 * are zero-initialized.
 */
static const uint8_t normal_url_char[32] = {
  /*   0 -   7: nul soh stx etx eot enq ack bel  -> none */
  0x00,
  /*   8 -  15: bs ht nl vt np cr so si  -> ht and np, non-strict only */
  T(0x02) | T(0x10),
  /*  16 -  23: dle dc1 dc2 dc3 dc4 nak syn etb  -> none */
  0x00,
  /*  24 -  31: can em sub esc fs gs rs us  -> none */
  0x00,
  /*  32 -  39: sp ! " # $ % & '  -> all but sp and # */
  0xf6,
  /*  40 -  47: ( ) * + , - . /  -> all */
  0xff,
  /*  48 -  55: 0 1 2 3 4 5 6 7  -> all */
  0xff,
  /*  56 -  63: 8 9 : ; < = > ?  -> all but ? */
  0x7f,
  /*  64 -  71: @ A B C D E F G  -> all */
  0xff,
  /*  72 -  79: H I J K L M N O  -> all */
  0xff,
  /*  80 -  87: P Q R S T U V W  -> all */
  0xff,
  /*  88 -  95: X Y Z [ \ ] ^ _  -> all */
  0xff,
  /*  96 - 103: ` a b c d e f g  -> all */
  0xff,
  /* 104 - 111: h i j k l m n o  -> all */
  0xff,
  /* 112 - 119: p q r s t u v w  -> all */
  0xff,
  /* 120 - 127: x y z { | } ~ del  -> all but del */
  0x7f
};

#undef T
242
/* Parser states. Numbering starts at 1 and follows declaration order, so
 * the order of the enumerators below is significant. */
enum state {
  s_dead = 1, /* important that this is > 0 */

  /* start / response line */
  s_start_req_or_res,
  s_res_or_resp_H,
  s_start_res,
  s_res_H,
  s_res_HT,
  s_res_HTT,
  s_res_HTTP,
  s_res_first_http_major,
  s_res_http_major,
  s_res_first_http_minor,
  s_res_http_minor,
  s_res_first_status_code,
  s_res_status_code,
  s_res_status,
  s_res_line_almost_done,

  /* request line */
  s_start_req,

  s_req_method,
  s_req_spaces_before_url,
  s_req_schema,
  s_req_schema_slash,
  s_req_schema_slash_slash,
  s_req_server_start,
  s_req_server,
  s_req_server_with_at,
  s_req_path,
  s_req_query_string_start,
  s_req_query_string,
  s_req_fragment_start,
  s_req_fragment,
  s_req_http_start,
  s_req_http_H,
  s_req_http_HT,
  s_req_http_HTT,
  s_req_http_HTTP,
  s_req_first_http_major,
  s_req_http_major,
  s_req_first_http_minor,
  s_req_http_minor,
  s_req_line_almost_done,

  /* header lines */
  s_header_field_start,
  s_header_field,
  s_header_value_start,
  s_header_value,
  s_header_value_lws,

  s_header_almost_done,

  s_chunk_size_start,
  s_chunk_size,
  s_chunk_parameters,
  s_chunk_size_almost_done,

  s_headers_almost_done,
  s_headers_done,

  /* Important: 's_headers_done' must be the last 'header' state. All
   * states beyond this must be 'body' states. It is used for overflow
   * checking. See the PARSING_HEADER() macro.
   */

  s_chunk_data,
  s_chunk_data_almost_done,
  s_chunk_data_done,

  s_body_identity,
  s_body_identity_eof,

  s_message_done
};
318
319
/* Non-zero while `state` is at or before s_headers_done, i.e. not yet a
 * body state. The argument is parenthesized so that a compound expression
 * (for example a conditional) is compared as a whole. */
#define PARSING_HEADER(state) ((state) <= s_headers_done)
321
322
/* Sub-states for recognising specific header names and values while a
 * header line is being read. Numbering starts at 0 and follows
 * declaration order. */
enum header_states {
  h_general = 0,
  h_C,
  h_CO,
  h_CON,

  /* header name being matched */
  h_matching_connection,
  h_matching_proxy_connection,
  h_matching_content_length,
  h_matching_transfer_encoding,
  h_matching_upgrade,

  /* header name fully matched */
  h_connection,
  h_content_length,
  h_transfer_encoding,
  h_upgrade,

  /* header value being matched */
  h_matching_transfer_encoding_chunked,
  h_matching_connection_keep_alive,
  h_matching_connection_close,

  /* header value fully matched */
  h_transfer_encoding_chunked,
  h_connection_keep_alive,
  h_connection_close
};
348
/* States for walking the userinfo / host / port part of a URL.
 * Numbering starts at 1 and follows declaration order. */
enum http_host_state {
  s_http_host_dead = 1,
  s_http_userinfo_start,
  s_http_userinfo,
  s_http_host_start,
  s_http_host_v6_start,
  s_http_host,
  s_http_host_v6,
  s_http_host_v6_end,
  s_http_host_port_start,
  s_http_host_port
};
362
/* Macros for character classes; some depend on strict-mode.
 *
 * Every use of the argument is parenthesized so that compound expressions
 * (e.g. `cond ? a : b`) are classified as a whole. The argument may still be
 * evaluated more than once, so it must not have side effects.
 */
#define CR                  '\r'
#define LF                  '\n'
/* ASCII letter -> lower case (sets bit 0x20); other bytes are altered too,
 * callers only rely on the result for range checks against 'a'..'z'. */
#define LOWER(c)            ((unsigned char)((c) | 0x20))
#define IS_ALPHA(c)         (LOWER(c) >= 'a' && LOWER(c) <= 'z')
#define IS_NUM(c)           ((c) >= '0' && (c) <= '9')
#define IS_ALPHANUM(c)      (IS_ALPHA(c) || IS_NUM(c))
#define IS_HEX(c)           (IS_NUM(c) || (LOWER(c) >= 'a' && LOWER(c) <= 'f'))
#define IS_MARK(c)          ((c) == '-' || (c) == '_' || (c) == '.' || \
  (c) == '!' || (c) == '~' || (c) == '*' || (c) == '\'' || (c) == '(' || \
  (c) == ')')
#define IS_USERINFO_CHAR(c) (IS_ALPHANUM(c) || IS_MARK(c) || (c) == '%' || \
  (c) == ';' || (c) == ':' || (c) == '&' || (c) == '=' || (c) == '+' || \
  (c) == '$' || (c) == ',')

#if HTTP_PARSER_STRICT
/* Strict: tokens per the table only; host chars exclude '_'. */
#define TOKEN(c)            (tokens[(unsigned char)(c)])
#define IS_URL_CHAR(c)      (BIT_AT(normal_url_char, (unsigned char)(c)))
#define IS_HOST_CHAR(c)     (IS_ALPHANUM(c) || (c) == '.' || (c) == '-')
#else
/* Non-strict: a space also counts as a token character, bytes with the
 * high bit set are accepted in URLs, and '_' is accepted in host names. */
#define TOKEN(c)            (((c) == ' ') ? ' ' : tokens[(unsigned char)(c)])
#define IS_URL_CHAR(c)                                                         \
  (BIT_AT(normal_url_char, (unsigned char)(c)) || ((c) & 0x80))
#define IS_HOST_CHAR(c)                                                        \
  (IS_ALPHANUM(c) || (c) == '.' || (c) == '-' || (c) == '_')
#endif
389
390
/* Initial state for the next message, chosen from parser->type. */
#define start_state (parser->type != HTTP_REQUEST ? s_start_res : s_start_req)


#if !HTTP_PARSER_STRICT
/* Non-strict: checks compile to nothing, and a new message always begins
 * in start_state. */
# define STRICT_CHECK(cond)
# define NEW_MESSAGE() start_state
#else
/* Strict: a true `cond` sets HPE_STRICT and jumps to the `error` label of
 * the enclosing function. */
# define STRICT_CHECK(cond)                                          \
do {                                                                 \
  if (cond) {                                                        \
    SET_ERRNO(HPE_STRICT);                                           \
    goto error;                                                      \
  }                                                                  \
} while (0)
/* Strict: go to s_dead unless http_should_keep_alive() says otherwise. */
# define NEW_MESSAGE() (!http_should_keep_alive(parser) ? s_dead : start_state)
#endif
407
408
409/* Map errno values to strings for human-readable output */
410#define HTTP_STRERROR_GEN(n, s) { "HPE_" #n, s },
411static struct {
412 const char *name;
413 const char *description;
414} http_strerror_tab[] = {
415 HTTP_ERRNO_MAP(HTTP_STRERROR_GEN)
416};
417#undef HTTP_STRERROR_GEN
418
419int http_message_needs_eof(const http_parser *parser);
420
421/* Our URL parser.
422 *
423 * This is designed to be shared by http_parser_execute() for URL validation,
424 * hence it has a state transition + byte-for-byte interface. In addition, it
425 * is meant to be embedded in http_parser_parse_url(), which does the dirty
426 * work of turning state transitions URL components for its API.
427 *
428 * This function should only be invoked with non-space characters. It is
429 * assumed that the caller cares about (and can detect) the transition between
430 * URL and non-URL states by looking for these.
431 */
432static enum state
433parse_url_char(enum state s, const char ch)
434{
435 if (ch == ' ' || ch == '\r' || ch == '\n') {
436 return s_dead;
437 }
438
439#if HTTP_PARSER_STRICT
440 if (ch == '\t' || ch == '\f') {
441 return s_dead;
442 }
443#endif
444
445 switch (s) {
446 case s_req_spaces_before_url:
447 /* Proxied requests are followed by scheme of an absolute URI (alpha).
448 * All methods except CONNECT are followed by '/' or '*'.
449 */
450
451 if (ch == '/' || ch == '*') {
452 return s_req_path;
453 }
454
455 if (IS_ALPHA(ch)) {
456 return s_req_schema;
457 }
458
459 break;
460
461 case s_req_schema:
462 if (IS_ALPHA(ch)) {
463 return s;
464 }
465
466 if (ch == ':') {
467 return s_req_schema_slash;
468 }
469
470 break;
471
472 case s_req_schema_slash:
473 if (ch == '/') {
474 return s_req_schema_slash_slash;
475 }
476
477 break;
478
479 case s_req_schema_slash_slash:
480 if (ch == '/') {
481 return s_req_server_start;
482 }
483
484 break;
485
486 case s_req_server_with_at:
487 if (ch == '@') {
488 return s_dead;
489 }
490
491 /* FALLTHROUGH */
492 case s_req_server_start:
493 case s_req_server:
494 if (ch == '/') {
495 return s_req_path;
496 }
497
498 if (ch == '?') {
499 return s_req_query_string_start;
500 }
501
502 if (ch == '@') {
503 return s_req_server_with_at;
504 }
505
506 if (IS_USERINFO_CHAR(ch) || ch == '[' || ch == ']') {
507 return s_req_server;
508 }
509
510 break;
511
512 case s_req_path:
513 if (IS_URL_CHAR(ch)) {
514 return s;
515 }
516
517 switch (ch) {
518 case '?':
519 return s_req_query_string_start;
520
521 case '#':
522 return s_req_fragment_start;
523 }
524
525 break;
526
527 case s_req_query_string_start:
528 case s_req_query_string:
529 if (IS_URL_CHAR(ch)) {
530 return s_req_query_string;
531 }
532
533 switch (ch) {
534 case '?':
535 /* allow extra '?' in query string */
536 return s_req_query_string;
537
538 case '#':
539 return s_req_fragment_start;
540 }
541
542 break;
543
544 case s_req_fragment_start:
545 if (IS_URL_CHAR(ch)) {
546 return s_req_fragment;
547 }
548
549 switch (ch) {
550 case '?':
551 return s_req_fragment;
552
553 case '#':
554 return s;
555 }
556
557 break;
558
559 case s_req_fragment:
560 if (IS_URL_CHAR(ch)) {
561 return s;
562 }
563
564 switch (ch) {
565 case '?':
566 case '#':
567 return s;
568 }
569
570 break;
571
572 default:
573 break;
574 }
575
576 /* We should never fall out of the switch above unless there's an error */
577 return s_dead;
578}
579
580size_t http_parser_execute (http_parser *parser,
581 const http_parser_settings *settings,
582 const char *data,
583 size_t len)
584{
585 char c, ch;
586 int8_t unhex_val;
587 const char *p = data;
588 const char *header_field_mark = 0;
589 const char *header_value_mark = 0;
590 const char *url_mark = 0;
591 const char *body_mark = 0;
592
593 /* We're in an error state. Don't bother doing anything. */
594 if (HTTP_PARSER_ERRNO(parser) != HPE_OK) {
595 return 0;
596 }
597
598 if (len == 0) {
599 switch (parser->state) {
600 case s_body_identity_eof:
601 /* Use of CALLBACK_NOTIFY() here would erroneously return 1 byte read if
602 * we got paused.
603 */
604 CALLBACK_NOTIFY_NOADVANCE(message_complete);
605 return 0;
606
607 case s_dead:
608 case s_start_req_or_res:
609 case s_start_res:
610 case s_start_req:
611 return 0;
612
613 default:
614 SET_ERRNO(HPE_INVALID_EOF_STATE);
615 return 1;
616 }
617 }
618
619
620 if (parser->state == s_header_field)
621 header_field_mark = data;
622 if (parser->state == s_header_value)
623 header_value_mark = data;
624 switch (parser->state) {
625 case s_req_path:
626 case s_req_schema:
627 case s_req_schema_slash:
628 case s_req_schema_slash_slash:
629 case s_req_server_start:
630 case s_req_server:
631 case s_req_server_with_at:
632 case s_req_query_string_start:
633 case s_req_query_string:
634 case s_req_fragment_start:
635 case s_req_fragment:
636 url_mark = data;
637 break;
638 }
639
640 for (p=data; p != data + len; p++) {
641 ch = *p;
642
643 if (PARSING_HEADER(parser->state)) {
644 ++parser->nread;
645 /* Buffer overflow attack */
646 if (parser->nread > HTTP_MAX_HEADER_SIZE) {
647 SET_ERRNO(HPE_HEADER_OVERFLOW);
648 goto error;
649 }
650 }
651
652 reexecute_byte:
653 switch (parser->state) {
654
655 case s_dead:
656 /* this state is used after a 'Connection: close' message
657 * the parser will error out if it reads another message
658 */
659 if (ch == CR || ch == LF)
660 break;
661
662 SET_ERRNO(HPE_CLOSED_CONNECTION);
663 goto error;
664
665 case s_start_req_or_res:
666 {
667 if (ch == CR || ch == LF)
668 break;
669 parser->flags = 0;
670 parser->content_length = ULLONG_MAX;
671
672 if (ch == 'H') {
673 parser->state = s_res_or_resp_H;
674
675 CALLBACK_NOTIFY(message_begin);
676 } else {
677 parser->type = HTTP_REQUEST;
678 parser->state = s_start_req;
679 goto reexecute_byte;
680 }
681
682 break;
683 }
684
685 case s_res_or_resp_H:
686 if (ch == 'T') {
687 parser->type = HTTP_RESPONSE;
688 parser->state = s_res_HT;
689 } else {
690 if (ch != 'E') {
691 SET_ERRNO(HPE_INVALID_CONSTANT);
692 goto error;
693 }
694
695 parser->type = HTTP_REQUEST;
696 parser->method = HTTP_HEAD;
697 parser->index = 2;
698 parser->state = s_req_method;
699 }
700 break;
701
702 case s_start_res:
703 {
704 parser->flags = 0;
705 parser->content_length = ULLONG_MAX;
706
707 switch (ch) {
708 case 'H':
709 parser->state = s_res_H;
710 break;
711
712 case CR:
713 case LF:
714 break;
715
716 default:
717 SET_ERRNO(HPE_INVALID_CONSTANT);
718 goto error;
719 }
720
721 CALLBACK_NOTIFY(message_begin);
722 break;
723 }
724
725 case s_res_H:
726 STRICT_CHECK(ch != 'T');
727 parser->state = s_res_HT;
728 break;
729
730 case s_res_HT:
731 STRICT_CHECK(ch != 'T');
732 parser->state = s_res_HTT;
733 break;
734
735 case s_res_HTT:
736 STRICT_CHECK(ch != 'P');
737 parser->state = s_res_HTTP;
738 break;
739
740 case s_res_HTTP:
741 STRICT_CHECK(ch != '/');
742 parser->state = s_res_first_http_major;
743 break;
744
745 case s_res_first_http_major:
746 if (ch < '0' || ch > '9') {
747 SET_ERRNO(HPE_INVALID_VERSION);
748 goto error;
749 }
750
751 parser->http_major = ch - '0';
752 parser->state = s_res_http_major;
753 break;
754
755 /* major HTTP version or dot */
756 case s_res_http_major:
757 {
758 if (ch == '.') {
759 parser->state = s_res_first_http_minor;
760 break;
761 }
762
763 if (!IS_NUM(ch)) {
764 SET_ERRNO(HPE_INVALID_VERSION);
765 goto error;
766 }
767
768 parser->http_major *= 10;
769 parser->http_major += ch - '0';
770
771 if (parser->http_major > 999) {
772 SET_ERRNO(HPE_INVALID_VERSION);
773 goto error;
774 }
775
776 break;
777 }
778
779 /* first digit of minor HTTP version */
780 case s_res_first_http_minor:
781 if (!IS_NUM(ch)) {
782 SET_ERRNO(HPE_INVALID_VERSION);
783 goto error;
784 }
785
786 parser->http_minor = ch - '0';
787 parser->state = s_res_http_minor;
788 break;
789
790 /* minor HTTP version or end of request line */
791 case s_res_http_minor:
792 {
793 if (ch == ' ') {
794 parser->state = s_res_first_status_code;
795 break;
796 }
797
798 if (!IS_NUM(ch)) {
799 SET_ERRNO(HPE_INVALID_VERSION);
800 goto error;
801 }
802
803 parser->http_minor *= 10;
804 parser->http_minor += ch - '0';
805
806 if (parser->http_minor > 999) {
807 SET_ERRNO(HPE_INVALID_VERSION);
808 goto error;
809 }
810
811 break;
812 }
813
814 case s_res_first_status_code:
815 {
816 if (!IS_NUM(ch)) {
817 if (ch == ' ') {
818 break;
819 }
820
821 SET_ERRNO(HPE_INVALID_STATUS);
822 goto error;
823 }
824 parser->status_code = ch - '0';
825 parser->state = s_res_status_code;
826 break;
827 }
828
829 case s_res_status_code:
830 {
831 if (!IS_NUM(ch)) {
832 switch (ch) {
833 case ' ':
834 parser->state = s_res_status;
835 break;
836 case CR:
837 parser->state = s_res_line_almost_done;
838 break;
839 case LF:
840 parser->state = s_header_field_start;
841 break;
842 default:
843 SET_ERRNO(HPE_INVALID_STATUS);
844 goto error;
845 }
846 break;
847 }
848
849 parser->status_code *= 10;
850 parser->status_code += ch - '0';
851
852 if (parser->status_code > 999) {
853 SET_ERRNO(HPE_INVALID_STATUS);
854 goto error;
855 }
856
857 break;
858 }
859
860 case s_res_status:
861 /* the human readable status. e.g. "NOT FOUND"
862 * we are not humans so just ignore this */
863 if (ch == CR) {
864 parser->state = s_res_line_almost_done;
865 break;
866 }
867
868 if (ch == LF) {
869 parser->state = s_header_field_start;
870 break;
871 }
872 break;
873
874 case s_res_line_almost_done:
875 STRICT_CHECK(ch != LF);
876 parser->state = s_header_field_start;
877 break;
878
879 case s_start_req:
880 {
881 if (ch == CR || ch == LF)
882 break;
883 parser->flags = 0;
884 parser->content_length = ULLONG_MAX;
885
886 if (!IS_ALPHA(ch)) {
887 SET_ERRNO(HPE_INVALID_METHOD);
888 goto error;
889 }
890
891 parser->method = (enum http_method) 0;
892 parser->index = 1;
893 switch (ch) {
894 case 'A': parser->method = HTTP_ANNOUNCE; break;
895 case 'C': parser->method = HTTP_CONNECT; /* or COPY, CHECKOUT */ break;
896 case 'D': parser->method = HTTP_DELETE; /* or DESCRIBE */ break;
897 case 'F': parser->method = HTTP_FLUSH; break;
898 case 'G': parser->method = HTTP_GET; /* or GET_PARAMETER */ break;
899 case 'H': parser->method = HTTP_HEAD; break;
900 case 'L': parser->method = HTTP_LOCK; break;
901 case 'M': parser->method = HTTP_MKCOL; /* or MOVE, MKACTIVITY, MERGE, M-SEARCH */ break;
902 case 'N': parser->method = HTTP_NOTIFY; break;
903 case 'O': parser->method = HTTP_OPTIONS; break;
904 case 'P': parser->method = HTTP_POST;
905 /* or PROPFIND|PROPPATCH|PUT|PATCH|PURGE|PLAY|PAUSE */
906 break;
907 case 'R': parser->method = HTTP_REPORT; /* or REDIRECT, RECORD */ break;
908 case 'S': parser->method = HTTP_SUBSCRIBE; /* or SEARCH, SETUP, SET_PARAMETER */ break;
909 case 'T': parser->method = HTTP_TRACE; /* or TEARDOWN */ break;
910 case 'U': parser->method = HTTP_UNLOCK; /* or UNSUBSCRIBE */ break;
911 default:
912 SET_ERRNO(HPE_INVALID_METHOD);
913 goto error;
914 }
915 parser->state = s_req_method;
916
917 CALLBACK_NOTIFY(message_begin);
918
919 break;
920 }
921
922 case s_req_method:
923 {
924 const char *matcher;
925 if (ch == '\0') {
926 SET_ERRNO(HPE_INVALID_METHOD);
927 goto error;
928 }
929
930 matcher = method_strings[parser->method];
931 if (ch == ' ' && matcher[parser->index] == '\0') {
932 parser->state = s_req_spaces_before_url;
933 } else if (ch == matcher[parser->index]) {
934 ; /* nada */
935 } else if (parser->method == HTTP_CONNECT) {
936 if (parser->index == 1 && ch == 'H') {
937 parser->method = HTTP_CHECKOUT;
938 } else if (parser->index == 2 && ch == 'P') {
939 parser->method = HTTP_COPY;
940 } else {
941 goto error;
942 }
943 } else if (parser->index == 2 && parser->method == HTTP_DELETE && ch == 'S') {
944 parser->method = HTTP_DESCRIBE;
945 } else if (parser->index == 3 && parser->method == HTTP_GET && ch == '_') {
946 parser->method = HTTP_GET_PARAMETER;
947 } else if (parser->method == HTTP_MKCOL) {
948 if (parser->index == 1 && ch == 'O') {
949 parser->method = HTTP_MOVE;
950 } else if (parser->index == 1 && ch == 'E') {
951 parser->method = HTTP_MERGE;
952 } else if (parser->index == 1 && ch == '-') {
953 parser->method = HTTP_MSEARCH;
954 } else if (parser->index == 2 && ch == 'A') {
955 parser->method = HTTP_MKACTIVITY;
956 } else {
957 goto error;
958 }
959 } else if (parser->method == HTTP_SUBSCRIBE) {
960 if (parser->index == 1 && ch == 'E') {
961 parser->method = HTTP_SEARCH; /* or HTTP_SETUP or HTTP_SET_PARAMETER */
962 } else {
963 goto error;
964 }
965 } else if (parser->method == HTTP_TRACE) {
966 if (parser->index == 1 && ch == 'E') {
967 parser->method = HTTP_TEARDOWN;
968 } else {
969 goto error;
970 }
971 } else if (parser->index == 1 && parser->method == HTTP_POST) {
972 if (ch == 'R') {
973 parser->method = HTTP_PROPFIND; /* or HTTP_PROPPATCH */
974 } else if (ch == 'U') {
975 parser->method = HTTP_PUT; /* or HTTP_PURGE */
976 } else if (ch == 'A') {
977 parser->method = HTTP_PATCH; /* or HTTP_PAUSE */
978 } else if (ch == 'L') {
979 parser->method = HTTP_PLAY;
980 } else {
981 goto error;
982 }
983 } else if (parser->index == 2) {
984 if (parser->method == HTTP_PUT) {
985 if (ch == 'R') parser->method = HTTP_PURGE;
986 } else if (parser->method == HTTP_PATCH) {
987 if (ch == 'U') parser->method = HTTP_PAUSE;
988 } else if (parser->method == HTTP_REPORT && ch == 'D') {
989 parser->method = HTTP_REDIRECT;
990 } else if (parser->method == HTTP_REPORT && ch == 'C') {
991 parser->method = HTTP_RECORD;
992 } else if (parser->method == HTTP_SEARCH) {
993 if (ch == 'T') parser->method = HTTP_SETUP; /* or HTTP_SET_PARAMETER */
994 } else if (parser->method == HTTP_UNLOCK) {
995 if (ch == 'S') parser->method = HTTP_UNSUBSCRIBE;
996 } else {
997 goto error;
998 }
999 } else if (parser->index == 3 && parser->method == HTTP_SETUP && ch == '_') {
1000 parser->method = HTTP_SET_PARAMETER;
1001 } else if (parser->index == 4 && parser->method == HTTP_PROPFIND && ch == 'P') {
1002 parser->method = HTTP_PROPPATCH;
1003 } else {
1004 SET_ERRNO(HPE_INVALID_METHOD);
1005 goto error;
1006 }
1007
1008 ++parser->index;
1009 break;
1010 }
1011
1012 case s_req_spaces_before_url:
1013 {
1014 if (ch == ' ') break;
1015
1016 MARK(url);
1017 if (parser->method == HTTP_CONNECT) {
1018 parser->state = s_req_server_start;
1019 }
1020
1021 parser->state = parse_url_char((enum state)parser->state, ch);
1022 if (parser->state == s_dead) {
1023 SET_ERRNO(HPE_INVALID_URL);
1024 goto error;
1025 }
1026
1027 break;
1028 }
1029
1030 case s_req_schema:
1031 case s_req_schema_slash:
1032 case s_req_schema_slash_slash:
1033 case s_req_server_start:
1034 {
1035 switch (ch) {
1036 /* No whitespace allowed here */
1037 case ' ':
1038 case CR:
1039 case LF:
1040 SET_ERRNO(HPE_INVALID_URL);
1041 goto error;
1042 default:
1043 parser->state = parse_url_char((enum state)parser->state, ch);
1044 if (parser->state == s_dead) {
1045 SET_ERRNO(HPE_INVALID_URL);
1046 goto error;
1047 }
1048 }
1049
1050 break;
1051 }
1052
1053 case s_req_server:
1054 case s_req_server_with_at:
1055 case s_req_path:
1056 case s_req_query_string_start:
1057 case s_req_query_string:
1058 case s_req_fragment_start:
1059 case s_req_fragment:
1060 {
1061 switch (ch) {
1062 case ' ':
1063 parser->state = s_req_http_start;
1064 CALLBACK_DATA(url);
1065 break;
1066 case CR:
1067 case LF:
1068 parser->http_major = 0;
1069 parser->http_minor = 9;
1070 parser->state = (ch == CR) ?
1071 s_req_line_almost_done :
1072 s_header_field_start;
1073 CALLBACK_DATA(url);
1074 break;
1075 default:
1076 parser->state = parse_url_char((enum state)parser->state, ch);
1077 if (parser->state == s_dead) {
1078 SET_ERRNO(HPE_INVALID_URL);
1079 goto error;
1080 }
1081 }
1082 break;
1083 }
1084
1085 case s_req_http_start:
1086 switch (ch) {
1087 case 'H':
1088 case 'R':
1089 parser->state = s_req_http_H;
1090 break;
1091 case ' ':
1092 break;
1093 default:
1094 SET_ERRNO(HPE_INVALID_CONSTANT);
1095 goto error;
1096 }
1097 break;
1098
1099 case s_req_http_H:
1100 STRICT_CHECK(ch != 'T');
1101 parser->state = s_req_http_HT;
1102 break;
1103
1104 case s_req_http_HT:
1105 STRICT_CHECK(ch != 'T');
1106 parser->state = s_req_http_HTT;
1107 break;
1108
1109 case s_req_http_HTT:
1110 STRICT_CHECK(ch != 'P');
1111 parser->state = s_req_http_HTTP;
1112 break;
1113
1114 case s_req_http_HTTP:
1115 STRICT_CHECK(ch != '/');
1116 parser->state = s_req_first_http_major;
1117 break;
1118
1119 /* first digit of major HTTP version */
1120 case s_req_first_http_major:
1121 if (ch < '1' || ch > '9') {
1122 SET_ERRNO(HPE_INVALID_VERSION);
1123 goto error;
1124 }
1125
1126 parser->http_major = ch - '0';
1127 parser->state = s_req_http_major;
1128 break;
1129
1130 /* major HTTP version or dot */
1131 case s_req_http_major:
1132 {
1133 if (ch == '.') {
1134 parser->state = s_req_first_http_minor;
1135 break;
1136 }
1137
1138 if (!IS_NUM(ch)) {
1139 SET_ERRNO(HPE_INVALID_VERSION);
1140 goto error;
1141 }
1142
1143 parser->http_major *= 10;
1144 parser->http_major += ch - '0';
1145
1146 if (parser->http_major > 999) {
1147 SET_ERRNO(HPE_INVALID_VERSION);
1148 goto error;
1149 }
1150
1151 break;
1152 }
1153
1154 /* first digit of minor HTTP version */
1155 case s_req_first_http_minor:
1156 if (!IS_NUM(ch)) {
1157 SET_ERRNO(HPE_INVALID_VERSION);
1158 goto error;
1159 }
1160
1161 parser->http_minor = ch - '0';
1162 parser->state = s_req_http_minor;
1163 break;
1164
1165 /* minor HTTP version or end of request line */
1166 case s_req_http_minor:
1167 {
1168 if (ch == CR) {
1169 parser->state = s_req_line_almost_done;
1170 break;
1171 }
1172
1173 if (ch == LF) {
1174 parser->state = s_header_field_start;
1175 break;
1176 }
1177
1178 /* XXX allow spaces after digit? */
1179
1180 if (!IS_NUM(ch)) {
1181 SET_ERRNO(HPE_INVALID_VERSION);
1182 goto error;
1183 }
1184
1185 parser->http_minor *= 10;
1186 parser->http_minor += ch - '0';
1187
1188 if (parser->http_minor > 999) {
1189 SET_ERRNO(HPE_INVALID_VERSION);
1190 goto error;
1191 }
1192
1193 break;
1194 }
1195
1196 /* end of request line */
1197 case s_req_line_almost_done:
1198 {
1199 if (ch != LF) {
1200 SET_ERRNO(HPE_LF_EXPECTED);
1201 goto error;
1202 }
1203
1204 parser->state = s_header_field_start;
1205 break;
1206 }
1207
1208 case s_header_field_start:
1209 {
1210 if (ch == CR) {
1211 parser->state = s_headers_almost_done;
1212 break;
1213 }
1214
1215 if (ch == LF) {
1216 /* they might be just sending \n instead of \r\n so this would be
1217 * the second \n to denote the end of headers*/
1218 parser->state = s_headers_almost_done;
1219 goto reexecute_byte;
1220 }
1221
1222 c = TOKEN(ch);
1223
1224 if (!c) {
1225 SET_ERRNO(HPE_INVALID_HEADER_TOKEN);
1226 goto error;
1227 }
1228
1229 MARK(header_field);
1230
1231 parser->index = 0;
1232 parser->state = s_header_field;
1233
1234 switch (c) {
1235 case 'c':
1236 parser->header_state = h_C;
1237 break;
1238
1239 case 'p':
1240 parser->header_state = h_matching_proxy_connection;
1241 break;
1242
1243 case 't':
1244 parser->header_state = h_matching_transfer_encoding;
1245 break;
1246
1247 case 'u':
1248 parser->header_state = h_matching_upgrade;
1249 break;
1250
1251 default:
1252 parser->header_state = h_general;
1253 break;
1254 }
1255 break;
1256 }
1257
1258 case s_header_field:
1259 {
1260 c = TOKEN(ch);
1261
1262 if (c) {
1263 switch (parser->header_state) {
1264 case h_general:
1265 break;
1266
1267 case h_C:
1268 parser->index++;
1269 parser->header_state = (c == 'o' ? h_CO : h_general);
1270 break;
1271
1272 case h_CO:
1273 parser->index++;
1274 parser->header_state = (c == 'n' ? h_CON : h_general);
1275 break;
1276
1277 case h_CON:
1278 parser->index++;
1279 switch (c) {
1280 case 'n':
1281 parser->header_state = h_matching_connection;
1282 break;
1283 case 't':
1284 parser->header_state = h_matching_content_length;
1285 break;
1286 default:
1287 parser->header_state = h_general;
1288 break;
1289 }
1290 break;
1291
1292 /* connection */
1293
1294 case h_matching_connection:
1295 parser->index++;
1296 if (parser->index > sizeof(CONNECTION)-1
1297 || c != CONNECTION[parser->index]) {
1298 parser->header_state = h_general;
1299 } else if (parser->index == sizeof(CONNECTION)-2) {
1300 parser->header_state = h_connection;
1301 }
1302 break;
1303
1304 /* proxy-connection */
1305
1306 case h_matching_proxy_connection:
1307 parser->index++;
1308 if (parser->index > sizeof(PROXY_CONNECTION)-1
1309 || c != PROXY_CONNECTION[parser->index]) {
1310 parser->header_state = h_general;
1311 } else if (parser->index == sizeof(PROXY_CONNECTION)-2) {
1312 parser->header_state = h_connection;
1313 }
1314 break;
1315
1316 /* content-length */
1317
1318 case h_matching_content_length:
1319 parser->index++;
1320 if (parser->index > sizeof(CONTENT_LENGTH)-1
1321 || c != CONTENT_LENGTH[parser->index]) {
1322 parser->header_state = h_general;
1323 } else if (parser->index == sizeof(CONTENT_LENGTH)-2) {
1324 parser->header_state = h_content_length;
1325 }
1326 break;
1327
1328 /* transfer-encoding */
1329
1330 case h_matching_transfer_encoding:
1331 parser->index++;
1332 if (parser->index > sizeof(TRANSFER_ENCODING)-1
1333 || c != TRANSFER_ENCODING[parser->index]) {
1334 parser->header_state = h_general;
1335 } else if (parser->index == sizeof(TRANSFER_ENCODING)-2) {
1336 parser->header_state = h_transfer_encoding;
1337 }
1338 break;
1339
1340 /* upgrade */
1341
1342 case h_matching_upgrade:
1343 parser->index++;
1344 if (parser->index > sizeof(UPGRADE)-1
1345 || c != UPGRADE[parser->index]) {
1346 parser->header_state = h_general;
1347 } else if (parser->index == sizeof(UPGRADE)-2) {
1348 parser->header_state = h_upgrade;
1349 }
1350 break;
1351
1352 case h_connection:
1353 case h_content_length:
1354 case h_transfer_encoding:
1355 case h_upgrade:
1356 if (ch != ' ') parser->header_state = h_general;
1357 break;
1358
1359 default:
1360 assert(0 && "Unknown header_state");
1361 break;
1362 }
1363 break;
1364 }
1365
1366 if (ch == ':') {
1367 parser->state = s_header_value_start;
1368 CALLBACK_DATA(header_field);
1369 break;
1370 }
1371
1372 if (ch == CR) {
1373 parser->state = s_header_almost_done;
1374 CALLBACK_DATA(header_field);
1375 break;
1376 }
1377
1378 if (ch == LF) {
1379 parser->state = s_header_field_start;
1380 CALLBACK_DATA(header_field);
1381 break;
1382 }
1383
1384 SET_ERRNO(HPE_INVALID_HEADER_TOKEN);
1385 goto error;
1386 }
1387
1388 case s_header_value_start:
1389 {
1390 if (ch == ' ' || ch == '\t') break;
1391
1392 MARK(header_value);
1393
1394 parser->state = s_header_value;
1395 parser->index = 0;
1396
1397 if (ch == CR) {
1398 parser->header_state = h_general;
1399 parser->state = s_header_almost_done;
1400 CALLBACK_DATA(header_value);
1401 break;
1402 }
1403
1404 if (ch == LF) {
1405 parser->state = s_header_field_start;
1406 CALLBACK_DATA(header_value);
1407 break;
1408 }
1409
1410 c = LOWER(ch);
1411
1412 switch (parser->header_state) {
1413 case h_upgrade:
1414 parser->flags |= F_UPGRADE;
1415 parser->header_state = h_general;
1416 break;
1417
1418 case h_transfer_encoding:
1419 /* looking for 'Transfer-Encoding: chunked' */
1420 if ('c' == c) {
1421 parser->header_state = h_matching_transfer_encoding_chunked;
1422 } else {
1423 parser->header_state = h_general;
1424 }
1425 break;
1426
1427 case h_content_length:
1428 if (!IS_NUM(ch)) {
1429 SET_ERRNO(HPE_INVALID_CONTENT_LENGTH);
1430 goto error;
1431 }
1432
1433 parser->content_length = ch - '0';
1434 break;
1435
1436 case h_connection:
1437 /* looking for 'Connection: keep-alive' */
1438 if (c == 'k') {
1439 parser->header_state = h_matching_connection_keep_alive;
1440 /* looking for 'Connection: close' */
1441 } else if (c == 'c') {
1442 parser->header_state = h_matching_connection_close;
1443 } else {
1444 parser->header_state = h_general;
1445 }
1446 break;
1447
1448 default:
1449 parser->header_state = h_general;
1450 break;
1451 }
1452 break;
1453 }
1454
1455 case s_header_value:
1456 {
1457
1458 if (ch == CR) {
1459 parser->state = s_header_almost_done;
1460 CALLBACK_DATA(header_value);
1461 break;
1462 }
1463
1464 if (ch == LF) {
1465 parser->state = s_header_almost_done;
1466 CALLBACK_DATA_NOADVANCE(header_value);
1467 goto reexecute_byte;
1468 }
1469
1470 c = LOWER(ch);
1471
1472 switch (parser->header_state) {
1473 case h_general:
1474 break;
1475
1476 case h_connection:
1477 case h_transfer_encoding:
1478 assert(0 && "Shouldn't get here.");
1479 break;
1480
1481 case h_content_length:
1482 {
1483 uint64_t t;
1484
1485 if (ch == ' ') break;
1486
1487 if (!IS_NUM(ch)) {
1488 SET_ERRNO(HPE_INVALID_CONTENT_LENGTH);
1489 goto error;
1490 }
1491
1492 t = parser->content_length;
1493 t *= 10;
1494 t += ch - '0';
1495
1496 /* Overflow? */
1497 if (t < parser->content_length || t == ULLONG_MAX) {
1498 SET_ERRNO(HPE_INVALID_CONTENT_LENGTH);
1499 goto error;
1500 }
1501
1502 parser->content_length = t;
1503 break;
1504 }
1505
1506 /* Transfer-Encoding: chunked */
1507 case h_matching_transfer_encoding_chunked:
1508 parser->index++;
1509 if (parser->index > sizeof(CHUNKED)-1
1510 || c != CHUNKED[parser->index]) {
1511 parser->header_state = h_general;
1512 } else if (parser->index == sizeof(CHUNKED)-2) {
1513 parser->header_state = h_transfer_encoding_chunked;
1514 }
1515 break;
1516
1517 /* looking for 'Connection: keep-alive' */
1518 case h_matching_connection_keep_alive:
1519 parser->index++;
1520 if (parser->index > sizeof(KEEP_ALIVE)-1
1521 || c != KEEP_ALIVE[parser->index]) {
1522 parser->header_state = h_general;
1523 } else if (parser->index == sizeof(KEEP_ALIVE)-2) {
1524 parser->header_state = h_connection_keep_alive;
1525 }
1526 break;
1527
1528 /* looking for 'Connection: close' */
1529 case h_matching_connection_close:
1530 parser->index++;
1531 if (parser->index > sizeof(CLOSE)-1 || c != CLOSE[parser->index]) {
1532 parser->header_state = h_general;
1533 } else if (parser->index == sizeof(CLOSE)-2) {
1534 parser->header_state = h_connection_close;
1535 }
1536 break;
1537
1538 case h_transfer_encoding_chunked:
1539 case h_connection_keep_alive:
1540 case h_connection_close:
1541 if (ch != ' ') parser->header_state = h_general;
1542 break;
1543
1544 default:
1545 parser->state = s_header_value;
1546 parser->header_state = h_general;
1547 break;
1548 }
1549 break;
1550 }
1551
1552 case s_header_almost_done:
1553 {
1554 STRICT_CHECK(ch != LF);
1555
1556 parser->state = s_header_value_lws;
1557
1558 switch (parser->header_state) {
1559 case h_connection_keep_alive:
1560 parser->flags |= F_CONNECTION_KEEP_ALIVE;
1561 break;
1562 case h_connection_close:
1563 parser->flags |= F_CONNECTION_CLOSE;
1564 break;
1565 case h_transfer_encoding_chunked:
1566 parser->flags |= F_CHUNKED;
1567 break;
1568 default:
1569 break;
1570 }
1571
1572 break;
1573 }
1574
1575 case s_header_value_lws:
1576 {
1577 if (ch == ' ' || ch == '\t')
1578 parser->state = s_header_value_start;
1579 else
1580 {
1581 parser->state = s_header_field_start;
1582 goto reexecute_byte;
1583 }
1584 break;
1585 }
1586
1587 case s_headers_almost_done:
1588 {
1589 STRICT_CHECK(ch != LF);
1590
1591 if (parser->flags & F_TRAILING) {
1592 /* End of a chunked request */
1593 parser->state = NEW_MESSAGE();
1594 CALLBACK_NOTIFY(message_complete);
1595 break;
1596 }
1597
1598 parser->state = s_headers_done;
1599
1600 /* Set this here so that on_headers_complete() callbacks can see it */
1601 parser->upgrade =
1602 (parser->flags & F_UPGRADE || parser->method == HTTP_CONNECT);
1603
1604 /* Here we call the headers_complete callback. This is somewhat
1605 * different than other callbacks because if the user returns 1, we
1606 * will interpret that as saying that this message has no body. This
1607 * is needed for the annoying case of receiving a response to a HEAD
1608 * request.
1609 *
1610 * We'd like to use CALLBACK_NOTIFY_NOADVANCE() here but we cannot, so
1611 * we have to simulate it by handling a change in errno below.
1612 */
1613 if (settings->on_headers_complete) {
1614 switch (settings->on_headers_complete(parser)) {
1615 case 0:
1616 break;
1617
1618 case 1:
1619 parser->flags |= F_SKIPBODY;
1620 break;
1621
1622 default:
1623 SET_ERRNO(HPE_CB_headers_complete);
1624 return p - data; /* Error */
1625 }
1626 }
1627
1628 if (HTTP_PARSER_ERRNO(parser) != HPE_OK) {
1629 return p - data;
1630 }
1631
1632 goto reexecute_byte;
1633 }
1634
1635 case s_headers_done:
1636 {
1637 STRICT_CHECK(ch != LF);
1638
1639 parser->nread = 0;
1640
1641 /* Exit, the rest of the connect is in a different protocol. */
1642 if (parser->upgrade) {
1643 parser->state = NEW_MESSAGE();
1644 CALLBACK_NOTIFY(message_complete);
1645 return (p - data) + 1;
1646 }
1647
1648 if (parser->flags & F_SKIPBODY) {
1649 parser->state = NEW_MESSAGE();
1650 CALLBACK_NOTIFY(message_complete);
1651 } else if (parser->flags & F_CHUNKED) {
1652 /* chunked encoding - ignore Content-Length header */
1653 parser->state = s_chunk_size_start;
1654 } else {
1655 if (parser->content_length == 0) {
1656 /* Content-Length header given but zero: Content-Length: 0\r\n */
1657 parser->state = NEW_MESSAGE();
1658 CALLBACK_NOTIFY(message_complete);
1659 } else if (parser->content_length != ULLONG_MAX) {
1660 /* Content-Length header given and non-zero */
1661 parser->state = s_body_identity;
1662 } else {
1663 if (parser->type == HTTP_REQUEST ||
1664 !http_message_needs_eof(parser)) {
1665 /* Assume content-length 0 - read the next */
1666 parser->state = NEW_MESSAGE();
1667 CALLBACK_NOTIFY(message_complete);
1668 } else {
1669 /* Read body until EOF */
1670 parser->state = s_body_identity_eof;
1671 }
1672 }
1673 }
1674
1675 break;
1676 }
1677
1678 case s_body_identity:
1679 {
1680 uint64_t to_read = MIN(parser->content_length,
1681 (uint64_t) ((data + len) - p));
1682
1683 assert(parser->content_length != 0
1684 && parser->content_length != ULLONG_MAX);
1685
1686 /* The difference between advancing content_length and p is because
1687 * the latter will automatically advance on the next loop iteration.
1688 * Further, if content_length ends up at 0, we want to see the last
1689 * byte again for our message complete callback.
1690 */
1691 MARK(body);
1692 parser->content_length -= to_read;
1693 p += to_read - 1;
1694
1695 if (parser->content_length == 0) {
1696 parser->state = s_message_done;
1697
1698 /* Mimic CALLBACK_DATA_NOADVANCE() but with one extra byte.
1699 *
1700 * The alternative to doing this is to wait for the next byte to
1701 * trigger the data callback, just as in every other case. The
1702 * problem with this is that this makes it difficult for the test
1703 * harness to distinguish between complete-on-EOF and
1704 * complete-on-length. It's not clear that this distinction is
1705 * important for applications, but let's keep it for now.
1706 */
1707 CALLBACK_DATA_(body, p - body_mark + 1, p - data);
1708 goto reexecute_byte;
1709 }
1710
1711 break;
1712 }
1713
1714 /* read until EOF */
1715 case s_body_identity_eof:
1716 MARK(body);
1717 p = data + len - 1;
1718
1719 break;
1720
1721 case s_message_done:
1722 parser->state = NEW_MESSAGE();
1723 CALLBACK_NOTIFY(message_complete);
1724 break;
1725
1726 case s_chunk_size_start:
1727 {
1728 assert(parser->nread == 1);
1729 assert(parser->flags & F_CHUNKED);
1730
1731 unhex_val = unhex[(unsigned char)ch];
1732 if (unhex_val == -1) {
1733 SET_ERRNO(HPE_INVALID_CHUNK_SIZE);
1734 goto error;
1735 }
1736
1737 parser->content_length = unhex_val;
1738 parser->state = s_chunk_size;
1739 break;
1740 }
1741
1742 case s_chunk_size:
1743 {
1744 uint64_t t;
1745
1746 assert(parser->flags & F_CHUNKED);
1747
1748 if (ch == CR) {
1749 parser->state = s_chunk_size_almost_done;
1750 break;
1751 }
1752
1753 unhex_val = unhex[(unsigned char)ch];
1754
1755 if (unhex_val == -1) {
1756 if (ch == ';' || ch == ' ') {
1757 parser->state = s_chunk_parameters;
1758 break;
1759 }
1760
1761 SET_ERRNO(HPE_INVALID_CHUNK_SIZE);
1762 goto error;
1763 }
1764
1765 t = parser->content_length;
1766 t *= 16;
1767 t += unhex_val;
1768
1769 /* Overflow? */
1770 if (t < parser->content_length || t == ULLONG_MAX) {
1771 SET_ERRNO(HPE_INVALID_CONTENT_LENGTH);
1772 goto error;
1773 }
1774
1775 parser->content_length = t;
1776 break;
1777 }
1778
1779 case s_chunk_parameters:
1780 {
1781 assert(parser->flags & F_CHUNKED);
1782 /* just ignore this shit. TODO check for overflow */
1783 if (ch == CR) {
1784 parser->state = s_chunk_size_almost_done;
1785 break;
1786 }
1787 break;
1788 }
1789
1790 case s_chunk_size_almost_done:
1791 {
1792 assert(parser->flags & F_CHUNKED);
1793 STRICT_CHECK(ch != LF);
1794
1795 parser->nread = 0;
1796
1797 if (parser->content_length == 0) {
1798 parser->flags |= F_TRAILING;
1799 parser->state = s_header_field_start;
1800 } else {
1801 parser->state = s_chunk_data;
1802 }
1803 break;
1804 }
1805
1806 case s_chunk_data:
1807 {
1808 uint64_t to_read = MIN(parser->content_length,
1809 (uint64_t) ((data + len) - p));
1810
1811 assert(parser->flags & F_CHUNKED);
1812 assert(parser->content_length != 0
1813 && parser->content_length != ULLONG_MAX);
1814
1815 /* See the explanation in s_body_identity for why the content
1816 * length and data pointers are managed this way.
1817 */
1818 MARK(body);
1819 parser->content_length -= to_read;
1820 p += to_read - 1;
1821
1822 if (parser->content_length == 0) {
1823 parser->state = s_chunk_data_almost_done;
1824 }
1825
1826 break;
1827 }
1828
1829 case s_chunk_data_almost_done:
1830 assert(parser->flags & F_CHUNKED);
1831 assert(parser->content_length == 0);
1832 STRICT_CHECK(ch != CR);
1833 parser->state = s_chunk_data_done;
1834 CALLBACK_DATA(body);
1835 break;
1836
1837 case s_chunk_data_done:
1838 assert(parser->flags & F_CHUNKED);
1839 STRICT_CHECK(ch != LF);
1840 parser->nread = 0;
1841 parser->state = s_chunk_size_start;
1842 break;
1843
1844 default:
1845 assert(0 && "unhandled state");
1846 SET_ERRNO(HPE_INVALID_INTERNAL_STATE);
1847 goto error;
1848 }
1849 }
1850
1851 /* Run callbacks for any marks that we have leftover after we ran out of
1852 * bytes. There should be at most one of these set, so it's OK to invoke
1853 * them in series (unset marks will not result in callbacks).
1854 *
1855 * We use the NOADVANCE() variety of callbacks here because 'p' has already
1856 * overflowed 'data' and this allows us to correct for the off-by-one that
1857 * we'd otherwise have (since CALLBACK_DATA() is meant to be run with a 'p'
1858 * value that's in-bounds).
1859 */
1860
1861 assert(((header_field_mark ? 1 : 0) +
1862 (header_value_mark ? 1 : 0) +
1863 (url_mark ? 1 : 0) +
1864 (body_mark ? 1 : 0)) <= 1);
1865
1866 CALLBACK_DATA_NOADVANCE(header_field);
1867 CALLBACK_DATA_NOADVANCE(header_value);
1868 CALLBACK_DATA_NOADVANCE(url);
1869 CALLBACK_DATA_NOADVANCE(body);
1870
1871 return len;
1872
1873error:
1874 if (HTTP_PARSER_ERRNO(parser) == HPE_OK) {
1875 SET_ERRNO(HPE_UNKNOWN);
1876 }
1877
1878 return (p - data);
1879}
1880
1881
1882/* Does the parser need to see an EOF to find the end of the message? */
1883int
1884http_message_needs_eof (const http_parser *parser)
1885{
1886 if (parser->type == HTTP_REQUEST) {
1887 return 0;
1888 }
1889
1890 /* See RFC 2616 section 4.4 */
1891 if (parser->status_code / 100 == 1 || /* 1xx e.g. Continue */
1892 parser->status_code == 204 || /* No Content */
1893 parser->status_code == 304 || /* Not Modified */
1894 parser->flags & F_SKIPBODY) { /* response to a HEAD request */
1895 return 0;
1896 }
1897
1898 if ((parser->flags & F_CHUNKED) || parser->content_length != ULLONG_MAX) {
1899 return 0;
1900 }
1901
1902 return 1;
1903}
1904
1905
1906int
1907http_should_keep_alive (const http_parser *parser)
1908{
1909 if (parser->http_major > 0 && parser->http_minor > 0) {
1910 /* HTTP/1.1 */
1911 if (parser->flags & F_CONNECTION_CLOSE) {
1912 return 0;
1913 }
1914 } else {
1915 /* HTTP/1.0 or earlier */
1916 if (!(parser->flags & F_CONNECTION_KEEP_ALIVE)) {
1917 return 0;
1918 }
1919 }
1920
1921 return !http_message_needs_eof(parser);
1922}
1923
1924
1925const char *
1926http_method_str (enum http_method m)
1927{
1928 return ELEM_AT(method_strings, m, "<unknown>");
1929}
1930
1931
1932void
1933http_parser_init (http_parser *parser, enum http_parser_type t)
1934{
1935 void *data = parser->data; /* preserve application data */
1936 memset(parser, 0, sizeof(*parser));
1937 parser->data = data;
1938 parser->type = t;
1939 parser->state = (t == HTTP_REQUEST ? s_start_req : (t == HTTP_RESPONSE ? s_start_res : s_start_req_or_res));
1940 parser->http_errno = HPE_OK;
1941}
1942
1943const char *
1944http_errno_name(enum http_errno err) {
1945 assert(err < (sizeof(http_strerror_tab)/sizeof(http_strerror_tab[0])));
1946 return http_strerror_tab[err].name;
1947}
1948
1949const char *
1950http_errno_description(enum http_errno err) {
1951 assert(err < (sizeof(http_strerror_tab)/sizeof(http_strerror_tab[0])));
1952 return http_strerror_tab[err].description;
1953}
1954
1955static enum http_host_state
1956http_parse_host_char(enum http_host_state s, const char ch) {
1957 switch(s) {
1958 case s_http_userinfo:
1959 case s_http_userinfo_start:
1960 if (ch == '@') {
1961 return s_http_host_start;
1962 }
1963
1964 if (IS_USERINFO_CHAR(ch)) {
1965 return s_http_userinfo;
1966 }
1967 break;
1968
1969 case s_http_host_start:
1970 if (ch == '[') {
1971 return s_http_host_v6_start;
1972 }
1973
1974 if (IS_HOST_CHAR(ch)) {
1975 return s_http_host;
1976 }
1977
1978 break;
1979
1980 case s_http_host:
1981 if (IS_HOST_CHAR(ch)) {
1982 return s_http_host;
1983 }
1984
1985 /* FALLTHROUGH */
1986 case s_http_host_v6_end:
1987 if (ch == ':') {
1988 return s_http_host_port_start;
1989 }
1990
1991 break;
1992
1993 case s_http_host_v6:
1994 if (ch == ']') {
1995 return s_http_host_v6_end;
1996 }
1997
1998 /* FALLTHROUGH */
1999 case s_http_host_v6_start:
2000 if (IS_HEX(ch) || ch == ':') {
2001 return s_http_host_v6;
2002 }
2003
2004 break;
2005
2006 case s_http_host_port:
2007 case s_http_host_port_start:
2008 if (IS_NUM(ch)) {
2009 return s_http_host_port;
2010 }
2011
2012 break;
2013
2014 default:
2015 break;
2016 }
2017 return s_http_host_dead;
2018}
2019
2020static int
2021http_parse_host(const char * buf, struct http_parser_url *u, int found_at) {
2022 enum http_host_state s;
2023
2024 const char *p;
2025 size_t buflen = u->field_data[UF_HOST].off + u->field_data[UF_HOST].len;
2026
2027 u->field_data[UF_HOST].len = 0;
2028
2029 s = found_at ? s_http_userinfo_start : s_http_host_start;
2030
2031 for (p = buf + u->field_data[UF_HOST].off; p < buf + buflen; p++) {
2032 enum http_host_state new_s = http_parse_host_char(s, *p);
2033
2034 if (new_s == s_http_host_dead) {
2035 return 1;
2036 }
2037
2038 switch(new_s) {
2039 case s_http_host:
2040 if (s != s_http_host) {
2041 u->field_data[UF_HOST].off = p - buf;
2042 }
2043 u->field_data[UF_HOST].len++;
2044 break;
2045
2046 case s_http_host_v6:
2047 if (s != s_http_host_v6) {
2048 u->field_data[UF_HOST].off = p - buf;
2049 }
2050 u->field_data[UF_HOST].len++;
2051 break;
2052
2053 case s_http_host_port:
2054 if (s != s_http_host_port) {
2055 u->field_data[UF_PORT].off = p - buf;
2056 u->field_data[UF_PORT].len = 0;
2057 u->field_set |= (1 << UF_PORT);
2058 }
2059 u->field_data[UF_PORT].len++;
2060 break;
2061
2062 case s_http_userinfo:
2063 if (s != s_http_userinfo) {
2064 u->field_data[UF_USERINFO].off = p - buf ;
2065 u->field_data[UF_USERINFO].len = 0;
2066 u->field_set |= (1 << UF_USERINFO);
2067 }
2068 u->field_data[UF_USERINFO].len++;
2069 break;
2070
2071 default:
2072 break;
2073 }
2074 s = new_s;
2075 }
2076
2077 /* Make sure we don't end somewhere unexpected */
2078 switch (s) {
2079 case s_http_host_start:
2080 case s_http_host_v6_start:
2081 case s_http_host_v6:
2082 case s_http_host_port_start:
2083 case s_http_userinfo:
2084 case s_http_userinfo_start:
2085 return 1;
2086 default:
2087 break;
2088 }
2089
2090 return 0;
2091}
2092
2093int
2094http_parser_parse_url(const char *buf, size_t buflen, int is_connect,
2095 struct http_parser_url *u)
2096{
2097 enum state s;
2098 const char *p;
2099 enum http_parser_url_fields uf, old_uf;
2100 int found_at = 0;
2101
2102 u->port = u->field_set = 0;
2103 s = is_connect ? s_req_server_start : s_req_spaces_before_url;
2104 uf = old_uf = UF_MAX;
2105
2106 for (p = buf; p < buf + buflen; p++) {
2107 s = parse_url_char(s, *p);
2108
2109 /* Figure out the next field that we're operating on */
2110 switch (s) {
2111 case s_dead:
2112 return 1;
2113
2114 /* Skip delimeters */
2115 case s_req_schema_slash:
2116 case s_req_schema_slash_slash:
2117 case s_req_server_start:
2118 case s_req_query_string_start:
2119 case s_req_fragment_start:
2120 continue;
2121
2122 case s_req_schema:
2123 uf = UF_SCHEMA;
2124 break;
2125
2126 case s_req_server_with_at:
2127 found_at = 1;
2128
2129 /* FALLTROUGH */
2130 case s_req_server:
2131 uf = UF_HOST;
2132 break;
2133
2134 case s_req_path:
2135 uf = UF_PATH;
2136 break;
2137
2138 case s_req_query_string:
2139 uf = UF_QUERY;
2140 break;
2141
2142 case s_req_fragment:
2143 uf = UF_FRAGMENT;
2144 break;
2145
2146 default:
2147 assert(!"Unexpected state");
2148 return 1;
2149 }
2150
2151 /* Nothing's changed; soldier on */
2152 if (uf == old_uf) {
2153 u->field_data[uf].len++;
2154 continue;
2155 }
2156
2157 u->field_data[uf].off = p - buf;
2158 u->field_data[uf].len = 1;
2159
2160 u->field_set |= (1 << uf);
2161 old_uf = uf;
2162 }
2163
2164 /* host must be present if there is a schema */
2165 /* parsing http:///toto will fail */
2166 if ((u->field_set & ((1 << UF_SCHEMA) | (1 << UF_HOST))) != 0) {
2167 if (http_parse_host(buf, u, found_at) != 0) {
2168 return 1;
2169 }
2170 }
2171
2172 /* CONNECT requests can only contain "hostname:port" */
2173 if (is_connect && u->field_set != ((1 << UF_HOST)|(1 << UF_PORT))) {
2174 return 1;
2175 }
2176
2177 if (u->field_set & (1 << UF_PORT)) {
2178 /* Don't bother with endp; we've already validated the string */
2179 unsigned long v = strtoul(buf + u->field_data[UF_PORT].off, NULL, 10);
2180
2181 /* Ports have a max value of 2^16 */
2182 if (v > 0xffff) {
2183 return 1;
2184 }
2185
2186 u->port = (uint16_t) v;
2187 }
2188
2189 return 0;
2190}
2191
2192void
2193http_parser_pause(http_parser *parser, int paused) {
2194 /* Users should only be pausing/unpausing a parser that is not in an error
2195 * state. In non-debug builds, there's not much that we can do about this
2196 * other than ignore it.
2197 */
2198 if (HTTP_PARSER_ERRNO(parser) == HPE_OK ||
2199 HTTP_PARSER_ERRNO(parser) == HPE_PAUSED) {
2200 SET_ERRNO((paused) ? HPE_PAUSED : HPE_OK);
2201 } else {
2202 assert(0 && "Attempting to pause parser in error state");
2203 }
2204}
2205
2206int
2207http_body_is_final(const struct http_parser *parser) {
2208 return parser->state == s_message_done;
2209}