2012330c55c71dfa7758573d73547e968e736358
[deb_shairplay.git] / src / lib / http_parser.c
1 /* Based on src/http/ngx_http_parse.c from NGINX copyright Igor Sysoev
2 *
3 * Additional changes are licensed under the same terms as NGINX and
4 * copyright Joyent, Inc. and other Node contributors. All rights reserved.
5 *
6 * Permission is hereby granted, free of charge, to any person obtaining a copy
7 * of this software and associated documentation files (the "Software"), to
8 * deal in the Software without restriction, including without limitation the
9 * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or
10 * sell copies of the Software, and to permit persons to whom the Software is
11 * furnished to do so, subject to the following conditions:
12 *
13 * The above copyright notice and this permission notice shall be included in
14 * all copies or substantial portions of the Software.
15 *
16 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
17 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
18 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
19 * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
20 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
21 * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
22 * IN THE SOFTWARE.
23 */
24 #include "http_parser.h"
25 #include <assert.h>
26 #include <stddef.h>
27
28
/* Smaller of two values. Each argument may be evaluated twice, so do not
 * pass expressions with side effects. Left alone when something included
 * earlier already provides MIN. */
#if !defined(MIN)
# define MIN(a, b) (((a) < (b)) ? (a) : (b))
#endif
32
33
/* Store error code `e` in the parser that is reachable through the local
 * variable `parser`. When HTTP_PARSER_DEBUG is enabled the source line of
 * the call site is stored in parser->error_lineno as well. */
#if HTTP_PARSER_DEBUG
# define SET_ERRNO(e)                                                 \
  do {                                                                \
    parser->http_errno = (e);                                         \
    parser->error_lineno = __LINE__;                                  \
  } while (0)
#else
# define SET_ERRNO(e)                                                 \
  do {                                                                \
    parser->http_errno = (e);                                         \
  } while (0)
#endif
46
47
/* Fire the notification callback settings->on_<FOR>(parser) if one is
 * installed. When the callback returns non-zero, HPE_CB_<FOR> is recorded
 * and the enclosing function returns the number of bytes consumed so far
 * (p - data). Expects `parser`, `settings`, `p` and `data` to be in scope. */
#define CALLBACK2(FOR)                                                \
do {                                                                  \
  if (settings->on_##FOR && 0 != settings->on_##FOR(parser)) {        \
    SET_ERRNO(HPE_CB_##FOR);                                          \
    return (p - data);                                                \
  }                                                                   \
} while (0)
57
58
/* Remember the current read position `p` as the start of the <FOR> span by
 * storing it in the local variable <FOR>_mark. */
#define MARK(FOR)                                                     \
do {                                                                  \
  FOR##_mark = p;                                                     \
} while (0)
63
/* Deliver the span that started at <FOR>_mark and ends just before `p` to
 * the data callback settings->on_<FOR>(parser, start, length), then clear
 * the mark. Nothing happens when no mark is set. When the callback returns
 * non-zero, HPE_CB_<FOR> is recorded and the enclosing function returns the
 * number of bytes consumed so far (p - data); the mark is left set in that
 * case. Expects `parser`, `settings`, `p` and `data` to be in scope. */
#define CALLBACK(FOR)                                                 \
do {                                                                  \
  if (FOR##_mark) {                                                   \
    if (settings->on_##FOR &&                                         \
        0 != settings->on_##FOR(parser, FOR##_mark, p - FOR##_mark)) {\
      SET_ERRNO(HPE_CB_##FOR);                                        \
      return (p - data);                                              \
    }                                                                 \
    FOR##_mark = NULL;                                                \
  }                                                                   \
} while (0)
79
80
/* Lower-case spellings of the header names and header values that get
 * special treatment. The header-name constants are compared one character
 * at a time against the lower-cased header field while it is being read. */
#define PROXY_CONNECTION   "proxy-connection"
#define CONNECTION         "connection"
#define CONTENT_LENGTH     "content-length"
#define TRANSFER_ENCODING  "transfer-encoding"
#define UPGRADE            "upgrade"
#define CHUNKED            "chunked"
#define KEEP_ALIVE         "keep-alive"
#define CLOSE              "close"
89
90
/* Printable names of the supported request methods (HTTP, WebDAV, UPnP and
 * RTSP). The request-line state machine indexes this table with
 * parser->method to match the method character by character, so the order
 * of the entries must stay in step with the values of enum http_method
 * (presumably declared in http_parser.h -- verify when adding a method).
 *
 * Both the strings and the table itself are const so that the whole table
 * is read-only. */
static const char * const method_strings[] = {
  "DELETE",
  "GET",
  "HEAD",
  "POST",
  "PUT",
  "CONNECT",
  "OPTIONS",
  "TRACE",
  "COPY",
  "LOCK",
  "MKCOL",
  "MOVE",
  "PROPFIND",
  "PROPPATCH",
  "UNLOCK",
  "REPORT",
  "MKACTIVITY",
  "CHECKOUT",
  "MERGE",
  "M-SEARCH",
  "NOTIFY",
  "SUBSCRIBE",
  "UNSUBSCRIBE",
  "PATCH",
  "DESCRIBE",
  "ANNOUNCE",
  "SETUP",
  "PLAY",
  "PAUSE",
  "TEARDOWN",
  "GET_PARAMETER",
  "SET_PARAMETER",
  "REDIRECT",
  "RECORD",
  "FLUSH"
};
128
129
/* Tokens as defined by RFC 2616. The table also lower-cases them.
 *        token       = 1*<any CHAR except CTLs or separators>
 *     separators     = "(" | ")" | "<" | ">" | "@"
 *                    | "," | ";" | ":" | "\" | <">
 *                    | "/" | "[" | "]" | "?" | "="
 *                    | "{" | "}" | SP | HT
 *
 * Indexed by the unsigned byte value. An entry of 0 means the byte may not
 * appear in a token; any other entry is the lower-cased form of the byte.
 * Entries 128..255 are not listed and are therefore 0.
 *
 * Every separator listed above maps to 0, with one intentional exception:
 * SP stays accepted because the header-field state explicitly tolerates
 * spaces between a fully matched header name and the ':'.
 */
static const char tokens[256] = {
/*   0 nul    1 soh    2 stx    3 etx    4 eot    5 enq    6 ack    7 bel  */
        0,       0,       0,       0,       0,       0,       0,       0,
/*   8 bs     9 ht    10 nl    11 vt    12 np    13 cr    14 so    15 si   */
        0,       0,       0,       0,       0,       0,       0,       0,
/*  16 dle   17 dc1   18 dc2   19 dc3   20 dc4   21 nak   22 syn   23 etb */
        0,       0,       0,       0,       0,       0,       0,       0,
/*  24 can   25 em    26 sub   27 esc   28 fs    29 gs    30 rs    31 us  */
        0,       0,       0,       0,       0,       0,       0,       0,
/*  32 sp    33  !    34  "    35  #    36  $    37  %    38  &    39  '  */
       ' ',     '!',      0,      '#',     '$',     '%',     '&',    '\'',
/*  40  (    41  )    42  *    43  +    44  ,    45  -    46  .    47  /  */
        0,       0,      '*',     '+',      0,      '-',     '.',      0,
/*  48  0    49  1    50  2    51  3    52  4    53  5    54  6    55  7  */
       '0',     '1',     '2',     '3',     '4',     '5',     '6',     '7',
/*  56  8    57  9    58  :    59  ;    60  <    61  =    62  >    63  ?  */
       '8',     '9',      0,       0,       0,       0,       0,       0,
/*  64  @    65  A    66  B    67  C    68  D    69  E    70  F    71  G  */
        0,      'a',     'b',     'c',     'd',     'e',     'f',     'g',
/*  72  H    73  I    74  J    75  K    76  L    77  M    78  N    79  O  */
       'h',     'i',     'j',     'k',     'l',     'm',     'n',     'o',
/*  80  P    81  Q    82  R    83  S    84  T    85  U    86  V    87  W  */
       'p',     'q',     'r',     's',     't',     'u',     'v',     'w',
/*  88  X    89  Y    90  Z    91  [    92  \    93  ]    94  ^    95  _  */
       'x',     'y',     'z',      0,       0,       0,      '^',     '_',
/*  96  `    97  a    98  b    99  c   100  d   101  e   102  f   103  g  */
       '`',     'a',     'b',     'c',     'd',     'e',     'f',     'g',
/* 104  h   105  i   106  j   107  k   108  l   109  m   110  n   111  o  */
       'h',     'i',     'j',     'k',     'l',     'm',     'n',     'o',
/* 112  p   113  q   114  r   115  s   116  t   117  u   118  v   119  w  */
       'p',     'q',     'r',     's',     't',     'u',     'v',     'w',
/* 120  x   121  y   122  z   123  {   124  |   125  }   126  ~   127 del */
       'x',     'y',     'z',      0,      '|',      0,      '~',       0 };
170
171
/* Value of a hexadecimal digit, indexed by the unsigned byte value.
 * '0'..'9' map to 0..9, 'A'..'F' and 'a'..'f' map to 10..15, and every
 * other byte maps to -1.
 *
 * All 256 entries are spelled out on purpose: entries omitted from the
 * initializer would be zero-initialized, which would make bytes
 * 0x80..0xFF look like the valid digit 0 instead of "not a hex digit". */
static const int8_t unhex[256] =
  {-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1   /* 0x00 */
  ,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1   /* 0x10 */
  ,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1   /* 0x20 */
  , 0, 1, 2, 3, 4, 5, 6, 7, 8, 9,-1,-1,-1,-1,-1,-1   /* 0x30: '0'..'9' */
  ,-1,10,11,12,13,14,15,-1,-1,-1,-1,-1,-1,-1,-1,-1   /* 0x40: 'A'..'F' */
  ,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1   /* 0x50 */
  ,-1,10,11,12,13,14,15,-1,-1,-1,-1,-1,-1,-1,-1,-1   /* 0x60: 'a'..'f' */
  ,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1   /* 0x70 */
  ,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1   /* 0x80 */
  ,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1   /* 0x90 */
  ,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1   /* 0xA0 */
  ,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1   /* 0xB0 */
  ,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1   /* 0xC0 */
  ,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1   /* 0xD0 */
  ,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1   /* 0xE0 */
  ,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1   /* 0xF0 */
  };
182
183
/* Bytes that may appear inside a URL without the URL states having to look
 * at them individually; indexed by the unsigned byte value, 1 = ordinary
 * URL byte.
 *
 * Control characters, space and DEL are 0. '#' and '?' are 0 as well
 * because the URL states dispatch on them explicitly after this lookup
 * fails. Entries 128..255 are not listed and are therefore 0; the
 * non-strict IS_URL_CHAR() accepts those bytes through its own high-bit
 * test instead. */
static const uint8_t normal_url_char[256] = {
  /* 0x00  nul .. si  */
  0, 0, 0, 0, 0, 0, 0, 0,  0, 0, 0, 0, 0, 0, 0, 0,
  /* 0x10  dle .. us  */
  0, 0, 0, 0, 0, 0, 0, 0,  0, 0, 0, 0, 0, 0, 0, 0,
  /* 0x20  sp ! " # $ % & '   ( ) * + , - . /  */
  0, 1, 1, 0, 1, 1, 1, 1,  1, 1, 1, 1, 1, 1, 1, 1,
  /* 0x30  0 1 2 3 4 5 6 7    8 9 : ; < = > ?  */
  1, 1, 1, 1, 1, 1, 1, 1,  1, 1, 1, 1, 1, 1, 1, 0,
  /* 0x40  @ A B C D E F G    H I J K L M N O  */
  1, 1, 1, 1, 1, 1, 1, 1,  1, 1, 1, 1, 1, 1, 1, 1,
  /* 0x50  P Q R S T U V W    X Y Z [ \ ] ^ _  */
  1, 1, 1, 1, 1, 1, 1, 1,  1, 1, 1, 1, 1, 1, 1, 1,
  /* 0x60  ` a b c d e f g    h i j k l m n o  */
  1, 1, 1, 1, 1, 1, 1, 1,  1, 1, 1, 1, 1, 1, 1, 1,
  /* 0x70  p q r s t u v w    x y z { | } ~ del */
  1, 1, 1, 1, 1, 1, 1, 1,  1, 1, 1, 1, 1, 1, 1, 0
};
217
218
/* States of the message state machine, stored in parser->state between
 * calls. The numeric order matters: everything up to and including
 * s_headers_almost_done counts as "still reading the start line or
 * headers" (see PARSING_HEADER below), everything after it is a body
 * state. */
enum state
  { s_dead = 1 /* important that this is > 0 */

  /* start of a message whose kind (request or response) is not yet known,
   * followed by the response status line */
  , s_start_req_or_res
  , s_res_or_resp_H
  , s_start_res
  , s_res_H
  , s_res_HT
  , s_res_HTT
  , s_res_HTTP
  , s_res_first_http_major
  , s_res_http_major
  , s_res_first_http_minor
  , s_res_http_minor
  , s_res_first_status_code
  , s_res_status_code
  , s_res_status
  , s_res_line_almost_done

  /* request line: method, URL, protocol version */
  , s_start_req

  , s_req_method
  , s_req_spaces_before_url
  , s_req_schema
  , s_req_schema_slash
  , s_req_schema_slash_slash
  , s_req_host
  , s_req_port
  , s_req_path
  , s_req_query_string_start
  , s_req_query_string
  , s_req_fragment_start
  , s_req_fragment
  , s_req_http_start
  , s_req_http_H
  , s_req_http_HT
  , s_req_http_HTT
  , s_req_http_HTTP
  , s_req_first_http_major
  , s_req_http_major
  , s_req_first_http_minor
  , s_req_http_minor
  , s_req_line_almost_done

  /* header fields and values */
  , s_header_field_start
  , s_header_field
  , s_header_value_start
  , s_header_value
  , s_header_value_lws

  , s_header_almost_done

  /* chunk size line of a chunked body */
  , s_chunk_size_start
  , s_chunk_size
  , s_chunk_parameters
  , s_chunk_size_almost_done

  , s_headers_almost_done
  /* Important: 's_headers_almost_done' must be the last 'header' state. All
   * states beyond this must be 'body' states. It is used for overflow
   * checking. See the PARSING_HEADER() macro.
   */

  , s_chunk_data
  , s_chunk_data_almost_done
  , s_chunk_data_done

  , s_body_identity
  , s_body_identity_eof
  };


/* True while `state` is a header state, i.e. not a body state. The parse
 * loop uses this to count consumed bytes against HTTP_MAX_HEADER_SIZE.
 * The argument is parenthesized so that any expression, including a
 * conditional one, can be passed safely. */
#define PARSING_HEADER(state) ((state) <= s_headers_almost_done)
292
293
/* Sub-state used while reading a header line. It tracks whether the header
 * field, and afterwards its value, is one of the few the parser treats
 * specially; h_general means "nothing special". */
enum header_states {
  h_general = 0,

  /* shared prefix "con" of "connection" and "content-length" */
  h_C,
  h_CO,
  h_CON,

  /* header name still being matched */
  h_matching_connection,
  h_matching_proxy_connection,
  h_matching_content_length,
  h_matching_transfer_encoding,
  h_matching_upgrade,

  /* header name fully matched */
  h_connection,
  h_content_length,
  h_transfer_encoding,
  h_upgrade,

  /* header value still being matched */
  h_matching_transfer_encoding_chunked,
  h_matching_connection_keep_alive,
  h_matching_connection_close,

  /* header value fully matched */
  h_transfer_encoding_chunked,
  h_connection_keep_alive,
  h_connection_close
};
319
320
/* Macros for character classes; IS_URL_CHAR and IS_HOST_CHAR depend on
 * strict-mode.
 *
 * Every macro parenthesizes its argument so that arbitrary expressions can
 * be passed. Several of them evaluate the argument more than once, so do
 * not pass expressions with side effects.
 *
 *   LOWER(c)            c with bit 0x20 set: lower-cases ASCII letters
 *   TOKEN(c)            lower-cased c if it is a token character, else 0
 *   IS_NUMERIC_CHAR(c)  hex digit, '.' or ':' (characters of a numeric host)
 *   IS_URL_CHAR(c)      ordinary URL byte; non-strict mode also accepts any
 *                       byte with the high bit set
 *   IS_HOST_CHAR(c)     letter, digit, '.' or '-'; non-strict mode also
 *                       accepts '_'
 */
#define CR                  '\r'
#define LF                  '\n'
#define LOWER(c)            ((unsigned char)((c) | 0x20))
#define TOKEN(c)            (tokens[(unsigned char)(c)])
#define IS_ALPHA(c)         (LOWER(c) >= 'a' && LOWER(c) <= 'z')
#define IS_NUM(c)           ((c) >= '0' && (c) <= '9')
#define IS_ALPHANUM(c)      (IS_ALPHA(c) || IS_NUM(c))
#define IS_NUMERIC_CHAR(c)  (IS_NUM(c) || (LOWER(c) >= 'a' && LOWER(c) <= 'f') || (c) == '.' || (c) == ':')

#if HTTP_PARSER_STRICT
#define IS_URL_CHAR(c)      (normal_url_char[(unsigned char) (c)])
#define IS_HOST_CHAR(c)     (IS_ALPHANUM(c) || (c) == '.' || (c) == '-')
#else
#define IS_URL_CHAR(c)                                                         \
  (normal_url_char[(unsigned char) (c)] || ((c) & 0x80))
#define IS_HOST_CHAR(c)                                                        \
  (IS_ALPHANUM(c) || (c) == '.' || (c) == '-' || (c) == '_')
#endif
340
341
/* State in which parsing of a message starts: s_start_req when the parser
 * is configured as HTTP_REQUEST, s_start_res for every other parser type.
 * Reads the local variable `parser`. */
#define start_state \
  ((parser->type == HTTP_REQUEST) ? s_start_req : s_start_res)
343
344
/* STRICT_CHECK(cond): in strict builds, a true `cond` records HPE_STRICT and
 * jumps to the `error` label of the enclosing function; in non-strict
 * builds the check expands to nothing and `cond` is never evaluated.
 *
 * NEW_MESSAGE(): state to continue in once a message has been handled. In
 * strict builds that is s_dead unless http_should_keep_alive(parser) holds;
 * non-strict builds always go back to start_state. */
#if HTTP_PARSER_STRICT
# define STRICT_CHECK(cond)                                           \
  do {                                                                \
    if (cond) {                                                       \
      SET_ERRNO(HPE_STRICT);                                          \
      goto error;                                                     \
    }                                                                 \
  } while (0)
# define NEW_MESSAGE() (http_should_keep_alive(parser) ? start_state : s_dead)
#else
# define STRICT_CHECK(cond)
# define NEW_MESSAGE() start_state
#endif
358
359
360 /* Map errno values to strings for human-readable output */
361 #define HTTP_STRERROR_GEN(n, s) { "HPE_" #n, s },
362 static struct {
363 const char *name;
364 const char *description;
365 } http_strerror_tab[] = {
366 HTTP_ERRNO_MAP(HTTP_STRERROR_GEN)
367 };
368 #undef HTTP_STRERROR_GEN
369
370
371 size_t http_parser_execute (http_parser *parser,
372 const http_parser_settings *settings,
373 const char *data,
374 size_t len)
375 {
376 char c, ch;
377 int8_t unhex_val;
378 const char *p = data, *pe;
379 int64_t to_read;
380 enum state state;
381 enum header_states header_state;
382 uint64_t index = parser->index;
383 uint64_t nread = parser->nread;
384
385 /* technically we could combine all of these (except for url_mark) into one
386 variable, saving stack space, but it seems more clear to have them
387 separated. */
388 const char *header_field_mark = 0;
389 const char *header_value_mark = 0;
390 const char *url_mark = 0;
391
392 /* We're in an error state. Don't bother doing anything. */
393 if (HTTP_PARSER_ERRNO(parser) != HPE_OK) {
394 return 0;
395 }
396
397 state = (enum state) parser->state;
398 header_state = (enum header_states) parser->header_state;
399
400 if (len == 0) {
401 switch (state) {
402 case s_body_identity_eof:
403 CALLBACK2(message_complete);
404 return 0;
405
406 case s_dead:
407 case s_start_req_or_res:
408 case s_start_res:
409 case s_start_req:
410 return 0;
411
412 default:
413 SET_ERRNO(HPE_INVALID_EOF_STATE);
414 return 1;
415 }
416 }
417
418
419 if (state == s_header_field)
420 header_field_mark = data;
421 if (state == s_header_value)
422 header_value_mark = data;
423 if (state == s_req_path || state == s_req_schema || state == s_req_schema_slash
424 || state == s_req_schema_slash_slash || state == s_req_port
425 || state == s_req_query_string_start || state == s_req_query_string
426 || state == s_req_host
427 || state == s_req_fragment_start || state == s_req_fragment)
428 url_mark = data;
429
430 for (p=data, pe=data+len; p != pe; p++) {
431 ch = *p;
432
433 if (PARSING_HEADER(state)) {
434 ++nread;
435 /* Buffer overflow attack */
436 if (nread > HTTP_MAX_HEADER_SIZE) {
437 SET_ERRNO(HPE_HEADER_OVERFLOW);
438 goto error;
439 }
440 }
441
442 switch (state) {
443
444 case s_dead:
445 /* this state is used after a 'Connection: close' message
446 * the parser will error out if it reads another message
447 */
448 SET_ERRNO(HPE_CLOSED_CONNECTION);
449 goto error;
450
451 case s_start_req_or_res:
452 {
453 if (ch == CR || ch == LF)
454 break;
455 parser->flags = 0;
456 parser->content_length = -1;
457
458 CALLBACK2(message_begin);
459
460 if (ch == 'H')
461 state = s_res_or_resp_H;
462 else {
463 parser->type = HTTP_REQUEST;
464 goto start_req_method_assign;
465 }
466 break;
467 }
468
469 case s_res_or_resp_H:
470 if (ch == 'T') {
471 parser->type = HTTP_RESPONSE;
472 state = s_res_HT;
473 } else {
474 if (ch != 'E') {
475 SET_ERRNO(HPE_INVALID_CONSTANT);
476 goto error;
477 }
478
479 parser->type = HTTP_REQUEST;
480 parser->method = HTTP_HEAD;
481 index = 2;
482 state = s_req_method;
483 }
484 break;
485
486 case s_start_res:
487 {
488 parser->flags = 0;
489 parser->content_length = -1;
490
491 CALLBACK2(message_begin);
492
493 switch (ch) {
494 case 'H':
495 state = s_res_H;
496 break;
497
498 case CR:
499 case LF:
500 break;
501
502 default:
503 SET_ERRNO(HPE_INVALID_CONSTANT);
504 goto error;
505 }
506 break;
507 }
508
509 case s_res_H:
510 STRICT_CHECK(ch != 'T');
511 state = s_res_HT;
512 break;
513
514 case s_res_HT:
515 STRICT_CHECK(ch != 'T');
516 state = s_res_HTT;
517 break;
518
519 case s_res_HTT:
520 STRICT_CHECK(ch != 'P');
521 state = s_res_HTTP;
522 break;
523
524 case s_res_HTTP:
525 STRICT_CHECK(ch != '/');
526 state = s_res_first_http_major;
527 break;
528
529 case s_res_first_http_major:
530 if (ch < '0' || ch > '9') {
531 SET_ERRNO(HPE_INVALID_VERSION);
532 goto error;
533 }
534
535 parser->http_major = ch - '0';
536 state = s_res_http_major;
537 break;
538
539 /* major HTTP version or dot */
540 case s_res_http_major:
541 {
542 if (ch == '.') {
543 state = s_res_first_http_minor;
544 break;
545 }
546
547 if (!IS_NUM(ch)) {
548 SET_ERRNO(HPE_INVALID_VERSION);
549 goto error;
550 }
551
552 parser->http_major *= 10;
553 parser->http_major += ch - '0';
554
555 if (parser->http_major > 999) {
556 SET_ERRNO(HPE_INVALID_VERSION);
557 goto error;
558 }
559
560 break;
561 }
562
563 /* first digit of minor HTTP version */
564 case s_res_first_http_minor:
565 if (!IS_NUM(ch)) {
566 SET_ERRNO(HPE_INVALID_VERSION);
567 goto error;
568 }
569
570 parser->http_minor = ch - '0';
571 state = s_res_http_minor;
572 break;
573
574 /* minor HTTP version or end of request line */
575 case s_res_http_minor:
576 {
577 if (ch == ' ') {
578 state = s_res_first_status_code;
579 break;
580 }
581
582 if (!IS_NUM(ch)) {
583 SET_ERRNO(HPE_INVALID_VERSION);
584 goto error;
585 }
586
587 parser->http_minor *= 10;
588 parser->http_minor += ch - '0';
589
590 if (parser->http_minor > 999) {
591 SET_ERRNO(HPE_INVALID_VERSION);
592 goto error;
593 }
594
595 break;
596 }
597
598 case s_res_first_status_code:
599 {
600 if (!IS_NUM(ch)) {
601 if (ch == ' ') {
602 break;
603 }
604
605 SET_ERRNO(HPE_INVALID_STATUS);
606 goto error;
607 }
608 parser->status_code = ch - '0';
609 state = s_res_status_code;
610 break;
611 }
612
613 case s_res_status_code:
614 {
615 if (!IS_NUM(ch)) {
616 switch (ch) {
617 case ' ':
618 state = s_res_status;
619 break;
620 case CR:
621 state = s_res_line_almost_done;
622 break;
623 case LF:
624 state = s_header_field_start;
625 break;
626 default:
627 SET_ERRNO(HPE_INVALID_STATUS);
628 goto error;
629 }
630 break;
631 }
632
633 parser->status_code *= 10;
634 parser->status_code += ch - '0';
635
636 if (parser->status_code > 999) {
637 SET_ERRNO(HPE_INVALID_STATUS);
638 goto error;
639 }
640
641 break;
642 }
643
644 case s_res_status:
645 /* the human readable status. e.g. "NOT FOUND"
646 * we are not humans so just ignore this */
647 if (ch == CR) {
648 state = s_res_line_almost_done;
649 break;
650 }
651
652 if (ch == LF) {
653 state = s_header_field_start;
654 break;
655 }
656 break;
657
658 case s_res_line_almost_done:
659 STRICT_CHECK(ch != LF);
660 state = s_header_field_start;
661 break;
662
663 case s_start_req:
664 {
665 if (ch == CR || ch == LF)
666 break;
667 parser->flags = 0;
668 parser->content_length = -1;
669
670 CALLBACK2(message_begin);
671
672 if (!IS_ALPHA(ch)) {
673 SET_ERRNO(HPE_INVALID_METHOD);
674 goto error;
675 }
676
677 start_req_method_assign:
678 parser->method = (enum http_method) 0;
679 index = 1;
680 switch (ch) {
681 case 'A': parser->method = HTTP_ANNOUNCE; break;
682 case 'C': parser->method = HTTP_CONNECT; /* or COPY, CHECKOUT */ break;
683 case 'D': parser->method = HTTP_DELETE; /* or DESCRIBE */ break;
684 case 'F': parser->method = HTTP_FLUSH; break;
685 case 'G': parser->method = HTTP_GET; /* or GET_PARAMETER */ break;
686 case 'H': parser->method = HTTP_HEAD; break;
687 case 'L': parser->method = HTTP_LOCK; break;
688 case 'M': parser->method = HTTP_MKCOL; /* or MOVE, MKACTIVITY, MERGE, M-SEARCH */ break;
689 case 'N': parser->method = HTTP_NOTIFY; break;
690 case 'O': parser->method = HTTP_OPTIONS; break;
691 case 'P': parser->method = HTTP_POST;
692 /* or PROPFIND or PROPPATCH or PUT or PATCH or PLAY or PAUSE */
693 break;
694 case 'R': parser->method = HTTP_REPORT; /* or REDIRECT, RECORD */ break;
695 case 'S': parser->method = HTTP_SUBSCRIBE; /* or SETUP, SET_PARAMETER */ break;
696 case 'T': parser->method = HTTP_TRACE; /* or TEARDOWN */ break;
697 case 'U': parser->method = HTTP_UNLOCK; /* or UNSUBSCRIBE */ break;
698 default:
699 SET_ERRNO(HPE_INVALID_METHOD);
700 goto error;
701 }
702 state = s_req_method;
703 break;
704 }
705
706 case s_req_method:
707 {
708 const char *matcher;
709 if (ch == '\0') {
710 SET_ERRNO(HPE_INVALID_METHOD);
711 goto error;
712 }
713
714 matcher = method_strings[parser->method];
715 if (ch == ' ' && matcher[index] == '\0') {
716 state = s_req_spaces_before_url;
717 } else if (ch == matcher[index]) {
718 ; /* nada */
719 } else if (parser->method == HTTP_CONNECT) {
720 if (index == 1 && ch == 'H') {
721 parser->method = HTTP_CHECKOUT;
722 } else if (index == 2 && ch == 'P') {
723 parser->method = HTTP_COPY;
724 } else {
725 goto error;
726 }
727 } else if (index == 2 && parser->method == HTTP_DELETE && ch == 'S') {
728 parser->method = HTTP_DESCRIBE;
729 } else if (index == 3 && parser->method == HTTP_GET && ch == '_') {
730 parser->method = HTTP_GET_PARAMETER;
731 } else if (parser->method == HTTP_MKCOL) {
732 if (index == 1 && ch == 'O') {
733 parser->method = HTTP_MOVE;
734 } else if (index == 1 && ch == 'E') {
735 parser->method = HTTP_MERGE;
736 } else if (index == 1 && ch == '-') {
737 parser->method = HTTP_MSEARCH;
738 } else if (index == 2 && ch == 'A') {
739 parser->method = HTTP_MKACTIVITY;
740 } else {
741 goto error;
742 }
743 } else if (index == 1 && parser->method == HTTP_POST) {
744 if (ch == 'R') {
745 parser->method = HTTP_PROPFIND; /* or HTTP_PROPPATCH */
746 } else if (ch == 'U') {
747 parser->method = HTTP_PUT;
748 } else if (ch == 'A') {
749 parser->method = HTTP_PATCH; /* or HTTP_PAUSE */
750 } else if (ch == 'L') {
751 parser->method = HTTP_PLAY;
752 } else {
753 goto error;
754 }
755 } else if (index == 2 && parser->method == HTTP_REPORT) {
756 if (ch == 'D') {
757 parser->method = HTTP_REDIRECT;
758 } else if (ch == 'C') {
759 parser->method = HTTP_RECORD;
760 } else {
761 goto error;
762 }
763 } else if (index == 1 && parser->method == HTTP_SUBSCRIBE && ch == 'E') {
764 parser->method = HTTP_SETUP; /* or HTTP_SET_PARAMETER */
765 } else if (index == 3 && parser->method == HTTP_SETUP && ch == '_') {
766 parser->method = HTTP_SET_PARAMETER;
767 } else if (index == 1 && parser->method == HTTP_TRACE && ch == 'E') {
768 parser->method = HTTP_TEARDOWN;
769 } else if (index == 2 && parser->method == HTTP_UNLOCK && ch == 'S') {
770 parser->method = HTTP_UNSUBSCRIBE;
771 } else if (index == 4 && parser->method == HTTP_PROPFIND && ch == 'P') {
772 parser->method = HTTP_PROPPATCH;
773 } else if (index == 2 && parser->method == HTTP_PATCH && ch == 'U') {
774 parser->method = HTTP_PAUSE;
775 } else {
776 SET_ERRNO(HPE_INVALID_METHOD);
777 goto error;
778 }
779
780 ++index;
781 break;
782 }
783 case s_req_spaces_before_url:
784 {
785 if (ch == ' ') break;
786
787 if (ch == '/' || ch == '*') {
788 MARK(url);
789 state = s_req_path;
790 break;
791 }
792
793 /* Proxied requests are followed by scheme of an absolute URI (alpha).
794 * CONNECT is followed by a hostname, which begins with alphanum.
795 * All other methods are followed by '/' or '*' (handled above).
796 */
797 if (IS_ALPHA(ch) || (parser->method == HTTP_CONNECT && IS_NUM(ch))) {
798 MARK(url);
799 state = (parser->method == HTTP_CONNECT) ? s_req_host : s_req_schema;
800 break;
801 }
802
803 SET_ERRNO(HPE_INVALID_URL);
804 goto error;
805 }
806
807 case s_req_schema:
808 {
809 if (IS_ALPHA(ch)) break;
810
811 if (ch == ':') {
812 state = s_req_schema_slash;
813 break;
814 }
815
816 SET_ERRNO(HPE_INVALID_URL);
817 goto error;
818 }
819
820 case s_req_schema_slash:
821 STRICT_CHECK(ch != '/');
822 state = s_req_schema_slash_slash;
823 break;
824
825 case s_req_schema_slash_slash:
826 STRICT_CHECK(ch != '/');
827 state = s_req_host;
828 break;
829
830 case s_req_host:
831 {
832 if (parser->numerichost) {
833 if (IS_NUMERIC_CHAR(ch)) break;
834 } else {
835 if (IS_HOST_CHAR(ch)) break;
836 }
837 switch (ch) {
838 case ':':
839 state = s_req_port;
840 break;
841 case '/':
842 state = s_req_path;
843 break;
844 case ' ':
845 /* The request line looks like:
846 * "GET http://foo.bar.com HTTP/1.1"
847 * That is, there is no path.
848 */
849 CALLBACK(url);
850 state = s_req_http_start;
851 break;
852 case '?':
853 state = s_req_query_string_start;
854 break;
855 default:
856 SET_ERRNO(HPE_INVALID_HOST);
857 goto error;
858 }
859 break;
860 }
861
862 case s_req_port:
863 {
864 if (IS_NUM(ch)) break;
865 switch (ch) {
866 case '/':
867 state = s_req_path;
868 break;
869 case ' ':
870 /* The request line looks like:
871 * "GET http://foo.bar.com:1234 HTTP/1.1"
872 * That is, there is no path.
873 */
874 CALLBACK(url);
875 state = s_req_http_start;
876 break;
877 case '?':
878 state = s_req_query_string_start;
879 break;
880 default:
881 SET_ERRNO(HPE_INVALID_PORT);
882 goto error;
883 }
884 break;
885 }
886
887 case s_req_path:
888 {
889 if (IS_URL_CHAR(ch)) break;
890
891 switch (ch) {
892 case ' ':
893 CALLBACK(url);
894 state = s_req_http_start;
895 break;
896 case CR:
897 CALLBACK(url);
898 parser->http_major = 0;
899 parser->http_minor = 9;
900 state = s_req_line_almost_done;
901 break;
902 case LF:
903 CALLBACK(url);
904 parser->http_major = 0;
905 parser->http_minor = 9;
906 state = s_header_field_start;
907 break;
908 case '?':
909 state = s_req_query_string_start;
910 break;
911 case '#':
912 state = s_req_fragment_start;
913 break;
914 default:
915 SET_ERRNO(HPE_INVALID_PATH);
916 goto error;
917 }
918 break;
919 }
920
921 case s_req_query_string_start:
922 {
923 if (IS_URL_CHAR(ch)) {
924 state = s_req_query_string;
925 break;
926 }
927
928 switch (ch) {
929 case '?':
930 break; /* XXX ignore extra '?' ... is this right? */
931 case ' ':
932 CALLBACK(url);
933 state = s_req_http_start;
934 break;
935 case CR:
936 CALLBACK(url);
937 parser->http_major = 0;
938 parser->http_minor = 9;
939 state = s_req_line_almost_done;
940 break;
941 case LF:
942 CALLBACK(url);
943 parser->http_major = 0;
944 parser->http_minor = 9;
945 state = s_header_field_start;
946 break;
947 case '#':
948 state = s_req_fragment_start;
949 break;
950 default:
951 SET_ERRNO(HPE_INVALID_QUERY_STRING);
952 goto error;
953 }
954 break;
955 }
956
957 case s_req_query_string:
958 {
959 if (IS_URL_CHAR(ch)) break;
960
961 switch (ch) {
962 case '?':
963 /* allow extra '?' in query string */
964 break;
965 case ' ':
966 CALLBACK(url);
967 state = s_req_http_start;
968 break;
969 case CR:
970 CALLBACK(url);
971 parser->http_major = 0;
972 parser->http_minor = 9;
973 state = s_req_line_almost_done;
974 break;
975 case LF:
976 CALLBACK(url);
977 parser->http_major = 0;
978 parser->http_minor = 9;
979 state = s_header_field_start;
980 break;
981 case '#':
982 state = s_req_fragment_start;
983 break;
984 default:
985 SET_ERRNO(HPE_INVALID_QUERY_STRING);
986 goto error;
987 }
988 break;
989 }
990
991 case s_req_fragment_start:
992 {
993 if (IS_URL_CHAR(ch)) {
994 state = s_req_fragment;
995 break;
996 }
997
998 switch (ch) {
999 case ' ':
1000 CALLBACK(url);
1001 state = s_req_http_start;
1002 break;
1003 case CR:
1004 CALLBACK(url);
1005 parser->http_major = 0;
1006 parser->http_minor = 9;
1007 state = s_req_line_almost_done;
1008 break;
1009 case LF:
1010 CALLBACK(url);
1011 parser->http_major = 0;
1012 parser->http_minor = 9;
1013 state = s_header_field_start;
1014 break;
1015 case '?':
1016 state = s_req_fragment;
1017 break;
1018 case '#':
1019 break;
1020 default:
1021 SET_ERRNO(HPE_INVALID_FRAGMENT);
1022 goto error;
1023 }
1024 break;
1025 }
1026
1027 case s_req_fragment:
1028 {
1029 if (IS_URL_CHAR(ch)) break;
1030
1031 switch (ch) {
1032 case ' ':
1033 CALLBACK(url);
1034 state = s_req_http_start;
1035 break;
1036 case CR:
1037 CALLBACK(url);
1038 parser->http_major = 0;
1039 parser->http_minor = 9;
1040 state = s_req_line_almost_done;
1041 break;
1042 case LF:
1043 CALLBACK(url);
1044 parser->http_major = 0;
1045 parser->http_minor = 9;
1046 state = s_header_field_start;
1047 break;
1048 case '?':
1049 case '#':
1050 break;
1051 default:
1052 SET_ERRNO(HPE_INVALID_FRAGMENT);
1053 goto error;
1054 }
1055 break;
1056 }
1057
1058 case s_req_http_start:
1059 switch (ch) {
1060 case 'H':
1061 case 'R':
1062 state = s_req_http_H;
1063 break;
1064 case ' ':
1065 break;
1066 default:
1067 SET_ERRNO(HPE_INVALID_CONSTANT);
1068 goto error;
1069 }
1070 break;
1071
1072 case s_req_http_H:
1073 STRICT_CHECK(ch != 'T');
1074 state = s_req_http_HT;
1075 break;
1076
1077 case s_req_http_HT:
1078 STRICT_CHECK(ch != 'T');
1079 state = s_req_http_HTT;
1080 break;
1081
1082 case s_req_http_HTT:
1083 STRICT_CHECK(ch != 'P');
1084 state = s_req_http_HTTP;
1085 break;
1086
1087 case s_req_http_HTTP:
1088 STRICT_CHECK(ch != '/');
1089 state = s_req_first_http_major;
1090 break;
1091
1092 /* first digit of major HTTP version */
1093 case s_req_first_http_major:
1094 if (ch < '1' || ch > '9') {
1095 SET_ERRNO(HPE_INVALID_VERSION);
1096 goto error;
1097 }
1098
1099 parser->http_major = ch - '0';
1100 state = s_req_http_major;
1101 break;
1102
1103 /* major HTTP version or dot */
1104 case s_req_http_major:
1105 {
1106 if (ch == '.') {
1107 state = s_req_first_http_minor;
1108 break;
1109 }
1110
1111 if (!IS_NUM(ch)) {
1112 SET_ERRNO(HPE_INVALID_VERSION);
1113 goto error;
1114 }
1115
1116 parser->http_major *= 10;
1117 parser->http_major += ch - '0';
1118
1119 if (parser->http_major > 999) {
1120 SET_ERRNO(HPE_INVALID_VERSION);
1121 goto error;
1122 }
1123
1124 break;
1125 }
1126
1127 /* first digit of minor HTTP version */
1128 case s_req_first_http_minor:
1129 if (!IS_NUM(ch)) {
1130 SET_ERRNO(HPE_INVALID_VERSION);
1131 goto error;
1132 }
1133
1134 parser->http_minor = ch - '0';
1135 state = s_req_http_minor;
1136 break;
1137
1138 /* minor HTTP version or end of request line */
1139 case s_req_http_minor:
1140 {
1141 if (ch == CR) {
1142 state = s_req_line_almost_done;
1143 break;
1144 }
1145
1146 if (ch == LF) {
1147 state = s_header_field_start;
1148 break;
1149 }
1150
1151 /* XXX allow spaces after digit? */
1152
1153 if (!IS_NUM(ch)) {
1154 SET_ERRNO(HPE_INVALID_VERSION);
1155 goto error;
1156 }
1157
1158 parser->http_minor *= 10;
1159 parser->http_minor += ch - '0';
1160
1161 if (parser->http_minor > 999) {
1162 SET_ERRNO(HPE_INVALID_VERSION);
1163 goto error;
1164 }
1165
1166 break;
1167 }
1168
1169 /* end of request line */
1170 case s_req_line_almost_done:
1171 {
1172 if (ch != LF) {
1173 SET_ERRNO(HPE_LF_EXPECTED);
1174 goto error;
1175 }
1176
1177 state = s_header_field_start;
1178 break;
1179 }
1180
1181 case s_header_field_start:
1182 header_field_start:
1183 {
1184 if (ch == CR) {
1185 state = s_headers_almost_done;
1186 break;
1187 }
1188
1189 if (ch == LF) {
1190 /* they might be just sending \n instead of \r\n so this would be
1191 * the second \n to denote the end of headers*/
1192 state = s_headers_almost_done;
1193 goto headers_almost_done;
1194 }
1195
1196 c = TOKEN(ch);
1197
1198 if (!c) {
1199 SET_ERRNO(HPE_INVALID_HEADER_TOKEN);
1200 goto error;
1201 }
1202
1203 MARK(header_field);
1204
1205 index = 0;
1206 state = s_header_field;
1207
1208 switch (c) {
1209 case 'c':
1210 header_state = h_C;
1211 break;
1212
1213 case 'p':
1214 header_state = h_matching_proxy_connection;
1215 break;
1216
1217 case 't':
1218 header_state = h_matching_transfer_encoding;
1219 break;
1220
1221 case 'u':
1222 header_state = h_matching_upgrade;
1223 break;
1224
1225 default:
1226 header_state = h_general;
1227 break;
1228 }
1229 break;
1230 }
1231
1232 case s_header_field:
1233 {
1234 c = TOKEN(ch);
1235
1236 if (c) {
1237 switch (header_state) {
1238 case h_general:
1239 break;
1240
1241 case h_C:
1242 index++;
1243 header_state = (c == 'o' ? h_CO : h_general);
1244 break;
1245
1246 case h_CO:
1247 index++;
1248 header_state = (c == 'n' ? h_CON : h_general);
1249 break;
1250
1251 case h_CON:
1252 index++;
1253 switch (c) {
1254 case 'n':
1255 header_state = h_matching_connection;
1256 break;
1257 case 't':
1258 header_state = h_matching_content_length;
1259 break;
1260 default:
1261 header_state = h_general;
1262 break;
1263 }
1264 break;
1265
1266 /* connection */
1267
1268 case h_matching_connection:
1269 index++;
1270 if (index > sizeof(CONNECTION)-1
1271 || c != CONNECTION[index]) {
1272 header_state = h_general;
1273 } else if (index == sizeof(CONNECTION)-2) {
1274 header_state = h_connection;
1275 }
1276 break;
1277
1278 /* proxy-connection */
1279
1280 case h_matching_proxy_connection:
1281 index++;
1282 if (index > sizeof(PROXY_CONNECTION)-1
1283 || c != PROXY_CONNECTION[index]) {
1284 header_state = h_general;
1285 } else if (index == sizeof(PROXY_CONNECTION)-2) {
1286 header_state = h_connection;
1287 }
1288 break;
1289
1290 /* content-length */
1291
1292 case h_matching_content_length:
1293 index++;
1294 if (index > sizeof(CONTENT_LENGTH)-1
1295 || c != CONTENT_LENGTH[index]) {
1296 header_state = h_general;
1297 } else if (index == sizeof(CONTENT_LENGTH)-2) {
1298 header_state = h_content_length;
1299 }
1300 break;
1301
1302 /* transfer-encoding */
1303
1304 case h_matching_transfer_encoding:
1305 index++;
1306 if (index > sizeof(TRANSFER_ENCODING)-1
1307 || c != TRANSFER_ENCODING[index]) {
1308 header_state = h_general;
1309 } else if (index == sizeof(TRANSFER_ENCODING)-2) {
1310 header_state = h_transfer_encoding;
1311 }
1312 break;
1313
1314 /* upgrade */
1315
1316 case h_matching_upgrade:
1317 index++;
1318 if (index > sizeof(UPGRADE)-1
1319 || c != UPGRADE[index]) {
1320 header_state = h_general;
1321 } else if (index == sizeof(UPGRADE)-2) {
1322 header_state = h_upgrade;
1323 }
1324 break;
1325
1326 case h_connection:
1327 case h_content_length:
1328 case h_transfer_encoding:
1329 case h_upgrade:
1330 if (ch != ' ') header_state = h_general;
1331 break;
1332
1333 default:
1334 assert(0 && "Unknown header_state");
1335 break;
1336 }
1337 break;
1338 }
1339
1340 if (ch == ':') {
1341 CALLBACK(header_field);
1342 state = s_header_value_start;
1343 break;
1344 }
1345
1346 if (ch == CR) {
1347 state = s_header_almost_done;
1348 CALLBACK(header_field);
1349 break;
1350 }
1351
1352 if (ch == LF) {
1353 CALLBACK(header_field);
1354 state = s_header_field_start;
1355 break;
1356 }
1357
1358 SET_ERRNO(HPE_INVALID_HEADER_TOKEN);
1359 goto error;
1360 }
1361
1362 case s_header_value_start:
1363 {
1364 if (ch == ' ' || ch == '\t') break;
1365
1366 MARK(header_value);
1367
1368 state = s_header_value;
1369 index = 0;
1370
1371 if (ch == CR) {
1372 CALLBACK(header_value);
1373 header_state = h_general;
1374 state = s_header_almost_done;
1375 break;
1376 }
1377
1378 if (ch == LF) {
1379 CALLBACK(header_value);
1380 state = s_header_field_start;
1381 break;
1382 }
1383
1384 c = LOWER(ch);
1385
1386 switch (header_state) {
1387 case h_upgrade:
1388 parser->flags |= F_UPGRADE;
1389 header_state = h_general;
1390 break;
1391
1392 case h_transfer_encoding:
1393 /* looking for 'Transfer-Encoding: chunked' */
1394 if ('c' == c) {
1395 header_state = h_matching_transfer_encoding_chunked;
1396 } else {
1397 header_state = h_general;
1398 }
1399 break;
1400
1401 case h_content_length:
1402 if (!IS_NUM(ch)) {
1403 SET_ERRNO(HPE_INVALID_CONTENT_LENGTH);
1404 goto error;
1405 }
1406
1407 parser->content_length = ch - '0';
1408 break;
1409
1410 case h_connection:
1411 /* looking for 'Connection: keep-alive' */
1412 if (c == 'k') {
1413 header_state = h_matching_connection_keep_alive;
1414 /* looking for 'Connection: close' */
1415 } else if (c == 'c') {
1416 header_state = h_matching_connection_close;
1417 } else {
1418 header_state = h_general;
1419 }
1420 break;
1421
1422 default:
1423 header_state = h_general;
1424 break;
1425 }
1426 break;
1427 }
1428
1429 case s_header_value:
1430 {
1431
1432 if (ch == CR) {
1433 CALLBACK(header_value);
1434 state = s_header_almost_done;
1435 break;
1436 }
1437
1438 if (ch == LF) {
1439 CALLBACK(header_value);
1440 goto header_almost_done;
1441 }
1442
1443 c = LOWER(ch);
1444
1445 switch (header_state) {
1446 case h_general:
1447 break;
1448
1449 case h_connection:
1450 case h_transfer_encoding:
1451 assert(0 && "Shouldn't get here.");
1452 break;
1453
1454 case h_content_length:
1455 if (ch == ' ') break;
1456 if (!IS_NUM(ch)) {
1457 SET_ERRNO(HPE_INVALID_CONTENT_LENGTH);
1458 goto error;
1459 }
1460
1461 parser->content_length *= 10;
1462 parser->content_length += ch - '0';
1463 break;
1464
1465 /* Transfer-Encoding: chunked */
1466 case h_matching_transfer_encoding_chunked:
1467 index++;
1468 if (index > sizeof(CHUNKED)-1
1469 || c != CHUNKED[index]) {
1470 header_state = h_general;
1471 } else if (index == sizeof(CHUNKED)-2) {
1472 header_state = h_transfer_encoding_chunked;
1473 }
1474 break;
1475
1476 /* looking for 'Connection: keep-alive' */
1477 case h_matching_connection_keep_alive:
1478 index++;
1479 if (index > sizeof(KEEP_ALIVE)-1
1480 || c != KEEP_ALIVE[index]) {
1481 header_state = h_general;
1482 } else if (index == sizeof(KEEP_ALIVE)-2) {
1483 header_state = h_connection_keep_alive;
1484 }
1485 break;
1486
1487 /* looking for 'Connection: close' */
1488 case h_matching_connection_close:
1489 index++;
1490 if (index > sizeof(CLOSE)-1 || c != CLOSE[index]) {
1491 header_state = h_general;
1492 } else if (index == sizeof(CLOSE)-2) {
1493 header_state = h_connection_close;
1494 }
1495 break;
1496
1497 case h_transfer_encoding_chunked:
1498 case h_connection_keep_alive:
1499 case h_connection_close:
1500 if (ch != ' ') header_state = h_general;
1501 break;
1502
1503 default:
1504 state = s_header_value;
1505 header_state = h_general;
1506 break;
1507 }
1508 break;
1509 }
1510
1511 case s_header_almost_done:
1512 header_almost_done:
1513 {
1514 STRICT_CHECK(ch != LF);
1515
1516 state = s_header_value_lws;
1517
1518 switch (header_state) {
1519 case h_connection_keep_alive:
1520 parser->flags |= F_CONNECTION_KEEP_ALIVE;
1521 break;
1522 case h_connection_close:
1523 parser->flags |= F_CONNECTION_CLOSE;
1524 break;
1525 case h_transfer_encoding_chunked:
1526 parser->flags |= F_CHUNKED;
1527 break;
1528 default:
1529 break;
1530 }
1531 break;
1532 }
1533
1534 case s_header_value_lws:
1535 {
1536 if (ch == ' ' || ch == '\t')
1537 state = s_header_value_start;
1538 else
1539 {
1540 state = s_header_field_start;
1541 goto header_field_start;
1542 }
1543 break;
1544 }
1545
1546 case s_headers_almost_done:
1547 headers_almost_done:
1548 {
1549 STRICT_CHECK(ch != LF);
1550
1551 if (parser->flags & F_TRAILING) {
1552 /* End of a chunked request */
1553 CALLBACK2(message_complete);
1554 state = NEW_MESSAGE();
1555 break;
1556 }
1557
1558 nread = 0;
1559
1560 if (parser->flags & F_UPGRADE || parser->method == HTTP_CONNECT) {
1561 parser->upgrade = 1;
1562 }
1563
1564 /* Here we call the headers_complete callback. This is somewhat
1565 * different than other callbacks because if the user returns 1, we
1566 * will interpret that as saying that this message has no body. This
1567 * is needed for the annoying case of receiving a response to a HEAD
1568 * request.
1569 */
1570 if (settings->on_headers_complete) {
1571 switch (settings->on_headers_complete(parser)) {
1572 case 0:
1573 break;
1574
1575 case 1:
1576 parser->flags |= F_SKIPBODY;
1577 break;
1578
1579 default:
1580 parser->state = state;
1581 SET_ERRNO(HPE_CB_headers_complete);
1582 return p - data; /* Error */
1583 }
1584 }
1585
1586 /* Exit, the rest of the connect is in a different protocol. */
1587 if (parser->upgrade) {
1588 CALLBACK2(message_complete);
1589 return (p - data) + 1;
1590 }
1591
1592 if (parser->flags & F_SKIPBODY) {
1593 CALLBACK2(message_complete);
1594 state = NEW_MESSAGE();
1595 } else if (parser->flags & F_CHUNKED) {
1596 /* chunked encoding - ignore Content-Length header */
1597 state = s_chunk_size_start;
1598 } else {
1599 if (parser->content_length == 0) {
1600 /* Content-Length header given but zero: Content-Length: 0\r\n */
1601 CALLBACK2(message_complete);
1602 state = NEW_MESSAGE();
1603 } else if (parser->content_length > 0) {
1604 /* Content-Length header given and non-zero */
1605 state = s_body_identity;
1606 } else {
1607 if (parser->type == HTTP_REQUEST || http_should_keep_alive(parser)) {
1608 /* Assume content-length 0 - read the next */
1609 CALLBACK2(message_complete);
1610 state = NEW_MESSAGE();
1611 } else {
1612 /* Read body until EOF */
1613 state = s_body_identity_eof;
1614 }
1615 }
1616 }
1617
1618 break;
1619 }
1620
1621 case s_body_identity:
1622 to_read = MIN(pe - p, (int64_t)parser->content_length);
1623 if (to_read > 0) {
1624 if (settings->on_body) settings->on_body(parser, p, to_read);
1625 p += to_read - 1;
1626 parser->content_length -= to_read;
1627 if (parser->content_length == 0) {
1628 CALLBACK2(message_complete);
1629 state = NEW_MESSAGE();
1630 }
1631 }
1632 break;
1633
1634 /* read until EOF */
1635 case s_body_identity_eof:
1636 to_read = pe - p;
1637 if (to_read > 0) {
1638 if (settings->on_body) settings->on_body(parser, p, to_read);
1639 p += to_read - 1;
1640 }
1641 break;
1642
1643 case s_chunk_size_start:
1644 {
1645 assert(nread == 1);
1646 assert(parser->flags & F_CHUNKED);
1647
1648 unhex_val = unhex[(unsigned char)ch];
1649 if (unhex_val == -1) {
1650 SET_ERRNO(HPE_INVALID_CHUNK_SIZE);
1651 goto error;
1652 }
1653
1654 parser->content_length = unhex_val;
1655 state = s_chunk_size;
1656 break;
1657 }
1658
1659 case s_chunk_size:
1660 {
1661 assert(parser->flags & F_CHUNKED);
1662
1663 if (ch == CR) {
1664 state = s_chunk_size_almost_done;
1665 break;
1666 }
1667
1668 unhex_val = unhex[(unsigned char)ch];
1669
1670 if (unhex_val == -1) {
1671 if (ch == ';' || ch == ' ') {
1672 state = s_chunk_parameters;
1673 break;
1674 }
1675
1676 SET_ERRNO(HPE_INVALID_CHUNK_SIZE);
1677 goto error;
1678 }
1679
1680 parser->content_length *= 16;
1681 parser->content_length += unhex_val;
1682 break;
1683 }
1684
1685 case s_chunk_parameters:
1686 {
1687 assert(parser->flags & F_CHUNKED);
1688 /* just ignore this shit. TODO check for overflow */
1689 if (ch == CR) {
1690 state = s_chunk_size_almost_done;
1691 break;
1692 }
1693 break;
1694 }
1695
1696 case s_chunk_size_almost_done:
1697 {
1698 assert(parser->flags & F_CHUNKED);
1699 STRICT_CHECK(ch != LF);
1700
1701 nread = 0;
1702
1703 if (parser->content_length == 0) {
1704 parser->flags |= F_TRAILING;
1705 state = s_header_field_start;
1706 } else {
1707 state = s_chunk_data;
1708 }
1709 break;
1710 }
1711
1712 case s_chunk_data:
1713 {
1714 assert(parser->flags & F_CHUNKED);
1715
1716 to_read = MIN(pe - p, (int64_t)(parser->content_length));
1717
1718 if (to_read > 0) {
1719 if (settings->on_body) settings->on_body(parser, p, to_read);
1720 p += to_read - 1;
1721 }
1722
1723 if (to_read == parser->content_length) {
1724 state = s_chunk_data_almost_done;
1725 }
1726
1727 parser->content_length -= to_read;
1728 break;
1729 }
1730
1731 case s_chunk_data_almost_done:
1732 assert(parser->flags & F_CHUNKED);
1733 STRICT_CHECK(ch != CR);
1734 state = s_chunk_data_done;
1735 break;
1736
1737 case s_chunk_data_done:
1738 assert(parser->flags & F_CHUNKED);
1739 STRICT_CHECK(ch != LF);
1740 state = s_chunk_size_start;
1741 break;
1742
1743 default:
1744 assert(0 && "unhandled state");
1745 SET_ERRNO(HPE_INVALID_INTERNAL_STATE);
1746 goto error;
1747 }
1748 }
1749
1750 CALLBACK(header_field);
1751 CALLBACK(header_value);
1752 CALLBACK(url);
1753
1754 parser->state = state;
1755 parser->header_state = header_state;
1756 parser->index = index;
1757 parser->nread = nread;
1758
1759 return len;
1760
1761 error:
1762 if (HTTP_PARSER_ERRNO(parser) == HPE_OK) {
1763 SET_ERRNO(HPE_UNKNOWN);
1764 }
1765
1766 return (p - data);
1767 }
1768
1769
1770 int
1771 http_should_keep_alive (http_parser *parser)
1772 {
1773 if (parser->http_major > 0 && parser->http_minor > 0) {
1774 /* HTTP/1.1 */
1775 if (parser->flags & F_CONNECTION_CLOSE) {
1776 return 0;
1777 } else {
1778 return 1;
1779 }
1780 } else {
1781 /* HTTP/1.0 or earlier */
1782 if (parser->flags & F_CONNECTION_KEEP_ALIVE) {
1783 return 1;
1784 } else {
1785 return 0;
1786 }
1787 }
1788 }
1789
1790
1791 const char * http_method_str (enum http_method m)
1792 {
1793 return method_strings[m];
1794 }
1795
1796
1797 void
1798 http_parser_init (http_parser *parser, enum http_parser_type t, char numerichost)
1799 {
1800 parser->numerichost = numerichost;
1801 parser->type = t;
1802 parser->state = (t == HTTP_REQUEST ? s_start_req : (t == HTTP_RESPONSE ? s_start_res : s_start_req_or_res));
1803 parser->nread = 0;
1804 parser->upgrade = 0;
1805 parser->flags = 0;
1806 parser->method = 0;
1807 parser->http_errno = 0;
1808 }
1809
1810 const char *
1811 http_errno_name(enum http_errno err) {
1812 assert(err < (sizeof(http_strerror_tab)/sizeof(http_strerror_tab[0])));
1813 return http_strerror_tab[err].name;
1814 }
1815
1816 const char *
1817 http_errno_description(enum http_errno err) {
1818 assert(err < (sizeof(http_strerror_tab)/sizeof(http_strerror_tab[0])));
1819 return http_strerror_tab[err].description;
1820 }