Update the Joyent http_parser.c to version 2.0, cleanup code accordingly
[deb_shairplay.git] / src / lib / http_parser.c
1 /* Based on src/http/ngx_http_parse.c from NGINX copyright Igor Sysoev
2 *
3 * Additional changes are licensed under the same terms as NGINX and
4 * copyright Joyent, Inc. and other Node contributors. All rights reserved.
5 *
6 * Permission is hereby granted, free of charge, to any person obtaining a copy
7 * of this software and associated documentation files (the "Software"), to
8 * deal in the Software without restriction, including without limitation the
9 * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or
10 * sell copies of the Software, and to permit persons to whom the Software is
11 * furnished to do so, subject to the following conditions:
12 *
13 * The above copyright notice and this permission notice shall be included in
14 * all copies or substantial portions of the Software.
15 *
16 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
17 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
18 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
19 * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
20 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
21 * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
22 * IN THE SOFTWARE.
23 */
24 #include "http_parser.h"
25 #include <assert.h>
26 #include <stddef.h>
27 #include <ctype.h>
28 #include <stdlib.h>
29 #include <string.h>
30 #include <limits.h>
31
/* Fallback for toolchains whose <limits.h> lacks ULLONG_MAX. */
#ifndef ULLONG_MAX
# define ULLONG_MAX ((uint64_t) -1) /* 2^64-1 */
#endif

/* Smaller of the two arguments; each argument may be evaluated twice. */
#ifndef MIN
# define MIN(a,b) ((b) > (a) ? (a) : (b))
#endif

/* Element count of a true array (not valid for pointers). */
#ifndef ARRAY_SIZE
# define ARRAY_SIZE(a) (sizeof(a) / sizeof(*(a)))
#endif

/* 1 if bit number i is set in the byte-array bitmap a, else 0.
 * Bit i lives in byte (i >> 3) at position (i & 7).
 */
#ifndef BIT_AT
# define BIT_AT(a, i)                                                \
  ((((unsigned int) (a)[(unsigned int) (i) >> 3]                     \
     >> ((unsigned int) (i) & 7)) & 1u) != 0)
#endif

/* a[i] when i is inside array a, otherwise the fallback value v. */
#ifndef ELEM_AT
# define ELEM_AT(a, i, v) (ARRAY_SIZE(a) > (unsigned int) (i) ? (a)[(i)] : (v))
#endif
53
/* Store error code e in parser->http_errno. Debug builds
 * (HTTP_PARSER_DEBUG non-zero) additionally record the source line of the
 * macro invocation in parser->error_lineno. Requires a `parser` pointer
 * in the expanding scope.
 */
#if !HTTP_PARSER_DEBUG
# define SET_ERRNO(e)                                                \
  do {                                                               \
    parser->http_errno = (e);                                        \
  } while (0)
#else
# define SET_ERRNO(e)                                                \
  do {                                                               \
    parser->http_errno = (e);                                        \
    parser->error_lineno = __LINE__;                                 \
  } while (0)
#endif
66
67
/* Invoke the notify callback settings->on_<FOR>, if one is installed.
 *
 * A non-zero callback result records HPE_CB_<FOR> on the parser. Whenever
 * the parser's errno is no longer HPE_OK after the call (callback failure,
 * or the errno was changed during the callback), the enclosing function
 * returns ER. Expects `parser` and `settings` in scope.
 */
#define CALLBACK_NOTIFY_(FOR, ER)                                    \
do {                                                                 \
  assert(HTTP_PARSER_ERRNO(parser) == HPE_OK);                       \
                                                                     \
  if (settings->on_##FOR != NULL) {                                  \
    if (settings->on_##FOR(parser) != 0) {                           \
      SET_ERRNO(HPE_CB_##FOR);                                       \
    }                                                                \
                                                                     \
    /* Error or pause: stop parsing right here */                    \
    if (HPE_OK != HTTP_PARSER_ERRNO(parser)) {                       \
      return (ER);                                                   \
    }                                                                \
  }                                                                  \
} while (0)

/* Notify variant that reports the current byte as consumed. */
#define CALLBACK_NOTIFY(FOR)            CALLBACK_NOTIFY_(FOR, p - data + 1)

/* Notify variant that leaves the current byte unconsumed. */
#define CALLBACK_NOTIFY_NOADVANCE(FOR)  CALLBACK_NOTIFY_(FOR, p - data)
90
/* Invoke the data callback settings->on_<FOR> over the span that starts at
 * <FOR>_mark and is LEN bytes long. Does nothing when the mark is unset.
 *
 * A non-zero callback result records HPE_CB_<FOR>; if the parser's errno is
 * not HPE_OK after the call, the enclosing function returns ER and the mark
 * is left untouched. Otherwise the mark is cleared. Expects `parser` and
 * `settings` in scope.
 */
#define CALLBACK_DATA_(FOR, LEN, ER)                                 \
do {                                                                 \
  assert(HTTP_PARSER_ERRNO(parser) == HPE_OK);                       \
                                                                     \
  if (FOR##_mark != NULL) {                                          \
    if (settings->on_##FOR != NULL) {                                \
      if (settings->on_##FOR(parser, FOR##_mark, (LEN)) != 0) {      \
        SET_ERRNO(HPE_CB_##FOR);                                     \
      }                                                              \
                                                                     \
      /* Error or pause: stop parsing right here */                  \
      if (HPE_OK != HTTP_PARSER_ERRNO(parser)) {                     \
        return (ER);                                                 \
      }                                                              \
    }                                                                \
    FOR##_mark = NULL;                                               \
  }                                                                  \
} while (0)

/* Data variant that reports the current byte as consumed. */
#define CALLBACK_DATA(FOR)                                           \
  CALLBACK_DATA_(FOR, p - FOR##_mark, p - data + 1)

/* Data variant that leaves the current byte unconsumed. */
#define CALLBACK_DATA_NOADVANCE(FOR)                                 \
  CALLBACK_DATA_(FOR, p - FOR##_mark, p - data)
118
/* Point <FOR>_mark at the current byte `p`, unless a mark is already
 * active (an existing mark is never overwritten).
 */
#define MARK(FOR)                                                    \
do {                                                                 \
  if (NULL == FOR##_mark) {                                          \
    FOR##_mark = p;                                                  \
  }                                                                  \
} while (0)
126
127
/* Lower-case header names the parser matches against. */
#define PROXY_CONNECTION   "proxy-connection"
#define CONNECTION         "connection"
#define CONTENT_LENGTH     "content-length"
#define TRANSFER_ENCODING  "transfer-encoding"
#define UPGRADE            "upgrade"

/* Lower-case header values the parser matches against. */
#define CHUNKED            "chunked"
#define KEEP_ALIVE         "keep-alive"
#define CLOSE              "close"
136
137
138 static const char *method_strings[] =
139 {
140 #define XX(num, name, string) #string,
141 HTTP_METHOD_MAP(XX)
142 #undef XX
143 };
144
145
/* Lookup table for rfc 2616 token characters, indexed by byte value.
 *
 *        token      = 1*<any CHAR except CTLs or separators>
 *        separators = "(" | ")" | "<" | ">" | "@"
 *                   | "," | ";" | ":" | "\" | <">
 *                   | "/" | "[" | "]" | "?" | "="
 *                   | "{" | "}" | SP | HT
 *
 * A zero entry means "not a token character". A non-zero entry is the
 * character itself, with upper-case letters folded to lower case. Bytes
 * 128..255 have no initializer and are therefore zero.
 */
static const char tokens[256] = {
  /* 0x00-0x07: nul soh stx etx eot enq ack bel */
  0,    0,    0,    0,    0,    0,    0,    0,
  /* 0x08-0x0f: bs  ht  nl  vt  np  cr  so  si */
  0,    0,    0,    0,    0,    0,    0,    0,
  /* 0x10-0x17: dle dc1 dc2 dc3 dc4 nak syn etb */
  0,    0,    0,    0,    0,    0,    0,    0,
  /* 0x18-0x1f: can em  sub esc fs  gs  rs  us */
  0,    0,    0,    0,    0,    0,    0,    0,
  /* 0x20-0x27: sp  !   "   #   $   %   &   ' */
  0,    '!',  0,    '#',  '$',  '%',  '&',  '\'',
  /* 0x28-0x2f: (   )   *   +   ,   -   .   / */
  0,    0,    '*',  '+',  0,    '-',  '.',  0,
  /* 0x30-0x37: 0   1   2   3   4   5   6   7 */
  '0',  '1',  '2',  '3',  '4',  '5',  '6',  '7',
  /* 0x38-0x3f: 8   9   :   ;   <   =   >   ? */
  '8',  '9',  0,    0,    0,    0,    0,    0,
  /* 0x40-0x47: @   A   B   C   D   E   F   G */
  0,    'a',  'b',  'c',  'd',  'e',  'f',  'g',
  /* 0x48-0x4f: H   I   J   K   L   M   N   O */
  'h',  'i',  'j',  'k',  'l',  'm',  'n',  'o',
  /* 0x50-0x57: P   Q   R   S   T   U   V   W */
  'p',  'q',  'r',  's',  't',  'u',  'v',  'w',
  /* 0x58-0x5f: X   Y   Z   [   \   ]   ^   _ */
  'x',  'y',  'z',  0,    0,    0,    '^',  '_',
  /* 0x60-0x67: `   a   b   c   d   e   f   g */
  '`',  'a',  'b',  'c',  'd',  'e',  'f',  'g',
  /* 0x68-0x6f: h   i   j   k   l   m   n   o */
  'h',  'i',  'j',  'k',  'l',  'm',  'n',  'o',
  /* 0x70-0x77: p   q   r   s   t   u   v   w */
  'p',  'q',  'r',  's',  't',  'u',  'v',  'w',
  /* 0x78-0x7f: x   y   z   {   |   }   ~   del */
  'x',  'y',  'z',  0,    '|',  0,    '~',  0
};
186
187
/* Hex digit decoder, indexed by byte value: '0'-'9' map to 0-9, 'a'-'f' and
 * 'A'-'F' map to 10-15, and every other byte maps to -1 ("not a hex digit").
 *
 * All 256 entries are spelled out. With only the first 128 initialized, the
 * remaining entries would be zero-filled, making bytes 0x80..0xFF look like
 * the valid hex digit 0 instead of the -1 marker.
 */
static const int8_t unhex[256] =
  {-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1   /* 0x00 */
  ,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1   /* 0x10 */
  ,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1   /* 0x20 */
  , 0, 1, 2, 3, 4, 5, 6, 7, 8, 9,-1,-1,-1,-1,-1,-1   /* 0x30: '0'-'9' */
  ,-1,10,11,12,13,14,15,-1,-1,-1,-1,-1,-1,-1,-1,-1   /* 0x40: 'A'-'F' */
  ,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1   /* 0x50 */
  ,-1,10,11,12,13,14,15,-1,-1,-1,-1,-1,-1,-1,-1,-1   /* 0x60: 'a'-'f' */
  ,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1   /* 0x70 */
  ,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1   /* 0x80 */
  ,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1   /* 0x90 */
  ,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1   /* 0xa0 */
  ,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1   /* 0xb0 */
  ,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1   /* 0xc0 */
  ,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1   /* 0xd0 */
  ,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1   /* 0xe0 */
  ,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1   /* 0xf0 */
  };
198
199
/* LENIENT(v) contributes bit v only in non-strict builds; strict builds
 * clear it. Used below for the tab (9) and form-feed (12) entries.
 */
#if !HTTP_PARSER_STRICT
# define LENIENT(v) v
#else
# define LENIENT(v) 0
#endif


/* Bitmap of characters accepted by IS_URL_CHAR(): code c is represented by
 * bit (c & 7) of byte (c >> 3). Only codes 0..127 are covered.
 */
static const uint8_t normal_url_char[32] = {
  /* 0x00-0x07: nul soh stx etx eot enq ack bel */
  0    | 0             | 0    | 0    | 0             | 0    | 0    | 0,
  /* 0x08-0x0f: bs  ht  nl  vt  np  cr  so  si */
  0    | LENIENT(0x02) | 0    | 0    | LENIENT(0x10) | 0    | 0    | 0,
  /* 0x10-0x17: dle dc1 dc2 dc3 dc4 nak syn etb */
  0    | 0             | 0    | 0    | 0             | 0    | 0    | 0,
  /* 0x18-0x1f: can em  sub esc fs  gs  rs  us */
  0    | 0             | 0    | 0    | 0             | 0    | 0    | 0,
  /* 0x20-0x27: sp  !   "   #   $   %   &   '   (sp and # excluded) */
  0    | 0x02 | 0x04 | 0    | 0x10 | 0x20 | 0x40 | 0x80,
  /* 0x28-0x2f: (   )   *   +   ,   -   .   / */
  0x01 | 0x02 | 0x04 | 0x08 | 0x10 | 0x20 | 0x40 | 0x80,
  /* 0x30-0x37: 0   1   2   3   4   5   6   7 */
  0x01 | 0x02 | 0x04 | 0x08 | 0x10 | 0x20 | 0x40 | 0x80,
  /* 0x38-0x3f: 8   9   :   ;   <   =   >   ?   (? excluded) */
  0x01 | 0x02 | 0x04 | 0x08 | 0x10 | 0x20 | 0x40 | 0,
  /* 0x40-0x47: @   A   B   C   D   E   F   G */
  0x01 | 0x02 | 0x04 | 0x08 | 0x10 | 0x20 | 0x40 | 0x80,
  /* 0x48-0x4f: H   I   J   K   L   M   N   O */
  0x01 | 0x02 | 0x04 | 0x08 | 0x10 | 0x20 | 0x40 | 0x80,
  /* 0x50-0x57: P   Q   R   S   T   U   V   W */
  0x01 | 0x02 | 0x04 | 0x08 | 0x10 | 0x20 | 0x40 | 0x80,
  /* 0x58-0x5f: X   Y   Z   [   \   ]   ^   _ */
  0x01 | 0x02 | 0x04 | 0x08 | 0x10 | 0x20 | 0x40 | 0x80,
  /* 0x60-0x67: `   a   b   c   d   e   f   g */
  0x01 | 0x02 | 0x04 | 0x08 | 0x10 | 0x20 | 0x40 | 0x80,
  /* 0x68-0x6f: h   i   j   k   l   m   n   o */
  0x01 | 0x02 | 0x04 | 0x08 | 0x10 | 0x20 | 0x40 | 0x80,
  /* 0x70-0x77: p   q   r   s   t   u   v   w */
  0x01 | 0x02 | 0x04 | 0x08 | 0x10 | 0x20 | 0x40 | 0x80,
  /* 0x78-0x7f: x   y   z   {   |   }   ~   del (del excluded) */
  0x01 | 0x02 | 0x04 | 0x08 | 0x10 | 0x20 | 0x40 | 0,
};

#undef LENIENT
242
/* States of the message parser. The numeric order matters: every state up
 * to and including s_headers_done counts as a "header" state (see
 * PARSING_HEADER below); everything after it is a "body" state.
 */
enum state
  { s_dead = 1 /* important that this is > 0 */

  /* start of a message whose type (request/response) is not yet known */
  , s_start_req_or_res
  , s_res_or_resp_H

  /* response status line */
  , s_start_res
  , s_res_H
  , s_res_HT
  , s_res_HTT
  , s_res_HTTP
  , s_res_first_http_major
  , s_res_http_major
  , s_res_first_http_minor
  , s_res_http_minor
  , s_res_first_status_code
  , s_res_status_code
  , s_res_status
  , s_res_line_almost_done

  /* request line */
  , s_start_req

  , s_req_method
  , s_req_spaces_before_url
  , s_req_schema
  , s_req_schema_slash
  , s_req_schema_slash_slash
  , s_req_server_start
  , s_req_server
  , s_req_server_with_at
  , s_req_path
  , s_req_query_string_start
  , s_req_query_string
  , s_req_fragment_start
  , s_req_fragment
  , s_req_http_start
  , s_req_http_H
  , s_req_http_HT
  , s_req_http_HTT
  , s_req_http_HTTP
  , s_req_first_http_major
  , s_req_http_major
  , s_req_first_http_minor
  , s_req_http_minor
  , s_req_line_almost_done

  /* header lines */
  , s_header_field_start
  , s_header_field
  , s_header_value_start
  , s_header_value
  , s_header_value_lws

  , s_header_almost_done

  /* chunk size line */
  , s_chunk_size_start
  , s_chunk_size
  , s_chunk_parameters
  , s_chunk_size_almost_done

  , s_headers_almost_done
  , s_headers_done

  /* Important: 's_headers_done' must be the last 'header' state. All
   * states beyond this must be 'body' states. It is used for overflow
   * checking. See the PARSING_HEADER() macro.
   */

  , s_chunk_data
  , s_chunk_data_almost_done
  , s_chunk_data_done

  , s_body_identity
  , s_body_identity_eof

  , s_message_done
  };


/* Non-zero when `state` is a header state, i.e. not past s_headers_done.
 * The argument is parenthesized so that expressions containing operators of
 * lower precedence than `<=` (e.g. a conditional) are compared as a whole.
 */
#define PARSING_HEADER(state) ((state) <= s_headers_done)
321
322
/* Progress of recognizing the header names and values the parser treats
 * specially. h_general is the catch-all for everything else.
 */
enum header_states {
  h_general = 0,

  /* shared "con" prefix of "connection" and "content-length" */
  h_C,
  h_CO,
  h_CON,

  /* part-way through a known header name */
  h_matching_connection,
  h_matching_proxy_connection,
  h_matching_content_length,
  h_matching_transfer_encoding,
  h_matching_upgrade,

  /* known header name fully matched */
  h_connection,
  h_content_length,
  h_transfer_encoding,
  h_upgrade,

  /* part-way through a known header value */
  h_matching_transfer_encoding_chunked,
  h_matching_connection_keep_alive,
  h_matching_connection_close,

  /* known header value fully matched */
  h_transfer_encoding_chunked,
  h_connection_keep_alive,
  h_connection_close
};
348
/* States for walking the authority part of a URL (userinfo, host, IPv6
 * literal, port). Numbering starts at 1.
 */
enum http_host_state {
  s_http_host_dead = 1,
  s_http_userinfo_start,
  s_http_userinfo,
  s_http_host_start,
  s_http_host_v6_start,
  s_http_host,
  s_http_host_v6,
  s_http_host_v6_end,
  s_http_host_port_start,
  s_http_host_port
};
362
/* Macros for character classes; some depend on strict-mode.
 *
 * Every macro argument is parenthesized so that an argument expression
 * containing low-precedence operators (e.g. `a ? b : c`) is classified as a
 * whole. Most of these macros evaluate their argument more than once, so
 * arguments must be free of side effects.
 */
#define CR                  '\r'
#define LF                  '\n'
/* Sets bit 0x20, which folds ASCII upper-case letters to lower case. */
#define LOWER(c)            (unsigned char)((c) | 0x20)
#define IS_ALPHA(c)         (LOWER(c) >= 'a' && LOWER(c) <= 'z')
#define IS_NUM(c)           ((c) >= '0' && (c) <= '9')
#define IS_ALPHANUM(c)      (IS_ALPHA(c) || IS_NUM(c))
#define IS_HEX(c)           (IS_NUM(c) || (LOWER(c) >= 'a' && LOWER(c) <= 'f'))
#define IS_MARK(c)          ((c) == '-' || (c) == '_' || (c) == '.' || \
  (c) == '!' || (c) == '~' || (c) == '*' || (c) == '\'' || (c) == '(' || \
  (c) == ')')
#define IS_USERINFO_CHAR(c) (IS_ALPHANUM(c) || IS_MARK(c) || (c) == '%' || \
  (c) == ';' || (c) == ':' || (c) == '&' || (c) == '=' || (c) == '+' || \
  (c) == '$' || (c) == ',')

#if HTTP_PARSER_STRICT
/* Strict mode: token table only, 7-bit URL characters only, no '_' in
 * host names. */
#define TOKEN(c)            (tokens[(unsigned char)(c)])
#define IS_URL_CHAR(c)      (BIT_AT(normal_url_char, (unsigned char)(c)))
#define IS_HOST_CHAR(c)     (IS_ALPHANUM(c) || (c) == '.' || (c) == '-')
#else
/* Non-strict mode: a space also counts as a token character, bytes with the
 * high bit set count as URL characters, and '_' is accepted in host names. */
#define TOKEN(c)            (((c) == ' ') ? ' ' : tokens[(unsigned char)(c)])
#define IS_URL_CHAR(c)                                                         \
  (BIT_AT(normal_url_char, (unsigned char)(c)) || ((c) & 0x80))
#define IS_HOST_CHAR(c)                                                        \
  (IS_ALPHANUM(c) || (c) == '.' || (c) == '-' || (c) == '_')
#endif
389
390
/* First state for a fresh message, chosen from parser->type. */
#define start_state                                                  \
  (parser->type != HTTP_REQUEST ? s_start_res : s_start_req)


/* STRICT_CHECK(cond): in strict builds, a true `cond` records HPE_STRICT
 *   and jumps to the `error` label of the enclosing function; in non-strict
 *   builds the macro expands to nothing.
 * NEW_MESSAGE(): state to enter once a message is finished. Strict builds
 *   pick s_dead when http_should_keep_alive() reports false; non-strict
 *   builds always restart at start_state.
 */
#if !HTTP_PARSER_STRICT
# define STRICT_CHECK(cond)
# define NEW_MESSAGE() start_state
#else
# define STRICT_CHECK(cond)                                          \
do {                                                                 \
  if (cond) {                                                        \
    SET_ERRNO(HPE_STRICT);                                           \
    goto error;                                                      \
  }                                                                  \
} while (0)
# define NEW_MESSAGE() (!http_should_keep_alive(parser) ? s_dead : start_state)
#endif
407
408
409 /* Map errno values to strings for human-readable output */
410 #define HTTP_STRERROR_GEN(n, s) { "HPE_" #n, s },
411 static struct {
412 const char *name;
413 const char *description;
414 } http_strerror_tab[] = {
415 HTTP_ERRNO_MAP(HTTP_STRERROR_GEN)
416 };
417 #undef HTTP_STRERROR_GEN
418
419 int http_message_needs_eof(const http_parser *parser);
420
421 /* Our URL parser.
422 *
423 * This is designed to be shared by http_parser_execute() for URL validation,
424 * hence it has a state transition + byte-for-byte interface. In addition, it
425 * is meant to be embedded in http_parser_parse_url(), which does the dirty
426 * work of turning state transitions URL components for its API.
427 *
428 * This function should only be invoked with non-space characters. It is
429 * assumed that the caller cares about (and can detect) the transition between
430 * URL and non-URL states by looking for these.
431 */
432 static enum state
433 parse_url_char(enum state s, const char ch)
434 {
435 if (ch == ' ' || ch == '\r' || ch == '\n') {
436 return s_dead;
437 }
438
439 #if HTTP_PARSER_STRICT
440 if (ch == '\t' || ch == '\f') {
441 return s_dead;
442 }
443 #endif
444
445 switch (s) {
446 case s_req_spaces_before_url:
447 /* Proxied requests are followed by scheme of an absolute URI (alpha).
448 * All methods except CONNECT are followed by '/' or '*'.
449 */
450
451 if (ch == '/' || ch == '*') {
452 return s_req_path;
453 }
454
455 if (IS_ALPHA(ch)) {
456 return s_req_schema;
457 }
458
459 break;
460
461 case s_req_schema:
462 if (IS_ALPHA(ch)) {
463 return s;
464 }
465
466 if (ch == ':') {
467 return s_req_schema_slash;
468 }
469
470 break;
471
472 case s_req_schema_slash:
473 if (ch == '/') {
474 return s_req_schema_slash_slash;
475 }
476
477 break;
478
479 case s_req_schema_slash_slash:
480 if (ch == '/') {
481 return s_req_server_start;
482 }
483
484 break;
485
486 case s_req_server_with_at:
487 if (ch == '@') {
488 return s_dead;
489 }
490
491 /* FALLTHROUGH */
492 case s_req_server_start:
493 case s_req_server:
494 if (ch == '/') {
495 return s_req_path;
496 }
497
498 if (ch == '?') {
499 return s_req_query_string_start;
500 }
501
502 if (ch == '@') {
503 return s_req_server_with_at;
504 }
505
506 if (IS_USERINFO_CHAR(ch) || ch == '[' || ch == ']') {
507 return s_req_server;
508 }
509
510 break;
511
512 case s_req_path:
513 if (IS_URL_CHAR(ch)) {
514 return s;
515 }
516
517 switch (ch) {
518 case '?':
519 return s_req_query_string_start;
520
521 case '#':
522 return s_req_fragment_start;
523 }
524
525 break;
526
527 case s_req_query_string_start:
528 case s_req_query_string:
529 if (IS_URL_CHAR(ch)) {
530 return s_req_query_string;
531 }
532
533 switch (ch) {
534 case '?':
535 /* allow extra '?' in query string */
536 return s_req_query_string;
537
538 case '#':
539 return s_req_fragment_start;
540 }
541
542 break;
543
544 case s_req_fragment_start:
545 if (IS_URL_CHAR(ch)) {
546 return s_req_fragment;
547 }
548
549 switch (ch) {
550 case '?':
551 return s_req_fragment;
552
553 case '#':
554 return s;
555 }
556
557 break;
558
559 case s_req_fragment:
560 if (IS_URL_CHAR(ch)) {
561 return s;
562 }
563
564 switch (ch) {
565 case '?':
566 case '#':
567 return s;
568 }
569
570 break;
571
572 default:
573 break;
574 }
575
576 /* We should never fall out of the switch above unless there's an error */
577 return s_dead;
578 }
579
580 size_t http_parser_execute (http_parser *parser,
581 const http_parser_settings *settings,
582 const char *data,
583 size_t len)
584 {
585 char c, ch;
586 int8_t unhex_val;
587 const char *p = data;
588 const char *header_field_mark = 0;
589 const char *header_value_mark = 0;
590 const char *url_mark = 0;
591 const char *body_mark = 0;
592
593 /* We're in an error state. Don't bother doing anything. */
594 if (HTTP_PARSER_ERRNO(parser) != HPE_OK) {
595 return 0;
596 }
597
598 if (len == 0) {
599 switch (parser->state) {
600 case s_body_identity_eof:
601 /* Use of CALLBACK_NOTIFY() here would erroneously return 1 byte read if
602 * we got paused.
603 */
604 CALLBACK_NOTIFY_NOADVANCE(message_complete);
605 return 0;
606
607 case s_dead:
608 case s_start_req_or_res:
609 case s_start_res:
610 case s_start_req:
611 return 0;
612
613 default:
614 SET_ERRNO(HPE_INVALID_EOF_STATE);
615 return 1;
616 }
617 }
618
619
620 if (parser->state == s_header_field)
621 header_field_mark = data;
622 if (parser->state == s_header_value)
623 header_value_mark = data;
624 switch (parser->state) {
625 case s_req_path:
626 case s_req_schema:
627 case s_req_schema_slash:
628 case s_req_schema_slash_slash:
629 case s_req_server_start:
630 case s_req_server:
631 case s_req_server_with_at:
632 case s_req_query_string_start:
633 case s_req_query_string:
634 case s_req_fragment_start:
635 case s_req_fragment:
636 url_mark = data;
637 break;
638 }
639
640 for (p=data; p != data + len; p++) {
641 ch = *p;
642
643 if (PARSING_HEADER(parser->state)) {
644 ++parser->nread;
645 /* Buffer overflow attack */
646 if (parser->nread > HTTP_MAX_HEADER_SIZE) {
647 SET_ERRNO(HPE_HEADER_OVERFLOW);
648 goto error;
649 }
650 }
651
652 reexecute_byte:
653 switch (parser->state) {
654
655 case s_dead:
656 /* this state is used after a 'Connection: close' message
657 * the parser will error out if it reads another message
658 */
659 if (ch == CR || ch == LF)
660 break;
661
662 SET_ERRNO(HPE_CLOSED_CONNECTION);
663 goto error;
664
665 case s_start_req_or_res:
666 {
667 if (ch == CR || ch == LF)
668 break;
669 parser->flags = 0;
670 parser->content_length = ULLONG_MAX;
671
672 if (ch == 'H') {
673 parser->state = s_res_or_resp_H;
674
675 CALLBACK_NOTIFY(message_begin);
676 } else {
677 parser->type = HTTP_REQUEST;
678 parser->state = s_start_req;
679 goto reexecute_byte;
680 }
681
682 break;
683 }
684
685 case s_res_or_resp_H:
686 if (ch == 'T') {
687 parser->type = HTTP_RESPONSE;
688 parser->state = s_res_HT;
689 } else {
690 if (ch != 'E') {
691 SET_ERRNO(HPE_INVALID_CONSTANT);
692 goto error;
693 }
694
695 parser->type = HTTP_REQUEST;
696 parser->method = HTTP_HEAD;
697 parser->index = 2;
698 parser->state = s_req_method;
699 }
700 break;
701
702 case s_start_res:
703 {
704 parser->flags = 0;
705 parser->content_length = ULLONG_MAX;
706
707 switch (ch) {
708 case 'H':
709 parser->state = s_res_H;
710 break;
711
712 case CR:
713 case LF:
714 break;
715
716 default:
717 SET_ERRNO(HPE_INVALID_CONSTANT);
718 goto error;
719 }
720
721 CALLBACK_NOTIFY(message_begin);
722 break;
723 }
724
725 case s_res_H:
726 STRICT_CHECK(ch != 'T');
727 parser->state = s_res_HT;
728 break;
729
730 case s_res_HT:
731 STRICT_CHECK(ch != 'T');
732 parser->state = s_res_HTT;
733 break;
734
735 case s_res_HTT:
736 STRICT_CHECK(ch != 'P');
737 parser->state = s_res_HTTP;
738 break;
739
740 case s_res_HTTP:
741 STRICT_CHECK(ch != '/');
742 parser->state = s_res_first_http_major;
743 break;
744
745 case s_res_first_http_major:
746 if (ch < '0' || ch > '9') {
747 SET_ERRNO(HPE_INVALID_VERSION);
748 goto error;
749 }
750
751 parser->http_major = ch - '0';
752 parser->state = s_res_http_major;
753 break;
754
755 /* major HTTP version or dot */
756 case s_res_http_major:
757 {
758 if (ch == '.') {
759 parser->state = s_res_first_http_minor;
760 break;
761 }
762
763 if (!IS_NUM(ch)) {
764 SET_ERRNO(HPE_INVALID_VERSION);
765 goto error;
766 }
767
768 parser->http_major *= 10;
769 parser->http_major += ch - '0';
770
771 if (parser->http_major > 999) {
772 SET_ERRNO(HPE_INVALID_VERSION);
773 goto error;
774 }
775
776 break;
777 }
778
779 /* first digit of minor HTTP version */
780 case s_res_first_http_minor:
781 if (!IS_NUM(ch)) {
782 SET_ERRNO(HPE_INVALID_VERSION);
783 goto error;
784 }
785
786 parser->http_minor = ch - '0';
787 parser->state = s_res_http_minor;
788 break;
789
790 /* minor HTTP version or end of request line */
791 case s_res_http_minor:
792 {
793 if (ch == ' ') {
794 parser->state = s_res_first_status_code;
795 break;
796 }
797
798 if (!IS_NUM(ch)) {
799 SET_ERRNO(HPE_INVALID_VERSION);
800 goto error;
801 }
802
803 parser->http_minor *= 10;
804 parser->http_minor += ch - '0';
805
806 if (parser->http_minor > 999) {
807 SET_ERRNO(HPE_INVALID_VERSION);
808 goto error;
809 }
810
811 break;
812 }
813
814 case s_res_first_status_code:
815 {
816 if (!IS_NUM(ch)) {
817 if (ch == ' ') {
818 break;
819 }
820
821 SET_ERRNO(HPE_INVALID_STATUS);
822 goto error;
823 }
824 parser->status_code = ch - '0';
825 parser->state = s_res_status_code;
826 break;
827 }
828
829 case s_res_status_code:
830 {
831 if (!IS_NUM(ch)) {
832 switch (ch) {
833 case ' ':
834 parser->state = s_res_status;
835 break;
836 case CR:
837 parser->state = s_res_line_almost_done;
838 break;
839 case LF:
840 parser->state = s_header_field_start;
841 break;
842 default:
843 SET_ERRNO(HPE_INVALID_STATUS);
844 goto error;
845 }
846 break;
847 }
848
849 parser->status_code *= 10;
850 parser->status_code += ch - '0';
851
852 if (parser->status_code > 999) {
853 SET_ERRNO(HPE_INVALID_STATUS);
854 goto error;
855 }
856
857 break;
858 }
859
860 case s_res_status:
861 /* the human readable status. e.g. "NOT FOUND"
862 * we are not humans so just ignore this */
863 if (ch == CR) {
864 parser->state = s_res_line_almost_done;
865 break;
866 }
867
868 if (ch == LF) {
869 parser->state = s_header_field_start;
870 break;
871 }
872 break;
873
874 case s_res_line_almost_done:
875 STRICT_CHECK(ch != LF);
876 parser->state = s_header_field_start;
877 break;
878
879 case s_start_req:
880 {
881 if (ch == CR || ch == LF)
882 break;
883 parser->flags = 0;
884 parser->content_length = ULLONG_MAX;
885
886 if (!IS_ALPHA(ch)) {
887 SET_ERRNO(HPE_INVALID_METHOD);
888 goto error;
889 }
890
891 parser->method = (enum http_method) 0;
892 parser->index = 1;
893 switch (ch) {
894 case 'C': parser->method = HTTP_CONNECT; /* or COPY, CHECKOUT */ break;
895 case 'D': parser->method = HTTP_DELETE; break;
896 case 'G': parser->method = HTTP_GET; break;
897 case 'H': parser->method = HTTP_HEAD; break;
898 case 'L': parser->method = HTTP_LOCK; break;
899 case 'M': parser->method = HTTP_MKCOL; /* or MOVE, MKACTIVITY, MERGE, M-SEARCH */ break;
900 case 'N': parser->method = HTTP_NOTIFY; break;
901 case 'O': parser->method = HTTP_OPTIONS; break;
902 case 'P': parser->method = HTTP_POST;
903 /* or PROPFIND|PROPPATCH|PUT|PATCH|PURGE */
904 break;
905 case 'R': parser->method = HTTP_REPORT; break;
906 case 'S': parser->method = HTTP_SUBSCRIBE; /* or SEARCH */ break;
907 case 'T': parser->method = HTTP_TRACE; break;
908 case 'U': parser->method = HTTP_UNLOCK; /* or UNSUBSCRIBE */ break;
909 default:
910 SET_ERRNO(HPE_INVALID_METHOD);
911 goto error;
912 }
913 parser->state = s_req_method;
914
915 CALLBACK_NOTIFY(message_begin);
916
917 break;
918 }
919
920 case s_req_method:
921 {
922 const char *matcher;
923 if (ch == '\0') {
924 SET_ERRNO(HPE_INVALID_METHOD);
925 goto error;
926 }
927
928 matcher = method_strings[parser->method];
929 if (ch == ' ' && matcher[parser->index] == '\0') {
930 parser->state = s_req_spaces_before_url;
931 } else if (ch == matcher[parser->index]) {
932 ; /* nada */
933 } else if (parser->method == HTTP_CONNECT) {
934 if (parser->index == 1 && ch == 'H') {
935 parser->method = HTTP_CHECKOUT;
936 } else if (parser->index == 2 && ch == 'P') {
937 parser->method = HTTP_COPY;
938 } else {
939 goto error;
940 }
941 } else if (parser->method == HTTP_MKCOL) {
942 if (parser->index == 1 && ch == 'O') {
943 parser->method = HTTP_MOVE;
944 } else if (parser->index == 1 && ch == 'E') {
945 parser->method = HTTP_MERGE;
946 } else if (parser->index == 1 && ch == '-') {
947 parser->method = HTTP_MSEARCH;
948 } else if (parser->index == 2 && ch == 'A') {
949 parser->method = HTTP_MKACTIVITY;
950 } else {
951 goto error;
952 }
953 } else if (parser->method == HTTP_SUBSCRIBE) {
954 if (parser->index == 1 && ch == 'E') {
955 parser->method = HTTP_SEARCH;
956 } else {
957 goto error;
958 }
959 } else if (parser->index == 1 && parser->method == HTTP_POST) {
960 if (ch == 'R') {
961 parser->method = HTTP_PROPFIND; /* or HTTP_PROPPATCH */
962 } else if (ch == 'U') {
963 parser->method = HTTP_PUT; /* or HTTP_PURGE */
964 } else if (ch == 'A') {
965 parser->method = HTTP_PATCH;
966 } else {
967 goto error;
968 }
969 } else if (parser->index == 2) {
970 if (parser->method == HTTP_PUT) {
971 if (ch == 'R') parser->method = HTTP_PURGE;
972 } else if (parser->method == HTTP_UNLOCK) {
973 if (ch == 'S') parser->method = HTTP_UNSUBSCRIBE;
974 }
975 } else if (parser->index == 4 && parser->method == HTTP_PROPFIND && ch == 'P') {
976 parser->method = HTTP_PROPPATCH;
977 } else {
978 SET_ERRNO(HPE_INVALID_METHOD);
979 goto error;
980 }
981
982 ++parser->index;
983 break;
984 }
985
986 case s_req_spaces_before_url:
987 {
988 if (ch == ' ') break;
989
990 MARK(url);
991 if (parser->method == HTTP_CONNECT) {
992 parser->state = s_req_server_start;
993 }
994
995 parser->state = parse_url_char((enum state)parser->state, ch);
996 if (parser->state == s_dead) {
997 SET_ERRNO(HPE_INVALID_URL);
998 goto error;
999 }
1000
1001 break;
1002 }
1003
1004 case s_req_schema:
1005 case s_req_schema_slash:
1006 case s_req_schema_slash_slash:
1007 case s_req_server_start:
1008 {
1009 switch (ch) {
1010 /* No whitespace allowed here */
1011 case ' ':
1012 case CR:
1013 case LF:
1014 SET_ERRNO(HPE_INVALID_URL);
1015 goto error;
1016 default:
1017 parser->state = parse_url_char((enum state)parser->state, ch);
1018 if (parser->state == s_dead) {
1019 SET_ERRNO(HPE_INVALID_URL);
1020 goto error;
1021 }
1022 }
1023
1024 break;
1025 }
1026
1027 case s_req_server:
1028 case s_req_server_with_at:
1029 case s_req_path:
1030 case s_req_query_string_start:
1031 case s_req_query_string:
1032 case s_req_fragment_start:
1033 case s_req_fragment:
1034 {
1035 switch (ch) {
1036 case ' ':
1037 parser->state = s_req_http_start;
1038 CALLBACK_DATA(url);
1039 break;
1040 case CR:
1041 case LF:
1042 parser->http_major = 0;
1043 parser->http_minor = 9;
1044 parser->state = (ch == CR) ?
1045 s_req_line_almost_done :
1046 s_header_field_start;
1047 CALLBACK_DATA(url);
1048 break;
1049 default:
1050 parser->state = parse_url_char((enum state)parser->state, ch);
1051 if (parser->state == s_dead) {
1052 SET_ERRNO(HPE_INVALID_URL);
1053 goto error;
1054 }
1055 }
1056 break;
1057 }
1058
1059 case s_req_http_start:
1060 switch (ch) {
1061 case 'H':
1062 parser->state = s_req_http_H;
1063 break;
1064 case ' ':
1065 break;
1066 default:
1067 SET_ERRNO(HPE_INVALID_CONSTANT);
1068 goto error;
1069 }
1070 break;
1071
1072 case s_req_http_H:
1073 STRICT_CHECK(ch != 'T');
1074 parser->state = s_req_http_HT;
1075 break;
1076
1077 case s_req_http_HT:
1078 STRICT_CHECK(ch != 'T');
1079 parser->state = s_req_http_HTT;
1080 break;
1081
1082 case s_req_http_HTT:
1083 STRICT_CHECK(ch != 'P');
1084 parser->state = s_req_http_HTTP;
1085 break;
1086
1087 case s_req_http_HTTP:
1088 STRICT_CHECK(ch != '/');
1089 parser->state = s_req_first_http_major;
1090 break;
1091
1092 /* first digit of major HTTP version */
1093 case s_req_first_http_major:
1094 if (ch < '1' || ch > '9') {
1095 SET_ERRNO(HPE_INVALID_VERSION);
1096 goto error;
1097 }
1098
1099 parser->http_major = ch - '0';
1100 parser->state = s_req_http_major;
1101 break;
1102
1103 /* major HTTP version or dot */
1104 case s_req_http_major:
1105 {
1106 if (ch == '.') {
1107 parser->state = s_req_first_http_minor;
1108 break;
1109 }
1110
1111 if (!IS_NUM(ch)) {
1112 SET_ERRNO(HPE_INVALID_VERSION);
1113 goto error;
1114 }
1115
1116 parser->http_major *= 10;
1117 parser->http_major += ch - '0';
1118
1119 if (parser->http_major > 999) {
1120 SET_ERRNO(HPE_INVALID_VERSION);
1121 goto error;
1122 }
1123
1124 break;
1125 }
1126
1127 /* first digit of minor HTTP version */
1128 case s_req_first_http_minor:
1129 if (!IS_NUM(ch)) {
1130 SET_ERRNO(HPE_INVALID_VERSION);
1131 goto error;
1132 }
1133
1134 parser->http_minor = ch - '0';
1135 parser->state = s_req_http_minor;
1136 break;
1137
1138 /* minor HTTP version or end of request line */
1139 case s_req_http_minor:
1140 {
1141 if (ch == CR) {
1142 parser->state = s_req_line_almost_done;
1143 break;
1144 }
1145
1146 if (ch == LF) {
1147 parser->state = s_header_field_start;
1148 break;
1149 }
1150
1151 /* XXX allow spaces after digit? */
1152
1153 if (!IS_NUM(ch)) {
1154 SET_ERRNO(HPE_INVALID_VERSION);
1155 goto error;
1156 }
1157
1158 parser->http_minor *= 10;
1159 parser->http_minor += ch - '0';
1160
1161 if (parser->http_minor > 999) {
1162 SET_ERRNO(HPE_INVALID_VERSION);
1163 goto error;
1164 }
1165
1166 break;
1167 }
1168
1169 /* end of request line */
1170 case s_req_line_almost_done:
1171 {
1172 if (ch != LF) {
1173 SET_ERRNO(HPE_LF_EXPECTED);
1174 goto error;
1175 }
1176
1177 parser->state = s_header_field_start;
1178 break;
1179 }
1180
1181 case s_header_field_start:
1182 {
1183 if (ch == CR) {
1184 parser->state = s_headers_almost_done;
1185 break;
1186 }
1187
1188 if (ch == LF) {
1189 /* they might be just sending \n instead of \r\n so this would be
1190 * the second \n to denote the end of headers*/
1191 parser->state = s_headers_almost_done;
1192 goto reexecute_byte;
1193 }
1194
1195 c = TOKEN(ch);
1196
1197 if (!c) {
1198 SET_ERRNO(HPE_INVALID_HEADER_TOKEN);
1199 goto error;
1200 }
1201
1202 MARK(header_field);
1203
1204 parser->index = 0;
1205 parser->state = s_header_field;
1206
1207 switch (c) {
1208 case 'c':
1209 parser->header_state = h_C;
1210 break;
1211
1212 case 'p':
1213 parser->header_state = h_matching_proxy_connection;
1214 break;
1215
1216 case 't':
1217 parser->header_state = h_matching_transfer_encoding;
1218 break;
1219
1220 case 'u':
1221 parser->header_state = h_matching_upgrade;
1222 break;
1223
1224 default:
1225 parser->header_state = h_general;
1226 break;
1227 }
1228 break;
1229 }
1230
1231 case s_header_field:
1232 {
1233 c = TOKEN(ch);
1234
1235 if (c) {
1236 switch (parser->header_state) {
1237 case h_general:
1238 break;
1239
1240 case h_C:
1241 parser->index++;
1242 parser->header_state = (c == 'o' ? h_CO : h_general);
1243 break;
1244
1245 case h_CO:
1246 parser->index++;
1247 parser->header_state = (c == 'n' ? h_CON : h_general);
1248 break;
1249
1250 case h_CON:
1251 parser->index++;
1252 switch (c) {
1253 case 'n':
1254 parser->header_state = h_matching_connection;
1255 break;
1256 case 't':
1257 parser->header_state = h_matching_content_length;
1258 break;
1259 default:
1260 parser->header_state = h_general;
1261 break;
1262 }
1263 break;
1264
1265 /* connection */
1266
1267 case h_matching_connection:
1268 parser->index++;
1269 if (parser->index > sizeof(CONNECTION)-1
1270 || c != CONNECTION[parser->index]) {
1271 parser->header_state = h_general;
1272 } else if (parser->index == sizeof(CONNECTION)-2) {
1273 parser->header_state = h_connection;
1274 }
1275 break;
1276
1277 /* proxy-connection */
1278
1279 case h_matching_proxy_connection:
1280 parser->index++;
1281 if (parser->index > sizeof(PROXY_CONNECTION)-1
1282 || c != PROXY_CONNECTION[parser->index]) {
1283 parser->header_state = h_general;
1284 } else if (parser->index == sizeof(PROXY_CONNECTION)-2) {
1285 parser->header_state = h_connection;
1286 }
1287 break;
1288
1289 /* content-length */
1290
1291 case h_matching_content_length:
1292 parser->index++;
1293 if (parser->index > sizeof(CONTENT_LENGTH)-1
1294 || c != CONTENT_LENGTH[parser->index]) {
1295 parser->header_state = h_general;
1296 } else if (parser->index == sizeof(CONTENT_LENGTH)-2) {
1297 parser->header_state = h_content_length;
1298 }
1299 break;
1300
1301 /* transfer-encoding */
1302
1303 case h_matching_transfer_encoding:
1304 parser->index++;
1305 if (parser->index > sizeof(TRANSFER_ENCODING)-1
1306 || c != TRANSFER_ENCODING[parser->index]) {
1307 parser->header_state = h_general;
1308 } else if (parser->index == sizeof(TRANSFER_ENCODING)-2) {
1309 parser->header_state = h_transfer_encoding;
1310 }
1311 break;
1312
1313 /* upgrade */
1314
1315 case h_matching_upgrade:
1316 parser->index++;
1317 if (parser->index > sizeof(UPGRADE)-1
1318 || c != UPGRADE[parser->index]) {
1319 parser->header_state = h_general;
1320 } else if (parser->index == sizeof(UPGRADE)-2) {
1321 parser->header_state = h_upgrade;
1322 }
1323 break;
1324
1325 case h_connection:
1326 case h_content_length:
1327 case h_transfer_encoding:
1328 case h_upgrade:
1329 if (ch != ' ') parser->header_state = h_general;
1330 break;
1331
1332 default:
1333 assert(0 && "Unknown header_state");
1334 break;
1335 }
1336 break;
1337 }
1338
1339 if (ch == ':') {
1340 parser->state = s_header_value_start;
1341 CALLBACK_DATA(header_field);
1342 break;
1343 }
1344
1345 if (ch == CR) {
1346 parser->state = s_header_almost_done;
1347 CALLBACK_DATA(header_field);
1348 break;
1349 }
1350
1351 if (ch == LF) {
1352 parser->state = s_header_field_start;
1353 CALLBACK_DATA(header_field);
1354 break;
1355 }
1356
1357 SET_ERRNO(HPE_INVALID_HEADER_TOKEN);
1358 goto error;
1359 }
1360
1361 case s_header_value_start:
1362 {
1363 if (ch == ' ' || ch == '\t') break;
1364
1365 MARK(header_value);
1366
1367 parser->state = s_header_value;
1368 parser->index = 0;
1369
1370 if (ch == CR) {
1371 parser->header_state = h_general;
1372 parser->state = s_header_almost_done;
1373 CALLBACK_DATA(header_value);
1374 break;
1375 }
1376
1377 if (ch == LF) {
1378 parser->state = s_header_field_start;
1379 CALLBACK_DATA(header_value);
1380 break;
1381 }
1382
1383 c = LOWER(ch);
1384
1385 switch (parser->header_state) {
1386 case h_upgrade:
1387 parser->flags |= F_UPGRADE;
1388 parser->header_state = h_general;
1389 break;
1390
1391 case h_transfer_encoding:
1392 /* looking for 'Transfer-Encoding: chunked' */
1393 if ('c' == c) {
1394 parser->header_state = h_matching_transfer_encoding_chunked;
1395 } else {
1396 parser->header_state = h_general;
1397 }
1398 break;
1399
1400 case h_content_length:
1401 if (!IS_NUM(ch)) {
1402 SET_ERRNO(HPE_INVALID_CONTENT_LENGTH);
1403 goto error;
1404 }
1405
1406 parser->content_length = ch - '0';
1407 break;
1408
1409 case h_connection:
1410 /* looking for 'Connection: keep-alive' */
1411 if (c == 'k') {
1412 parser->header_state = h_matching_connection_keep_alive;
1413 /* looking for 'Connection: close' */
1414 } else if (c == 'c') {
1415 parser->header_state = h_matching_connection_close;
1416 } else {
1417 parser->header_state = h_general;
1418 }
1419 break;
1420
1421 default:
1422 parser->header_state = h_general;
1423 break;
1424 }
1425 break;
1426 }
1427
1428 case s_header_value:
1429 {
1430
1431 if (ch == CR) {
1432 parser->state = s_header_almost_done;
1433 CALLBACK_DATA(header_value);
1434 break;
1435 }
1436
1437 if (ch == LF) {
1438 parser->state = s_header_almost_done;
1439 CALLBACK_DATA_NOADVANCE(header_value);
1440 goto reexecute_byte;
1441 }
1442
1443 c = LOWER(ch);
1444
1445 switch (parser->header_state) {
1446 case h_general:
1447 break;
1448
1449 case h_connection:
1450 case h_transfer_encoding:
1451 assert(0 && "Shouldn't get here.");
1452 break;
1453
1454 case h_content_length:
1455 {
1456 uint64_t t;
1457
1458 if (ch == ' ') break;
1459
1460 if (!IS_NUM(ch)) {
1461 SET_ERRNO(HPE_INVALID_CONTENT_LENGTH);
1462 goto error;
1463 }
1464
1465 t = parser->content_length;
1466 t *= 10;
1467 t += ch - '0';
1468
1469 /* Overflow? */
1470 if (t < parser->content_length || t == ULLONG_MAX) {
1471 SET_ERRNO(HPE_INVALID_CONTENT_LENGTH);
1472 goto error;
1473 }
1474
1475 parser->content_length = t;
1476 break;
1477 }
1478
1479 /* Transfer-Encoding: chunked */
1480 case h_matching_transfer_encoding_chunked:
1481 parser->index++;
1482 if (parser->index > sizeof(CHUNKED)-1
1483 || c != CHUNKED[parser->index]) {
1484 parser->header_state = h_general;
1485 } else if (parser->index == sizeof(CHUNKED)-2) {
1486 parser->header_state = h_transfer_encoding_chunked;
1487 }
1488 break;
1489
1490 /* looking for 'Connection: keep-alive' */
1491 case h_matching_connection_keep_alive:
1492 parser->index++;
1493 if (parser->index > sizeof(KEEP_ALIVE)-1
1494 || c != KEEP_ALIVE[parser->index]) {
1495 parser->header_state = h_general;
1496 } else if (parser->index == sizeof(KEEP_ALIVE)-2) {
1497 parser->header_state = h_connection_keep_alive;
1498 }
1499 break;
1500
1501 /* looking for 'Connection: close' */
1502 case h_matching_connection_close:
1503 parser->index++;
1504 if (parser->index > sizeof(CLOSE)-1 || c != CLOSE[parser->index]) {
1505 parser->header_state = h_general;
1506 } else if (parser->index == sizeof(CLOSE)-2) {
1507 parser->header_state = h_connection_close;
1508 }
1509 break;
1510
1511 case h_transfer_encoding_chunked:
1512 case h_connection_keep_alive:
1513 case h_connection_close:
1514 if (ch != ' ') parser->header_state = h_general;
1515 break;
1516
1517 default:
1518 parser->state = s_header_value;
1519 parser->header_state = h_general;
1520 break;
1521 }
1522 break;
1523 }
1524
1525 case s_header_almost_done:
1526 {
1527 STRICT_CHECK(ch != LF);
1528
1529 parser->state = s_header_value_lws;
1530
1531 switch (parser->header_state) {
1532 case h_connection_keep_alive:
1533 parser->flags |= F_CONNECTION_KEEP_ALIVE;
1534 break;
1535 case h_connection_close:
1536 parser->flags |= F_CONNECTION_CLOSE;
1537 break;
1538 case h_transfer_encoding_chunked:
1539 parser->flags |= F_CHUNKED;
1540 break;
1541 default:
1542 break;
1543 }
1544
1545 break;
1546 }
1547
1548 case s_header_value_lws:
1549 {
1550 if (ch == ' ' || ch == '\t')
1551 parser->state = s_header_value_start;
1552 else
1553 {
1554 parser->state = s_header_field_start;
1555 goto reexecute_byte;
1556 }
1557 break;
1558 }
1559
1560 case s_headers_almost_done:
1561 {
1562 STRICT_CHECK(ch != LF);
1563
1564 if (parser->flags & F_TRAILING) {
1565 /* End of a chunked request */
1566 parser->state = NEW_MESSAGE();
1567 CALLBACK_NOTIFY(message_complete);
1568 break;
1569 }
1570
1571 parser->state = s_headers_done;
1572
1573 /* Set this here so that on_headers_complete() callbacks can see it */
1574 parser->upgrade =
1575 (parser->flags & F_UPGRADE || parser->method == HTTP_CONNECT);
1576
1577 /* Here we call the headers_complete callback. This is somewhat
1578 * different than other callbacks because if the user returns 1, we
1579 * will interpret that as saying that this message has no body. This
1580 * is needed for the annoying case of receiving a response to a HEAD
1581 * request.
1582 *
1583 * We'd like to use CALLBACK_NOTIFY_NOADVANCE() here but we cannot, so
1584 * we have to simulate it by handling a change in errno below.
1585 */
1586 if (settings->on_headers_complete) {
1587 switch (settings->on_headers_complete(parser)) {
1588 case 0:
1589 break;
1590
1591 case 1:
1592 parser->flags |= F_SKIPBODY;
1593 break;
1594
1595 default:
1596 SET_ERRNO(HPE_CB_headers_complete);
1597 return p - data; /* Error */
1598 }
1599 }
1600
1601 if (HTTP_PARSER_ERRNO(parser) != HPE_OK) {
1602 return p - data;
1603 }
1604
1605 goto reexecute_byte;
1606 }
1607
1608 case s_headers_done:
1609 {
1610 STRICT_CHECK(ch != LF);
1611
1612 parser->nread = 0;
1613
1614 /* Exit, the rest of the connect is in a different protocol. */
1615 if (parser->upgrade) {
1616 parser->state = NEW_MESSAGE();
1617 CALLBACK_NOTIFY(message_complete);
1618 return (p - data) + 1;
1619 }
1620
1621 if (parser->flags & F_SKIPBODY) {
1622 parser->state = NEW_MESSAGE();
1623 CALLBACK_NOTIFY(message_complete);
1624 } else if (parser->flags & F_CHUNKED) {
1625 /* chunked encoding - ignore Content-Length header */
1626 parser->state = s_chunk_size_start;
1627 } else {
1628 if (parser->content_length == 0) {
1629 /* Content-Length header given but zero: Content-Length: 0\r\n */
1630 parser->state = NEW_MESSAGE();
1631 CALLBACK_NOTIFY(message_complete);
1632 } else if (parser->content_length != ULLONG_MAX) {
1633 /* Content-Length header given and non-zero */
1634 parser->state = s_body_identity;
1635 } else {
1636 if (parser->type == HTTP_REQUEST ||
1637 !http_message_needs_eof(parser)) {
1638 /* Assume content-length 0 - read the next */
1639 parser->state = NEW_MESSAGE();
1640 CALLBACK_NOTIFY(message_complete);
1641 } else {
1642 /* Read body until EOF */
1643 parser->state = s_body_identity_eof;
1644 }
1645 }
1646 }
1647
1648 break;
1649 }
1650
1651 case s_body_identity:
1652 {
1653 uint64_t to_read = MIN(parser->content_length,
1654 (uint64_t) ((data + len) - p));
1655
1656 assert(parser->content_length != 0
1657 && parser->content_length != ULLONG_MAX);
1658
1659 /* The difference between advancing content_length and p is because
1660 * the latter will automatically advance on the next loop iteration.
1661 * Further, if content_length ends up at 0, we want to see the last
1662 * byte again for our message complete callback.
1663 */
1664 MARK(body);
1665 parser->content_length -= to_read;
1666 p += to_read - 1;
1667
1668 if (parser->content_length == 0) {
1669 parser->state = s_message_done;
1670
1671 /* Mimic CALLBACK_DATA_NOADVANCE() but with one extra byte.
1672 *
1673 * The alternative to doing this is to wait for the next byte to
1674 * trigger the data callback, just as in every other case. The
1675 * problem with this is that this makes it difficult for the test
1676 * harness to distinguish between complete-on-EOF and
1677 * complete-on-length. It's not clear that this distinction is
1678 * important for applications, but let's keep it for now.
1679 */
1680 CALLBACK_DATA_(body, p - body_mark + 1, p - data);
1681 goto reexecute_byte;
1682 }
1683
1684 break;
1685 }
1686
1687 /* read until EOF */
1688 case s_body_identity_eof:
1689 MARK(body);
1690 p = data + len - 1;
1691
1692 break;
1693
1694 case s_message_done:
1695 parser->state = NEW_MESSAGE();
1696 CALLBACK_NOTIFY(message_complete);
1697 break;
1698
1699 case s_chunk_size_start:
1700 {
1701 assert(parser->nread == 1);
1702 assert(parser->flags & F_CHUNKED);
1703
1704 unhex_val = unhex[(unsigned char)ch];
1705 if (unhex_val == -1) {
1706 SET_ERRNO(HPE_INVALID_CHUNK_SIZE);
1707 goto error;
1708 }
1709
1710 parser->content_length = unhex_val;
1711 parser->state = s_chunk_size;
1712 break;
1713 }
1714
1715 case s_chunk_size:
1716 {
1717 uint64_t t;
1718
1719 assert(parser->flags & F_CHUNKED);
1720
1721 if (ch == CR) {
1722 parser->state = s_chunk_size_almost_done;
1723 break;
1724 }
1725
1726 unhex_val = unhex[(unsigned char)ch];
1727
1728 if (unhex_val == -1) {
1729 if (ch == ';' || ch == ' ') {
1730 parser->state = s_chunk_parameters;
1731 break;
1732 }
1733
1734 SET_ERRNO(HPE_INVALID_CHUNK_SIZE);
1735 goto error;
1736 }
1737
1738 t = parser->content_length;
1739 t *= 16;
1740 t += unhex_val;
1741
1742 /* Overflow? */
1743 if (t < parser->content_length || t == ULLONG_MAX) {
1744 SET_ERRNO(HPE_INVALID_CONTENT_LENGTH);
1745 goto error;
1746 }
1747
1748 parser->content_length = t;
1749 break;
1750 }
1751
1752 case s_chunk_parameters:
1753 {
1754 assert(parser->flags & F_CHUNKED);
1755 /* just ignore this shit. TODO check for overflow */
1756 if (ch == CR) {
1757 parser->state = s_chunk_size_almost_done;
1758 break;
1759 }
1760 break;
1761 }
1762
1763 case s_chunk_size_almost_done:
1764 {
1765 assert(parser->flags & F_CHUNKED);
1766 STRICT_CHECK(ch != LF);
1767
1768 parser->nread = 0;
1769
1770 if (parser->content_length == 0) {
1771 parser->flags |= F_TRAILING;
1772 parser->state = s_header_field_start;
1773 } else {
1774 parser->state = s_chunk_data;
1775 }
1776 break;
1777 }
1778
1779 case s_chunk_data:
1780 {
1781 uint64_t to_read = MIN(parser->content_length,
1782 (uint64_t) ((data + len) - p));
1783
1784 assert(parser->flags & F_CHUNKED);
1785 assert(parser->content_length != 0
1786 && parser->content_length != ULLONG_MAX);
1787
1788 /* See the explanation in s_body_identity for why the content
1789 * length and data pointers are managed this way.
1790 */
1791 MARK(body);
1792 parser->content_length -= to_read;
1793 p += to_read - 1;
1794
1795 if (parser->content_length == 0) {
1796 parser->state = s_chunk_data_almost_done;
1797 }
1798
1799 break;
1800 }
1801
1802 case s_chunk_data_almost_done:
1803 assert(parser->flags & F_CHUNKED);
1804 assert(parser->content_length == 0);
1805 STRICT_CHECK(ch != CR);
1806 parser->state = s_chunk_data_done;
1807 CALLBACK_DATA(body);
1808 break;
1809
1810 case s_chunk_data_done:
1811 assert(parser->flags & F_CHUNKED);
1812 STRICT_CHECK(ch != LF);
1813 parser->nread = 0;
1814 parser->state = s_chunk_size_start;
1815 break;
1816
1817 default:
1818 assert(0 && "unhandled state");
1819 SET_ERRNO(HPE_INVALID_INTERNAL_STATE);
1820 goto error;
1821 }
1822 }
1823
1824 /* Run callbacks for any marks that we have leftover after we ran out of
1825 * bytes. There should be at most one of these set, so it's OK to invoke
1826 * them in series (unset marks will not result in callbacks).
1827 *
1828 * We use the NOADVANCE() variety of callbacks here because 'p' has already
1829 * overflowed 'data' and this allows us to correct for the off-by-one that
1830 * we'd otherwise have (since CALLBACK_DATA() is meant to be run with a 'p'
1831 * value that's in-bounds).
1832 */
1833
1834 assert(((header_field_mark ? 1 : 0) +
1835 (header_value_mark ? 1 : 0) +
1836 (url_mark ? 1 : 0) +
1837 (body_mark ? 1 : 0)) <= 1);
1838
1839 CALLBACK_DATA_NOADVANCE(header_field);
1840 CALLBACK_DATA_NOADVANCE(header_value);
1841 CALLBACK_DATA_NOADVANCE(url);
1842 CALLBACK_DATA_NOADVANCE(body);
1843
1844 return len;
1845
1846 error:
1847 if (HTTP_PARSER_ERRNO(parser) == HPE_OK) {
1848 SET_ERRNO(HPE_UNKNOWN);
1849 }
1850
1851 return (p - data);
1852 }
1853
1854
1855 /* Does the parser need to see an EOF to find the end of the message? */
1856 int
1857 http_message_needs_eof (const http_parser *parser)
1858 {
1859 if (parser->type == HTTP_REQUEST) {
1860 return 0;
1861 }
1862
1863 /* See RFC 2616 section 4.4 */
1864 if (parser->status_code / 100 == 1 || /* 1xx e.g. Continue */
1865 parser->status_code == 204 || /* No Content */
1866 parser->status_code == 304 || /* Not Modified */
1867 parser->flags & F_SKIPBODY) { /* response to a HEAD request */
1868 return 0;
1869 }
1870
1871 if ((parser->flags & F_CHUNKED) || parser->content_length != ULLONG_MAX) {
1872 return 0;
1873 }
1874
1875 return 1;
1876 }
1877
1878
1879 int
1880 http_should_keep_alive (const http_parser *parser)
1881 {
1882 if (parser->http_major > 0 && parser->http_minor > 0) {
1883 /* HTTP/1.1 */
1884 if (parser->flags & F_CONNECTION_CLOSE) {
1885 return 0;
1886 }
1887 } else {
1888 /* HTTP/1.0 or earlier */
1889 if (!(parser->flags & F_CONNECTION_KEEP_ALIVE)) {
1890 return 0;
1891 }
1892 }
1893
1894 return !http_message_needs_eof(parser);
1895 }
1896
1897
1898 const char *
1899 http_method_str (enum http_method m)
1900 {
1901 return ELEM_AT(method_strings, m, "<unknown>");
1902 }
1903
1904
1905 void
1906 http_parser_init (http_parser *parser, enum http_parser_type t)
1907 {
1908 void *data = parser->data; /* preserve application data */
1909 memset(parser, 0, sizeof(*parser));
1910 parser->data = data;
1911 parser->type = t;
1912 parser->state = (t == HTTP_REQUEST ? s_start_req : (t == HTTP_RESPONSE ? s_start_res : s_start_req_or_res));
1913 parser->http_errno = HPE_OK;
1914 }
1915
1916 const char *
1917 http_errno_name(enum http_errno err) {
1918 assert(err < (sizeof(http_strerror_tab)/sizeof(http_strerror_tab[0])));
1919 return http_strerror_tab[err].name;
1920 }
1921
1922 const char *
1923 http_errno_description(enum http_errno err) {
1924 assert(err < (sizeof(http_strerror_tab)/sizeof(http_strerror_tab[0])));
1925 return http_strerror_tab[err].description;
1926 }
1927
1928 static enum http_host_state
1929 http_parse_host_char(enum http_host_state s, const char ch) {
1930 switch(s) {
1931 case s_http_userinfo:
1932 case s_http_userinfo_start:
1933 if (ch == '@') {
1934 return s_http_host_start;
1935 }
1936
1937 if (IS_USERINFO_CHAR(ch)) {
1938 return s_http_userinfo;
1939 }
1940 break;
1941
1942 case s_http_host_start:
1943 if (ch == '[') {
1944 return s_http_host_v6_start;
1945 }
1946
1947 if (IS_HOST_CHAR(ch)) {
1948 return s_http_host;
1949 }
1950
1951 break;
1952
1953 case s_http_host:
1954 if (IS_HOST_CHAR(ch)) {
1955 return s_http_host;
1956 }
1957
1958 /* FALLTHROUGH */
1959 case s_http_host_v6_end:
1960 if (ch == ':') {
1961 return s_http_host_port_start;
1962 }
1963
1964 break;
1965
1966 case s_http_host_v6:
1967 if (ch == ']') {
1968 return s_http_host_v6_end;
1969 }
1970
1971 /* FALLTHROUGH */
1972 case s_http_host_v6_start:
1973 if (IS_HEX(ch) || ch == ':') {
1974 return s_http_host_v6;
1975 }
1976
1977 break;
1978
1979 case s_http_host_port:
1980 case s_http_host_port_start:
1981 if (IS_NUM(ch)) {
1982 return s_http_host_port;
1983 }
1984
1985 break;
1986
1987 default:
1988 break;
1989 }
1990 return s_http_host_dead;
1991 }
1992
1993 static int
1994 http_parse_host(const char * buf, struct http_parser_url *u, int found_at) {
1995 enum http_host_state s;
1996
1997 const char *p;
1998 size_t buflen = u->field_data[UF_HOST].off + u->field_data[UF_HOST].len;
1999
2000 u->field_data[UF_HOST].len = 0;
2001
2002 s = found_at ? s_http_userinfo_start : s_http_host_start;
2003
2004 for (p = buf + u->field_data[UF_HOST].off; p < buf + buflen; p++) {
2005 enum http_host_state new_s = http_parse_host_char(s, *p);
2006
2007 if (new_s == s_http_host_dead) {
2008 return 1;
2009 }
2010
2011 switch(new_s) {
2012 case s_http_host:
2013 if (s != s_http_host) {
2014 u->field_data[UF_HOST].off = p - buf;
2015 }
2016 u->field_data[UF_HOST].len++;
2017 break;
2018
2019 case s_http_host_v6:
2020 if (s != s_http_host_v6) {
2021 u->field_data[UF_HOST].off = p - buf;
2022 }
2023 u->field_data[UF_HOST].len++;
2024 break;
2025
2026 case s_http_host_port:
2027 if (s != s_http_host_port) {
2028 u->field_data[UF_PORT].off = p - buf;
2029 u->field_data[UF_PORT].len = 0;
2030 u->field_set |= (1 << UF_PORT);
2031 }
2032 u->field_data[UF_PORT].len++;
2033 break;
2034
2035 case s_http_userinfo:
2036 if (s != s_http_userinfo) {
2037 u->field_data[UF_USERINFO].off = p - buf ;
2038 u->field_data[UF_USERINFO].len = 0;
2039 u->field_set |= (1 << UF_USERINFO);
2040 }
2041 u->field_data[UF_USERINFO].len++;
2042 break;
2043
2044 default:
2045 break;
2046 }
2047 s = new_s;
2048 }
2049
2050 /* Make sure we don't end somewhere unexpected */
2051 switch (s) {
2052 case s_http_host_start:
2053 case s_http_host_v6_start:
2054 case s_http_host_v6:
2055 case s_http_host_port_start:
2056 case s_http_userinfo:
2057 case s_http_userinfo_start:
2058 return 1;
2059 default:
2060 break;
2061 }
2062
2063 return 0;
2064 }
2065
2066 int
2067 http_parser_parse_url(const char *buf, size_t buflen, int is_connect,
2068 struct http_parser_url *u)
2069 {
2070 enum state s;
2071 const char *p;
2072 enum http_parser_url_fields uf, old_uf;
2073 int found_at = 0;
2074
2075 u->port = u->field_set = 0;
2076 s = is_connect ? s_req_server_start : s_req_spaces_before_url;
2077 uf = old_uf = UF_MAX;
2078
2079 for (p = buf; p < buf + buflen; p++) {
2080 s = parse_url_char(s, *p);
2081
2082 /* Figure out the next field that we're operating on */
2083 switch (s) {
2084 case s_dead:
2085 return 1;
2086
2087 /* Skip delimeters */
2088 case s_req_schema_slash:
2089 case s_req_schema_slash_slash:
2090 case s_req_server_start:
2091 case s_req_query_string_start:
2092 case s_req_fragment_start:
2093 continue;
2094
2095 case s_req_schema:
2096 uf = UF_SCHEMA;
2097 break;
2098
2099 case s_req_server_with_at:
2100 found_at = 1;
2101
2102 /* FALLTROUGH */
2103 case s_req_server:
2104 uf = UF_HOST;
2105 break;
2106
2107 case s_req_path:
2108 uf = UF_PATH;
2109 break;
2110
2111 case s_req_query_string:
2112 uf = UF_QUERY;
2113 break;
2114
2115 case s_req_fragment:
2116 uf = UF_FRAGMENT;
2117 break;
2118
2119 default:
2120 assert(!"Unexpected state");
2121 return 1;
2122 }
2123
2124 /* Nothing's changed; soldier on */
2125 if (uf == old_uf) {
2126 u->field_data[uf].len++;
2127 continue;
2128 }
2129
2130 u->field_data[uf].off = p - buf;
2131 u->field_data[uf].len = 1;
2132
2133 u->field_set |= (1 << uf);
2134 old_uf = uf;
2135 }
2136
2137 /* host must be present if there is a schema */
2138 /* parsing http:///toto will fail */
2139 if ((u->field_set & ((1 << UF_SCHEMA) | (1 << UF_HOST))) != 0) {
2140 if (http_parse_host(buf, u, found_at) != 0) {
2141 return 1;
2142 }
2143 }
2144
2145 /* CONNECT requests can only contain "hostname:port" */
2146 if (is_connect && u->field_set != ((1 << UF_HOST)|(1 << UF_PORT))) {
2147 return 1;
2148 }
2149
2150 if (u->field_set & (1 << UF_PORT)) {
2151 /* Don't bother with endp; we've already validated the string */
2152 unsigned long v = strtoul(buf + u->field_data[UF_PORT].off, NULL, 10);
2153
2154 /* Ports have a max value of 2^16 */
2155 if (v > 0xffff) {
2156 return 1;
2157 }
2158
2159 u->port = (uint16_t) v;
2160 }
2161
2162 return 0;
2163 }
2164
2165 void
2166 http_parser_pause(http_parser *parser, int paused) {
2167 /* Users should only be pausing/unpausing a parser that is not in an error
2168 * state. In non-debug builds, there's not much that we can do about this
2169 * other than ignore it.
2170 */
2171 if (HTTP_PARSER_ERRNO(parser) == HPE_OK ||
2172 HTTP_PARSER_ERRNO(parser) == HPE_PAUSED) {
2173 SET_ERRNO((paused) ? HPE_PAUSED : HPE_OK);
2174 } else {
2175 assert(0 && "Attempting to pause parser in error state");
2176 }
2177 }
2178
2179 int
2180 http_body_is_final(const struct http_parser *parser) {
2181 return parser->state == s_message_done;
2182 }