Commit | Line | Data |
---|---|---|
2340bcd3 JVH |
1 | /* Based on src/http/ngx_http_parse.c from NGINX copyright Igor Sysoev |
2 | * | |
3 | * Additional changes are licensed under the same terms as NGINX and | |
4 | * copyright Joyent, Inc. and other Node contributors. All rights reserved. | |
5 | * | |
6 | * Permission is hereby granted, free of charge, to any person obtaining a copy | |
7 | * of this software and associated documentation files (the "Software"), to | |
8 | * deal in the Software without restriction, including without limitation the | |
9 | * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or | |
10 | * sell copies of the Software, and to permit persons to whom the Software is | |
11 | * furnished to do so, subject to the following conditions: | |
12 | * | |
13 | * The above copyright notice and this permission notice shall be included in | |
14 | * all copies or substantial portions of the Software. | |
15 | * | |
16 | * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR | |
17 | * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, | |
18 | * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE | |
19 | * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER | |
20 | * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING | |
21 | * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS | |
22 | * IN THE SOFTWARE. | |
23 | */ | |
24 | #include "http_parser.h" | |
25 | #include <assert.h> | |
26 | #include <stddef.h> | |
462c72aa JVH |
27 | #include <ctype.h> |
28 | #include <stdlib.h> | |
29 | #include <string.h> | |
30 | #include <limits.h> | |
2340bcd3 | 31 | |
462c72aa JVH |
/* Provide ULLONG_MAX only when nothing included so far has defined it. */
#if !defined(ULLONG_MAX)
# define ULLONG_MAX ((uint64_t) -1) /* 2^64-1 */
#endif

/* Smaller of two values. Both arguments may be evaluated twice. */
#if !defined(MIN)
# define MIN(a,b) ((a) < (b) ? (a) : (b))
#endif

/* Element count of a true array (not a pointer). */
#if !defined(ARRAY_SIZE)
# define ARRAY_SIZE(a) (sizeof(a) / sizeof((a)[0]))
#endif

/* Test bit `i` of the bitmap `a`: byte i / 8, bit i % 8. Yields 0 or 1. */
#if !defined(BIT_AT)
# define BIT_AT(a, i)                                                \
  (((((unsigned int) (a)[(unsigned int) (i) >> 3]) >>                \
     ((unsigned int) (i) & 7)) & 1u) != 0)
#endif

/* a[i] when `i` is inside the array, otherwise the fallback value `v`. */
#if !defined(ELEM_AT)
# define ELEM_AT(a, i, v) ((unsigned int) (i) < ARRAY_SIZE(a) ? (a)[(i)] : (v))
#endif
2340bcd3 JVH |
53 | |
/* Record error code `e` on the in-scope `parser`. In debug builds the source
 * line of the SET_ERRNO() call is stored as well.
 */
#if HTTP_PARSER_DEBUG
# define SET_ERRNO_LINE_() (parser->error_lineno = __LINE__)
#else
# define SET_ERRNO_LINE_() ((void) 0)
#endif

#define SET_ERRNO(e)                                                 \
do {                                                                 \
  parser->http_errno = (e);                                          \
  SET_ERRNO_LINE_();                                                 \
} while (0)
66 | ||
67 | ||
462c72aa JVH |
/* Invoke the notify callback settings->on_<FOR>, if one is installed.
 *
 * A non-zero result from the callback is recorded as HPE_CB_<FOR>. After the
 * callback has run, any non-OK parser errno (set here, or by the callback
 * itself, e.g. a pause) makes the ENCLOSING function return ER.
 */
#define CALLBACK_NOTIFY_(FOR, ER)                                    \
do {                                                                 \
  assert(HTTP_PARSER_ERRNO(parser) == HPE_OK);                       \
                                                                     \
  if (settings->on_##FOR != NULL) {                                  \
    if (settings->on_##FOR(parser) != 0) {                           \
      SET_ERRNO(HPE_CB_##FOR);                                       \
    }                                                                \
                                                                     \
    /* Either the callback failed or the parser was paused: leave */ \
    if (HTTP_PARSER_ERRNO(parser) != HPE_OK) {                       \
      return (ER);                                                   \
    }                                                                \
  }                                                                  \
} while (0)

/* Notify variant that counts the current byte as consumed on early return */
#define CALLBACK_NOTIFY(FOR)            CALLBACK_NOTIFY_(FOR, p - data + 1)

/* Notify variant that leaves the current byte unconsumed on early return */
#define CALLBACK_NOTIFY_NOADVANCE(FOR)  CALLBACK_NOTIFY_(FOR, p - data)
2340bcd3 | 90 | |
462c72aa JVH |
/* Invoke the data callback settings->on_<FOR> on the span that starts at
 * <FOR>_mark and is LEN bytes long. Does nothing when the mark is unset.
 *
 * A non-zero result from the callback is recorded as HPE_CB_<FOR>. After the
 * callback has run, any non-OK parser errno makes the ENCLOSING function
 * return ER (the mark is left untouched in that case). Otherwise the mark is
 * cleared.
 */
#define CALLBACK_DATA_(FOR, LEN, ER)                                 \
do {                                                                 \
  assert(HTTP_PARSER_ERRNO(parser) == HPE_OK);                       \
                                                                     \
  if (FOR##_mark != NULL) {                                          \
    if (settings->on_##FOR != NULL) {                                \
      if (settings->on_##FOR(parser, FOR##_mark, (LEN)) != 0) {      \
        SET_ERRNO(HPE_CB_##FOR);                                     \
      }                                                              \
                                                                     \
      /* Either the callback failed or the parser was paused */      \
      if (HTTP_PARSER_ERRNO(parser) != HPE_OK) {                     \
        return (ER);                                                 \
      }                                                              \
    }                                                                \
    FOR##_mark = NULL;                                               \
  }                                                                  \
} while (0)

/* Data variant that counts the current byte as consumed on early return */
#define CALLBACK_DATA(FOR)                                           \
    CALLBACK_DATA_(FOR, p - FOR##_mark, p - data + 1)

/* Data variant that leaves the current byte unconsumed on early return */
#define CALLBACK_DATA_NOADVANCE(FOR)                                 \
    CALLBACK_DATA_(FOR, p - FOR##_mark, p - data)
118 | ||
/* Remember the current position `p` in <FOR>_mark, unless a mark is already
 * set (an existing mark is never overwritten).
 */
#define MARK(FOR)                                                    \
do {                                                                 \
  if (FOR##_mark == 0) {                                             \
    FOR##_mark = p;                                                  \
  }                                                                  \
} while (0)
2340bcd3 JVH |
126 | |
127 | ||
/* Lower-case header names and header values referred to by name in this file */
#define CHUNKED             "chunked"
#define CLOSE               "close"
#define CONNECTION          "connection"
#define CONTENT_LENGTH      "content-length"
#define KEEP_ALIVE          "keep-alive"
#define PROXY_CONNECTION    "proxy-connection"
#define TRANSFER_ENCODING   "transfer-encoding"
#define UPGRADE             "upgrade"
136 | ||
137 | ||
138 | static const char *method_strings[] = | |
462c72aa JVH |
139 | { |
140 | #define XX(num, name, string) #string, | |
141 | HTTP_METHOD_MAP(XX) | |
142 | #undef XX | |
2340bcd3 JVH |
143 | }; |
144 | ||
145 | ||
/* Tokens as defined by rfc 2616. Also lowercases them.
 *        token       = 1*<any CHAR except CTLs or separators>
 *     separators     = "(" | ")" | "<" | ">" | "@"
 *                    | "," | ";" | ":" | "\" | <">
 *                    | "/" | "[" | "]" | "?" | "="
 *                    | "{" | "}" | SP | HT
 *
 * Indexed by byte value: 0 means "not a token character", anything else is
 * the character to use (upper-case letters map to lower case). Only the
 * first 128 entries are listed; the rest are implicitly 0.
 */
static const char tokens[256] = {
/*   0 -  15: control characters */
   0,   0,   0,   0,   0,   0,   0,   0,   0,   0,   0,   0,   0,   0,   0,   0,
/*  16 -  31: control characters */
   0,   0,   0,   0,   0,   0,   0,   0,   0,   0,   0,   0,   0,   0,   0,   0,
/*  32 -  47: sp  !  "  #  $  %  &  '  (  )  *  +  ,  -  .  / */
   0,  '!',  0,  '#', '$', '%', '&', '\'', 0,   0,  '*', '+',  0,  '-', '.',  0,
/*  48 -  63: 0-9  :  ;  <  =  >  ? */
  '0', '1', '2', '3', '4', '5', '6', '7', '8', '9',  0,   0,   0,   0,   0,   0,
/*  64 -  79: @  A-O */
   0,  'a', 'b', 'c', 'd', 'e', 'f', 'g', 'h', 'i', 'j', 'k', 'l', 'm', 'n', 'o',
/*  80 -  95: P-Z  [  \  ]  ^  _ */
  'p', 'q', 'r', 's', 't', 'u', 'v', 'w', 'x', 'y', 'z',  0,   0,   0,  '^', '_',
/*  96 - 111: `  a-o */
  '`', 'a', 'b', 'c', 'd', 'e', 'f', 'g', 'h', 'i', 'j', 'k', 'l', 'm', 'n', 'o',
/* 112 - 127: p-z  {  |  }  ~  del */
  'p', 'q', 'r', 's', 't', 'u', 'v', 'w', 'x', 'y', 'z',  0,  '|',  0,  '~',  0
};
2340bcd3 JVH |
186 | |
187 | ||
/* Hex-digit lookup, indexed by byte value: 0..15 for '0'-'9', 'A'-'F' and
 * 'a'-'f', and -1 for every other byte.
 *
 * All 256 entries are spelled out on purpose. Omitted trailing entries would
 * be zero-initialised, which would make bytes 0x80-0xFF look like the valid
 * digit 0 instead of being rejected.
 */
static const int8_t unhex[256] =
  {-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1
  ,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1
  ,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1
  , 0, 1, 2, 3, 4, 5, 6, 7, 8, 9,-1,-1,-1,-1,-1,-1
  ,-1,10,11,12,13,14,15,-1,-1,-1,-1,-1,-1,-1,-1,-1
  ,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1
  ,-1,10,11,12,13,14,15,-1,-1,-1,-1,-1,-1,-1,-1,-1
  ,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1
  /* 0x80 - 0xFF: never hex digits */
  ,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1
  ,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1
  ,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1
  ,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1
  ,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1
  ,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1
  ,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1
  ,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1
  };
198 | ||
199 | ||
462c72aa JVH |
/* T(v) keeps a bit only in lenient (non-strict) builds. */
#if HTTP_PARSER_STRICT
# define T(v) 0
#else
# define T(v) v
#endif


/* Bitmap over byte values 0..255, read with BIT_AT(): byte n holds the flags
 * for characters 8n .. 8n+7, least significant bit first. A set bit marks the
 * character as an ordinary URL character. Only the first 16 bytes (characters
 * 0..127) are listed; the remaining bytes are implicitly 0.
 */
static const uint8_t normal_url_char[32] = {
  /*   0 -   7  nul .. bel            */  0x00,
  /*   8 -  15  ht(9), np(12) lenient */  T(0x02) | T(0x10),
  /*  16 -  23  dle .. etb            */  0x00,
  /*  24 -  31  can .. us             */  0x00,
  /*  32 -  39  all but sp and '#'    */  0xF6,
  /*  40 -  47  ( ) * + , - . /       */  0xFF,
  /*  48 -  55  0 .. 7                */  0xFF,
  /*  56 -  63  all but '?'           */  0x7F,
  /*  64 -  71  @ A .. G              */  0xFF,
  /*  72 -  79  H .. O                */  0xFF,
  /*  80 -  87  P .. W                */  0xFF,
  /*  88 -  95  X Y Z [ \ ] ^ _       */  0xFF,
  /*  96 - 103  ` a .. g              */  0xFF,
  /* 104 - 111  h .. o                */  0xFF,
  /* 112 - 119  p .. w                */  0xFF,
  /* 120 - 127  all but del           */  0x7F
};

#undef T
2340bcd3 JVH |
242 | |
/* States of the main parser state machine. The numeric order matters:
 * PARSING_HEADER() relies on every header state sorting at or before
 * s_headers_done.
 */
enum state {
  s_dead = 1, /* important that this is > 0 */

  s_start_req_or_res,
  s_res_or_resp_H,
  s_start_res,
  s_res_H,
  s_res_HT,
  s_res_HTT,
  s_res_HTTP,
  s_res_first_http_major,
  s_res_http_major,
  s_res_first_http_minor,
  s_res_http_minor,
  s_res_first_status_code,
  s_res_status_code,
  s_res_status,
  s_res_line_almost_done,

  s_start_req,

  s_req_method,
  s_req_spaces_before_url,
  s_req_schema,
  s_req_schema_slash,
  s_req_schema_slash_slash,
  s_req_server_start,
  s_req_server,
  s_req_server_with_at,
  s_req_path,
  s_req_query_string_start,
  s_req_query_string,
  s_req_fragment_start,
  s_req_fragment,
  s_req_http_start,
  s_req_http_H,
  s_req_http_HT,
  s_req_http_HTT,
  s_req_http_HTTP,
  s_req_first_http_major,
  s_req_http_major,
  s_req_first_http_minor,
  s_req_http_minor,
  s_req_line_almost_done,

  s_header_field_start,
  s_header_field,
  s_header_value_start,
  s_header_value,
  s_header_value_lws,

  s_header_almost_done,

  s_chunk_size_start,
  s_chunk_size,
  s_chunk_parameters,
  s_chunk_size_almost_done,

  s_headers_almost_done,
  s_headers_done,

  /* Important: 's_headers_done' must be the last 'header' state. All
   * states beyond this must be 'body' states. It is used for overflow
   * checking. See the PARSING_HEADER() macro.
   */

  s_chunk_data,
  s_chunk_data_almost_done,
  s_chunk_data_done,

  s_body_identity,
  s_body_identity_eof,

  s_message_done
};


/* True while `state` is one of the header states (anything up to and
 * including s_headers_done). The argument is parenthesised so that arbitrary
 * expressions, e.g. a conditional expression, are compared as a whole.
 */
#define PARSING_HEADER(state) ((state) <= s_headers_done)
2340bcd3 JVH |
321 | |
322 | ||
/* Header-matching states, numbered consecutively from 0. */
enum header_states {
  h_general = 0,
  h_C,
  h_CO,
  h_CON,

  /* a known header name is being matched */
  h_matching_connection,
  h_matching_proxy_connection,
  h_matching_content_length,
  h_matching_transfer_encoding,
  h_matching_upgrade,

  /* a known header name has been matched */
  h_connection,
  h_content_length,
  h_transfer_encoding,
  h_upgrade,

  /* a known header value is being matched */
  h_matching_transfer_encoding_chunked,
  h_matching_connection_keep_alive,
  h_matching_connection_close,

  /* a known header value has been matched */
  h_transfer_encoding_chunked,
  h_connection_keep_alive,
  h_connection_close
};
348 | ||
462c72aa JVH |
/* Host-parsing states, numbered consecutively from 1. */
enum http_host_state {
  s_http_host_dead = 1,
  s_http_userinfo_start,
  s_http_userinfo,
  s_http_host_start,
  s_http_host_v6_start,
  s_http_host,
  s_http_host_v6,
  s_http_host_v6_end,
  s_http_host_port_start,
  s_http_host_port
};
2340bcd3 JVH |
362 | |
/* Macros for character classes; depends on strict-mode.
 *
 * Every macro parenthesises its argument so that it can be handed an
 * arbitrary expression. Arguments may be evaluated more than once, so they
 * must be free of side effects.
 */
#define CR                  '\r'
#define LF                  '\n'
/* Sets bit 0x20, which maps 'A'-'Z' onto 'a'-'z'. Other bytes are changed
 * too, so the result is only meaningful for range checks like the ones below.
 */
#define LOWER(c)            ((unsigned char)((c) | 0x20))
#define IS_ALPHA(c)         (LOWER(c) >= 'a' && LOWER(c) <= 'z')
#define IS_NUM(c)           ((c) >= '0' && (c) <= '9')
#define IS_ALPHANUM(c)      (IS_ALPHA(c) || IS_NUM(c))
#define IS_HEX(c)           (IS_NUM(c) || (LOWER(c) >= 'a' && LOWER(c) <= 'f'))
#define IS_MARK(c)          ((c) == '-' || (c) == '_' || (c) == '.' || \
  (c) == '!' || (c) == '~' || (c) == '*' || (c) == '\'' || (c) == '(' || \
  (c) == ')')
#define IS_USERINFO_CHAR(c) (IS_ALPHANUM(c) || IS_MARK(c) || (c) == '%' || \
  (c) == ';' || (c) == ':' || (c) == '&' || (c) == '=' || (c) == '+' || \
  (c) == '$' || (c) == ',')

#if HTTP_PARSER_STRICT
#define TOKEN(c)            (tokens[(unsigned char)(c)])
#define IS_URL_CHAR(c)      (BIT_AT(normal_url_char, (unsigned char)(c)))
#define IS_HOST_CHAR(c)     (IS_ALPHANUM(c) || (c) == '.' || (c) == '-')
#else
/* Lenient mode: a space counts as a token character, bytes with the high
 * bit set count as URL characters, and '_' is accepted in host names.
 */
#define TOKEN(c)            (((c) == ' ') ? ' ' : tokens[(unsigned char)(c)])
#define IS_URL_CHAR(c)                                                         \
  (BIT_AT(normal_url_char, (unsigned char)(c)) || ((c) & 0x80))
#define IS_HOST_CHAR(c)                                                        \
  (IS_ALPHANUM(c) || (c) == '.' || (c) == '-' || (c) == '_')
#endif
389 | ||
390 | ||
/* Initial state for the in-scope parser, chosen by its type */
#define start_state                                                  \
  (parser->type == HTTP_REQUEST ? s_start_req : s_start_res)


/* STRICT_CHECK(cond): in strict builds, record HPE_STRICT and jump to the
 * enclosing function's `error` label when `cond` is true; in lenient builds
 * it expands to nothing, so `cond` is not evaluated.
 *
 * NEW_MESSAGE(): state to enter once a message has been handled. Strict
 * builds go to s_dead unless http_should_keep_alive() says otherwise.
 */
#if !HTTP_PARSER_STRICT
# define STRICT_CHECK(cond)
# define NEW_MESSAGE() start_state
#else
# define STRICT_CHECK(cond)                                          \
do {                                                                 \
  if (cond) {                                                        \
    SET_ERRNO(HPE_STRICT);                                           \
    goto error;                                                      \
  }                                                                  \
} while (0)
# define NEW_MESSAGE() (http_should_keep_alive(parser) ? start_state : s_dead)
#endif
407 | ||
408 | ||
409 | /* Map errno values to strings for human-readable output */ | |
410 | #define HTTP_STRERROR_GEN(n, s) { "HPE_" #n, s }, | |
411 | static struct { | |
412 | const char *name; | |
413 | const char *description; | |
414 | } http_strerror_tab[] = { | |
415 | HTTP_ERRNO_MAP(HTTP_STRERROR_GEN) | |
416 | }; | |
417 | #undef HTTP_STRERROR_GEN | |
418 | ||
462c72aa JVH |
419 | int http_message_needs_eof(const http_parser *parser); |
420 | ||
421 | /* Our URL parser. | |
422 | * | |
423 | * This is designed to be shared by http_parser_execute() for URL validation, | |
424 | * hence it has a state transition + byte-for-byte interface. In addition, it | |
425 | * is meant to be embedded in http_parser_parse_url(), which does the dirty | |
426 | * work of turning state transitions URL components for its API. | |
427 | * | |
428 | * This function should only be invoked with non-space characters. It is | |
429 | * assumed that the caller cares about (and can detect) the transition between | |
430 | * URL and non-URL states by looking for these. | |
431 | */ | |
432 | static enum state | |
433 | parse_url_char(enum state s, const char ch) | |
434 | { | |
435 | if (ch == ' ' || ch == '\r' || ch == '\n') { | |
436 | return s_dead; | |
437 | } | |
438 | ||
439 | #if HTTP_PARSER_STRICT | |
440 | if (ch == '\t' || ch == '\f') { | |
441 | return s_dead; | |
442 | } | |
443 | #endif | |
444 | ||
445 | switch (s) { | |
446 | case s_req_spaces_before_url: | |
447 | /* Proxied requests are followed by scheme of an absolute URI (alpha). | |
448 | * All methods except CONNECT are followed by '/' or '*'. | |
449 | */ | |
450 | ||
451 | if (ch == '/' || ch == '*') { | |
452 | return s_req_path; | |
453 | } | |
454 | ||
455 | if (IS_ALPHA(ch)) { | |
456 | return s_req_schema; | |
457 | } | |
458 | ||
459 | break; | |
460 | ||
461 | case s_req_schema: | |
462 | if (IS_ALPHA(ch)) { | |
463 | return s; | |
464 | } | |
465 | ||
466 | if (ch == ':') { | |
467 | return s_req_schema_slash; | |
468 | } | |
469 | ||
470 | break; | |
471 | ||
472 | case s_req_schema_slash: | |
473 | if (ch == '/') { | |
474 | return s_req_schema_slash_slash; | |
475 | } | |
476 | ||
477 | break; | |
478 | ||
479 | case s_req_schema_slash_slash: | |
480 | if (ch == '/') { | |
481 | return s_req_server_start; | |
482 | } | |
483 | ||
484 | break; | |
485 | ||
486 | case s_req_server_with_at: | |
487 | if (ch == '@') { | |
488 | return s_dead; | |
489 | } | |
490 | ||
491 | /* FALLTHROUGH */ | |
492 | case s_req_server_start: | |
493 | case s_req_server: | |
494 | if (ch == '/') { | |
495 | return s_req_path; | |
496 | } | |
497 | ||
498 | if (ch == '?') { | |
499 | return s_req_query_string_start; | |
500 | } | |
501 | ||
502 | if (ch == '@') { | |
503 | return s_req_server_with_at; | |
504 | } | |
505 | ||
506 | if (IS_USERINFO_CHAR(ch) || ch == '[' || ch == ']') { | |
507 | return s_req_server; | |
508 | } | |
509 | ||
510 | break; | |
511 | ||
512 | case s_req_path: | |
513 | if (IS_URL_CHAR(ch)) { | |
514 | return s; | |
515 | } | |
516 | ||
517 | switch (ch) { | |
518 | case '?': | |
519 | return s_req_query_string_start; | |
520 | ||
521 | case '#': | |
522 | return s_req_fragment_start; | |
523 | } | |
524 | ||
525 | break; | |
526 | ||
527 | case s_req_query_string_start: | |
528 | case s_req_query_string: | |
529 | if (IS_URL_CHAR(ch)) { | |
530 | return s_req_query_string; | |
531 | } | |
532 | ||
533 | switch (ch) { | |
534 | case '?': | |
535 | /* allow extra '?' in query string */ | |
536 | return s_req_query_string; | |
537 | ||
538 | case '#': | |
539 | return s_req_fragment_start; | |
540 | } | |
541 | ||
542 | break; | |
543 | ||
544 | case s_req_fragment_start: | |
545 | if (IS_URL_CHAR(ch)) { | |
546 | return s_req_fragment; | |
547 | } | |
548 | ||
549 | switch (ch) { | |
550 | case '?': | |
551 | return s_req_fragment; | |
552 | ||
553 | case '#': | |
554 | return s; | |
555 | } | |
556 | ||
557 | break; | |
558 | ||
559 | case s_req_fragment: | |
560 | if (IS_URL_CHAR(ch)) { | |
561 | return s; | |
562 | } | |
563 | ||
564 | switch (ch) { | |
565 | case '?': | |
566 | case '#': | |
567 | return s; | |
568 | } | |
569 | ||
570 | break; | |
571 | ||
572 | default: | |
573 | break; | |
574 | } | |
575 | ||
576 | /* We should never fall out of the switch above unless there's an error */ | |
577 | return s_dead; | |
578 | } | |
2340bcd3 JVH |
579 | |
580 | size_t http_parser_execute (http_parser *parser, | |
581 | const http_parser_settings *settings, | |
582 | const char *data, | |
583 | size_t len) | |
584 | { | |
585 | char c, ch; | |
586 | int8_t unhex_val; | |
462c72aa | 587 | const char *p = data; |
2340bcd3 JVH |
588 | const char *header_field_mark = 0; |
589 | const char *header_value_mark = 0; | |
590 | const char *url_mark = 0; | |
462c72aa | 591 | const char *body_mark = 0; |
2340bcd3 JVH |
592 | |
593 | /* We're in an error state. Don't bother doing anything. */ | |
594 | if (HTTP_PARSER_ERRNO(parser) != HPE_OK) { | |
595 | return 0; | |
596 | } | |
597 | ||
2340bcd3 | 598 | if (len == 0) { |
462c72aa | 599 | switch (parser->state) { |
2340bcd3 | 600 | case s_body_identity_eof: |
462c72aa JVH |
601 | /* Use of CALLBACK_NOTIFY() here would erroneously return 1 byte read if |
602 | * we got paused. | |
603 | */ | |
604 | CALLBACK_NOTIFY_NOADVANCE(message_complete); | |
2340bcd3 JVH |
605 | return 0; |
606 | ||
607 | case s_dead: | |
608 | case s_start_req_or_res: | |
609 | case s_start_res: | |
610 | case s_start_req: | |
611 | return 0; | |
612 | ||
613 | default: | |
614 | SET_ERRNO(HPE_INVALID_EOF_STATE); | |
615 | return 1; | |
616 | } | |
617 | } | |
618 | ||
619 | ||
462c72aa | 620 | if (parser->state == s_header_field) |
2340bcd3 | 621 | header_field_mark = data; |
462c72aa | 622 | if (parser->state == s_header_value) |
2340bcd3 | 623 | header_value_mark = data; |
462c72aa JVH |
624 | switch (parser->state) { |
625 | case s_req_path: | |
626 | case s_req_schema: | |
627 | case s_req_schema_slash: | |
628 | case s_req_schema_slash_slash: | |
629 | case s_req_server_start: | |
630 | case s_req_server: | |
631 | case s_req_server_with_at: | |
632 | case s_req_query_string_start: | |
633 | case s_req_query_string: | |
634 | case s_req_fragment_start: | |
635 | case s_req_fragment: | |
2340bcd3 | 636 | url_mark = data; |
462c72aa JVH |
637 | break; |
638 | } | |
2340bcd3 | 639 | |
462c72aa | 640 | for (p=data; p != data + len; p++) { |
2340bcd3 JVH |
641 | ch = *p; |
642 | ||
462c72aa JVH |
643 | if (PARSING_HEADER(parser->state)) { |
644 | ++parser->nread; | |
2340bcd3 | 645 | /* Buffer overflow attack */ |
462c72aa | 646 | if (parser->nread > HTTP_MAX_HEADER_SIZE) { |
2340bcd3 JVH |
647 | SET_ERRNO(HPE_HEADER_OVERFLOW); |
648 | goto error; | |
649 | } | |
650 | } | |
651 | ||
462c72aa JVH |
652 | reexecute_byte: |
653 | switch (parser->state) { | |
2340bcd3 JVH |
654 | |
655 | case s_dead: | |
656 | /* this state is used after a 'Connection: close' message | |
657 | * the parser will error out if it reads another message | |
658 | */ | |
462c72aa JVH |
659 | if (ch == CR || ch == LF) |
660 | break; | |
661 | ||
2340bcd3 JVH |
662 | SET_ERRNO(HPE_CLOSED_CONNECTION); |
663 | goto error; | |
664 | ||
665 | case s_start_req_or_res: | |
666 | { | |
667 | if (ch == CR || ch == LF) | |
668 | break; | |
669 | parser->flags = 0; | |
462c72aa | 670 | parser->content_length = ULLONG_MAX; |
2340bcd3 | 671 | |
462c72aa JVH |
672 | if (ch == 'H') { |
673 | parser->state = s_res_or_resp_H; | |
2340bcd3 | 674 | |
462c72aa JVH |
675 | CALLBACK_NOTIFY(message_begin); |
676 | } else { | |
2340bcd3 | 677 | parser->type = HTTP_REQUEST; |
462c72aa JVH |
678 | parser->state = s_start_req; |
679 | goto reexecute_byte; | |
2340bcd3 | 680 | } |
462c72aa | 681 | |
2340bcd3 JVH |
682 | break; |
683 | } | |
684 | ||
685 | case s_res_or_resp_H: | |
686 | if (ch == 'T') { | |
687 | parser->type = HTTP_RESPONSE; | |
462c72aa | 688 | parser->state = s_res_HT; |
2340bcd3 JVH |
689 | } else { |
690 | if (ch != 'E') { | |
691 | SET_ERRNO(HPE_INVALID_CONSTANT); | |
692 | goto error; | |
693 | } | |
694 | ||
695 | parser->type = HTTP_REQUEST; | |
696 | parser->method = HTTP_HEAD; | |
462c72aa JVH |
697 | parser->index = 2; |
698 | parser->state = s_req_method; | |
2340bcd3 JVH |
699 | } |
700 | break; | |
701 | ||
702 | case s_start_res: | |
703 | { | |
704 | parser->flags = 0; | |
462c72aa | 705 | parser->content_length = ULLONG_MAX; |
2340bcd3 JVH |
706 | |
707 | switch (ch) { | |
708 | case 'H': | |
462c72aa | 709 | parser->state = s_res_H; |
2340bcd3 JVH |
710 | break; |
711 | ||
712 | case CR: | |
713 | case LF: | |
714 | break; | |
715 | ||
716 | default: | |
717 | SET_ERRNO(HPE_INVALID_CONSTANT); | |
718 | goto error; | |
719 | } | |
462c72aa JVH |
720 | |
721 | CALLBACK_NOTIFY(message_begin); | |
2340bcd3 JVH |
722 | break; |
723 | } | |
724 | ||
725 | case s_res_H: | |
726 | STRICT_CHECK(ch != 'T'); | |
462c72aa | 727 | parser->state = s_res_HT; |
2340bcd3 JVH |
728 | break; |
729 | ||
730 | case s_res_HT: | |
731 | STRICT_CHECK(ch != 'T'); | |
462c72aa | 732 | parser->state = s_res_HTT; |
2340bcd3 JVH |
733 | break; |
734 | ||
735 | case s_res_HTT: | |
736 | STRICT_CHECK(ch != 'P'); | |
462c72aa | 737 | parser->state = s_res_HTTP; |
2340bcd3 JVH |
738 | break; |
739 | ||
740 | case s_res_HTTP: | |
741 | STRICT_CHECK(ch != '/'); | |
462c72aa | 742 | parser->state = s_res_first_http_major; |
2340bcd3 JVH |
743 | break; |
744 | ||
745 | case s_res_first_http_major: | |
746 | if (ch < '0' || ch > '9') { | |
747 | SET_ERRNO(HPE_INVALID_VERSION); | |
748 | goto error; | |
749 | } | |
750 | ||
751 | parser->http_major = ch - '0'; | |
462c72aa | 752 | parser->state = s_res_http_major; |
2340bcd3 JVH |
753 | break; |
754 | ||
755 | /* major HTTP version or dot */ | |
756 | case s_res_http_major: | |
757 | { | |
758 | if (ch == '.') { | |
462c72aa | 759 | parser->state = s_res_first_http_minor; |
2340bcd3 JVH |
760 | break; |
761 | } | |
762 | ||
763 | if (!IS_NUM(ch)) { | |
764 | SET_ERRNO(HPE_INVALID_VERSION); | |
765 | goto error; | |
766 | } | |
767 | ||
768 | parser->http_major *= 10; | |
769 | parser->http_major += ch - '0'; | |
770 | ||
771 | if (parser->http_major > 999) { | |
772 | SET_ERRNO(HPE_INVALID_VERSION); | |
773 | goto error; | |
774 | } | |
775 | ||
776 | break; | |
777 | } | |
778 | ||
779 | /* first digit of minor HTTP version */ | |
780 | case s_res_first_http_minor: | |
781 | if (!IS_NUM(ch)) { | |
782 | SET_ERRNO(HPE_INVALID_VERSION); | |
783 | goto error; | |
784 | } | |
785 | ||
786 | parser->http_minor = ch - '0'; | |
462c72aa | 787 | parser->state = s_res_http_minor; |
2340bcd3 JVH |
788 | break; |
789 | ||
790 | /* minor HTTP version or end of request line */ | |
791 | case s_res_http_minor: | |
792 | { | |
793 | if (ch == ' ') { | |
462c72aa | 794 | parser->state = s_res_first_status_code; |
2340bcd3 JVH |
795 | break; |
796 | } | |
797 | ||
798 | if (!IS_NUM(ch)) { | |
799 | SET_ERRNO(HPE_INVALID_VERSION); | |
800 | goto error; | |
801 | } | |
802 | ||
803 | parser->http_minor *= 10; | |
804 | parser->http_minor += ch - '0'; | |
805 | ||
806 | if (parser->http_minor > 999) { | |
807 | SET_ERRNO(HPE_INVALID_VERSION); | |
808 | goto error; | |
809 | } | |
810 | ||
811 | break; | |
812 | } | |
813 | ||
814 | case s_res_first_status_code: | |
815 | { | |
816 | if (!IS_NUM(ch)) { | |
817 | if (ch == ' ') { | |
818 | break; | |
819 | } | |
820 | ||
821 | SET_ERRNO(HPE_INVALID_STATUS); | |
822 | goto error; | |
823 | } | |
824 | parser->status_code = ch - '0'; | |
462c72aa | 825 | parser->state = s_res_status_code; |
2340bcd3 JVH |
826 | break; |
827 | } | |
828 | ||
829 | case s_res_status_code: | |
830 | { | |
831 | if (!IS_NUM(ch)) { | |
832 | switch (ch) { | |
833 | case ' ': | |
462c72aa | 834 | parser->state = s_res_status; |
2340bcd3 JVH |
835 | break; |
836 | case CR: | |
462c72aa | 837 | parser->state = s_res_line_almost_done; |
2340bcd3 JVH |
838 | break; |
839 | case LF: | |
462c72aa | 840 | parser->state = s_header_field_start; |
2340bcd3 JVH |
841 | break; |
842 | default: | |
843 | SET_ERRNO(HPE_INVALID_STATUS); | |
844 | goto error; | |
845 | } | |
846 | break; | |
847 | } | |
848 | ||
849 | parser->status_code *= 10; | |
850 | parser->status_code += ch - '0'; | |
851 | ||
852 | if (parser->status_code > 999) { | |
853 | SET_ERRNO(HPE_INVALID_STATUS); | |
854 | goto error; | |
855 | } | |
856 | ||
857 | break; | |
858 | } | |
859 | ||
860 | case s_res_status: | |
861 | /* the human readable status. e.g. "NOT FOUND" | |
862 | * we are not humans so just ignore this */ | |
863 | if (ch == CR) { | |
462c72aa | 864 | parser->state = s_res_line_almost_done; |
2340bcd3 JVH |
865 | break; |
866 | } | |
867 | ||
868 | if (ch == LF) { | |
462c72aa | 869 | parser->state = s_header_field_start; |
2340bcd3 JVH |
870 | break; |
871 | } | |
872 | break; | |
873 | ||
874 | case s_res_line_almost_done: | |
875 | STRICT_CHECK(ch != LF); | |
462c72aa | 876 | parser->state = s_header_field_start; |
2340bcd3 JVH |
877 | break; |
878 | ||
879 | case s_start_req: | |
880 | { | |
881 | if (ch == CR || ch == LF) | |
882 | break; | |
883 | parser->flags = 0; | |
462c72aa | 884 | parser->content_length = ULLONG_MAX; |
2340bcd3 JVH |
885 | |
886 | if (!IS_ALPHA(ch)) { | |
887 | SET_ERRNO(HPE_INVALID_METHOD); | |
888 | goto error; | |
889 | } | |
890 | ||
2340bcd3 | 891 | parser->method = (enum http_method) 0; |
462c72aa | 892 | parser->index = 1; |
2340bcd3 | 893 | switch (ch) { |
efc9c140 | 894 | case 'A': parser->method = HTTP_ANNOUNCE; break; |
2340bcd3 | 895 | case 'C': parser->method = HTTP_CONNECT; /* or COPY, CHECKOUT */ break; |
efc9c140 JVH |
896 | case 'D': parser->method = HTTP_DELETE; /* or DESCRIBE */ break; |
897 | case 'F': parser->method = HTTP_FLUSH; break; | |
898 | case 'G': parser->method = HTTP_GET; /* or GET_PARAMETER */ break; | |
2340bcd3 JVH |
899 | case 'H': parser->method = HTTP_HEAD; break; |
900 | case 'L': parser->method = HTTP_LOCK; break; | |
901 | case 'M': parser->method = HTTP_MKCOL; /* or MOVE, MKACTIVITY, MERGE, M-SEARCH */ break; | |
902 | case 'N': parser->method = HTTP_NOTIFY; break; | |
903 | case 'O': parser->method = HTTP_OPTIONS; break; | |
904 | case 'P': parser->method = HTTP_POST; | |
efc9c140 | 905 | /* or PROPFIND|PROPPATCH|PUT|PATCH|PURGE|PLAY|PAUSE */ |
2340bcd3 | 906 | break; |
efc9c140 JVH |
907 | case 'R': parser->method = HTTP_REPORT; /* or REDIRECT, RECORD */ break; |
908 | case 'S': parser->method = HTTP_SUBSCRIBE; /* or SEARCH, SETUP, SET_PARAMETER */ break; | |
909 | case 'T': parser->method = HTTP_TRACE; /* or TEARDOWN */ break; | |
2340bcd3 JVH |
910 | case 'U': parser->method = HTTP_UNLOCK; /* or UNSUBSCRIBE */ break; |
911 | default: | |
912 | SET_ERRNO(HPE_INVALID_METHOD); | |
913 | goto error; | |
914 | } | |
462c72aa JVH |
915 | parser->state = s_req_method; |
916 | ||
917 | CALLBACK_NOTIFY(message_begin); | |
918 | ||
2340bcd3 JVH |
919 | break; |
920 | } | |
921 | ||
922 | case s_req_method: | |
923 | { | |
924 | const char *matcher; | |
925 | if (ch == '\0') { | |
926 | SET_ERRNO(HPE_INVALID_METHOD); | |
927 | goto error; | |
928 | } | |
929 | ||
930 | matcher = method_strings[parser->method]; | |
462c72aa JVH |
931 | if (ch == ' ' && matcher[parser->index] == '\0') { |
932 | parser->state = s_req_spaces_before_url; | |
933 | } else if (ch == matcher[parser->index]) { | |
2340bcd3 JVH |
934 | ; /* nada */ |
935 | } else if (parser->method == HTTP_CONNECT) { | |
462c72aa | 936 | if (parser->index == 1 && ch == 'H') { |
2340bcd3 | 937 | parser->method = HTTP_CHECKOUT; |
462c72aa | 938 | } else if (parser->index == 2 && ch == 'P') { |
2340bcd3 JVH |
939 | parser->method = HTTP_COPY; |
940 | } else { | |
941 | goto error; | |
942 | } | |
efc9c140 JVH |
943 | } else if (parser->index == 2 && parser->method == HTTP_DELETE && ch == 'S') { |
944 | parser->method = HTTP_DESCRIBE; | |
945 | } else if (parser->index == 3 && parser->method == HTTP_GET && ch == '_') { | |
946 | parser->method = HTTP_GET_PARAMETER; | |
2340bcd3 | 947 | } else if (parser->method == HTTP_MKCOL) { |
462c72aa | 948 | if (parser->index == 1 && ch == 'O') { |
2340bcd3 | 949 | parser->method = HTTP_MOVE; |
462c72aa | 950 | } else if (parser->index == 1 && ch == 'E') { |
2340bcd3 | 951 | parser->method = HTTP_MERGE; |
462c72aa | 952 | } else if (parser->index == 1 && ch == '-') { |
2340bcd3 | 953 | parser->method = HTTP_MSEARCH; |
462c72aa | 954 | } else if (parser->index == 2 && ch == 'A') { |
2340bcd3 JVH |
955 | parser->method = HTTP_MKACTIVITY; |
956 | } else { | |
957 | goto error; | |
958 | } | |
462c72aa JVH |
959 | } else if (parser->method == HTTP_SUBSCRIBE) { |
960 | if (parser->index == 1 && ch == 'E') { | |
efc9c140 JVH |
961 | parser->method = HTTP_SEARCH; /* or HTTP_SETUP or HTTP_SET_PARAMETER */ |
962 | } else { | |
963 | goto error; | |
964 | } | |
965 | } else if (parser->method == HTTP_TRACE) { | |
966 | if (parser->index == 1 && ch == 'E') { | |
967 | parser->method = HTTP_TEARDOWN; | |
462c72aa JVH |
968 | } else { |
969 | goto error; | |
970 | } | |
971 | } else if (parser->index == 1 && parser->method == HTTP_POST) { | |
2340bcd3 JVH |
972 | if (ch == 'R') { |
973 | parser->method = HTTP_PROPFIND; /* or HTTP_PROPPATCH */ | |
974 | } else if (ch == 'U') { | |
462c72aa | 975 | parser->method = HTTP_PUT; /* or HTTP_PURGE */ |
2340bcd3 | 976 | } else if (ch == 'A') { |
efc9c140 JVH |
977 | parser->method = HTTP_PATCH; /* or HTTP_PAUSE */ |
978 | } else if (ch == 'L') { | |
979 | parser->method = HTTP_PLAY; | |
2340bcd3 JVH |
980 | } else { |
981 | goto error; | |
982 | } | |
462c72aa JVH |
983 | } else if (parser->index == 2) { |
984 | if (parser->method == HTTP_PUT) { | |
985 | if (ch == 'R') parser->method = HTTP_PURGE; | |
efc9c140 JVH |
986 | } else if (parser->method == HTTP_PATCH) { |
987 | if (ch == 'U') parser->method = HTTP_PAUSE; | |
988 | } else if (parser->method == HTTP_REPORT && ch == 'D') { | |
989 | parser->method = HTTP_REDIRECT; | |
990 | } else if (parser->method == HTTP_REPORT && ch == 'C') { | |
991 | parser->method = HTTP_RECORD; | |
992 | } else if (parser->method == HTTP_SEARCH) { | |
993 | if (ch == 'T') parser->method = HTTP_SETUP; /* or HTTP_SET_PARAMETER */ | |
462c72aa JVH |
994 | } else if (parser->method == HTTP_UNLOCK) { |
995 | if (ch == 'S') parser->method = HTTP_UNSUBSCRIBE; | |
efc9c140 JVH |
996 | } else { |
997 | goto error; | |
2340bcd3 | 998 | } |
efc9c140 JVH |
999 | } else if (parser->index == 3 && parser->method == HTTP_SETUP && ch == '_') { |
1000 | parser->method = HTTP_SET_PARAMETER; | |
462c72aa | 1001 | } else if (parser->index == 4 && parser->method == HTTP_PROPFIND && ch == 'P') { |
2340bcd3 | 1002 | parser->method = HTTP_PROPPATCH; |
2340bcd3 JVH |
1003 | } else { |
1004 | SET_ERRNO(HPE_INVALID_METHOD); | |
1005 | goto error; | |
1006 | } | |
1007 | ||
462c72aa | 1008 | ++parser->index; |
2340bcd3 JVH |
1009 | break; |
1010 | } | |
462c72aa | 1011 | |
2340bcd3 JVH |
1012 | case s_req_spaces_before_url: |
1013 | { | |
1014 | if (ch == ' ') break; | |
1015 | ||
462c72aa JVH |
1016 | MARK(url); |
1017 | if (parser->method == HTTP_CONNECT) { | |
1018 | parser->state = s_req_server_start; | |
2340bcd3 JVH |
1019 | } |
1020 | ||
462c72aa JVH |
1021 | parser->state = parse_url_char((enum state)parser->state, ch); |
1022 | if (parser->state == s_dead) { | |
1023 | SET_ERRNO(HPE_INVALID_URL); | |
1024 | goto error; | |
2340bcd3 JVH |
1025 | } |
1026 | ||
462c72aa | 1027 | break; |
2340bcd3 JVH |
1028 | } |
1029 | ||
1030 | case s_req_schema: | |
2340bcd3 | 1031 | case s_req_schema_slash: |
2340bcd3 | 1032 | case s_req_schema_slash_slash: |
462c72aa | 1033 | case s_req_server_start: |
2340bcd3 | 1034 | { |
2340bcd3 | 1035 | switch (ch) { |
462c72aa | 1036 | /* No whitespace allowed here */ |
2340bcd3 | 1037 | case ' ': |
2340bcd3 | 1038 | case CR: |
2340bcd3 | 1039 | case LF: |
462c72aa | 1040 | SET_ERRNO(HPE_INVALID_URL); |
2340bcd3 | 1041 | goto error; |
2340bcd3 | 1042 | default: |
462c72aa JVH |
1043 | parser->state = parse_url_char((enum state)parser->state, ch); |
1044 | if (parser->state == s_dead) { | |
1045 | SET_ERRNO(HPE_INVALID_URL); | |
1046 | goto error; | |
1047 | } | |
2340bcd3 | 1048 | } |
2340bcd3 | 1049 | |
2340bcd3 JVH |
1050 | break; |
1051 | } | |
1052 | ||
462c72aa JVH |
1053 | case s_req_server: |
1054 | case s_req_server_with_at: | |
1055 | case s_req_path: | |
1056 | case s_req_query_string_start: | |
1057 | case s_req_query_string: | |
2340bcd3 | 1058 | case s_req_fragment_start: |
2340bcd3 JVH |
1059 | case s_req_fragment: |
1060 | { | |
2340bcd3 JVH |
1061 | switch (ch) { |
1062 | case ' ': | |
462c72aa JVH |
1063 | parser->state = s_req_http_start; |
1064 | CALLBACK_DATA(url); | |
2340bcd3 JVH |
1065 | break; |
1066 | case CR: | |
2340bcd3 | 1067 | case LF: |
2340bcd3 JVH |
1068 | parser->http_major = 0; |
1069 | parser->http_minor = 9; | |
462c72aa JVH |
1070 | parser->state = (ch == CR) ? |
1071 | s_req_line_almost_done : | |
1072 | s_header_field_start; | |
1073 | CALLBACK_DATA(url); | |
2340bcd3 JVH |
1074 | break; |
1075 | default: | |
462c72aa JVH |
1076 | parser->state = parse_url_char((enum state)parser->state, ch); |
1077 | if (parser->state == s_dead) { | |
1078 | SET_ERRNO(HPE_INVALID_URL); | |
1079 | goto error; | |
1080 | } | |
2340bcd3 JVH |
1081 | } |
1082 | break; | |
1083 | } | |
1084 | ||
1085 | case s_req_http_start: | |
1086 | switch (ch) { | |
1087 | case 'H': | |
efc9c140 | 1088 | case 'R': |
462c72aa | 1089 | parser->state = s_req_http_H; |
2340bcd3 JVH |
1090 | break; |
1091 | case ' ': | |
1092 | break; | |
1093 | default: | |
1094 | SET_ERRNO(HPE_INVALID_CONSTANT); | |
1095 | goto error; | |
1096 | } | |
1097 | break; | |
1098 | ||
1099 | case s_req_http_H: | |
1100 | STRICT_CHECK(ch != 'T'); | |
462c72aa | 1101 | parser->state = s_req_http_HT; |
2340bcd3 JVH |
1102 | break; |
1103 | ||
1104 | case s_req_http_HT: | |
1105 | STRICT_CHECK(ch != 'T'); | |
462c72aa | 1106 | parser->state = s_req_http_HTT; |
2340bcd3 JVH |
1107 | break; |
1108 | ||
1109 | case s_req_http_HTT: | |
1110 | STRICT_CHECK(ch != 'P'); | |
462c72aa | 1111 | parser->state = s_req_http_HTTP; |
2340bcd3 JVH |
1112 | break; |
1113 | ||
1114 | case s_req_http_HTTP: | |
1115 | STRICT_CHECK(ch != '/'); | |
462c72aa | 1116 | parser->state = s_req_first_http_major; |
2340bcd3 JVH |
1117 | break; |
1118 | ||
1119 | /* first digit of major HTTP version */ | |
1120 | case s_req_first_http_major: | |
1121 | if (ch < '1' || ch > '9') { | |
1122 | SET_ERRNO(HPE_INVALID_VERSION); | |
1123 | goto error; | |
1124 | } | |
1125 | ||
1126 | parser->http_major = ch - '0'; | |
462c72aa | 1127 | parser->state = s_req_http_major; |
2340bcd3 JVH |
1128 | break; |
1129 | ||
1130 | /* major HTTP version or dot */ | |
1131 | case s_req_http_major: | |
1132 | { | |
1133 | if (ch == '.') { | |
462c72aa | 1134 | parser->state = s_req_first_http_minor; |
2340bcd3 JVH |
1135 | break; |
1136 | } | |
1137 | ||
1138 | if (!IS_NUM(ch)) { | |
1139 | SET_ERRNO(HPE_INVALID_VERSION); | |
1140 | goto error; | |
1141 | } | |
1142 | ||
1143 | parser->http_major *= 10; | |
1144 | parser->http_major += ch - '0'; | |
1145 | ||
1146 | if (parser->http_major > 999) { | |
1147 | SET_ERRNO(HPE_INVALID_VERSION); | |
1148 | goto error; | |
1149 | } | |
1150 | ||
1151 | break; | |
1152 | } | |
1153 | ||
1154 | /* first digit of minor HTTP version */ | |
1155 | case s_req_first_http_minor: | |
1156 | if (!IS_NUM(ch)) { | |
1157 | SET_ERRNO(HPE_INVALID_VERSION); | |
1158 | goto error; | |
1159 | } | |
1160 | ||
1161 | parser->http_minor = ch - '0'; | |
462c72aa | 1162 | parser->state = s_req_http_minor; |
2340bcd3 JVH |
1163 | break; |
1164 | ||
1165 | /* minor HTTP version or end of request line */ | |
1166 | case s_req_http_minor: | |
1167 | { | |
1168 | if (ch == CR) { | |
462c72aa | 1169 | parser->state = s_req_line_almost_done; |
2340bcd3 JVH |
1170 | break; |
1171 | } | |
1172 | ||
1173 | if (ch == LF) { | |
462c72aa | 1174 | parser->state = s_header_field_start; |
2340bcd3 JVH |
1175 | break; |
1176 | } | |
1177 | ||
1178 | /* XXX allow spaces after digit? */ | |
1179 | ||
1180 | if (!IS_NUM(ch)) { | |
1181 | SET_ERRNO(HPE_INVALID_VERSION); | |
1182 | goto error; | |
1183 | } | |
1184 | ||
1185 | parser->http_minor *= 10; | |
1186 | parser->http_minor += ch - '0'; | |
1187 | ||
1188 | if (parser->http_minor > 999) { | |
1189 | SET_ERRNO(HPE_INVALID_VERSION); | |
1190 | goto error; | |
1191 | } | |
1192 | ||
1193 | break; | |
1194 | } | |
1195 | ||
1196 | /* end of request line */ | |
1197 | case s_req_line_almost_done: | |
1198 | { | |
1199 | if (ch != LF) { | |
1200 | SET_ERRNO(HPE_LF_EXPECTED); | |
1201 | goto error; | |
1202 | } | |
1203 | ||
462c72aa | 1204 | parser->state = s_header_field_start; |
2340bcd3 JVH |
1205 | break; |
1206 | } | |
1207 | ||
1208 | case s_header_field_start: | |
2340bcd3 JVH |
1209 | { |
1210 | if (ch == CR) { | |
462c72aa | 1211 | parser->state = s_headers_almost_done; |
2340bcd3 JVH |
1212 | break; |
1213 | } | |
1214 | ||
1215 | if (ch == LF) { | |
1216 | /* they might be just sending \n instead of \r\n so this would be | |
1217 | * the second \n to denote the end of headers*/ | |
462c72aa JVH |
1218 | parser->state = s_headers_almost_done; |
1219 | goto reexecute_byte; | |
2340bcd3 JVH |
1220 | } |
1221 | ||
1222 | c = TOKEN(ch); | |
1223 | ||
1224 | if (!c) { | |
1225 | SET_ERRNO(HPE_INVALID_HEADER_TOKEN); | |
1226 | goto error; | |
1227 | } | |
1228 | ||
1229 | MARK(header_field); | |
1230 | ||
462c72aa JVH |
1231 | parser->index = 0; |
1232 | parser->state = s_header_field; | |
2340bcd3 JVH |
1233 | |
1234 | switch (c) { | |
1235 | case 'c': | |
462c72aa | 1236 | parser->header_state = h_C; |
2340bcd3 JVH |
1237 | break; |
1238 | ||
1239 | case 'p': | |
462c72aa | 1240 | parser->header_state = h_matching_proxy_connection; |
2340bcd3 JVH |
1241 | break; |
1242 | ||
1243 | case 't': | |
462c72aa | 1244 | parser->header_state = h_matching_transfer_encoding; |
2340bcd3 JVH |
1245 | break; |
1246 | ||
1247 | case 'u': | |
462c72aa | 1248 | parser->header_state = h_matching_upgrade; |
2340bcd3 JVH |
1249 | break; |
1250 | ||
1251 | default: | |
462c72aa | 1252 | parser->header_state = h_general; |
2340bcd3 JVH |
1253 | break; |
1254 | } | |
1255 | break; | |
1256 | } | |
1257 | ||
1258 | case s_header_field: | |
1259 | { | |
1260 | c = TOKEN(ch); | |
1261 | ||
1262 | if (c) { | |
462c72aa | 1263 | switch (parser->header_state) { |
2340bcd3 JVH |
1264 | case h_general: |
1265 | break; | |
1266 | ||
1267 | case h_C: | |
462c72aa JVH |
1268 | parser->index++; |
1269 | parser->header_state = (c == 'o' ? h_CO : h_general); | |
2340bcd3 JVH |
1270 | break; |
1271 | ||
1272 | case h_CO: | |
462c72aa JVH |
1273 | parser->index++; |
1274 | parser->header_state = (c == 'n' ? h_CON : h_general); | |
2340bcd3 JVH |
1275 | break; |
1276 | ||
1277 | case h_CON: | |
462c72aa | 1278 | parser->index++; |
2340bcd3 JVH |
1279 | switch (c) { |
1280 | case 'n': | |
462c72aa | 1281 | parser->header_state = h_matching_connection; |
2340bcd3 JVH |
1282 | break; |
1283 | case 't': | |
462c72aa | 1284 | parser->header_state = h_matching_content_length; |
2340bcd3 JVH |
1285 | break; |
1286 | default: | |
462c72aa | 1287 | parser->header_state = h_general; |
2340bcd3 JVH |
1288 | break; |
1289 | } | |
1290 | break; | |
1291 | ||
1292 | /* connection */ | |
1293 | ||
1294 | case h_matching_connection: | |
462c72aa JVH |
1295 | parser->index++; |
1296 | if (parser->index > sizeof(CONNECTION)-1 | |
1297 | || c != CONNECTION[parser->index]) { | |
1298 | parser->header_state = h_general; | |
1299 | } else if (parser->index == sizeof(CONNECTION)-2) { | |
1300 | parser->header_state = h_connection; | |
2340bcd3 JVH |
1301 | } |
1302 | break; | |
1303 | ||
1304 | /* proxy-connection */ | |
1305 | ||
1306 | case h_matching_proxy_connection: | |
462c72aa JVH |
1307 | parser->index++; |
1308 | if (parser->index > sizeof(PROXY_CONNECTION)-1 | |
1309 | || c != PROXY_CONNECTION[parser->index]) { | |
1310 | parser->header_state = h_general; | |
1311 | } else if (parser->index == sizeof(PROXY_CONNECTION)-2) { | |
1312 | parser->header_state = h_connection; | |
2340bcd3 JVH |
1313 | } |
1314 | break; | |
1315 | ||
1316 | /* content-length */ | |
1317 | ||
1318 | case h_matching_content_length: | |
462c72aa JVH |
1319 | parser->index++; |
1320 | if (parser->index > sizeof(CONTENT_LENGTH)-1 | |
1321 | || c != CONTENT_LENGTH[parser->index]) { | |
1322 | parser->header_state = h_general; | |
1323 | } else if (parser->index == sizeof(CONTENT_LENGTH)-2) { | |
1324 | parser->header_state = h_content_length; | |
2340bcd3 JVH |
1325 | } |
1326 | break; | |
1327 | ||
1328 | /* transfer-encoding */ | |
1329 | ||
1330 | case h_matching_transfer_encoding: | |
462c72aa JVH |
1331 | parser->index++; |
1332 | if (parser->index > sizeof(TRANSFER_ENCODING)-1 | |
1333 | || c != TRANSFER_ENCODING[parser->index]) { | |
1334 | parser->header_state = h_general; | |
1335 | } else if (parser->index == sizeof(TRANSFER_ENCODING)-2) { | |
1336 | parser->header_state = h_transfer_encoding; | |
2340bcd3 JVH |
1337 | } |
1338 | break; | |
1339 | ||
1340 | /* upgrade */ | |
1341 | ||
1342 | case h_matching_upgrade: | |
462c72aa JVH |
1343 | parser->index++; |
1344 | if (parser->index > sizeof(UPGRADE)-1 | |
1345 | || c != UPGRADE[parser->index]) { | |
1346 | parser->header_state = h_general; | |
1347 | } else if (parser->index == sizeof(UPGRADE)-2) { | |
1348 | parser->header_state = h_upgrade; | |
2340bcd3 JVH |
1349 | } |
1350 | break; | |
1351 | ||
1352 | case h_connection: | |
1353 | case h_content_length: | |
1354 | case h_transfer_encoding: | |
1355 | case h_upgrade: | |
462c72aa | 1356 | if (ch != ' ') parser->header_state = h_general; |
2340bcd3 JVH |
1357 | break; |
1358 | ||
1359 | default: | |
1360 | assert(0 && "Unknown header_state"); | |
1361 | break; | |
1362 | } | |
1363 | break; | |
1364 | } | |
1365 | ||
1366 | if (ch == ':') { | |
462c72aa JVH |
1367 | parser->state = s_header_value_start; |
1368 | CALLBACK_DATA(header_field); | |
2340bcd3 JVH |
1369 | break; |
1370 | } | |
1371 | ||
1372 | if (ch == CR) { | |
462c72aa JVH |
1373 | parser->state = s_header_almost_done; |
1374 | CALLBACK_DATA(header_field); | |
2340bcd3 JVH |
1375 | break; |
1376 | } | |
1377 | ||
1378 | if (ch == LF) { | |
462c72aa JVH |
1379 | parser->state = s_header_field_start; |
1380 | CALLBACK_DATA(header_field); | |
2340bcd3 JVH |
1381 | break; |
1382 | } | |
1383 | ||
1384 | SET_ERRNO(HPE_INVALID_HEADER_TOKEN); | |
1385 | goto error; | |
1386 | } | |
1387 | ||
1388 | case s_header_value_start: | |
1389 | { | |
1390 | if (ch == ' ' || ch == '\t') break; | |
1391 | ||
1392 | MARK(header_value); | |
1393 | ||
462c72aa JVH |
1394 | parser->state = s_header_value; |
1395 | parser->index = 0; | |
2340bcd3 JVH |
1396 | |
1397 | if (ch == CR) { | |
462c72aa JVH |
1398 | parser->header_state = h_general; |
1399 | parser->state = s_header_almost_done; | |
1400 | CALLBACK_DATA(header_value); | |
2340bcd3 JVH |
1401 | break; |
1402 | } | |
1403 | ||
1404 | if (ch == LF) { | |
462c72aa JVH |
1405 | parser->state = s_header_field_start; |
1406 | CALLBACK_DATA(header_value); | |
2340bcd3 JVH |
1407 | break; |
1408 | } | |
1409 | ||
1410 | c = LOWER(ch); | |
1411 | ||
462c72aa | 1412 | switch (parser->header_state) { |
2340bcd3 JVH |
1413 | case h_upgrade: |
1414 | parser->flags |= F_UPGRADE; | |
462c72aa | 1415 | parser->header_state = h_general; |
2340bcd3 JVH |
1416 | break; |
1417 | ||
1418 | case h_transfer_encoding: | |
1419 | /* looking for 'Transfer-Encoding: chunked' */ | |
1420 | if ('c' == c) { | |
462c72aa | 1421 | parser->header_state = h_matching_transfer_encoding_chunked; |
2340bcd3 | 1422 | } else { |
462c72aa | 1423 | parser->header_state = h_general; |
2340bcd3 JVH |
1424 | } |
1425 | break; | |
1426 | ||
1427 | case h_content_length: | |
1428 | if (!IS_NUM(ch)) { | |
1429 | SET_ERRNO(HPE_INVALID_CONTENT_LENGTH); | |
1430 | goto error; | |
1431 | } | |
1432 | ||
1433 | parser->content_length = ch - '0'; | |
1434 | break; | |
1435 | ||
1436 | case h_connection: | |
1437 | /* looking for 'Connection: keep-alive' */ | |
1438 | if (c == 'k') { | |
462c72aa | 1439 | parser->header_state = h_matching_connection_keep_alive; |
2340bcd3 JVH |
1440 | /* looking for 'Connection: close' */ |
1441 | } else if (c == 'c') { | |
462c72aa | 1442 | parser->header_state = h_matching_connection_close; |
2340bcd3 | 1443 | } else { |
462c72aa | 1444 | parser->header_state = h_general; |
2340bcd3 JVH |
1445 | } |
1446 | break; | |
1447 | ||
1448 | default: | |
462c72aa | 1449 | parser->header_state = h_general; |
2340bcd3 JVH |
1450 | break; |
1451 | } | |
1452 | break; | |
1453 | } | |
1454 | ||
1455 | case s_header_value: | |
1456 | { | |
1457 | ||
1458 | if (ch == CR) { | |
462c72aa JVH |
1459 | parser->state = s_header_almost_done; |
1460 | CALLBACK_DATA(header_value); | |
2340bcd3 JVH |
1461 | break; |
1462 | } | |
1463 | ||
1464 | if (ch == LF) { | |
462c72aa JVH |
1465 | parser->state = s_header_almost_done; |
1466 | CALLBACK_DATA_NOADVANCE(header_value); | |
1467 | goto reexecute_byte; | |
2340bcd3 JVH |
1468 | } |
1469 | ||
1470 | c = LOWER(ch); | |
1471 | ||
462c72aa | 1472 | switch (parser->header_state) { |
2340bcd3 JVH |
1473 | case h_general: |
1474 | break; | |
1475 | ||
1476 | case h_connection: | |
1477 | case h_transfer_encoding: | |
1478 | assert(0 && "Shouldn't get here."); | |
1479 | break; | |
1480 | ||
1481 | case h_content_length: | |
462c72aa JVH |
1482 | { |
1483 | uint64_t t; | |
1484 | ||
2340bcd3 | 1485 | if (ch == ' ') break; |
462c72aa | 1486 | |
2340bcd3 JVH |
1487 | if (!IS_NUM(ch)) { |
1488 | SET_ERRNO(HPE_INVALID_CONTENT_LENGTH); | |
1489 | goto error; | |
1490 | } | |
1491 | ||
462c72aa JVH |
1492 | t = parser->content_length; |
1493 | t *= 10; | |
1494 | t += ch - '0'; | |
1495 | ||
1496 | /* Overflow? */ | |
1497 | if (t < parser->content_length || t == ULLONG_MAX) { | |
1498 | SET_ERRNO(HPE_INVALID_CONTENT_LENGTH); | |
1499 | goto error; | |
1500 | } | |
1501 | ||
1502 | parser->content_length = t; | |
2340bcd3 | 1503 | break; |
462c72aa | 1504 | } |
2340bcd3 JVH |
1505 | |
1506 | /* Transfer-Encoding: chunked */ | |
1507 | case h_matching_transfer_encoding_chunked: | |
462c72aa JVH |
1508 | parser->index++; |
1509 | if (parser->index > sizeof(CHUNKED)-1 | |
1510 | || c != CHUNKED[parser->index]) { | |
1511 | parser->header_state = h_general; | |
1512 | } else if (parser->index == sizeof(CHUNKED)-2) { | |
1513 | parser->header_state = h_transfer_encoding_chunked; | |
2340bcd3 JVH |
1514 | } |
1515 | break; | |
1516 | ||
1517 | /* looking for 'Connection: keep-alive' */ | |
1518 | case h_matching_connection_keep_alive: | |
462c72aa JVH |
1519 | parser->index++; |
1520 | if (parser->index > sizeof(KEEP_ALIVE)-1 | |
1521 | || c != KEEP_ALIVE[parser->index]) { | |
1522 | parser->header_state = h_general; | |
1523 | } else if (parser->index == sizeof(KEEP_ALIVE)-2) { | |
1524 | parser->header_state = h_connection_keep_alive; | |
2340bcd3 JVH |
1525 | } |
1526 | break; | |
1527 | ||
1528 | /* looking for 'Connection: close' */ | |
1529 | case h_matching_connection_close: | |
462c72aa JVH |
1530 | parser->index++; |
1531 | if (parser->index > sizeof(CLOSE)-1 || c != CLOSE[parser->index]) { | |
1532 | parser->header_state = h_general; | |
1533 | } else if (parser->index == sizeof(CLOSE)-2) { | |
1534 | parser->header_state = h_connection_close; | |
2340bcd3 JVH |
1535 | } |
1536 | break; | |
1537 | ||
1538 | case h_transfer_encoding_chunked: | |
1539 | case h_connection_keep_alive: | |
1540 | case h_connection_close: | |
462c72aa | 1541 | if (ch != ' ') parser->header_state = h_general; |
2340bcd3 JVH |
1542 | break; |
1543 | ||
1544 | default: | |
462c72aa JVH |
1545 | parser->state = s_header_value; |
1546 | parser->header_state = h_general; | |
2340bcd3 JVH |
1547 | break; |
1548 | } | |
1549 | break; | |
1550 | } | |
1551 | ||
1552 | case s_header_almost_done: | |
2340bcd3 JVH |
1553 | { |
1554 | STRICT_CHECK(ch != LF); | |
1555 | ||
462c72aa | 1556 | parser->state = s_header_value_lws; |
2340bcd3 | 1557 | |
462c72aa | 1558 | switch (parser->header_state) { |
2340bcd3 JVH |
1559 | case h_connection_keep_alive: |
1560 | parser->flags |= F_CONNECTION_KEEP_ALIVE; | |
1561 | break; | |
1562 | case h_connection_close: | |
1563 | parser->flags |= F_CONNECTION_CLOSE; | |
1564 | break; | |
1565 | case h_transfer_encoding_chunked: | |
1566 | parser->flags |= F_CHUNKED; | |
1567 | break; | |
1568 | default: | |
1569 | break; | |
1570 | } | |
462c72aa | 1571 | |
2340bcd3 JVH |
1572 | break; |
1573 | } | |
1574 | ||
1575 | case s_header_value_lws: | |
1576 | { | |
1577 | if (ch == ' ' || ch == '\t') | |
462c72aa | 1578 | parser->state = s_header_value_start; |
2340bcd3 JVH |
1579 | else |
1580 | { | |
462c72aa JVH |
1581 | parser->state = s_header_field_start; |
1582 | goto reexecute_byte; | |
2340bcd3 JVH |
1583 | } |
1584 | break; | |
1585 | } | |
1586 | ||
1587 | case s_headers_almost_done: | |
2340bcd3 JVH |
1588 | { |
1589 | STRICT_CHECK(ch != LF); | |
1590 | ||
1591 | if (parser->flags & F_TRAILING) { | |
1592 | /* End of a chunked request */ | |
462c72aa JVH |
1593 | parser->state = NEW_MESSAGE(); |
1594 | CALLBACK_NOTIFY(message_complete); | |
2340bcd3 JVH |
1595 | break; |
1596 | } | |
1597 | ||
462c72aa | 1598 | parser->state = s_headers_done; |
2340bcd3 | 1599 | |
462c72aa JVH |
1600 | /* Set this here so that on_headers_complete() callbacks can see it */ |
1601 | parser->upgrade = | |
1602 | (parser->flags & F_UPGRADE || parser->method == HTTP_CONNECT); | |
2340bcd3 JVH |
1603 | |
1604 | /* Here we call the headers_complete callback. This is somewhat | |
1605 | * different than other callbacks because if the user returns 1, we | |
1606 | * will interpret that as saying that this message has no body. This | |
1607 | * is needed for the annoying case of receiving a response to a HEAD | |
1608 | * request. | |
462c72aa JVH |
1609 | * |
1610 | * We'd like to use CALLBACK_NOTIFY_NOADVANCE() here but we cannot, so | |
1611 | * we have to simulate it by handling a change in errno below. | |
2340bcd3 JVH |
1612 | */ |
1613 | if (settings->on_headers_complete) { | |
1614 | switch (settings->on_headers_complete(parser)) { | |
1615 | case 0: | |
1616 | break; | |
1617 | ||
1618 | case 1: | |
1619 | parser->flags |= F_SKIPBODY; | |
1620 | break; | |
1621 | ||
1622 | default: | |
2340bcd3 JVH |
1623 | SET_ERRNO(HPE_CB_headers_complete); |
1624 | return p - data; /* Error */ | |
1625 | } | |
1626 | } | |
1627 | ||
462c72aa JVH |
1628 | if (HTTP_PARSER_ERRNO(parser) != HPE_OK) { |
1629 | return p - data; | |
1630 | } | |
1631 | ||
1632 | goto reexecute_byte; | |
1633 | } | |
1634 | ||
1635 | case s_headers_done: | |
1636 | { | |
1637 | STRICT_CHECK(ch != LF); | |
1638 | ||
1639 | parser->nread = 0; | |
1640 | ||
2340bcd3 JVH |
1641 | /* Exit, the rest of the connect is in a different protocol. */ |
1642 | if (parser->upgrade) { | |
462c72aa JVH |
1643 | parser->state = NEW_MESSAGE(); |
1644 | CALLBACK_NOTIFY(message_complete); | |
2340bcd3 JVH |
1645 | return (p - data) + 1; |
1646 | } | |
1647 | ||
1648 | if (parser->flags & F_SKIPBODY) { | |
462c72aa JVH |
1649 | parser->state = NEW_MESSAGE(); |
1650 | CALLBACK_NOTIFY(message_complete); | |
2340bcd3 JVH |
1651 | } else if (parser->flags & F_CHUNKED) { |
1652 | /* chunked encoding - ignore Content-Length header */ | |
462c72aa | 1653 | parser->state = s_chunk_size_start; |
2340bcd3 JVH |
1654 | } else { |
1655 | if (parser->content_length == 0) { | |
1656 | /* Content-Length header given but zero: Content-Length: 0\r\n */ | |
462c72aa JVH |
1657 | parser->state = NEW_MESSAGE(); |
1658 | CALLBACK_NOTIFY(message_complete); | |
1659 | } else if (parser->content_length != ULLONG_MAX) { | |
2340bcd3 | 1660 | /* Content-Length header given and non-zero */ |
462c72aa | 1661 | parser->state = s_body_identity; |
2340bcd3 | 1662 | } else { |
462c72aa JVH |
1663 | if (parser->type == HTTP_REQUEST || |
1664 | !http_message_needs_eof(parser)) { | |
2340bcd3 | 1665 | /* Assume content-length 0 - read the next */ |
462c72aa JVH |
1666 | parser->state = NEW_MESSAGE(); |
1667 | CALLBACK_NOTIFY(message_complete); | |
2340bcd3 JVH |
1668 | } else { |
1669 | /* Read body until EOF */ | |
462c72aa | 1670 | parser->state = s_body_identity_eof; |
2340bcd3 JVH |
1671 | } |
1672 | } | |
1673 | } | |
1674 | ||
1675 | break; | |
1676 | } | |
1677 | ||
1678 | case s_body_identity: | |
462c72aa JVH |
1679 | { |
1680 | uint64_t to_read = MIN(parser->content_length, | |
1681 | (uint64_t) ((data + len) - p)); | |
1682 | ||
1683 | assert(parser->content_length != 0 | |
1684 | && parser->content_length != ULLONG_MAX); | |
1685 | ||
1686 | /* The difference between advancing content_length and p is because | |
1687 | * the latter will automatically advance on the next loop iteration. | |
1688 | * Further, if content_length ends up at 0, we want to see the last | |
1689 | * byte again for our message complete callback. | |
1690 | */ | |
1691 | MARK(body); | |
1692 | parser->content_length -= to_read; | |
1693 | p += to_read - 1; | |
1694 | ||
1695 | if (parser->content_length == 0) { | |
1696 | parser->state = s_message_done; | |
1697 | ||
1698 | /* Mimic CALLBACK_DATA_NOADVANCE() but with one extra byte. | |
1699 | * | |
1700 | * The alternative to doing this is to wait for the next byte to | |
1701 | * trigger the data callback, just as in every other case. The | |
1702 | * problem with this is that this makes it difficult for the test | |
1703 | * harness to distinguish between complete-on-EOF and | |
1704 | * complete-on-length. It's not clear that this distinction is | |
1705 | * important for applications, but let's keep it for now. | |
1706 | */ | |
1707 | CALLBACK_DATA_(body, p - body_mark + 1, p - data); | |
1708 | goto reexecute_byte; | |
2340bcd3 | 1709 | } |
462c72aa | 1710 | |
2340bcd3 | 1711 | break; |
462c72aa | 1712 | } |
2340bcd3 JVH |
1713 | |
1714 | /* read until EOF */ | |
1715 | case s_body_identity_eof: | |
462c72aa JVH |
1716 | MARK(body); |
1717 | p = data + len - 1; | |
1718 | ||
1719 | break; | |
1720 | ||
1721 | case s_message_done: | |
1722 | parser->state = NEW_MESSAGE(); | |
1723 | CALLBACK_NOTIFY(message_complete); | |
2340bcd3 JVH |
1724 | break; |
1725 | ||
1726 | case s_chunk_size_start: | |
1727 | { | |
462c72aa | 1728 | assert(parser->nread == 1); |
2340bcd3 JVH |
1729 | assert(parser->flags & F_CHUNKED); |
1730 | ||
1731 | unhex_val = unhex[(unsigned char)ch]; | |
1732 | if (unhex_val == -1) { | |
1733 | SET_ERRNO(HPE_INVALID_CHUNK_SIZE); | |
1734 | goto error; | |
1735 | } | |
1736 | ||
1737 | parser->content_length = unhex_val; | |
462c72aa | 1738 | parser->state = s_chunk_size; |
2340bcd3 JVH |
1739 | break; |
1740 | } | |
1741 | ||
1742 | case s_chunk_size: | |
1743 | { | |
462c72aa JVH |
1744 | uint64_t t; |
1745 | ||
2340bcd3 JVH |
1746 | assert(parser->flags & F_CHUNKED); |
1747 | ||
1748 | if (ch == CR) { | |
462c72aa | 1749 | parser->state = s_chunk_size_almost_done; |
2340bcd3 JVH |
1750 | break; |
1751 | } | |
1752 | ||
1753 | unhex_val = unhex[(unsigned char)ch]; | |
1754 | ||
1755 | if (unhex_val == -1) { | |
1756 | if (ch == ';' || ch == ' ') { | |
462c72aa | 1757 | parser->state = s_chunk_parameters; |
2340bcd3 JVH |
1758 | break; |
1759 | } | |
1760 | ||
1761 | SET_ERRNO(HPE_INVALID_CHUNK_SIZE); | |
1762 | goto error; | |
1763 | } | |
1764 | ||
462c72aa JVH |
1765 | t = parser->content_length; |
1766 | t *= 16; | |
1767 | t += unhex_val; | |
1768 | ||
1769 | /* Overflow? */ | |
1770 | if (t < parser->content_length || t == ULLONG_MAX) { | |
1771 | SET_ERRNO(HPE_INVALID_CONTENT_LENGTH); | |
1772 | goto error; | |
1773 | } | |
1774 | ||
1775 | parser->content_length = t; | |
2340bcd3 JVH |
1776 | break; |
1777 | } | |
1778 | ||
1779 | case s_chunk_parameters: | |
1780 | { | |
1781 | assert(parser->flags & F_CHUNKED); | |
1782 | /* just ignore this shit. TODO check for overflow */ | |
1783 | if (ch == CR) { | |
462c72aa | 1784 | parser->state = s_chunk_size_almost_done; |
2340bcd3 JVH |
1785 | break; |
1786 | } | |
1787 | break; | |
1788 | } | |
1789 | ||
1790 | case s_chunk_size_almost_done: | |
1791 | { | |
1792 | assert(parser->flags & F_CHUNKED); | |
1793 | STRICT_CHECK(ch != LF); | |
1794 | ||
462c72aa | 1795 | parser->nread = 0; |
2340bcd3 JVH |
1796 | |
1797 | if (parser->content_length == 0) { | |
1798 | parser->flags |= F_TRAILING; | |
462c72aa | 1799 | parser->state = s_header_field_start; |
2340bcd3 | 1800 | } else { |
462c72aa | 1801 | parser->state = s_chunk_data; |
2340bcd3 JVH |
1802 | } |
1803 | break; | |
1804 | } | |
1805 | ||
1806 | case s_chunk_data: | |
1807 | { | |
462c72aa JVH |
1808 | uint64_t to_read = MIN(parser->content_length, |
1809 | (uint64_t) ((data + len) - p)); | |
2340bcd3 | 1810 | |
462c72aa JVH |
1811 | assert(parser->flags & F_CHUNKED); |
1812 | assert(parser->content_length != 0 | |
1813 | && parser->content_length != ULLONG_MAX); | |
2340bcd3 | 1814 | |
462c72aa JVH |
1815 | /* See the explanation in s_body_identity for why the content |
1816 | * length and data pointers are managed this way. | |
1817 | */ | |
1818 | MARK(body); | |
1819 | parser->content_length -= to_read; | |
1820 | p += to_read - 1; | |
2340bcd3 | 1821 | |
462c72aa JVH |
1822 | if (parser->content_length == 0) { |
1823 | parser->state = s_chunk_data_almost_done; | |
2340bcd3 JVH |
1824 | } |
1825 | ||
2340bcd3 JVH |
1826 | break; |
1827 | } | |
1828 | ||
1829 | case s_chunk_data_almost_done: | |
1830 | assert(parser->flags & F_CHUNKED); | |
462c72aa | 1831 | assert(parser->content_length == 0); |
2340bcd3 | 1832 | STRICT_CHECK(ch != CR); |
462c72aa JVH |
1833 | parser->state = s_chunk_data_done; |
1834 | CALLBACK_DATA(body); | |
2340bcd3 JVH |
1835 | break; |
1836 | ||
1837 | case s_chunk_data_done: | |
1838 | assert(parser->flags & F_CHUNKED); | |
1839 | STRICT_CHECK(ch != LF); | |
462c72aa JVH |
1840 | parser->nread = 0; |
1841 | parser->state = s_chunk_size_start; | |
2340bcd3 JVH |
1842 | break; |
1843 | ||
1844 | default: | |
1845 | assert(0 && "unhandled state"); | |
1846 | SET_ERRNO(HPE_INVALID_INTERNAL_STATE); | |
1847 | goto error; | |
1848 | } | |
1849 | } | |
1850 | ||
462c72aa JVH |
1851 | /* Run callbacks for any marks that we have leftover after we ran out of |
1852 | * bytes. There should be at most one of these set, so it's OK to invoke | |
1853 | * them in series (unset marks will not result in callbacks). | |
1854 | * | |
1855 | * We use the NOADVANCE() variety of callbacks here because 'p' has already | |
1856 | * overflowed 'data' and this allows us to correct for the off-by-one that | |
1857 | * we'd otherwise have (since CALLBACK_DATA() is meant to be run with a 'p' | |
1858 | * value that's in-bounds). | |
1859 | */ | |
1860 | ||
1861 | assert(((header_field_mark ? 1 : 0) + | |
1862 | (header_value_mark ? 1 : 0) + | |
1863 | (url_mark ? 1 : 0) + | |
1864 | (body_mark ? 1 : 0)) <= 1); | |
2340bcd3 | 1865 | |
462c72aa JVH |
1866 | CALLBACK_DATA_NOADVANCE(header_field); |
1867 | CALLBACK_DATA_NOADVANCE(header_value); | |
1868 | CALLBACK_DATA_NOADVANCE(url); | |
1869 | CALLBACK_DATA_NOADVANCE(body); | |
2340bcd3 JVH |
1870 | |
1871 | return len; | |
1872 | ||
1873 | error: | |
1874 | if (HTTP_PARSER_ERRNO(parser) == HPE_OK) { | |
1875 | SET_ERRNO(HPE_UNKNOWN); | |
1876 | } | |
1877 | ||
1878 | return (p - data); | |
1879 | } | |
1880 | ||
1881 | ||
462c72aa | 1882 | /* Does the parser need to see an EOF to find the end of the message? */ |
2340bcd3 | 1883 | int |
462c72aa JVH |
1884 | http_message_needs_eof (const http_parser *parser) |
1885 | { | |
1886 | if (parser->type == HTTP_REQUEST) { | |
1887 | return 0; | |
1888 | } | |
1889 | ||
1890 | /* See RFC 2616 section 4.4 */ | |
1891 | if (parser->status_code / 100 == 1 || /* 1xx e.g. Continue */ | |
1892 | parser->status_code == 204 || /* No Content */ | |
1893 | parser->status_code == 304 || /* Not Modified */ | |
1894 | parser->flags & F_SKIPBODY) { /* response to a HEAD request */ | |
1895 | return 0; | |
1896 | } | |
1897 | ||
1898 | if ((parser->flags & F_CHUNKED) || parser->content_length != ULLONG_MAX) { | |
1899 | return 0; | |
1900 | } | |
1901 | ||
1902 | return 1; | |
1903 | } | |
1904 | ||
1905 | ||
1906 | int | |
1907 | http_should_keep_alive (const http_parser *parser) | |
2340bcd3 JVH |
1908 | { |
1909 | if (parser->http_major > 0 && parser->http_minor > 0) { | |
1910 | /* HTTP/1.1 */ | |
1911 | if (parser->flags & F_CONNECTION_CLOSE) { | |
1912 | return 0; | |
2340bcd3 JVH |
1913 | } |
1914 | } else { | |
1915 | /* HTTP/1.0 or earlier */ | |
462c72aa | 1916 | if (!(parser->flags & F_CONNECTION_KEEP_ALIVE)) { |
2340bcd3 JVH |
1917 | return 0; |
1918 | } | |
1919 | } | |
462c72aa JVH |
1920 | |
1921 | return !http_message_needs_eof(parser); | |
2340bcd3 JVH |
1922 | } |
1923 | ||
1924 | ||
462c72aa JVH |
1925 | const char * |
1926 | http_method_str (enum http_method m) | |
2340bcd3 | 1927 | { |
462c72aa | 1928 | return ELEM_AT(method_strings, m, "<unknown>"); |
2340bcd3 JVH |
1929 | } |
1930 | ||
1931 | ||
1932 | void | |
462c72aa | 1933 | http_parser_init (http_parser *parser, enum http_parser_type t) |
2340bcd3 | 1934 | { |
462c72aa JVH |
1935 | void *data = parser->data; /* preserve application data */ |
1936 | memset(parser, 0, sizeof(*parser)); | |
1937 | parser->data = data; | |
2340bcd3 JVH |
1938 | parser->type = t; |
1939 | parser->state = (t == HTTP_REQUEST ? s_start_req : (t == HTTP_RESPONSE ? s_start_res : s_start_req_or_res)); | |
462c72aa | 1940 | parser->http_errno = HPE_OK; |
2340bcd3 JVH |
1941 | } |
1942 | ||
1943 | const char * | |
1944 | http_errno_name(enum http_errno err) { | |
1945 | assert(err < (sizeof(http_strerror_tab)/sizeof(http_strerror_tab[0]))); | |
1946 | return http_strerror_tab[err].name; | |
1947 | } | |
1948 | ||
1949 | const char * | |
1950 | http_errno_description(enum http_errno err) { | |
1951 | assert(err < (sizeof(http_strerror_tab)/sizeof(http_strerror_tab[0]))); | |
1952 | return http_strerror_tab[err].description; | |
1953 | } | |
462c72aa JVH |
1954 | |
1955 | static enum http_host_state | |
1956 | http_parse_host_char(enum http_host_state s, const char ch) { | |
1957 | switch(s) { | |
1958 | case s_http_userinfo: | |
1959 | case s_http_userinfo_start: | |
1960 | if (ch == '@') { | |
1961 | return s_http_host_start; | |
1962 | } | |
1963 | ||
1964 | if (IS_USERINFO_CHAR(ch)) { | |
1965 | return s_http_userinfo; | |
1966 | } | |
1967 | break; | |
1968 | ||
1969 | case s_http_host_start: | |
1970 | if (ch == '[') { | |
1971 | return s_http_host_v6_start; | |
1972 | } | |
1973 | ||
1974 | if (IS_HOST_CHAR(ch)) { | |
1975 | return s_http_host; | |
1976 | } | |
1977 | ||
1978 | break; | |
1979 | ||
1980 | case s_http_host: | |
1981 | if (IS_HOST_CHAR(ch)) { | |
1982 | return s_http_host; | |
1983 | } | |
1984 | ||
1985 | /* FALLTHROUGH */ | |
1986 | case s_http_host_v6_end: | |
1987 | if (ch == ':') { | |
1988 | return s_http_host_port_start; | |
1989 | } | |
1990 | ||
1991 | break; | |
1992 | ||
1993 | case s_http_host_v6: | |
1994 | if (ch == ']') { | |
1995 | return s_http_host_v6_end; | |
1996 | } | |
1997 | ||
1998 | /* FALLTHROUGH */ | |
1999 | case s_http_host_v6_start: | |
2000 | if (IS_HEX(ch) || ch == ':') { | |
2001 | return s_http_host_v6; | |
2002 | } | |
2003 | ||
2004 | break; | |
2005 | ||
2006 | case s_http_host_port: | |
2007 | case s_http_host_port_start: | |
2008 | if (IS_NUM(ch)) { | |
2009 | return s_http_host_port; | |
2010 | } | |
2011 | ||
2012 | break; | |
2013 | ||
2014 | default: | |
2015 | break; | |
2016 | } | |
2017 | return s_http_host_dead; | |
2018 | } | |
2019 | ||
2020 | static int | |
2021 | http_parse_host(const char * buf, struct http_parser_url *u, int found_at) { | |
2022 | enum http_host_state s; | |
2023 | ||
2024 | const char *p; | |
2025 | size_t buflen = u->field_data[UF_HOST].off + u->field_data[UF_HOST].len; | |
2026 | ||
2027 | u->field_data[UF_HOST].len = 0; | |
2028 | ||
2029 | s = found_at ? s_http_userinfo_start : s_http_host_start; | |
2030 | ||
2031 | for (p = buf + u->field_data[UF_HOST].off; p < buf + buflen; p++) { | |
2032 | enum http_host_state new_s = http_parse_host_char(s, *p); | |
2033 | ||
2034 | if (new_s == s_http_host_dead) { | |
2035 | return 1; | |
2036 | } | |
2037 | ||
2038 | switch(new_s) { | |
2039 | case s_http_host: | |
2040 | if (s != s_http_host) { | |
2041 | u->field_data[UF_HOST].off = p - buf; | |
2042 | } | |
2043 | u->field_data[UF_HOST].len++; | |
2044 | break; | |
2045 | ||
2046 | case s_http_host_v6: | |
2047 | if (s != s_http_host_v6) { | |
2048 | u->field_data[UF_HOST].off = p - buf; | |
2049 | } | |
2050 | u->field_data[UF_HOST].len++; | |
2051 | break; | |
2052 | ||
2053 | case s_http_host_port: | |
2054 | if (s != s_http_host_port) { | |
2055 | u->field_data[UF_PORT].off = p - buf; | |
2056 | u->field_data[UF_PORT].len = 0; | |
2057 | u->field_set |= (1 << UF_PORT); | |
2058 | } | |
2059 | u->field_data[UF_PORT].len++; | |
2060 | break; | |
2061 | ||
2062 | case s_http_userinfo: | |
2063 | if (s != s_http_userinfo) { | |
2064 | u->field_data[UF_USERINFO].off = p - buf ; | |
2065 | u->field_data[UF_USERINFO].len = 0; | |
2066 | u->field_set |= (1 << UF_USERINFO); | |
2067 | } | |
2068 | u->field_data[UF_USERINFO].len++; | |
2069 | break; | |
2070 | ||
2071 | default: | |
2072 | break; | |
2073 | } | |
2074 | s = new_s; | |
2075 | } | |
2076 | ||
2077 | /* Make sure we don't end somewhere unexpected */ | |
2078 | switch (s) { | |
2079 | case s_http_host_start: | |
2080 | case s_http_host_v6_start: | |
2081 | case s_http_host_v6: | |
2082 | case s_http_host_port_start: | |
2083 | case s_http_userinfo: | |
2084 | case s_http_userinfo_start: | |
2085 | return 1; | |
2086 | default: | |
2087 | break; | |
2088 | } | |
2089 | ||
2090 | return 0; | |
2091 | } | |
2092 | ||
2093 | int | |
2094 | http_parser_parse_url(const char *buf, size_t buflen, int is_connect, | |
2095 | struct http_parser_url *u) | |
2096 | { | |
2097 | enum state s; | |
2098 | const char *p; | |
2099 | enum http_parser_url_fields uf, old_uf; | |
2100 | int found_at = 0; | |
2101 | ||
2102 | u->port = u->field_set = 0; | |
2103 | s = is_connect ? s_req_server_start : s_req_spaces_before_url; | |
2104 | uf = old_uf = UF_MAX; | |
2105 | ||
2106 | for (p = buf; p < buf + buflen; p++) { | |
2107 | s = parse_url_char(s, *p); | |
2108 | ||
2109 | /* Figure out the next field that we're operating on */ | |
2110 | switch (s) { | |
2111 | case s_dead: | |
2112 | return 1; | |
2113 | ||
2114 | /* Skip delimeters */ | |
2115 | case s_req_schema_slash: | |
2116 | case s_req_schema_slash_slash: | |
2117 | case s_req_server_start: | |
2118 | case s_req_query_string_start: | |
2119 | case s_req_fragment_start: | |
2120 | continue; | |
2121 | ||
2122 | case s_req_schema: | |
2123 | uf = UF_SCHEMA; | |
2124 | break; | |
2125 | ||
2126 | case s_req_server_with_at: | |
2127 | found_at = 1; | |
2128 | ||
2129 | /* FALLTROUGH */ | |
2130 | case s_req_server: | |
2131 | uf = UF_HOST; | |
2132 | break; | |
2133 | ||
2134 | case s_req_path: | |
2135 | uf = UF_PATH; | |
2136 | break; | |
2137 | ||
2138 | case s_req_query_string: | |
2139 | uf = UF_QUERY; | |
2140 | break; | |
2141 | ||
2142 | case s_req_fragment: | |
2143 | uf = UF_FRAGMENT; | |
2144 | break; | |
2145 | ||
2146 | default: | |
2147 | assert(!"Unexpected state"); | |
2148 | return 1; | |
2149 | } | |
2150 | ||
2151 | /* Nothing's changed; soldier on */ | |
2152 | if (uf == old_uf) { | |
2153 | u->field_data[uf].len++; | |
2154 | continue; | |
2155 | } | |
2156 | ||
2157 | u->field_data[uf].off = p - buf; | |
2158 | u->field_data[uf].len = 1; | |
2159 | ||
2160 | u->field_set |= (1 << uf); | |
2161 | old_uf = uf; | |
2162 | } | |
2163 | ||
2164 | /* host must be present if there is a schema */ | |
2165 | /* parsing http:///toto will fail */ | |
2166 | if ((u->field_set & ((1 << UF_SCHEMA) | (1 << UF_HOST))) != 0) { | |
2167 | if (http_parse_host(buf, u, found_at) != 0) { | |
2168 | return 1; | |
2169 | } | |
2170 | } | |
2171 | ||
2172 | /* CONNECT requests can only contain "hostname:port" */ | |
2173 | if (is_connect && u->field_set != ((1 << UF_HOST)|(1 << UF_PORT))) { | |
2174 | return 1; | |
2175 | } | |
2176 | ||
2177 | if (u->field_set & (1 << UF_PORT)) { | |
2178 | /* Don't bother with endp; we've already validated the string */ | |
2179 | unsigned long v = strtoul(buf + u->field_data[UF_PORT].off, NULL, 10); | |
2180 | ||
2181 | /* Ports have a max value of 2^16 */ | |
2182 | if (v > 0xffff) { | |
2183 | return 1; | |
2184 | } | |
2185 | ||
2186 | u->port = (uint16_t) v; | |
2187 | } | |
2188 | ||
2189 | return 0; | |
2190 | } | |
2191 | ||
2192 | void | |
2193 | http_parser_pause(http_parser *parser, int paused) { | |
2194 | /* Users should only be pausing/unpausing a parser that is not in an error | |
2195 | * state. In non-debug builds, there's not much that we can do about this | |
2196 | * other than ignore it. | |
2197 | */ | |
2198 | if (HTTP_PARSER_ERRNO(parser) == HPE_OK || | |
2199 | HTTP_PARSER_ERRNO(parser) == HPE_PAUSED) { | |
2200 | SET_ERRNO((paused) ? HPE_PAUSED : HPE_OK); | |
2201 | } else { | |
2202 | assert(0 && "Attempting to pause parser in error state"); | |
2203 | } | |
2204 | } | |
2205 | ||
2206 | int | |
2207 | http_body_is_final(const struct http_parser *parser) { | |
2208 | return parser->state == s_message_done; | |
2209 | } |