src/lib/http_parser.c

   1 /* Based on src/http/ngx_http_parse.c from NGINX copyright Igor Sysoev
   2  *
   3  * Additional changes are licensed under the same terms as NGINX and
   4  * copyright Joyent, Inc. and other Node contributors. All rights reserved.
   5  *
   6  * Permission is hereby granted, free of charge, to any person obtaining a copy
   7  * of this software and associated documentation files (the "Software"), to
   8  * deal in the Software without restriction, including without limitation the
   9  * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or
  10  * sell copies of the Software, and to permit persons to whom the Software is
  11  * furnished to do so, subject to the following conditions:
  12  *
  13  * The above copyright notice and this permission notice shall be included in
  14  * all copies or substantial portions of the Software.
  15  *
  16  * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
  17  * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
  18  * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
  19  * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
  20  * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
  21  * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
  22  * IN THE SOFTWARE.
  23  */
  24 #include "http_parser.h"
  25 #include <assert.h>
  26 #include <stddef.h>
  27 #include <ctype.h>
  28 #include <stdlib.h>
  29 #include <string.h>
  30 #include <limits.h>
  31
  32 #ifndef ULLONG_MAX
  33 # define ULLONG_MAX ((uint64_t) -1) /* 2^64-1 */
  34 #endif
  35
  36 #ifndef MIN
  37 # define MIN(a,b) ((a) < (b) ? (a) : (b))
  38 #endif
  39
  40 #ifndef ARRAY_SIZE
  41 # define ARRAY_SIZE(a) (sizeof(a) / sizeof((a)[0]))
  42 #endif
  43
  44 #ifndef BIT_AT
  45 # define BIT_AT(a, i)                                                \
  46   (!!((unsigned int) (a)[(unsigned int) (i) >> 3] &                  \
  47    (1 << ((unsigned int) (i) & 7))))
  48 #endif
  49
  50 #ifndef ELEM_AT
  51 # define ELEM_AT(a, i, v) ((unsigned int) (i) < ARRAY_SIZE(a) ? (a)[(i)] : (v))
  52 #endif
  53
  54 #if HTTP_PARSER_DEBUG
  55 #define SET_ERRNO(e)                                                 \
  56 do {                                                                 \
  57   parser->http_errno = (e);                                          \
  58   parser->error_lineno = __LINE__;                                   \
  59 } while (0)
  60 #else
  61 #define SET_ERRNO(e)                                                 \
  62 do {                                                                 \
  63   parser->http_errno = (e);                                          \
  64 } while(0)
  65 #endif
  66
  67
  68 /* Run the notify callback FOR, returning ER if it fails */
  69 #define CALLBACK_NOTIFY_(FOR, ER)                                    \
  70 do {                                                                 \
  71   assert(HTTP_PARSER_ERRNO(parser) == HPE_OK);                       \
  72                                                                      \
  73   if (settings->on_##FOR) {                                          \
  74     if (0 != settings->on_##FOR(parser)) {                           \
  75       SET_ERRNO(HPE_CB_##FOR);                                       \
  76     }                                                                \
  77                                                                      \
  78     /* We either errored above or got paused; get out */             \
  79     if (HTTP_PARSER_ERRNO(parser) != HPE_OK) {                       \
  80       return (ER);                                                   \
  81     }                                                                \
  82   }                                                                  \
  83 } while (0)
  84
  85 /* Run the notify callback FOR and consume the current byte */
  86 #define CALLBACK_NOTIFY(FOR)            CALLBACK_NOTIFY_(FOR, p - data + 1)
  87
  88 /* Run the notify callback FOR and don't consume the current byte */
  89 #define CALLBACK_NOTIFY_NOADVANCE(FOR)  CALLBACK_NOTIFY_(FOR, p - data)
  90
  91 /* Run data callback FOR with LEN bytes, returning ER if it fails */
  92 #define CALLBACK_DATA_(FOR, LEN, ER)                                 \
  93 do {                                                                 \
  94   assert(HTTP_PARSER_ERRNO(parser) == HPE_OK);                       \
  95                                                                      \
  96   if (FOR##_mark) {                                                  \
  97     if (settings->on_##FOR) {                                        \
  98       if (0 != settings->on_##FOR(parser, FOR##_mark, (LEN))) {      \
  99         SET_ERRNO(HPE_CB_##FOR);                                     \
 100       }                                                              \
 101                                                                      \
 102       /* We either errored above or got paused; get out */           \
 103       if (HTTP_PARSER_ERRNO(parser) != HPE_OK) {                     \
 104         return (ER);                                                 \
 105       }                                                              \
 106     }                                                                \
 107     FOR##_mark = NULL;                                               \
 108   }                                                                  \
 109 } while (0)
 110
 111 /* Run the data callback FOR and consume the current byte */
 112 #define CALLBACK_DATA(FOR)                                           \
 113     CALLBACK_DATA_(FOR, p - FOR##_mark, p - data + 1)
 114
 115 /* Run the data callback FOR and don't consume the current byte */
 116 #define CALLBACK_DATA_NOADVANCE(FOR)                                 \
 117     CALLBACK_DATA_(FOR, p - FOR##_mark, p - data)
 118
 119 /* Set the mark FOR; non-destructive if mark is already set */
 120 #define MARK(FOR)                                                    \
 121 do {                                                                 \
 122   if (!FOR##_mark) {                                                 \
 123     FOR##_mark = p;                                                  \
 124   }                                                                  \
 125 } while (0)
 126
 127
 128 #define PROXY_CONNECTION "proxy-connection"
 129 #define CONNECTION "connection"
 130 #define CONTENT_LENGTH "content-length"
 131 #define TRANSFER_ENCODING "transfer-encoding"
 132 #define UPGRADE "upgrade"
 133 #define CHUNKED "chunked"
 134 #define KEEP_ALIVE "keep-alive"
 135 #define CLOSE "close"
 136
 137
 138 static const char *method_strings[] =
 139   {
 140 #define XX(num, name, string) #string,
 141   HTTP_METHOD_MAP(XX)
 142 #undef XX
 143   };
 144
 145
 146 /* Tokens as defined by rfc 2616. Also lowercases them.
 147  *        token       = 1*<any CHAR except CTLs or separators>
 148  *     separators     = "(" | ")" | "<" | ">" | "@"
 149  *                    | "," | ";" | ":" | "\" | <">
 150  *                    | "/" | "[" | "]" | "?" | "="
 151  *                    | "{" | "}" | SP | HT
 152  */
 153 static const char tokens[256] = {
 154 /*   0 nul    1 soh    2 stx    3 etx    4 eot    5 enq    6 ack    7 bel  */
 155         0,       0,       0,       0,       0,       0,       0,       0,
 156 /*   8 bs     9 ht    10 nl    11 vt    12 np    13 cr    14 so    15 si   */
 157         0,       0,       0,       0,       0,       0,       0,       0,
 158 /*  16 dle   17 dc1   18 dc2   19 dc3   20 dc4   21 nak   22 syn   23 etb */
 159         0,       0,       0,       0,       0,       0,       0,       0,
 160 /*  24 can   25 em    26 sub   27 esc   28 fs    29 gs    30 rs    31 us  */
 161         0,       0,       0,       0,       0,       0,       0,       0,
 162 /*  32 sp    33  !    34  "    35  #    36  $    37  %    38  &    39  '  */
 163         0,      '!',      0,      '#',     '$',     '%',     '&',    '\'',
 164 /*  40  (    41  )    42  *    43  +    44  ,    45  -    46  .    47  /  */
 165         0,       0,      '*',     '+',      0,      '-',     '.',      0,
 166 /*  48  0    49  1    50  2    51  3    52  4    53  5    54  6    55  7  */
 167        '0',     '1',     '2',     '3',     '4',     '5',     '6',     '7',
 168 /*  56  8    57  9    58  :    59  ;    60  <    61  =    62  >    63  ?  */
 169        '8',     '9',      0,       0,       0,       0,       0,       0,
 170 /*  64  @    65  A    66  B    67  C    68  D    69  E    70  F    71  G  */
 171         0,      'a',     'b',     'c',     'd',     'e',     'f',     'g',
 172 /*  72  H    73  I    74  J    75  K    76  L    77  M    78  N    79  O  */
 173        'h',     'i',     'j',     'k',     'l',     'm',     'n',     'o',
 174 /*  80  P    81  Q    82  R    83  S    84  T    85  U    86  V    87  W  */
 175        'p',     'q',     'r',     's',     't',     'u',     'v',     'w',
 176 /*  88  X    89  Y    90  Z    91  [    92  \    93  ]    94  ^    95  _  */
 177        'x',     'y',     'z',      0,       0,       0,      '^',     '_',
 178 /*  96  `    97  a    98  b    99  c   100  d   101  e   102  f   103  g  */
 179        '`',     'a',     'b',     'c',     'd',     'e',     'f',     'g',
 180 /* 104  h   105  i   106  j   107  k   108  l   109  m   110  n   111  o  */
 181        'h',     'i',     'j',     'k',     'l',     'm',     'n',     'o',
 182 /* 112  p   113  q   114  r   115  s   116  t   117  u   118  v   119  w  */
 183        'p',     'q',     'r',     's',     't',     'u',     'v',     'w',
 184 /* 120  x   121  y   122  z   123  {   124  |   125  }   126  ~   127 del */
 185        'x',     'y',     'z',      0,      '|',      0,      '~',       0 };
 186
 187
 188 static const int8_t unhex[256] =
 189   {-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1
 190   ,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1
 191   ,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1
 192   , 0, 1, 2, 3, 4, 5, 6, 7, 8, 9,-1,-1,-1,-1,-1,-1
 193   ,-1,10,11,12,13,14,15,-1,-1,-1,-1,-1,-1,-1,-1,-1
 194   ,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1
 195   ,-1,10,11,12,13,14,15,-1,-1,-1,-1,-1,-1,-1,-1,-1
 196   ,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1
 197   };
 198
 199
 200 #if HTTP_PARSER_STRICT
 201 # define T(v) 0
 202 #else
 203 # define T(v) v
 204 #endif
 205
 206
 207 static const uint8_t normal_url_char[32] = {
 208 /*   0 nul    1 soh    2 stx    3 etx    4 eot    5 enq    6 ack    7 bel  */
 209         0    |   0    |   0    |   0    |   0    |   0    |   0    |   0,
 210 /*   8 bs     9 ht    10 nl    11 vt    12 np    13 cr    14 so    15 si   */
 211         0    | T(2)   |   0    |   0    | T(16)  |   0    |   0    |   0,
 212 /*  16 dle   17 dc1   18 dc2   19 dc3   20 dc4   21 nak   22 syn   23 etb */
 213         0    |   0    |   0    |   0    |   0    |   0    |   0    |   0,
 214 /*  24 can   25 em    26 sub   27 esc   28 fs    29 gs    30 rs    31 us  */
 215         0    |   0    |   0    |   0    |   0    |   0    |   0    |   0,
 216 /*  32 sp    33  !    34  "    35  #    36  $    37  %    38  &    39  '  */
 217         0    |   2    |   4    |   0    |   16   |   32   |   64   |  128,
 218 /*  40  (    41  )    42  *    43  +    44  ,    45  -    46  .    47  /  */
 219         1    |   2    |   4    |   8    |   16   |   32   |   64   |  128,
 220 /*  48  0    49  1    50  2    51  3    52  4    53  5    54  6    55  7  */
 221         1    |   2    |   4    |   8    |   16   |   32   |   64   |  128,
 222 /*  56  8    57  9    58  :    59  ;    60  <    61  =    62  >    63  ?  */
 223         1    |   2    |   4    |   8    |   16   |   32   |   64   |   0,
 224 /*  64  @    65  A    66  B    67  C    68  D    69  E    70  F    71  G  */
 225         1    |   2    |   4    |   8    |   16   |   32   |   64   |  128,
 226 /*  72  H    73  I    74  J    75  K    76  L    77  M    78  N    79  O  */
 227         1    |   2    |   4    |   8    |   16   |   32   |   64   |  128,
 228 /*  80  P    81  Q    82  R    83  S    84  T    85  U    86  V    87  W  */
 229         1    |   2    |   4    |   8    |   16   |   32   |   64   |  128,
 230 /*  88  X    89  Y    90  Z    91  [    92  \    93  ]    94  ^    95  _  */
 231         1    |   2    |   4    |   8    |   16   |   32   |   64   |  128,
 232 /*  96  `    97  a    98  b    99  c   100  d   101  e   102  f   103  g  */
 233         1    |   2    |   4    |   8    |   16   |   32   |   64   |  128,
 234 /* 104  h   105  i   106  j   107  k   108  l   109  m   110  n   111  o  */
 235         1    |   2    |   4    |   8    |   16   |   32   |   64   |  128,
 236 /* 112  p   113  q   114  r   115  s   116  t   117  u   118  v   119  w  */
 237         1    |   2    |   4    |   8    |   16   |   32   |   64   |  128,
 238 /* 120  x   121  y   122  z   123  {   124  |   125  }   126  ~   127 del */
 239         1    |   2    |   4    |   8    |   16   |   32   |   64   |   0, };
 240
 241 #undef T
 242
 243 enum state
 244   { s_dead = 1 /* important that this is > 0 */
 245
 246   , s_start_req_or_res
 247   , s_res_or_resp_H
 248   , s_start_res
 249   , s_res_H
 250   , s_res_HT
 251   , s_res_HTT
 252   , s_res_HTTP
 253   , s_res_first_http_major
 254   , s_res_http_major
 255   , s_res_first_http_minor
 256   , s_res_http_minor
 257   , s_res_first_status_code
 258   , s_res_status_code
 259   , s_res_status
 260   , s_res_line_almost_done
 261
 262   , s_start_req
 263
 264   , s_req_method
 265   , s_req_spaces_before_url
 266   , s_req_schema
 267   , s_req_schema_slash
 268   , s_req_schema_slash_slash
 269   , s_req_server_start
 270   , s_req_server
 271   , s_req_server_with_at
 272   , s_req_path
 273   , s_req_query_string_start
 274   , s_req_query_string
 275   , s_req_fragment_start
 276   , s_req_fragment
 277   , s_req_http_start
 278   , s_req_http_H
 279   , s_req_http_HT
 280   , s_req_http_HTT
 281   , s_req_http_HTTP
 282   , s_req_first_http_major
 283   , s_req_http_major
 284   , s_req_first_http_minor
 285   , s_req_http_minor
 286   , s_req_line_almost_done
 287
 288   , s_header_field_start
 289   , s_header_field
 290   , s_header_value_start
 291   , s_header_value
 292   , s_header_value_lws
 293
 294   , s_header_almost_done
 295
 296   , s_chunk_size_start
 297   , s_chunk_size
 298   , s_chunk_parameters
 299   , s_chunk_size_almost_done
 300
 301   , s_headers_almost_done
 302   , s_headers_done
 303
 304   /* Important: 's_headers_done' must be the last 'header' state. All
 305    * states beyond this must be 'body' states. It is used for overflow
 306    * checking. See the PARSING_HEADER() macro.
 307    */
 308
 309   , s_chunk_data
 310   , s_chunk_data_almost_done
 311   , s_chunk_data_done
 312
 313   , s_body_identity
 314   , s_body_identity_eof
 315
 316   , s_message_done
 317   };
 318
 319
 320 #define PARSING_HEADER(state) (state <= s_headers_done)
 321
 322
 323 enum header_states
 324   { h_general = 0
 325   , h_C
 326   , h_CO
 327   , h_CON
 328
 329   , h_matching_connection
 330   , h_matching_proxy_connection
 331   , h_matching_content_length
 332   , h_matching_transfer_encoding
 333   , h_matching_upgrade
 334
 335   , h_connection
 336   , h_content_length
 337   , h_transfer_encoding
 338   , h_upgrade
 339
 340   , h_matching_transfer_encoding_chunked
 341   , h_matching_connection_keep_alive
 342   , h_matching_connection_close
 343
 344   , h_transfer_encoding_chunked
 345   , h_connection_keep_alive
 346   , h_connection_close
 347   };
 348
 349 enum http_host_state
 350   {
 351     s_http_host_dead = 1
 352   , s_http_userinfo_start
 353   , s_http_userinfo
 354   , s_http_host_start
 355   , s_http_host_v6_start
 356   , s_http_host
 357   , s_http_host_v6
 358   , s_http_host_v6_end
 359   , s_http_host_port_start
 360   , s_http_host_port
 361 };
 362
 363 /* Macros for character classes; depends on strict-mode  */
 364 #define CR                  '\r'
 365 #define LF                  '\n'
 366 #define LOWER(c)            (unsigned char)(c | 0x20)
 367 #define IS_ALPHA(c)         (LOWER(c) >= 'a' && LOWER(c) <= 'z')
 368 #define IS_NUM(c)           ((c) >= '0' && (c) <= '9')
 369 #define IS_ALPHANUM(c)      (IS_ALPHA(c) || IS_NUM(c))
 370 #define IS_HEX(c)           (IS_NUM(c) || (LOWER(c) >= 'a' && LOWER(c) <= 'f'))
 371 #define IS_MARK(c)          ((c) == '-' || (c) == '_' || (c) == '.' || \
 372   (c) == '!' || (c) == '~' || (c) == '*' || (c) == '\'' || (c) == '(' || \
 373   (c) == ')')
 374 #define IS_USERINFO_CHAR(c) (IS_ALPHANUM(c) || IS_MARK(c) || (c) == '%' || \
 375   (c) == ';' || (c) == ':' || (c) == '&' || (c) == '=' || (c) == '+' || \
 376   (c) == '$' || (c) == ',')
 377
 378 #if HTTP_PARSER_STRICT
 379 #define TOKEN(c)            (tokens[(unsigned char)c])
 380 #define IS_URL_CHAR(c)      (BIT_AT(normal_url_char, (unsigned char)c))
 381 #define IS_HOST_CHAR(c)     (IS_ALPHANUM(c) || (c) == '.' || (c) == '-')
 382 #else
 383 #define TOKEN(c)            ((c == ' ') ? ' ' : tokens[(unsigned char)c])
 384 #define IS_URL_CHAR(c)                                                         \
 385   (BIT_AT(normal_url_char, (unsigned char)c) || ((c) & 0x80))
 386 #define IS_HOST_CHAR(c)                                                        \
 387   (IS_ALPHANUM(c) || (c) == '.' || (c) == '-' || (c) == '_')
 388 #endif
 389
 390
 391 #define start_state (parser->type == HTTP_REQUEST ? s_start_req : s_start_res)
 392
 393
 394 #if HTTP_PARSER_STRICT
 395 # define STRICT_CHECK(cond)                                          \
 396 do {                                                                 \
 397   if (cond) {                                                        \
 398     SET_ERRNO(HPE_STRICT);                                           \
 399     goto error;                                                      \
 400   }                                                                  \
 401 } while (0)
 402 # define NEW_MESSAGE() (http_should_keep_alive(parser) ? start_state : s_dead)
 403 #else
 404 # define STRICT_CHECK(cond)
 405 # define NEW_MESSAGE() start_state
 406 #endif
 407
 408
 409 /* Map errno values to strings for human-readable output */
 410 #define HTTP_STRERROR_GEN(n, s) { "HPE_" #n, s },
 411 static struct {
 412   const char *name;
 413   const char *description;
 414 } http_strerror_tab[] = {
 415   HTTP_ERRNO_MAP(HTTP_STRERROR_GEN)
 416 };
 417 #undef HTTP_STRERROR_GEN
 418
 419 int http_message_needs_eof(const http_parser *parser);
 420
 421 /* Our URL parser.
 422  *
 423  * This is designed to be shared by http_parser_execute() for URL validation,
 424  * hence it has a state transition + byte-for-byte interface. In addition, it
 425  * is meant to be embedded in http_parser_parse_url(), which does the dirty
 426  * work of turning state transitions URL components for its API.
 427  *
 428  * This function should only be invoked with non-space characters. It is
 429  * assumed that the caller cares about (and can detect) the transition between
 430  * URL and non-URL states by looking for these.
 431  */
 432 static enum state
 433 parse_url_char(enum state s, const char ch)
 434 {
 435   if (ch == ' ' || ch == '\r' || ch == '\n') {
 436     return s_dead;
 437   }
 438
 439 #if HTTP_PARSER_STRICT
 440   if (ch == '\t' || ch == '\f') {
 441     return s_dead;
 442   }
 443 #endif
 444
 445   switch (s) {
 446     case s_req_spaces_before_url:
 447       /* Proxied requests are followed by scheme of an absolute URI (alpha).
 448        * All methods except CONNECT are followed by '/' or '*'.
 449        */
 450
 451       if (ch == '/' || ch == '*') {
 452         return s_req_path;
 453       }
 454
 455       if (IS_ALPHA(ch)) {
 456         return s_req_schema;
 457       }
 458
 459       break;
 460
 461     case s_req_schema:
 462       if (IS_ALPHA(ch)) {
 463         return s;
 464       }
 465
 466       if (ch == ':') {
 467         return s_req_schema_slash;
 468       }
 469
 470       break;
 471
 472     case s_req_schema_slash:
 473       if (ch == '/') {
 474         return s_req_schema_slash_slash;
 475       }
 476
 477       break;
 478
 479     case s_req_schema_slash_slash:
 480       if (ch == '/') {
 481         return s_req_server_start;
 482       }
 483
 484       break;
 485
 486     case s_req_server_with_at:
 487       if (ch == '@') {
 488         return s_dead;
 489       }
 490
 491     /* FALLTHROUGH */
 492     case s_req_server_start:
 493     case s_req_server:
 494       if (ch == '/') {
 495         return s_req_path;
 496       }
 497
 498       if (ch == '?') {
 499         return s_req_query_string_start;
 500       }
 501
 502       if (ch == '@') {
 503         return s_req_server_with_at;
 504       }
 505
 506       if (IS_USERINFO_CHAR(ch) || ch == '[' || ch == ']') {
 507         return s_req_server;
 508       }
 509
 510       break;
 511
 512     case s_req_path:
 513       if (IS_URL_CHAR(ch)) {
 514         return s;
 515       }
 516
 517       switch (ch) {
 518         case '?':
 519           return s_req_query_string_start;
 520
 521         case '#':
 522           return s_req_fragment_start;
 523       }
 524
 525       break;
 526
 527     case s_req_query_string_start:
 528     case s_req_query_string:
 529       if (IS_URL_CHAR(ch)) {
 530         return s_req_query_string;
 531       }
 532
 533       switch (ch) {
 534         case '?':
 535           /* allow extra '?' in query string */
 536           return s_req_query_string;
 537
 538         case '#':
 539           return s_req_fragment_start;
 540       }
 541
 542       break;
 543
 544     case s_req_fragment_start:
 545       if (IS_URL_CHAR(ch)) {
 546         return s_req_fragment;
 547       }
 548
 549       switch (ch) {
 550         case '?':
 551           return s_req_fragment;
 552
 553         case '#':
 554           return s;
 555       }
 556
 557       break;
 558
 559     case s_req_fragment:
 560       if (IS_URL_CHAR(ch)) {
 561         return s;
 562       }
 563
 564       switch (ch) {
 565         case '?':
 566         case '#':
 567           return s;
 568       }
 569
 570       break;
 571
 572     default:
 573       break;
 574   }
 575
 576   /* We should never fall out of the switch above unless there's an error */
 577   return s_dead;
 578 }
 579
 580 size_t http_parser_execute (http_parser *parser,
 581                             const http_parser_settings *settings,
 582                             const char *data,
 583                             size_t len)
 584 {
 585   char c, ch;
 586   int8_t unhex_val;
 587   const char *p = data;
 588   const char *header_field_mark = 0;
 589   const char *header_value_mark = 0;
 590   const char *url_mark = 0;
 591   const char *body_mark = 0;
 592
 593   /* We're in an error state. Don't bother doing anything. */
 594   if (HTTP_PARSER_ERRNO(parser) != HPE_OK) {
 595     return 0;
 596   }
 597
 598   if (len == 0) {
 599     switch (parser->state) {
 600       case s_body_identity_eof:
 601         /* Use of CALLBACK_NOTIFY() here would erroneously return 1 byte read if
 602          * we got paused.
 603          */
 604         CALLBACK_NOTIFY_NOADVANCE(message_complete);
 605         return 0;
 606
 607       case s_dead:
 608       case s_start_req_or_res:
 609       case s_start_res:
 610       case s_start_req:
 611         return 0;
 612
 613       default:
 614         SET_ERRNO(HPE_INVALID_EOF_STATE);
 615         return 1;
 616     }
 617   }
 618
 619
 620   if (parser->state == s_header_field)
 621     header_field_mark = data;
 622   if (parser->state == s_header_value)
 623     header_value_mark = data;
 624   switch (parser->state) {
 625   case s_req_path:
 626   case s_req_schema:
 627   case s_req_schema_slash:
 628   case s_req_schema_slash_slash:
 629   case s_req_server_start:
 630   case s_req_server:
 631   case s_req_server_with_at:
 632   case s_req_query_string_start:
 633   case s_req_query_string:
 634   case s_req_fragment_start:
 635   case s_req_fragment:
 636     url_mark = data;
 637     break;
 638   }
 639
 640   for (p=data; p != data + len; p++) {
 641     ch = *p;
 642
 643     if (PARSING_HEADER(parser->state)) {
 644       ++parser->nread;
 645       /* Buffer overflow attack */
 646       if (parser->nread > HTTP_MAX_HEADER_SIZE) {
 647         SET_ERRNO(HPE_HEADER_OVERFLOW);
 648         goto error;
 649       }
 650     }
 651
 652     reexecute_byte:
 653     switch (parser->state) {
 654
 655       case s_dead:
 656         /* this state is used after a 'Connection: close' message
 657          * the parser will error out if it reads another message
 658          */
 659         if (ch == CR || ch == LF)
 660           break;
 661
 662         SET_ERRNO(HPE_CLOSED_CONNECTION);
 663         goto error;
 664
 665       case s_start_req_or_res:
 666       {
 667         if (ch == CR || ch == LF)
 668           break;
 669         parser->flags = 0;
 670         parser->content_length = ULLONG_MAX;
 671
 672         if (ch == 'H') {
 673           parser->state = s_res_or_resp_H;
 674
 675           CALLBACK_NOTIFY(message_begin);
 676         } else {
 677           parser->type = HTTP_REQUEST;
 678           parser->state = s_start_req;
 679           goto reexecute_byte;
 680         }
 681
 682         break;
 683       }
 684
 685       case s_res_or_resp_H:
 686         if (ch == 'T') {
 687           parser->type = HTTP_RESPONSE;
 688           parser->state = s_res_HT;
 689         } else {
 690           if (ch != 'E') {
 691             SET_ERRNO(HPE_INVALID_CONSTANT);
 692             goto error;
 693           }
 694
 695           parser->type = HTTP_REQUEST;
 696           parser->method = HTTP_HEAD;
 697           parser->index = 2;
 698           parser->state = s_req_method;
 699         }
 700         break;
 701
 702       case s_start_res:
 703       {
 704         parser->flags = 0;
 705         parser->content_length = ULLONG_MAX;
 706
 707         switch (ch) {
 708           case 'H':
 709             parser->state = s_res_H;
 710             break;
 711
 712           case CR:
 713           case LF:
 714             break;
 715
 716           default:
 717             SET_ERRNO(HPE_INVALID_CONSTANT);
 718             goto error;
 719         }
 720
 721         CALLBACK_NOTIFY(message_begin);
 722         break;
 723       }
 724
 725       case s_res_H:
 726         STRICT_CHECK(ch != 'T');
 727         parser->state = s_res_HT;
 728         break;
 729
 730       case s_res_HT:
 731         STRICT_CHECK(ch != 'T');
 732         parser->state = s_res_HTT;
 733         break;
 734
 735       case s_res_HTT:
 736         STRICT_CHECK(ch != 'P');
 737         parser->state = s_res_HTTP;
 738         break;
 739
 740       case s_res_HTTP:
 741         STRICT_CHECK(ch != '/');
 742         parser->state = s_res_first_http_major;
 743         break;
 744
 745       case s_res_first_http_major:
 746         if (ch < '0' || ch > '9') {
 747           SET_ERRNO(HPE_INVALID_VERSION);
 748           goto error;
 749         }
 750
 751         parser->http_major = ch - '0';
 752         parser->state = s_res_http_major;
 753         break;
 754
 755       /* major HTTP version or dot */
 756       case s_res_http_major:
 757       {
 758         if (ch == '.') {
 759           parser->state = s_res_first_http_minor;
 760           break;
 761         }
 762
 763         if (!IS_NUM(ch)) {
 764           SET_ERRNO(HPE_INVALID_VERSION);
 765           goto error;
 766         }
 767
 768         parser->http_major *= 10;
 769         parser->http_major += ch - '0';
 770
 771         if (parser->http_major > 999) {
 772           SET_ERRNO(HPE_INVALID_VERSION);
 773           goto error;
 774         }
 775
 776         break;
 777       }
 778
 779       /* first digit of minor HTTP version */
 780       case s_res_first_http_minor:
 781         if (!IS_NUM(ch)) {
 782           SET_ERRNO(HPE_INVALID_VERSION);
 783           goto error;
 784         }
 785
 786         parser->http_minor = ch - '0';
 787         parser->state = s_res_http_minor;
 788         break;
 789
 790       /* minor HTTP version or end of request line */
 791       case s_res_http_minor:
 792       {
 793         if (ch == ' ') {
 794           parser->state = s_res_first_status_code;
 795           break;
 796         }
 797
 798         if (!IS_NUM(ch)) {
 799           SET_ERRNO(HPE_INVALID_VERSION);
 800           goto error;
 801         }
 802
 803         parser->http_minor *= 10;
 804         parser->http_minor += ch - '0';
 805
 806         if (parser->http_minor > 999) {
 807           SET_ERRNO(HPE_INVALID_VERSION);
 808           goto error;
 809         }
 810
 811         break;
 812       }
 813
 814       case s_res_first_status_code:
 815       {
 816         if (!IS_NUM(ch)) {
 817           if (ch == ' ') {
 818             break;
 819           }
 820
 821           SET_ERRNO(HPE_INVALID_STATUS);
 822           goto error;
 823         }
 824         parser->status_code = ch - '0';
 825         parser->state = s_res_status_code;
 826         break;
 827       }
 828
 829       case s_res_status_code:
 830       {
 831         if (!IS_NUM(ch)) {
 832           switch (ch) {
 833             case ' ':
 834               parser->state = s_res_status;
 835               break;
 836             case CR:
 837               parser->state = s_res_line_almost_done;
 838               break;
 839             case LF:
 840               parser->state = s_header_field_start;
 841               break;
 842             default:
 843               SET_ERRNO(HPE_INVALID_STATUS);
 844               goto error;
 845           }
 846           break;
 847         }
 848
 849         parser->status_code *= 10;
 850         parser->status_code += ch - '0';
 851
 852         if (parser->status_code > 999) {
 853           SET_ERRNO(HPE_INVALID_STATUS);
 854           goto error;
 855         }
 856
 857         break;
 858       }
 859
 860       case s_res_status:
 861         /* the human readable status. e.g. "NOT FOUND"
 862          * we are not humans so just ignore this */
 863         if (ch == CR) {
 864           parser->state = s_res_line_almost_done;
 865           break;
 866         }
 867
 868         if (ch == LF) {
 869           parser->state = s_header_field_start;
 870           break;
 871         }
 872         break;
 873
 874       case s_res_line_almost_done:
 875         STRICT_CHECK(ch != LF);
 876         parser->state = s_header_field_start;
 877         break;
 878
 879       case s_start_req:
 880       {
 881         if (ch == CR || ch == LF)
 882           break;
 883         parser->flags = 0;
 884         parser->content_length = ULLONG_MAX;
 885
 886         if (!IS_ALPHA(ch)) {
 887           SET_ERRNO(HPE_INVALID_METHOD);
 888           goto error;
 889         }
 890
 891         parser->method = (enum http_method) 0;
 892         parser->index = 1;
 893         switch (ch) {
 894           case 'C': parser->method = HTTP_CONNECT; /* or COPY, CHECKOUT */ break;
 895           case 'D': parser->method = HTTP_DELETE; break;
 896           case 'G': parser->method = HTTP_GET; break;
 897           case 'H': parser->method = HTTP_HEAD; break;
 898           case 'L': parser->method = HTTP_LOCK; break;
 899           case 'M': parser->method = HTTP_MKCOL; /* or MOVE, MKACTIVITY, MERGE, M-SEARCH */ break;
 900           case 'N': parser->method = HTTP_NOTIFY; break;
 901           case 'O': parser->method = HTTP_OPTIONS; break;
 902           case 'P': parser->method = HTTP_POST;
 903             /* or PROPFIND|PROPPATCH|PUT|PATCH|PURGE */
 904             break;
 905           case 'R': parser->method = HTTP_REPORT; break;
 906           case 'S': parser->method = HTTP_SUBSCRIBE; /* or SEARCH */ break;
 907           case 'T': parser->method = HTTP_TRACE; break;
 908           case 'U': parser->method = HTTP_UNLOCK; /* or UNSUBSCRIBE */ break;
 909           default:
 910             SET_ERRNO(HPE_INVALID_METHOD);
 911             goto error;
 912         }
 913         parser->state = s_req_method;
 914
 915         CALLBACK_NOTIFY(message_begin);
 916
 917         break;
 918       }
 919
 920       case s_req_method:
 921       {
 922         const char *matcher;
 923         if (ch == '\0') {
 924           SET_ERRNO(HPE_INVALID_METHOD);
 925           goto error;
 926         }
 927
 928         matcher = method_strings[parser->method];
 929         if (ch == ' ' && matcher[parser->index] == '\0') {
 930           parser->state = s_req_spaces_before_url;
 931         } else if (ch == matcher[parser->index]) {
 932           ; /* nada */
 933         } else if (parser->method == HTTP_CONNECT) {
 934           if (parser->index == 1 && ch == 'H') {
 935             parser->method = HTTP_CHECKOUT;
 936           } else if (parser->index == 2  && ch == 'P') {
 937             parser->method = HTTP_COPY;
 938           } else {
 939             goto error;
 940           }
 941         } else if (parser->method == HTTP_MKCOL) {
 942           if (parser->index == 1 && ch == 'O') {
 943             parser->method = HTTP_MOVE;
 944           } else if (parser->index == 1 && ch == 'E') {
 945             parser->method = HTTP_MERGE;
 946           } else if (parser->index == 1 && ch == '-') {
 947             parser->method = HTTP_MSEARCH;
 948           } else if (parser->index == 2 && ch == 'A') {
 949             parser->method = HTTP_MKACTIVITY;
 950           } else {
 951             goto error;
 952           }
 953         } else if (parser->method == HTTP_SUBSCRIBE) {
 954           if (parser->index == 1 && ch == 'E') {
 955             parser->method = HTTP_SEARCH;
 956           } else {
 957             goto error;
 958           }
 959         } else if (parser->index == 1 && parser->method == HTTP_POST) {
 960           if (ch == 'R') {
 961             parser->method = HTTP_PROPFIND; /* or HTTP_PROPPATCH */
 962           } else if (ch == 'U') {
 963             parser->method = HTTP_PUT; /* or HTTP_PURGE */
 964           } else if (ch == 'A') {
 965             parser->method = HTTP_PATCH;
 966           } else {
 967             goto error;
 968           }
 969         } else if (parser->index == 2) {
 970           if (parser->method == HTTP_PUT) {
 971             if (ch == 'R') parser->method = HTTP_PURGE;
 972           } else if (parser->method == HTTP_UNLOCK) {
 973             if (ch == 'S') parser->method = HTTP_UNSUBSCRIBE;
 974           }
 975         } else if (parser->index == 4 && parser->method == HTTP_PROPFIND && ch == 'P') {
 976           parser->method = HTTP_PROPPATCH;
 977         } else {
 978           SET_ERRNO(HPE_INVALID_METHOD);
 979           goto error;
 980         }
 981
 982         ++parser->index;
 983         break;
 984       }
 985
 986       case s_req_spaces_before_url:
 987       {
 988         if (ch == ' ') break;
 989
 990         MARK(url);
 991         if (parser->method == HTTP_CONNECT) {
 992           parser->state = s_req_server_start;
 993         }
 994
 995         parser->state = parse_url_char((enum state)parser->state, ch);
 996         if (parser->state == s_dead) {
 997           SET_ERRNO(HPE_INVALID_URL);
 998           goto error;
 999         }
1000
1001         break;
1002       }
1003
1004       case s_req_schema:
1005       case s_req_schema_slash:
1006       case s_req_schema_slash_slash:
1007       case s_req_server_start:
1008       {
1009         switch (ch) {
1010           /* No whitespace allowed here */
1011           case ' ':
1012           case CR:
1013           case LF:
1014             SET_ERRNO(HPE_INVALID_URL);
1015             goto error;
1016           default:
1017             parser->state = parse_url_char((enum state)parser->state, ch);
1018             if (parser->state == s_dead) {
1019               SET_ERRNO(HPE_INVALID_URL);
1020               goto error;
1021             }
1022         }
1023
1024         break;
1025       }
1026
1027       case s_req_server:
1028       case s_req_server_with_at:
1029       case s_req_path:
1030       case s_req_query_string_start:
1031       case s_req_query_string:
1032       case s_req_fragment_start:
1033       case s_req_fragment:
1034       {
1035         switch (ch) {
1036           case ' ':
1037             parser->state = s_req_http_start;
1038             CALLBACK_DATA(url);
1039             break;
1040           case CR:
1041           case LF:
1042             parser->http_major = 0;
1043             parser->http_minor = 9;
1044             parser->state = (ch == CR) ?
1045               s_req_line_almost_done :
1046               s_header_field_start;
1047             CALLBACK_DATA(url);
1048             break;
1049           default:
1050             parser->state = parse_url_char((enum state)parser->state, ch);
1051             if (parser->state == s_dead) {
1052               SET_ERRNO(HPE_INVALID_URL);
1053               goto error;
1054             }
1055         }
1056         break;
1057       }
1058
1059       case s_req_http_start:
1060         switch (ch) {
1061           case 'H':
1062             parser->state = s_req_http_H;
1063             break;
1064           case ' ':
1065             break;
1066           default:
1067             SET_ERRNO(HPE_INVALID_CONSTANT);
1068             goto error;
1069         }
1070         break;
1071
1072       case s_req_http_H:
1073         STRICT_CHECK(ch != 'T');
1074         parser->state = s_req_http_HT;
1075         break;
1076
1077       case s_req_http_HT:
1078         STRICT_CHECK(ch != 'T');
1079         parser->state = s_req_http_HTT;
1080         break;
1081
1082       case s_req_http_HTT:
1083         STRICT_CHECK(ch != 'P');
1084         parser->state = s_req_http_HTTP;
1085         break;
1086
1087       case s_req_http_HTTP:
1088         STRICT_CHECK(ch != '/');
1089         parser->state = s_req_first_http_major;
1090         break;
1091
1092       /* first digit of major HTTP version */
1093       case s_req_first_http_major:
1094         if (ch < '1' || ch > '9') {
1095           SET_ERRNO(HPE_INVALID_VERSION);
1096           goto error;
1097         }
1098
1099         parser->http_major = ch - '0';
1100         parser->state = s_req_http_major;
1101         break;
1102
1103       /* major HTTP version or dot */
1104       case s_req_http_major:
1105       {
1106         if (ch == '.') {
1107           parser->state = s_req_first_http_minor;
1108           break;
1109         }
1110
1111         if (!IS_NUM(ch)) {
1112           SET_ERRNO(HPE_INVALID_VERSION);
1113           goto error;
1114         }
1115
1116         parser->http_major *= 10;
1117         parser->http_major += ch - '0';
1118
1119         if (parser->http_major > 999) {
1120           SET_ERRNO(HPE_INVALID_VERSION);
1121           goto error;
1122         }
1123
1124         break;
1125       }
1126
1127       /* first digit of minor HTTP version */
1128       case s_req_first_http_minor:
1129         if (!IS_NUM(ch)) {
1130           SET_ERRNO(HPE_INVALID_VERSION);
1131           goto error;
1132         }
1133
1134         parser->http_minor = ch - '0';
1135         parser->state = s_req_http_minor;
1136         break;
1137
1138       /* minor HTTP version or end of request line */
1139       case s_req_http_minor:
1140       {
1141         if (ch == CR) {
1142           parser->state = s_req_line_almost_done;
1143           break;
1144         }
1145
1146         if (ch == LF) {
1147           parser->state = s_header_field_start;
1148           break;
1149         }
1150
1151         /* XXX allow spaces after digit? */
1152
1153         if (!IS_NUM(ch)) {
1154           SET_ERRNO(HPE_INVALID_VERSION);
1155           goto error;
1156         }
1157
1158         parser->http_minor *= 10;
1159         parser->http_minor += ch - '0';
1160
1161         if (parser->http_minor > 999) {
1162           SET_ERRNO(HPE_INVALID_VERSION);
1163           goto error;
1164         }
1165
1166         break;
1167       }
1168
1169       /* end of request line */
1170       case s_req_line_almost_done:
1171       {
1172         if (ch != LF) {
1173           SET_ERRNO(HPE_LF_EXPECTED);
1174           goto error;
1175         }
1176
1177         parser->state = s_header_field_start;
1178         break;
1179       }
1180
1181       case s_header_field_start:
1182       {
1183         if (ch == CR) {
1184           parser->state = s_headers_almost_done;
1185           break;
1186         }
1187
1188         if (ch == LF) {
1189           /* they might be just sending \n instead of \r\n so this would be
1190            * the second \n to denote the end of headers*/
1191           parser->state = s_headers_almost_done;
1192           goto reexecute_byte;
1193         }
1194
1195         c = TOKEN(ch);
1196
1197         if (!c) {
1198           SET_ERRNO(HPE_INVALID_HEADER_TOKEN);
1199           goto error;
1200         }
1201
1202         MARK(header_field);
1203
1204         parser->index = 0;
1205         parser->state = s_header_field;
1206
1207         switch (c) {
1208           case 'c':
1209             parser->header_state = h_C;
1210             break;
1211
1212           case 'p':
1213             parser->header_state = h_matching_proxy_connection;
1214             break;
1215
1216           case 't':
1217             parser->header_state = h_matching_transfer_encoding;
1218             break;
1219
1220           case 'u':
1221             parser->header_state = h_matching_upgrade;
1222             break;
1223
1224           default:
1225             parser->header_state = h_general;
1226             break;
1227         }
1228         break;
1229       }
1230
1231       case s_header_field:
1232       {
1233         c = TOKEN(ch);
1234
1235         if (c) {
1236           switch (parser->header_state) {
1237             case h_general:
1238               break;
1239
1240             case h_C:
1241               parser->index++;
1242               parser->header_state = (c == 'o' ? h_CO : h_general);
1243               break;
1244
1245             case h_CO:
1246               parser->index++;
1247               parser->header_state = (c == 'n' ? h_CON : h_general);
1248               break;
1249
1250             case h_CON:
1251               parser->index++;
1252               switch (c) {
1253                 case 'n':
1254                   parser->header_state = h_matching_connection;
1255                   break;
1256                 case 't':
1257                   parser->header_state = h_matching_content_length;
1258                   break;
1259                 default:
1260                   parser->header_state = h_general;
1261                   break;
1262               }
1263               break;
1264
1265             /* connection */
1266
1267             case h_matching_connection:
1268               parser->index++;
1269               if (parser->index > sizeof(CONNECTION)-1
1270                   || c != CONNECTION[parser->index]) {
1271                 parser->header_state = h_general;
1272               } else if (parser->index == sizeof(CONNECTION)-2) {
1273                 parser->header_state = h_connection;
1274               }
1275               break;
1276
1277             /* proxy-connection */
1278
1279             case h_matching_proxy_connection:
1280               parser->index++;
1281               if (parser->index > sizeof(PROXY_CONNECTION)-1
1282                   || c != PROXY_CONNECTION[parser->index]) {
1283                 parser->header_state = h_general;
1284               } else if (parser->index == sizeof(PROXY_CONNECTION)-2) {
1285                 parser->header_state = h_connection;
1286               }
1287               break;
1288
1289             /* content-length */
1290
1291             case h_matching_content_length:
1292               parser->index++;
1293               if (parser->index > sizeof(CONTENT_LENGTH)-1
1294                   || c != CONTENT_LENGTH[parser->index]) {
1295                 parser->header_state = h_general;
1296               } else if (parser->index == sizeof(CONTENT_LENGTH)-2) {
1297                 parser->header_state = h_content_length;
1298               }
1299               break;
1300
1301             /* transfer-encoding */
1302
1303             case h_matching_transfer_encoding:
1304               parser->index++;
1305               if (parser->index > sizeof(TRANSFER_ENCODING)-1
1306                   || c != TRANSFER_ENCODING[parser->index]) {
1307                 parser->header_state = h_general;
1308               } else if (parser->index == sizeof(TRANSFER_ENCODING)-2) {
1309                 parser->header_state = h_transfer_encoding;
1310               }
1311               break;
1312
1313             /* upgrade */
1314
1315             case h_matching_upgrade:
1316               parser->index++;
1317               if (parser->index > sizeof(UPGRADE)-1
1318                   || c != UPGRADE[parser->index]) {
1319                 parser->header_state = h_general;
1320               } else if (parser->index == sizeof(UPGRADE)-2) {
1321                 parser->header_state = h_upgrade;
1322               }
1323               break;
1324
1325             case h_connection:
1326             case h_content_length:
1327             case h_transfer_encoding:
1328             case h_upgrade:
1329               if (ch != ' ') parser->header_state = h_general;
1330               break;
1331
1332             default:
1333               assert(0 && "Unknown header_state");
1334               break;
1335           }
1336           break;
1337         }
1338
1339         if (ch == ':') {
1340           parser->state = s_header_value_start;
1341           CALLBACK_DATA(header_field);
1342           break;
1343         }
1344
1345         if (ch == CR) {
1346           parser->state = s_header_almost_done;
1347           CALLBACK_DATA(header_field);
1348           break;
1349         }
1350
1351         if (ch == LF) {
1352           parser->state = s_header_field_start;
1353           CALLBACK_DATA(header_field);
1354           break;
1355         }
1356
1357         SET_ERRNO(HPE_INVALID_HEADER_TOKEN);
1358         goto error;
1359       }
1360
1361       case s_header_value_start:
1362       {
1363         if (ch == ' ' || ch == '\t') break;
1364
1365         MARK(header_value);
1366
1367         parser->state = s_header_value;
1368         parser->index = 0;
1369
1370         if (ch == CR) {
1371           parser->header_state = h_general;
1372           parser->state = s_header_almost_done;
1373           CALLBACK_DATA(header_value);
1374           break;
1375         }
1376
1377         if (ch == LF) {
1378           parser->state = s_header_field_start;
1379           CALLBACK_DATA(header_value);
1380           break;
1381         }
1382
1383         c = LOWER(ch);
1384
1385         switch (parser->header_state) {
1386           case h_upgrade:
1387             parser->flags |= F_UPGRADE;
1388             parser->header_state = h_general;
1389             break;
1390
1391           case h_transfer_encoding:
1392             /* looking for 'Transfer-Encoding: chunked' */
1393             if ('c' == c) {
1394               parser->header_state = h_matching_transfer_encoding_chunked;
1395             } else {
1396               parser->header_state = h_general;
1397             }
1398             break;
1399
1400           case h_content_length:
1401             if (!IS_NUM(ch)) {
1402               SET_ERRNO(HPE_INVALID_CONTENT_LENGTH);
1403               goto error;
1404             }
1405
1406             parser->content_length = ch - '0';
1407             break;
1408
1409           case h_connection:
1410             /* looking for 'Connection: keep-alive' */
1411             if (c == 'k') {
1412               parser->header_state = h_matching_connection_keep_alive;
1413             /* looking for 'Connection: close' */
1414             } else if (c == 'c') {
1415               parser->header_state = h_matching_connection_close;
1416             } else {
1417               parser->header_state = h_general;
1418             }
1419             break;
1420
1421           default:
1422             parser->header_state = h_general;
1423             break;
1424         }
1425         break;
1426       }
1427
1428       case s_header_value:
1429       {
1430
1431         if (ch == CR) {
1432           parser->state = s_header_almost_done;
1433           CALLBACK_DATA(header_value);
1434           break;
1435         }
1436
1437         if (ch == LF) {
1438           parser->state = s_header_almost_done;
1439           CALLBACK_DATA_NOADVANCE(header_value);
1440           goto reexecute_byte;
1441         }
1442
1443         c = LOWER(ch);
1444
1445         switch (parser->header_state) {
1446           case h_general:
1447             break;
1448
1449           case h_connection:
1450           case h_transfer_encoding:
1451             assert(0 && "Shouldn't get here.");
1452             break;
1453
1454           case h_content_length:
1455           {
1456             uint64_t t;
1457
1458             if (ch == ' ') break;
1459
1460             if (!IS_NUM(ch)) {
1461               SET_ERRNO(HPE_INVALID_CONTENT_LENGTH);
1462               goto error;
1463             }
1464
1465             t = parser->content_length;
1466             t *= 10;
1467             t += ch - '0';
1468
1469             /* Overflow? */
1470             if (t < parser->content_length || t == ULLONG_MAX) {
1471               SET_ERRNO(HPE_INVALID_CONTENT_LENGTH);
1472               goto error;
1473             }
1474
1475             parser->content_length = t;
1476             break;
1477           }
1478
1479           /* Transfer-Encoding: chunked */
1480           case h_matching_transfer_encoding_chunked:
1481             parser->index++;
1482             if (parser->index > sizeof(CHUNKED)-1
1483                 || c != CHUNKED[parser->index]) {
1484               parser->header_state = h_general;
1485             } else if (parser->index == sizeof(CHUNKED)-2) {
1486               parser->header_state = h_transfer_encoding_chunked;
1487             }
1488             break;
1489
1490           /* looking for 'Connection: keep-alive' */
1491           case h_matching_connection_keep_alive:
1492             parser->index++;
1493             if (parser->index > sizeof(KEEP_ALIVE)-1
1494                 || c != KEEP_ALIVE[parser->index]) {
1495               parser->header_state = h_general;
1496             } else if (parser->index == sizeof(KEEP_ALIVE)-2) {
1497               parser->header_state = h_connection_keep_alive;
1498             }
1499             break;
1500
1501           /* looking for 'Connection: close' */
1502           case h_matching_connection_close:
1503             parser->index++;
1504             if (parser->index > sizeof(CLOSE)-1 || c != CLOSE[parser->index]) {
1505               parser->header_state = h_general;
1506             } else if (parser->index == sizeof(CLOSE)-2) {
1507               parser->header_state = h_connection_close;
1508             }
1509             break;
1510
1511           case h_transfer_encoding_chunked:
1512           case h_connection_keep_alive:
1513           case h_connection_close:
1514             if (ch != ' ') parser->header_state = h_general;
1515             break;
1516
1517           default:
1518             parser->state = s_header_value;
1519             parser->header_state = h_general;
1520             break;
1521         }
1522         break;
1523       }
1524
1525       case s_header_almost_done:
1526       {
1527         STRICT_CHECK(ch != LF);
1528
1529         parser->state = s_header_value_lws;
1530
1531         switch (parser->header_state) {
1532           case h_connection_keep_alive:
1533             parser->flags |= F_CONNECTION_KEEP_ALIVE;
1534             break;
1535           case h_connection_close:
1536             parser->flags |= F_CONNECTION_CLOSE;
1537             break;
1538           case h_transfer_encoding_chunked:
1539             parser->flags |= F_CHUNKED;
1540             break;
1541           default:
1542             break;
1543         }
1544
1545         break;
1546       }
1547
1548       case s_header_value_lws:
1549       {
1550         if (ch == ' ' || ch == '\t')
1551           parser->state = s_header_value_start;
1552         else
1553         {
1554           parser->state = s_header_field_start;
1555           goto reexecute_byte;
1556         }
1557         break;
1558       }
1559
1560       case s_headers_almost_done:
1561       {
1562         STRICT_CHECK(ch != LF);
1563
1564         if (parser->flags & F_TRAILING) {
1565           /* End of a chunked request */
1566           parser->state = NEW_MESSAGE();
1567           CALLBACK_NOTIFY(message_complete);
1568           break;
1569         }
1570
1571         parser->state = s_headers_done;
1572
1573         /* Set this here so that on_headers_complete() callbacks can see it */
1574         parser->upgrade =
1575           (parser->flags & F_UPGRADE || parser->method == HTTP_CONNECT);
1576
1577         /* Here we call the headers_complete callback. This is somewhat
1578          * different than other callbacks because if the user returns 1, we
1579          * will interpret that as saying that this message has no body. This
1580          * is needed for the annoying case of receiving a response to a HEAD
1581          * request.
1582          *
1583          * We'd like to use CALLBACK_NOTIFY_NOADVANCE() here but we cannot, so
1584          * we have to simulate it by handling a change in errno below.
1585          */
1586         if (settings->on_headers_complete) {
1587           switch (settings->on_headers_complete(parser)) {
1588             case 0:
1589               break;
1590
1591             case 1:
1592               parser->flags |= F_SKIPBODY;
1593               break;
1594
1595             default:
1596               SET_ERRNO(HPE_CB_headers_complete);
1597               return p - data; /* Error */
1598           }
1599         }
1600
1601         if (HTTP_PARSER_ERRNO(parser) != HPE_OK) {
1602           return p - data;
1603         }
1604
1605         goto reexecute_byte;
1606       }
1607
1608       case s_headers_done:
1609       {
1610         STRICT_CHECK(ch != LF);
1611
1612         parser->nread = 0;
1613
1614         /* Exit, the rest of the connect is in a different protocol. */
1615         if (parser->upgrade) {
1616           parser->state = NEW_MESSAGE();
1617           CALLBACK_NOTIFY(message_complete);
1618           return (p - data) + 1;
1619         }
1620
1621         if (parser->flags & F_SKIPBODY) {
1622           parser->state = NEW_MESSAGE();
1623           CALLBACK_NOTIFY(message_complete);
1624         } else if (parser->flags & F_CHUNKED) {
1625           /* chunked encoding - ignore Content-Length header */
1626           parser->state = s_chunk_size_start;
1627         } else {
1628           if (parser->content_length == 0) {
1629             /* Content-Length header given but zero: Content-Length: 0\r\n */
1630             parser->state = NEW_MESSAGE();
1631             CALLBACK_NOTIFY(message_complete);
1632           } else if (parser->content_length != ULLONG_MAX) {
1633             /* Content-Length header given and non-zero */
1634             parser->state = s_body_identity;
1635           } else {
1636             if (parser->type == HTTP_REQUEST ||
1637                 !http_message_needs_eof(parser)) {
1638               /* Assume content-length 0 - read the next */
1639               parser->state = NEW_MESSAGE();
1640               CALLBACK_NOTIFY(message_complete);
1641             } else {
1642               /* Read body until EOF */
1643               parser->state = s_body_identity_eof;
1644             }
1645           }
1646         }
1647
1648         break;
1649       }
1650
1651       case s_body_identity:
1652       {
1653         uint64_t to_read = MIN(parser->content_length,
1654                                (uint64_t) ((data + len) - p));
1655
1656         assert(parser->content_length != 0
1657             && parser->content_length != ULLONG_MAX);
1658
1659         /* The difference between advancing content_length and p is because
1660          * the latter will automatically advance on the next loop iteration.
1661          * Further, if content_length ends up at 0, we want to see the last
1662          * byte again for our message complete callback.
1663          */
1664         MARK(body);
1665         parser->content_length -= to_read;
1666         p += to_read - 1;
1667
1668         if (parser->content_length == 0) {
1669           parser->state = s_message_done;
1670
1671           /* Mimic CALLBACK_DATA_NOADVANCE() but with one extra byte.
1672            *
1673            * The alternative to doing this is to wait for the next byte to
1674            * trigger the data callback, just as in every other case. The
1675            * problem with this is that this makes it difficult for the test
1676            * harness to distinguish between complete-on-EOF and
1677            * complete-on-length. It's not clear that this distinction is
1678            * important for applications, but let's keep it for now.
1679            */
1680           CALLBACK_DATA_(body, p - body_mark + 1, p - data);
1681           goto reexecute_byte;
1682         }
1683
1684         break;
1685       }
1686
1687       /* read until EOF */
1688       case s_body_identity_eof:
1689         MARK(body);
1690         p = data + len - 1;
1691
1692         break;
1693
1694       case s_message_done:
1695         parser->state = NEW_MESSAGE();
1696         CALLBACK_NOTIFY(message_complete);
1697         break;
1698
1699       case s_chunk_size_start:
1700       {
1701         assert(parser->nread == 1);
1702         assert(parser->flags & F_CHUNKED);
1703
1704         unhex_val = unhex[(unsigned char)ch];
1705         if (unhex_val == -1) {
1706           SET_ERRNO(HPE_INVALID_CHUNK_SIZE);
1707           goto error;
1708         }
1709
1710         parser->content_length = unhex_val;
1711         parser->state = s_chunk_size;
1712         break;
1713       }
1714
1715       case s_chunk_size:
1716       {
1717         uint64_t t;
1718
1719         assert(parser->flags & F_CHUNKED);
1720
1721         if (ch == CR) {
1722           parser->state = s_chunk_size_almost_done;
1723           break;
1724         }
1725
1726         unhex_val = unhex[(unsigned char)ch];
1727
1728         if (unhex_val == -1) {
1729           if (ch == ';' || ch == ' ') {
1730             parser->state = s_chunk_parameters;
1731             break;
1732           }
1733
1734           SET_ERRNO(HPE_INVALID_CHUNK_SIZE);
1735           goto error;
1736         }
1737
1738         t = parser->content_length;
1739         t *= 16;
1740         t += unhex_val;
1741
1742         /* Overflow? */
1743         if (t < parser->content_length || t == ULLONG_MAX) {
1744           SET_ERRNO(HPE_INVALID_CONTENT_LENGTH);
1745           goto error;
1746         }
1747
1748         parser->content_length = t;
1749         break;
1750       }
1751
1752       case s_chunk_parameters:
1753       {
1754         assert(parser->flags & F_CHUNKED);
1755         /* just ignore this shit. TODO check for overflow */
1756         if (ch == CR) {
1757           parser->state = s_chunk_size_almost_done;
1758           break;
1759         }
1760         break;
1761       }
1762
1763       case s_chunk_size_almost_done:
1764       {
1765         assert(parser->flags & F_CHUNKED);
1766         STRICT_CHECK(ch != LF);
1767
1768         parser->nread = 0;
1769
1770         if (parser->content_length == 0) {
1771           parser->flags |= F_TRAILING;
1772           parser->state = s_header_field_start;
1773         } else {
1774           parser->state = s_chunk_data;
1775         }
1776         break;
1777       }
1778
1779       case s_chunk_data:
1780       {
1781         uint64_t to_read = MIN(parser->content_length,
1782                                (uint64_t) ((data + len) - p));
1783
1784         assert(parser->flags & F_CHUNKED);
1785         assert(parser->content_length != 0
1786             && parser->content_length != ULLONG_MAX);
1787
1788         /* See the explanation in s_body_identity for why the content
1789          * length and data pointers are managed this way.
1790          */
1791         MARK(body);
1792         parser->content_length -= to_read;
1793         p += to_read - 1;
1794
1795         if (parser->content_length == 0) {
1796           parser->state = s_chunk_data_almost_done;
1797         }
1798
1799         break;
1800       }
1801
1802       case s_chunk_data_almost_done:
1803         assert(parser->flags & F_CHUNKED);
1804         assert(parser->content_length == 0);
1805         STRICT_CHECK(ch != CR);
1806         parser->state = s_chunk_data_done;
1807         CALLBACK_DATA(body);
1808         break;
1809
1810       case s_chunk_data_done:
1811         assert(parser->flags & F_CHUNKED);
1812         STRICT_CHECK(ch != LF);
1813         parser->nread = 0;
1814         parser->state = s_chunk_size_start;
1815         break;
1816
1817       default:
1818         assert(0 && "unhandled state");
1819         SET_ERRNO(HPE_INVALID_INTERNAL_STATE);
1820         goto error;
1821     }
1822   }
1823
1824   /* Run callbacks for any marks that we have leftover after we ran out of
1825    * bytes. There should be at most one of these set, so it's OK to invoke
1826    * them in series (unset marks will not result in callbacks).
1827    *
1828    * We use the NOADVANCE() variety of callbacks here because 'p' has already
1829    * overflowed 'data' and this allows us to correct for the off-by-one that
1830    * we'd otherwise have (since CALLBACK_DATA() is meant to be run with a 'p'
1831    * value that's in-bounds).
1832    */
1833
1834   assert(((header_field_mark ? 1 : 0) +
1835           (header_value_mark ? 1 : 0) +
1836           (url_mark ? 1 : 0)  +
1837           (body_mark ? 1 : 0)) <= 1);
1838
1839   CALLBACK_DATA_NOADVANCE(header_field);
1840   CALLBACK_DATA_NOADVANCE(header_value);
1841   CALLBACK_DATA_NOADVANCE(url);
1842   CALLBACK_DATA_NOADVANCE(body);
1843
1844   return len;
1845
1846 error:
1847   if (HTTP_PARSER_ERRNO(parser) == HPE_OK) {
1848     SET_ERRNO(HPE_UNKNOWN);
1849   }
1850
1851   return (p - data);
1852 }
1853
1854
1855 /* Does the parser need to see an EOF to find the end of the message? */
1856 int
1857 http_message_needs_eof (const http_parser *parser)
1858 {
1859   if (parser->type == HTTP_REQUEST) {
1860     return 0;
1861   }
1862
1863   /* See RFC 2616 section 4.4 */
1864   if (parser->status_code / 100 == 1 || /* 1xx e.g. Continue */
1865       parser->status_code == 204 ||     /* No Content */
1866       parser->status_code == 304 ||     /* Not Modified */
1867       parser->flags & F_SKIPBODY) {     /* response to a HEAD request */
1868     return 0;
1869   }
1870
1871   if ((parser->flags & F_CHUNKED) || parser->content_length != ULLONG_MAX) {
1872     return 0;
1873   }
1874
1875   return 1;
1876 }
1877
1878
1879 int
1880 http_should_keep_alive (const http_parser *parser)
1881 {
1882   if (parser->http_major > 0 && parser->http_minor > 0) {
1883     /* HTTP/1.1 */
1884     if (parser->flags & F_CONNECTION_CLOSE) {
1885       return 0;
1886     }
1887   } else {
1888     /* HTTP/1.0 or earlier */
1889     if (!(parser->flags & F_CONNECTION_KEEP_ALIVE)) {
1890       return 0;
1891     }
1892   }
1893
1894   return !http_message_needs_eof(parser);
1895 }
1896
1897
1898 const char *
1899 http_method_str (enum http_method m)
1900 {
1901   return ELEM_AT(method_strings, m, "<unknown>");
1902 }
1903
1904
1905 void
1906 http_parser_init (http_parser *parser, enum http_parser_type t)
1907 {
1908   void *data = parser->data; /* preserve application data */
1909   memset(parser, 0, sizeof(*parser));
1910   parser->data = data;
1911   parser->type = t;
1912   parser->state = (t == HTTP_REQUEST ? s_start_req : (t == HTTP_RESPONSE ? s_start_res : s_start_req_or_res));
1913   parser->http_errno = HPE_OK;
1914 }
1915
1916 const char *
1917 http_errno_name(enum http_errno err) {
1918   assert(err < (sizeof(http_strerror_tab)/sizeof(http_strerror_tab[0])));
1919   return http_strerror_tab[err].name;
1920 }
1921
1922 const char *
1923 http_errno_description(enum http_errno err) {
1924   assert(err < (sizeof(http_strerror_tab)/sizeof(http_strerror_tab[0])));
1925   return http_strerror_tab[err].description;
1926 }
1927
1928 static enum http_host_state
1929 http_parse_host_char(enum http_host_state s, const char ch) {
1930   switch(s) {
1931     case s_http_userinfo:
1932     case s_http_userinfo_start:
1933       if (ch == '@') {
1934         return s_http_host_start;
1935       }
1936
1937       if (IS_USERINFO_CHAR(ch)) {
1938         return s_http_userinfo;
1939       }
1940       break;
1941
1942     case s_http_host_start:
1943       if (ch == '[') {
1944         return s_http_host_v6_start;
1945       }
1946
1947       if (IS_HOST_CHAR(ch)) {
1948         return s_http_host;
1949       }
1950
1951       break;
1952
1953     case s_http_host:
1954       if (IS_HOST_CHAR(ch)) {
1955         return s_http_host;
1956       }
1957
1958     /* FALLTHROUGH */
1959     case s_http_host_v6_end:
1960       if (ch == ':') {
1961         return s_http_host_port_start;
1962       }
1963
1964       break;
1965
1966     case s_http_host_v6:
1967       if (ch == ']') {
1968         return s_http_host_v6_end;
1969       }
1970
1971     /* FALLTHROUGH */
1972     case s_http_host_v6_start:
1973       if (IS_HEX(ch) || ch == ':') {
1974         return s_http_host_v6;
1975       }
1976
1977       break;
1978
1979     case s_http_host_port:
1980     case s_http_host_port_start:
1981       if (IS_NUM(ch)) {
1982         return s_http_host_port;
1983       }
1984
1985       break;
1986
1987     default:
1988       break;
1989   }
1990   return s_http_host_dead;
1991 }
1992
1993 static int
1994 http_parse_host(const char * buf, struct http_parser_url *u, int found_at) {
1995   enum http_host_state s;
1996
1997   const char *p;
1998   size_t buflen = u->field_data[UF_HOST].off + u->field_data[UF_HOST].len;
1999
2000   u->field_data[UF_HOST].len = 0;
2001
2002   s = found_at ? s_http_userinfo_start : s_http_host_start;
2003
2004   for (p = buf + u->field_data[UF_HOST].off; p < buf + buflen; p++) {
2005     enum http_host_state new_s = http_parse_host_char(s, *p);
2006
2007     if (new_s == s_http_host_dead) {
2008       return 1;
2009     }
2010
2011     switch(new_s) {
2012       case s_http_host:
2013         if (s != s_http_host) {
2014           u->field_data[UF_HOST].off = p - buf;
2015         }
2016         u->field_data[UF_HOST].len++;
2017         break;
2018
2019       case s_http_host_v6:
2020         if (s != s_http_host_v6) {
2021           u->field_data[UF_HOST].off = p - buf;
2022         }
2023         u->field_data[UF_HOST].len++;
2024         break;
2025
2026       case s_http_host_port:
2027         if (s != s_http_host_port) {
2028           u->field_data[UF_PORT].off = p - buf;
2029           u->field_data[UF_PORT].len = 0;
2030           u->field_set |= (1 << UF_PORT);
2031         }
2032         u->field_data[UF_PORT].len++;
2033         break;
2034
2035       case s_http_userinfo:
2036         if (s != s_http_userinfo) {
2037           u->field_data[UF_USERINFO].off = p - buf ;
2038           u->field_data[UF_USERINFO].len = 0;
2039           u->field_set |= (1 << UF_USERINFO);
2040         }
2041         u->field_data[UF_USERINFO].len++;
2042         break;
2043
2044       default:
2045         break;
2046     }
2047     s = new_s;
2048   }
2049
2050   /* Make sure we don't end somewhere unexpected */
2051   switch (s) {
2052     case s_http_host_start:
2053     case s_http_host_v6_start:
2054     case s_http_host_v6:
2055     case s_http_host_port_start:
2056     case s_http_userinfo:
2057     case s_http_userinfo_start:
2058       return 1;
2059     default:
2060       break;
2061   }
2062
2063   return 0;
2064 }
2065
2066 int
2067 http_parser_parse_url(const char *buf, size_t buflen, int is_connect,
2068                       struct http_parser_url *u)
2069 {
2070   enum state s;
2071   const char *p;
2072   enum http_parser_url_fields uf, old_uf;
2073   int found_at = 0;
2074
2075   u->port = u->field_set = 0;
2076   s = is_connect ? s_req_server_start : s_req_spaces_before_url;
2077   uf = old_uf = UF_MAX;
2078
2079   for (p = buf; p < buf + buflen; p++) {
2080     s = parse_url_char(s, *p);
2081
2082     /* Figure out the next field that we're operating on */
2083     switch (s) {
2084       case s_dead:
2085         return 1;
2086
2087       /* Skip delimeters */
2088       case s_req_schema_slash:
2089       case s_req_schema_slash_slash:
2090       case s_req_server_start:
2091       case s_req_query_string_start:
2092       case s_req_fragment_start:
2093         continue;
2094
2095       case s_req_schema:
2096         uf = UF_SCHEMA;
2097         break;
2098
2099       case s_req_server_with_at:
2100         found_at = 1;
2101
2102       /* FALLTROUGH */
2103       case s_req_server:
2104         uf = UF_HOST;
2105         break;
2106
2107       case s_req_path:
2108         uf = UF_PATH;
2109         break;
2110
2111       case s_req_query_string:
2112         uf = UF_QUERY;
2113         break;
2114
2115       case s_req_fragment:
2116         uf = UF_FRAGMENT;
2117         break;
2118
2119       default:
2120         assert(!"Unexpected state");
2121         return 1;
2122     }
2123
2124     /* Nothing's changed; soldier on */
2125     if (uf == old_uf) {
2126       u->field_data[uf].len++;
2127       continue;
2128     }
2129
2130     u->field_data[uf].off = p - buf;
2131     u->field_data[uf].len = 1;
2132
2133     u->field_set |= (1 << uf);
2134     old_uf = uf;
2135   }
2136
2137   /* host must be present if there is a schema */
2138   /* parsing http:///toto will fail */
2139   if ((u->field_set & ((1 << UF_SCHEMA) | (1 << UF_HOST))) != 0) {
2140     if (http_parse_host(buf, u, found_at) != 0) {
2141       return 1;
2142     }
2143   }
2144
2145   /* CONNECT requests can only contain "hostname:port" */
2146   if (is_connect && u->field_set != ((1 << UF_HOST)|(1 << UF_PORT))) {
2147     return 1;
2148   }
2149
2150   if (u->field_set & (1 << UF_PORT)) {
2151     /* Don't bother with endp; we've already validated the string */
2152     unsigned long v = strtoul(buf + u->field_data[UF_PORT].off, NULL, 10);
2153
2154     /* Ports have a max value of 2^16 */
2155     if (v > 0xffff) {
2156       return 1;
2157     }
2158
2159     u->port = (uint16_t) v;
2160   }
2161
2162   return 0;
2163 }
2164
2165 void
2166 http_parser_pause(http_parser *parser, int paused) {
2167   /* Users should only be pausing/unpausing a parser that is not in an error
2168    * state. In non-debug builds, there's not much that we can do about this
2169    * other than ignore it.
2170    */
2171   if (HTTP_PARSER_ERRNO(parser) == HPE_OK ||
2172       HTTP_PARSER_ERRNO(parser) == HPE_PAUSED) {
2173     SET_ERRNO((paused) ? HPE_PAUSED : HPE_OK);
2174   } else {
2175     assert(0 && "Attempting to pause parser in error state");
2176   }
2177 }
2178
2179 int
2180 http_body_is_final(const struct http_parser *parser) {
2181     return parser->state == s_message_done;
2182 }