00001
00002
00003
00004
00005
00006
00007
00008
00009
00010 #include <cstdlib>
00011 #include <cstring>
00012 #include <boost/regex.hpp>
00013 #include <boost/assert.hpp>
00014 #include <boost/logic/tribool.hpp>
00015 #include <boost/algorithm/string.hpp>
00016 #include <pion/algorithm.hpp>
00017 #include <pion/http/parser.hpp>
00018 #include <pion/http/request.hpp>
00019 #include <pion/http/response.hpp>
00020 #include <pion/http/message.hpp>
00021
00022
00023 namespace pion {
00024 namespace http {
00025
00026
00027
00028
00029 const boost::uint32_t parser::STATUS_MESSAGE_MAX = 1024;
00030 const boost::uint32_t parser::METHOD_MAX = 1024;
00031 const boost::uint32_t parser::RESOURCE_MAX = 256 * 1024;
00032 const boost::uint32_t parser::QUERY_STRING_MAX = 1024 * 1024;
00033 const boost::uint32_t parser::HEADER_NAME_MAX = 1024;
00034 const boost::uint32_t parser::HEADER_VALUE_MAX = 1024 * 1024;
00035 const boost::uint32_t parser::QUERY_NAME_MAX = 1024;
00036 const boost::uint32_t parser::QUERY_VALUE_MAX = 1024 * 1024;
00037 const boost::uint32_t parser::COOKIE_NAME_MAX = 1024;
00038 const boost::uint32_t parser::COOKIE_VALUE_MAX = 1024 * 1024;
00039 const std::size_t parser::DEFAULT_CONTENT_MAX = 1024 * 1024;
00040 parser::error_category_t * parser::m_error_category_ptr = NULL;
00041 boost::once_flag parser::m_instance_flag = BOOST_ONCE_INIT;
00042
00043
00044
00045
00046 boost::tribool parser::parse(http::message& http_msg,
00047 boost::system::error_code& ec)
00048 {
00049 BOOST_ASSERT(! eof() );
00050
00051 boost::tribool rc = boost::indeterminate;
00052 std::size_t total_bytes_parsed = 0;
00053
00054 if(http_msg.has_missing_packets()) {
00055 http_msg.set_data_after_missing_packet(true);
00056 }
00057
00058 do {
00059 switch (m_message_parse_state) {
00060
00061 case PARSE_START:
00062 m_message_parse_state = PARSE_HEADERS;
00063
00064
00065
00066 case PARSE_HEADERS:
00067 case PARSE_FOOTERS:
00068 rc = parse_headers(http_msg, ec);
00069 total_bytes_parsed += m_bytes_last_read;
00070
00071 if (rc == true && m_message_parse_state == PARSE_HEADERS) {
00072
00073
00074 rc = finish_header_parsing(http_msg, ec);
00075 }
00076 break;
00077
00078
00079 case PARSE_CHUNKS:
00080 rc = parse_chunks(http_msg.get_chunk_cache(), ec);
00081 total_bytes_parsed += m_bytes_last_read;
00082
00083 if (rc == true && !m_payload_handler) {
00084 http_msg.concatenate_chunks();
00085
00086
00087 rc = ((m_message_parse_state == PARSE_FOOTERS) ?
00088 boost::indeterminate : (boost::tribool)true);
00089 }
00090 break;
00091
00092
00093 case PARSE_CONTENT:
00094 rc = consume_content(http_msg, ec);
00095 total_bytes_parsed += m_bytes_last_read;
00096 break;
00097
00098
00099 case PARSE_CONTENT_NO_LENGTH:
00100 consume_content_as_next_chunk(http_msg.get_chunk_cache());
00101 total_bytes_parsed += m_bytes_last_read;
00102 break;
00103
00104
00105 case PARSE_END:
00106 rc = true;
00107 break;
00108 }
00109 } while ( boost::indeterminate(rc) && ! eof() );
00110
00111
00112 if (rc == true) {
00113 m_message_parse_state = PARSE_END;
00114 finish(http_msg);
00115 } else if(rc == false) {
00116 compute_msg_status(http_msg, false);
00117 }
00118
00119
00120 m_bytes_last_read = total_bytes_parsed;
00121
00122 return rc;
00123 }
00124
00125 boost::tribool parser::parse_missing_data(http::message& http_msg,
00126 std::size_t len, boost::system::error_code& ec)
00127 {
00128 static const char MISSING_DATA_CHAR = 'X';
00129 boost::tribool rc = boost::indeterminate;
00130
00131 http_msg.set_missing_packets(true);
00132
00133 switch (m_message_parse_state) {
00134
00135
00136 case PARSE_START:
00137 case PARSE_HEADERS:
00138 case PARSE_FOOTERS:
00139 set_error(ec, ERROR_MISSING_HEADER_DATA);
00140 rc = false;
00141 break;
00142
00143
00144 case PARSE_CHUNKS:
00145
00146 if (m_chunked_content_parse_state == PARSE_CHUNK
00147 && m_bytes_read_in_current_chunk < m_size_of_current_chunk
00148 && (m_size_of_current_chunk - m_bytes_read_in_current_chunk) >= len)
00149 {
00150
00151 if (m_payload_handler) {
00152 for (std::size_t n = 0; n < len; ++n)
00153 m_payload_handler(&MISSING_DATA_CHAR, 1);
00154 } else {
00155 for (std::size_t n = 0; n < len && http_msg.get_chunk_cache().size() < m_max_content_length; ++n)
00156 http_msg.get_chunk_cache().push_back(MISSING_DATA_CHAR);
00157 }
00158
00159 m_bytes_read_in_current_chunk += len;
00160 m_bytes_last_read = len;
00161 m_bytes_total_read += len;
00162 m_bytes_content_read += len;
00163
00164 if (m_bytes_read_in_current_chunk == m_size_of_current_chunk) {
00165 m_chunked_content_parse_state = PARSE_EXPECTING_CR_AFTER_CHUNK;
00166 }
00167 } else {
00168
00169 set_error(ec, ERROR_MISSING_CHUNK_DATA);
00170 rc = false;
00171 }
00172 break;
00173
00174
00175 case PARSE_CONTENT:
00176
00177 if (m_bytes_content_remaining == 0) {
00178
00179 rc = true;
00180 } else if (m_bytes_content_remaining < len) {
00181
00182 set_error(ec, ERROR_MISSING_TOO_MUCH_CONTENT);
00183 rc = false;
00184 } else {
00185
00186
00187 if (m_payload_handler) {
00188 for (std::size_t n = 0; n < len; ++n)
00189 m_payload_handler(&MISSING_DATA_CHAR, 1);
00190 } else if ( (m_bytes_content_read+len) <= m_max_content_length) {
00191
00192 for (std::size_t n = 0; n < len; ++n)
00193 http_msg.get_content()[m_bytes_content_read++] = MISSING_DATA_CHAR;
00194 } else {
00195 m_bytes_content_read += len;
00196 }
00197
00198 m_bytes_content_remaining -= len;
00199 m_bytes_total_read += len;
00200 m_bytes_last_read = len;
00201
00202 if (m_bytes_content_remaining == 0)
00203 rc = true;
00204 }
00205 break;
00206
00207
00208 case PARSE_CONTENT_NO_LENGTH:
00209
00210 if (m_payload_handler) {
00211 for (std::size_t n = 0; n < len; ++n)
00212 m_payload_handler(&MISSING_DATA_CHAR, 1);
00213 } else {
00214 for (std::size_t n = 0; n < len && http_msg.get_chunk_cache().size() < m_max_content_length; ++n)
00215 http_msg.get_chunk_cache().push_back(MISSING_DATA_CHAR);
00216 }
00217 m_bytes_last_read = len;
00218 m_bytes_total_read += len;
00219 m_bytes_content_read += len;
00220 break;
00221
00222
00223 case PARSE_END:
00224 rc = true;
00225 break;
00226 }
00227
00228
00229 if (rc == true) {
00230 m_message_parse_state = PARSE_END;
00231 finish(http_msg);
00232 } else if(rc == false) {
00233 compute_msg_status(http_msg, false);
00234 }
00235
00236 return rc;
00237 }
00238
00239 boost::tribool parser::parse_headers(http::message& http_msg,
00240 boost::system::error_code& ec)
00241 {
00242
00243
00244
00245
00246
00247
00248
00249 const char *read_start_ptr = m_read_ptr;
00250 m_bytes_last_read = 0;
00251 while (m_read_ptr < m_read_end_ptr) {
00252
00253 if (m_save_raw_headers)
00254 m_raw_headers += *m_read_ptr;
00255
00256 switch (m_headers_parse_state) {
00257 case PARSE_METHOD_START:
00258
00259 if (*m_read_ptr != ' ' && *m_read_ptr!='\r' && *m_read_ptr!='\n') {
00260 if (!is_char(*m_read_ptr) || is_control(*m_read_ptr) || is_special(*m_read_ptr)) {
00261 set_error(ec, ERROR_METHOD_CHAR);
00262 return false;
00263 }
00264 m_headers_parse_state = PARSE_METHOD;
00265 m_method.erase();
00266 m_method.push_back(*m_read_ptr);
00267 }
00268 break;
00269
00270 case PARSE_METHOD:
00271
00272 if (*m_read_ptr == ' ') {
00273 m_resource.erase();
00274 m_headers_parse_state = PARSE_URI_STEM;
00275 } else if (!is_char(*m_read_ptr) || is_control(*m_read_ptr) || is_special(*m_read_ptr)) {
00276 set_error(ec, ERROR_METHOD_CHAR);
00277 return false;
00278 } else if (m_method.size() >= METHOD_MAX) {
00279 set_error(ec, ERROR_METHOD_SIZE);
00280 return false;
00281 } else {
00282 m_method.push_back(*m_read_ptr);
00283 }
00284 break;
00285
00286 case PARSE_URI_STEM:
00287
00288 if (*m_read_ptr == ' ') {
00289 m_headers_parse_state = PARSE_HTTP_VERSION_H;
00290 } else if (*m_read_ptr == '?') {
00291 m_query_string.erase();
00292 m_headers_parse_state = PARSE_URI_QUERY;
00293 } else if (*m_read_ptr == '\r') {
00294 http_msg.set_version_major(0);
00295 http_msg.set_version_minor(0);
00296 m_headers_parse_state = PARSE_EXPECTING_NEWLINE;
00297 } else if (*m_read_ptr == '\n') {
00298 http_msg.set_version_major(0);
00299 http_msg.set_version_minor(0);
00300 m_headers_parse_state = PARSE_EXPECTING_CR;
00301 } else if (is_control(*m_read_ptr)) {
00302 set_error(ec, ERROR_URI_CHAR);
00303 return false;
00304 } else if (m_resource.size() >= RESOURCE_MAX) {
00305 set_error(ec, ERROR_URI_SIZE);
00306 return false;
00307 } else {
00308 m_resource.push_back(*m_read_ptr);
00309 }
00310 break;
00311
00312 case PARSE_URI_QUERY:
00313
00314 if (*m_read_ptr == ' ') {
00315 m_headers_parse_state = PARSE_HTTP_VERSION_H;
00316 } else if (*m_read_ptr == '\r') {
00317 http_msg.set_version_major(0);
00318 http_msg.set_version_minor(0);
00319 m_headers_parse_state = PARSE_EXPECTING_NEWLINE;
00320 } else if (*m_read_ptr == '\n') {
00321 http_msg.set_version_major(0);
00322 http_msg.set_version_minor(0);
00323 m_headers_parse_state = PARSE_EXPECTING_CR;
00324 } else if (is_control(*m_read_ptr)) {
00325 set_error(ec, ERROR_QUERY_CHAR);
00326 return false;
00327 } else if (m_query_string.size() >= QUERY_STRING_MAX) {
00328 set_error(ec, ERROR_QUERY_SIZE);
00329 return false;
00330 } else {
00331 m_query_string.push_back(*m_read_ptr);
00332 }
00333 break;
00334
00335 case PARSE_HTTP_VERSION_H:
00336
00337 if (*m_read_ptr == '\r') {
00338
00339 if (! m_is_request) {
00340 set_error(ec, ERROR_VERSION_EMPTY);
00341 return false;
00342 }
00343 http_msg.set_version_major(0);
00344 http_msg.set_version_minor(0);
00345 m_headers_parse_state = PARSE_EXPECTING_NEWLINE;
00346 } else if (*m_read_ptr == '\n') {
00347
00348 if (! m_is_request) {
00349 set_error(ec, ERROR_VERSION_EMPTY);
00350 return false;
00351 }
00352 http_msg.set_version_major(0);
00353 http_msg.set_version_minor(0);
00354 m_headers_parse_state = PARSE_EXPECTING_CR;
00355 } else if (*m_read_ptr != 'H') {
00356 set_error(ec, ERROR_VERSION_CHAR);
00357 return false;
00358 }
00359 m_headers_parse_state = PARSE_HTTP_VERSION_T_1;
00360 break;
00361
00362 case PARSE_HTTP_VERSION_T_1:
00363
00364 if (*m_read_ptr != 'T') {
00365 set_error(ec, ERROR_VERSION_CHAR);
00366 return false;
00367 }
00368 m_headers_parse_state = PARSE_HTTP_VERSION_T_2;
00369 break;
00370
00371 case PARSE_HTTP_VERSION_T_2:
00372
00373 if (*m_read_ptr != 'T') {
00374 set_error(ec, ERROR_VERSION_CHAR);
00375 return false;
00376 }
00377 m_headers_parse_state = PARSE_HTTP_VERSION_P;
00378 break;
00379
00380 case PARSE_HTTP_VERSION_P:
00381
00382 if (*m_read_ptr != 'P') {
00383 set_error(ec, ERROR_VERSION_CHAR);
00384 return false;
00385 }
00386 m_headers_parse_state = PARSE_HTTP_VERSION_SLASH;
00387 break;
00388
00389 case PARSE_HTTP_VERSION_SLASH:
00390
00391 if (*m_read_ptr != '/') {
00392 set_error(ec, ERROR_VERSION_CHAR);
00393 return false;
00394 }
00395 m_headers_parse_state = PARSE_HTTP_VERSION_MAJOR_START;
00396 break;
00397
00398 case PARSE_HTTP_VERSION_MAJOR_START:
00399
00400 if (!is_digit(*m_read_ptr)) {
00401 set_error(ec, ERROR_VERSION_CHAR);
00402 return false;
00403 }
00404 http_msg.set_version_major(*m_read_ptr - '0');
00405 m_headers_parse_state = PARSE_HTTP_VERSION_MAJOR;
00406 break;
00407
00408 case PARSE_HTTP_VERSION_MAJOR:
00409
00410 if (*m_read_ptr == '.') {
00411 m_headers_parse_state = PARSE_HTTP_VERSION_MINOR_START;
00412 } else if (is_digit(*m_read_ptr)) {
00413 http_msg.set_version_major( (http_msg.get_version_major() * 10)
00414 + (*m_read_ptr - '0') );
00415 } else {
00416 set_error(ec, ERROR_VERSION_CHAR);
00417 return false;
00418 }
00419 break;
00420
00421 case PARSE_HTTP_VERSION_MINOR_START:
00422
00423 if (!is_digit(*m_read_ptr)) {
00424 set_error(ec, ERROR_VERSION_CHAR);
00425 return false;
00426 }
00427 http_msg.set_version_minor(*m_read_ptr - '0');
00428 m_headers_parse_state = PARSE_HTTP_VERSION_MINOR;
00429 break;
00430
00431 case PARSE_HTTP_VERSION_MINOR:
00432
00433 if (*m_read_ptr == ' ') {
00434
00435 if (! m_is_request) {
00436 m_headers_parse_state = PARSE_STATUS_CODE_START;
00437 }
00438 } else if (*m_read_ptr == '\r') {
00439
00440 if (! m_is_request) {
00441 set_error(ec, ERROR_STATUS_EMPTY);
00442 return false;
00443 }
00444 m_headers_parse_state = PARSE_EXPECTING_NEWLINE;
00445 } else if (*m_read_ptr == '\n') {
00446
00447 if (! m_is_request) {
00448 set_error(ec, ERROR_STATUS_EMPTY);
00449 return false;
00450 }
00451 m_headers_parse_state = PARSE_EXPECTING_CR;
00452 } else if (is_digit(*m_read_ptr)) {
00453 http_msg.set_version_minor( (http_msg.get_version_minor() * 10)
00454 + (*m_read_ptr - '0') );
00455 } else {
00456 set_error(ec, ERROR_VERSION_CHAR);
00457 return false;
00458 }
00459 break;
00460
00461 case PARSE_STATUS_CODE_START:
00462
00463 if (!is_digit(*m_read_ptr)) {
00464 set_error(ec, ERROR_STATUS_CHAR);
00465 return false;
00466 }
00467 m_status_code = (*m_read_ptr - '0');
00468 m_headers_parse_state = PARSE_STATUS_CODE;
00469 break;
00470
00471 case PARSE_STATUS_CODE:
00472
00473 if (*m_read_ptr == ' ') {
00474 m_status_message.erase();
00475 m_headers_parse_state = PARSE_STATUS_MESSAGE;
00476 } else if (is_digit(*m_read_ptr)) {
00477 m_status_code = ( (m_status_code * 10) + (*m_read_ptr - '0') );
00478 } else if (*m_read_ptr == '\r') {
00479
00480 m_status_message.erase();
00481 m_headers_parse_state = PARSE_EXPECTING_NEWLINE;
00482 } else if (*m_read_ptr == '\n') {
00483
00484 m_status_message.erase();
00485 m_headers_parse_state = PARSE_EXPECTING_CR;
00486 } else {
00487 set_error(ec, ERROR_STATUS_CHAR);
00488 return false;
00489 }
00490 break;
00491
00492 case PARSE_STATUS_MESSAGE:
00493
00494 if (*m_read_ptr == '\r') {
00495 m_headers_parse_state = PARSE_EXPECTING_NEWLINE;
00496 } else if (*m_read_ptr == '\n') {
00497 m_headers_parse_state = PARSE_EXPECTING_CR;
00498 } else if (is_control(*m_read_ptr)) {
00499 set_error(ec, ERROR_STATUS_CHAR);
00500 return false;
00501 } else if (m_status_message.size() >= STATUS_MESSAGE_MAX) {
00502 set_error(ec, ERROR_STATUS_CHAR);
00503 return false;
00504 } else {
00505 m_status_message.push_back(*m_read_ptr);
00506 }
00507 break;
00508
00509 case PARSE_EXPECTING_NEWLINE:
00510
00511 if (*m_read_ptr == '\n') {
00512 m_headers_parse_state = PARSE_HEADER_START;
00513 } else if (*m_read_ptr == '\r') {
00514
00515
00516
00517 ++m_read_ptr;
00518 m_bytes_last_read = (m_read_ptr - read_start_ptr);
00519 m_bytes_total_read += m_bytes_last_read;
00520 return true;
00521 } else if (*m_read_ptr == '\t' || *m_read_ptr == ' ') {
00522 m_headers_parse_state = PARSE_HEADER_WHITESPACE;
00523 } else if (!is_char(*m_read_ptr) || is_control(*m_read_ptr) || is_special(*m_read_ptr)) {
00524 set_error(ec, ERROR_HEADER_CHAR);
00525 return false;
00526 } else {
00527
00528 m_header_name.erase();
00529 m_header_name.push_back(*m_read_ptr);
00530 m_headers_parse_state = PARSE_HEADER_NAME;
00531 }
00532 break;
00533
00534 case PARSE_EXPECTING_CR:
00535
00536 if (*m_read_ptr == '\r') {
00537 m_headers_parse_state = PARSE_HEADER_START;
00538 } else if (*m_read_ptr == '\n') {
00539
00540
00541
00542 ++m_read_ptr;
00543 m_bytes_last_read = (m_read_ptr - read_start_ptr);
00544 m_bytes_total_read += m_bytes_last_read;
00545 return true;
00546 } else if (*m_read_ptr == '\t' || *m_read_ptr == ' ') {
00547 m_headers_parse_state = PARSE_HEADER_WHITESPACE;
00548 } else if (!is_char(*m_read_ptr) || is_control(*m_read_ptr) || is_special(*m_read_ptr)) {
00549 set_error(ec, ERROR_HEADER_CHAR);
00550 return false;
00551 } else {
00552
00553 m_header_name.erase();
00554 m_header_name.push_back(*m_read_ptr);
00555 m_headers_parse_state = PARSE_HEADER_NAME;
00556 }
00557 break;
00558
00559 case PARSE_HEADER_WHITESPACE:
00560
00561 if (*m_read_ptr == '\r') {
00562 m_headers_parse_state = PARSE_EXPECTING_NEWLINE;
00563 } else if (*m_read_ptr == '\n') {
00564 m_headers_parse_state = PARSE_EXPECTING_CR;
00565 } else if (*m_read_ptr != '\t' && *m_read_ptr != ' ') {
00566 if (!is_char(*m_read_ptr) || is_control(*m_read_ptr) || is_special(*m_read_ptr))
00567 set_error(ec, ERROR_HEADER_CHAR);
00568 return false;
00569
00570 m_header_name.erase();
00571 m_header_name.push_back(*m_read_ptr);
00572 m_headers_parse_state = PARSE_HEADER_NAME;
00573 }
00574 break;
00575
00576 case PARSE_HEADER_START:
00577
00578 if (*m_read_ptr == '\r') {
00579 m_headers_parse_state = PARSE_EXPECTING_FINAL_NEWLINE;
00580 } else if (*m_read_ptr == '\n') {
00581 m_headers_parse_state = PARSE_EXPECTING_FINAL_CR;
00582 } else if (*m_read_ptr == '\t' || *m_read_ptr == ' ') {
00583 m_headers_parse_state = PARSE_HEADER_WHITESPACE;
00584 } else if (!is_char(*m_read_ptr) || is_control(*m_read_ptr) || is_special(*m_read_ptr)) {
00585 set_error(ec, ERROR_HEADER_CHAR);
00586 return false;
00587 } else {
00588
00589 m_header_name.erase();
00590 m_header_name.push_back(*m_read_ptr);
00591 m_headers_parse_state = PARSE_HEADER_NAME;
00592 }
00593 break;
00594
00595 case PARSE_HEADER_NAME:
00596
00597 if (*m_read_ptr == ':') {
00598 m_header_value.erase();
00599 m_headers_parse_state = PARSE_SPACE_BEFORE_HEADER_VALUE;
00600 } else if (!is_char(*m_read_ptr) || is_control(*m_read_ptr) || is_special(*m_read_ptr)) {
00601 set_error(ec, ERROR_HEADER_CHAR);
00602 return false;
00603 } else if (m_header_name.size() >= HEADER_NAME_MAX) {
00604 set_error(ec, ERROR_HEADER_NAME_SIZE);
00605 return false;
00606 } else {
00607
00608 m_header_name.push_back(*m_read_ptr);
00609 }
00610 break;
00611
00612 case PARSE_SPACE_BEFORE_HEADER_VALUE:
00613
00614 if (*m_read_ptr == ' ') {
00615 m_headers_parse_state = PARSE_HEADER_VALUE;
00616 } else if (*m_read_ptr == '\r') {
00617 http_msg.add_header(m_header_name, m_header_value);
00618 m_headers_parse_state = PARSE_EXPECTING_NEWLINE;
00619 } else if (*m_read_ptr == '\n') {
00620 http_msg.add_header(m_header_name, m_header_value);
00621 m_headers_parse_state = PARSE_EXPECTING_CR;
00622 } else if (!is_char(*m_read_ptr) || is_control(*m_read_ptr) || is_special(*m_read_ptr)) {
00623 set_error(ec, ERROR_HEADER_CHAR);
00624 return false;
00625 } else {
00626
00627 m_header_value.push_back(*m_read_ptr);
00628 m_headers_parse_state = PARSE_HEADER_VALUE;
00629 }
00630 break;
00631
00632 case PARSE_HEADER_VALUE:
00633
00634 if (*m_read_ptr == '\r') {
00635 http_msg.add_header(m_header_name, m_header_value);
00636 m_headers_parse_state = PARSE_EXPECTING_NEWLINE;
00637 } else if (*m_read_ptr == '\n') {
00638 http_msg.add_header(m_header_name, m_header_value);
00639 m_headers_parse_state = PARSE_EXPECTING_CR;
00640 } else if (*m_read_ptr != '\t' && is_control(*m_read_ptr)) {
00641
00642
00643
00644
00645
00646
00647 set_error(ec, ERROR_HEADER_CHAR);
00648 return false;
00649 } else if (m_header_value.size() >= HEADER_VALUE_MAX) {
00650 set_error(ec, ERROR_HEADER_VALUE_SIZE);
00651 return false;
00652 } else {
00653
00654 m_header_value.push_back(*m_read_ptr);
00655 }
00656 break;
00657
00658 case PARSE_EXPECTING_FINAL_NEWLINE:
00659 if (*m_read_ptr == '\n') ++m_read_ptr;
00660 m_bytes_last_read = (m_read_ptr - read_start_ptr);
00661 m_bytes_total_read += m_bytes_last_read;
00662 return true;
00663
00664 case PARSE_EXPECTING_FINAL_CR:
00665 if (*m_read_ptr == '\r') ++m_read_ptr;
00666 m_bytes_last_read = (m_read_ptr - read_start_ptr);
00667 m_bytes_total_read += m_bytes_last_read;
00668 return true;
00669 }
00670
00671 ++m_read_ptr;
00672 }
00673
00674 m_bytes_last_read = (m_read_ptr - read_start_ptr);
00675 m_bytes_total_read += m_bytes_last_read;
00676 return boost::indeterminate;
00677 }
00678
00679 void parser::update_message_with_header_data(http::message& http_msg) const
00680 {
00681 if (is_parsing_request()) {
00682
00683
00684
00685 http::request& http_request(dynamic_cast<http::request&>(http_msg));
00686 http_request.set_method(m_method);
00687 http_request.set_resource(m_resource);
00688 http_request.set_query_string(m_query_string);
00689
00690
00691 if (! m_query_string.empty()) {
00692 if (! parse_url_encoded(http_request.get_queries(),
00693 m_query_string.c_str(),
00694 m_query_string.size()))
00695 PION_LOG_WARN(m_logger, "Request query string parsing failed (URI)");
00696 }
00697
00698
00699 std::pair<ihash_multimap::const_iterator, ihash_multimap::const_iterator>
00700 cookie_pair = http_request.get_headers().equal_range(http::types::HEADER_COOKIE);
00701 for (ihash_multimap::const_iterator cookie_iterator = cookie_pair.first;
00702 cookie_iterator != http_request.get_headers().end()
00703 && cookie_iterator != cookie_pair.second; ++cookie_iterator)
00704 {
00705 if (! parse_cookie_header(http_request.get_cookies(),
00706 cookie_iterator->second, false) )
00707 PION_LOG_WARN(m_logger, "Cookie header parsing failed");
00708 }
00709
00710 } else {
00711
00712
00713
00714 http::response& http_response(dynamic_cast<http::response&>(http_msg));
00715 http_response.set_status_code(m_status_code);
00716 http_response.set_status_message(m_status_message);
00717
00718
00719 std::pair<ihash_multimap::const_iterator, ihash_multimap::const_iterator>
00720 cookie_pair = http_response.get_headers().equal_range(http::types::HEADER_SET_COOKIE);
00721 for (ihash_multimap::const_iterator cookie_iterator = cookie_pair.first;
00722 cookie_iterator != http_response.get_headers().end()
00723 && cookie_iterator != cookie_pair.second; ++cookie_iterator)
00724 {
00725 if (! parse_cookie_header(http_response.get_cookies(),
00726 cookie_iterator->second, true) )
00727 PION_LOG_WARN(m_logger, "Set-Cookie header parsing failed");
00728 }
00729
00730 }
00731 }
00732
00733 boost::tribool parser::finish_header_parsing(http::message& http_msg,
00734 boost::system::error_code& ec)
00735 {
00736 boost::tribool rc = boost::indeterminate;
00737
00738 m_bytes_content_remaining = m_bytes_content_read = 0;
00739 http_msg.set_content_length(0);
00740 http_msg.update_transfer_encoding_using_header();
00741 update_message_with_header_data(http_msg);
00742
00743 if (http_msg.is_chunked()) {
00744
00745
00746 m_message_parse_state = PARSE_CHUNKS;
00747
00748
00749 if (m_parse_headers_only)
00750 rc = true;
00751
00752 } else if (http_msg.is_content_length_implied()) {
00753
00754
00755 m_message_parse_state = PARSE_END;
00756 rc = true;
00757
00758 } else {
00759
00760
00761 if (http_msg.has_header(http::types::HEADER_CONTENT_LENGTH)) {
00762
00763
00764 try {
00765 http_msg.update_content_length_using_header();
00766 } catch (...) {
00767 PION_LOG_ERROR(m_logger, "Unable to update content length");
00768 set_error(ec, ERROR_INVALID_CONTENT_LENGTH);
00769 return false;
00770 }
00771
00772
00773 if (http_msg.get_content_length() == 0) {
00774 m_message_parse_state = PARSE_END;
00775 rc = true;
00776 } else {
00777 m_message_parse_state = PARSE_CONTENT;
00778 m_bytes_content_remaining = http_msg.get_content_length();
00779
00780
00781 if (m_bytes_content_remaining > m_max_content_length)
00782 http_msg.set_content_length(m_max_content_length);
00783
00784
00785 http_msg.create_content_buffer();
00786
00787
00788 if (m_parse_headers_only)
00789 rc = true;
00790 }
00791
00792 } else {
00793
00794
00795
00796
00797 if (! m_is_request) {
00798
00799 http_msg.get_chunk_cache().clear();
00800
00801
00802 m_message_parse_state = PARSE_CONTENT_NO_LENGTH;
00803
00804
00805 if (m_parse_headers_only)
00806 rc = true;
00807 } else {
00808 m_message_parse_state = PARSE_END;
00809 rc = true;
00810 }
00811 }
00812 }
00813
00814 finished_parsing_headers(ec);
00815
00816 return rc;
00817 }
00818
00819 bool parser::parse_uri(const std::string& uri, std::string& proto,
00820 std::string& host, boost::uint16_t& port,
00821 std::string& path, std::string& query)
00822 {
00823 size_t proto_end = uri.find("://");
00824 size_t proto_len = 0;
00825
00826 if(proto_end != std::string::npos) {
00827 proto = uri.substr(0, proto_end);
00828 proto_len = proto_end + 3;
00829 } else {
00830 proto.clear();
00831 }
00832
00833
00834
00835 size_t server_port_end = uri.find('/', proto_len);
00836 if(server_port_end == std::string::npos) {
00837 return false;
00838 }
00839
00840
00841 std::string t;
00842 t = uri.substr(proto_len, server_port_end - proto_len);
00843 size_t port_pos = t.find(':', 0);
00844
00845
00846
00847 host = t.substr(0, port_pos);
00848 if(host.length() == 0) {
00849 return false;
00850 }
00851
00852
00853 if(port_pos != std::string::npos) {
00854 try {
00855 port = boost::lexical_cast<int>(t.substr(port_pos+1));
00856 } catch (boost::bad_lexical_cast &) {
00857 return false;
00858 }
00859 } else if (proto == "http" || proto == "HTTP") {
00860 port = 80;
00861 } else if (proto == "https" || proto == "HTTPS") {
00862 port = 443;
00863 } else {
00864 port = 0;
00865 }
00866
00867
00868 path = uri.substr(server_port_end);
00869
00870
00871 size_t query_pos = path.find('?', 0);
00872
00873 if(query_pos != std::string::npos) {
00874 query = path.substr(query_pos + 1, path.length() - query_pos - 1);
00875 path = path.substr(0, query_pos);
00876 } else {
00877 query.clear();
00878 }
00879
00880 return true;
00881 }
00882
00883 bool parser::parse_url_encoded(ihash_multimap& dict,
00884 const char *ptr, const size_t len)
00885 {
00886
00887 enum QueryParseState {
00888 QUERY_PARSE_NAME, QUERY_PARSE_VALUE
00889 } parse_state = QUERY_PARSE_NAME;
00890
00891
00892 const char * const end = ptr + len;
00893 std::string query_name;
00894 std::string query_value;
00895
00896
00897 while (ptr < end) {
00898 switch (parse_state) {
00899
00900 case QUERY_PARSE_NAME:
00901
00902 if (*ptr == '=') {
00903
00904 parse_state = QUERY_PARSE_VALUE;
00905 } else if (*ptr == '&') {
00906
00907 if (! query_name.empty()) {
00908
00909 dict.insert( std::make_pair(algorithm::url_decode(query_name), algorithm::url_decode(query_value)) );
00910 query_name.erase();
00911 }
00912 } else if (*ptr == '\r' || *ptr == '\n' || *ptr == '\t') {
00913
00914 } else if (is_control(*ptr) || query_name.size() >= QUERY_NAME_MAX) {
00915
00916 return false;
00917 } else {
00918
00919 query_name.push_back(*ptr);
00920 }
00921 break;
00922
00923 case QUERY_PARSE_VALUE:
00924
00925 if (*ptr == '&') {
00926
00927 if (! query_name.empty()) {
00928 dict.insert( std::make_pair(algorithm::url_decode(query_name), algorithm::url_decode(query_value)) );
00929 query_name.erase();
00930 }
00931 query_value.erase();
00932 parse_state = QUERY_PARSE_NAME;
00933 } else if (*ptr == ',') {
00934
00935 if (! query_name.empty())
00936 dict.insert( std::make_pair(algorithm::url_decode(query_name), algorithm::url_decode(query_value)) );
00937 query_value.erase();
00938 } else if (*ptr == '\r' || *ptr == '\n' || *ptr == '\t') {
00939
00940 } else if (is_control(*ptr) || query_value.size() >= QUERY_VALUE_MAX) {
00941
00942 return false;
00943 } else {
00944
00945 query_value.push_back(*ptr);
00946 }
00947 break;
00948 }
00949
00950 ++ptr;
00951 }
00952
00953
00954 if (! query_name.empty())
00955 dict.insert( std::make_pair(algorithm::url_decode(query_name), algorithm::url_decode(query_value)) );
00956
00957 return true;
00958 }
00959
00960 bool parser::parse_multipart_form_data(ihash_multimap& dict,
00961 const std::string& content_type,
00962 const char *ptr, const size_t len)
00963 {
00964
00965 std::size_t pos = content_type.find("boundary=");
00966 if (pos == std::string::npos)
00967 return false;
00968 const std::string boundary = std::string("--") + content_type.substr(pos+9);
00969
00970
00971 enum MultiPartParseState {
00972 MP_PARSE_START,
00973 MP_PARSE_HEADER_CR, MP_PARSE_HEADER_LF,
00974 MP_PARSE_HEADER_NAME, MP_PARSE_HEADER_SPACE, MP_PARSE_HEADER_VALUE,
00975 MP_PARSE_HEADER_LAST_LF, MP_PARSE_FIELD_DATA
00976 } parse_state = MP_PARSE_START;
00977
00978
00979 std::string header_name;
00980 std::string header_value;
00981 std::string field_name;
00982 std::string field_value;
00983 bool save_current_field = true;
00984 const char * const end_ptr = ptr + len;
00985
00986 ptr = strstr(ptr, boundary.c_str());
00987
00988 while (ptr != NULL && ptr < end_ptr) {
00989 switch (parse_state) {
00990 case MP_PARSE_START:
00991
00992 header_name.clear();
00993 header_value.clear();
00994 field_name.clear();
00995 field_value.clear();
00996 save_current_field = true;
00997 ptr += boundary.size() - 1;
00998 parse_state = MP_PARSE_HEADER_CR;
00999 break;
01000 case MP_PARSE_HEADER_CR:
01001
01002 if (*ptr == '\r') {
01003
01004 parse_state = MP_PARSE_HEADER_LF;
01005 } else if (*ptr == '\n') {
01006
01007 parse_state = MP_PARSE_HEADER_NAME;
01008 } else if (*ptr == '-' && ptr+1 < end_ptr && ptr[1] == '-') {
01009
01010 return true;
01011 } else return false;
01012 break;
01013 case MP_PARSE_HEADER_LF:
01014
01015 if (*ptr == '\n') {
01016
01017 parse_state = MP_PARSE_HEADER_NAME;
01018 } else return false;
01019 break;
01020 case MP_PARSE_HEADER_NAME:
01021
01022 if (*ptr == '\r' || *ptr == '\n') {
01023 if (header_name.empty()) {
01024
01025 parse_state = (*ptr == '\r' ? MP_PARSE_HEADER_LAST_LF : MP_PARSE_FIELD_DATA);
01026 } else {
01027
01028 parse_state = (*ptr == '\r' ? MP_PARSE_HEADER_LF : MP_PARSE_HEADER_NAME);
01029 }
01030 } else if (*ptr == ':') {
01031
01032 parse_state = MP_PARSE_HEADER_SPACE;
01033 } else {
01034
01035 header_name += *ptr;
01036 }
01037 break;
01038 case MP_PARSE_HEADER_SPACE:
01039
01040 if (*ptr == '\r') {
01041
01042 parse_state = MP_PARSE_HEADER_LF;
01043 } else if (*ptr == '\n') {
01044
01045 parse_state = MP_PARSE_HEADER_NAME;
01046 } else if (*ptr != ' ') {
01047
01048 header_value += *ptr;
01049 parse_state = MP_PARSE_HEADER_VALUE;
01050 }
01051
01052 break;
01053 case MP_PARSE_HEADER_VALUE:
01054
01055 if (*ptr == '\r' || *ptr == '\n') {
01056
01057 if (boost::algorithm::iequals(header_name, types::HEADER_CONTENT_TYPE)) {
01058
01059 save_current_field = boost::algorithm::iequals(header_value.substr(0, 5), "text/");
01060 } else if (boost::algorithm::iequals(header_name, types::HEADER_CONTENT_DISPOSITION)) {
01061
01062 std::size_t name_pos = header_value.find("name=\"");
01063 if (name_pos != std::string::npos) {
01064 for (name_pos += 6; name_pos < header_value.size() && header_value[name_pos] != '\"'; ++name_pos) {
01065 field_name += header_value[name_pos];
01066 }
01067 }
01068 }
01069
01070 header_name.clear();
01071 header_value.clear();
01072 parse_state = (*ptr == '\r' ? MP_PARSE_HEADER_LF : MP_PARSE_HEADER_NAME);
01073 } else {
01074
01075 header_value += *ptr;
01076 }
01077 break;
01078 case MP_PARSE_HEADER_LAST_LF:
01079
01080 if (*ptr == '\n') {
01081
01082 if (save_current_field && !field_name.empty()) {
01083
01084 parse_state = MP_PARSE_FIELD_DATA;
01085 } else {
01086
01087 parse_state = MP_PARSE_START;
01088 ptr = strstr(ptr, boundary.c_str());
01089 }
01090 } else return false;
01091 break;
01092 case MP_PARSE_FIELD_DATA:
01093
01094 const char *field_end_ptr = end_ptr;
01095 const char *next_ptr = strstr(ptr, boundary.c_str());
01096 if (next_ptr) {
01097
01098 const char *temp_ptr = next_ptr - 2;
01099 if (temp_ptr[0] == '\r' && temp_ptr[1] == '\n')
01100 field_end_ptr = temp_ptr;
01101 else field_end_ptr = next_ptr;
01102 }
01103 field_value.assign(ptr, field_end_ptr - ptr);
01104
01105 dict.insert( std::make_pair(field_name, field_value) );
01106
01107 parse_state = MP_PARSE_START;
01108 ptr = next_ptr;
01109 break;
01110 }
01111
01112 if (parse_state != MP_PARSE_START)
01113 ++ptr;
01114 }
01115
01116 return true;
01117 }
01118
01119 bool parser::parse_cookie_header(ihash_multimap& dict,
01120 const char *ptr, const size_t len,
01121 bool set_cookie_header)
01122 {
01123
01124
01125
01126
01127
01128
01129
01130 enum CookieParseState {
01131 COOKIE_PARSE_NAME, COOKIE_PARSE_VALUE, COOKIE_PARSE_IGNORE
01132 } parse_state = COOKIE_PARSE_NAME;
01133
01134
01135 const char * const end = ptr + len;
01136 std::string cookie_name;
01137 std::string cookie_value;
01138 char value_quote_character = '\0';
01139
01140
01141 while (ptr < end) {
01142 switch (parse_state) {
01143
01144 case COOKIE_PARSE_NAME:
01145
01146 if (*ptr == '=') {
01147
01148 value_quote_character = '\0';
01149 parse_state = COOKIE_PARSE_VALUE;
01150 } else if (*ptr == ';' || *ptr == ',') {
01151
01152
01153 if (! cookie_name.empty()) {
01154
01155 if (! is_cookie_attribute(cookie_name, set_cookie_header))
01156 dict.insert( std::make_pair(cookie_name, cookie_value) );
01157 cookie_name.erase();
01158 }
01159 } else if (*ptr != ' ') {
01160
01161 if (is_control(*ptr) || cookie_name.size() >= COOKIE_NAME_MAX)
01162 return false;
01163
01164 cookie_name.push_back(*ptr);
01165 }
01166 break;
01167
01168 case COOKIE_PARSE_VALUE:
01169
01170 if (value_quote_character == '\0') {
01171
01172 if (*ptr == ';' || *ptr == ',') {
01173
01174 if (! is_cookie_attribute(cookie_name, set_cookie_header))
01175 dict.insert( std::make_pair(cookie_name, cookie_value) );
01176 cookie_name.erase();
01177 cookie_value.erase();
01178 parse_state = COOKIE_PARSE_NAME;
01179 } else if (*ptr == '\'' || *ptr == '"') {
01180 if (cookie_value.empty()) {
01181
01182 value_quote_character = *ptr;
01183 } else if (cookie_value.size() >= COOKIE_VALUE_MAX) {
01184
01185 return false;
01186 } else {
01187
01188 cookie_value.push_back(*ptr);
01189 }
01190 } else if (*ptr != ' ' || !cookie_value.empty()) {
01191
01192 if (is_control(*ptr) || cookie_value.size() >= COOKIE_VALUE_MAX)
01193 return false;
01194
01195 cookie_value.push_back(*ptr);
01196 }
01197 } else {
01198
01199 if (*ptr == value_quote_character) {
01200
01201 if (! is_cookie_attribute(cookie_name, set_cookie_header))
01202 dict.insert( std::make_pair(cookie_name, cookie_value) );
01203 cookie_name.erase();
01204 cookie_value.erase();
01205 parse_state = COOKIE_PARSE_IGNORE;
01206 } else if (cookie_value.size() >= COOKIE_VALUE_MAX) {
01207
01208 return false;
01209 } else {
01210
01211 cookie_value.push_back(*ptr);
01212 }
01213 }
01214 break;
01215
01216 case COOKIE_PARSE_IGNORE:
01217
01218 if (*ptr == ';' || *ptr == ',')
01219 parse_state = COOKIE_PARSE_NAME;
01220 break;
01221 }
01222
01223 ++ptr;
01224 }
01225
01226
01227 if (! is_cookie_attribute(cookie_name, set_cookie_header))
01228 dict.insert( std::make_pair(cookie_name, cookie_value) );
01229
01230 return true;
01231 }
01232
01233 boost::tribool parser::parse_chunks(http::message::chunk_cache_t& chunks,
01234 boost::system::error_code& ec)
01235 {
01236
01237
01238
01239
01240
01241
01242
01243 const char *read_start_ptr = m_read_ptr;
01244 m_bytes_last_read = 0;
01245 while (m_read_ptr < m_read_end_ptr) {
01246
01247 switch (m_chunked_content_parse_state) {
01248 case PARSE_CHUNK_SIZE_START:
01249
01250 if (is_hex_digit(*m_read_ptr)) {
01251 m_chunk_size_str.erase();
01252 m_chunk_size_str.push_back(*m_read_ptr);
01253 m_chunked_content_parse_state = PARSE_CHUNK_SIZE;
01254 } else if (*m_read_ptr == ' ' || *m_read_ptr == '\x09' || *m_read_ptr == '\x0D' || *m_read_ptr == '\x0A') {
01255
01256
01257 break;
01258 } else {
01259 set_error(ec, ERROR_CHUNK_CHAR);
01260 return false;
01261 }
01262 break;
01263
01264 case PARSE_CHUNK_SIZE:
01265 if (is_hex_digit(*m_read_ptr)) {
01266 m_chunk_size_str.push_back(*m_read_ptr);
01267 } else if (*m_read_ptr == '\x0D') {
01268 m_chunked_content_parse_state = PARSE_EXPECTING_LF_AFTER_CHUNK_SIZE;
01269 } else if (*m_read_ptr == ' ' || *m_read_ptr == '\x09') {
01270
01271
01272 m_chunked_content_parse_state = PARSE_EXPECTING_CR_AFTER_CHUNK_SIZE;
01273 } else if (*m_read_ptr == ';') {
01274
01275
01276 m_chunked_content_parse_state = PARSE_EXPECTING_IGNORED_TEXT_AFTER_CHUNK_SIZE;
01277 } else {
01278 set_error(ec, ERROR_CHUNK_CHAR);
01279 return false;
01280 }
01281 break;
01282
01283 case PARSE_EXPECTING_IGNORED_TEXT_AFTER_CHUNK_SIZE:
01284 if (*m_read_ptr == '\x0D') {
01285 m_chunked_content_parse_state = PARSE_EXPECTING_LF_AFTER_CHUNK_SIZE;
01286 }
01287 break;
01288
01289 case PARSE_EXPECTING_CR_AFTER_CHUNK_SIZE:
01290 if (*m_read_ptr == '\x0D') {
01291 m_chunked_content_parse_state = PARSE_EXPECTING_LF_AFTER_CHUNK_SIZE;
01292 } else if (*m_read_ptr == ' ' || *m_read_ptr == '\x09') {
01293
01294
01295 break;
01296 } else {
01297 set_error(ec, ERROR_CHUNK_CHAR);
01298 return false;
01299 }
01300 break;
01301
01302 case PARSE_EXPECTING_LF_AFTER_CHUNK_SIZE:
01303
01304
01305 if (*m_read_ptr == '\x0A') {
01306 m_bytes_read_in_current_chunk = 0;
01307 m_size_of_current_chunk = strtol(m_chunk_size_str.c_str(), 0, 16);
01308 if (m_size_of_current_chunk == 0) {
01309 m_chunked_content_parse_state = PARSE_EXPECTING_FINAL_CR_OR_FOOTERS_AFTER_LAST_CHUNK;
01310 } else {
01311 m_chunked_content_parse_state = PARSE_CHUNK;
01312 }
01313 } else {
01314 set_error(ec, ERROR_CHUNK_CHAR);
01315 return false;
01316 }
01317 break;
01318
01319 case PARSE_CHUNK:
01320 if (m_bytes_read_in_current_chunk < m_size_of_current_chunk) {
01321 if (m_payload_handler) {
01322 const std::size_t bytes_avail = bytes_available();
01323 const std::size_t bytes_in_chunk = m_size_of_current_chunk - m_bytes_read_in_current_chunk;
01324 const std::size_t len = (bytes_in_chunk > bytes_avail) ? bytes_avail : bytes_in_chunk;
01325 m_payload_handler(m_read_ptr, len);
01326 m_bytes_read_in_current_chunk += len;
01327 if (len > 1) m_read_ptr += (len - 1);
01328 } else if (chunks.size() < m_max_content_length) {
01329 chunks.push_back(*m_read_ptr);
01330 m_bytes_read_in_current_chunk++;
01331 }
01332 }
01333 if (m_bytes_read_in_current_chunk == m_size_of_current_chunk) {
01334 m_chunked_content_parse_state = PARSE_EXPECTING_CR_AFTER_CHUNK;
01335 }
01336 break;
01337
01338 case PARSE_EXPECTING_CR_AFTER_CHUNK:
01339
01340 if (*m_read_ptr == '\x0D') {
01341 m_chunked_content_parse_state = PARSE_EXPECTING_LF_AFTER_CHUNK;
01342 } else {
01343 set_error(ec, ERROR_CHUNK_CHAR);
01344 return false;
01345 }
01346 break;
01347
01348 case PARSE_EXPECTING_LF_AFTER_CHUNK:
01349
01350 if (*m_read_ptr == '\x0A') {
01351 m_chunked_content_parse_state = PARSE_CHUNK_SIZE_START;
01352 } else {
01353 set_error(ec, ERROR_CHUNK_CHAR);
01354 return false;
01355 }
01356 break;
01357
01358 case PARSE_EXPECTING_FINAL_CR_OR_FOOTERS_AFTER_LAST_CHUNK:
01359
01360 if (*m_read_ptr == '\x0D') {
01361 m_chunked_content_parse_state = PARSE_EXPECTING_FINAL_LF_AFTER_LAST_CHUNK;
01362 } else {
01363
01364
01365 m_message_parse_state = PARSE_FOOTERS;
01366 m_headers_parse_state = PARSE_HEADER_START;
01367 m_bytes_last_read = (m_read_ptr - read_start_ptr);
01368 m_bytes_total_read += m_bytes_last_read;
01369 m_bytes_content_read += m_bytes_last_read;
01370 PION_LOG_DEBUG(m_logger, "Parsed " << m_bytes_last_read << " chunked payload content bytes; chunked content complete.");
01371 return true;
01372 }
01373 break;
01374
01375 case PARSE_EXPECTING_FINAL_LF_AFTER_LAST_CHUNK:
01376
01377 if (*m_read_ptr == '\x0A') {
01378 ++m_read_ptr;
01379 m_bytes_last_read = (m_read_ptr - read_start_ptr);
01380 m_bytes_total_read += m_bytes_last_read;
01381 m_bytes_content_read += m_bytes_last_read;
01382 PION_LOG_DEBUG(m_logger, "Parsed " << m_bytes_last_read << " chunked payload content bytes; chunked content complete.");
01383 return true;
01384 } else {
01385 set_error(ec, ERROR_CHUNK_CHAR);
01386 return false;
01387 }
01388 }
01389
01390 ++m_read_ptr;
01391 }
01392
01393 m_bytes_last_read = (m_read_ptr - read_start_ptr);
01394 m_bytes_total_read += m_bytes_last_read;
01395 m_bytes_content_read += m_bytes_last_read;
01396 return boost::indeterminate;
01397 }
01398
01399 boost::tribool parser::consume_content(http::message& http_msg,
01400 boost::system::error_code& ec)
01401 {
01402 size_t content_bytes_to_read;
01403 size_t content_bytes_available = bytes_available();
01404 boost::tribool rc = boost::indeterminate;
01405
01406 if (m_bytes_content_remaining == 0) {
01407
01408 return true;
01409 } else {
01410 if (content_bytes_available >= m_bytes_content_remaining) {
01411
01412 rc = true;
01413 content_bytes_to_read = m_bytes_content_remaining;
01414 } else {
01415
01416 content_bytes_to_read = content_bytes_available;
01417 }
01418 m_bytes_content_remaining -= content_bytes_to_read;
01419 }
01420
01421
01422 if (m_payload_handler) {
01423 m_payload_handler(m_read_ptr, content_bytes_to_read);
01424 } else if (m_bytes_content_read < m_max_content_length) {
01425 if (m_bytes_content_read + content_bytes_to_read > m_max_content_length) {
01426
01427
01428 memcpy(http_msg.get_content() + m_bytes_content_read, m_read_ptr,
01429 m_max_content_length - m_bytes_content_read);
01430 } else {
01431
01432 memcpy(http_msg.get_content() + m_bytes_content_read, m_read_ptr, content_bytes_to_read);
01433 }
01434 }
01435
01436 m_read_ptr += content_bytes_to_read;
01437 m_bytes_content_read += content_bytes_to_read;
01438 m_bytes_total_read += content_bytes_to_read;
01439 m_bytes_last_read = content_bytes_to_read;
01440
01441 return rc;
01442 }
01443
01444 std::size_t parser::consume_content_as_next_chunk(http::message::chunk_cache_t& chunks)
01445 {
01446 if (bytes_available() == 0) {
01447 m_bytes_last_read = 0;
01448 } else {
01449 m_bytes_last_read = (m_read_end_ptr - m_read_ptr);
01450 if (m_payload_handler) {
01451 if (m_bytes_last_read)
01452 m_payload_handler(m_read_ptr, m_bytes_last_read);
01453 } else {
01454 while (m_read_ptr < m_read_end_ptr) {
01455 if (chunks.size() < m_max_content_length)
01456 chunks.push_back(*m_read_ptr);
01457 ++m_read_ptr;
01458 }
01459 }
01460 m_bytes_total_read += m_bytes_last_read;
01461 m_bytes_content_read += m_bytes_last_read;
01462 }
01463 return m_bytes_last_read;
01464 }
01465
01466 void parser::finish(http::message& http_msg) const
01467 {
01468 switch (m_message_parse_state) {
01469 case PARSE_START:
01470 http_msg.set_is_valid(false);
01471 http_msg.set_content_length(0);
01472 http_msg.create_content_buffer();
01473 return;
01474 case PARSE_END:
01475 http_msg.set_is_valid(true);
01476 break;
01477 case PARSE_HEADERS:
01478 case PARSE_FOOTERS:
01479 http_msg.set_is_valid(false);
01480 update_message_with_header_data(http_msg);
01481 http_msg.set_content_length(0);
01482 http_msg.create_content_buffer();
01483 break;
01484 case PARSE_CONTENT:
01485 http_msg.set_is_valid(false);
01486 if (get_content_bytes_read() < m_max_content_length)
01487 http_msg.set_content_length(get_content_bytes_read());
01488 break;
01489 case PARSE_CHUNKS:
01490 http_msg.set_is_valid(m_chunked_content_parse_state==PARSE_CHUNK_SIZE_START);
01491 if (!m_payload_handler)
01492 http_msg.concatenate_chunks();
01493 break;
01494 case PARSE_CONTENT_NO_LENGTH:
01495 http_msg.set_is_valid(true);
01496 if (!m_payload_handler)
01497 http_msg.concatenate_chunks();
01498 break;
01499 }
01500
01501 compute_msg_status(http_msg, http_msg.is_valid());
01502
01503 if (is_parsing_request() && !m_payload_handler) {
01504
01505
01506
01507 http::request& http_request(dynamic_cast<http::request&>(http_msg));
01508 const std::string& content_type_header = http_request.get_header(http::types::HEADER_CONTENT_TYPE);
01509 if (content_type_header.compare(0, http::types::CONTENT_TYPE_URLENCODED.length(),
01510 http::types::CONTENT_TYPE_URLENCODED) == 0)
01511 {
01512 if (! parse_url_encoded(http_request.get_queries(),
01513 http_request.get_content(),
01514 http_request.get_content_length()))
01515 PION_LOG_WARN(m_logger, "Request form data parsing failed (POST urlencoded)");
01516 } else if (content_type_header.compare(0, http::types::CONTENT_TYPE_MULTIPART_FORM_DATA.length(),
01517 http::types::CONTENT_TYPE_MULTIPART_FORM_DATA) == 0)
01518 {
01519 if (! parse_multipart_form_data(http_request.get_queries(),
01520 content_type_header,
01521 http_request.get_content(),
01522 http_request.get_content_length()))
01523 PION_LOG_WARN(m_logger, "Request form data parsing failed (POST multipart)");
01524 }
01525 }
01526 }
01527
01528 void parser::compute_msg_status(http::message& http_msg, bool msg_parsed_ok )
01529 {
01530 http::message::data_status_t st = http::message::STATUS_NONE;
01531
01532 if(http_msg.has_missing_packets()) {
01533 st = http_msg.has_data_after_missing_packets() ?
01534 http::message::STATUS_PARTIAL : http::message::STATUS_TRUNCATED;
01535 } else {
01536 st = msg_parsed_ok ? http::message::STATUS_OK : http::message::STATUS_TRUNCATED;
01537 }
01538
01539 http_msg.set_status(st);
01540 }
01541
01542 void parser::create_error_category(void)
01543 {
01544 static error_category_t UNIQUE_ERROR_CATEGORY;
01545 m_error_category_ptr = &UNIQUE_ERROR_CATEGORY;
01546 }
01547
01548 bool parser::parse_forwarded_for(const std::string& header, std::string& public_ip)
01549 {
01550
01551 static const boost::regex IPV4_ADDR_RX("[0-9]{1,3}\\.[0-9]{1,3}\\.[0-9]{1,3}\\.[0-9]{1,3}");
01552
01558 static const boost::regex PRIVATE_NET_RX("(10\\.[0-9]{1,3}|127\\.[0-9]{1,3}|192\\.168|172\\.1[6-9]|172\\.2[0-9]|172\\.3[0-1])\\.[0-9]{1,3}\\.[0-9]{1,3}");
01559
01560
01561 if (header.empty())
01562 return false;
01563
01564
01565 boost::match_results<std::string::const_iterator> m;
01566 std::string::const_iterator start_it = header.begin();
01567
01568
01569 while (boost::regex_search(start_it, header.end(), m, IPV4_ADDR_RX)) {
01570
01571 std::string ip_str(m[0].first, m[0].second);
01572
01573 if (! boost::regex_match(ip_str, PRIVATE_NET_RX) ) {
01574
01575 public_ip = ip_str;
01576 return true;
01577 }
01578
01579 start_it = m[0].second;
01580 }
01581
01582
01583 return false;
01584 }
01585
01586 }
01587 }