00001
00002
00003
00004
00005
00006
00007
00008
00009
00010 #ifndef __PION_HTTP_PARSER_HEADER__
00011 #define __PION_HTTP_PARSER_HEADER__
00012
00013 #include <string>
00014 #include <boost/noncopyable.hpp>
00015 #include <boost/function/function2.hpp>
00016 #include <boost/logic/tribool.hpp>
00017 #include <boost/system/error_code.hpp>
00018 #include <boost/thread/once.hpp>
00019 #include <pion/config.hpp>
00020 #include <pion/logger.hpp>
00021 #include <pion/http/message.hpp>
00022
00023
00024 namespace pion {
00025 namespace http {
00026
00027
00028
00029 class request;
00030 class response;
00031
00035 class PION_API parser :
00036 private boost::noncopyable
00037 {
00038
00039 public:
00040
00042 static const std::size_t DEFAULT_CONTENT_MAX;
00043
00045 typedef boost::function2<void, const char *, std::size_t> payload_handler_t;
00046
00048 enum error_value_t {
00049 ERROR_METHOD_CHAR = 1,
00050 ERROR_METHOD_SIZE,
00051 ERROR_URI_CHAR,
00052 ERROR_URI_SIZE,
00053 ERROR_QUERY_CHAR,
00054 ERROR_QUERY_SIZE,
00055 ERROR_VERSION_EMPTY,
00056 ERROR_VERSION_CHAR,
00057 ERROR_STATUS_EMPTY,
00058 ERROR_STATUS_CHAR,
00059 ERROR_HEADER_CHAR,
00060 ERROR_HEADER_NAME_SIZE,
00061 ERROR_HEADER_VALUE_SIZE,
00062 ERROR_INVALID_CONTENT_LENGTH,
00063 ERROR_CHUNK_CHAR,
00064 ERROR_MISSING_CHUNK_DATA,
00065 ERROR_MISSING_HEADER_DATA,
00066 ERROR_MISSING_TOO_MUCH_CONTENT,
00067 };
00068
00070 class error_category_t
00071 : public boost::system::error_category
00072 {
00073 public:
00074 const char *name() const { return "parser"; }
00075 std::string message(int ev) const {
00076 switch (ev) {
00077 case ERROR_METHOD_CHAR:
00078 return "invalid method character";
00079 case ERROR_METHOD_SIZE:
00080 return "method exceeds maximum size";
00081 case ERROR_URI_CHAR:
00082 return "invalid URI character";
00083 case ERROR_URI_SIZE:
00084 return "method exceeds maximum size";
00085 case ERROR_QUERY_CHAR:
00086 return "invalid query string character";
00087 case ERROR_QUERY_SIZE:
00088 return "query string exceeds maximum size";
00089 case ERROR_VERSION_EMPTY:
00090 return "HTTP version undefined";
00091 case ERROR_VERSION_CHAR:
00092 return "invalid version character";
00093 case ERROR_STATUS_EMPTY:
00094 return "HTTP status undefined";
00095 case ERROR_STATUS_CHAR:
00096 return "invalid status character";
00097 case ERROR_HEADER_CHAR:
00098 return "invalid header character";
00099 case ERROR_HEADER_NAME_SIZE:
00100 return "header name exceeds maximum size";
00101 case ERROR_HEADER_VALUE_SIZE:
00102 return "header value exceeds maximum size";
00103 case ERROR_INVALID_CONTENT_LENGTH:
00104 return "invalid Content-Length header";
00105 case ERROR_CHUNK_CHAR:
00106 return "invalid chunk character";
00107 case ERROR_MISSING_HEADER_DATA:
00108 return "missing header data";
00109 case ERROR_MISSING_CHUNK_DATA:
00110 return "missing chunk data";
00111 case ERROR_MISSING_TOO_MUCH_CONTENT:
00112 return "missing too much content";
00113 }
00114 return "parser error";
00115 }
00116 };
00117
00125 parser(const bool is_request, std::size_t max_content_length = DEFAULT_CONTENT_MAX)
00126 : m_logger(PION_GET_LOGGER("pion.http.parser")), m_is_request(is_request),
00127 m_read_ptr(NULL), m_read_end_ptr(NULL), m_message_parse_state(PARSE_START),
00128 m_headers_parse_state(is_request ? PARSE_METHOD_START : PARSE_HTTP_VERSION_H),
00129 m_chunked_content_parse_state(PARSE_CHUNK_SIZE_START), m_status_code(0),
00130 m_bytes_content_remaining(0), m_bytes_content_read(0),
00131 m_bytes_last_read(0), m_bytes_total_read(0),
00132 m_max_content_length(max_content_length),
00133 m_parse_headers_only(false), m_save_raw_headers(false)
00134 {}
00135
00137 virtual ~parser() {}
00138
00150 boost::tribool parse(http::message& http_msg, boost::system::error_code& ec);
00151
00164 boost::tribool parse_missing_data(http::message& http_msg, std::size_t len,
00165 boost::system::error_code& ec);
00166
00172 void finish(http::message& http_msg) const;
00173
00180 inline void set_read_buffer(const char *ptr, size_t len) {
00181 m_read_ptr = ptr;
00182 m_read_end_ptr = ptr + len;
00183 }
00184
00191 inline void load_read_pos(const char *&read_ptr, const char *&read_end_ptr) const {
00192 read_ptr = m_read_ptr;
00193 read_end_ptr = m_read_end_ptr;
00194 }
00195
00204 inline bool check_premature_eof(http::message& http_msg) {
00205 if (m_message_parse_state != PARSE_CONTENT_NO_LENGTH)
00206 return true;
00207 m_message_parse_state = PARSE_END;
00208 http_msg.concatenate_chunks();
00209 finish(http_msg);
00210 return false;
00211 }
00212
00218 inline void parse_headers_only(bool b = true) { m_parse_headers_only = b; }
00219
00225 inline void skip_header_parsing(http::message& http_msg) {
00226 boost::system::error_code ec;
00227 finish_header_parsing(http_msg, ec);
00228 }
00229
00231 inline void reset(void) {
00232 m_message_parse_state = PARSE_START;
00233 m_headers_parse_state = (m_is_request ? PARSE_METHOD_START : PARSE_HTTP_VERSION_H);
00234 m_chunked_content_parse_state = PARSE_CHUNK_SIZE_START;
00235 m_status_code = 0;
00236 m_status_message.erase();
00237 m_method.erase();
00238 m_resource.erase();
00239 m_query_string.erase();
00240 m_raw_headers.erase();
00241 m_bytes_content_read = m_bytes_last_read = m_bytes_total_read = 0;
00242 }
00243
00245 inline bool eof(void) const { return m_read_ptr == NULL || m_read_ptr >= m_read_end_ptr; }
00246
00248 inline std::size_t bytes_available(void) const { return (eof() ? 0 : (std::size_t)(m_read_end_ptr - m_read_ptr)); }
00249
00251 inline std::size_t gcount(void) const { return m_bytes_last_read; }
00252
00254 inline std::size_t get_total_bytes_read(void) const { return m_bytes_total_read; }
00255
00257 inline std::size_t get_content_bytes_read(void) const { return m_bytes_content_read; }
00258
00260 inline std::size_t get_max_content_length(void) const { return m_max_content_length; }
00261
00263 inline const std::string& get_raw_headers(void) const { return m_raw_headers; }
00264
00266 inline bool get_save_raw_headers(void) const { return m_save_raw_headers; }
00267
00269 inline bool get_parse_headers_only(void) { return m_parse_headers_only; }
00270
00272 inline bool is_parsing_request(void) const { return m_is_request; }
00273
00275 inline bool is_parsing_response(void) const { return ! m_is_request; }
00276
00278 inline void set_payload_handler(payload_handler_t& h) { m_payload_handler = h; }
00279
00281 inline void set_max_content_length(std::size_t n) { m_max_content_length = n; }
00282
00284 inline void reset_max_content_length(void) { m_max_content_length = DEFAULT_CONTENT_MAX; }
00285
00287 inline void set_save_raw_headers(bool b) { m_save_raw_headers = b; }
00288
00290 inline void set_logger(logger log_ptr) { m_logger = log_ptr; }
00291
00293 inline logger get_logger(void) { return m_logger; }
00294
00295
00308 static bool parse_uri(const std::string& uri, std::string& proto,
00309 std::string& host, boost::uint16_t& port, std::string& path,
00310 std::string& query);
00311
00322 static bool parse_url_encoded(ihash_multimap& dict,
00323 const char *ptr, const std::size_t len);
00324
00336 static bool parse_multipart_form_data(ihash_multimap& dict,
00337 const std::string& content_type,
00338 const char *ptr, const std::size_t len);
00339
00351 static bool parse_cookie_header(ihash_multimap& dict,
00352 const char *ptr, const std::size_t len,
00353 bool set_cookie_header);
00354
00365 static inline bool parse_cookie_header(ihash_multimap& dict,
00366 const std::string& cookie_header, bool set_cookie_header)
00367 {
00368 return parse_cookie_header(dict, cookie_header.c_str(), cookie_header.size(), set_cookie_header);
00369 }
00370
00380 static inline bool parse_url_encoded(ihash_multimap& dict,
00381 const std::string& query)
00382 {
00383 return parse_url_encoded(dict, query.c_str(), query.size());
00384 }
00385
00396 static inline bool parse_multipart_form_data(ihash_multimap& dict,
00397 const std::string& content_type,
00398 const std::string& form_data)
00399 {
00400 return parse_multipart_form_data(dict, content_type, form_data.c_str(), form_data.size());
00401 }
00402
00415 boost::tribool finish_header_parsing(http::message& http_msg,
00416 boost::system::error_code& ec);
00417
00427 static bool parse_forwarded_for(const std::string& header, std::string& public_ip);
00428
00430 static inline error_category_t& get_error_category(void) {
00431 boost::call_once(parser::create_error_category, m_instance_flag);
00432 return *m_error_category_ptr;
00433 }
00434
00435
00436 protected:
00437
00439 virtual void finished_parsing_headers(const boost::system::error_code& ec) {}
00440
00453 boost::tribool parse_headers(http::message& http_msg, boost::system::error_code& ec);
00454
00460 void update_message_with_header_data(http::message& http_msg) const;
00461
00473 boost::tribool parse_chunks(http::message::chunk_cache_t& chunk_buffers,
00474 boost::system::error_code& ec);
00475
00487 boost::tribool consume_content(http::message& http_msg,
00488 boost::system::error_code& ec);
00489
00497 std::size_t consume_content_as_next_chunk(http::message::chunk_cache_t& chunk_buffers);
00498
00504 static void compute_msg_status(http::message& http_msg, bool msg_parsed_ok);
00505
00512 static inline void set_error(boost::system::error_code& ec, error_value_t ev) {
00513 ec = boost::system::error_code(static_cast<int>(ev), get_error_category());
00514 }
00515
00517 static void create_error_category(void);
00518
00519
00520
00521 inline static bool is_char(int c);
00522 inline static bool is_control(int c);
00523 inline static bool is_special(int c);
00524 inline static bool is_digit(int c);
00525 inline static bool is_hex_digit(int c);
00526 inline static bool is_cookie_attribute(const std::string& name, bool set_cookie_header);
00527
00528
00530 static const boost::uint32_t STATUS_MESSAGE_MAX;
00531
00533 static const boost::uint32_t METHOD_MAX;
00534
00536 static const boost::uint32_t RESOURCE_MAX;
00537
00539 static const boost::uint32_t QUERY_STRING_MAX;
00540
00542 static const boost::uint32_t HEADER_NAME_MAX;
00543
00545 static const boost::uint32_t HEADER_VALUE_MAX;
00546
00548 static const boost::uint32_t QUERY_NAME_MAX;
00549
00551 static const boost::uint32_t QUERY_VALUE_MAX;
00552
00554 static const boost::uint32_t COOKIE_NAME_MAX;
00555
00557 static const boost::uint32_t COOKIE_VALUE_MAX;
00558
00559
00561 mutable logger m_logger;
00562
00564 const bool m_is_request;
00565
00567 const char * m_read_ptr;
00568
00570 const char * m_read_end_ptr;
00571
00572
00573 private:
00574
00576 enum message_parse_state_t {
00577 PARSE_START, PARSE_HEADERS, PARSE_FOOTERS, PARSE_CONTENT,
00578 PARSE_CONTENT_NO_LENGTH, PARSE_CHUNKS, PARSE_END
00579 };
00580
00583 enum header_parse_state_t {
00584 PARSE_METHOD_START, PARSE_METHOD, PARSE_URI_STEM, PARSE_URI_QUERY,
00585 PARSE_HTTP_VERSION_H, PARSE_HTTP_VERSION_T_1, PARSE_HTTP_VERSION_T_2,
00586 PARSE_HTTP_VERSION_P, PARSE_HTTP_VERSION_SLASH,
00587 PARSE_HTTP_VERSION_MAJOR_START, PARSE_HTTP_VERSION_MAJOR,
00588 PARSE_HTTP_VERSION_MINOR_START, PARSE_HTTP_VERSION_MINOR,
00589 PARSE_STATUS_CODE_START, PARSE_STATUS_CODE, PARSE_STATUS_MESSAGE,
00590 PARSE_EXPECTING_NEWLINE, PARSE_EXPECTING_CR,
00591 PARSE_HEADER_WHITESPACE, PARSE_HEADER_START, PARSE_HEADER_NAME,
00592 PARSE_SPACE_BEFORE_HEADER_VALUE, PARSE_HEADER_VALUE,
00593 PARSE_EXPECTING_FINAL_NEWLINE, PARSE_EXPECTING_FINAL_CR
00594 };
00595
00598 enum chunk_parse_state_t {
00599 PARSE_CHUNK_SIZE_START, PARSE_CHUNK_SIZE,
00600 PARSE_EXPECTING_IGNORED_TEXT_AFTER_CHUNK_SIZE,
00601 PARSE_EXPECTING_CR_AFTER_CHUNK_SIZE,
00602 PARSE_EXPECTING_LF_AFTER_CHUNK_SIZE, PARSE_CHUNK,
00603 PARSE_EXPECTING_CR_AFTER_CHUNK, PARSE_EXPECTING_LF_AFTER_CHUNK,
00604 PARSE_EXPECTING_FINAL_CR_OR_FOOTERS_AFTER_LAST_CHUNK,
00605 PARSE_EXPECTING_FINAL_LF_AFTER_LAST_CHUNK
00606 };
00607
00608
00610 message_parse_state_t m_message_parse_state;
00611
00613 header_parse_state_t m_headers_parse_state;
00614
00616 chunk_parse_state_t m_chunked_content_parse_state;
00617
00619 payload_handler_t m_payload_handler;
00620
00622 boost::uint16_t m_status_code;
00623
00625 std::string m_status_message;
00626
00628 std::string m_method;
00629
00631 std::string m_resource;
00632
00634 std::string m_query_string;
00635
00637 std::string m_raw_headers;
00638
00640 std::string m_header_name;
00641
00643 std::string m_header_value;
00644
00646 std::string m_chunk_size_str;
00647
00649 std::size_t m_size_of_current_chunk;
00650
00652 std::size_t m_bytes_read_in_current_chunk;
00653
00655 std::size_t m_bytes_content_remaining;
00656
00658 std::size_t m_bytes_content_read;
00659
00661 std::size_t m_bytes_last_read;
00662
00664 std::size_t m_bytes_total_read;
00665
00667 std::size_t m_max_content_length;
00668
00670 bool m_parse_headers_only;
00671
00673 bool m_save_raw_headers;
00674
00676 static error_category_t * m_error_category_ptr;
00677
00679 static boost::once_flag m_instance_flag;
00680 };
00681
00682
00683
00684
00685 inline bool parser::is_char(int c)
00686 {
00687 return(c >= 0 && c <= 127);
00688 }
00689
00690 inline bool parser::is_control(int c)
00691 {
00692 return( (c >= 0 && c <= 31) || c == 127);
00693 }
00694
00695 inline bool parser::is_special(int c)
00696 {
00697 switch (c) {
00698 case '(': case ')': case '<': case '>': case '@':
00699 case ',': case ';': case ':': case '\\': case '"':
00700 case '/': case '[': case ']': case '?': case '=':
00701 case '{': case '}': case ' ': case '\t':
00702 return true;
00703 default:
00704 return false;
00705 }
00706 }
00707
00708 inline bool parser::is_digit(int c)
00709 {
00710 return(c >= '0' && c <= '9');
00711 }
00712
00713 inline bool parser::is_hex_digit(int c)
00714 {
00715 return((c >= '0' && c <= '9') || (c >= 'a' && c <= 'f') || (c >= 'A' && c <= 'F'));
00716 }
00717
00718 inline bool parser::is_cookie_attribute(const std::string& name, bool set_cookie_header)
00719 {
00720 return (name.empty() || name[0] == '$' || (set_cookie_header &&
00721 (name=="Comment" || name=="Domain" || name=="Max-Age" || name=="Path" || name=="Secure" || name=="Version" || name=="Expires")
00722 ) );
00723 }
00724
00725 }
00726 }
00727
00728 #endif