Sirikata
libcore/include/sirikata/core/transfer/HttpManager.hpp
Go to the documentation of this file.
00001 /*  Sirikata Transfer -- Content Transfer management system
00002  *  HttpManager.hpp
00003  *
00004  *  Copyright (c) 2010, Jeff Terrace
00005  *  All rights reserved.
00006  *
00007  *  Redistribution and use in source and binary forms, with or without
00008  *  modification, are permitted provided that the following conditions are
00009  *  met:
00010  *  * Redistributions of source code must retain the above copyright
00011  *    notice, this list of conditions and the following disclaimer.
00012  *  * Redistributions in binary form must reproduce the above copyright
00013  *    notice, this list of conditions and the following disclaimer in
00014  *    the documentation and/or other materials provided with the
00015  *    distribution.
00016  *  * Neither the name of Sirikata nor the names of its contributors may
00017  *    be used to endorse or promote products derived from this software
00018  *    without specific prior written permission.
00019  *
00020  * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS
00021  * IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED
00022  * TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A
00023  * PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER
00024  * OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
00025  * EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
00026  * PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
00027  * PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF
00028  * LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING
00029  * NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
00030  * SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
00031  */
00032 /*  Created on: Jun 18th, 2010 */
00033 
00034 #ifndef SIRIKATA_HttpManager_HPP__
00035 #define SIRIKATA_HttpManager_HPP__
00036 
00037 #include <sirikata/core/util/Platform.hpp>
00038 #include <string>
00039 #include <deque>
00040 #include <sstream>
00041 #include <boost/asio.hpp>
00042 #include <boost/bind.hpp>
00043 #include <boost/function.hpp>
00044 #include <boost/thread/mutex.hpp>
00045 // Apple defines the check macro in AssertMacros.h, which screws up some code in boost's iostreams lib
00046 #ifdef check
00047 #undef check
00048 #endif
00049 #include <boost/iostreams/filtering_streambuf.hpp>
00050 #include <boost/iostreams/filter/gzip.hpp>
00051 #include <boost/iostreams/copy.hpp>
00052 #include <sirikata/core/network/IOServicePool.hpp>
00053 #include <sirikata/core/network/IOWork.hpp>
00054 #include <sirikata/core/network/Asio.hpp>
00055 #include <sirikata/core/network/Address.hpp>
00056 #include <sirikata/core/transfer/TransferData.hpp>
00057 
00058 
00059 // This is a hack around a problem created by different packages
00060 // creating slightly different typedefs -- notably http_parser and
00061 // v8. On windows, we need to provide typedefs and make it skip that
00062 // part of http_parser.h.
00063 #if defined(_WIN32) && !defined(__MINGW32__)
00064 
00065 typedef signed __int8 int8_t;
00066 typedef unsigned __int8 uint8_t;
00067 typedef __int16 int16_t;
00068 typedef unsigned __int16 uint16_t;
00069 typedef __int32 int32_t;
00070 typedef unsigned __int32 uint32_t;
00071 typedef __int64 int64_t;
00072 typedef unsigned __int64 uint64_t;
00073 
00074 typedef unsigned int size_t;
00075 typedef int ssize_t;
00076 
00077 // We turn on mingw to make it skip the include block, but then stdint
00078 // is included, and only available via a zzip dependency. We block
00079 // that out as well by using its include guard define
00080 #define _ZZIP__STDINT_H 1
00081 #define __MINGW32__
00082 #include "http_parser.h"
00083 #undef __MINGW32__
00084 
00085 #else
00086 
00087 #include "http_parser.h"
00088 
00089 #endif
00090 
00091 namespace Sirikata {
00092 namespace Transfer {
00093 
00094 /*
00095  * Handles managing connections to the CDN
00096  */
00097 class SIRIKATA_EXPORT HttpManager
00098     : public AutoSingleton<HttpManager> {
00099 
00100 protected:
00101     enum LAST_HEADER_CB {
00102         NONE,
00103         FIELD,
00104         VALUE
00105     };
00106 
00107 public:
00108         typedef std::map<std::string, std::string> StringDictionary;
00109         // StringDictionary that uses case-insensitive keys, as required for
00110         // http headers by RFC 2616
00111         struct CaseInsensitiveStringLess {
00112             bool operator()(const std::string& lhs, const std::string& rhs) const {
00113                 std::size_t lsize = lhs.size(), rsize = rhs.size();
00114                 if (lsize != rsize) return (lsize < rsize);
00115                 for(std::size_t i = 0; i < lsize; i++) {
00116                     char li = std::tolower(lhs[i]), ri = std::tolower(rhs[i]);
00117                     if (li != ri) return (li < ri);
00118                 }
00119                 return false;
00120             }
00121         };
00122         typedef std::map<std::string, std::string, CaseInsensitiveStringLess> CaseInsensitiveStringDictionary;
00123 
00124         typedef CaseInsensitiveStringDictionary Headers;
00125         typedef StringDictionary QueryParameters;
00126 
00128         struct MultipartData {
00129             MultipartData(const String& _field, const String& _data)
00130              : field(_field), headers(), filename(""), data(_data)
00131             {}
00132             MultipartData(const String& _field, const String& _data, const String& _filename)
00133              : field(_field), headers(), filename(_filename), data(_data)
00134             {}
00135             MultipartData(const String& _field, const String& _data, const String& _filename, const Headers& _headers)
00136              : field(_field), headers(_headers), filename(_filename), data(_data)
00137             {}
00138 
00139             String field;
00140             Headers headers;
00141             String filename;
00142             String data;
00143         };
00144         typedef std::vector<MultipartData> MultipartDataList;
00145 
00146     /*
00147      * Stores headers and data returned from an HTTP request
00148      * Also stores HTTP status code and content length in numeric format
00149      * for convenience. These headers are also present in raw (string) format
00150      *
00151      * Note that getData might return a null pointer even if the
00152      * http request was successful, e.g. for a HEAD request
00153      *
00154      * Note that getContentLength might not be a valid value. If there was no
00155      * content length header in the response, getContentLength is undefined.
00156      */
00157     class HttpResponse {
00158     protected:
00159         // This stuff is all used internally for http-parser
00160         std::string mTempHeaderField;
00161         std::string mTempHeaderValue;
00162         LAST_HEADER_CB mLastCallback;
00163         bool mHeaderComplete;
00164         bool mMessageComplete;
00165         http_parser_settings mHttpSettings;
00166         http_parser mHttpParser;
00167         bool mGzip;
00168         std::stringstream mCompressedStream;
00169         //
00170 
00171         Headers mHeaders;
00172         std::tr1::shared_ptr<DenseData> mData;
00173         ssize_t mContentLength;
00174         unsigned short mStatusCode;
00175 
00176         HttpResponse()
00177             : mLastCallback(NONE), mHeaderComplete(false), mMessageComplete(false),
00178               mGzip(false), mContentLength(0), mStatusCode(0) {}
00179     public:
00180         inline std::tr1::shared_ptr<DenseData> getData() { return mData; }
00181         inline const Headers& getHeaders() { return mHeaders; }
00182         inline StringDictionary getRawHeaders() {
00183             StringDictionary raw_headers;
00184             for(Headers::const_iterator it = mHeaders.begin(); it != mHeaders.end(); it++)
00185                 raw_headers[it->first] = it->second;
00186             return raw_headers;
00187         }
00188         inline ssize_t getContentLength() { return mContentLength; }
00189         inline unsigned short getStatusCode() { return mStatusCode; }
00190 
00191         friend class HttpManager;
00192     };
00193     typedef std::tr1::shared_ptr<HttpResponse> HttpResponsePtr;
00194 
00195     //Type of errors that can be given to callback
00196     enum ERR_TYPE {
00197         SUCCESS,
00198         REQUEST_PARSING_FAILED,
00199         RESPONSE_PARSING_FAILED,
00200         BOOST_ERROR
00201     };
00202 
00203     /*
00204      * Callback for an HTTP request. If error == SUCCESS, then
00205      * response is an HttpResponse object that has headers and data.
00206      * If error == BOOST_ERROR, then boost_error is set to the boost
00207      * error code. If an error is returned, response is NULL.
00208      */
00209     typedef std::tr1::function<void(
00210                 std::tr1::shared_ptr<HttpResponse> response,
00211                 ERR_TYPE error,
00212                 const boost::system::error_code& boost_error
00213             )> HttpCallback;
00214 
00215     static HttpManager& getSingleton();
00216     static void destroy();
00217 
00218     //Methods supported
00219     enum HTTP_METHOD {
00220         HEAD,
00221         GET,
00222         POST
00223     };
00224     static String methodAsString(HTTP_METHOD m);
00225 
00231     void makeRequest(Sirikata::Network::Address addr, HTTP_METHOD method, std::string req, bool allow_redirects, HttpCallback cb);
00232 
00250     void makeRequest(
00251         Sirikata::Network::Address addr, HTTP_METHOD method, const String& path,
00252         HttpCallback cb,
00253         const Headers& headers = Headers(), const QueryParameters& query_params = QueryParameters(),
00254         const String& body = "",
00255         bool allow_redirects = true
00256     );
00257 
00258     static String formatURLEncodedDictionary(const StringDictionary& query_params);
00259     static String formatPath(const String& path, const QueryParameters& query_params);
00260     static String formatURL(const String& host, const String& service, const String& path, const QueryParameters& query_params);
00261 
00262     void head(
00263         Sirikata::Network::Address addr, const String& path,
00264         HttpCallback cb, const Headers& headers = Headers(), const QueryParameters& query_params = QueryParameters(),
00265         bool allow_redirects = true
00266     );
00267 
00268     void get(
00269         Sirikata::Network::Address addr, const String& path,
00270         HttpCallback cb, const Headers& headers = Headers(), const QueryParameters& query_params = QueryParameters(),
00271         bool allow_redirects = true
00272     );
00273 
00278     void post(
00279         Sirikata::Network::Address addr, const String& path,
00280         const String& content_type, const String& body,
00281         HttpCallback cb, const Headers& headers = Headers(), const QueryParameters& query_params = QueryParameters(),
00282         bool allow_redirects = true
00283     );
00284 
00286     void postURLEncoded(
00287         Sirikata::Network::Address addr, const String& path,
00288         const StringDictionary& body,
00289         HttpCallback cb, const Headers& headers = Headers(), const QueryParameters& query_params = QueryParameters(),
00290         bool allow_redirects = true
00291     );
00292 
00294     void postMultipartForm(
00295         Sirikata::Network::Address addr, const String& path,
00296         const MultipartDataList& data,
00297         HttpCallback cb, const Headers& headers = Headers(), const QueryParameters& query_params = QueryParameters(),
00298         bool allow_redirects = true
00299     );
00300 
00301 protected:
00302     /*
00303      * Protect constructor and destructor so can't make an instance of this class
00304      * but allow AutoSingleton<HttpManager> to make a copy, and give access
00305      * to the destructor to the auto_ptr used by AutoSingleton
00306      */
00307     HttpManager();
00308     ~HttpManager();
00309     friend class AutoSingleton<HttpManager>;
00310     friend std::auto_ptr<HttpManager>::~auto_ptr();
00311     friend void std::auto_ptr<HttpManager>::reset(HttpManager*);
00312 
00313     // Formats a URL encoded dictionary -- for form-urlencoded data or query
00314     // strings. NOTE: There is no ? prefixed to this.
00315     static void formatURLEncodedDictionary(std::ostream& os, const StringDictionary& query_params);
00316     // Formats the entire path portion of a URL -- path + query args
00317     static void formatPath(std::ostream& os, const String& path, const QueryParameters& query_params);
00318 private:
00319     //For convenience
00320     typedef Sirikata::Network::IOServicePool IOServicePool;
00321     typedef Sirikata::Network::TCPResolver TCPResolver;
00322     typedef Sirikata::Network::TCPSocket TCPSocket;
00323     typedef Sirikata::Network::IOWork IOWork;
00324     typedef Sirikata::Network::IOCallback IOCallback;
00325     typedef boost::asio::ip::tcp::endpoint TCPEndPoint;
00326 
00327     //Convenience of storing request parameters together
00328     class HttpRequest {
00329     public:
00330         const Sirikata::Network::Address addr;
00331         const std::string req;
00332         const HttpCallback cb;
00333         const HTTP_METHOD method;
00334         const bool allow_redirects;
00335         HttpRequest(Sirikata::Network::Address _addr, std::string _req, HTTP_METHOD meth, bool _allow_redirects, HttpCallback _cb)
00336          : addr(_addr), req(_req), cb(_cb), method(meth), allow_redirects(_allow_redirects),
00337            mNumTries(0), mLastCallback(NONE), mHeaderComplete(false) {}
00338 
00339         friend class HttpManager;
00340     protected:
00341         uint32 mNumTries;
00342         http_parser_settings mHttpSettings;
00343         http_parser mHttpParser;
00344         std::string mTempHeaderField;
00345         std::string mTempHeaderValue;
00346         LAST_HEADER_CB mLastCallback;
00347         bool mHeaderComplete;
00348         Headers mHeaders;
00349     };
00350 
00351     //Holds a queue of requests to be made
00352     typedef std::list<std::tr1::shared_ptr<HttpRequest> > RequestQueueType;
00353     RequestQueueType mRequestQueue;
00354     //Lock this to access mRequestQueue
00355     boost::mutex mRequestQueueLock;
00356 
00357     //TODO: should get these from settings
00358     static const uint32 MAX_CONNECTIONS_PER_ENDPOINT = 8;  
00359     static const uint32 MAX_TOTAL_CONNECTIONS = 40;  
00360     static const uint32 SOCKET_BUFFER_SIZE = 10240;
00361 
00362     //Keeps track of the total number of connections currently open
00363     uint32 mNumTotalConnections;
00364 
00365     //Keeps track of the number of connections open per host:port pair
00366     typedef std::map<Sirikata::Network::Address, uint32> NumConnsType;
00367     NumConnsType mNumConnsPerAddr;
00368     //Lock this to access mNumTotalConnections or mNumConnsPerAddr
00369     boost::mutex mNumConnsLock;
00370 
00371     //Holds connections that are open but not being used
00372     typedef std::map<Sirikata::Network::Address,
00373         std::queue<std::tr1::shared_ptr<TCPSocket> > > RecycleBinType;
00374     RecycleBinType mRecycleBin;
00375     //Lock this to access mRecycleBin
00376     boost::mutex mRecycleBinLock;
00377 
00378     IOServicePool* mServicePool;
00379     TCPResolver* mResolver;
00380 
00381     http_parser_settings EMPTY_PARSER_SETTINGS;
00382 
00383     void processQueue();
00384 
00385     void add_req(std::tr1::shared_ptr<HttpRequest> req);
00386     void decrement_connection(const Sirikata::Network::Address& addr);
00387     void write_request(std::tr1::shared_ptr<TCPSocket> socket, std::tr1::shared_ptr<HttpRequest> req);
00388 
00389     void handle_resolve(std::tr1::shared_ptr<HttpRequest> req, const boost::system::error_code& err,
00390             TCPResolver::iterator endpoint_iterator);
00391     void handle_connect(std::tr1::shared_ptr<TCPSocket> socket, std::tr1::shared_ptr<HttpRequest> req,
00392             const boost::system::error_code& err, TCPResolver::iterator endpoint_iterator);
00393     void handle_write_request(std::tr1::shared_ptr<TCPSocket> socket, std::tr1::shared_ptr<HttpRequest> req,
00394             const boost::system::error_code& err, std::tr1::shared_ptr<boost::asio::streambuf> request_stream);
00395     void handle_read(std::tr1::shared_ptr<TCPSocket> socket, std::tr1::shared_ptr<HttpRequest> req,
00396             std::tr1::shared_ptr<std::vector<unsigned char> > vecbuf, std::tr1::shared_ptr<HttpResponse> respPtr,
00397             const boost::system::error_code& err, std::size_t bytes_transferred);
00398 
00399     static int on_header_field(http_parser *_, const char *at, size_t len);
00400     static int on_header_value(http_parser *_, const char *at, size_t len);
00401     static int on_headers_complete(http_parser *_);
00402     static int on_body(http_parser *_, const char *at, size_t len);
00403     static int on_message_complete(http_parser *_);
00404 
00405     static int on_request_header_field(http_parser *_, const char *at, size_t len);
00406     static int on_request_header_value(http_parser *_, const char *at, size_t len);
00407     static int on_request_headers_complete(http_parser *_);
00408 
00409     enum HTTP_PARSER_FLAGS
00410       { F_CHUNKED = 1 << 0
00411       , F_CONNECTION_KEEP_ALIVE = 1 << 1
00412       , F_CONNECTION_CLOSE = 1 << 2
00413       , F_TRAILING = 1 << 3
00414       , F_UPGRADE = 1 << 4
00415       , F_SKIPBODY = 1 << 5
00416       };
00417 
00418     static void print_flags(std::tr1::shared_ptr<HttpResponse> resp);
00419 
00420 public:
00421 
00422     /*
00423      * Posts a callback on the service pool
00424      */
00425     void postCallback(IOCallback cb, const char* tag);
00426     void postCallback(const Duration& waitFor, IOCallback cb, const char* tag);
00427 
00428 };
00429 
00430 }
00431 }
00432 
00433 #endif