// Sirikata
00001 /* Sirikata Transfer -- Content Transfer management system 00002 * URL.hpp 00003 * 00004 * Copyright (c) 2008, Patrick Reiter Horn 00005 * All rights reserved. 00006 * 00007 * Redistribution and use in source and binary forms, with or without 00008 * modification, are permitted provided that the following conditions are 00009 * met: 00010 * * Redistributions of source code must retain the above copyright 00011 * notice, this list of conditions and the following disclaimer. 00012 * * Redistributions in binary form must reproduce the above copyright 00013 * notice, this list of conditions and the following disclaimer in 00014 * the documentation and/or other materials provided with the 00015 * distribution. 00016 * * Neither the name of Sirikata nor the names of its contributors may 00017 * be used to endorse or promote products derived from this software 00018 * without specific prior written permission. 00019 * 00020 * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS 00021 * IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED 00022 * TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A 00023 * PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER 00024 * OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, 00025 * EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, 00026 * PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR 00027 * PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF 00028 * LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING 00029 * NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS 00030 * SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
00031 */ 00032 /* Created on: Jan 5, 2009 */ 00033 00034 #ifndef SIRIKATA_URL_HPP__ 00035 #define SIRIKATA_URL_HPP__ 00036 00037 #include <sirikata/core/util/Platform.hpp> 00038 #include <sirikata/core/transfer/URI.hpp> 00039 00040 #ifdef _WIN32 00041 #include <locale> 00042 #endif 00043 00044 namespace Sirikata { 00045 namespace Transfer { 00046 00055 class URLContext { 00056 friend class URL; 00057 00058 std::string mProto; 00059 std::string mHost; 00060 std::string mUser; 00061 std::string mDirectory; 00062 // AuthenticationCreds mAuth; 00063 00065 static inline void resolveParentDirectories(std::string &str) { 00066 // Do nothing for now. 00067 /* 00068 std::string::size_type slashpos = 0; 00069 while (true) { 00070 slashpos = str.find('/', slashpos); 00071 if (slashpos != std::string::npos) { 00072 std::string dir = str.substr(slashpos, slashpos) 00073 00074 } 00075 slashpos++; 00076 } 00077 */ 00078 /* 00079 while (str[0]=='.'&&str[1]=='.'&&str[2]=='/') { 00080 str = str.substr(3); 00081 } 00082 std::string::size_type nextdotdot = 0; 00083 while ((nextdotdot = str.find("../", nextdotdot+1)) != std::string::npos) { 00084 00085 } 00086 */ 00087 } 00088 00089 struct IsSpace { 00090 inline bool operator()(const unsigned char c) { 00091 int kspace=(char)c; 00092 return std::isspace(kspace 00093 #ifdef _WIN32 00094 ,std::locale() 00095 #endif 00096 )!=false; 00097 } 00098 }; 00099 00100 void cleanup(std::string &s) { 00101 // hostnames and protocols are case-insensitive. 00102 for (std::string::size_type i = 0; i < s.length(); ++i) { 00103 s[i] = std::tolower(s[i] 00104 #ifdef _WIN32 00105 ,std::locale() 00106 #endif 00107 00108 ); 00109 } 00110 // remove any illegal characters such as spaces. 
00111 s.erase(std::remove_if(s.begin(), s.end(), IsSpace()), s.end()); 00112 } 00113 00114 public: 00116 URLContext() { 00117 } 00118 00120 URLContext(const std::string &newProto, 00121 const std::string &newHost, 00122 const std::string &newUser, 00123 const std::string &newDirectory) 00124 : mProto(newProto), 00125 mHost(newHost), 00126 mUser(newUser), 00127 mDirectory(newDirectory){ 00128 00129 cleanup(mProto); 00130 cleanup(mHost); 00131 } 00132 00136 URLContext(const URLContext &parent, 00137 const std::string *newProto, 00138 const std::string *newHost, 00139 const std::string *newUser, 00140 const std::string *newDirectory) 00141 : mProto(newProto?*newProto:parent.proto()), 00142 mHost(newHost?*newHost:parent.host()), 00143 mUser(newUser?*newUser:parent.username()), 00144 mDirectory(newDirectory?*newDirectory:parent.basepath()){ 00145 cleanup(mProto); 00146 cleanup(mHost); 00147 } 00148 00152 URLContext(const URLContext &parent, const std::string &identifier) 00153 : mProto(parent.proto()), 00154 mHost(parent.host()), 00155 mUser(parent.username()), 00156 mDirectory(parent.basepath()) { 00157 parse(identifier); 00158 } 00159 00166 URLContext(const std::string &identifier) { 00167 parse(identifier); 00168 } 00169 private: 00170 void parse(std::string identifier) { 00171 if (!identifier.empty() && identifier[identifier.length()-1] != '/') { 00172 identifier += '/'; 00173 } 00174 std::string::size_type colonpos = identifier.find(':'); 00175 std::string::size_type firstslashpos = identifier.find('/'); 00176 std::string::size_type startpos = 0; 00177 if (colonpos != std::string::npos && 00178 (firstslashpos == std::string::npos || colonpos < firstslashpos)) { 00179 /* FIXME: Only accept [a-z0-9] as part of the protocol. We don't want an IPv6 address or 00180 long filename with a colon in it being misinterpreted as a protocol. 
00181 Also, port numbers will be preceded by a colon */ 00182 00183 // global path 00184 mProto = identifier.substr(0, colonpos); 00185 mUser = std::string(); 00186 startpos = colonpos+1; 00187 } 00188 00189 if (identifier.length() > startpos+2 && identifier[startpos]=='/' && identifier[startpos+1]=='/') { 00190 /* FIXME: IPv6 addresses have colons and are surrounded by braces. 00191 * Example: "http://someuser@[2001:5c0:1101:4300::1]:8080/somedir/somefile*/ 00192 00193 // protocol-relative path 00194 mUser = std::string(); // clear out user (set to blank if unspecified) 00195 //mHost = std::string(); // we actually keep this the same. 00196 00197 std::string::size_type atpos, slashpos; 00198 std::string::size_type beginpos = startpos+2; 00199 slashpos = identifier.find('/',beginpos); 00200 atpos = identifier.rfind('@', slashpos); 00201 // Authenticated URL 00202 if (atpos != std::string::npos && atpos >= beginpos && atpos < slashpos) { 00203 mUser = identifier.substr(beginpos, atpos-beginpos); 00204 beginpos = atpos+1; 00205 } 00206 if (slashpos != beginpos) { 00207 if (slashpos == std::string::npos) { 00208 mHost = identifier.substr(beginpos); 00209 } else { 00210 mHost = identifier.substr(beginpos, slashpos-beginpos); 00211 } 00212 } else { 00213 mHost = std::string(); 00214 } 00215 startpos = slashpos; 00216 } 00217 if (identifier.length() > startpos && identifier[startpos]=='/') { 00218 // server-relative path 00219 std::string::size_type lastdir = identifier.rfind('/'); 00220 if (lastdir > startpos) { 00221 mDirectory = identifier.substr(startpos+1, lastdir-startpos-1); 00222 } else { 00223 mDirectory = std::string(); 00224 } 00225 } else { 00226 // directory-relative path -- not implemented here 00227 std::string::size_type lastdir = identifier.rfind('/'); 00228 if (lastdir != std::string::npos && lastdir > startpos) { 00229 if (mDirectory.empty()) { 00230 mDirectory = identifier.substr(startpos, lastdir-startpos); 00231 } else { 00232 mDirectory += '/' + 
identifier.substr(startpos, lastdir-startpos); 00233 } 00234 } 00235 } 00236 00237 resolveParentDirectories(mDirectory); 00238 00239 cleanup(mProto); 00240 cleanup(mHost); 00241 } 00242 00243 public: 00245 inline const std::string &proto() const { 00246 return mProto; 00247 } 00248 00249 inline void setProto(const std::string &proto) { 00250 mProto = proto; 00251 } 00252 00257 inline const std::string &username() const { 00258 return mUser; 00259 } 00260 00261 inline void setUsername(const std::string &user) { 00262 mUser = user; 00263 } 00264 00271 inline const std::string &host() const { 00272 return mHost; 00273 } 00274 00275 /* 00276 * hostname getter -- without port number 00277 */ 00278 inline const std::string hostname() const { 00279 std::string::size_type colonPos = mHost.find(":"); 00280 if (colonPos == std::string::npos) { 00281 return mHost; 00282 } else { 00283 return mHost.substr(0, colonPos); 00284 } 00285 } 00286 00287 /* 00288 * service getter -- returns the "port" of a URL, or empty string 00289 * if there is none 00290 * Example: 00291 * http://www.example.com/path -> "" 00292 * meerkat://hostname.edu:8080/path -> "8080" 00293 */ 00294 inline const std::string service() const { 00295 std::string::size_type colonPos = mHost.find(":"); 00296 if (colonPos == std::string::npos) { 00297 return ""; 00298 } else { 00299 return mHost.substr(colonPos+1); 00300 } 00301 } 00302 00303 00304 inline void setHost(const std::string &host) { 00305 mHost = host; 00306 } 00307 00308 /* The directory, excluding both initial slash and ending slash. 00309 * Often, this may just be the empty string, but depends on protocol. 
*/ 00310 inline const std::string &basepath() const { 00311 return mDirectory; 00312 } 00313 00314 inline void setBasepath(const std::string &basepath) { 00315 mDirectory = basepath; 00316 } 00317 00318 void toParentPath(std::string *pathString) { 00319 std::string::size_type slash = mDirectory.rfind('/'); 00320 if (slash == std::string::npos) { 00321 if (pathString) { 00322 if (!mDirectory.empty()) { 00323 *pathString = mDirectory + "/" + *pathString; 00324 } 00325 } 00326 mDirectory = std::string(); 00327 } else { 00328 if (pathString) { 00329 if (slash > 0) { 00330 *pathString = mDirectory.substr(slash+1) + "/" + *pathString; 00331 } 00332 } 00333 mDirectory = mDirectory.substr(0, slash); 00334 } 00335 } 00336 00337 void toParentContext(std::string *pathString) { 00338 if (mDirectory.empty()) { 00339 if (mHost.empty()) { 00340 if (pathString) { 00341 *pathString = mProto + ":" + *pathString; 00342 } 00343 mProto = std::string(); 00344 } else { 00345 if (pathString) { 00346 *pathString = "//" + (mUser.empty()?"":mUser+"@") + mHost + "/" + *pathString; 00347 } 00348 mHost = std::string(); 00349 } 00350 mUser = std::string(); 00351 } else { 00352 toParentPath(pathString); 00353 } 00354 } 00355 /* 00356 void relocate(const URLContext &source, const URLContext &dest) { 00357 std::string 00358 } 00359 */ 00360 00362 inline std::string toString(bool trailingSlash=true) const { 00363 std::string ret (mProto + "://" + (mUser.empty() ? "" : (mUser + "@")) + mHost); 00364 if (!mDirectory.empty()) { 00365 ret += ("/" + mDirectory); 00366 } 00367 if (trailingSlash) { 00368 return ret + '/'; 00369 } 00370 return ret; 00371 } 00372 00374 inline bool operator< (const URLContext &other) const { 00375 /* Note: I am testing user before hostname to keep this ordering 00376 * the same as if you used a string version of the URL. 
00377 */ 00378 if (mProto == other.mProto) { 00379 if (mUser == other.mUser) { 00380 if (mHost == other.mHost) { 00381 return mDirectory < other.mDirectory; 00382 } 00383 return mHost < other.mHost; 00384 } 00385 return mUser < other.mUser; 00386 } 00387 return mProto < other.mProto; 00388 } 00389 00391 inline bool operator==(const URLContext &other) const { 00392 return mDirectory == other.mDirectory && 00393 mUser == other.mUser && 00394 mHost == other.mHost && 00395 mProto == other.mProto; 00396 } 00397 inline bool operator!=(const URLContext &other) const { 00398 // We can ignore the hash if it references the same URL. 00399 return !((*this) == other); 00400 } 00401 00402 bool empty() const { 00403 return 00404 mDirectory.empty() && 00405 mUser.empty() && 00406 mHost.empty() && 00407 mProto.empty(); 00408 } 00409 00410 operator bool() const { 00411 return !empty(); 00412 } 00413 }; 00414 00416 inline std::ostream &operator<<(std::ostream &str, const URLContext &urlctx) { 00417 return str << urlctx.toString(); 00418 } 00419 00421 class URL { 00422 URLContext mContext; 00423 std::string mPath; // should have no slashes. 00424 00425 void findSlash(const std::string &url) { 00426 std::string::size_type slash = url.rfind('/'); 00427 if (slash != std::string::npos) { 00428 // FIXME: handle incomplete URLs correctly 00429 if (slash > 0 && url[slash-1] == '/' && !(slash > 1 && url[slash-2] == '/')) { 00430 // this is actually a hostname section... don't copy it into the filename. 00431 // unless there were three slashes in a row. 
00432 mPath = std::string(); 00433 mContext.parse(url); 00434 } else { 00435 mPath = url.substr(slash+1); 00436 mContext.parse(url.substr(0, slash+1)); 00437 } 00438 } else { 00439 std::string::size_type colon = url.find(':'); 00440 if (colon != std::string::npos) { 00441 mPath = url.substr(colon+1); 00442 mContext.parse(url.substr(0, colon+1)); 00443 } else { 00444 mPath = url; 00445 } 00446 } 00447 } 00448 public: 00450 explicit URL() { 00451 } 00452 00459 URL(const URLContext &parentContext, const std::string &url) 00460 : mContext(parentContext) { 00461 findSlash(url); 00462 } 00463 00469 explicit URL(const char *url) { 00470 findSlash(url); 00471 } 00472 00478 explicit URL(const std::string &url) { 00479 findSlash(url); 00480 } 00481 00482 00484 explicit URL(const URI& uri) { 00485 findSlash(uri.toString()); 00486 } 00487 00491 inline const URLContext &context() const { 00492 return mContext; 00493 } 00494 00496 inline URLContext &getContext() { 00497 return mContext; 00498 } 00499 00501 inline const std::string &proto() const { 00502 return mContext.proto(); 00503 } 00504 00506 inline const std::string &host() const { 00507 return mContext.host(); 00508 } 00509 00511 inline const std::string hostname() const { 00512 return mContext.hostname(); 00513 } 00514 00516 inline const std::string &username() const { 00517 return mContext.username(); 00518 } 00519 00521 inline const std::string &basepath() const { 00522 return mContext.basepath(); 00523 } 00524 00526 inline const std::string &filename() const { 00527 return mPath; 00528 } 00529 00530 inline void setFilename(const std::string &file) { 00531 mPath = file; 00532 } 00533 00538 inline std::string fullpath() const { 00539 if (mContext.basepath().empty()) { 00540 return '/' + mPath; 00541 } else { 00542 return '/' + mContext.basepath() + '/' + mPath; 00543 } 00544 } 00545 /* 00546 void relocate(const URLContext &source, const URLContext &dest) { 00547 getContext().relocate(source, dest); 00548 } 00549 */ 
00553 inline std::string toString () const { 00554 return mContext.toString() + mPath; 00555 } 00556 00558 inline bool operator<(const URL &other) const { 00559 // We can ignore the hash if it references the same URL. 00560 if (mContext == other.mContext) { 00561 return mPath < other.mPath; 00562 } 00563 return mContext < other.mContext; 00564 } 00565 00567 inline bool operator==(const URL &other) const { 00568 // We can ignore the hash if it references the same URL. 00569 return mPath == other.mPath && mContext == other.mContext; 00570 } 00571 inline bool operator!=(const URL &other) const { 00572 // We can ignore the hash if it references the same URL. 00573 return !((*this) == other); 00574 } 00575 00576 bool empty() const { 00577 return mContext.empty() && mPath.empty(); 00578 } 00579 00580 operator bool() const { 00581 return !empty(); 00582 } 00583 00584 struct Hasher { 00585 size_t operator() (const URL& url)const { 00586 return std::tr1::hash<std::string>()(url.mPath); 00587 } 00588 }; 00589 }; 00590 00592 inline std::ostream &operator<<(std::ostream &str, const URL &url) { 00593 return str << url.toString(); 00594 } 00595 00596 } 00597 } 00598 00599 #endif /* SIRIKATA_URL_HPP__ */