// Sirikata
00001 /* Sirikata Transfer -- Content Transfer management system 00002 * TransferData.hpp 00003 * 00004 * Copyright (c) 2008, Patrick Reiter Horn 00005 * All rights reserved. 00006 * 00007 * Redistribution and use in source and binary forms, with or without 00008 * modification, are permitted provided that the following conditions are 00009 * met: 00010 * * Redistributions of source code must retain the above copyright 00011 * notice, this list of conditions and the following disclaimer. 00012 * * Redistributions in binary form must reproduce the above copyright 00013 * notice, this list of conditions and the following disclaimer in 00014 * the documentation and/or other materials provided with the 00015 * distribution. 00016 * * Neither the name of Sirikata nor the names of its contributors may 00017 * be used to endorse or promote products derived from this software 00018 * without specific prior written permission. 00019 * 00020 * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS 00021 * IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED 00022 * TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A 00023 * PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER 00024 * OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, 00025 * EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, 00026 * PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR 00027 * PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF 00028 * LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING 00029 * NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS 00030 * SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
00031 */ 00032 00033 #ifndef SIRIKATA_TransferData_HPP__ 00034 #define SIRIKATA_TransferData_HPP__ 00035 00036 #include <sirikata/core/util/Sha256.hpp> 00037 #include <sirikata/core/transfer/Range.hpp> 00038 00039 namespace Sirikata { 00040 namespace Transfer { 00041 00042 00044 class DenseData : Noncopyable, public Range { 00045 std::vector<unsigned char> mData; 00046 00047 // All too easy to mix up string constructors (binarydata,length) with (string,startbyte) 00048 DenseData(const char *str, size_t len) : Range(false) {} 00049 DenseData(const unsigned char *str, size_t len) : Range(false) {} 00050 00051 public: 00052 DenseData(const Range &range) 00053 :Range(range) { 00054 if (range.length()) { 00055 mData.resize((std::vector<unsigned char>::size_type)range.length()); 00056 } 00057 } 00058 00059 DenseData(const std::string &str, Range::base_type start=0, bool wholeFile=true) 00060 :Range(start, str.length(), LENGTH, wholeFile) { 00061 setLength(str.length(), wholeFile); 00062 std::copy(str.begin(), str.end(), writableData()); 00063 } 00064 00065 DenseData(const Range& range, const char* str) 00066 : Range(range), mData(str, str+range.length()) { 00067 if(range.length() == 0) 00068 throw std::invalid_argument("Tried to create DenseData with length of 0"); 00069 } 00070 00071 DenseData(const Range& range, const std::vector<unsigned char>& data) 00072 : Range(range), mData(data) { 00073 if(range.length() != data.size()) { 00074 throw std::invalid_argument("Tried to create DenseData with vector length not equal to Range"); 00075 } 00076 } 00077 00079 inline const unsigned char *data() const { 00080 if(mData.size() == 0) 00081 throw std::length_error("Tried to get a const pointer to DenseData with 0 length"); 00082 return &(mData[0]); 00083 } 00084 00085 inline const unsigned char *begin() const { 00086 return data(); 00087 } 00088 00089 inline const unsigned char *end() const { 00090 return data()+length(); 00091 } 00092 00094 inline unsigned char *writableData() 
{ 00095 if(mData.size() == 0) 00096 throw std::length_error("Tried to get a writable pointer to DenseData with 0 length"); 00097 return &(mData[0]); 00098 } 00099 00104 inline const unsigned char *dataAt(base_type offset) const { 00105 if (offset > endbyte() || offset < startbyte()) 00106 return NULL; 00107 return &(mData[(std::vector<unsigned char>::size_type)(offset-startbyte())]); 00108 } 00109 00110 inline std::string asString() const { 00111 return std::string((const char *)data(),(size_t)length()); 00112 } 00113 00115 inline void setLength(size_t len, bool is_npos) { 00116 Range::setLength(len, is_npos); 00117 mData.resize(len); 00118 } 00119 00120 //Appends len bytes from data to internal data vector and adds to length of range 00121 inline void append(const char* data, size_t len, bool is_npos) { 00122 if(len <= 0) return; 00123 size_t prev_end = length(); 00124 Range::setLength(prev_end + len, is_npos); 00125 mData.resize(prev_end + len, 0); 00126 std::copy(data, data+len, writableData() + prev_end); 00127 } 00128 00129 // Appends the entire contents of data to internal data vector and adds to length of Range 00130 inline void append(const std::vector<unsigned char>& data, bool is_npos) { 00131 append(data, is_npos, data.begin(), data.end()); 00132 } 00133 00134 // Appends the range (begin->end) from data to internal data vector and adds to length of Range 00135 inline void append(const std::vector<unsigned char>& data, bool is_npos, 00136 std::vector<unsigned char>::const_iterator begin, std::vector<unsigned char>::const_iterator end) { 00137 if (end - begin < 0) { 00138 throw std::invalid_argument("Tried to append to DenseData with invalid iterators"); 00139 } else if(end - begin == 0) { 00140 return; 00141 } 00142 00143 Range::setLength(length() + (end-begin), is_npos); 00144 mData.insert(mData.end(), begin, end); 00145 } 00146 }; 00147 00148 typedef std::tr1::shared_ptr<DenseData> MutableDenseDataPtr; 00149 typedef std::tr1::shared_ptr<const DenseData> 
DenseDataPtr; 00150 00151 // Meant to act like an STL list. 00152 class DenseDataList { 00153 protected: 00154 typedef std::list<DenseDataPtr> ListType; 00155 00157 ListType mSparseData; 00158 00159 DenseDataList() { 00160 } 00161 00162 public: 00164 typedef DenseDataPtr value_type; 00165 00167 class iterator : public ListType::iterator { 00168 public: 00169 iterator(const ListType::iterator &e) : 00170 ListType::iterator(e) { 00171 } 00172 inline const DenseData &operator* () const { 00173 return *(this->ListType::iterator::operator*()); 00174 } 00175 00176 inline const DenseData *operator-> () const { 00177 return &(*(this->ListType::iterator::operator*())); 00178 } 00179 00180 inline const DenseDataPtr &getPtr() const { 00181 return this->ListType::iterator::operator*(); 00182 } 00183 }; 00184 00186 inline iterator begin() { 00187 return mSparseData.begin(); 00188 } 00190 inline iterator end() { 00191 return mSparseData.end(); 00192 } 00193 00195 class const_iterator : public ListType::const_iterator { 00196 public: 00197 const_iterator() { 00198 } 00199 const_iterator(const ListType::const_iterator &e) : 00200 ListType::const_iterator(e) { 00201 } 00202 const_iterator(const ListType::iterator &e) : 00203 ListType::const_iterator(e) { 00204 } 00205 inline const DenseData &operator* () const { 00206 return *(this->ListType::const_iterator::operator*()); 00207 } 00208 00209 inline const DenseData *operator-> () const { 00210 return &(*(this->ListType::const_iterator::operator*())); 00211 } 00212 }; 00214 inline const_iterator begin() const { 00215 return mSparseData.begin(); 00216 } 00218 inline const_iterator end() const { 00219 return mSparseData.end(); 00220 } 00221 00223 inline iterator insert(const iterator &iter, const value_type &dd) { 00224 return mSparseData.insert(iter, dd); 00225 } 00226 00228 inline void erase(const iterator &iter) { 00229 mSparseData.erase(iter); 00230 } 00231 00233 inline void clear() { 00234 return mSparseData.clear(); 00235 } 00236 
00238 inline bool empty() const { 00239 return mSparseData.empty(); 00240 } 00241 00243 void addValidData(const DenseDataPtr &data) { 00244 data->addToList(data, *this); 00245 } 00246 00248 inline cache_usize_type getSpaceUsed() const { 00249 cache_usize_type length = 0; 00250 const_iterator myend = end(); 00251 for (const_iterator iter = begin(); iter != myend; ++iter) { 00252 length += (*iter).length(); 00253 } 00254 return length; 00255 } 00256 00264 const unsigned char *dataAt(Range::base_type offset, Range::length_type &length) const { 00265 const_iterator enditer = end(); 00266 for (const_iterator iter = begin(); iter != enditer; ++iter) { 00267 const Range &range = (*iter); 00268 if (offset >= range.startbyte() && 00269 (range.goesToEndOfFile() || offset <= range.endbyte())) { 00270 // We're within some valid data... return the DenseData. 00271 length = range.length() + (Range::length_type)(range.startbyte() - offset); 00272 return (*iter).dataAt(offset); 00273 } else if (offset < range.startbyte()){ 00274 // we missed it. 
00275 length = (size_t)(range.startbyte() - offset); 00276 return NULL; 00277 } 00278 } 00279 length = 0; 00280 return NULL; 00281 } 00282 00283 inline cache_usize_type startbyte() const { 00284 if (mSparseData.empty()) { 00285 return 0; 00286 } 00287 return mSparseData.front()->startbyte(); 00288 } 00289 00290 inline cache_usize_type endbyte() const { 00291 if (mSparseData.empty()) { 00292 return 0; 00293 } 00294 return mSparseData.back()->endbyte(); 00295 } 00296 00297 inline bool contiguous() const { 00298 if (mSparseData.empty()) 00299 return true; 00300 return Range(startbyte(), endbyte(), BOUNDS).isContainedBy(*this); 00301 } 00302 00303 inline bool contains(const Range &other) const { 00304 return other.isContainedBy(*this); 00305 } 00306 }; 00307 00309 class SparseData : public DenseDataList { 00310 public: 00311 typedef cache_usize_type size_type; 00312 typedef cache_ssize_type difference_type; 00313 typedef const unsigned char value_type; 00314 00315 class const_iterator { 00316 public: 00317 typedef SparseData::size_type size_type; 00318 typedef SparseData::difference_type difference_type; 00319 typedef SparseData::value_type value_type; 00320 typedef std::random_access_iterator_tag iterator_category; 00321 typedef value_type* pointer; 00322 typedef value_type& reference; 00323 00324 private: 00325 DenseDataList::const_iterator iter; 00326 const SparseData *parent; 00327 value_type *data; 00328 size_type globalbyte; 00329 size_type datastart; 00330 size_type dataend; 00331 00332 void setDataPtr() { 00333 if (globalbyte > parent->endbyte() || iter == (parent->DenseDataList::end())) { 00334 data = NULL; 00335 dataend = globalbyte; 00336 datastart = globalbyte; 00337 } else { 00338 const DenseData&dd = *iter; 00339 data = dd.data(); 00340 datastart = dd.startbyte(); 00341 dataend = dd.endbyte(); 00342 } 00343 } 00344 void fixData() { 00345 while (iter != (parent->DenseDataList::begin()) && datastart > globalbyte) { 00346 --iter; 00347 setDataPtr(); 00348 } 
00349 while (data && dataend <= globalbyte) { 00350 ++iter; 00351 setDataPtr(); 00352 } 00353 if (iter == (parent->DenseDataList::end())) { 00354 globalbyte = parent->endbyte(); 00355 } 00356 } 00357 public: 00358 const_iterator() { 00359 parent = NULL; 00360 data = NULL; 00361 datastart = 0; 00362 dataend = 0; 00363 globalbyte = 0; 00364 } 00365 const_iterator (const DenseDataList::const_iterator &iter, 00366 const SparseData *parent, SparseData::size_type pos) 00367 : iter(iter), parent(parent) { 00368 /* // Caller's responsibility to check if contiguous. 00369 if (!parent->contiguous()) { 00370 throw std::domain_error("Cannot create iterator over noncontiguous SparseData"); 00371 } 00372 */ 00373 globalbyte = pos; 00374 setDataPtr(); 00375 } 00376 00377 inline unsigned char operator*() const{ 00378 assert(valid()); 00379 return data[globalbyte-datastart]; 00380 } 00381 const_iterator &operator+=(difference_type diff) { 00382 globalbyte += diff; 00383 if (globalbyte < datastart || globalbyte >= dataend) { 00384 fixData(); 00385 } 00386 return *this; 00387 } 00388 const_iterator &operator-=(difference_type diff) { 00389 operator+=(-diff); 00390 return *this; 00391 } 00392 const_iterator &operator++() { 00393 ++globalbyte; 00394 if (globalbyte >= dataend) { 00395 fixData(); 00396 } 00397 return *this; 00398 } 00399 const_iterator &operator--() { 00400 --globalbyte; 00401 if (globalbyte < datastart) { 00402 fixData(); 00403 } 00404 return *this; 00405 } 00406 const_iterator operator+(difference_type diff) const { 00407 const_iterator other (*this); 00408 return (other += diff); 00409 } 00410 const_iterator operator-(difference_type diff) const { 00411 const_iterator other (*this); 00412 return (other -= diff); 00413 } 00414 00415 difference_type operator-(const const_iterator&other) const { 00416 return (globalbyte - other.globalbyte); 00417 } 00418 00419 bool operator<(const const_iterator&other) const { 00420 return (globalbyte < other.globalbyte); 00421 } 00422 
bool operator==(const const_iterator&other) const { 00423 return (other.iter == iter && other.globalbyte == globalbyte); 00424 } 00425 bool operator!=(const const_iterator&other) const { 00426 return (other.iter != iter || other.globalbyte != globalbyte); 00427 } 00428 00429 inline bool valid() const { 00430 return data && datastart <= globalbyte; 00431 } 00432 00433 inline bool eof() const { 00434 return !data; 00435 } 00436 00437 value_type *dataAt() const { 00438 if (valid()) { 00439 return &(data[globalbyte-datastart]); 00440 } else { 00441 return NULL; 00442 } 00443 } 00444 size_type lengthAt() const { 00445 if (valid()) { 00446 return dataend - globalbyte + 1; 00447 } else if (eof()) { 00448 return 0; 00449 } else { 00450 return datastart - globalbyte; 00451 } 00452 } 00453 00454 }; 00455 00456 const_iterator begin() const { 00457 return const_iterator(DenseDataList::begin(), this, startbyte()); 00458 } 00459 const_iterator end() const { 00460 return const_iterator(DenseDataList::end(), this, endbyte()); 00461 } 00462 00463 /*// slow: better to use iterators. 
00464 const char operator[] (cache_usize_type location) { 00465 Range::length_type length; 00466 const unsigned char *data = dataAt(location, length); 00467 if (data == NULL) { 00468 return '\0'; 00469 } 00470 if (length == 0) { 00471 throw new std::domain_error("byte outside range passed to SparseData::operator[]"); 00472 } 00473 return *data; 00474 } 00475 */ 00476 00477 typedef const_iterator iterator; // Only supports read-only operations 00478 00479 SparseData() { 00480 } 00481 00482 SparseData(DenseDataPtr contents) { 00483 addValidData(contents); 00484 } 00485 00486 inline cache_usize_type length() const { 00487 return endbyte() - startbyte() + 1; 00488 } 00489 00490 inline cache_usize_type size() const { 00491 return length(); 00492 } 00493 00497 std::ostream & debugPrint(std::ostream &os) const { 00498 Range::base_type position = 0, len; 00499 do { 00500 const unsigned char *data = dataAt(position, len); 00501 if (data) { 00502 os<<"{GOT DATA "<<len<<"}"; 00503 os<<std::string(data, data+len); 00504 } else if (len) { 00505 os<<"[INVALID:" <<len<< "]"; 00506 } 00507 position += len; 00508 } while (len); 00509 return os; 00510 } 00511 00512 SHA256 computeFingerprint() const { 00513 SHA256Context context; 00514 const unsigned char *data; 00515 Range::length_type length; 00516 Range::base_type start = 0; 00517 00518 while (true) { 00519 data = dataAt(start, length); 00520 start += length; 00521 if (data == NULL && length == 0) { 00522 break; 00523 } else if (data == NULL) { 00524 context.updateZeros((size_t)length); 00525 } else { 00526 context.update(data, (size_t)length); 00527 } 00528 } 00529 return context.get(); 00530 } 00531 00532 DenseDataPtr flatten() const { 00533 if (mSparseData.size() == 0) { 00534 return DenseDataPtr(new DenseData(Range(false))); 00535 } 00536 if (mSparseData.size() == 1) { 00537 return mSparseData.front(); 00538 } 00539 MutableDenseDataPtr denseData (new DenseData(Range(startbyte(),endbyte(),BOUNDS))); 00540 unsigned char *outdata 
= denseData->writableData(); 00541 std::copy(begin(), end(), outdata); 00542 return denseData; 00543 } 00544 00545 }; 00546 //typedef std::tr1::shared_ptr<SparseData> SparseDataPtr; 00547 00548 } 00549 } 00550 00551 #endif /* SIRIKATA_TransferData_HPP__ */