Sirikata
libcore/include/sirikata/core/transfer/TransferData.hpp
Go to the documentation of this file.
00001 /*  Sirikata Transfer -- Content Transfer management system
00002  *  TransferData.hpp
00003  *
00004  *  Copyright (c) 2008, Patrick Reiter Horn
00005  *  All rights reserved.
00006  *
00007  *  Redistribution and use in source and binary forms, with or without
00008  *  modification, are permitted provided that the following conditions are
00009  *  met:
00010  *  * Redistributions of source code must retain the above copyright
00011  *    notice, this list of conditions and the following disclaimer.
00012  *  * Redistributions in binary form must reproduce the above copyright
00013  *    notice, this list of conditions and the following disclaimer in
00014  *    the documentation and/or other materials provided with the
00015  *    distribution.
00016  *  * Neither the name of Sirikata nor the names of its contributors may
00017  *    be used to endorse or promote products derived from this software
00018  *    without specific prior written permission.
00019  *
00020  * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS
00021  * IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED
00022  * TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A
00023  * PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER
00024  * OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
00025  * EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
00026  * PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
00027  * PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF
00028  * LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING
00029  * NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
00030  * SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
00031  */
00032 
00033 #ifndef SIRIKATA_TransferData_HPP__
00034 #define SIRIKATA_TransferData_HPP__
00035 
00036 #include <sirikata/core/util/Sha256.hpp>
00037 #include <sirikata/core/transfer/Range.hpp>
00038 
00039 namespace Sirikata {
00040 namespace Transfer {
00041 
00042 
00044 class DenseData : Noncopyable, public Range {
00045     std::vector<unsigned char> mData;
00046 
00047     // All too easy to mix up string constructors (binarydata,length) with (string,startbyte)
00048     DenseData(const char *str, size_t len) : Range(false) {}
00049     DenseData(const unsigned char *str, size_t len) : Range(false) {}
00050 
00051 public:
00052     DenseData(const Range &range)
00053             :Range(range) {
00054         if (range.length()) {
00055             mData.resize((std::vector<unsigned char>::size_type)range.length());
00056         }
00057     }
00058 
00059     DenseData(const std::string &str, Range::base_type start=0, bool wholeFile=true)
00060             :Range(start, str.length(), LENGTH, wholeFile) {
00061         setLength(str.length(), wholeFile);
00062         std::copy(str.begin(), str.end(), writableData());
00063     }
00064 
00065     DenseData(const Range& range, const char* str)
00066         : Range(range), mData(str, str+range.length()) {
00067         if(range.length() == 0)
00068             throw std::invalid_argument("Tried to create DenseData with length of 0");
00069     }
00070 
00071     DenseData(const Range& range, const std::vector<unsigned char>& data)
00072         : Range(range), mData(data) {
00073         if(range.length() != data.size()) {
00074             throw std::invalid_argument("Tried to create DenseData with vector length not equal to Range");
00075         }
00076     }
00077 
00079     inline const unsigned char *data() const {
00080         if(mData.size() == 0)
00081             throw std::length_error("Tried to get a const pointer to DenseData with 0 length");
00082         return &(mData[0]);
00083     }
00084 
00085     inline const unsigned char *begin() const {
00086         return data();
00087     }
00088 
00089     inline const unsigned char *end() const {
00090         return data()+length();
00091     }
00092 
00094     inline unsigned char *writableData() {
00095         if(mData.size() == 0)
00096             throw std::length_error("Tried to get a writable pointer to DenseData with 0 length");
00097         return &(mData[0]);
00098     }
00099 
00104     inline const unsigned char *dataAt(base_type offset) const {
00105         if (offset > endbyte() || offset < startbyte())
00106             return NULL;
00107         return &(mData[(std::vector<unsigned char>::size_type)(offset-startbyte())]);
00108     }
00109 
00110     inline std::string asString() const {
00111         return std::string((const char *)data(),(size_t)length());
00112     }
00113 
00115     inline void setLength(size_t len, bool is_npos) {
00116         Range::setLength(len, is_npos);
00117         mData.resize(len);
00118     }
00119 
00120     //Appends len bytes from data to internal data vector and adds to length of range
00121     inline void append(const char* data, size_t len, bool is_npos) {
00122         if(len <= 0) return;
00123         size_t prev_end = length();
00124         Range::setLength(prev_end + len, is_npos);
00125         mData.resize(prev_end + len, 0);
00126         std::copy(data, data+len, writableData() + prev_end);
00127     }
00128 
00129     // Appends the entire contents of data to internal data vector and adds to length of Range
00130     inline void append(const std::vector<unsigned char>& data, bool is_npos) {
00131         append(data, is_npos, data.begin(), data.end());
00132     }
00133 
00134     // Appends the range (begin->end) from data to internal data vector and adds to length of Range
00135     inline void append(const std::vector<unsigned char>& data, bool is_npos,
00136             std::vector<unsigned char>::const_iterator begin, std::vector<unsigned char>::const_iterator end) {
00137        if (end - begin < 0) {
00138            throw std::invalid_argument("Tried to append to DenseData with invalid iterators");
00139        } else if(end - begin == 0) {
00140            return;
00141        }
00142 
00143        Range::setLength(length() + (end-begin), is_npos);
00144        mData.insert(mData.end(), begin, end);
00145     }
00146 };
00147 
00148 typedef std::tr1::shared_ptr<DenseData> MutableDenseDataPtr;
00149 typedef std::tr1::shared_ptr<const DenseData> DenseDataPtr;
00150 
00151 // Meant to act like an STL list.
00152 class DenseDataList {
00153 protected:
00154     typedef std::list<DenseDataPtr> ListType;
00155 
00157     ListType mSparseData;
00158 
00159     DenseDataList() {
00160     }
00161 
00162 public:
00164     typedef DenseDataPtr value_type;
00165 
00167     class iterator : public ListType::iterator {
00168     public:
00169         iterator(const ListType::iterator &e) :
00170             ListType::iterator(e) {
00171         }
00172         inline const DenseData &operator* () const {
00173             return *(this->ListType::iterator::operator*());
00174         }
00175 
00176         inline const DenseData *operator-> () const {
00177             return &(*(this->ListType::iterator::operator*()));
00178         }
00179 
00180         inline const DenseDataPtr &getPtr() const {
00181             return this->ListType::iterator::operator*();
00182         }
00183     };
00184 
00186     inline iterator begin() {
00187             return mSparseData.begin();
00188     }
00190     inline iterator end() {
00191             return mSparseData.end();
00192     }
00193 
00195     class const_iterator : public ListType::const_iterator {
00196     public:
00197         const_iterator() {
00198         }
00199             const_iterator(const ListType::const_iterator &e) :
00200                     ListType::const_iterator(e) {
00201             }
00202             const_iterator(const ListType::iterator &e) :
00203                     ListType::const_iterator(e) {
00204             }
00205             inline const DenseData &operator* () const {
00206                     return *(this->ListType::const_iterator::operator*());
00207             }
00208 
00209             inline const DenseData *operator-> () const {
00210                     return &(*(this->ListType::const_iterator::operator*()));
00211             }
00212     };
00214     inline const_iterator begin() const {
00215             return mSparseData.begin();
00216     }
00218     inline const_iterator end() const {
00219             return mSparseData.end();
00220     }
00221 
00223     inline iterator insert(const iterator &iter, const value_type &dd) {
00224             return mSparseData.insert(iter, dd);
00225     }
00226 
00228     inline void erase(const iterator &iter) {
00229             mSparseData.erase(iter);
00230     }
00231 
00233     inline void clear() {
00234             return mSparseData.clear();
00235     }
00236 
00238     inline bool empty() const {
00239             return mSparseData.empty();
00240     }
00241 
00243     void addValidData(const DenseDataPtr &data) {
00244             data->addToList(data, *this);
00245     }
00246 
00248     inline cache_usize_type getSpaceUsed() const {
00249             cache_usize_type length = 0;
00250             const_iterator myend = end();
00251             for (const_iterator iter = begin(); iter != myend; ++iter) {
00252                     length += (*iter).length();
00253             }
00254             return length;
00255     }
00256 
00264     const unsigned char *dataAt(Range::base_type offset, Range::length_type &length) const {
00265         const_iterator enditer = end();
00266         for (const_iterator iter = begin(); iter != enditer; ++iter) {
00267             const Range &range = (*iter);
00268             if (offset >= range.startbyte() &&
00269                     (range.goesToEndOfFile() || offset <= range.endbyte())) {
00270                 // We're within some valid data... return the DenseData.
00271                 length = range.length() + (Range::length_type)(range.startbyte() - offset);
00272                 return (*iter).dataAt(offset);
00273             } else if (offset < range.startbyte()){
00274                 // we missed it.
00275                 length = (size_t)(range.startbyte() - offset);
00276                 return NULL;
00277             }
00278         }
00279         length = 0;
00280         return NULL;
00281     }
00282 
00283     inline cache_usize_type startbyte() const {
00284         if (mSparseData.empty()) {
00285             return 0;
00286         }
00287         return mSparseData.front()->startbyte();
00288     }
00289 
00290     inline cache_usize_type endbyte() const {
00291         if (mSparseData.empty()) {
00292             return 0;
00293         }
00294         return mSparseData.back()->endbyte();
00295     }
00296 
00297     inline bool contiguous() const {
00298         if (mSparseData.empty())
00299             return true;
00300         return Range(startbyte(), endbyte(), BOUNDS).isContainedBy(*this);
00301     }
00302 
00303     inline bool contains(const Range &other) const {
00304         return other.isContainedBy(*this);
00305     }
00306 };
00307 
00309 class SparseData : public DenseDataList {
00310 public:
00311     typedef cache_usize_type size_type;
00312     typedef cache_ssize_type difference_type;
00313     typedef const unsigned char value_type;
00314 
00315     class const_iterator {
00316     public:
00317         typedef SparseData::size_type size_type;
00318         typedef SparseData::difference_type difference_type;
00319         typedef SparseData::value_type value_type;
00320         typedef std::random_access_iterator_tag iterator_category;
00321         typedef value_type* pointer;
00322         typedef value_type& reference;
00323 
00324     private:
00325         DenseDataList::const_iterator iter;
00326         const SparseData *parent;
00327         value_type *data;
00328         size_type globalbyte;
00329         size_type datastart;
00330         size_type dataend;
00331 
00332         void setDataPtr() {
00333             if (globalbyte > parent->endbyte() || iter == (parent->DenseDataList::end())) {
00334                 data = NULL;
00335                 dataend = globalbyte;
00336                 datastart = globalbyte;
00337             } else {
00338                 const DenseData&dd = *iter;
00339                 data = dd.data();
00340                 datastart = dd.startbyte();
00341                 dataend = dd.endbyte();
00342             }
00343         }
00344         void fixData() {
00345             while (iter != (parent->DenseDataList::begin()) && datastart > globalbyte) {
00346                 --iter;
00347                 setDataPtr();
00348             }
00349             while (data && dataend <= globalbyte) {
00350                 ++iter;
00351                 setDataPtr();
00352             }
00353             if (iter == (parent->DenseDataList::end())) {
00354                 globalbyte = parent->endbyte();
00355             }
00356         }
00357     public:
00358         const_iterator() {
00359             parent = NULL;
00360             data = NULL;
00361             datastart = 0;
00362             dataend = 0;
00363             globalbyte = 0;
00364         }
00365         const_iterator (const DenseDataList::const_iterator &iter,
00366                 const SparseData *parent, SparseData::size_type pos)
00367                 : iter(iter), parent(parent) {
00368             /* // Caller's responsibility to check if contiguous.
00369             if (!parent->contiguous()) {
00370                 throw std::domain_error("Cannot create iterator over noncontiguous SparseData");
00371             }
00372             */
00373             globalbyte = pos;
00374             setDataPtr();
00375         }
00376 
00377         inline unsigned char operator*() const{
00378             assert(valid());
00379             return data[globalbyte-datastart];
00380         }
00381         const_iterator &operator+=(difference_type diff) {
00382             globalbyte += diff;
00383             if (globalbyte < datastart || globalbyte >= dataend) {
00384                 fixData();
00385             }
00386             return *this;
00387         }
00388         const_iterator &operator-=(difference_type diff) {
00389             operator+=(-diff);
00390             return *this;
00391         }
00392         const_iterator &operator++() {
00393             ++globalbyte;
00394             if (globalbyte >= dataend) {
00395                 fixData();
00396             }
00397             return *this;
00398         }
00399         const_iterator &operator--() {
00400             --globalbyte;
00401             if (globalbyte < datastart) {
00402                 fixData();
00403             }
00404             return *this;
00405         }
00406         const_iterator operator+(difference_type diff) const {
00407             const_iterator other (*this);
00408             return (other += diff);
00409         }
00410         const_iterator operator-(difference_type diff) const {
00411             const_iterator other (*this);
00412             return (other -= diff);
00413         }
00414 
00415         difference_type operator-(const const_iterator&other) const {
00416             return (globalbyte - other.globalbyte);
00417         }
00418 
00419         bool operator<(const const_iterator&other) const {
00420             return (globalbyte < other.globalbyte);
00421         }
00422         bool operator==(const const_iterator&other) const {
00423             return (other.iter == iter && other.globalbyte == globalbyte);
00424         }
00425         bool operator!=(const const_iterator&other) const {
00426             return (other.iter != iter || other.globalbyte != globalbyte);
00427         }
00428 
00429         inline bool valid() const {
00430             return data && datastart <= globalbyte;
00431         }
00432 
00433         inline bool eof() const {
00434             return !data;
00435         }
00436 
00437         value_type *dataAt() const {
00438             if (valid()) {
00439                 return &(data[globalbyte-datastart]);
00440             } else {
00441                 return NULL;
00442             }
00443         }
00444         size_type lengthAt() const {
00445             if (valid()) {
00446                 return dataend - globalbyte + 1;
00447             } else if (eof()) {
00448                 return 0;
00449             } else {
00450                 return datastart - globalbyte;
00451             }
00452         }
00453 
00454     };
00455 
00456     const_iterator begin() const {
00457         return const_iterator(DenseDataList::begin(), this, startbyte());
00458     }
00459     const_iterator end() const {
00460         return const_iterator(DenseDataList::end(), this, endbyte());
00461     }
00462 
00463     /*// slow: better to use iterators.
00464     const char operator[] (cache_usize_type location) {
00465         Range::length_type length;
00466         const unsigned char *data = dataAt(location, length);
00467         if (data == NULL) {
00468             return '\0';
00469         }
00470         if (length == 0) {
00471             throw new std::domain_error("byte outside range passed to SparseData::operator[]");
00472         }
00473         return *data;
00474     }
00475     */
00476 
00477     typedef const_iterator iterator; // Only supports read-only operations
00478 
00479     SparseData() {
00480     }
00481 
00482     SparseData(DenseDataPtr contents) {
00483         addValidData(contents);
00484     }
00485 
00486     inline cache_usize_type length() const {
00487         return endbyte() - startbyte() + 1;
00488     }
00489 
00490     inline cache_usize_type size() const {
00491         return length();
00492     }
00493 
00497     std::ostream & debugPrint(std::ostream &os) const {
00498         Range::base_type position = 0, len;
00499         do {
00500             const unsigned char *data = dataAt(position, len);
00501             if (data) {
00502                 os<<"{GOT DATA "<<len<<"}";
00503                 os<<std::string(data, data+len);
00504             } else if (len) {
00505                 os<<"[INVALID:" <<len<< "]";
00506             }
00507             position += len;
00508         } while (len);
00509         return os;
00510     }
00511 
00512     SHA256 computeFingerprint() const {
00513         SHA256Context context;
00514         const unsigned char *data;
00515         Range::length_type length;
00516         Range::base_type start = 0;
00517 
00518         while (true) {
00519             data = dataAt(start, length);
00520             start += length;
00521             if (data == NULL && length == 0) {
00522                 break;
00523             } else if (data == NULL) {
00524                 context.updateZeros((size_t)length);
00525             } else {
00526                 context.update(data, (size_t)length);
00527             }
00528         }
00529         return context.get();
00530     }
00531 
00532         DenseDataPtr flatten() const {
00533                 if (mSparseData.size() == 0) {
00534                         return DenseDataPtr(new DenseData(Range(false)));
00535                 }
00536                 if (mSparseData.size() == 1) {
00537                         return mSparseData.front();
00538                 }
00539                 MutableDenseDataPtr denseData (new DenseData(Range(startbyte(),endbyte(),BOUNDS)));
00540                 unsigned char *outdata = denseData->writableData();
00541                 std::copy(begin(), end(), outdata);
00542                 return denseData;
00543         }
00544 
00545 };
00546 //typedef std::tr1::shared_ptr<SparseData> SparseDataPtr;
00547 
00548 }
00549 }
00550 
00551 #endif /* SIRIKATA_TransferData_HPP__ */