tesseract  3.03
/usr/local/google/home/jbreiden/tesseract-ocr-read-only/ccutil/genericvector.h
Go to the documentation of this file.
00001 
00002 // File:        genericvector.h
00003 // Description: Generic vector class
00004 // Author:      Daria Antonova
00005 // Created:     Mon Jun 23 11:26:43 PDT 2008
00006 //
00007 // (C) Copyright 2007, Google Inc.
00008 // Licensed under the Apache License, Version 2.0 (the "License");
00009 // you may not use this file except in compliance with the License.
00010 // You may obtain a copy of the License at
00011 // http://www.apache.org/licenses/LICENSE-2.0
00012 // Unless required by applicable law or agreed to in writing, software
00013 // distributed under the License is distributed on an "AS IS" BASIS,
00014 // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
00015 // See the License for the specific language governing permissions and
00016 // limitations under the License.
00017 //
00019 //
00020 #ifndef TESSERACT_CCUTIL_GENERICVECTOR_H_
00021 #define TESSERACT_CCUTIL_GENERICVECTOR_H_
00022 
#include <assert.h>
#include <limits.h>
#include <stdio.h>
#include <stdlib.h>
#include <string.h>

#include "tesscallback.h"
#include "errcode.h"
#include "helpers.h"
#include "ndminx.h"
#include "strngs.h"
00032 
00033 // Use PointerVector<T> below in preference to GenericVector<T*>, as that
00034 // provides automatic deletion of pointers, [De]Serialize that works, and
00035 // sort that works.
00036 template <typename T>
00037 class GenericVector {
00038  public:
00039   GenericVector() {
00040     init(kDefaultVectorSize);
00041   }
00042   GenericVector(int size, T init_val) {
00043     init(size);
00044     init_to_size(size, init_val);
00045   }
00046 
00047   // Copy
00048   GenericVector(const GenericVector& other) {
00049     this->init(other.size());
00050     this->operator+=(other);
00051   }
00052   GenericVector<T> &operator+=(const GenericVector& other);
00053   GenericVector<T> &operator=(const GenericVector& other);
00054 
00055   ~GenericVector();
00056 
00057   // Reserve some memory.
00058   void reserve(int size);
00059   // Double the size of the internal array.
00060   void double_the_size();
00061 
00062   // Resizes to size and sets all values to t.
00063   void init_to_size(int size, T t);
00064 
00065   // Return the size used.
00066   int size() const {
00067     return size_used_;
00068   }
00069   int size_reserved() const {
00070     return size_reserved_;
00071   }
00072 
00073   int length() const {
00074     return size_used_;
00075   }
00076 
00077   // Return true if empty.
00078   bool empty() const {
00079     return size_used_ == 0;
00080   }
00081 
00082   // Return the object from an index.
00083   T &get(int index) const;
00084   T &back() const;
00085   T &operator[](int index) const;
00086   // Returns the last object and removes it.
00087   T pop_back();
00088 
00089   // Return the index of the T object.
00090   // This method NEEDS a compare_callback to be passed to
00091   // set_compare_callback.
00092   int get_index(T object) const;
00093 
00094   // Return true if T is in the array
00095   bool contains(T object) const;
00096 
00097   // Return true if the index is valid
00098   T contains_index(int index) const;
00099 
00100   // Push an element in the end of the array
00101   int push_back(T object);
00102   void operator+=(T t);
00103 
00104   // Push an element in the end of the array if the same
00105   // element is not already contained in the array.
00106   int push_back_new(T object);
00107 
00108   // Push an element in the front of the array
00109   // Note: This function is O(n)
00110   int push_front(T object);
00111 
00112   // Set the value at the given index
00113   void set(T t, int index);
00114 
00115   // Insert t at the given index, push other elements to the right.
00116   void insert(T t, int index);
00117 
00118   // Removes an element at the given index and
00119   // shifts the remaining elements to the left.
00120   void remove(int index);
00121 
00122   // Truncates the array to the given size by removing the end.
00123   // If the current size is less, the array is not expanded.
00124   void truncate(int size) {
00125     if (size < size_used_)
00126       size_used_ = size;
00127   }
00128 
00129   // Add a callback to be called to delete the elements when the array took
00130   // their ownership.
00131   void set_clear_callback(TessCallback1<T>* cb);
00132 
00133   // Add a callback to be called to compare the elements when needed (contains,
00134   // get_id, ...)
00135   void set_compare_callback(TessResultCallback2<bool, T const &, T const &>* cb);
00136 
00137   // Clear the array, calling the clear callback function if any.
00138   // All the owned callbacks are also deleted.
00139   // If you don't want the callbacks to be deleted, before calling clear, set
00140   // the callback to NULL.
00141   void clear();
00142 
00143   // Delete objects pointed to by data_[i]
00144   void delete_data_pointers();
00145 
00146   // This method clears the current object, then, does a shallow copy of
00147   // its argument, and finally invalidates its argument.
00148   // Callbacks are moved to the current object;
00149   void move(GenericVector<T>* from);
00150 
00151   // Read/Write the array to a file. This does _NOT_ read/write the callbacks.
00152   // The callback given must be permanent since they will be called more than
00153   // once. The given callback will be deleted at the end.
00154   // If the callbacks are NULL, then the data is simply read/written using
00155   // fread (and swapping)/fwrite.
00156   // Returns false on error or if the callback returns false.
00157   // DEPRECATED. Use [De]Serialize[Classes] instead.
00158   bool write(FILE* f, TessResultCallback2<bool, FILE*, T const &>* cb) const;
00159   bool read(FILE* f, TessResultCallback3<bool, FILE*, T*, bool>* cb, bool swap);
00160   // Writes a vector of simple types to the given file. Assumes that bitwise
00161   // read/write of T will work. Returns false in case of error.
00162   bool Serialize(FILE* fp) const;
00163   // Reads a vector of simple types from the given file. Assumes that bitwise
00164   // read/write will work with ReverseN according to sizeof(T).
00165   // Returns false in case of error.
00166   // If swap is true, assumes a big/little-endian swap is needed.
00167   bool DeSerialize(bool swap, FILE* fp);
00168   // Writes a vector of classes to the given file. Assumes the existence of
00169   // bool T::Serialize(FILE* fp) const that returns false in case of error.
00170   // Returns false in case of error.
00171   bool SerializeClasses(FILE* fp) const;
00172   // Reads a vector of classes from the given file. Assumes the existence of
00173   // bool T::Deserialize(bool swap, FILE* fp) that returns false in case of
00174   // error. Also needs T::T() and T::T(constT&), as init_to_size is used in
00175   // this function. Returns false in case of error.
00176   // If swap is true, assumes a big/little-endian swap is needed.
00177   bool DeSerializeClasses(bool swap, FILE* fp);
00178 
00179   // Allocates a new array of double the current_size, copies over the
00180   // information from data to the new location, deletes data and returns
00181   // the pointed to the new larger array.
00182   // This function uses memcpy to copy the data, instead of invoking
00183   // operator=() for each element like double_the_size() does.
00184   static T *double_the_size_memcpy(int current_size, T *data) {
00185     T *data_new = new T[current_size * 2];
00186     memcpy(data_new, data, sizeof(T) * current_size);
00187     delete[] data;
00188     return data_new;
00189   }
00190 
00191   // Sorts the members of this vector using the less than comparator (cmp_lt),
00192   // which compares the values. Useful for GenericVectors to primitive types.
00193   // Will not work so great for pointers (unless you just want to sort some
00194   // pointers). You need to provide a specialization to sort_cmp to use
00195   // your type.
00196   void sort();
00197 
00198   // Sort the array into the order defined by the qsort function comparator.
00199   // The comparator function is as defined by qsort, ie. it receives pointers
00200   // to two Ts and returns negative if the first element is to appear earlier
00201   // in the result and positive if it is to appear later, with 0 for equal.
00202   void sort(int (*comparator)(const void*, const void*)) {
00203     qsort(data_, size_used_, sizeof(*data_), comparator);
00204   }
00205 
00206   // Searches the array (assuming sorted in ascending order, using sort()) for
00207   // an element equal to target and returns true if it is present.
00208   // Use binary_search to get the index of target, or its nearest candidate.
00209   bool bool_binary_search(const T& target) const {
00210     int index = binary_search(target);
00211     if (index >= size_used_)
00212       return false;
00213     return data_[index] == target;
00214   }
00215   // Searches the array (assuming sorted in ascending order, using sort()) for
00216   // an element equal to target and returns the index of the best candidate.
00217   // The return value is conceptually the largest index i such that
00218   // data_[i] <= target or 0 if target < the whole vector.
00219   // NOTE that this function uses operator> so really the return value is
00220   // the largest index i such that data_[i] > target is false.
00221   int binary_search(const T& target) const {
00222     int bottom = 0;
00223     int top = size_used_;
00224     do {
00225       int middle = (bottom + top) / 2;
00226       if (data_[middle] > target)
00227         top = middle;
00228       else
00229         bottom = middle;
00230     }
00231     while (top - bottom > 1);
00232     return bottom;
00233   }
00234 
00235   // Compact the vector by deleting elements using operator!= on basic types.
00236   // The vector must be sorted.
00237   void compact_sorted() {
00238     if (size_used_ == 0)
00239       return;
00240 
00241     // First element is in no matter what, hence the i = 1.
00242     int last_write = 0;
00243     for (int i = 1; i < size_used_; ++i) {
00244       // Finds next unique item and writes it.
00245       if (data_[last_write] != data_[i])
00246         data_[++last_write] = data_[i];
00247     }
00248     // last_write is the index of a valid data cell, so add 1.
00249     size_used_ = last_write + 1;
00250   }
00251 
00252   // Compact the vector by deleting elements for which delete_cb returns
00253   // true. delete_cb is a permanent callback and will be deleted.
00254   void compact(TessResultCallback1<bool, int>* delete_cb) {
00255     int new_size = 0;
00256     int old_index = 0;
00257     // Until the callback returns true, the elements stay the same.
00258     while (old_index < size_used_ && !delete_cb->Run(old_index++))
00259       ++new_size;
00260     // Now just copy anything else that gets false from delete_cb.
00261     for (; old_index < size_used_; ++old_index) {
00262       if (!delete_cb->Run(old_index)) {
00263         data_[new_size++] = data_[old_index];
00264       }
00265     }
00266     size_used_ = new_size;
00267     delete delete_cb;
00268   }
00269 
00270   T dot_product(const GenericVector<T>& other) const {
00271     T result = static_cast<T>(0);
00272     for (int i = MIN(size_used_, other.size_used_) - 1; i >= 0; --i)
00273       result += data_[i] * other.data_[i];
00274     return result;
00275   }
00276 
00277   // Returns the index of what would be the target_index_th item in the array
00278   // if the members were sorted, without actually sorting. Members are
00279   // shuffled around, but it takes O(n) time.
00280   // NOTE: uses operator< and operator== on the members.
00281   int choose_nth_item(int target_index) {
00282     // Make sure target_index is legal.
00283     if (target_index < 0)
00284       target_index = 0;                   // ensure legal
00285     else if (target_index >= size_used_)
00286       target_index = size_used_ - 1;
00287     unsigned int seed = 1;
00288     return choose_nth_item(target_index, 0, size_used_, &seed);
00289   }
00290 
00291   // Swaps the elements with the given indices.
00292   void swap(int index1, int index2) {
00293     if (index1 != index2) {
00294       T tmp = data_[index1];
00295       data_[index1] = data_[index2];
00296       data_[index2] = tmp;
00297     }
00298   }
00299 
00300  protected:
00301   // Internal recursive version of choose_nth_item.
00302   int choose_nth_item(int target_index, int start, int end, unsigned int* seed);
00303 
00304   // Init the object, allocating size memory.
00305   void init(int size);
00306 
00307   // We are assuming that the object generally placed in thie
00308   // vector are small enough that for efficiency it makes sence
00309   // to start with a larger initial size.
00310   static const int kDefaultVectorSize = 4;
00311   inT32   size_used_;
00312   inT32   size_reserved_;
00313   T*    data_;
00314   TessCallback1<T>* clear_cb_;
00315   // Mutable because Run method is not const
00316   mutable TessResultCallback2<bool, T const &, T const &>* compare_cb_;
00317 };
00318 
00319 namespace tesseract {
00320 
00321 // Function to read a GenericVector<char> from a whole file.
00322 // Returns false on failure.
00323 typedef bool (*FileReader)(const STRING& filename, GenericVector<char>* data);
00324 // Function to write a GenericVector<char> to a whole file.
00325 // Returns false on failure.
00326 typedef bool (*FileWriter)(const GenericVector<char>& data,
00327                            const STRING& filename);
00328 // The default FileReader loads the whole file into the vector of char,
00329 // returning false on error.
00330 inline bool LoadDataFromFile(const STRING& filename,
00331                              GenericVector<char>* data) {
00332   FILE* fp = fopen(filename.string(), "rb");
00333   if (fp == NULL) return false;
00334   fseek(fp, 0, SEEK_END);
00335   size_t size = ftell(fp);
00336   fseek(fp, 0, SEEK_SET);
00337   // Pad with a 0, just in case we treat the result as a string.
00338   data->init_to_size(size + 1, 0);
00339   bool result = fread(&(*data)[0], 1, size, fp) == size;
00340   fclose(fp);
00341   return result;
00342 }
00343 // The default FileWriter writes the vector of char to the filename file,
00344 // returning false on error.
00345 inline bool SaveDataToFile(const GenericVector<char>& data,
00346                           const STRING& filename) {
00347   FILE* fp = fopen(filename.string(), "wb");
00348   if (fp == NULL) return false;
00349   bool result =
00350       static_cast<int>(fwrite(&data[0], 1, data.size(), fp)) == data.size();
00351   fclose(fp);
00352   return result;
00353 }
00354 
// Equality predicate built on T::operator==; returns true when the two
// arguments compare equal.
template <typename T>
bool cmp_eq(T const & t1, T const & t2) {
  const bool are_equal = (t1 == t2);
  return are_equal;
}
00359 
// qsort-style comparator used by sort(). t1 and t2 point at two T values,
// which are ordered with T::operator<.
// Returns -1 if *t1 < *t2, 1 if *t2 < *t1, and 0 otherwise.
template <typename T>
int sort_cmp(const void* t1, const void* t2) {
  const T& lhs = *static_cast<const T*>(t1);
  const T& rhs = *static_cast<const T*>(t2);
  if (lhs < rhs)
    return -1;
  return (rhs < lhs) ? 1 : 0;
}
00376 
// qsort-style comparator used by PointerVector::sort(). t1 and t2 point at
// two T* array slots; the pointed-to objects are ordered with T::operator<.
// Returns -1 if **t1 < **t2, 1 if **t2 < **t1, and 0 otherwise.
template <typename T>
int sort_ptr_cmp(const void* t1, const void* t2) {
  T* const* lhs_slot = static_cast<T* const*>(t1);
  T* const* rhs_slot = static_cast<T* const*>(t2);
  const T& lhs = **lhs_slot;
  const T& rhs = **rhs_slot;
  if (lhs < rhs)
    return -1;
  return (rhs < lhs) ? 1 : 0;
}
00393 
00394 // Subclass for a vector of pointers. Use in preference to GenericVector<T*>
00395 // as it provides automatic deletion and correct serialization, with the
00396 // corollary that all copy operations are deep copies of the pointed-to objects.
00397 template<typename T>
00398 class PointerVector : public GenericVector<T*> {
00399  public:
00400   PointerVector() : GenericVector<T*>() { }
00401   explicit PointerVector(int size) : GenericVector<T*>(size) { }
00402   ~PointerVector() {
00403     // Clear must be called here, even though it is called again by the base,
00404     // as the base will call the wrong clear.
00405     clear();
00406   }
00407   // Copy must be deep, as the pointers will be automatically deleted on
00408   // destruction.
00409   PointerVector(const PointerVector& other) {
00410     this->init(other.size());
00411     this->operator+=(other);
00412   }
00413   PointerVector<T>& operator+=(const PointerVector& other) {
00414     this->reserve(this->size_used_ + other.size_used_);
00415     for (int i = 0; i < other.size(); ++i) {
00416       this->push_back(new T(*other.data_[i]));
00417     }
00418     return *this;
00419   }
00420 
00421   PointerVector<T>& operator=(const PointerVector& other) {
00422     this->truncate(0);
00423     this->operator+=(other);
00424     return *this;
00425   }
00426 
00427   // Removes an element at the given index and
00428   // shifts the remaining elements to the left.
00429   void remove(int index) {
00430     delete GenericVector<T*>::data_[index];
00431     GenericVector<T*>::remove(index);
00432   }
00433 
00434   // Truncates the array to the given size by removing the end.
00435   // If the current size is less, the array is not expanded.
00436   void truncate(int size) {
00437     for (int i = size; i < GenericVector<T*>::size_used_; ++i)
00438       delete GenericVector<T*>::data_[i];
00439     GenericVector<T*>::truncate(size);
00440   }
00441 
00442   // Compact the vector by deleting elements for which delete_cb returns
00443   // true. delete_cb is a permanent callback and will be deleted.
00444   void compact(TessResultCallback1<bool, const T*>* delete_cb) {
00445     int new_size = 0;
00446     int old_index = 0;
00447     // Until the callback returns true, the elements stay the same.
00448     while (old_index < GenericVector<T*>::size_used_ &&
00449            !delete_cb->Run(GenericVector<T*>::data_[old_index++]))
00450       ++new_size;
00451     // Now just copy anything else that gets false from delete_cb.
00452     for (; old_index < GenericVector<T*>::size_used_; ++old_index) {
00453       if (!delete_cb->Run(GenericVector<T*>::data_[old_index])) {
00454         GenericVector<T*>::data_[new_size++] =
00455             GenericVector<T*>::data_[old_index];
00456       } else {
00457         delete GenericVector<T*>::data_[old_index];
00458       }
00459     }
00460     GenericVector<T*>::size_used_ = new_size;
00461     delete delete_cb;
00462   }
00463 
00464   // Clear the array, calling the clear callback function if any.
00465   // All the owned callbacks are also deleted.
00466   // If you don't want the callbacks to be deleted, before calling clear, set
00467   // the callback to NULL.
00468   void clear() {
00469     GenericVector<T*>::delete_data_pointers();
00470     GenericVector<T*>::clear();
00471   }
00472 
00473   // Writes a vector of simple types to the given file. Assumes that bitwise
00474   // read/write of T will work. Returns false in case of error.
00475   bool Serialize(FILE* fp) const {
00476     inT32 used = GenericVector<T*>::size_used_;
00477     if (fwrite(&used, sizeof(used), 1, fp) != 1) return false;
00478     for (int i = 0; i < used; ++i) {
00479       inT8 non_null = GenericVector<T*>::data_[i] != NULL;
00480       if (fwrite(&non_null, sizeof(non_null), 1, fp) != 1) return false;
00481       if (non_null && !GenericVector<T*>::data_[i]->Serialize(fp)) return false;
00482     }
00483     return true;
00484   }
00485   // Reads a vector of simple types from the given file. Assumes that bitwise
00486   // read/write will work with ReverseN according to sizeof(T).
00487   // Also needs T::T(), as new T is used in this function.
00488   // Returns false in case of error.
00489   // If swap is true, assumes a big/little-endian swap is needed.
00490   bool DeSerialize(bool swap, FILE* fp) {
00491     inT32 reserved;
00492     if (fread(&reserved, sizeof(reserved), 1, fp) != 1) return false;
00493     if (swap) Reverse32(&reserved);
00494     GenericVector<T*>::reserve(reserved);
00495     for (int i = 0; i < reserved; ++i) {
00496       inT8 non_null;
00497       if (fread(&non_null, sizeof(non_null), 1, fp) != 1) return false;
00498       T* item = NULL;
00499       if (non_null) {
00500         item = new T;
00501         if (!item->DeSerialize(swap, fp)) return false;
00502       }
00503       this->push_back(item);
00504     }
00505     return true;
00506   }
00507 
00508   // Sorts the items pointed to by the members of this vector using
00509   // t::operator<().
00510   void sort() {
00511     sort(&sort_ptr_cmp<T>);
00512   }
00513 };
00514 
00515 }  // namespace tesseract
00516 
00517 // A useful vector that uses operator== to do comparisons.
00518 template <typename T>
00519 class GenericVectorEqEq : public GenericVector<T> {
00520  public:
00521   GenericVectorEqEq() {
00522     GenericVector<T>::set_compare_callback(
00523         NewPermanentTessCallback(tesseract::cmp_eq<T>));
00524   }
00525   GenericVectorEqEq(int size) : GenericVector<T>(size) {
00526     GenericVector<T>::set_compare_callback(
00527         NewPermanentTessCallback(tesseract::cmp_eq<T>));
00528   }
00529 };
00530 
00531 template <typename T>
00532 void GenericVector<T>::init(int size) {
00533   size_used_ = 0;
00534   size_reserved_ = 0;
00535   data_ = 0;
00536   clear_cb_ = 0;
00537   compare_cb_ = 0;
00538   reserve(size);
00539 }
00540 
00541 template <typename T>
00542 GenericVector<T>::~GenericVector() {
00543   clear();
00544 }
00545 
00546 // Reserve some memory. If the internal array contains elements, they are
00547 // copied.
00548 template <typename T>
00549 void GenericVector<T>::reserve(int size) {
00550   if (size_reserved_ >= size || size <= 0)
00551     return;
00552   T* new_array = new T[size];
00553   for (int i = 0; i < size_used_; ++i)
00554     new_array[i] = data_[i];
00555   if (data_ != NULL) delete[] data_;
00556   data_ = new_array;
00557   size_reserved_ = size;
00558 }
00559 
00560 template <typename T>
00561 void GenericVector<T>::double_the_size() {
00562   if (size_reserved_ == 0) {
00563     reserve(kDefaultVectorSize);
00564   }
00565   else {
00566     reserve(2 * size_reserved_);
00567   }
00568 }
00569 
00570 // Resizes to size and sets all values to t.
00571 template <typename T>
00572 void GenericVector<T>::init_to_size(int size, T t) {
00573   reserve(size);
00574   size_used_ = size;
00575   for (int i = 0; i < size; ++i)
00576     data_[i] = t;
00577 }
00578 
00579 
00580 // Return the object from an index.
00581 template <typename T>
00582 T &GenericVector<T>::get(int index) const {
00583   ASSERT_HOST(index >= 0 && index < size_used_);
00584   return data_[index];
00585 }
00586 
00587 template <typename T>
00588 T &GenericVector<T>::operator[](int index) const {
00589   assert(index >= 0 && index < size_used_);
00590   return data_[index];
00591 }
00592 
00593 template <typename T>
00594 T &GenericVector<T>::back() const {
00595   ASSERT_HOST(size_used_ > 0);
00596   return data_[size_used_ - 1];
00597 }
00598 // Returns the last object and removes it.
00599 template <typename T>
00600 T GenericVector<T>::pop_back() {
00601   ASSERT_HOST(size_used_ > 0);
00602   return data_[--size_used_];
00603 }
00604 
00605 // Return the object from an index.
00606 template <typename T>
00607 void GenericVector<T>::set(T t, int index) {
00608   ASSERT_HOST(index >= 0 && index < size_used_);
00609   data_[index] = t;
00610 }
00611 
00612 // Shifts the rest of the elements to the right to make
00613 // space for the new elements and inserts the given element
00614 // at the specified index.
00615 template <typename T>
00616 void GenericVector<T>::insert(T t, int index) {
00617   ASSERT_HOST(index >= 0 && index <= size_used_);
00618   if (size_reserved_ == size_used_)
00619     double_the_size();
00620   for (int i = size_used_; i > index; --i) {
00621     data_[i] = data_[i-1];
00622   }
00623   data_[index] = t;
00624   size_used_++;
00625 }
00626 
00627 // Removes an element at the given index and
00628 // shifts the remaining elements to the left.
00629 template <typename T>
00630 void GenericVector<T>::remove(int index) {
00631   ASSERT_HOST(index >= 0 && index < size_used_);
00632   for (int i = index; i < size_used_ - 1; ++i) {
00633     data_[i] = data_[i+1];
00634   }
00635   size_used_--;
00636 }
00637 
00638 // Return true if the index is valindex
00639 template <typename T>
00640 T GenericVector<T>::contains_index(int index) const {
00641   return index >= 0 && index < size_used_;
00642 }
00643 
00644 // Return the index of the T object.
00645 template <typename T>
00646 int GenericVector<T>::get_index(T object) const {
00647   for (int i = 0; i < size_used_; ++i) {
00648     ASSERT_HOST(compare_cb_ != NULL);
00649     if (compare_cb_->Run(object, data_[i]))
00650       return i;
00651   }
00652   return -1;
00653 }
00654 
00655 // Return true if T is in the array
00656 template <typename T>
00657 bool GenericVector<T>::contains(T object) const {
00658   return get_index(object) != -1;
00659 }
00660 
00661 // Add an element in the array
00662 template <typename T>
00663 int GenericVector<T>::push_back(T object) {
00664   int index = 0;
00665   if (size_used_ == size_reserved_)
00666     double_the_size();
00667   index = size_used_++;
00668   data_[index] = object;
00669   return index;
00670 }
00671 
00672 template <typename T>
00673 int GenericVector<T>::push_back_new(T object) {
00674   int index = get_index(object);
00675   if (index >= 0)
00676     return index;
00677   return push_back(object);
00678 }
00679 
00680 // Add an element in the array (front)
00681 template <typename T>
00682 int GenericVector<T>::push_front(T object) {
00683   if (size_used_ == size_reserved_)
00684     double_the_size();
00685   for (int i = size_used_; i > 0; --i)
00686     data_[i] = data_[i-1];
00687   data_[0] = object;
00688   ++size_used_;
00689   return 0;
00690 }
00691 
00692 template <typename T>
00693 void GenericVector<T>::operator+=(T t) {
00694   push_back(t);
00695 }
00696 
00697 template <typename T>
00698 GenericVector<T> &GenericVector<T>::operator+=(const GenericVector& other) {
00699   this->reserve(size_used_ + other.size_used_);
00700   for (int i = 0; i < other.size(); ++i) {
00701     this->operator+=(other.data_[i]);
00702   }
00703   return *this;
00704 }
00705 
00706 template <typename T>
00707 GenericVector<T> &GenericVector<T>::operator=(const GenericVector& other) {
00708   this->truncate(0);
00709   this->operator+=(other);
00710   return *this;
00711 }
00712 
00713 // Add a callback to be called to delete the elements when the array took
00714 // their ownership.
00715 template <typename T>
00716 void GenericVector<T>::set_clear_callback(TessCallback1<T>* cb) {
00717   clear_cb_ = cb;
00718 }
00719 
00720 // Add a callback to be called to delete the elements when the array took
00721 // their ownership.
00722 template <typename T>
00723 void GenericVector<T>::set_compare_callback(
00724     TessResultCallback2<bool, T const &, T const &>* cb) {
00725   compare_cb_ = cb;
00726 }
00727 
00728 // Clear the array, calling the callback function if any.
00729 template <typename T>
00730 void GenericVector<T>::clear() {
00731   if (size_reserved_ > 0) {
00732     if (clear_cb_ != NULL)
00733       for (int i = 0; i < size_used_; ++i)
00734         clear_cb_->Run(data_[i]);
00735     delete[] data_;
00736     data_ = NULL;
00737     size_used_ = 0;
00738     size_reserved_ = 0;
00739   }
00740   if (clear_cb_ != NULL) {
00741     delete clear_cb_;
00742     clear_cb_ = NULL;
00743   }
00744   if (compare_cb_ != NULL) {
00745     delete compare_cb_;
00746     compare_cb_ = NULL;
00747   }
00748 }
00749 
00750 template <typename T>
00751 void GenericVector<T>::delete_data_pointers() {
00752   for (int i = 0; i < size_used_; ++i)
00753     if (data_[i]) {
00754       delete data_[i];
00755     }
00756 }
00757 
00758 
00759 template <typename T>
00760 bool GenericVector<T>::write(
00761     FILE* f, TessResultCallback2<bool, FILE*, T const &>* cb) const {
00762   if (fwrite(&size_reserved_, sizeof(size_reserved_), 1, f) != 1) return false;
00763   if (fwrite(&size_used_, sizeof(size_used_), 1, f) != 1) return false;
00764   if (cb != NULL) {
00765     for (int i = 0; i < size_used_; ++i) {
00766       if (!cb->Run(f, data_[i])) {
00767         delete cb;
00768         return false;
00769       }
00770     }
00771     delete cb;
00772   } else {
00773     if (fwrite(data_, sizeof(T), size_used_, f) != size_used_) return false;
00774   }
00775   return true;
00776 }
00777 
00778 template <typename T>
00779 bool GenericVector<T>::read(FILE* f,
00780                             TessResultCallback3<bool, FILE*, T*, bool>* cb,
00781                             bool swap) {
00782   inT32 reserved;
00783   if (fread(&reserved, sizeof(reserved), 1, f) != 1) return false;
00784   if (swap) Reverse32(&reserved);
00785   reserve(reserved);
00786   if (fread(&size_used_, sizeof(size_used_), 1, f) != 1) return false;
00787   if (swap) Reverse32(&size_used_);
00788   if (cb != NULL) {
00789     for (int i = 0; i < size_used_; ++i) {
00790       if (!cb->Run(f, data_ + i, swap)) {
00791         delete cb;
00792         return false;
00793       }
00794     }
00795     delete cb;
00796   } else {
00797     if (fread(data_, sizeof(T), size_used_, f) != size_used_) return false;
00798     if (swap) {
00799       for (int i = 0; i < size_used_; ++i)
00800         ReverseN(&data_[i], sizeof(T));
00801     }
00802   }
00803   return true;
00804 }
00805 
00806 // Writes a vector of simple types to the given file. Assumes that bitwise
00807 // read/write of T will work. Returns false in case of error.
00808 template <typename T>
00809 bool GenericVector<T>::Serialize(FILE* fp) const {
00810   if (fwrite(&size_used_, sizeof(size_used_), 1, fp) != 1) return false;
00811   if (fwrite(data_, sizeof(*data_), size_used_, fp) != size_used_) return false;
00812   return true;
00813 }
00814 
00815 // Reads a vector of simple types from the given file. Assumes that bitwise
00816 // read/write will work with ReverseN according to sizeof(T).
00817 // Returns false in case of error.
00818 // If swap is true, assumes a big/little-endian swap is needed.
00819 template <typename T>
00820 bool GenericVector<T>::DeSerialize(bool swap, FILE* fp) {
00821   inT32 reserved;
00822   if (fread(&reserved, sizeof(reserved), 1, fp) != 1) return false;
00823   if (swap) Reverse32(&reserved);
00824   reserve(reserved);
00825   size_used_ = reserved;
00826   if (fread(data_, sizeof(T), size_used_, fp) != size_used_) return false;
00827   if (swap) {
00828     for (int i = 0; i < size_used_; ++i)
00829       ReverseN(&data_[i], sizeof(data_[i]));
00830   }
00831   return true;
00832 }
00833 
00834 // Writes a vector of classes to the given file. Assumes the existence of
00835 // bool T::Serialize(FILE* fp) const that returns false in case of error.
00836 // Returns false in case of error.
00837 template <typename T>
00838 bool GenericVector<T>::SerializeClasses(FILE* fp) const {
00839   if (fwrite(&size_used_, sizeof(size_used_), 1, fp) != 1) return false;
00840   for (int i = 0; i < size_used_; ++i) {
00841     if (!data_[i].Serialize(fp)) return false;
00842   }
00843   return true;
00844 }
00845 
00846 // Reads a vector of classes from the given file. Assumes the existence of
00847 // bool T::Deserialize(bool swap, FILE* fp) that returns false in case of
00848 // error. Alse needs T::T() and T::T(constT&), as init_to_size is used in
00849 // this function. Returns false in case of error.
00850 // If swap is true, assumes a big/little-endian swap is needed.
00851 template <typename T>
00852 bool GenericVector<T>::DeSerializeClasses(bool swap, FILE* fp) {
00853   uinT32 reserved;
00854   if (fread(&reserved, sizeof(reserved), 1, fp) != 1) return false;
00855   if (swap) Reverse32(&reserved);
00856   T empty;
00857   init_to_size(reserved, empty);
00858   for (int i = 0; i < reserved; ++i) {
00859     if (!data_[i].DeSerialize(swap, fp)) return false;
00860   }
00861   return true;
00862 }
00863 
00864 // This method clear the current object, then, does a shallow copy of
00865 // its argument, and finally invalidates its argument.
00866 template <typename T>
00867 void GenericVector<T>::move(GenericVector<T>* from) {
00868   this->clear();
00869   this->data_ = from->data_;
00870   this->size_reserved_ = from->size_reserved_;
00871   this->size_used_ = from->size_used_;
00872   this->compare_cb_ = from->compare_cb_;
00873   this->clear_cb_ = from->clear_cb_;
00874   from->data_ = NULL;
00875   from->clear_cb_ = NULL;
00876   from->compare_cb_ = NULL;
00877   from->size_used_ = 0;
00878   from->size_reserved_ = 0;
00879 }
00880 
00881 template <typename T>
00882 void GenericVector<T>::sort() {
00883   sort(&tesseract::sort_cmp<T>);
00884 }
00885 
00886 // Internal recursive version of choose_nth_item.
00887 // The algorithm used comes from "Algorithms" by Sedgewick:
00888 // http://books.google.com/books/about/Algorithms.html?id=idUdqdDXqnAC
00889 // The principle is to choose a random pivot, and move everything less than
00890 // the pivot to its left, and everything greater than the pivot to the end
00891 // of the array, then recurse on the part that contains the desired index, or
00892 // just return the answer if it is in the equal section in the middle.
00893 // The random pivot guarantees average linear time for the same reason that
00894 // n times vector::push_back takes linear time on average.
00895 // target_index, start and and end are all indices into the full array.
00896 // Seed is a seed for rand_r for thread safety purposes. Its value is
00897 // unimportant as the random numbers do not affect the result except
00898 // between equal answers.
00899 template <typename T>
00900 int GenericVector<T>::choose_nth_item(int target_index, int start, int end,
00901                                       unsigned int* seed) {
00902   // Number of elements to process.
00903   int num_elements = end - start;
00904   // Trivial cases.
00905   if (num_elements <= 1)
00906     return start;
00907   if (num_elements == 2) {
00908     if (data_[start] < data_[start + 1]) {
00909       return target_index > start ? start + 1 : start;
00910     } else {
00911       return target_index > start ? start : start + 1;
00912     }
00913   }
00914   // Place the pivot at start.
00915   #ifndef rand_r  // _MSC_VER, ANDROID
00916   srand(*seed);
00917   #define rand_r(seed) rand()
00918   #endif  // _MSC_VER
00919   int pivot = rand_r(seed) % num_elements + start;
00920   swap(pivot, start);
00921   // The invariant condition here is that items [start, next_lesser) are less
00922   // than the pivot (which is at index next_lesser) and items
00923   // [prev_greater, end) are greater than the pivot, with items
00924   // [next_lesser, prev_greater) being equal to the pivot.
00925   int next_lesser = start;
00926   int prev_greater = end;
00927   for (int next_sample = start + 1; next_sample < prev_greater;) {
00928     if (data_[next_sample] < data_[next_lesser]) {
00929       swap(next_lesser++, next_sample++);
00930     } else if (data_[next_sample] == data_[next_lesser]) {
00931       ++next_sample;
00932     } else {
00933       swap(--prev_greater, next_sample);
00934     }
00935   }
00936   // Now the invariant is set up, we recurse on just the section that contains
00937   // the desired index.
00938   if (target_index < next_lesser)
00939     return choose_nth_item(target_index, start, next_lesser, seed);
00940   else if (target_index < prev_greater)
00941     return next_lesser;          // In equal bracket.
00942   else
00943     return choose_nth_item(target_index, prev_greater, end, seed);
00944 }
00945 
00946 
00947 #endif  // TESSERACT_CCUTIL_GENERICVECTOR_H_
 All Classes Namespaces Files Functions Variables Typedefs Enumerations Enumerator Friends Defines