tesseract
3.03
|
00001 00002 // File: genericvector.h 00003 // Description: Generic vector class 00004 // Author: Daria Antonova 00005 // Created: Mon Jun 23 11:26:43 PDT 2008 00006 // 00007 // (C) Copyright 2007, Google Inc. 00008 // Licensed under the Apache License, Version 2.0 (the "License"); 00009 // you may not use this file except in compliance with the License. 00010 // You may obtain a copy of the License at 00011 // http://www.apache.org/licenses/LICENSE-2.0 00012 // Unless required by applicable law or agreed to in writing, software 00013 // distributed under the License is distributed on an "AS IS" BASIS, 00014 // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 00015 // See the License for the specific language governing permissions and 00016 // limitations under the License. 00017 // 00019 // 00020 #ifndef TESSERACT_CCUTIL_GENERICVECTOR_H_ 00021 #define TESSERACT_CCUTIL_GENERICVECTOR_H_ 00022 00023 #include <assert.h> 00024 #include <stdio.h> 00025 #include <stdlib.h> 00026 00027 #include "tesscallback.h" 00028 #include "errcode.h" 00029 #include "helpers.h" 00030 #include "ndminx.h" 00031 #include "strngs.h" 00032 00033 // Use PointerVector<T> below in preference to GenericVector<T*>, as that 00034 // provides automatic deletion of pointers, [De]Serialize that works, and 00035 // sort that works. 00036 template <typename T> 00037 class GenericVector { 00038 public: 00039 GenericVector() { 00040 init(kDefaultVectorSize); 00041 } 00042 GenericVector(int size, T init_val) { 00043 init(size); 00044 init_to_size(size, init_val); 00045 } 00046 00047 // Copy 00048 GenericVector(const GenericVector& other) { 00049 this->init(other.size()); 00050 this->operator+=(other); 00051 } 00052 GenericVector<T> &operator+=(const GenericVector& other); 00053 GenericVector<T> &operator=(const GenericVector& other); 00054 00055 ~GenericVector(); 00056 00057 // Reserve some memory. 00058 void reserve(int size); 00059 // Double the size of the internal array. 00060 void double_the_size(); 00061 00062 // Resizes to size and sets all values to t. 00063 void init_to_size(int size, T t); 00064 00065 // Return the size used. 00066 int size() const { 00067 return size_used_; 00068 } 00069 int size_reserved() const { 00070 return size_reserved_; 00071 } 00072 00073 int length() const { 00074 return size_used_; 00075 } 00076 00077 // Return true if empty. 00078 bool empty() const { 00079 return size_used_ == 0; 00080 } 00081 00082 // Return the object from an index. 00083 T &get(int index) const; 00084 T &back() const; 00085 T &operator[](int index) const; 00086 // Returns the last object and removes it. 00087 T pop_back(); 00088 00089 // Return the index of the T object. 00090 // This method NEEDS a compare_callback to be passed to 00091 // set_compare_callback. 00092 int get_index(T object) const; 00093 00094 // Return true if T is in the array 00095 bool contains(T object) const; 00096 00097 // Return true if the index is valid 00098 T contains_index(int index) const; 00099 00100 // Push an element in the end of the array 00101 int push_back(T object); 00102 void operator+=(T t); 00103 00104 // Push an element in the end of the array if the same 00105 // element is not already contained in the array. 00106 int push_back_new(T object); 00107 00108 // Push an element in the front of the array 00109 // Note: This function is O(n) 00110 int push_front(T object); 00111 00112 // Set the value at the given index 00113 void set(T t, int index); 00114 00115 // Insert t at the given index, push other elements to the right. 00116 void insert(T t, int index); 00117 00118 // Removes an element at the given index and 00119 // shifts the remaining elements to the left. 00120 void remove(int index); 00121 00122 // Truncates the array to the given size by removing the end. 00123 // If the current size is less, the array is not expanded. 00124 void truncate(int size) { 00125 if (size < size_used_) 00126 size_used_ = size; 00127 } 00128 00129 // Add a callback to be called to delete the elements when the array took 00130 // their ownership. 00131 void set_clear_callback(TessCallback1<T>* cb); 00132 00133 // Add a callback to be called to compare the elements when needed (contains, 00134 // get_id, ...) 00135 void set_compare_callback(TessResultCallback2<bool, T const &, T const &>* cb); 00136 00137 // Clear the array, calling the clear callback function if any. 00138 // All the owned callbacks are also deleted. 00139 // If you don't want the callbacks to be deleted, before calling clear, set 00140 // the callback to NULL. 00141 void clear(); 00142 00143 // Delete objects pointed to by data_[i] 00144 void delete_data_pointers(); 00145 00146 // This method clears the current object, then, does a shallow copy of 00147 // its argument, and finally invalidates its argument. 00148 // Callbacks are moved to the current object; 00149 void move(GenericVector<T>* from); 00150 00151 // Read/Write the array to a file. This does _NOT_ read/write the callbacks. 00152 // The callback given must be permanent since they will be called more than 00153 // once. The given callback will be deleted at the end. 00154 // If the callbacks are NULL, then the data is simply read/written using 00155 // fread (and swapping)/fwrite. 00156 // Returns false on error or if the callback returns false. 00157 // DEPRECATED. Use [De]Serialize[Classes] instead. 00158 bool write(FILE* f, TessResultCallback2<bool, FILE*, T const &>* cb) const; 00159 bool read(FILE* f, TessResultCallback3<bool, FILE*, T*, bool>* cb, bool swap); 00160 // Writes a vector of simple types to the given file. Assumes that bitwise 00161 // read/write of T will work. Returns false in case of error. 00162 bool Serialize(FILE* fp) const; 00163 // Reads a vector of simple types from the given file. Assumes that bitwise 00164 // read/write will work with ReverseN according to sizeof(T). 00165 // Returns false in case of error. 00166 // If swap is true, assumes a big/little-endian swap is needed. 00167 bool DeSerialize(bool swap, FILE* fp); 00168 // Writes a vector of classes to the given file. Assumes the existence of 00169 // bool T::Serialize(FILE* fp) const that returns false in case of error. 00170 // Returns false in case of error. 00171 bool SerializeClasses(FILE* fp) const; 00172 // Reads a vector of classes from the given file. Assumes the existence of 00173 // bool T::Deserialize(bool swap, FILE* fp) that returns false in case of 00174 // error. Also needs T::T() and T::T(constT&), as init_to_size is used in 00175 // this function. Returns false in case of error. 00176 // If swap is true, assumes a big/little-endian swap is needed. 00177 bool DeSerializeClasses(bool swap, FILE* fp); 00178 00179 // Allocates a new array of double the current_size, copies over the 00180 // information from data to the new location, deletes data and returns 00181 // the pointed to the new larger array. 00182 // This function uses memcpy to copy the data, instead of invoking 00183 // operator=() for each element like double_the_size() does. 00184 static T *double_the_size_memcpy(int current_size, T *data) { 00185 T *data_new = new T[current_size * 2]; 00186 memcpy(data_new, data, sizeof(T) * current_size); 00187 delete[] data; 00188 return data_new; 00189 } 00190 00191 // Sorts the members of this vector using the less than comparator (cmp_lt), 00192 // which compares the values. Useful for GenericVectors to primitive types. 00193 // Will not work so great for pointers (unless you just want to sort some 00194 // pointers). You need to provide a specialization to sort_cmp to use 00195 // your type. 00196 void sort(); 00197 00198 // Sort the array into the order defined by the qsort function comparator. 00199 // The comparator function is as defined by qsort, ie. it receives pointers 00200 // to two Ts and returns negative if the first element is to appear earlier 00201 // in the result and positive if it is to appear later, with 0 for equal. 00202 void sort(int (*comparator)(const void*, const void*)) { 00203 qsort(data_, size_used_, sizeof(*data_), comparator); 00204 } 00205 00206 // Searches the array (assuming sorted in ascending order, using sort()) for 00207 // an element equal to target and returns true if it is present. 00208 // Use binary_search to get the index of target, or its nearest candidate. 00209 bool bool_binary_search(const T& target) const { 00210 int index = binary_search(target); 00211 if (index >= size_used_) 00212 return false; 00213 return data_[index] == target; 00214 } 00215 // Searches the array (assuming sorted in ascending order, using sort()) for 00216 // an element equal to target and returns the index of the best candidate. 00217 // The return value is conceptually the largest index i such that 00218 // data_[i] <= target or 0 if target < the whole vector. 00219 // NOTE that this function uses operator> so really the return value is 00220 // the largest index i such that data_[i] > target is false. 00221 int binary_search(const T& target) const { 00222 int bottom = 0; 00223 int top = size_used_; 00224 do { 00225 int middle = (bottom + top) / 2; 00226 if (data_[middle] > target) 00227 top = middle; 00228 else 00229 bottom = middle; 00230 } 00231 while (top - bottom > 1); 00232 return bottom; 00233 } 00234 00235 // Compact the vector by deleting elements using operator!= on basic types. 00236 // The vector must be sorted. 00237 void compact_sorted() { 00238 if (size_used_ == 0) 00239 return; 00240 00241 // First element is in no matter what, hence the i = 1. 00242 int last_write = 0; 00243 for (int i = 1; i < size_used_; ++i) { 00244 // Finds next unique item and writes it. 00245 if (data_[last_write] != data_[i]) 00246 data_[++last_write] = data_[i]; 00247 } 00248 // last_write is the index of a valid data cell, so add 1. 00249 size_used_ = last_write + 1; 00250 } 00251 00252 // Compact the vector by deleting elements for which delete_cb returns 00253 // true. delete_cb is a permanent callback and will be deleted. 00254 void compact(TessResultCallback1<bool, int>* delete_cb) { 00255 int new_size = 0; 00256 int old_index = 0; 00257 // Until the callback returns true, the elements stay the same. 00258 while (old_index < size_used_ && !delete_cb->Run(old_index++)) 00259 ++new_size; 00260 // Now just copy anything else that gets false from delete_cb. 00261 for (; old_index < size_used_; ++old_index) { 00262 if (!delete_cb->Run(old_index)) { 00263 data_[new_size++] = data_[old_index]; 00264 } 00265 } 00266 size_used_ = new_size; 00267 delete delete_cb; 00268 } 00269 00270 T dot_product(const GenericVector<T>& other) const { 00271 T result = static_cast<T>(0); 00272 for (int i = MIN(size_used_, other.size_used_) - 1; i >= 0; --i) 00273 result += data_[i] * other.data_[i]; 00274 return result; 00275 } 00276 00277 // Returns the index of what would be the target_index_th item in the array 00278 // if the members were sorted, without actually sorting. Members are 00279 // shuffled around, but it takes O(n) time. 00280 // NOTE: uses operator< and operator== on the members. 00281 int choose_nth_item(int target_index) { 00282 // Make sure target_index is legal. 00283 if (target_index < 0) 00284 target_index = 0; // ensure legal 00285 else if (target_index >= size_used_) 00286 target_index = size_used_ - 1; 00287 unsigned int seed = 1; 00288 return choose_nth_item(target_index, 0, size_used_, &seed); 00289 } 00290 00291 // Swaps the elements with the given indices. 00292 void swap(int index1, int index2) { 00293 if (index1 != index2) { 00294 T tmp = data_[index1]; 00295 data_[index1] = data_[index2]; 00296 data_[index2] = tmp; 00297 } 00298 } 00299 00300 protected: 00301 // Internal recursive version of choose_nth_item. 00302 int choose_nth_item(int target_index, int start, int end, unsigned int* seed); 00303 00304 // Init the object, allocating size memory. 00305 void init(int size); 00306 00307 // We are assuming that the object generally placed in thie 00308 // vector are small enough that for efficiency it makes sence 00309 // to start with a larger initial size. 00310 static const int kDefaultVectorSize = 4; 00311 inT32 size_used_; 00312 inT32 size_reserved_; 00313 T* data_; 00314 TessCallback1<T>* clear_cb_; 00315 // Mutable because Run method is not const 00316 mutable TessResultCallback2<bool, T const &, T const &>* compare_cb_; 00317 }; 00318 00319 namespace tesseract { 00320 00321 // Function to read a GenericVector<char> from a whole file. 00322 // Returns false on failure. 00323 typedef bool (*FileReader)(const STRING& filename, GenericVector<char>* data); 00324 // Function to write a GenericVector<char> to a whole file. 00325 // Returns false on failure. 00326 typedef bool (*FileWriter)(const GenericVector<char>& data, 00327 const STRING& filename); 00328 // The default FileReader loads the whole file into the vector of char, 00329 // returning false on error. 00330 inline bool LoadDataFromFile(const STRING& filename, 00331 GenericVector<char>* data) { 00332 FILE* fp = fopen(filename.string(), "rb"); 00333 if (fp == NULL) return false; 00334 fseek(fp, 0, SEEK_END); 00335 size_t size = ftell(fp); 00336 fseek(fp, 0, SEEK_SET); 00337 // Pad with a 0, just in case we treat the result as a string. 00338 data->init_to_size(size + 1, 0); 00339 bool result = fread(&(*data)[0], 1, size, fp) == size; 00340 fclose(fp); 00341 return result; 00342 } 00343 // The default FileWriter writes the vector of char to the filename file, 00344 // returning false on error. 00345 inline bool SaveDataToFile(const GenericVector<char>& data, 00346 const STRING& filename) { 00347 FILE* fp = fopen(filename.string(), "wb"); 00348 if (fp == NULL) return false; 00349 bool result = 00350 static_cast<int>(fwrite(&data[0], 1, data.size(), fp)) == data.size(); 00351 fclose(fp); 00352 return result; 00353 } 00354 00355 template <typename T> 00356 bool cmp_eq(T const & t1, T const & t2) { 00357 return t1 == t2; 00358 } 00359 00360 // Used by sort() 00361 // return < 0 if t1 < t2 00362 // return 0 if t1 == t2 00363 // return > 0 if t1 > t2 00364 template <typename T> 00365 int sort_cmp(const void* t1, const void* t2) { 00366 const T* a = static_cast<const T *> (t1); 00367 const T* b = static_cast<const T *> (t2); 00368 if (*a < *b) { 00369 return -1; 00370 } else if (*b < *a) { 00371 return 1; 00372 } else { 00373 return 0; 00374 } 00375 } 00376 00377 // Used by PointerVector::sort() 00378 // return < 0 if t1 < t2 00379 // return 0 if t1 == t2 00380 // return > 0 if t1 > t2 00381 template <typename T> 00382 int sort_ptr_cmp(const void* t1, const void* t2) { 00383 const T* a = *reinterpret_cast<T * const *>(t1); 00384 const T* b = *reinterpret_cast<T * const *>(t2); 00385 if (*a < *b) { 00386 return -1; 00387 } else if (*b < *a) { 00388 return 1; 00389 } else { 00390 return 0; 00391 } 00392 } 00393 00394 // Subclass for a vector of pointers. Use in preference to GenericVector<T*> 00395 // as it provides automatic deletion and correct serialization, with the 00396 // corollary that all copy operations are deep copies of the pointed-to objects. 00397 template<typename T> 00398 class PointerVector : public GenericVector<T*> { 00399 public: 00400 PointerVector() : GenericVector<T*>() { } 00401 explicit PointerVector(int size) : GenericVector<T*>(size) { } 00402 ~PointerVector() { 00403 // Clear must be called here, even though it is called again by the base, 00404 // as the base will call the wrong clear. 00405 clear(); 00406 } 00407 // Copy must be deep, as the pointers will be automatically deleted on 00408 // destruction. 00409 PointerVector(const PointerVector& other) { 00410 this->init(other.size()); 00411 this->operator+=(other); 00412 } 00413 PointerVector<T>& operator+=(const PointerVector& other) { 00414 this->reserve(this->size_used_ + other.size_used_); 00415 for (int i = 0; i < other.size(); ++i) { 00416 this->push_back(new T(*other.data_[i])); 00417 } 00418 return *this; 00419 } 00420 00421 PointerVector<T>& operator=(const PointerVector& other) { 00422 this->truncate(0); 00423 this->operator+=(other); 00424 return *this; 00425 } 00426 00427 // Removes an element at the given index and 00428 // shifts the remaining elements to the left. 00429 void remove(int index) { 00430 delete GenericVector<T*>::data_[index]; 00431 GenericVector<T*>::remove(index); 00432 } 00433 00434 // Truncates the array to the given size by removing the end. 00435 // If the current size is less, the array is not expanded. 00436 void truncate(int size) { 00437 for (int i = size; i < GenericVector<T*>::size_used_; ++i) 00438 delete GenericVector<T*>::data_[i]; 00439 GenericVector<T*>::truncate(size); 00440 } 00441 00442 // Compact the vector by deleting elements for which delete_cb returns 00443 // true. delete_cb is a permanent callback and will be deleted. 00444 void compact(TessResultCallback1<bool, const T*>* delete_cb) { 00445 int new_size = 0; 00446 int old_index = 0; 00447 // Until the callback returns true, the elements stay the same. 00448 while (old_index < GenericVector<T*>::size_used_ && 00449 !delete_cb->Run(GenericVector<T*>::data_[old_index++])) 00450 ++new_size; 00451 // Now just copy anything else that gets false from delete_cb. 00452 for (; old_index < GenericVector<T*>::size_used_; ++old_index) { 00453 if (!delete_cb->Run(GenericVector<T*>::data_[old_index])) { 00454 GenericVector<T*>::data_[new_size++] = 00455 GenericVector<T*>::data_[old_index]; 00456 } else { 00457 delete GenericVector<T*>::data_[old_index]; 00458 } 00459 } 00460 GenericVector<T*>::size_used_ = new_size; 00461 delete delete_cb; 00462 } 00463 00464 // Clear the array, calling the clear callback function if any. 00465 // All the owned callbacks are also deleted. 00466 // If you don't want the callbacks to be deleted, before calling clear, set 00467 // the callback to NULL. 00468 void clear() { 00469 GenericVector<T*>::delete_data_pointers(); 00470 GenericVector<T*>::clear(); 00471 } 00472 00473 // Writes a vector of simple types to the given file. Assumes that bitwise 00474 // read/write of T will work. Returns false in case of error. 00475 bool Serialize(FILE* fp) const { 00476 inT32 used = GenericVector<T*>::size_used_; 00477 if (fwrite(&used, sizeof(used), 1, fp) != 1) return false; 00478 for (int i = 0; i < used; ++i) { 00479 inT8 non_null = GenericVector<T*>::data_[i] != NULL; 00480 if (fwrite(&non_null, sizeof(non_null), 1, fp) != 1) return false; 00481 if (non_null && !GenericVector<T*>::data_[i]->Serialize(fp)) return false; 00482 } 00483 return true; 00484 } 00485 // Reads a vector of simple types from the given file. Assumes that bitwise 00486 // read/write will work with ReverseN according to sizeof(T). 00487 // Also needs T::T(), as new T is used in this function. 00488 // Returns false in case of error. 00489 // If swap is true, assumes a big/little-endian swap is needed. 00490 bool DeSerialize(bool swap, FILE* fp) { 00491 inT32 reserved; 00492 if (fread(&reserved, sizeof(reserved), 1, fp) != 1) return false; 00493 if (swap) Reverse32(&reserved); 00494 GenericVector<T*>::reserve(reserved); 00495 for (int i = 0; i < reserved; ++i) { 00496 inT8 non_null; 00497 if (fread(&non_null, sizeof(non_null), 1, fp) != 1) return false; 00498 T* item = NULL; 00499 if (non_null) { 00500 item = new T; 00501 if (!item->DeSerialize(swap, fp)) return false; 00502 } 00503 this->push_back(item); 00504 } 00505 return true; 00506 } 00507 00508 // Sorts the items pointed to by the members of this vector using 00509 // t::operator<(). 00510 void sort() { 00511 sort(&sort_ptr_cmp<T>); 00512 } 00513 }; 00514 00515 } // namespace tesseract 00516 00517 // A useful vector that uses operator== to do comparisons. 00518 template <typename T> 00519 class GenericVectorEqEq : public GenericVector<T> { 00520 public: 00521 GenericVectorEqEq() { 00522 GenericVector<T>::set_compare_callback( 00523 NewPermanentTessCallback(tesseract::cmp_eq<T>)); 00524 } 00525 GenericVectorEqEq(int size) : GenericVector<T>(size) { 00526 GenericVector<T>::set_compare_callback( 00527 NewPermanentTessCallback(tesseract::cmp_eq<T>)); 00528 } 00529 }; 00530 00531 template <typename T> 00532 void GenericVector<T>::init(int size) { 00533 size_used_ = 0; 00534 size_reserved_ = 0; 00535 data_ = 0; 00536 clear_cb_ = 0; 00537 compare_cb_ = 0; 00538 reserve(size); 00539 } 00540 00541 template <typename T> 00542 GenericVector<T>::~GenericVector() { 00543 clear(); 00544 } 00545 00546 // Reserve some memory. If the internal array contains elements, they are 00547 // copied. 00548 template <typename T> 00549 void GenericVector<T>::reserve(int size) { 00550 if (size_reserved_ >= size || size <= 0) 00551 return; 00552 T* new_array = new T[size]; 00553 for (int i = 0; i < size_used_; ++i) 00554 new_array[i] = data_[i]; 00555 if (data_ != NULL) delete[] data_; 00556 data_ = new_array; 00557 size_reserved_ = size; 00558 } 00559 00560 template <typename T> 00561 void GenericVector<T>::double_the_size() { 00562 if (size_reserved_ == 0) { 00563 reserve(kDefaultVectorSize); 00564 } 00565 else { 00566 reserve(2 * size_reserved_); 00567 } 00568 } 00569 00570 // Resizes to size and sets all values to t. 00571 template <typename T> 00572 void GenericVector<T>::init_to_size(int size, T t) { 00573 reserve(size); 00574 size_used_ = size; 00575 for (int i = 0; i < size; ++i) 00576 data_[i] = t; 00577 } 00578 00579 00580 // Return the object from an index. 00581 template <typename T> 00582 T &GenericVector<T>::get(int index) const { 00583 ASSERT_HOST(index >= 0 && index < size_used_); 00584 return data_[index]; 00585 } 00586 00587 template <typename T> 00588 T &GenericVector<T>::operator[](int index) const { 00589 assert(index >= 0 && index < size_used_); 00590 return data_[index]; 00591 } 00592 00593 template <typename T> 00594 T &GenericVector<T>::back() const { 00595 ASSERT_HOST(size_used_ > 0); 00596 return data_[size_used_ - 1]; 00597 } 00598 // Returns the last object and removes it. 00599 template <typename T> 00600 T GenericVector<T>::pop_back() { 00601 ASSERT_HOST(size_used_ > 0); 00602 return data_[--size_used_]; 00603 } 00604 00605 // Return the object from an index. 00606 template <typename T> 00607 void GenericVector<T>::set(T t, int index) { 00608 ASSERT_HOST(index >= 0 && index < size_used_); 00609 data_[index] = t; 00610 } 00611 00612 // Shifts the rest of the elements to the right to make 00613 // space for the new elements and inserts the given element 00614 // at the specified index. 00615 template <typename T> 00616 void GenericVector<T>::insert(T t, int index) { 00617 ASSERT_HOST(index >= 0 && index <= size_used_); 00618 if (size_reserved_ == size_used_) 00619 double_the_size(); 00620 for (int i = size_used_; i > index; --i) { 00621 data_[i] = data_[i-1]; 00622 } 00623 data_[index] = t; 00624 size_used_++; 00625 } 00626 00627 // Removes an element at the given index and 00628 // shifts the remaining elements to the left. 00629 template <typename T> 00630 void GenericVector<T>::remove(int index) { 00631 ASSERT_HOST(index >= 0 && index < size_used_); 00632 for (int i = index; i < size_used_ - 1; ++i) { 00633 data_[i] = data_[i+1]; 00634 } 00635 size_used_--; 00636 } 00637 00638 // Return true if the index is valindex 00639 template <typename T> 00640 T GenericVector<T>::contains_index(int index) const { 00641 return index >= 0 && index < size_used_; 00642 } 00643 00644 // Return the index of the T object. 00645 template <typename T> 00646 int GenericVector<T>::get_index(T object) const { 00647 for (int i = 0; i < size_used_; ++i) { 00648 ASSERT_HOST(compare_cb_ != NULL); 00649 if (compare_cb_->Run(object, data_[i])) 00650 return i; 00651 } 00652 return -1; 00653 } 00654 00655 // Return true if T is in the array 00656 template <typename T> 00657 bool GenericVector<T>::contains(T object) const { 00658 return get_index(object) != -1; 00659 } 00660 00661 // Add an element in the array 00662 template <typename T> 00663 int GenericVector<T>::push_back(T object) { 00664 int index = 0; 00665 if (size_used_ == size_reserved_) 00666 double_the_size(); 00667 index = size_used_++; 00668 data_[index] = object; 00669 return index; 00670 } 00671 00672 template <typename T> 00673 int GenericVector<T>::push_back_new(T object) { 00674 int index = get_index(object); 00675 if (index >= 0) 00676 return index; 00677 return push_back(object); 00678 } 00679 00680 // Add an element in the array (front) 00681 template <typename T> 00682 int GenericVector<T>::push_front(T object) { 00683 if (size_used_ == size_reserved_) 00684 double_the_size(); 00685 for (int i = size_used_; i > 0; --i) 00686 data_[i] = data_[i-1]; 00687 data_[0] = object; 00688 ++size_used_; 00689 return 0; 00690 } 00691 00692 template <typename T> 00693 void GenericVector<T>::operator+=(T t) { 00694 push_back(t); 00695 } 00696 00697 template <typename T> 00698 GenericVector<T> &GenericVector<T>::operator+=(const GenericVector& other) { 00699 this->reserve(size_used_ + other.size_used_); 00700 for (int i = 0; i < other.size(); ++i) { 00701 this->operator+=(other.data_[i]); 00702 } 00703 return *this; 00704 } 00705 00706 template <typename T> 00707 GenericVector<T> &GenericVector<T>::operator=(const GenericVector& other) { 00708 this->truncate(0); 00709 this->operator+=(other); 00710 return *this; 00711 } 00712 00713 // Add a callback to be called to delete the elements when the array took 00714 // their ownership. 00715 template <typename T> 00716 void GenericVector<T>::set_clear_callback(TessCallback1<T>* cb) { 00717 clear_cb_ = cb; 00718 } 00719 00720 // Add a callback to be called to delete the elements when the array took 00721 // their ownership. 00722 template <typename T> 00723 void GenericVector<T>::set_compare_callback( 00724 TessResultCallback2<bool, T const &, T const &>* cb) { 00725 compare_cb_ = cb; 00726 } 00727 00728 // Clear the array, calling the callback function if any. 00729 template <typename T> 00730 void GenericVector<T>::clear() { 00731 if (size_reserved_ > 0) { 00732 if (clear_cb_ != NULL) 00733 for (int i = 0; i < size_used_; ++i) 00734 clear_cb_->Run(data_[i]); 00735 delete[] data_; 00736 data_ = NULL; 00737 size_used_ = 0; 00738 size_reserved_ = 0; 00739 } 00740 if (clear_cb_ != NULL) { 00741 delete clear_cb_; 00742 clear_cb_ = NULL; 00743 } 00744 if (compare_cb_ != NULL) { 00745 delete compare_cb_; 00746 compare_cb_ = NULL; 00747 } 00748 } 00749 00750 template <typename T> 00751 void GenericVector<T>::delete_data_pointers() { 00752 for (int i = 0; i < size_used_; ++i) 00753 if (data_[i]) { 00754 delete data_[i]; 00755 } 00756 } 00757 00758 00759 template <typename T> 00760 bool GenericVector<T>::write( 00761 FILE* f, TessResultCallback2<bool, FILE*, T const &>* cb) const { 00762 if (fwrite(&size_reserved_, sizeof(size_reserved_), 1, f) != 1) return false; 00763 if (fwrite(&size_used_, sizeof(size_used_), 1, f) != 1) return false; 00764 if (cb != NULL) { 00765 for (int i = 0; i < size_used_; ++i) { 00766 if (!cb->Run(f, data_[i])) { 00767 delete cb; 00768 return false; 00769 } 00770 } 00771 delete cb; 00772 } else { 00773 if (fwrite(data_, sizeof(T), size_used_, f) != size_used_) return false; 00774 } 00775 return true; 00776 } 00777 00778 template <typename T> 00779 bool GenericVector<T>::read(FILE* f, 00780 TessResultCallback3<bool, FILE*, T*, bool>* cb, 00781 bool swap) { 00782 inT32 reserved; 00783 if (fread(&reserved, sizeof(reserved), 1, f) != 1) return false; 00784 if (swap) Reverse32(&reserved); 00785 reserve(reserved); 00786 if (fread(&size_used_, sizeof(size_used_), 1, f) != 1) return false; 00787 if (swap) Reverse32(&size_used_); 00788 if (cb != NULL) { 00789 for (int i = 0; i < size_used_; ++i) { 00790 if (!cb->Run(f, data_ + i, swap)) { 00791 delete cb; 00792 return false; 00793 } 00794 } 00795 delete cb; 00796 } else { 00797 if (fread(data_, sizeof(T), size_used_, f) != size_used_) return false; 00798 if (swap) { 00799 for (int i = 0; i < size_used_; ++i) 00800 ReverseN(&data_[i], sizeof(T)); 00801 } 00802 } 00803 return true; 00804 } 00805 00806 // Writes a vector of simple types to the given file. Assumes that bitwise 00807 // read/write of T will work. Returns false in case of error. 00808 template <typename T> 00809 bool GenericVector<T>::Serialize(FILE* fp) const { 00810 if (fwrite(&size_used_, sizeof(size_used_), 1, fp) != 1) return false; 00811 if (fwrite(data_, sizeof(*data_), size_used_, fp) != size_used_) return false; 00812 return true; 00813 } 00814 00815 // Reads a vector of simple types from the given file. Assumes that bitwise 00816 // read/write will work with ReverseN according to sizeof(T). 00817 // Returns false in case of error. 00818 // If swap is true, assumes a big/little-endian swap is needed. 00819 template <typename T> 00820 bool GenericVector<T>::DeSerialize(bool swap, FILE* fp) { 00821 inT32 reserved; 00822 if (fread(&reserved, sizeof(reserved), 1, fp) != 1) return false; 00823 if (swap) Reverse32(&reserved); 00824 reserve(reserved); 00825 size_used_ = reserved; 00826 if (fread(data_, sizeof(T), size_used_, fp) != size_used_) return false; 00827 if (swap) { 00828 for (int i = 0; i < size_used_; ++i) 00829 ReverseN(&data_[i], sizeof(data_[i])); 00830 } 00831 return true; 00832 } 00833 00834 // Writes a vector of classes to the given file. Assumes the existence of 00835 // bool T::Serialize(FILE* fp) const that returns false in case of error. 00836 // Returns false in case of error. 00837 template <typename T> 00838 bool GenericVector<T>::SerializeClasses(FILE* fp) const { 00839 if (fwrite(&size_used_, sizeof(size_used_), 1, fp) != 1) return false; 00840 for (int i = 0; i < size_used_; ++i) { 00841 if (!data_[i].Serialize(fp)) return false; 00842 } 00843 return true; 00844 } 00845 00846 // Reads a vector of classes from the given file. Assumes the existence of 00847 // bool T::Deserialize(bool swap, FILE* fp) that returns false in case of 00848 // error. Alse needs T::T() and T::T(constT&), as init_to_size is used in 00849 // this function. Returns false in case of error. 00850 // If swap is true, assumes a big/little-endian swap is needed. 00851 template <typename T> 00852 bool GenericVector<T>::DeSerializeClasses(bool swap, FILE* fp) { 00853 uinT32 reserved; 00854 if (fread(&reserved, sizeof(reserved), 1, fp) != 1) return false; 00855 if (swap) Reverse32(&reserved); 00856 T empty; 00857 init_to_size(reserved, empty); 00858 for (int i = 0; i < reserved; ++i) { 00859 if (!data_[i].DeSerialize(swap, fp)) return false; 00860 } 00861 return true; 00862 } 00863 00864 // This method clear the current object, then, does a shallow copy of 00865 // its argument, and finally invalidates its argument. 00866 template <typename T> 00867 void GenericVector<T>::move(GenericVector<T>* from) { 00868 this->clear(); 00869 this->data_ = from->data_; 00870 this->size_reserved_ = from->size_reserved_; 00871 this->size_used_ = from->size_used_; 00872 this->compare_cb_ = from->compare_cb_; 00873 this->clear_cb_ = from->clear_cb_; 00874 from->data_ = NULL; 00875 from->clear_cb_ = NULL; 00876 from->compare_cb_ = NULL; 00877 from->size_used_ = 0; 00878 from->size_reserved_ = 0; 00879 } 00880 00881 template <typename T> 00882 void GenericVector<T>::sort() { 00883 sort(&tesseract::sort_cmp<T>); 00884 } 00885 00886 // Internal recursive version of choose_nth_item. 00887 // The algorithm used comes from "Algorithms" by Sedgewick: 00888 // http://books.google.com/books/about/Algorithms.html?id=idUdqdDXqnAC 00889 // The principle is to choose a random pivot, and move everything less than 00890 // the pivot to its left, and everything greater than the pivot to the end 00891 // of the array, then recurse on the part that contains the desired index, or 00892 // just return the answer if it is in the equal section in the middle. 00893 // The random pivot guarantees average linear time for the same reason that 00894 // n times vector::push_back takes linear time on average. 00895 // target_index, start and and end are all indices into the full array. 00896 // Seed is a seed for rand_r for thread safety purposes. Its value is 00897 // unimportant as the random numbers do not affect the result except 00898 // between equal answers. 00899 template <typename T> 00900 int GenericVector<T>::choose_nth_item(int target_index, int start, int end, 00901 unsigned int* seed) { 00902 // Number of elements to process. 00903 int num_elements = end - start; 00904 // Trivial cases. 00905 if (num_elements <= 1) 00906 return start; 00907 if (num_elements == 2) { 00908 if (data_[start] < data_[start + 1]) { 00909 return target_index > start ? start + 1 : start; 00910 } else { 00911 return target_index > start ? start : start + 1; 00912 } 00913 } 00914 // Place the pivot at start. 00915 #ifndef rand_r // _MSC_VER, ANDROID 00916 srand(*seed); 00917 #define rand_r(seed) rand() 00918 #endif // _MSC_VER 00919 int pivot = rand_r(seed) % num_elements + start; 00920 swap(pivot, start); 00921 // The invariant condition here is that items [start, next_lesser) are less 00922 // than the pivot (which is at index next_lesser) and items 00923 // [prev_greater, end) are greater than the pivot, with items 00924 // [next_lesser, prev_greater) being equal to the pivot. 00925 int next_lesser = start; 00926 int prev_greater = end; 00927 for (int next_sample = start + 1; next_sample < prev_greater;) { 00928 if (data_[next_sample] < data_[next_lesser]) { 00929 swap(next_lesser++, next_sample++); 00930 } else if (data_[next_sample] == data_[next_lesser]) { 00931 ++next_sample; 00932 } else { 00933 swap(--prev_greater, next_sample); 00934 } 00935 } 00936 // Now the invariant is set up, we recurse on just the section that contains 00937 // the desired index. 00938 if (target_index < next_lesser) 00939 return choose_nth_item(target_index, start, next_lesser, seed); 00940 else if (target_index < prev_greater) 00941 return next_lesser; // In equal bracket. 00942 else 00943 return choose_nth_item(target_index, prev_greater, end, seed); 00944 } 00945 00946 00947 #endif // TESSERACT_CCUTIL_GENERICVECTOR_H_