// tesseract 3.03
00001 // Copyright 2012 Google Inc. All Rights Reserved. 00002 // Author: rays@google.com (Ray Smith) 00004 // File: genericheap.h 00005 // Description: Template heap class. 00006 // Author: Ray Smith, based on Dan Johnson's original code. 00007 // Created: Wed Mar 14 08:13:00 PDT 2012 00008 // 00009 // (C) Copyright 2012, Google Inc. 00010 // Licensed under the Apache License, Version 2.0 (the "License"); 00011 // you may not use this file except in compliance with the License. 00012 // You may obtain a copy of the License at 00013 // http://www.apache.org/licenses/LICENSE-2.0 00014 // Unless required by applicable law or agreed to in writing, software 00015 // distributed under the License is distributed on an "AS IS" BASIS, 00016 // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 00017 // See the License for the specific language governing permissions and 00018 // limitations under the License. 00019 // 00021 00022 #include "errcode.h" 00023 #include "genericvector.h" 00024 00025 #ifndef TESSERACT_CCUTIL_GENERICHEAP_H_ 00026 #define TESSERACT_CCUTIL_GENERICHEAP_H_ 00027 00028 namespace tesseract { 00029 00030 // GenericHeap requires 1 template argument: 00031 // Pair will normally be either KDPairInc<Key, Data> or KDPairDec<Key, Data> 00032 // for some arbitrary Key and scalar, smart pointer, or non-ownership pointer 00033 // Data type, according to whether a MIN heap or a MAX heap is desired, 00034 // respectively. Using KDPtrPairInc<Key, Data> or KDPtrPairDec<Key, Data>, 00035 // GenericHeap can also handle simple Data pointers and own them. 00036 // If no additional data is required, Pair can also be a scalar, since 00037 // GenericHeap doesn't look inside it except for operator<. 00038 // 00039 // The heap is stored as a packed binary tree in an array hosted by a 00040 // GenericVector<Pair>, with the invariant that the children of each node are 00041 // both NOT Pair::operator< the parent node. 
KDPairInc defines Pair::operator< 00042 // to use Key::operator< to generate a MIN heap and KDPairDec defines 00043 // Pair::operator< to use Key::operator> to generate a MAX heap by reversing 00044 // all the comparisons. 00045 // See http://en.wikipedia.org/wiki/Heap_(data_structure) for more detail on 00046 // the basic heap implementation. 00047 // 00048 // Insertion and removal are both O(log n) and, unlike the STL heap, an 00049 // explicit Reshuffle function allows a node to be repositioned in time O(log n) 00050 // after changing its value. 00051 // 00052 // Accessing the element for revaluation is a more complex matter, since the 00053 // index and pointer can be changed arbitrarily by heap operations. 00054 // Revaluation can be done by making the Data type in the Pair derived from or 00055 // contain a DoublePtr as its first data element, making it possible to convert 00056 // the pointer to a Pair using KDPairInc::RecastDataPointer. 00057 template <typename Pair> 00058 class GenericHeap { 00059 public: 00060 GenericHeap() {} 00061 // The initial size is only a GenericVector::reserve. It is not enforced as 00062 // the size limit of the heap. Caller must implement their own enforcement. 00063 explicit GenericHeap(int initial_size) { 00064 heap_.reserve(initial_size); 00065 } 00066 00067 // Simple accessors. 00068 bool empty() const { 00069 return heap_.empty(); 00070 } 00071 int size() const { 00072 return heap_.size(); 00073 } 00074 int size_reserved() const { 00075 return heap_.size_reserved(); 00076 } 00077 void clear() { 00078 // Clear truncates to 0 to keep the number reserved in tact. 00079 heap_.truncate(0); 00080 } 00081 // Provides access to the underlying vector. 00082 // Caution! any changes that modify the keys will invalidate the heap! 00083 GenericVector<Pair>* heap() { 00084 return &heap_; 00085 } 00086 // Provides read-only access to an element of the underlying vector. 
00087 const Pair& get(int index) const { 00088 return heap_[index]; 00089 } 00090 00091 // Add entry to the heap, keeping the smallest item at the top, by operator<. 00092 // Note that *entry is used as the source of operator=, but it is non-const 00093 // to allow for a smart pointer to be contained within. 00094 // Time = O(log n). 00095 void Push(Pair* entry) { 00096 int hole_index = heap_.size(); 00097 // Make a hole in the end of heap_ and sift it up to be the correct 00098 // location for the new *entry. To avoid needing a default constructor 00099 // for primitive types, and to allow for use of DoublePtr in the Pair 00100 // somewhere, we have to incur a double copy here. 00101 heap_.push_back(*entry); 00102 *entry = heap_.back(); 00103 hole_index = SiftUp(hole_index, *entry); 00104 heap_[hole_index] = *entry; 00105 } 00106 00107 // Get the value of the top (smallest, defined by operator< ) element. 00108 const Pair& PeekTop() const { 00109 return heap_[0]; 00110 } 00111 00112 // Removes the top element of the heap. If entry is not NULL, the element 00113 // is copied into *entry, otherwise it is discarded. 00114 // Returns false if the heap was already empty. 00115 // Time = O(log n). 00116 bool Pop(Pair* entry) { 00117 int new_size = heap_.size() - 1; 00118 if (new_size < 0) 00119 return false; // Already empty. 00120 if (entry != NULL) 00121 *entry = heap_[0]; 00122 if (new_size > 0) { 00123 // Sift the hole at the start of the heap_ downwards to match the last 00124 // element. 00125 Pair hole_pair = heap_[new_size]; 00126 heap_.truncate(new_size); 00127 int hole_index = SiftDown(0, hole_pair); 00128 heap_[hole_index] = hole_pair; 00129 } else { 00130 heap_.truncate(new_size); 00131 } 00132 return true; 00133 } 00134 00135 // Removes the MAXIMUM element of the heap. (MIN from a MAX heap.) If entry is 00136 // not NULL, the element is copied into *entry, otherwise it is discarded. 00137 // Time = O(n). Returns false if the heap was already empty. 
00138 bool PopWorst(Pair* entry) { 00139 int heap_size = heap_.size(); 00140 if (heap_size == 0) return false; // It cannot be empty! 00141 00142 // Find the maximum element. Its index is guaranteed to be greater than 00143 // the index of the parent of the last element, since by the heap invariant 00144 // the parent must be less than or equal to the children. 00145 int worst_index = heap_size - 1; 00146 int end_parent = ParentNode(worst_index); 00147 for (int i = worst_index - 1; i > end_parent; --i) { 00148 if (heap_[worst_index] < heap_[i]) 00149 worst_index = i; 00150 } 00151 // Extract the worst element from the heap, leaving a hole at worst_index. 00152 if (entry != NULL) 00153 *entry = heap_[worst_index]; 00154 --heap_size; 00155 if (heap_size > 0) { 00156 // Sift the hole upwards to match the last element of the heap_ 00157 Pair hole_pair = heap_[heap_size]; 00158 int hole_index = SiftUp(worst_index, hole_pair); 00159 heap_[hole_index] = hole_pair; 00160 } 00161 heap_.truncate(heap_size); 00162 return true; 00163 } 00164 00165 // The pointed-to Pair has changed its key value, so the location of pair 00166 // is reshuffled to maintain the heap invariant. 00167 // Must be a valid pointer to an element of the heap_! 00168 // Caution! Since GenericHeap is based on GenericVector, reallocs may occur 00169 // whenever the vector is extended and elements may get shuffled by any 00170 // Push or Pop operation. Therefore use this function only if Data in Pair is 00171 // of type DoublePtr, derived (first) from DoublePtr, or has a DoublePtr as 00172 // its first element. Reshuffles the heap to maintain the invariant. 00173 // Time = O(log n). 
00174 void Reshuffle(Pair* pair) { 00175 int index = pair - &heap_[0]; 00176 Pair hole_pair = heap_[index]; 00177 index = SiftDown(index, hole_pair); 00178 index = SiftUp(index, hole_pair); 00179 heap_[index] = hole_pair; 00180 } 00181 00182 private: 00183 // A hole in the heap exists at hole_index, and we want to fill it with the 00184 // given pair. SiftUp sifts the hole upward to the correct position and 00185 // returns the destination index without actually putting pair there. 00186 int SiftUp(int hole_index, const Pair& pair) { 00187 int parent; 00188 while (hole_index > 0 && pair < heap_[parent = ParentNode(hole_index)]) { 00189 heap_[hole_index] = heap_[parent]; 00190 hole_index = parent; 00191 } 00192 return hole_index; 00193 } 00194 00195 // A hole in the heap exists at hole_index, and we want to fill it with the 00196 // given pair. SiftDown sifts the hole downward to the correct position and 00197 // returns the destination index without actually putting pair there. 00198 int SiftDown(int hole_index, const Pair& pair) { 00199 int heap_size = heap_.size(); 00200 int child; 00201 while ((child = LeftChild(hole_index)) < heap_size) { 00202 if (child + 1 < heap_size && heap_[child + 1] < heap_[child]) 00203 ++child; 00204 if (heap_[child] < pair) { 00205 heap_[hole_index] = heap_[child]; 00206 hole_index = child; 00207 } else { 00208 break; 00209 } 00210 } 00211 return hole_index; 00212 } 00213 00214 // Functions to navigate the tree. Unlike the original implementation, we 00215 // store the root at index 0. 00216 int ParentNode(int index) const { 00217 return (index + 1) / 2 - 1; 00218 } 00219 int LeftChild(int index) const { 00220 return index * 2 + 1; 00221 } 00222 00223 private: 00224 GenericVector<Pair> heap_; 00225 }; 00226 00227 } // namespace tesseract 00228 00229 #endif // TESSERACT_CCUTIL_GENERICHEAP_H_