tesseract
3.03
|
00001 00002 // File: sorthelper.h 00003 // Description: Generic sort and maxfinding class. 00004 // Author: Ray Smith 00005 // Created: Thu May 20 17:48:21 PDT 2010 00006 // 00007 // (C) Copyright 2010, Google Inc. 00008 // Licensed under the Apache License, Version 2.0 (the "License"); 00009 // you may not use this file except in compliance with the License. 00010 // You may obtain a copy of the License at 00011 // http://www.apache.org/licenses/LICENSE-2.0 00012 // Unless required by applicable law or agreed to in writing, software 00013 // distributed under the License is distributed on an "AS IS" BASIS, 00014 // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 00015 // See the License for the specific language governing permissions and 00016 // limitations under the License. 00017 // 00019 00020 #ifndef TESSERACT_CCUTIL_SORTHELPER_H_ 00021 #define TESSERACT_CCUTIL_SORTHELPER_H_ 00022 00023 #include <stdlib.h> 00024 #include "genericvector.h" 00025 00026 // Generic class to provide functions based on a <value,count> pair. 00027 // T is the value type. 00028 // The class keeps a count of each value and can return the most frequent 00029 // value or a sorted array of the values with counts. 00030 // Note that this class uses linear search for adding. It is better 00031 // to use the STATS class to get the mode of a large number of values 00032 // in a small space. SortHelper is better to get the mode of a small number 00033 // of values from a large space. 00034 // T must have a copy constructor. 00035 template <typename T> 00036 class SortHelper { 00037 public: 00038 // Simple pair class to hold the values and counts. 00039 template<typename PairT> struct SortPair { 00040 PairT value; 00041 int count; 00042 }; 00043 // qsort function to sort by decreasing count. 00044 static int SortPairsByCount(const void* v1, const void* v2) { 00045 const SortPair<T>* p1 = reinterpret_cast<const SortPair<T>*>(v1); 00046 const SortPair<T>* p2 = reinterpret_cast<const SortPair<T>*>(v2); 00047 return p2->count - p1->count; 00048 } 00049 // qsort function to sort by decreasing value. 00050 static int SortPairsByValue(const void* v1, const void* v2) { 00051 const SortPair<T>* p1 = reinterpret_cast<const SortPair<T>*>(v1); 00052 const SortPair<T>* p2 = reinterpret_cast<const SortPair<T>*>(v2); 00053 if (p2->value - p1->value < 0) return -1; 00054 if (p2->value - p1->value > 0) return 1; 00055 return 0; 00056 } 00057 00058 // Constructor takes a hint of the array size, but it need not be accurate. 00059 explicit SortHelper(int sizehint) { 00060 counts_.reserve(sizehint); 00061 } 00062 00063 // Add a value that may be a duplicate of an existing value. 00064 // Uses a linear search. 00065 void Add(T value, int count) { 00066 // Linear search for value. 00067 for (int i = 0; i < counts_.size(); ++i) { 00068 if (counts_[i].value == value) { 00069 counts_[i].count += count; 00070 return; 00071 } 00072 } 00073 SortPair<T> new_pair = {value, count}; 00074 counts_.push_back(SortPair<T>(new_pair)); 00075 } 00076 00077 // Returns the frequency of the most frequent value. 00078 // If max_value is not NULL, returns the most frequent value. 00079 // If the array is empty, returns -MAX_INT32 and max_value is unchanged. 00080 int MaxCount(T* max_value) const { 00081 int best_count = -MAX_INT32; 00082 for (int i = 0; i < counts_.size(); ++i) { 00083 if (counts_[i].count > best_count) { 00084 best_count = counts_[i].count; 00085 if (max_value != NULL) 00086 *max_value = counts_[i].value; 00087 } 00088 } 00089 return best_count; 00090 } 00091 00092 // Returns the data array sorted by decreasing frequency. 00093 const GenericVector<SortPair<T> >& SortByCount() { 00094 counts_.sort(&SortPairsByCount); 00095 return counts_; 00096 } 00097 // Returns the data array sorted by decreasing value. 00098 const GenericVector<SortPair<T> >& SortByValue() { 00099 counts_.sort(&SortPairsByValue); 00100 return counts_; 00101 } 00102 00103 private: 00104 GenericVector<SortPair<T> > counts_; 00105 }; 00106 00107 00108 #endif // TESSERACT_CCUTIL_SORTHELPER_H_.