tesseract  3.03
/usr/local/google/home/jbreiden/tesseract-ocr-read-only/ccstruct/statistc.cpp
Go to the documentation of this file.
00001 /**********************************************************************
00002  * File:        statistc.c  (Formerly stats.c)
00003  * Description: Simple statistical package for integer values.
00004  * Author:                                      Ray Smith
00005  * Created:                                     Mon Feb 04 16:56:05 GMT 1991
00006  *
00007  * (C) Copyright 1991, Hewlett-Packard Ltd.
00008  ** Licensed under the Apache License, Version 2.0 (the "License");
00009  ** you may not use this file except in compliance with the License.
00010  ** You may obtain a copy of the License at
00011  ** http://www.apache.org/licenses/LICENSE-2.0
00012  ** Unless required by applicable law or agreed to in writing, software
00013  ** distributed under the License is distributed on an "AS IS" BASIS,
00014  ** WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
00015  ** See the License for the specific language governing permissions and
00016  ** limitations under the License.
00017  *
00018  **********************************************************************/
00019 
00020 // Include automatically generated configuration file if running autoconf.
00021 #ifdef HAVE_CONFIG_H
00022 #include "config_auto.h"
00023 #endif
00024 
00025 #include          "statistc.h"
00026 #include          <string.h>
00027 #include          <math.h>
00028 #include          <stdlib.h>
00029 #include          "helpers.h"
00030 #include          "scrollview.h"
00031 #include          "tprintf.h"
00032 
00033 using tesseract::KDPairInc;
00034 
00035 /**********************************************************************
00036  * STATS::STATS
00037  *
00038  * Construct a new stats element by allocating and zeroing the memory.
00039  **********************************************************************/
00040 STATS::STATS(inT32 min_bucket_value, inT32 max_bucket_value_plus_1) {
00041   if (max_bucket_value_plus_1 <= min_bucket_value) {
00042     min_bucket_value = 0;
00043     max_bucket_value_plus_1 = 1;
00044   }
00045   rangemin_ = min_bucket_value;                // setup
00046   rangemax_ = max_bucket_value_plus_1;
00047   buckets_ = new inT32[rangemax_ - rangemin_];
00048   clear();
00049 }
00050 
00051 STATS::STATS() {
00052   rangemax_ = 0;
00053   rangemin_ = 0;
00054   buckets_ = NULL;
00055 }
00056 
00057 /**********************************************************************
00058  * STATS::set_range
00059  *
00060  * Alter the range on an existing stats element.
00061  **********************************************************************/
00062 bool STATS::set_range(inT32 min_bucket_value, inT32 max_bucket_value_plus_1) {
00063   if (max_bucket_value_plus_1 <= min_bucket_value) {
00064     return false;
00065   }
00066   if (rangemax_ - rangemin_ != max_bucket_value_plus_1 - min_bucket_value) {
00067     delete [] buckets_;
00068     buckets_ = new inT32[max_bucket_value_plus_1 - min_bucket_value];
00069   }
00070   rangemin_ = min_bucket_value;                // setup
00071   rangemax_ = max_bucket_value_plus_1;
00072   clear();                // zero it
00073   return true;
00074 }
00075 
00076 /**********************************************************************
00077  * STATS::clear
00078  *
00079  * Clear out the STATS class by zeroing all the buckets.
00080  **********************************************************************/
00081 void STATS::clear() {  // clear out buckets
00082   total_count_ = 0;
00083   if (buckets_ != NULL)
00084     memset(buckets_, 0, (rangemax_ - rangemin_) * sizeof(buckets_[0]));
00085 }
00086 
00087 /**********************************************************************
00088  * STATS::~STATS
00089  *
00090  * Destructor for a stats class.
00091  **********************************************************************/
00092 STATS::~STATS () {
00093   if (buckets_ != NULL) {
00094     delete [] buckets_;
00095     buckets_ = NULL;
00096   }
00097 }
00098 
00099 /**********************************************************************
00100  * STATS::add
00101  *
00102  * Add a set of samples to (or delete from) a pile.
00103  **********************************************************************/
00104 void STATS::add(inT32 value, inT32 count) {
00105   if (buckets_ == NULL) {
00106     return;
00107   }
00108   value = ClipToRange(value, rangemin_, rangemax_ - 1);
00109   buckets_[value - rangemin_] += count;
00110   total_count_ += count;          // keep count of total
00111 }
00112 
00113 /**********************************************************************
00114  * STATS::mode
00115  *
00116  * Find the mode of a stats class.
00117  **********************************************************************/
00118 inT32 STATS::mode() const {  // get mode of samples
00119   if (buckets_ == NULL) {
00120     return rangemin_;
00121   }
00122   inT32 max = buckets_[0];           // max cell count
00123   inT32 maxindex = 0;                // index of max
00124   for (int index = rangemax_ - rangemin_ - 1; index > 0; --index) {
00125     if (buckets_[index] > max) {
00126       max = buckets_[index];      // find biggest
00127       maxindex = index;
00128     }
00129   }
00130   return maxindex + rangemin_;    // index of biggest
00131 }
00132 
00133 /**********************************************************************
00134  * STATS::mean
00135  *
00136  * Find the mean of a stats class.
00137  **********************************************************************/
00138 double STATS::mean() const {  //get mean of samples
00139   if (buckets_ == NULL || total_count_ <= 0) {
00140     return static_cast<double>(rangemin_);
00141   }
00142   inT64 sum = 0;
00143   for (int index = rangemax_ - rangemin_ - 1; index >= 0; --index) {
00144     sum += static_cast<inT64>(index) * buckets_[index];
00145   }
00146   return static_cast<double>(sum) / total_count_ + rangemin_;
00147 }
00148 
00149 /**********************************************************************
00150  * STATS::sd
00151  *
00152  * Find the standard deviation of a stats class.
00153  **********************************************************************/
00154 double STATS::sd() const {  //standard deviation
00155   if (buckets_ == NULL || total_count_ <= 0) {
00156     return 0.0;
00157   }
00158   inT64 sum = 0;
00159   double sqsum = 0.0;
00160   for (int index = rangemax_ - rangemin_ - 1; index >= 0; --index) {
00161     sum += static_cast<inT64>(index) * buckets_[index];
00162     sqsum += static_cast<double>(index) * index * buckets_[index];
00163   }
00164   double variance = static_cast<double>(sum) / total_count_;
00165   variance = sqsum / total_count_ - variance * variance;
00166   if (variance > 0.0)
00167     return sqrt(variance);
00168   return 0.0;
00169 }
00170 
00171 /**********************************************************************
00172  * STATS::ile
00173  *
00174  * Returns the fractile value such that frac fraction (in [0,1]) of samples
00175  * has a value less than the return value.
00176  **********************************************************************/
00177 double STATS::ile(double frac) const {
00178   if (buckets_ == NULL || total_count_ == 0) {
00179     return static_cast<double>(rangemin_);
00180   }
00181 #if 0
00182   // TODO(rays) The existing code doesn't seem to be doing the right thing
00183   // with target a double but this substitute crashes the code that uses it.
00184   // Investigate and fix properly.
00185   int target = IntCastRounded(frac * total_count_);
00186   target = ClipToRange(target, 1, total_count_);
00187 #else
00188   double target = frac * total_count_;
00189   target = ClipToRange(target, 1.0, static_cast<double>(total_count_));
00190 #endif
00191   int sum = 0;
00192   int index = 0;
00193   for (index = 0; index < rangemax_ - rangemin_ && sum < target;
00194        sum += buckets_[index++]);
00195   if (index > 0) {
00196     ASSERT_HOST(buckets_[index - 1] > 0);
00197     return rangemin_ + index -
00198         static_cast<double>(sum - target) / buckets_[index - 1];
00199   } else {
00200     return static_cast<double>(rangemin_);
00201   }
00202 }
00203 
00204 /**********************************************************************
00205  * STATS::min_bucket
00206  *
00207  * Find REAL minimum bucket - ile(0.0) isnt necessarily correct
00208  **********************************************************************/
00209 inT32 STATS::min_bucket() const {  // Find min
00210   if (buckets_ == NULL || total_count_ == 0) {
00211     return rangemin_;
00212   }
00213   inT32 min = 0;
00214   for (min = 0; (min < rangemax_ - rangemin_) && (buckets_[min] == 0); min++);
00215   return rangemin_ + min;
00216 }
00217 
00218 
00219 /**********************************************************************
00220  * STATS::max_bucket
00221  *
00222  * Find REAL maximum bucket - ile(1.0) isnt necessarily correct
00223  **********************************************************************/
00224 
00225 inT32 STATS::max_bucket() const {  // Find max
00226   if (buckets_ == NULL || total_count_ == 0) {
00227     return rangemin_;
00228   }
00229   inT32 max;
00230   for (max = rangemax_ - rangemin_ - 1; max > 0 && buckets_[max] == 0; max--);
00231   return rangemin_ + max;
00232 }
00233 
00234 /**********************************************************************
00235  * STATS::median
00236  *
00237  * Finds a more useful estimate of median than ile(0.5).
00238  *
00239  * Overcomes a problem with ile() - if the samples are, for example,
00240  * 6,6,13,14 ile(0.5) return 7.0 - when a more useful value would be midway
00241  * between 6 and 13 = 9.5
00242  **********************************************************************/
00243 double STATS::median() const {  //get median
00244   if (buckets_ == NULL) {
00245     return static_cast<double>(rangemin_);
00246   }
00247   double median = ile(0.5);
00248   int median_pile = static_cast<int>(floor(median));
00249   if ((total_count_ > 1) && (pile_count(median_pile) == 0)) {
00250     inT32 min_pile;
00251     inT32 max_pile;
00252     /* Find preceeding non zero pile */
00253     for (min_pile = median_pile; pile_count(min_pile) == 0; min_pile--);
00254     /* Find following non zero pile */
00255     for (max_pile = median_pile; pile_count(max_pile) == 0; max_pile++);
00256     median = (min_pile + max_pile) / 2.0;
00257   }
00258   return median;
00259 }
00260 
00261 /**********************************************************************
00262  * STATS::local_min
00263  *
00264  * Return TRUE if this point is a local min.
00265  **********************************************************************/
00266 bool STATS::local_min(inT32 x) const {
00267   if (buckets_ == NULL) {
00268     return false;
00269   }
00270   x = ClipToRange(x, rangemin_, rangemax_ - 1) - rangemin_;
00271   if (buckets_[x] == 0)
00272     return true;
00273   inT32 index;                   // table index
00274   for (index = x - 1; index >= 0 && buckets_[index] == buckets_[x]; --index);
00275   if (index >= 0 && buckets_[index] < buckets_[x])
00276     return false;
00277   for (index = x + 1; index < rangemax_ - rangemin_ &&
00278        buckets_[index] == buckets_[x]; ++index);
00279   if (index < rangemax_ - rangemin_ && buckets_[index] < buckets_[x])
00280     return false;
00281   else
00282     return true;
00283 }
00284 
00285 /**********************************************************************
00286  * STATS::smooth
00287  *
00288  * Apply a triangular smoothing filter to the stats.
00289  * This makes the modes a bit more useful.
00290  * The factor gives the height of the triangle, i.e. the weight of the
00291  * centre.
00292  **********************************************************************/
00293 void STATS::smooth(inT32 factor) {
00294   if (buckets_ == NULL || factor < 2) {
00295     return;
00296   }
00297   STATS result(rangemin_, rangemax_);
00298   int entrycount = rangemax_ - rangemin_;
00299   for (int entry = 0; entry < entrycount; entry++) {
00300                                  //centre weight
00301     int count = buckets_[entry] * factor;
00302     for (int offset = 1; offset < factor; offset++) {
00303       if (entry - offset >= 0)
00304         count += buckets_[entry - offset] * (factor - offset);
00305       if (entry + offset < entrycount)
00306         count += buckets_[entry + offset] * (factor - offset);
00307     }
00308     result.add(entry + rangemin_, count);
00309   }
00310   total_count_ = result.total_count_;
00311   memcpy(buckets_, result.buckets_, entrycount * sizeof(buckets_[0]));
00312 }
00313 
00314 /**********************************************************************
00315  * STATS::cluster
00316  *
00317  * Cluster the samples into max_cluster clusters.
00318  * Each call runs one iteration. The array of clusters must be
00319  * max_clusters+1 in size as cluster 0 is used to indicate which samples
00320  * have been used.
00321  * The return value is the current number of clusters.
00322  **********************************************************************/
00323 
00324 inT32 STATS::cluster(float lower,         // thresholds
00325                      float upper,
00326                      float multiple,      // distance threshold
00327                      inT32 max_clusters,  // max no to make
00328                      STATS *clusters) {   // array of clusters
00329   BOOL8 new_cluster;             // added one
00330   float *centres;                // cluster centres
00331   inT32 entry;                   // bucket index
00332   inT32 cluster;                 // cluster index
00333   inT32 best_cluster;            // one to assign to
00334   inT32 new_centre = 0;          // residual mode
00335   inT32 new_mode;                // pile count of new_centre
00336   inT32 count;                   // pile to place
00337   float dist;                    // from cluster
00338   float min_dist;                // from best_cluster
00339   inT32 cluster_count;           // no of clusters
00340 
00341   if (buckets_ == NULL || max_clusters < 1)
00342     return 0;
00343   centres = new float[max_clusters + 1];
00344   for (cluster_count = 1; cluster_count <= max_clusters
00345        && clusters[cluster_count].buckets_ != NULL
00346        && clusters[cluster_count].total_count_ > 0;
00347        cluster_count++) {
00348     centres[cluster_count] =
00349       static_cast<float>(clusters[cluster_count].ile(0.5));
00350     new_centre = clusters[cluster_count].mode();
00351     for (entry = new_centre - 1; centres[cluster_count] - entry < lower
00352          && entry >= rangemin_
00353          && pile_count(entry) <= pile_count(entry + 1);
00354          entry--) {
00355       count = pile_count(entry) - clusters[0].pile_count(entry);
00356       if (count > 0) {
00357         clusters[cluster_count].add(entry, count);
00358         clusters[0].add (entry, count);
00359       }
00360     }
00361     for (entry = new_centre + 1; entry - centres[cluster_count] < lower
00362          && entry < rangemax_
00363          && pile_count(entry) <= pile_count(entry - 1);
00364          entry++) {
00365       count = pile_count(entry) - clusters[0].pile_count(entry);
00366       if (count > 0) {
00367         clusters[cluster_count].add(entry, count);
00368         clusters[0].add(entry, count);
00369       }
00370     }
00371   }
00372   cluster_count--;
00373 
00374   if (cluster_count == 0) {
00375     clusters[0].set_range(rangemin_, rangemax_);
00376   }
00377   do {
00378     new_cluster = FALSE;
00379     new_mode = 0;
00380     for (entry = 0; entry < rangemax_ - rangemin_; entry++) {
00381       count = buckets_[entry] - clusters[0].buckets_[entry];
00382       //remaining pile
00383       if (count > 0) {           //any to handle
00384         min_dist = static_cast<float>(MAX_INT32);
00385         best_cluster = 0;
00386         for (cluster = 1; cluster <= cluster_count; cluster++) {
00387           dist = entry + rangemin_ - centres[cluster];
00388           //find distance
00389           if (dist < 0)
00390             dist = -dist;
00391           if (dist < min_dist) {
00392             min_dist = dist;     //find least
00393             best_cluster = cluster;
00394           }
00395         }
00396         if (min_dist > upper     //far enough for new
00397           && (best_cluster == 0
00398           || entry + rangemin_ > centres[best_cluster] * multiple
00399         || entry + rangemin_ < centres[best_cluster] / multiple)) {
00400           if (count > new_mode) {
00401             new_mode = count;
00402             new_centre = entry + rangemin_;
00403           }
00404         }
00405       }
00406     }
00407                                  // need new and room
00408     if (new_mode > 0 && cluster_count < max_clusters) {
00409       cluster_count++;
00410       new_cluster = TRUE;
00411       if (!clusters[cluster_count].set_range(rangemin_, rangemax_))
00412         return 0;
00413       centres[cluster_count] = static_cast<float>(new_centre);
00414       clusters[cluster_count].add(new_centre, new_mode);
00415       clusters[0].add(new_centre, new_mode);
00416       for (entry = new_centre - 1; centres[cluster_count] - entry < lower
00417         && entry >= rangemin_
00418       && pile_count (entry) <= pile_count(entry + 1); entry--) {
00419         count = pile_count(entry) - clusters[0].pile_count(entry);
00420         if (count > 0) {
00421           clusters[cluster_count].add(entry, count);
00422           clusters[0].add(entry, count);
00423         }
00424       }
00425       for (entry = new_centre + 1; entry - centres[cluster_count] < lower
00426         && entry < rangemax_
00427       && pile_count (entry) <= pile_count(entry - 1); entry++) {
00428         count = pile_count(entry) - clusters[0].pile_count(entry);
00429         if (count > 0) {
00430           clusters[cluster_count].add(entry, count);
00431           clusters[0].add (entry, count);
00432         }
00433       }
00434       centres[cluster_count] =
00435         static_cast<float>(clusters[cluster_count].ile(0.5));
00436     }
00437   } while (new_cluster && cluster_count < max_clusters);
00438   delete [] centres;
00439   return cluster_count;
00440 }
00441 
// Helper tests that the current index is still part of the peak and gathers
// the data into the peak, returning false when the peak is ended.
// src_buckets[index] - used_buckets[index] is the unused part of the histogram.
// An index belongs to the peak when its unused count is positive and no
// larger than *prev_count.
// prev_count is the histogram count of the previous index on entry and is
// updated to the current index on return (only when true is returned).
// total_count and total_value are accumulating the mean of the peak:
// total_count sums the counts and total_value sums index * count.
// Nothing is modified when false is returned.
static bool GatherPeak(int index, const int* src_buckets, int* used_buckets,
                       int* prev_count, int* total_count, double* total_value) {
  int pile_count = src_buckets[index] - used_buckets[index];
  if (pile_count <= *prev_count && pile_count > 0) {
    // Accumulate count and index.count product. The product is formed in
    // double so that a large index times a large count cannot overflow int.
    *total_count += pile_count;
    *total_value += static_cast<double>(index) * pile_count;
    // Mark this index as used
    used_buckets[index] = src_buckets[index];
    *prev_count = pile_count;
    return true;
  } else {
    return false;
  }
}
00463 
00464 // Finds (at most) the top max_modes modes, well actually the whole peak around
00465 // each mode, returning them in the given modes vector as a <mean of peak,
00466 // total count of peak> pair in order of decreasing total count.
00467 // Since the mean is the key and the count the data in the pair, a single call
00468 // to sort on the output will re-sort by increasing mean of peak if that is
00469 // more useful than decreasing total count.
00470 // Returns the actual number of modes found.
00471 int STATS::top_n_modes(int max_modes,
00472                        GenericVector<KDPairInc<float, int> >* modes) const {
00473   if (max_modes <= 0) return 0;
00474   int src_count = rangemax_ - rangemin_;
00475   // Used copies the counts in buckets_ as they get used.
00476   STATS used(rangemin_, rangemax_);
00477   modes->truncate(0);
00478   // Total count of the smallest peak found so far.
00479   int least_count = 1;
00480   // Mode that is used as a seed for each peak
00481   int max_count = 0;
00482   do {
00483     // Find an unused mode.
00484     max_count = 0;
00485     int max_index = 0;
00486     for (int src_index = 0; src_index < src_count; src_index++) {
00487       int pile_count = buckets_[src_index] - used.buckets_[src_index];
00488       if (pile_count > max_count) {
00489         max_count = pile_count;
00490         max_index = src_index;
00491       }
00492     }
00493     if (max_count > 0) {
00494       // Copy the bucket count to used so it doesn't get found again.
00495       used.buckets_[max_index] = max_count;
00496       // Get the entire peak.
00497       double total_value = max_index * max_count;
00498       int total_count = max_count;
00499       int prev_pile = max_count;
00500       for (int offset = 1; max_index + offset < src_count; ++offset) {
00501         if (!GatherPeak(max_index + offset, buckets_, used.buckets_,
00502                         &prev_pile, &total_count, &total_value))
00503           break;
00504       }
00505       prev_pile = buckets_[max_index];
00506       for (int offset = 1; max_index - offset >= 0; ++offset) {
00507         if (!GatherPeak(max_index - offset, buckets_, used.buckets_,
00508                         &prev_pile, &total_count, &total_value))
00509           break;
00510       }
00511       if (total_count > least_count || modes->size() < max_modes) {
00512         // We definitely want this mode, so if we have enough discard the least.
00513         if (modes->size() == max_modes)
00514           modes->truncate(max_modes - 1);
00515         int target_index = 0;
00516         // Linear search for the target insertion point.
00517         while (target_index < modes->size() &&
00518                (*modes)[target_index].data >= total_count)
00519           ++target_index;
00520         float peak_mean =
00521             static_cast<float>(total_value / total_count + rangemin_);
00522         modes->insert(KDPairInc<float, int>(peak_mean, total_count),
00523                       target_index);
00524         least_count = modes->back().data;
00525       }
00526     }
00527   } while (max_count > 0);
00528   return modes->size();
00529 }
00530 
00531 /**********************************************************************
00532  * STATS::print
00533  *
00534  * Prints a summary and table of the histogram.
00535  **********************************************************************/
00536 void STATS::print() const {
00537   if (buckets_ == NULL) {
00538     return;
00539   }
00540   inT32 min = min_bucket() - rangemin_;
00541   inT32 max = max_bucket() - rangemin_;
00542 
00543   int num_printed = 0;
00544   for (int index = min; index <= max; index++) {
00545     if (buckets_[index] != 0) {
00546       tprintf("%4d:%-3d ", rangemin_ + index, buckets_[index]);
00547       if (++num_printed % 8 == 0)
00548         tprintf ("\n");
00549     }
00550   }
00551   tprintf ("\n");
00552   print_summary();
00553 }
00554 
00555 
00556 
00557 /**********************************************************************
00558  * STATS::print_summary
00559  *
00560  * Print a summary of the stats.
00561  **********************************************************************/
00562 void STATS::print_summary() const {
00563   if (buckets_ == NULL) {
00564     return;
00565   }
00566   inT32 min = min_bucket();
00567   inT32 max = max_bucket();
00568   tprintf("Total count=%d\n", total_count_);
00569   tprintf("Min=%.2f Really=%d\n", ile(0.0), min);
00570   tprintf("Lower quartile=%.2f\n", ile(0.25));
00571   tprintf("Median=%.2f, ile(0.5)=%.2f\n", median(), ile(0.5));
00572   tprintf("Upper quartile=%.2f\n", ile(0.75));
00573   tprintf("Max=%.2f Really=%d\n", ile(1.0), max);
00574   tprintf("Range=%d\n", max + 1 - min);
00575   tprintf("Mean= %.2f\n", mean());
00576   tprintf("SD= %.2f\n", sd());
00577 }
00578 
00579 
00580 /**********************************************************************
00581  * STATS::plot
00582  *
00583  * Draw a histogram of the stats table.
00584  **********************************************************************/
00585 
00586 #ifndef GRAPHICS_DISABLED
00587 void STATS::plot(ScrollView* window,  // to draw in
00588                  float xorigin,       // bottom left
00589                  float yorigin,
00590                  float xscale,        // one x unit
00591                  float yscale,        // one y unit
00592                  ScrollView::Color colour) const {   // colour to draw in
00593   if (buckets_ == NULL) {
00594     return;
00595   }
00596   window->Pen(colour);
00597 
00598   for (int index = 0; index < rangemax_ - rangemin_; index++) {
00599     window->Rectangle( xorigin + xscale * index, yorigin,
00600       xorigin + xscale * (index + 1),
00601       yorigin + yscale * buckets_[index]);
00602   }
00603 }
00604 #endif
00605 
00606 
00607 /**********************************************************************
00608  * STATS::plotline
00609  *
00610  * Draw a histogram of the stats table. (Line only)
00611  **********************************************************************/
00612 
00613 #ifndef GRAPHICS_DISABLED
00614 void STATS::plotline(ScrollView* window,  // to draw in
00615                      float xorigin,       // bottom left
00616                      float yorigin,
00617                      float xscale,        // one x unit
00618                      float yscale,        // one y unit
00619                      ScrollView::Color colour) const {  // colour to draw in
00620   if (buckets_ == NULL) {
00621     return;
00622   }
00623   window->Pen(colour);
00624   window->SetCursor(xorigin, yorigin + yscale * buckets_[0]);
00625   for (int index = 0; index < rangemax_ - rangemin_; index++) {
00626     window->DrawTo(xorigin + xscale * index,
00627                    yorigin + yscale * buckets_[index]);
00628   }
00629 }
00630 #endif
00631 
00632 
00633 /**********************************************************************
00634  * choose_nth_item
00635  *
00636  * Returns the index of what would b the nth item in the array
00637  * if the members were sorted, without actually sorting.
00638  **********************************************************************/
00639 
00640 inT32 choose_nth_item(inT32 index, float *array, inT32 count) {
00641   inT32 next_sample;             // next one to do
00642   inT32 next_lesser;             // space for new
00643   inT32 prev_greater;            // last one saved
00644   inT32 equal_count;             // no of equal ones
00645   float pivot;                   // proposed median
00646   float sample;                  // current sample
00647 
00648   if (count <= 1)
00649     return 0;
00650   if (count == 2) {
00651     if (array[0] < array[1]) {
00652       return index >= 1 ? 1 : 0;
00653     }
00654     else {
00655       return index >= 1 ? 0 : 1;
00656     }
00657   }
00658   else {
00659     if (index < 0)
00660       index = 0;                 // ensure legal
00661     else if (index >= count)
00662       index = count - 1;
00663     equal_count = (inT32) (rand() % count);
00664     pivot = array[equal_count];
00665                                  // fill gap
00666     array[equal_count] = array[0];
00667     next_lesser = 0;
00668     prev_greater = count;
00669     equal_count = 1;
00670     for (next_sample = 1; next_sample < prev_greater;) {
00671       sample = array[next_sample];
00672       if (sample < pivot) {
00673                                  // shuffle
00674         array[next_lesser++] = sample;
00675         next_sample++;
00676       }
00677       else if (sample > pivot) {
00678         prev_greater--;
00679                                  // juggle
00680         array[next_sample] = array[prev_greater];
00681         array[prev_greater] = sample;
00682       }
00683       else {
00684         equal_count++;
00685         next_sample++;
00686       }
00687     }
00688     for (next_sample = next_lesser; next_sample < prev_greater;)
00689       array[next_sample++] = pivot;
00690     if (index < next_lesser)
00691       return choose_nth_item (index, array, next_lesser);
00692     else if (index < prev_greater)
00693       return next_lesser;        // in equal bracket
00694     else
00695       return choose_nth_item (index - prev_greater,
00696         array + prev_greater,
00697         count - prev_greater) + prev_greater;
00698   }
00699 }
00700 
00701 /**********************************************************************
00702  * choose_nth_item
00703  *
00704  * Returns the index of what would be the nth item in the array
00705  * if the members were sorted, without actually sorting.
00706  **********************************************************************/
00707 inT32 choose_nth_item(inT32 index, void *array, inT32 count, size_t size,
00708                       int (*compar)(const void*, const void*)) {
00709   int result;                    // of compar
00710   inT32 next_sample;             // next one to do
00711   inT32 next_lesser;             // space for new
00712   inT32 prev_greater;            // last one saved
00713   inT32 equal_count;             // no of equal ones
00714   inT32 pivot;                   // proposed median
00715 
00716   if (count <= 1)
00717     return 0;
00718   if (count == 2) {
00719     if (compar (array, (char *) array + size) < 0) {
00720       return index >= 1 ? 1 : 0;
00721     }
00722     else {
00723       return index >= 1 ? 0 : 1;
00724     }
00725   }
00726   if (index < 0)
00727     index = 0;                   // ensure legal
00728   else if (index >= count)
00729     index = count - 1;
00730   pivot = (inT32) (rand () % count);
00731   swap_entries (array, size, pivot, 0);
00732   next_lesser = 0;
00733   prev_greater = count;
00734   equal_count = 1;
00735   for (next_sample = 1; next_sample < prev_greater;) {
00736     result =
00737       compar ((char *) array + size * next_sample,
00738       (char *) array + size * next_lesser);
00739     if (result < 0) {
00740       swap_entries (array, size, next_lesser++, next_sample++);
00741       // shuffle
00742     }
00743     else if (result > 0) {
00744       prev_greater--;
00745       swap_entries(array, size, prev_greater, next_sample);
00746     }
00747     else {
00748       equal_count++;
00749       next_sample++;
00750     }
00751   }
00752   if (index < next_lesser)
00753     return choose_nth_item (index, array, next_lesser, size, compar);
00754   else if (index < prev_greater)
00755     return next_lesser;          // in equal bracket
00756   else
00757     return choose_nth_item (index - prev_greater,
00758       (char *) array + size * prev_greater,
00759       count - prev_greater, size,
00760       compar) + prev_greater;
00761 }
00762 
00763 /**********************************************************************
00764  * swap_entries
00765  *
00766  * Swap 2 entries of arbitrary size in-place in a table.
00767  **********************************************************************/
00768 void swap_entries(void *array,   // array of entries
00769                   size_t size,   // size of entry
00770                   inT32 index1,  // entries to swap
00771                   inT32 index2) {
00772   char tmp;
00773   char *ptr1;                    // to entries
00774   char *ptr2;
00775   size_t count;                  // of bytes
00776 
00777   ptr1 = reinterpret_cast<char*>(array) + index1 * size;
00778   ptr2 = reinterpret_cast<char*>(array) + index2 * size;
00779   for (count = 0; count < size; count++) {
00780     tmp = *ptr1;
00781     *ptr1++ = *ptr2;
00782     *ptr2++ = tmp;               // tedious!
00783   }
00784 }
 All Classes Namespaces Files Functions Variables Typedefs Enumerations Enumerator Friends Defines