tesseract
3.03
|
00001 /********************************************************************** 00002 * File: statistc.c (Formerly stats.c) 00003 * Description: Simple statistical package for integer values. 00004 * Author: Ray Smith 00005 * Created: Mon Feb 04 16:56:05 GMT 1991 00006 * 00007 * (C) Copyright 1991, Hewlett-Packard Ltd. 00008 ** Licensed under the Apache License, Version 2.0 (the "License"); 00009 ** you may not use this file except in compliance with the License. 00010 ** You may obtain a copy of the License at 00011 ** http://www.apache.org/licenses/LICENSE-2.0 00012 ** Unless required by applicable law or agreed to in writing, software 00013 ** distributed under the License is distributed on an "AS IS" BASIS, 00014 ** WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 00015 ** See the License for the specific language governing permissions and 00016 ** limitations under the License. 00017 * 00018 **********************************************************************/ 00019 00020 // Include automatically generated configuration file if running autoconf. 00021 #ifdef HAVE_CONFIG_H 00022 #include "config_auto.h" 00023 #endif 00024 00025 #include "statistc.h" 00026 #include <string.h> 00027 #include <math.h> 00028 #include <stdlib.h> 00029 #include "helpers.h" 00030 #include "scrollview.h" 00031 #include "tprintf.h" 00032 00033 using tesseract::KDPairInc; 00034 00035 /********************************************************************** 00036 * STATS::STATS 00037 * 00038 * Construct a new stats element by allocating and zeroing the memory. 00039 **********************************************************************/ 00040 STATS::STATS(inT32 min_bucket_value, inT32 max_bucket_value_plus_1) { 00041 if (max_bucket_value_plus_1 <= min_bucket_value) { 00042 min_bucket_value = 0; 00043 max_bucket_value_plus_1 = 1; 00044 } 00045 rangemin_ = min_bucket_value; // setup 00046 rangemax_ = max_bucket_value_plus_1; 00047 buckets_ = new inT32[rangemax_ - rangemin_]; 00048 clear(); 00049 } 00050 00051 STATS::STATS() { 00052 rangemax_ = 0; 00053 rangemin_ = 0; 00054 buckets_ = NULL; 00055 } 00056 00057 /********************************************************************** 00058 * STATS::set_range 00059 * 00060 * Alter the range on an existing stats element. 00061 **********************************************************************/ 00062 bool STATS::set_range(inT32 min_bucket_value, inT32 max_bucket_value_plus_1) { 00063 if (max_bucket_value_plus_1 <= min_bucket_value) { 00064 return false; 00065 } 00066 if (rangemax_ - rangemin_ != max_bucket_value_plus_1 - min_bucket_value) { 00067 delete [] buckets_; 00068 buckets_ = new inT32[max_bucket_value_plus_1 - min_bucket_value]; 00069 } 00070 rangemin_ = min_bucket_value; // setup 00071 rangemax_ = max_bucket_value_plus_1; 00072 clear(); // zero it 00073 return true; 00074 } 00075 00076 /********************************************************************** 00077 * STATS::clear 00078 * 00079 * Clear out the STATS class by zeroing all the buckets. 00080 **********************************************************************/ 00081 void STATS::clear() { // clear out buckets 00082 total_count_ = 0; 00083 if (buckets_ != NULL) 00084 memset(buckets_, 0, (rangemax_ - rangemin_) * sizeof(buckets_[0])); 00085 } 00086 00087 /********************************************************************** 00088 * STATS::~STATS 00089 * 00090 * Destructor for a stats class. 00091 **********************************************************************/ 00092 STATS::~STATS () { 00093 if (buckets_ != NULL) { 00094 delete [] buckets_; 00095 buckets_ = NULL; 00096 } 00097 } 00098 00099 /********************************************************************** 00100 * STATS::add 00101 * 00102 * Add a set of samples to (or delete from) a pile. 00103 **********************************************************************/ 00104 void STATS::add(inT32 value, inT32 count) { 00105 if (buckets_ == NULL) { 00106 return; 00107 } 00108 value = ClipToRange(value, rangemin_, rangemax_ - 1); 00109 buckets_[value - rangemin_] += count; 00110 total_count_ += count; // keep count of total 00111 } 00112 00113 /********************************************************************** 00114 * STATS::mode 00115 * 00116 * Find the mode of a stats class. 00117 **********************************************************************/ 00118 inT32 STATS::mode() const { // get mode of samples 00119 if (buckets_ == NULL) { 00120 return rangemin_; 00121 } 00122 inT32 max = buckets_[0]; // max cell count 00123 inT32 maxindex = 0; // index of max 00124 for (int index = rangemax_ - rangemin_ - 1; index > 0; --index) { 00125 if (buckets_[index] > max) { 00126 max = buckets_[index]; // find biggest 00127 maxindex = index; 00128 } 00129 } 00130 return maxindex + rangemin_; // index of biggest 00131 } 00132 00133 /********************************************************************** 00134 * STATS::mean 00135 * 00136 * Find the mean of a stats class. 00137 **********************************************************************/ 00138 double STATS::mean() const { //get mean of samples 00139 if (buckets_ == NULL || total_count_ <= 0) { 00140 return static_cast<double>(rangemin_); 00141 } 00142 inT64 sum = 0; 00143 for (int index = rangemax_ - rangemin_ - 1; index >= 0; --index) { 00144 sum += static_cast<inT64>(index) * buckets_[index]; 00145 } 00146 return static_cast<double>(sum) / total_count_ + rangemin_; 00147 } 00148 00149 /********************************************************************** 00150 * STATS::sd 00151 * 00152 * Find the standard deviation of a stats class. 00153 **********************************************************************/ 00154 double STATS::sd() const { //standard deviation 00155 if (buckets_ == NULL || total_count_ <= 0) { 00156 return 0.0; 00157 } 00158 inT64 sum = 0; 00159 double sqsum = 0.0; 00160 for (int index = rangemax_ - rangemin_ - 1; index >= 0; --index) { 00161 sum += static_cast<inT64>(index) * buckets_[index]; 00162 sqsum += static_cast<double>(index) * index * buckets_[index]; 00163 } 00164 double variance = static_cast<double>(sum) / total_count_; 00165 variance = sqsum / total_count_ - variance * variance; 00166 if (variance > 0.0) 00167 return sqrt(variance); 00168 return 0.0; 00169 } 00170 00171 /********************************************************************** 00172 * STATS::ile 00173 * 00174 * Returns the fractile value such that frac fraction (in [0,1]) of samples 00175 * has a value less than the return value. 00176 **********************************************************************/ 00177 double STATS::ile(double frac) const { 00178 if (buckets_ == NULL || total_count_ == 0) { 00179 return static_cast<double>(rangemin_); 00180 } 00181 #if 0 00182 // TODO(rays) The existing code doesn't seem to be doing the right thing 00183 // with target a double but this substitute crashes the code that uses it. 00184 // Investigate and fix properly. 00185 int target = IntCastRounded(frac * total_count_); 00186 target = ClipToRange(target, 1, total_count_); 00187 #else 00188 double target = frac * total_count_; 00189 target = ClipToRange(target, 1.0, static_cast<double>(total_count_)); 00190 #endif 00191 int sum = 0; 00192 int index = 0; 00193 for (index = 0; index < rangemax_ - rangemin_ && sum < target; 00194 sum += buckets_[index++]); 00195 if (index > 0) { 00196 ASSERT_HOST(buckets_[index - 1] > 0); 00197 return rangemin_ + index - 00198 static_cast<double>(sum - target) / buckets_[index - 1]; 00199 } else { 00200 return static_cast<double>(rangemin_); 00201 } 00202 } 00203 00204 /********************************************************************** 00205 * STATS::min_bucket 00206 * 00207 * Find REAL minimum bucket - ile(0.0) isnt necessarily correct 00208 **********************************************************************/ 00209 inT32 STATS::min_bucket() const { // Find min 00210 if (buckets_ == NULL || total_count_ == 0) { 00211 return rangemin_; 00212 } 00213 inT32 min = 0; 00214 for (min = 0; (min < rangemax_ - rangemin_) && (buckets_[min] == 0); min++); 00215 return rangemin_ + min; 00216 } 00217 00218 00219 /********************************************************************** 00220 * STATS::max_bucket 00221 * 00222 * Find REAL maximum bucket - ile(1.0) isnt necessarily correct 00223 **********************************************************************/ 00224 00225 inT32 STATS::max_bucket() const { // Find max 00226 if (buckets_ == NULL || total_count_ == 0) { 00227 return rangemin_; 00228 } 00229 inT32 max; 00230 for (max = rangemax_ - rangemin_ - 1; max > 0 && buckets_[max] == 0; max--); 00231 return rangemin_ + max; 00232 } 00233 00234 /********************************************************************** 00235 * STATS::median 00236 * 00237 * Finds a more useful estimate of median than ile(0.5). 00238 * 00239 * Overcomes a problem with ile() - if the samples are, for example, 00240 * 6,6,13,14 ile(0.5) return 7.0 - when a more useful value would be midway 00241 * between 6 and 13 = 9.5 00242 **********************************************************************/ 00243 double STATS::median() const { //get median 00244 if (buckets_ == NULL) { 00245 return static_cast<double>(rangemin_); 00246 } 00247 double median = ile(0.5); 00248 int median_pile = static_cast<int>(floor(median)); 00249 if ((total_count_ > 1) && (pile_count(median_pile) == 0)) { 00250 inT32 min_pile; 00251 inT32 max_pile; 00252 /* Find preceeding non zero pile */ 00253 for (min_pile = median_pile; pile_count(min_pile) == 0; min_pile--); 00254 /* Find following non zero pile */ 00255 for (max_pile = median_pile; pile_count(max_pile) == 0; max_pile++); 00256 median = (min_pile + max_pile) / 2.0; 00257 } 00258 return median; 00259 } 00260 00261 /********************************************************************** 00262 * STATS::local_min 00263 * 00264 * Return TRUE if this point is a local min. 00265 **********************************************************************/ 00266 bool STATS::local_min(inT32 x) const { 00267 if (buckets_ == NULL) { 00268 return false; 00269 } 00270 x = ClipToRange(x, rangemin_, rangemax_ - 1) - rangemin_; 00271 if (buckets_[x] == 0) 00272 return true; 00273 inT32 index; // table index 00274 for (index = x - 1; index >= 0 && buckets_[index] == buckets_[x]; --index); 00275 if (index >= 0 && buckets_[index] < buckets_[x]) 00276 return false; 00277 for (index = x + 1; index < rangemax_ - rangemin_ && 00278 buckets_[index] == buckets_[x]; ++index); 00279 if (index < rangemax_ - rangemin_ && buckets_[index] < buckets_[x]) 00280 return false; 00281 else 00282 return true; 00283 } 00284 00285 /********************************************************************** 00286 * STATS::smooth 00287 * 00288 * Apply a triangular smoothing filter to the stats. 00289 * This makes the modes a bit more useful. 00290 * The factor gives the height of the triangle, i.e. the weight of the 00291 * centre. 00292 **********************************************************************/ 00293 void STATS::smooth(inT32 factor) { 00294 if (buckets_ == NULL || factor < 2) { 00295 return; 00296 } 00297 STATS result(rangemin_, rangemax_); 00298 int entrycount = rangemax_ - rangemin_; 00299 for (int entry = 0; entry < entrycount; entry++) { 00300 //centre weight 00301 int count = buckets_[entry] * factor; 00302 for (int offset = 1; offset < factor; offset++) { 00303 if (entry - offset >= 0) 00304 count += buckets_[entry - offset] * (factor - offset); 00305 if (entry + offset < entrycount) 00306 count += buckets_[entry + offset] * (factor - offset); 00307 } 00308 result.add(entry + rangemin_, count); 00309 } 00310 total_count_ = result.total_count_; 00311 memcpy(buckets_, result.buckets_, entrycount * sizeof(buckets_[0])); 00312 } 00313 00314 /********************************************************************** 00315 * STATS::cluster 00316 * 00317 * Cluster the samples into max_cluster clusters. 00318 * Each call runs one iteration. The array of clusters must be 00319 * max_clusters+1 in size as cluster 0 is used to indicate which samples 00320 * have been used. 00321 * The return value is the current number of clusters. 00322 **********************************************************************/ 00323 00324 inT32 STATS::cluster(float lower, // thresholds 00325 float upper, 00326 float multiple, // distance threshold 00327 inT32 max_clusters, // max no to make 00328 STATS *clusters) { // array of clusters 00329 BOOL8 new_cluster; // added one 00330 float *centres; // cluster centres 00331 inT32 entry; // bucket index 00332 inT32 cluster; // cluster index 00333 inT32 best_cluster; // one to assign to 00334 inT32 new_centre = 0; // residual mode 00335 inT32 new_mode; // pile count of new_centre 00336 inT32 count; // pile to place 00337 float dist; // from cluster 00338 float min_dist; // from best_cluster 00339 inT32 cluster_count; // no of clusters 00340 00341 if (buckets_ == NULL || max_clusters < 1) 00342 return 0; 00343 centres = new float[max_clusters + 1]; 00344 for (cluster_count = 1; cluster_count <= max_clusters 00345 && clusters[cluster_count].buckets_ != NULL 00346 && clusters[cluster_count].total_count_ > 0; 00347 cluster_count++) { 00348 centres[cluster_count] = 00349 static_cast<float>(clusters[cluster_count].ile(0.5)); 00350 new_centre = clusters[cluster_count].mode(); 00351 for (entry = new_centre - 1; centres[cluster_count] - entry < lower 00352 && entry >= rangemin_ 00353 && pile_count(entry) <= pile_count(entry + 1); 00354 entry--) { 00355 count = pile_count(entry) - clusters[0].pile_count(entry); 00356 if (count > 0) { 00357 clusters[cluster_count].add(entry, count); 00358 clusters[0].add (entry, count); 00359 } 00360 } 00361 for (entry = new_centre + 1; entry - centres[cluster_count] < lower 00362 && entry < rangemax_ 00363 && pile_count(entry) <= pile_count(entry - 1); 00364 entry++) { 00365 count = pile_count(entry) - clusters[0].pile_count(entry); 00366 if (count > 0) { 00367 clusters[cluster_count].add(entry, count); 00368 clusters[0].add(entry, count); 00369 } 00370 } 00371 } 00372 cluster_count--; 00373 00374 if (cluster_count == 0) { 00375 clusters[0].set_range(rangemin_, rangemax_); 00376 } 00377 do { 00378 new_cluster = FALSE; 00379 new_mode = 0; 00380 for (entry = 0; entry < rangemax_ - rangemin_; entry++) { 00381 count = buckets_[entry] - clusters[0].buckets_[entry]; 00382 //remaining pile 00383 if (count > 0) { //any to handle 00384 min_dist = static_cast<float>(MAX_INT32); 00385 best_cluster = 0; 00386 for (cluster = 1; cluster <= cluster_count; cluster++) { 00387 dist = entry + rangemin_ - centres[cluster]; 00388 //find distance 00389 if (dist < 0) 00390 dist = -dist; 00391 if (dist < min_dist) { 00392 min_dist = dist; //find least 00393 best_cluster = cluster; 00394 } 00395 } 00396 if (min_dist > upper //far enough for new 00397 && (best_cluster == 0 00398 || entry + rangemin_ > centres[best_cluster] * multiple 00399 || entry + rangemin_ < centres[best_cluster] / multiple)) { 00400 if (count > new_mode) { 00401 new_mode = count; 00402 new_centre = entry + rangemin_; 00403 } 00404 } 00405 } 00406 } 00407 // need new and room 00408 if (new_mode > 0 && cluster_count < max_clusters) { 00409 cluster_count++; 00410 new_cluster = TRUE; 00411 if (!clusters[cluster_count].set_range(rangemin_, rangemax_)) 00412 return 0; 00413 centres[cluster_count] = static_cast<float>(new_centre); 00414 clusters[cluster_count].add(new_centre, new_mode); 00415 clusters[0].add(new_centre, new_mode); 00416 for (entry = new_centre - 1; centres[cluster_count] - entry < lower 00417 && entry >= rangemin_ 00418 && pile_count (entry) <= pile_count(entry + 1); entry--) { 00419 count = pile_count(entry) - clusters[0].pile_count(entry); 00420 if (count > 0) { 00421 clusters[cluster_count].add(entry, count); 00422 clusters[0].add(entry, count); 00423 } 00424 } 00425 for (entry = new_centre + 1; entry - centres[cluster_count] < lower 00426 && entry < rangemax_ 00427 && pile_count (entry) <= pile_count(entry - 1); entry++) { 00428 count = pile_count(entry) - clusters[0].pile_count(entry); 00429 if (count > 0) { 00430 clusters[cluster_count].add(entry, count); 00431 clusters[0].add (entry, count); 00432 } 00433 } 00434 centres[cluster_count] = 00435 static_cast<float>(clusters[cluster_count].ile(0.5)); 00436 } 00437 } while (new_cluster && cluster_count < max_clusters); 00438 delete [] centres; 00439 return cluster_count; 00440 } 00441 00442 // Helper tests that the current index is still part of the peak and gathers 00443 // the data into the peak, returning false when the peak is ended. 00444 // src_buckets[index] - used_buckets[index] is the unused part of the histogram. 00445 // prev_count is the histogram count of the previous index on entry and is 00446 // updated to the current index on return. 00447 // total_count and total_value are accumulating the mean of the peak. 00448 static bool GatherPeak(int index, const int* src_buckets, int* used_buckets, 00449 int* prev_count, int* total_count, double* total_value) { 00450 int pile_count = src_buckets[index] - used_buckets[index]; 00451 if (pile_count <= *prev_count && pile_count > 0) { 00452 // Accumulate count and index.count product. 00453 *total_count += pile_count; 00454 *total_value += index * pile_count; 00455 // Mark this index as used 00456 used_buckets[index] = src_buckets[index]; 00457 *prev_count = pile_count; 00458 return true; 00459 } else { 00460 return false; 00461 } 00462 } 00463 00464 // Finds (at most) the top max_modes modes, well actually the whole peak around 00465 // each mode, returning them in the given modes vector as a <mean of peak, 00466 // total count of peak> pair in order of decreasing total count. 00467 // Since the mean is the key and the count the data in the pair, a single call 00468 // to sort on the output will re-sort by increasing mean of peak if that is 00469 // more useful than decreasing total count. 00470 // Returns the actual number of modes found. 00471 int STATS::top_n_modes(int max_modes, 00472 GenericVector<KDPairInc<float, int> >* modes) const { 00473 if (max_modes <= 0) return 0; 00474 int src_count = rangemax_ - rangemin_; 00475 // Used copies the counts in buckets_ as they get used. 00476 STATS used(rangemin_, rangemax_); 00477 modes->truncate(0); 00478 // Total count of the smallest peak found so far. 00479 int least_count = 1; 00480 // Mode that is used as a seed for each peak 00481 int max_count = 0; 00482 do { 00483 // Find an unused mode. 00484 max_count = 0; 00485 int max_index = 0; 00486 for (int src_index = 0; src_index < src_count; src_index++) { 00487 int pile_count = buckets_[src_index] - used.buckets_[src_index]; 00488 if (pile_count > max_count) { 00489 max_count = pile_count; 00490 max_index = src_index; 00491 } 00492 } 00493 if (max_count > 0) { 00494 // Copy the bucket count to used so it doesn't get found again. 00495 used.buckets_[max_index] = max_count; 00496 // Get the entire peak. 00497 double total_value = max_index * max_count; 00498 int total_count = max_count; 00499 int prev_pile = max_count; 00500 for (int offset = 1; max_index + offset < src_count; ++offset) { 00501 if (!GatherPeak(max_index + offset, buckets_, used.buckets_, 00502 &prev_pile, &total_count, &total_value)) 00503 break; 00504 } 00505 prev_pile = buckets_[max_index]; 00506 for (int offset = 1; max_index - offset >= 0; ++offset) { 00507 if (!GatherPeak(max_index - offset, buckets_, used.buckets_, 00508 &prev_pile, &total_count, &total_value)) 00509 break; 00510 } 00511 if (total_count > least_count || modes->size() < max_modes) { 00512 // We definitely want this mode, so if we have enough discard the least. 00513 if (modes->size() == max_modes) 00514 modes->truncate(max_modes - 1); 00515 int target_index = 0; 00516 // Linear search for the target insertion point. 00517 while (target_index < modes->size() && 00518 (*modes)[target_index].data >= total_count) 00519 ++target_index; 00520 float peak_mean = 00521 static_cast<float>(total_value / total_count + rangemin_); 00522 modes->insert(KDPairInc<float, int>(peak_mean, total_count), 00523 target_index); 00524 least_count = modes->back().data; 00525 } 00526 } 00527 } while (max_count > 0); 00528 return modes->size(); 00529 } 00530 00531 /********************************************************************** 00532 * STATS::print 00533 * 00534 * Prints a summary and table of the histogram. 00535 **********************************************************************/ 00536 void STATS::print() const { 00537 if (buckets_ == NULL) { 00538 return; 00539 } 00540 inT32 min = min_bucket() - rangemin_; 00541 inT32 max = max_bucket() - rangemin_; 00542 00543 int num_printed = 0; 00544 for (int index = min; index <= max; index++) { 00545 if (buckets_[index] != 0) { 00546 tprintf("%4d:%-3d ", rangemin_ + index, buckets_[index]); 00547 if (++num_printed % 8 == 0) 00548 tprintf ("\n"); 00549 } 00550 } 00551 tprintf ("\n"); 00552 print_summary(); 00553 } 00554 00555 00556 00557 /********************************************************************** 00558 * STATS::print_summary 00559 * 00560 * Print a summary of the stats. 00561 **********************************************************************/ 00562 void STATS::print_summary() const { 00563 if (buckets_ == NULL) { 00564 return; 00565 } 00566 inT32 min = min_bucket(); 00567 inT32 max = max_bucket(); 00568 tprintf("Total count=%d\n", total_count_); 00569 tprintf("Min=%.2f Really=%d\n", ile(0.0), min); 00570 tprintf("Lower quartile=%.2f\n", ile(0.25)); 00571 tprintf("Median=%.2f, ile(0.5)=%.2f\n", median(), ile(0.5)); 00572 tprintf("Upper quartile=%.2f\n", ile(0.75)); 00573 tprintf("Max=%.2f Really=%d\n", ile(1.0), max); 00574 tprintf("Range=%d\n", max + 1 - min); 00575 tprintf("Mean= %.2f\n", mean()); 00576 tprintf("SD= %.2f\n", sd()); 00577 } 00578 00579 00580 /********************************************************************** 00581 * STATS::plot 00582 * 00583 * Draw a histogram of the stats table. 00584 **********************************************************************/ 00585 00586 #ifndef GRAPHICS_DISABLED 00587 void STATS::plot(ScrollView* window, // to draw in 00588 float xorigin, // bottom left 00589 float yorigin, 00590 float xscale, // one x unit 00591 float yscale, // one y unit 00592 ScrollView::Color colour) const { // colour to draw in 00593 if (buckets_ == NULL) { 00594 return; 00595 } 00596 window->Pen(colour); 00597 00598 for (int index = 0; index < rangemax_ - rangemin_; index++) { 00599 window->Rectangle( xorigin + xscale * index, yorigin, 00600 xorigin + xscale * (index + 1), 00601 yorigin + yscale * buckets_[index]); 00602 } 00603 } 00604 #endif 00605 00606 00607 /********************************************************************** 00608 * STATS::plotline 00609 * 00610 * Draw a histogram of the stats table. (Line only) 00611 **********************************************************************/ 00612 00613 #ifndef GRAPHICS_DISABLED 00614 void STATS::plotline(ScrollView* window, // to draw in 00615 float xorigin, // bottom left 00616 float yorigin, 00617 float xscale, // one x unit 00618 float yscale, // one y unit 00619 ScrollView::Color colour) const { // colour to draw in 00620 if (buckets_ == NULL) { 00621 return; 00622 } 00623 window->Pen(colour); 00624 window->SetCursor(xorigin, yorigin + yscale * buckets_[0]); 00625 for (int index = 0; index < rangemax_ - rangemin_; index++) { 00626 window->DrawTo(xorigin + xscale * index, 00627 yorigin + yscale * buckets_[index]); 00628 } 00629 } 00630 #endif 00631 00632 00633 /********************************************************************** 00634 * choose_nth_item 00635 * 00636 * Returns the index of what would b the nth item in the array 00637 * if the members were sorted, without actually sorting. 00638 **********************************************************************/ 00639 00640 inT32 choose_nth_item(inT32 index, float *array, inT32 count) { 00641 inT32 next_sample; // next one to do 00642 inT32 next_lesser; // space for new 00643 inT32 prev_greater; // last one saved 00644 inT32 equal_count; // no of equal ones 00645 float pivot; // proposed median 00646 float sample; // current sample 00647 00648 if (count <= 1) 00649 return 0; 00650 if (count == 2) { 00651 if (array[0] < array[1]) { 00652 return index >= 1 ? 1 : 0; 00653 } 00654 else { 00655 return index >= 1 ? 0 : 1; 00656 } 00657 } 00658 else { 00659 if (index < 0) 00660 index = 0; // ensure legal 00661 else if (index >= count) 00662 index = count - 1; 00663 equal_count = (inT32) (rand() % count); 00664 pivot = array[equal_count]; 00665 // fill gap 00666 array[equal_count] = array[0]; 00667 next_lesser = 0; 00668 prev_greater = count; 00669 equal_count = 1; 00670 for (next_sample = 1; next_sample < prev_greater;) { 00671 sample = array[next_sample]; 00672 if (sample < pivot) { 00673 // shuffle 00674 array[next_lesser++] = sample; 00675 next_sample++; 00676 } 00677 else if (sample > pivot) { 00678 prev_greater--; 00679 // juggle 00680 array[next_sample] = array[prev_greater]; 00681 array[prev_greater] = sample; 00682 } 00683 else { 00684 equal_count++; 00685 next_sample++; 00686 } 00687 } 00688 for (next_sample = next_lesser; next_sample < prev_greater;) 00689 array[next_sample++] = pivot; 00690 if (index < next_lesser) 00691 return choose_nth_item (index, array, next_lesser); 00692 else if (index < prev_greater) 00693 return next_lesser; // in equal bracket 00694 else 00695 return choose_nth_item (index - prev_greater, 00696 array + prev_greater, 00697 count - prev_greater) + prev_greater; 00698 } 00699 } 00700 00701 /********************************************************************** 00702 * choose_nth_item 00703 * 00704 * Returns the index of what would be the nth item in the array 00705 * if the members were sorted, without actually sorting. 00706 **********************************************************************/ 00707 inT32 choose_nth_item(inT32 index, void *array, inT32 count, size_t size, 00708 int (*compar)(const void*, const void*)) { 00709 int result; // of compar 00710 inT32 next_sample; // next one to do 00711 inT32 next_lesser; // space for new 00712 inT32 prev_greater; // last one saved 00713 inT32 equal_count; // no of equal ones 00714 inT32 pivot; // proposed median 00715 00716 if (count <= 1) 00717 return 0; 00718 if (count == 2) { 00719 if (compar (array, (char *) array + size) < 0) { 00720 return index >= 1 ? 1 : 0; 00721 } 00722 else { 00723 return index >= 1 ? 0 : 1; 00724 } 00725 } 00726 if (index < 0) 00727 index = 0; // ensure legal 00728 else if (index >= count) 00729 index = count - 1; 00730 pivot = (inT32) (rand () % count); 00731 swap_entries (array, size, pivot, 0); 00732 next_lesser = 0; 00733 prev_greater = count; 00734 equal_count = 1; 00735 for (next_sample = 1; next_sample < prev_greater;) { 00736 result = 00737 compar ((char *) array + size * next_sample, 00738 (char *) array + size * next_lesser); 00739 if (result < 0) { 00740 swap_entries (array, size, next_lesser++, next_sample++); 00741 // shuffle 00742 } 00743 else if (result > 0) { 00744 prev_greater--; 00745 swap_entries(array, size, prev_greater, next_sample); 00746 } 00747 else { 00748 equal_count++; 00749 next_sample++; 00750 } 00751 } 00752 if (index < next_lesser) 00753 return choose_nth_item (index, array, next_lesser, size, compar); 00754 else if (index < prev_greater) 00755 return next_lesser; // in equal bracket 00756 else 00757 return choose_nth_item (index - prev_greater, 00758 (char *) array + size * prev_greater, 00759 count - prev_greater, size, 00760 compar) + prev_greater; 00761 } 00762 00763 /********************************************************************** 00764 * swap_entries 00765 * 00766 * Swap 2 entries of arbitrary size in-place in a table. 00767 **********************************************************************/ 00768 void swap_entries(void *array, // array of entries 00769 size_t size, // size of entry 00770 inT32 index1, // entries to swap 00771 inT32 index2) { 00772 char tmp; 00773 char *ptr1; // to entries 00774 char *ptr2; 00775 size_t count; // of bytes 00776 00777 ptr1 = reinterpret_cast<char*>(array) + index1 * size; 00778 ptr2 = reinterpret_cast<char*>(array) + index2 * size; 00779 for (count = 0; count < size; count++) { 00780 tmp = *ptr1; 00781 *ptr1++ = *ptr2; 00782 *ptr2++ = tmp; // tedious! 00783 } 00784 }