tesseract  3.03
/usr/local/google/home/jbreiden/tesseract-ocr-read-only/classify/intmatcher.cpp
Go to the documentation of this file.
00001 /******************************************************************************
00002  **      Filename:    intmatcher.c
00003  **      Purpose:     Generic high level classification routines.
00004  **      Author:      Robert Moss
00005  **      History:     Wed Feb 13 17:35:28 MST 1991, RWM, Created.
00006  **                   Mon Mar 11 16:33:02 MST 1991, RWM, Modified to add
00007  **                        support for adaptive matching.
00008  **      (c) Copyright Hewlett-Packard Company, 1988.
00009  ** Licensed under the Apache License, Version 2.0 (the "License");
00010  ** you may not use this file except in compliance with the License.
00011  ** You may obtain a copy of the License at
00012  ** http://www.apache.org/licenses/LICENSE-2.0
00013  ** Unless required by applicable law or agreed to in writing, software
00014  ** distributed under the License is distributed on an "AS IS" BASIS,
00015  ** WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
00016  ** See the License for the specific language governing permissions and
00017  ** limitations under the License.
00018  ******************************************************************************/
00019 
00020 // Include automatically generated configuration file if running autoconf.
00021 #ifdef HAVE_CONFIG_H
00022 #include "config_auto.h"
00023 #endif
00024 
00025 /*----------------------------------------------------------------------------
00026                           Include Files and Type Defines
00027 ----------------------------------------------------------------------------*/
00028 #include "intmatcher.h"
00029 #include "intproto.h"
00030 #include "callcpp.h"
00031 #include "scrollview.h"
00032 #include "float2int.h"
00033 #include "globals.h"
00034 #include "helpers.h"
00035 #include "classify.h"
00036 #include "shapetable.h"
00037 #include <math.h>
00038 
00039 /*----------------------------------------------------------------------------
00040                     Global Data Definitions and Declarations
00041 ----------------------------------------------------------------------------*/
// Parameters of the sigmoid used to convert similarity to evidence in the
// similarity_evidence_table_ that is used to convert distance metric to an
// 8 bit evidence value in the secondary matcher. (See IntMatcher::Init).
// NOTE(review): the multiplier is 0.0, which presumably flattens the
// exponential term of the sigmoid entirely -- confirm against
// IntegerMatcher::Init, which is not visible in this file section.
const float IntegerMatcher::kSEExponentialMultiplier = 0.0;
// Similarity value at the sigmoid's center (inflection point).
const float IntegerMatcher::kSimilarityCenter = 0.0075;
00047 
// offset_table[n] is the bit index of the least significant set bit of n
// (offset_table[1] == 0, offset_table[2] == 1, offset_table[4] == 2, ...),
// with offset_table[0] == 255 as a sentinel meaning "no bit set".
// Presumably used with next_table below to walk the set bits of a byte;
// the consuming code is outside this section -- verify at the call sites.
static const uinT8 offset_table[256] = {
  255, 0, 1, 0, 2, 0, 1, 0, 3, 0, 1, 0, 2, 0, 1, 0,
  4, 0, 1, 0, 2, 0, 1, 0, 3, 0, 1, 0, 2, 0, 1, 0,
  5, 0, 1, 0, 2, 0, 1, 0, 3, 0, 1, 0, 2, 0, 1, 0,
  4, 0, 1, 0, 2, 0, 1, 0, 3, 0, 1, 0, 2, 0, 1, 0,
  6, 0, 1, 0, 2, 0, 1, 0, 3, 0, 1, 0, 2, 0, 1, 0,
  4, 0, 1, 0, 2, 0, 1, 0, 3, 0, 1, 0, 2, 0, 1, 0,
  5, 0, 1, 0, 2, 0, 1, 0, 3, 0, 1, 0, 2, 0, 1, 0,
  4, 0, 1, 0, 2, 0, 1, 0, 3, 0, 1, 0, 2, 0, 1, 0,
  7, 0, 1, 0, 2, 0, 1, 0, 3, 0, 1, 0, 2, 0, 1, 0,
  4, 0, 1, 0, 2, 0, 1, 0, 3, 0, 1, 0, 2, 0, 1, 0,
  5, 0, 1, 0, 2, 0, 1, 0, 3, 0, 1, 0, 2, 0, 1, 0,
  4, 0, 1, 0, 2, 0, 1, 0, 3, 0, 1, 0, 2, 0, 1, 0,
  6, 0, 1, 0, 2, 0, 1, 0, 3, 0, 1, 0, 2, 0, 1, 0,
  4, 0, 1, 0, 2, 0, 1, 0, 3, 0, 1, 0, 2, 0, 1, 0,
  5, 0, 1, 0, 2, 0, 1, 0, 3, 0, 1, 0, 2, 0, 1, 0,
  4, 0, 1, 0, 2, 0, 1, 0, 3, 0, 1, 0, 2, 0, 1, 0
};
00066 
// next_table[n] is n with its least significant set bit cleared, i.e. the
// table form of n & (n - 1). E.g. next_table[0x0b] == 0x0a,
// next_table[0x0c] == 0x08, next_table of any power of two == 0.
// Together with offset_table this lets a caller enumerate the set bits of a
// byte: offset_table gives the lowest bit's index, next_table removes it.
static const uinT8 next_table[256] = {
  0, 0, 0, 0x2, 0, 0x4, 0x4, 0x6, 0, 0x8, 0x8, 0x0a, 0x08, 0x0c, 0x0c, 0x0e,
  0, 0x10, 0x10, 0x12, 0x10, 0x14, 0x14, 0x16, 0x10, 0x18, 0x18, 0x1a, 0x18,
  0x1c, 0x1c, 0x1e,
  0, 0x20, 0x20, 0x22, 0x20, 0x24, 0x24, 0x26, 0x20, 0x28, 0x28, 0x2a, 0x28,
  0x2c, 0x2c, 0x2e,
  0x20, 0x30, 0x30, 0x32, 0x30, 0x34, 0x34, 0x36, 0x30, 0x38, 0x38, 0x3a,
  0x38, 0x3c, 0x3c, 0x3e,
  0, 0x40, 0x40, 0x42, 0x40, 0x44, 0x44, 0x46, 0x40, 0x48, 0x48, 0x4a, 0x48,
  0x4c, 0x4c, 0x4e,
  0x40, 0x50, 0x50, 0x52, 0x50, 0x54, 0x54, 0x56, 0x50, 0x58, 0x58, 0x5a,
  0x58, 0x5c, 0x5c, 0x5e,
  0x40, 0x60, 0x60, 0x62, 0x60, 0x64, 0x64, 0x66, 0x60, 0x68, 0x68, 0x6a,
  0x68, 0x6c, 0x6c, 0x6e,
  0x60, 0x70, 0x70, 0x72, 0x70, 0x74, 0x74, 0x76, 0x70, 0x78, 0x78, 0x7a,
  0x78, 0x7c, 0x7c, 0x7e,
  0, 0x80, 0x80, 0x82, 0x80, 0x84, 0x84, 0x86, 0x80, 0x88, 0x88, 0x8a, 0x88,
  0x8c, 0x8c, 0x8e,
  0x80, 0x90, 0x90, 0x92, 0x90, 0x94, 0x94, 0x96, 0x90, 0x98, 0x98, 0x9a,
  0x98, 0x9c, 0x9c, 0x9e,
  0x80, 0xa0, 0xa0, 0xa2, 0xa0, 0xa4, 0xa4, 0xa6, 0xa0, 0xa8, 0xa8, 0xaa,
  0xa8, 0xac, 0xac, 0xae,
  0xa0, 0xb0, 0xb0, 0xb2, 0xb0, 0xb4, 0xb4, 0xb6, 0xb0, 0xb8, 0xb8, 0xba,
  0xb8, 0xbc, 0xbc, 0xbe,
  0x80, 0xc0, 0xc0, 0xc2, 0xc0, 0xc4, 0xc4, 0xc6, 0xc0, 0xc8, 0xc8, 0xca,
  0xc8, 0xcc, 0xcc, 0xce,
  0xc0, 0xd0, 0xd0, 0xd2, 0xd0, 0xd4, 0xd4, 0xd6, 0xd0, 0xd8, 0xd8, 0xda,
  0xd8, 0xdc, 0xdc, 0xde,
  0xc0, 0xe0, 0xe0, 0xe2, 0xe0, 0xe4, 0xe4, 0xe6, 0xe0, 0xe8, 0xe8, 0xea,
  0xe8, 0xec, 0xec, 0xee,
  0xe0, 0xf0, 0xf0, 0xf2, 0xf0, 0xf4, 0xf4, 0xf6, 0xf0, 0xf8, 0xf8, 0xfa,
  0xf8, 0xfc, 0xfc, 0xfe
};
00100 
00101 namespace tesseract {
00102 
// Encapsulation of the intermediate data and computations made by the class
// pruner. The class pruner implements a simple linear classifier on binary
// features by heavily quantizing the feature space, and applying
// NUM_BITS_PER_CLASS (2)-bit weights to the features. Lack of resolution in
// weights is compensated by a non-constant bias that is dependent on the
// number of features present.
//
// Intended call sequence (see Classify::PruneClasses below): ComputeScores,
// AdjustForExpectedNumFeatures, optional DisableDisabledClasses /
// DisableFragments, exactly one of NormalizeForXheight / NoNormalization,
// PruneAndSort, then the debug printers and/or SetupResults.
class ClassPruner {
 public:
  ClassPruner(int max_classes) {
    // The unrolled loop in ComputeScores means that the array sizes need to
    // be rounded up so that the array is big enough to accommodate the extra
    // entries accessed by the unrolling. Each pruner word is of sized
    // BITS_PER_WERD and each entry is NUM_BITS_PER_CLASS, so there are
    // BITS_PER_WERD / NUM_BITS_PER_CLASS entries.
    // See ComputeScores.
    max_classes_ = max_classes;
    rounded_classes_ = RoundUp(
        max_classes, WERDS_PER_CP_VECTOR * BITS_PER_WERD / NUM_BITS_PER_CLASS);
    class_count_ = new int[rounded_classes_];
    norm_count_ = new int[rounded_classes_];
    // sort_key_/sort_index_ get one extra element because PruneAndSort fills
    // them starting at index 1 (1-based, as passed to HeapSort).
    sort_key_ = new int[rounded_classes_ + 1];
    sort_index_ = new int[rounded_classes_ + 1];
    // Only class_count_ needs zeroing here; norm_count_ is fully written by
    // NormalizeForXheight() or NoNormalization() before it is ever read.
    for (int i = 0; i < rounded_classes_; i++) {
      class_count_[i] = 0;
    }
    pruning_threshold_ = 0;
    num_features_ = 0;
    num_classes_ = 0;
  }

  ~ClassPruner() {
    delete []class_count_;
    delete []norm_count_;
    delete []sort_key_;
    delete []sort_index_;
  }

  // Computes the scores for every class in the character set, by summing the
  // weights for each feature and stores the sums internally in class_count_.
  void ComputeScores(const INT_TEMPLATES_STRUCT* int_templates,
                     int num_features, const INT_FEATURE_STRUCT* features) {
    num_features_ = num_features;
    int num_pruners = int_templates->NumClassPruners;
    for (int f = 0; f < num_features; ++f) {
      const INT_FEATURE_STRUCT* feature = &features[f];
      // Quantize the feature to NUM_CP_BUCKETS*NUM_CP_BUCKETS*NUM_CP_BUCKETS.
      // X/Y/Theta are 8-bit quantities, hence the >> 8 rescale.
      int x = feature->X * NUM_CP_BUCKETS >> 8;
      int y = feature->Y * NUM_CP_BUCKETS >> 8;
      int theta = feature->Theta * NUM_CP_BUCKETS >> 8;
      int class_id = 0;
      // Each CLASS_PRUNER_STRUCT only covers CLASSES_PER_CP(32) classes, so
      // we need a collection of them, indexed by pruner_set.
      for (int pruner_set = 0; pruner_set < num_pruners; ++pruner_set) {
        // Look up quantized feature in a 3-D array, an array of weights for
        // each class.
        const uinT32* pruner_word_ptr =
            int_templates->ClassPruners[pruner_set]->p[x][y][theta];
        for (int word = 0; word < WERDS_PER_CP_VECTOR; ++word) {
          uinT32 pruner_word = *pruner_word_ptr++;
          // This inner loop is unrolled to speed up the ClassPruner.
          // Currently gcc would not unroll it unless it is set to O3
          // level of optimization or -funroll-loops is specified.
          /*
          uinT32 class_mask = (1 << NUM_BITS_PER_CLASS) - 1;
          for (int bit = 0; bit < BITS_PER_WERD/NUM_BITS_PER_CLASS; bit++) {
            class_count_[class_id++] += pruner_word & class_mask;
            pruner_word >>= NUM_BITS_PER_CLASS;
          }
          */
          // 16 unrolled iterations == BITS_PER_WERD / NUM_BITS_PER_CLASS
          // entries per word; rounded_classes_ guarantees the array can take
          // the overrun past max_classes_.
          class_count_[class_id++] += pruner_word & CLASS_PRUNER_CLASS_MASK;
          pruner_word >>= NUM_BITS_PER_CLASS;
          class_count_[class_id++] += pruner_word & CLASS_PRUNER_CLASS_MASK;
          pruner_word >>= NUM_BITS_PER_CLASS;
          class_count_[class_id++] += pruner_word & CLASS_PRUNER_CLASS_MASK;
          pruner_word >>= NUM_BITS_PER_CLASS;
          class_count_[class_id++] += pruner_word & CLASS_PRUNER_CLASS_MASK;
          pruner_word >>= NUM_BITS_PER_CLASS;
          class_count_[class_id++] += pruner_word & CLASS_PRUNER_CLASS_MASK;
          pruner_word >>= NUM_BITS_PER_CLASS;
          class_count_[class_id++] += pruner_word & CLASS_PRUNER_CLASS_MASK;
          pruner_word >>= NUM_BITS_PER_CLASS;
          class_count_[class_id++] += pruner_word & CLASS_PRUNER_CLASS_MASK;
          pruner_word >>= NUM_BITS_PER_CLASS;
          class_count_[class_id++] += pruner_word & CLASS_PRUNER_CLASS_MASK;
          pruner_word >>= NUM_BITS_PER_CLASS;
          class_count_[class_id++] += pruner_word & CLASS_PRUNER_CLASS_MASK;
          pruner_word >>= NUM_BITS_PER_CLASS;
          class_count_[class_id++] += pruner_word & CLASS_PRUNER_CLASS_MASK;
          pruner_word >>= NUM_BITS_PER_CLASS;
          class_count_[class_id++] += pruner_word & CLASS_PRUNER_CLASS_MASK;
          pruner_word >>= NUM_BITS_PER_CLASS;
          class_count_[class_id++] += pruner_word & CLASS_PRUNER_CLASS_MASK;
          pruner_word >>= NUM_BITS_PER_CLASS;
          class_count_[class_id++] += pruner_word & CLASS_PRUNER_CLASS_MASK;
          pruner_word >>= NUM_BITS_PER_CLASS;
          class_count_[class_id++] += pruner_word & CLASS_PRUNER_CLASS_MASK;
          pruner_word >>= NUM_BITS_PER_CLASS;
          class_count_[class_id++] += pruner_word & CLASS_PRUNER_CLASS_MASK;
          pruner_word >>= NUM_BITS_PER_CLASS;
          class_count_[class_id++] += pruner_word & CLASS_PRUNER_CLASS_MASK;
        }
      }
    }
  }

  // Adjusts the scores according to the number of expected features. Used
  // in lieu of a constant bias, this penalizes classes that expect more
  // features than there are present. Thus an actual c will score higher for c
  // than e, even though almost all the features match e as well as c, because
  // e expects more features to be present.
  void AdjustForExpectedNumFeatures(const uinT16* expected_num_features,
                                    int cutoff_strength) {
    for (int class_id = 0; class_id < max_classes_; ++class_id) {
      if (num_features_ < expected_num_features[class_id]) {
        // Integer-arithmetic scaling: the larger the deficit relative to
        // num_features_ * cutoff_strength, the bigger the subtracted share.
        int deficit = expected_num_features[class_id] - num_features_;
        class_count_[class_id] -= class_count_[class_id] * deficit /
          (num_features_ * cutoff_strength + deficit);
      }
    }
  }

  // Zeros the scores for classes disabled in the unicharset.
  // Implements the black-list to recognize a subset of the character set.
  void DisableDisabledClasses(const UNICHARSET& unicharset) {
    for (int class_id = 0; class_id < max_classes_; ++class_id) {
      if (!unicharset.get_enabled(class_id))
        class_count_[class_id] = 0;  // This char is disabled!
    }
  }

  // Zeros the scores of fragments.
  void DisableFragments(const UNICHARSET& unicharset) {
    for (int class_id = 0; class_id < max_classes_; ++class_id) {
      // Do not include character fragments in the class pruner
      // results if disable_character_fragments is true.
      if (unicharset.get_fragment(class_id)) {
        class_count_[class_id] = 0;
      }
    }
  }

  // Normalizes the counts for xheight, putting the normalized result in
  // norm_count_. Applies a simple subtractive penalty for incorrect vertical
  // position provided by the normalization_factors array, indexed by
  // character class, and scaled by the norm_multiplier.
  void NormalizeForXheight(int norm_multiplier,
                           const uinT8* normalization_factors) {
    for (int class_id = 0; class_id < max_classes_; class_id++) {
      // The penalty is an 8.8 fixed-point product, hence the >> 8.
      norm_count_[class_id] = class_count_[class_id] -
          ((norm_multiplier * normalization_factors[class_id]) >> 8);
    }
  }

  // The nop normalization copies the class_count_ array to norm_count_.
  void NoNormalization() {
    for (int class_id = 0; class_id < max_classes_; class_id++) {
      norm_count_[class_id] = class_count_[class_id];
    }
  }

  // Prunes the classes using <the maximum count> * pruning_factor/256 as a
  // threshold for keeping classes. If max_of_non_fragments, then ignore
  // fragments in computing the maximum count.
  void PruneAndSort(int pruning_factor, bool max_of_non_fragments,
                    const UNICHARSET& unicharset) {
    int max_count = 0;
    for (int c = 0; c < max_classes_; ++c) {
      if (norm_count_[c] > max_count &&
          // This additional check is added in order to ensure that
          // the classifier will return at least one non-fragmented
          // character match.
          // TODO(daria): verify that this helps accuracy and does not
          // hurt performance.
          (!max_of_non_fragments || !unicharset.get_fragment(c))) {
        max_count = norm_count_[c];
      }
    }
    // Prune Classes.
    pruning_threshold_ = (max_count * pruning_factor) >> 8;
    // Select Classes.
    if (pruning_threshold_ < 1)
      pruning_threshold_ = 1;
    num_classes_ = 0;
    // Survivors are stored at indices 1..num_classes_ (1-based), matching
    // how HeapSort and the readers below index these arrays.
    for (int class_id = 0; class_id < max_classes_; class_id++) {
      if (norm_count_[class_id] >= pruning_threshold_) {
          ++num_classes_;
        sort_index_[num_classes_] = class_id;
        sort_key_[num_classes_] = norm_count_[class_id];
      }
    }

    // Sort Classes using Heapsort Algorithm.
    if (num_classes_ > 1)
      HeapSort(num_classes_, sort_key_, sort_index_);
  }

  // Prints debug info on the class pruner matches for the pruned classes only.
  void DebugMatch(const Classify& classify,
                  const INT_TEMPLATES_STRUCT* int_templates,
                  const INT_FEATURE_STRUCT* features) const {
    int num_pruners = int_templates->NumClassPruners;
    int max_num_classes = int_templates->NumClasses;
    for (int f = 0; f < num_features_; ++f) {
      const INT_FEATURE_STRUCT* feature = &features[f];
      tprintf("F=%3d(%d,%d,%d),", f, feature->X, feature->Y, feature->Theta);
      // Quantize the feature to NUM_CP_BUCKETS*NUM_CP_BUCKETS*NUM_CP_BUCKETS.
      int x = feature->X * NUM_CP_BUCKETS >> 8;
      int y = feature->Y * NUM_CP_BUCKETS >> 8;
      int theta = feature->Theta * NUM_CP_BUCKETS >> 8;
      int class_id = 0;
      for (int pruner_set = 0; pruner_set < num_pruners; ++pruner_set) {
        // Look up quantized feature in a 3-D array, an array of weights for
        // each class.
        const uinT32* pruner_word_ptr =
            int_templates->ClassPruners[pruner_set]->p[x][y][theta];
        for (int word = 0; word < WERDS_PER_CP_VECTOR; ++word) {
          uinT32 pruner_word = *pruner_word_ptr++;
          // Same 16-entries-per-word layout as ComputeScores, but without
          // the unrolling since this is debug-only code.
          for (int word_class = 0; word_class < 16 &&
               class_id < max_num_classes; ++word_class, ++class_id) {
            if (norm_count_[class_id] >= pruning_threshold_) {
              tprintf(" %s=%d,",
                      classify.ClassIDToDebugStr(int_templates,
                                                 class_id, 0).string(),
                      pruner_word & CLASS_PRUNER_CLASS_MASK);
            }
            pruner_word >>= NUM_BITS_PER_CLASS;
          }
        }
        tprintf("\n");
      }
    }
  }

  // Prints a summary of the pruner result.
  void SummarizeResult(const Classify& classify,
                       const INT_TEMPLATES_STRUCT* int_templates,
                       const uinT16* expected_num_features,
                       int norm_multiplier,
                       const uinT8* normalization_factors) const {
    tprintf("CP:%d classes, %d features:\n", num_classes_, num_features_);
    // sort_index_/sort_key_ are 1-based and sorted ascending, so walk them
    // from the top down to print in best-first order.
    for (int i = 0; i < num_classes_; ++i) {
      int class_id = sort_index_[num_classes_ - i];
      STRING class_string = classify.ClassIDToDebugStr(int_templates,
                                                       class_id, 0);
      tprintf("%s:Initial=%d, E=%d, Xht-adj=%d, N=%d, Rat=%.2f\n",
              class_string.string(),
              class_count_[class_id],
              expected_num_features[class_id],
              (norm_multiplier * normalization_factors[class_id]) >> 8,
              sort_key_[num_classes_ - i],
              100.0 - 100.0 * sort_key_[num_classes_ - i] /
                (CLASS_PRUNER_CLASS_MASK * num_features_));
    }
  }

  // Copies the pruned, sorted classes into the output results and returns
  // the number of classes.
  int SetupResults(GenericVector<CP_RESULT_STRUCT>* results) const {
    CP_RESULT_STRUCT empty;
    results->init_to_size(num_classes_, empty);
    for (int c = 0; c < num_classes_; ++c) {
      (*results)[c].Class = sort_index_[num_classes_ - c];
      // Rating is the fraction of the maximum attainable score that was NOT
      // achieved, so 0.0 is a perfect match and 1.0 is the worst.
      (*results)[c].Rating = 1.0 - sort_key_[num_classes_ - c] /
        (static_cast<float>(CLASS_PRUNER_CLASS_MASK) * num_features_);
    }
    return num_classes_;
  }

 private:
  // Array[rounded_classes_] of initial counts for each class.
  int *class_count_;
  // Array[rounded_classes_] of modified counts for each class after normalizing
  // for expected number of features, disabled classes, fragments, and xheights.
  int *norm_count_;
  // Array[rounded_classes_ +1] of pruned counts that gets sorted
  int *sort_key_;
  // Array[rounded_classes_ +1] of classes corresponding to sort_key_.
  int *sort_index_;
  // Number of classes in this class pruner.
  int max_classes_;
  // Rounded up number of classes used for array sizes.
  int rounded_classes_;
  // Threshold count applied to prune classes.
  int pruning_threshold_;
  // The number of features used to compute the scores.
  int num_features_;
  // Final number of pruned classes.
  int num_classes_;
};
00392 
00393 /*----------------------------------------------------------------------------
00394               Public Code
00395 ----------------------------------------------------------------------------*/
00396 /*---------------------------------------------------------------------------*/
00397 // Runs the class pruner from int_templates on the given features, returning
00398 // the number of classes output in results.
00399 //    int_templates          Class pruner tables
00400 //    num_features           Number of features in blob
00401 //    features               Array of features
00402 //    normalization_factors  Array of fudge factors from blob
00403 //                           normalization process (by CLASS_INDEX)
00404 //    expected_num_features  Array of expected number of features
00405 //                           for each class (by CLASS_INDEX)
00406 //    results                Sorted Array of pruned classes. Must be an array
00407 //                           of size at least int_templates->NumClasses.
00408 int Classify::PruneClasses(const INT_TEMPLATES_STRUCT* int_templates,
00409                            int num_features,
00410                            const INT_FEATURE_STRUCT* features,
00411                            const uinT8* normalization_factors,
00412                            const uinT16* expected_num_features,
00413                            GenericVector<CP_RESULT_STRUCT>* results) {
00414 /*
00415  **  Operation:
00416  **    Prunes the classes using a modified fast match table.
00417  **    Returns a sorted list of classes along with the number
00418  **      of pruned classes in that list.
00419  **  Return: Number of pruned classes.
00420  **  Exceptions: none
00421  **  History: Tue Feb 19 10:24:24 MST 1991, RWM, Created.
00422  */
00423   ClassPruner pruner(int_templates->NumClasses);
00424   // Compute initial match scores for all classes.
00425   pruner.ComputeScores(int_templates, num_features, features);
00426   // Adjust match scores for number of expected features.
00427   pruner.AdjustForExpectedNumFeatures(expected_num_features,
00428                                       classify_cp_cutoff_strength);
00429   // Apply disabled classes in unicharset - only works without a shape_table.
00430   if (shape_table_ == NULL)
00431     pruner.DisableDisabledClasses(unicharset);
00432   // If fragments are disabled, remove them, also only without a shape table.
00433   if (disable_character_fragments && shape_table_ == NULL)
00434     pruner.DisableFragments(unicharset);
00435 
00436   // If we have good x-heights, apply the given normalization factors.
00437   if (normalization_factors != NULL) {
00438     pruner.NormalizeForXheight(classify_class_pruner_multiplier,
00439                                normalization_factors);
00440   } else {
00441     pruner.NoNormalization();
00442   }
00443   // Do the actual pruning and sort the short-list.
00444   pruner.PruneAndSort(classify_class_pruner_threshold,
00445                       shape_table_ == NULL, unicharset);
00446 
00447   if (classify_debug_level > 2) {
00448     pruner.DebugMatch(*this, int_templates, features);
00449   }
00450   if (classify_debug_level > 1) {
00451     pruner.SummarizeResult(*this, int_templates, expected_num_features,
00452                            classify_class_pruner_multiplier,
00453                            normalization_factors);
00454   }
00455   // Convert to the expected output format.
00456   return pruner.SetupResults(results);
00457 }
00458 
00459 }  // namespace tesseract
00460 
00461 /*---------------------------------------------------------------------------*/
00462 void IntegerMatcher::Match(INT_CLASS ClassTemplate,
00463                            BIT_VECTOR ProtoMask,
00464                            BIT_VECTOR ConfigMask,
00465                            inT16 NumFeatures,
00466                            const INT_FEATURE_STRUCT* Features,
00467                            INT_RESULT Result,
00468                            int AdaptFeatureThreshold,
00469                            int Debug,
00470                            bool SeparateDebugWindows) {
00471 /*
00472  **      Parameters:
00473  **              ClassTemplate             Prototypes & tables for a class
00474  **              BlobLength                Length of unormalized blob
00475  **              NumFeatures               Number of features in blob
00476  **              Features                  Array of features
00477  **              NormalizationFactor       Fudge factor from blob
00478  **                                        normalization process
00479  **              Result                    Class rating & configuration:
00480  **                                        (0.0 -> 1.0), 0=good, 1=bad
00481  **              Debug                     Debugger flag: 1=debugger on
00482  **      Globals:
00483  **              local_matcher_multiplier_    Normalization factor multiplier
00484  **      Operation:
00485  **              IntegerMatcher returns the best configuration and rating
00486  **              for a single class.  The class matched against is determined
00487  **              by the uniqueness of the ClassTemplate parameter.  The
00488  **              best rating and its associated configuration are returned.
00489  **      Return:
00490  **      Exceptions: none
00491  **      History: Tue Feb 19 16:36:23 MST 1991, RWM, Created.
00492  */
00493   ScratchEvidence *tables = new ScratchEvidence();
00494   int Feature;
00495   int BestMatch;
00496 
00497   if (MatchDebuggingOn (Debug))
00498     cprintf ("Integer Matcher -------------------------------------------\n");
00499 
00500   tables->Clear(ClassTemplate);
00501   Result->FeatureMisses = 0;
00502 
00503   for (Feature = 0; Feature < NumFeatures; Feature++) {
00504     int csum = UpdateTablesForFeature(ClassTemplate, ProtoMask, ConfigMask,
00505                                       Feature, &Features[Feature],
00506                                       tables, Debug);
00507     // Count features that were missed over all configs.
00508     if (csum == 0)
00509       Result->FeatureMisses++;
00510   }
00511 
00512 #ifndef GRAPHICS_DISABLED
00513   if (PrintProtoMatchesOn(Debug) || PrintMatchSummaryOn(Debug)) {
00514     DebugFeatureProtoError(ClassTemplate, ProtoMask, ConfigMask, *tables,
00515                            NumFeatures, Debug);
00516   }
00517 
00518   if (DisplayProtoMatchesOn(Debug)) {
00519     DisplayProtoDebugInfo(ClassTemplate, ProtoMask, ConfigMask,
00520                           *tables, SeparateDebugWindows);
00521   }
00522 
00523   if (DisplayFeatureMatchesOn(Debug)) {
00524     DisplayFeatureDebugInfo(ClassTemplate, ProtoMask, ConfigMask, NumFeatures,
00525                             Features, AdaptFeatureThreshold, Debug,
00526                             SeparateDebugWindows);
00527   }
00528 #endif
00529 
00530   tables->UpdateSumOfProtoEvidences(ClassTemplate, ConfigMask, NumFeatures);
00531   tables->NormalizeSums(ClassTemplate, NumFeatures, NumFeatures);
00532 
00533   BestMatch = FindBestMatch(ClassTemplate, *tables, Result);
00534 
00535 #ifndef GRAPHICS_DISABLED
00536   if (PrintMatchSummaryOn(Debug))
00537     DebugBestMatch(BestMatch, Result);
00538 
00539   if (MatchDebuggingOn(Debug))
00540     cprintf("Match Complete --------------------------------------------\n");
00541 #endif
00542 
00543   delete tables;
00544 }
00545 
00546 
00547 /*---------------------------------------------------------------------------*/
00548 int IntegerMatcher::FindGoodProtos(
00549     INT_CLASS ClassTemplate,
00550     BIT_VECTOR ProtoMask,
00551     BIT_VECTOR ConfigMask,
00552     uinT16 BlobLength,
00553     inT16 NumFeatures,
00554     INT_FEATURE_ARRAY Features,
00555     PROTO_ID *ProtoArray,
00556     int AdaptProtoThreshold,
00557     int Debug) {
00558 /*
00559  **      Parameters:
00560  **              ClassTemplate             Prototypes & tables for a class
00561  **              ProtoMask                 AND Mask for proto word
00562  **              ConfigMask                AND Mask for config word
00563  **              BlobLength                Length of unormalized blob
00564  **              NumFeatures               Number of features in blob
00565  **              Features                  Array of features
00566  **              ProtoArray                Array of good protos
00567  **              AdaptProtoThreshold       Threshold for good protos
00568  **              Debug                     Debugger flag: 1=debugger on
00569  **      Globals:
00570  **              local_matcher_multiplier_    Normalization factor multiplier
00571  **      Operation:
00572  **              FindGoodProtos finds all protos whose normalized proto-evidence
00573  **              exceed classify_adapt_proto_thresh.  The list is ordered by increasing
00574  **              proto id number.
00575  **      Return:
00576  **              Number of good protos in ProtoArray.
00577  **      Exceptions: none
00578  **      History: Tue Mar 12 17:09:26 MST 1991, RWM, Created
00579  */
00580   ScratchEvidence *tables = new ScratchEvidence();
00581   int NumGoodProtos = 0;
00582 
00583   /* DEBUG opening heading */
00584   if (MatchDebuggingOn (Debug))
00585     cprintf
00586       ("Find Good Protos -------------------------------------------\n");
00587 
00588   tables->Clear(ClassTemplate);
00589 
00590   for (int Feature = 0; Feature < NumFeatures; Feature++)
00591     UpdateTablesForFeature(
00592         ClassTemplate, ProtoMask, ConfigMask, Feature, &(Features[Feature]),
00593         tables, Debug);
00594 
00595 #ifndef GRAPHICS_DISABLED
00596   if (PrintProtoMatchesOn (Debug) || PrintMatchSummaryOn (Debug))
00597     DebugFeatureProtoError(ClassTemplate, ProtoMask, ConfigMask, *tables,
00598                            NumFeatures, Debug);
00599 #endif
00600 
00601   /* Average Proto Evidences & Find Good Protos */
00602   for (int proto = 0; proto < ClassTemplate->NumProtos; proto++) {
00603     /* Compute Average for Actual Proto */
00604     int Temp = 0;
00605     for (int i = 0; i < ClassTemplate->ProtoLengths[proto]; i++)
00606       Temp += tables->proto_evidence_[proto][i];
00607 
00608     Temp /= ClassTemplate->ProtoLengths[proto];
00609 
00610     /* Find Good Protos */
00611     if (Temp >= AdaptProtoThreshold) {
00612       *ProtoArray = proto;
00613       ProtoArray++;
00614       NumGoodProtos++;
00615     }
00616   }
00617 
00618   if (MatchDebuggingOn (Debug))
00619     cprintf ("Match Complete --------------------------------------------\n");
00620   delete tables;
00621 
00622   return NumGoodProtos;
00623 }
00624 
00625 
00626 /*---------------------------------------------------------------------------*/
00627 int IntegerMatcher::FindBadFeatures(
00628     INT_CLASS ClassTemplate,
00629     BIT_VECTOR ProtoMask,
00630     BIT_VECTOR ConfigMask,
00631     uinT16 BlobLength,
00632     inT16 NumFeatures,
00633     INT_FEATURE_ARRAY Features,
00634     FEATURE_ID *FeatureArray,
00635     int AdaptFeatureThreshold,
00636     int Debug) {
00637 /*
00638  **  Parameters:
00639  **      ClassTemplate             Prototypes & tables for a class
00640  **      ProtoMask                 AND Mask for proto word
00641  **      ConfigMask                AND Mask for config word
00642  **      BlobLength                Length of unormalized blob
00643  **      NumFeatures               Number of features in blob
00644  **      Features                  Array of features
00645  **      FeatureArray              Array of bad features
00646  **      AdaptFeatureThreshold     Threshold for bad features
00647  **      Debug                     Debugger flag: 1=debugger on
00648  **  Operation:
00649  **      FindBadFeatures finds all features with maximum feature-evidence <
00650  **      AdaptFeatureThresh. The list is ordered by increasing feature number.
00651  **  Return:
00652  **      Number of bad features in FeatureArray.
00653  **  History: Tue Mar 12 17:09:26 MST 1991, RWM, Created
00654  */
00655   ScratchEvidence *tables = new ScratchEvidence();
00656   int NumBadFeatures = 0;
00657 
00658   /* DEBUG opening heading */
00659   if (MatchDebuggingOn(Debug))
00660     cprintf("Find Bad Features -------------------------------------------\n");
00661 
00662   tables->Clear(ClassTemplate);
00663 
00664   for (int Feature = 0; Feature < NumFeatures; Feature++) {
00665     UpdateTablesForFeature(
00666         ClassTemplate, ProtoMask, ConfigMask, Feature, &Features[Feature],
00667         tables, Debug);
00668 
00669     /* Find Best Evidence for Current Feature */
00670     int best = 0;
00671     for (int i = 0; i < ClassTemplate->NumConfigs; i++)
00672       if (tables->feature_evidence_[i] > best)
00673         best = tables->feature_evidence_[i];
00674 
00675     /* Find Bad Features */
00676     if (best < AdaptFeatureThreshold) {
00677       *FeatureArray = Feature;
00678       FeatureArray++;
00679       NumBadFeatures++;
00680     }
00681   }
00682 
00683 #ifndef GRAPHICS_DISABLED
00684   if (PrintProtoMatchesOn(Debug) || PrintMatchSummaryOn(Debug))
00685     DebugFeatureProtoError(ClassTemplate, ProtoMask, ConfigMask, *tables,
00686                            NumFeatures, Debug);
00687 #endif
00688 
00689   if (MatchDebuggingOn(Debug))
00690     cprintf("Match Complete --------------------------------------------\n");
00691 
00692   delete tables;
00693   return NumBadFeatures;
00694 }
00695 
00696 
00697 /*---------------------------------------------------------------------------*/
00698 void IntegerMatcher::Init(tesseract::IntParam *classify_debug_level) {
00699   classify_debug_level_ = classify_debug_level;
00700 
00701   /* Initialize table for evidence to similarity lookup */
00702   for (int i = 0; i < SE_TABLE_SIZE; i++) {
00703     uinT32 IntSimilarity = i << (27 - SE_TABLE_BITS);
00704     double Similarity = ((double) IntSimilarity) / 65536.0 / 65536.0;
00705     double evidence = Similarity / kSimilarityCenter;
00706     evidence = 255.0 / (evidence * evidence + 1.0);
00707 
00708     if (kSEExponentialMultiplier > 0.0) {
00709       double scale = 1.0 - exp(-kSEExponentialMultiplier) *
00710         exp(kSEExponentialMultiplier * ((double) i / SE_TABLE_SIZE));
00711       evidence *= ClipToRange(scale, 0.0, 1.0);
00712     }
00713 
00714     similarity_evidence_table_[i] = (uinT8) (evidence + 0.5);
00715   }
00716 
00717   /* Initialize evidence computation variables */
00718   evidence_table_mask_ =
00719     ((1 << kEvidenceTableBits) - 1) << (9 - kEvidenceTableBits);
00720   mult_trunc_shift_bits_ = (14 - kIntEvidenceTruncBits);
00721   table_trunc_shift_bits_ = (27 - SE_TABLE_BITS - (mult_trunc_shift_bits_ << 1));
00722   evidence_mult_mask_ = ((1 << kIntEvidenceTruncBits) - 1);
00723 }
00724 
00725 
00729 void ScratchEvidence::Clear(const INT_CLASS class_template) {
00730   memset(sum_feature_evidence_, 0,
00731          class_template->NumConfigs * sizeof(sum_feature_evidence_[0]));
00732   memset(proto_evidence_, 0,
00733          class_template->NumProtos * sizeof(proto_evidence_[0]));
00734 }
00735 
00736 void ScratchEvidence::ClearFeatureEvidence(const INT_CLASS class_template) {
00737   memset(feature_evidence_, 0,
00738          class_template->NumConfigs * sizeof(feature_evidence_[0]));
00739 }
00740 
00741 
00742 
00743 /*---------------------------------------------------------------------------*/
00744 void IMDebugConfiguration(int FeatureNum,
00745                           uinT16 ActualProtoNum,
00746                           uinT8 Evidence,
00747                           BIT_VECTOR ConfigMask,
00748                           uinT32 ConfigWord) {
00749 /*
00750  **      Parameters:
00751  **      Globals:
00752  **      Operation:
00753  **              Print debugging information for Configuations
00754  **      Return:
00755  **      Exceptions: none
00756  **      History: Wed Feb 27 14:12:28 MST 1991, RWM, Created.
00757  */
00758   cprintf ("F = %3d, P = %3d, E = %3d, Configs = ",
00759     FeatureNum, (int) ActualProtoNum, (int) Evidence);
00760   while (ConfigWord) {
00761     if (ConfigWord & 1)
00762       cprintf ("1");
00763     else
00764       cprintf ("0");
00765     ConfigWord >>= 1;
00766   }
00767   cprintf ("\n");
00768 }
00769 
00770 
00771 /*---------------------------------------------------------------------------*/
00772 void IMDebugConfigurationSum(int FeatureNum,
00773                              uinT8 *FeatureEvidence,
00774                              inT32 ConfigCount) {
00775 /*
00776  **      Parameters:
00777  **      Globals:
00778  **      Operation:
00779  **              Print debugging information for Configuations
00780  **      Return:
00781  **      Exceptions: none
00782  **      History: Wed Feb 27 14:12:28 MST 1991, RWM, Created.
00783  */
00784   cprintf("F=%3d, C=", FeatureNum);
00785   for (int ConfigNum = 0; ConfigNum < ConfigCount; ConfigNum++) {
00786     cprintf("%4d", FeatureEvidence[ConfigNum]);
00787   }
00788   cprintf("\n");
00789 }
00790 
00791 
00792 
00793 /*---------------------------------------------------------------------------*/
int IntegerMatcher::UpdateTablesForFeature(
    INT_CLASS ClassTemplate,
    BIT_VECTOR ProtoMask,
    BIT_VECTOR ConfigMask,
    int FeatureNum,
    const INT_FEATURE_STRUCT* Feature,
    ScratchEvidence *tables,
    int Debug) {
/*
 **  Parameters:
 **      ClassTemplate         Prototypes & tables for a class
 **      ProtoMask             AND mask selecting which protos to consider
 **      ConfigMask            AND mask selecting which configs to consider
 **      FeatureNum            Current feature number (for DEBUG only)
 **      Feature               Pointer to a feature struct
 **      tables                Evidence tables
 **      Debug                 Debugger flag: 1=debugger on
 **  Operation:
 **       For the given feature: prune protos, compute evidence,
 **       update Feature Evidence, Proto Evidence, and Sum of Feature
 **       Evidence tables.
 **  Return:
 **       Sum of the feature evidence over all configurations.
 */
  register uinT32 ConfigWord;
  register uinT32 ProtoWord;
  register uinT32 ProtoNum;
  register uinT32 ActualProtoNum;
  uinT8 proto_byte;
  inT32 proto_word_offset;
  inT32 proto_offset;
  uinT8 config_byte;
  inT32 config_offset;
  PROTO_SET ProtoSet;
  uinT32 *ProtoPrunerPtr;
  INT_PROTO Proto;
  int ProtoSetIndex;
  uinT8 Evidence;
  uinT32 XFeatureAddress;
  uinT32 YFeatureAddress;
  uinT32 ThetaFeatureAddress;
  register uinT8 *UINT8Pointer;
  register int ProtoIndex;
  uinT8 Temp;
  register int *IntPointer;
  int ConfigNum;
  register inT32 M3;
  register inT32 A3;
  register uinT32 A4;

  tables->ClearFeatureEvidence(ClassTemplate);

  /* Precompute Feature Address offset for Proto Pruning */
  // The proto pruner is indexed as three banks of NUM_PP_BUCKETS buckets
  // (X, Y, Theta); each coordinate is quantized by >> 2 and each bucket
  // occupies two 32-bit words (hence the << 1).
  XFeatureAddress = ((Feature->X >> 2) << 1);
  YFeatureAddress = (NUM_PP_BUCKETS << 1) + ((Feature->Y >> 2) << 1);
  ThetaFeatureAddress = (NUM_PP_BUCKETS << 2) + ((Feature->Theta >> 2) << 1);

  for (ProtoSetIndex = 0, ActualProtoNum = 0;
  ProtoSetIndex < ClassTemplate->NumProtoSets; ProtoSetIndex++) {
    ProtoSet = ClassTemplate->ProtoSets[ProtoSetIndex];
    ProtoPrunerPtr = (uinT32 *) ((*ProtoSet).ProtoPruner);
    for (ProtoNum = 0; ProtoNum < PROTOS_PER_PROTO_SET;
      ProtoNum += (PROTOS_PER_PROTO_SET >> 1), ActualProtoNum +=
    (PROTOS_PER_PROTO_SET >> 1), ProtoMask++, ProtoPrunerPtr++) {
      /* Prune Protos of current Proto Set */
      // A proto survives only if its bit is set in all three coordinate
      // pruner words AND in the caller-supplied ProtoMask.
      ProtoWord = *(ProtoPrunerPtr + XFeatureAddress);
      ProtoWord &= *(ProtoPrunerPtr + YFeatureAddress);
      ProtoWord &= *(ProtoPrunerPtr + ThetaFeatureAddress);
      ProtoWord &= *ProtoMask;

      if (ProtoWord != 0) {
        // Walk the set bits of ProtoWord byte by byte.  offset_table /
        // next_table appear to give, for a byte, the index of a set bit
        // and the byte with that bit cleared -- TODO confirm against
        // their definitions.
        proto_byte = ProtoWord & 0xff;
        ProtoWord >>= 8;
        proto_word_offset = 0;
        while (ProtoWord != 0 || proto_byte != 0) {
          while (proto_byte == 0) {
            proto_byte = ProtoWord & 0xff;
            ProtoWord >>= 8;
            proto_word_offset += 8;
          }
          proto_offset = offset_table[proto_byte] + proto_word_offset;
          proto_byte = next_table[proto_byte];
          Proto = &(ProtoSet->Protos[ProtoNum + proto_offset]);
          ConfigWord = Proto->Configs[0];
          // A3: perpendicular distance of the feature from the proto
          // line (fixed point); M3: angular difference scaled by the
          // theta fudge factor.
          A3 = (((Proto->A * (Feature->X - 128)) << 1)
            - (Proto->B * (Feature->Y - 128)) + (Proto->C << 9));
          M3 =
            (((inT8) (Feature->Theta - Proto->Angle)) * kIntThetaFudge) << 1;

          // ~x == -x - 1: cheap absolute value, off by one for negatives.
          if (A3 < 0)
            A3 = ~A3;
          if (M3 < 0)
            M3 = ~M3;
          A3 >>= mult_trunc_shift_bits_;
          M3 >>= mult_trunc_shift_bits_;
          // Saturate both components before squaring to avoid overflow.
          if (A3 > evidence_mult_mask_)
            A3 = evidence_mult_mask_;
          if (M3 > evidence_mult_mask_)
            M3 = evidence_mult_mask_;

          // Squared distance indexes the precomputed evidence table;
          // anything beyond the table range contributes zero evidence.
          A4 = (A3 * A3) + (M3 * M3);
          A4 >>= table_trunc_shift_bits_;
          if (A4 > evidence_table_mask_)
            Evidence = 0;
          else
            Evidence = similarity_evidence_table_[A4];

          if (PrintFeatureMatchesOn (Debug))
            IMDebugConfiguration (FeatureNum,
              ActualProtoNum + proto_offset,
              Evidence, ConfigMask, ConfigWord);

          ConfigWord &= *ConfigMask;

          // For each config this proto belongs to, keep the maximum
          // evidence seen for the current feature.  The -8 start offset
          // is compensated by the += 8 in the inner byte loop.
          UINT8Pointer = tables->feature_evidence_ - 8;
          config_byte = 0;
          while (ConfigWord != 0 || config_byte != 0) {
            while (config_byte == 0) {
              config_byte = ConfigWord & 0xff;
              ConfigWord >>= 8;
              UINT8Pointer += 8;
            }
            config_offset = offset_table[config_byte];
            config_byte = next_table[config_byte];
            if (Evidence > UINT8Pointer[config_offset])
              UINT8Pointer[config_offset] = Evidence;
          }

          // Insert the evidence into the proto's evidence list, keeping
          // it sorted in descending order; the displaced smaller value
          // bubbles down until a zero entry is reached.
          UINT8Pointer =
            &(tables->proto_evidence_[ActualProtoNum + proto_offset][0]);
          for (ProtoIndex =
            ClassTemplate->ProtoLengths[ActualProtoNum + proto_offset];
          ProtoIndex > 0; ProtoIndex--, UINT8Pointer++) {
            if (Evidence > *UINT8Pointer) {
              Temp = *UINT8Pointer;
              *UINT8Pointer = Evidence;
              Evidence = Temp;
            }
            else if (Evidence == 0)
              break;
          }
        }
      }
    }
  }

  if (PrintFeatureMatchesOn(Debug)) {
    IMDebugConfigurationSum(FeatureNum, tables->feature_evidence_,
                            ClassTemplate->NumConfigs);
  }

  // Fold this feature's per-config evidence into the running sums and
  // return the total over all configurations.
  IntPointer = tables->sum_feature_evidence_;
  UINT8Pointer = tables->feature_evidence_;
  int SumOverConfigs = 0;
  for (ConfigNum = ClassTemplate->NumConfigs; ConfigNum > 0; ConfigNum--) {
    int evidence = *UINT8Pointer++;
    SumOverConfigs += evidence;
    *IntPointer++ += evidence;
  }
  return SumOverConfigs;
}
00952 
00953 
00954 /*---------------------------------------------------------------------------*/
00955 #ifndef GRAPHICS_DISABLED
void IntegerMatcher::DebugFeatureProtoError(
    INT_CLASS ClassTemplate,
    BIT_VECTOR ProtoMask,
    BIT_VECTOR ConfigMask,
    const ScratchEvidence& tables,
    inT16 NumFeatures,
    int Debug) {
/*
 **      Parameters:
 **              ClassTemplate   Prototypes & tables for a class
 **              ProtoMask       AND mask selecting active protos
 **              ConfigMask      AND mask selecting active configs
 **              tables          Accumulated feature/proto evidence
 **              NumFeatures     Number of features matched
 **              Debug           Flags selecting which reports to print
 **      Operation:
 **              Print debugging reports about feature and proto error per
 **              configuration: the match summary (masks and per-config
 **              error percentages) and/or the full per-proto evidence
 **              dump, depending on the debug flags.
 **      Return:
 **      Exceptions: none
 **      History: Wed Feb 27 14:12:28 MST 1991, RWM, Created.
 */
  FLOAT32 ProtoConfigs[MAX_NUM_CONFIGS];  // total proto evidence per config
  int ConfigNum;
  uinT32 ConfigWord;
  int ProtoSetIndex;
  uinT16 ProtoNum;
  uinT8 ProtoWordNum;
  PROTO_SET ProtoSet;
  uinT16 ActualProtoNum;

  if (PrintMatchSummaryOn(Debug)) {
    cprintf("Configuration Mask:\n");
    for (ConfigNum = 0; ConfigNum < ClassTemplate->NumConfigs; ConfigNum++)
      cprintf("%1d", (((*ConfigMask) >> ConfigNum) & 1));
    cprintf("\n");

    // Error = 1 - normalized evidence; evidence bytes are 0..255, hence
    // the / 256 scaling, and the sum is averaged over NumFeatures.
    cprintf("Feature Error for Configurations:\n");
    for (ConfigNum = 0; ConfigNum < ClassTemplate->NumConfigs; ConfigNum++) {
      cprintf(
          " %5.1f",
          100.0 * (1.0 -
          (FLOAT32) tables.sum_feature_evidence_[ConfigNum]
          / NumFeatures / 256.0));
    }
    cprintf("\n\n\n");
  }

  if (PrintMatchSummaryOn (Debug)) {
    cprintf ("Proto Mask:\n");
    for (ProtoSetIndex = 0; ProtoSetIndex < ClassTemplate->NumProtoSets;
    ProtoSetIndex++) {
      ActualProtoNum = (ProtoSetIndex * PROTOS_PER_PROTO_SET);
      // Two mask words cover each proto set; ProtoMask is advanced in
      // place as each word is printed.
      for (ProtoWordNum = 0; ProtoWordNum < 2;
      ProtoWordNum++, ProtoMask++) {
        ActualProtoNum = (ProtoSetIndex * PROTOS_PER_PROTO_SET);
        for (ProtoNum = 0;
          ((ProtoNum < (PROTOS_PER_PROTO_SET >> 1))
          && (ActualProtoNum < ClassTemplate->NumProtos));
          ProtoNum++, ActualProtoNum++)
        cprintf ("%1d", (((*ProtoMask) >> ProtoNum) & 1));
        cprintf ("\n");
      }
    }
    cprintf ("\n");
  }

  for (int i = 0; i < ClassTemplate->NumConfigs; i++)
    ProtoConfigs[i] = 0;

  if (PrintProtoMatchesOn (Debug)) {
    cprintf ("Proto Evidence:\n");
    for (ProtoSetIndex = 0; ProtoSetIndex < ClassTemplate->NumProtoSets;
    ProtoSetIndex++) {
      ProtoSet = ClassTemplate->ProtoSets[ProtoSetIndex];
      ActualProtoNum = (ProtoSetIndex * PROTOS_PER_PROTO_SET);
      for (ProtoNum = 0;
           ((ProtoNum < PROTOS_PER_PROTO_SET) &&
            (ActualProtoNum < ClassTemplate->NumProtos));
           ProtoNum++, ActualProtoNum++) {
        cprintf ("P %3d =", ActualProtoNum);
        // Total this proto's evidence entries while printing them.
        int temp = 0;
        for (int j = 0; j < ClassTemplate->ProtoLengths[ActualProtoNum]; j++) {
          uinT8 data = tables.proto_evidence_[ActualProtoNum][j];
          cprintf(" %d", data);
          temp += data;
        }

        cprintf(" = %6.4f%%\n",
                temp / 256.0 / ClassTemplate->ProtoLengths[ActualProtoNum]);

        // Credit the proto's total to each configuration it belongs to.
        ConfigWord = ProtoSet->Protos[ProtoNum].Configs[0];
        ConfigNum = 0;
        while (ConfigWord) {
          cprintf ("%5d", ConfigWord & 1 ? temp : 0);
          if (ConfigWord & 1)
            ProtoConfigs[ConfigNum] += temp;
          ConfigNum++;
          ConfigWord >>= 1;
        }
        cprintf("\n");
      }
    }
  }

  if (PrintMatchSummaryOn (Debug)) {
    cprintf ("Proto Error for Configurations:\n");
    for (ConfigNum = 0; ConfigNum < ClassTemplate->NumConfigs; ConfigNum++)
      cprintf (" %5.1f",
        100.0 * (1.0 -
        ProtoConfigs[ConfigNum] /
        ClassTemplate->ConfigLengths[ConfigNum] / 256.0));
    cprintf ("\n\n");
  }

  if (PrintProtoMatchesOn (Debug)) {
    cprintf ("Proto Sum for Configurations:\n");
    for (ConfigNum = 0; ConfigNum < ClassTemplate->NumConfigs; ConfigNum++)
      cprintf (" %4.1f", ProtoConfigs[ConfigNum] / 256.0);
    cprintf ("\n\n");

    cprintf ("Proto Length for Configurations:\n");
    for (ConfigNum = 0; ConfigNum < ClassTemplate->NumConfigs; ConfigNum++)
      cprintf (" %4.1f",
        (float) ClassTemplate->ConfigLengths[ConfigNum]);
    cprintf ("\n\n");
  }

}
01079 
01080 
01081 /*---------------------------------------------------------------------------*/
01082 void IntegerMatcher::DisplayProtoDebugInfo(
01083     INT_CLASS ClassTemplate,
01084     BIT_VECTOR ProtoMask,
01085     BIT_VECTOR ConfigMask,
01086     const ScratchEvidence& tables,
01087     bool SeparateDebugWindows) {
01088   uinT16 ProtoNum;
01089   uinT16 ActualProtoNum;
01090   PROTO_SET ProtoSet;
01091   int ProtoSetIndex;
01092 
01093   InitIntMatchWindowIfReqd();
01094   if (SeparateDebugWindows) {
01095     InitFeatureDisplayWindowIfReqd();
01096     InitProtoDisplayWindowIfReqd();
01097   }
01098 
01099 
01100   for (ProtoSetIndex = 0; ProtoSetIndex < ClassTemplate->NumProtoSets;
01101        ProtoSetIndex++) {
01102     ProtoSet = ClassTemplate->ProtoSets[ProtoSetIndex];
01103     ActualProtoNum = ProtoSetIndex * PROTOS_PER_PROTO_SET;
01104     for (ProtoNum = 0;
01105          ((ProtoNum < PROTOS_PER_PROTO_SET) &&
01106           (ActualProtoNum < ClassTemplate->NumProtos));
01107          ProtoNum++, ActualProtoNum++) {
01108       /* Compute Average for Actual Proto */
01109       int temp = 0;
01110       for (int i = 0; i < ClassTemplate->ProtoLengths[ActualProtoNum]; i++)
01111         temp += tables.proto_evidence_[ActualProtoNum][i];
01112 
01113       temp /= ClassTemplate->ProtoLengths[ActualProtoNum];
01114 
01115       if ((ProtoSet->Protos[ProtoNum]).Configs[0] & (*ConfigMask)) {
01116         DisplayIntProto(ClassTemplate, ActualProtoNum, temp / 255.0);
01117       }
01118     }
01119   }
01120 }
01121 
01122 
01123 /*---------------------------------------------------------------------------*/
01124 void IntegerMatcher::DisplayFeatureDebugInfo(
01125     INT_CLASS ClassTemplate,
01126     BIT_VECTOR ProtoMask,
01127     BIT_VECTOR ConfigMask,
01128     inT16 NumFeatures,
01129     const INT_FEATURE_STRUCT* Features,
01130     int AdaptFeatureThreshold,
01131     int Debug,
01132     bool SeparateDebugWindows) {
01133   ScratchEvidence *tables = new ScratchEvidence();
01134 
01135   tables->Clear(ClassTemplate);
01136 
01137   InitIntMatchWindowIfReqd();
01138   if (SeparateDebugWindows) {
01139     InitFeatureDisplayWindowIfReqd();
01140     InitProtoDisplayWindowIfReqd();
01141   }
01142 
01143   for (int Feature = 0; Feature < NumFeatures; Feature++) {
01144     UpdateTablesForFeature(
01145         ClassTemplate, ProtoMask, ConfigMask, Feature, &Features[Feature],
01146         tables, 0);
01147 
01148     /* Find Best Evidence for Current Feature */
01149     int best = 0;
01150     for (int i = 0; i < ClassTemplate->NumConfigs; i++)
01151       if (tables->feature_evidence_[i] > best)
01152         best = tables->feature_evidence_[i];
01153 
01154     /* Update display for current feature */
01155     if (ClipMatchEvidenceOn(Debug)) {
01156       if (best < AdaptFeatureThreshold)
01157         DisplayIntFeature(&Features[Feature], 0.0);
01158       else
01159         DisplayIntFeature(&Features[Feature], 1.0);
01160     } else {
01161       DisplayIntFeature(&Features[Feature], best / 255.0);
01162     }
01163   }
01164 
01165   delete tables;
01166 }
01167 #endif
01168 
01169 /*---------------------------------------------------------------------------*/
01170 // Add sum of Proto Evidences into Sum Of Feature Evidence Array
01171 void ScratchEvidence::UpdateSumOfProtoEvidences(
01172     INT_CLASS ClassTemplate, BIT_VECTOR ConfigMask, inT16 NumFeatures) {
01173 
01174   int *IntPointer;
01175   uinT32 ConfigWord;
01176   int ProtoSetIndex;
01177   uinT16 ProtoNum;
01178   PROTO_SET ProtoSet;
01179   int NumProtos;
01180   uinT16 ActualProtoNum;
01181 
01182   NumProtos = ClassTemplate->NumProtos;
01183 
01184   for (ProtoSetIndex = 0; ProtoSetIndex < ClassTemplate->NumProtoSets;
01185        ProtoSetIndex++) {
01186     ProtoSet = ClassTemplate->ProtoSets[ProtoSetIndex];
01187     ActualProtoNum = (ProtoSetIndex * PROTOS_PER_PROTO_SET);
01188     for (ProtoNum = 0;
01189          ((ProtoNum < PROTOS_PER_PROTO_SET) && (ActualProtoNum < NumProtos));
01190          ProtoNum++, ActualProtoNum++) {
01191       int temp = 0;
01192       for (int i = 0; i < ClassTemplate->ProtoLengths[ActualProtoNum]; i++)
01193         temp += proto_evidence_[ActualProtoNum] [i];
01194 
01195       ConfigWord = ProtoSet->Protos[ProtoNum].Configs[0];
01196       ConfigWord &= *ConfigMask;
01197       IntPointer = sum_feature_evidence_;
01198       while (ConfigWord) {
01199         if (ConfigWord & 1)
01200           *IntPointer += temp;
01201         IntPointer++;
01202         ConfigWord >>= 1;
01203       }
01204     }
01205   }
01206 }
01207 
01208 
01209 
01210 /*---------------------------------------------------------------------------*/
01211 // Normalize Sum of Proto and Feature Evidence by dividing by the sum of
01212 // the Feature Lengths and the Proto Lengths for each configuration.
01213 void ScratchEvidence::NormalizeSums(
01214     INT_CLASS ClassTemplate, inT16 NumFeatures, inT32 used_features) {
01215 
01216   for (int i = 0; i < ClassTemplate->NumConfigs; i++) {
01217     sum_feature_evidence_[i] = (sum_feature_evidence_[i] << 8) /
01218         (NumFeatures + ClassTemplate->ConfigLengths[i]);
01219   }
01220 }
01221 
01222 
01223 /*---------------------------------------------------------------------------*/
01224 int IntegerMatcher::FindBestMatch(
01225     INT_CLASS ClassTemplate,
01226     const ScratchEvidence &tables,
01227     INT_RESULT Result) {
01228 /*
01229  **      Parameters:
01230  **      Globals:
01231  **      Operation:
01232  **              Find the best match for the current class and update the Result
01233  **              with the configuration and match rating.
01234  **      Return:
01235  **              The best normalized sum of evidences
01236  **      Exceptions: none
01237  **      History: Wed Feb 27 14:12:28 MST 1991, RWM, Created.
01238  */
01239   int BestMatch = 0;
01240   int Best2Match = 0;
01241   Result->Config = 0;
01242   Result->Config2 = 0;
01243 
01244   /* Find best match */
01245   for (int ConfigNum = 0; ConfigNum < ClassTemplate->NumConfigs; ConfigNum++) {
01246     int rating = tables.sum_feature_evidence_[ConfigNum];
01247     if (*classify_debug_level_ > 2)
01248       cprintf("Config %d, rating=%d\n", ConfigNum, rating);
01249     if (rating > BestMatch) {
01250       if (BestMatch > 0) {
01251         Result->Config2 = Result->Config;
01252         Best2Match = BestMatch;
01253       } else {
01254         Result->Config2 = ConfigNum;
01255       }
01256       Result->Config = ConfigNum;
01257       BestMatch = rating;
01258     } else if (rating > Best2Match) {
01259       Result->Config2 = ConfigNum;
01260       Best2Match = rating;
01261     }
01262   }
01263 
01264   /* Compute Certainty Rating */
01265   Result->Rating = (65536.0 - BestMatch) / 65536.0;
01266 
01267   return BestMatch;
01268 }
01269 
01270 // Applies the CN normalization factor to the given rating and returns
01271 // the modified rating.
01272 float IntegerMatcher::ApplyCNCorrection(float rating, int blob_length,
01273                                         int normalization_factor,
01274                                         int matcher_multiplier) {
01275   return (rating * blob_length +
01276           matcher_multiplier * normalization_factor / 256.0) /
01277       (blob_length + matcher_multiplier);
01278 }
01279 
01280 /*---------------------------------------------------------------------------*/
01281 #ifndef GRAPHICS_DISABLED
01282 // Print debug information about the best match for the current class.
01283 void IntegerMatcher::DebugBestMatch(
01284     int BestMatch, INT_RESULT Result) {
01285   tprintf("Rating = %5.1f%%  Best Config = %3d, Distance = %5.1f\n",
01286           100.0 * Result->Rating, Result->Config,
01287           100.0 * (65536.0 - BestMatch) / 65536.0);
01288 }
01289 #endif
01290 
01291 /*---------------------------------------------------------------------------*/
void
HeapSort (int n, int ra[], int rb[]) {
/*
 **      Parameters:
 **              n      Number of elements to sort
 **              ra     Key array [1..n] (element 0 is unused)
 **              rb     Index array [1..n], permuted in step with ra
 **      Globals:
 **      Operation:
 **              Sort Key array in ascending order using heap sort
 **              algorithm.  Also sort Index array that is tied to
 **              the key array.
 **      Return:
 **      Exceptions: none
 **      History: Tue Feb 19 10:24:24 MST 1991, RWM, Created.
 */
  int i, rra, rrb;
  int l, j, ir;

  // Zero or one element is already sorted.  Without this guard, for
  // n == 1 the extraction phase decrements ir to 0, the next iteration
  // reads ra[0]/rb[0] (outside the documented [1..n] range) and the
  // `--ir == 1` exit condition is never met, so the loop never ends.
  if (n < 2)
    return;

  l = (n >> 1) + 1;
  ir = n;
  for (;;) {
    if (l > 1) {
      /* Heap-building phase: pull the next unheaped element. */
      rra = ra[--l];
      rrb = rb[l];
    }
    else {
      /* Extraction phase: move the heap maximum to the end. */
      rra = ra[ir];
      rrb = rb[ir];
      ra[ir] = ra[1];
      rb[ir] = rb[1];
      if (--ir == 1) {
        ra[1] = rra;
        rb[1] = rrb;
        return;
      }
    }
    /* Sift rra down to its proper position in the heap [l..ir]. */
    i = l;
    j = l << 1;
    while (j <= ir) {
      if (j < ir && ra[j] < ra[j + 1])
        ++j;
      if (rra < ra[j]) {
        ra[i] = ra[j];
        rb[i] = rb[j];
        j += (i = j);
      }
      else
        j = ir + 1;
    }
    ra[i] = rra;
    rb[i] = rrb;
  }
}
 All Classes Namespaces Files Functions Variables Typedefs Enumerations Enumerator Friends Defines