// tesseract 3.03 — source listing of intmatcher.c (generated-documentation header removed)
/******************************************************************************
 **  Filename:    intmatcher.c
 **  Purpose:     Generic high level classification routines.
 **  Author:      Robert Moss
 **  History:     Wed Feb 13 17:35:28 MST 1991, RWM, Created.
 **               Mon Mar 11 16:33:02 MST 1991, RWM, Modified to add
 **                    support for adaptive matching.
 **  (c) Copyright Hewlett-Packard Company, 1988.
 ** Licensed under the Apache License, Version 2.0 (the "License");
 ** you may not use this file except in compliance with the License.
 ** You may obtain a copy of the License at
 ** http://www.apache.org/licenses/LICENSE-2.0
 ** Unless required by applicable law or agreed to in writing, software
 ** distributed under the License is distributed on an "AS IS" BASIS,
 ** WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 ** See the License for the specific language governing permissions and
 ** limitations under the License.
 ******************************************************************************/

// Include automatically generated configuration file if running autoconf.
#ifdef HAVE_CONFIG_H
#include "config_auto.h"
#endif

/*----------------------------------------------------------------------------
                          Include Files and Type Defines
----------------------------------------------------------------------------*/
#include "intmatcher.h"
#include "intproto.h"
#include "callcpp.h"
#include "scrollview.h"
#include "float2int.h"
#include "globals.h"
#include "helpers.h"
#include "classify.h"
#include "shapetable.h"
#include <math.h>

/*----------------------------------------------------------------------------
                    Global Data Definitions and Declarations
----------------------------------------------------------------------------*/
// Parameters of the sigmoid used to convert similarity to evidence in the
// similarity_evidence_table_ that is used to convert distance metric to an
// 8 bit evidence value in the secondary matcher. (See IntMatcher::Init).
// kSEExponentialMultiplier of 0.0 disables the exponential scaling branch in
// IntegerMatcher::Init (the "if (kSEExponentialMultiplier > 0.0)" block there
// never runs with this value).
const float IntegerMatcher::kSEExponentialMultiplier = 0.0;
// Center of the similarity->evidence sigmoid (see IntegerMatcher::Init).
const float IntegerMatcher::kSimilarityCenter = 0.0075;

// offset_table[b] is the index of the lowest set bit of the byte b
// (255 serves as a sentinel for b == 0, which has no set bits).
static const uinT8 offset_table[256] = {
  255, 0, 1, 0, 2, 0, 1, 0, 3, 0, 1, 0, 2, 0, 1, 0,
  4, 0, 1, 0, 2, 0, 1, 0, 3, 0, 1, 0, 2, 0, 1, 0,
  5, 0, 1, 0, 2, 0, 1, 0, 3, 0, 1, 0, 2, 0, 1, 0,
  4, 0, 1, 0, 2, 0, 1, 0, 3, 0, 1, 0, 2, 0, 1, 0,
  6, 0, 1, 0, 2, 0, 1, 0, 3, 0, 1, 0, 2, 0, 1, 0,
  4, 0, 1, 0, 2, 0, 1, 0, 3, 0, 1, 0, 2, 0, 1, 0,
  5, 0, 1, 0, 2, 0, 1, 0, 3, 0, 1, 0, 2, 0, 1, 0,
  4, 0, 1, 0, 2, 0, 1, 0, 3, 0, 1, 0, 2, 0, 1, 0,
  7, 0, 1, 0, 2, 0, 1, 0, 3, 0, 1, 0, 2, 0, 1, 0,
  4, 0, 1, 0, 2, 0, 1, 0, 3, 0, 1, 0, 2, 0, 1, 0,
  5, 0, 1, 0, 2, 0, 1, 0, 3, 0, 1, 0, 2, 0, 1, 0,
  4, 0, 1, 0, 2, 0, 1, 0, 3, 0, 1, 0, 2, 0, 1, 0,
  6, 0, 1, 0, 2, 0, 1, 0, 3, 0, 1, 0, 2, 0, 1, 0,
  4, 0, 1, 0, 2, 0, 1, 0, 3, 0, 1, 0, 2, 0, 1, 0,
  5, 0, 1, 0, 2, 0, 1, 0, 3, 0, 1, 0, 2, 0, 1, 0,
  4, 0, 1, 0, 2, 0, 1, 0, 3, 0, 1, 0, 2, 0, 1, 0
};

// next_table[b] is b with its lowest set bit cleared. Together with
// offset_table this lets a caller iterate over the set bits of a byte:
// bit = offset_table[b]; b = next_table[b]; ... until b == 0.
static const uinT8 next_table[256] = {
  0, 0, 0, 0x2, 0, 0x4, 0x4, 0x6, 0, 0x8, 0x8, 0x0a, 0x08, 0x0c, 0x0c, 0x0e,
  0, 0x10, 0x10, 0x12, 0x10, 0x14, 0x14, 0x16, 0x10, 0x18, 0x18, 0x1a, 0x18,
  0x1c, 0x1c, 0x1e,
  0, 0x20, 0x20, 0x22, 0x20, 0x24, 0x24, 0x26, 0x20, 0x28, 0x28, 0x2a, 0x28,
  0x2c, 0x2c, 0x2e,
  0x20, 0x30, 0x30, 0x32, 0x30, 0x34, 0x34, 0x36, 0x30, 0x38, 0x38, 0x3a,
  0x38, 0x3c, 0x3c, 0x3e,
  0, 0x40, 0x40, 0x42, 0x40, 0x44, 0x44, 0x46, 0x40, 0x48, 0x48, 0x4a, 0x48,
  0x4c, 0x4c, 0x4e,
  0x40, 0x50, 0x50, 0x52, 0x50, 0x54, 0x54, 0x56, 0x50, 0x58, 0x58, 0x5a,
  0x58, 0x5c, 0x5c, 0x5e,
  0x40, 0x60, 0x60, 0x62, 0x60, 0x64, 0x64, 0x66, 0x60, 0x68, 0x68, 0x6a,
  0x68, 0x6c, 0x6c, 0x6e,
  0x60, 0x70, 0x70, 0x72, 0x70, 0x74, 0x74, 0x76, 0x70, 0x78, 0x78, 0x7a,
  0x78, 0x7c, 0x7c, 0x7e,
  0, 0x80, 0x80, 0x82, 0x80, 0x84, 0x84, 0x86, 0x80, 0x88, 0x88, 0x8a, 0x88,
  0x8c, 0x8c, 0x8e,
  0x80, 0x90, 0x90, 0x92, 0x90, 0x94, 0x94, 0x96, 0x90, 0x98, 0x98, 0x9a,
  0x98, 0x9c, 0x9c, 0x9e,
  0x80, 0xa0, 0xa0, 0xa2, 0xa0, 0xa4, 0xa4, 0xa6, 0xa0, 0xa8, 0xa8, 0xaa,
  0xa8, 0xac, 0xac, 0xae,
  0xa0, 0xb0, 0xb0, 0xb2, 0xb0, 0xb4, 0xb4, 0xb6, 0xb0, 0xb8, 0xb8, 0xba,
  0xb8, 0xbc, 0xbc, 0xbe,
  0x80, 0xc0, 0xc0, 0xc2, 0xc0, 0xc4, 0xc4, 0xc6, 0xc0, 0xc8, 0xc8, 0xca,
  0xc8, 0xcc, 0xcc, 0xce,
  0xc0, 0xd0, 0xd0, 0xd2, 0xd0, 0xd4, 0xd4, 0xd6, 0xd0, 0xd8, 0xd8, 0xda,
  0xd8, 0xdc, 0xdc, 0xde,
  0xc0, 0xe0, 0xe0, 0xe2, 0xe0, 0xe4, 0xe4, 0xe6, 0xe0, 0xe8, 0xe8, 0xea,
  0xe8, 0xec, 0xec, 0xee,
  0xe0, 0xf0, 0xf0, 0xf2, 0xf0, 0xf4, 0xf4, 0xf6, 0xf0, 0xf8, 0xf8, 0xfa,
  0xf8, 0xfc, 0xfc, 0xfe
};

namespace tesseract {

// Encapsulation of the intermediate data and computations made by the class
// pruner. The class pruner implements a simple linear classifier on binary
// features by heavily quantizing the feature space, and applying
// NUM_BITS_PER_CLASS (2)-bit weights to the features. Lack of resolution in
// weights is compensated by a non-constant bias that is dependent on the
// number of features present.
//
// NOTE(review): this class owns four raw new[] arrays and has no copy
// ctor/assignment, so copying would double-free. It is only ever used as a
// stack-local in Classify::PruneClasses below; confirm before reusing it
// elsewhere.
class ClassPruner {
 public:
  ClassPruner(int max_classes) {
    // The unrolled loop in ComputeScores means that the array sizes need to
    // be rounded up so that the array is big enough to accommodate the extra
    // entries accessed by the unrolling. Each pruner word is of sized
    // BITS_PER_WERD and each entry is NUM_BITS_PER_CLASS, so there are
    // BITS_PER_WERD / NUM_BITS_PER_CLASS entries.
    // See ComputeScores.
    max_classes_ = max_classes;
    rounded_classes_ = RoundUp(
        max_classes, WERDS_PER_CP_VECTOR * BITS_PER_WERD / NUM_BITS_PER_CLASS);
    class_count_ = new int[rounded_classes_];
    norm_count_ = new int[rounded_classes_];
    // sort_key_/sort_index_ get one extra slot because HeapSort (used in
    // PruneAndSort) fills them starting at index 1, not 0.
    sort_key_ = new int[rounded_classes_ + 1];
    sort_index_ = new int[rounded_classes_ + 1];
    for (int i = 0; i < rounded_classes_; i++) {
      class_count_[i] = 0;
    }
    pruning_threshold_ = 0;
    num_features_ = 0;
    num_classes_ = 0;
  }

  ~ClassPruner() {
    delete []class_count_;
    delete []norm_count_;
    delete []sort_key_;
    delete []sort_index_;
  }

  // Computes the scores for every class in the character set, by summing the
  // weights for each feature and stores the sums internally in class_count_.
  void ComputeScores(const INT_TEMPLATES_STRUCT* int_templates,
                     int num_features, const INT_FEATURE_STRUCT* features) {
    num_features_ = num_features;
    int num_pruners = int_templates->NumClassPruners;
    for (int f = 0; f < num_features; ++f) {
      const INT_FEATURE_STRUCT* feature = &features[f];
      // Quantize the feature to NUM_CP_BUCKETS*NUM_CP_BUCKETS*NUM_CP_BUCKETS.
      int x = feature->X * NUM_CP_BUCKETS >> 8;
      int y = feature->Y * NUM_CP_BUCKETS >> 8;
      int theta = feature->Theta * NUM_CP_BUCKETS >> 8;
      int class_id = 0;
      // Each CLASS_PRUNER_STRUCT only covers CLASSES_PER_CP(32) classes, so
      // we need a collection of them, indexed by pruner_set.
      for (int pruner_set = 0; pruner_set < num_pruners; ++pruner_set) {
        // Look up quantized feature in a 3-D array, an array of weights for
        // each class.
        const uinT32* pruner_word_ptr =
            int_templates->ClassPruners[pruner_set]->p[x][y][theta];
        for (int word = 0; word < WERDS_PER_CP_VECTOR; ++word) {
          uinT32 pruner_word = *pruner_word_ptr++;
          // This inner loop is unrolled to speed up the ClassPruner.
          // Currently gcc would not unroll it unless it is set to O3
          // level of optimization or -funroll-loops is specified.
          /*
          uinT32 class_mask = (1 << NUM_BITS_PER_CLASS) - 1;
          for (int bit = 0; bit < BITS_PER_WERD/NUM_BITS_PER_CLASS; bit++) {
            class_count_[class_id++] += pruner_word & class_mask;
            pruner_word >>= NUM_BITS_PER_CLASS;
          }
          */
          // 16 two-bit weights per 32-bit word, extracted low-to-high.
          class_count_[class_id++] += pruner_word & CLASS_PRUNER_CLASS_MASK;
          pruner_word >>= NUM_BITS_PER_CLASS;
          class_count_[class_id++] += pruner_word & CLASS_PRUNER_CLASS_MASK;
          pruner_word >>= NUM_BITS_PER_CLASS;
          class_count_[class_id++] += pruner_word & CLASS_PRUNER_CLASS_MASK;
          pruner_word >>= NUM_BITS_PER_CLASS;
          class_count_[class_id++] += pruner_word & CLASS_PRUNER_CLASS_MASK;
          pruner_word >>= NUM_BITS_PER_CLASS;
          class_count_[class_id++] += pruner_word & CLASS_PRUNER_CLASS_MASK;
          pruner_word >>= NUM_BITS_PER_CLASS;
          class_count_[class_id++] += pruner_word & CLASS_PRUNER_CLASS_MASK;
          pruner_word >>= NUM_BITS_PER_CLASS;
          class_count_[class_id++] += pruner_word & CLASS_PRUNER_CLASS_MASK;
          pruner_word >>= NUM_BITS_PER_CLASS;
          class_count_[class_id++] += pruner_word & CLASS_PRUNER_CLASS_MASK;
          pruner_word >>= NUM_BITS_PER_CLASS;
          class_count_[class_id++] += pruner_word & CLASS_PRUNER_CLASS_MASK;
          pruner_word >>= NUM_BITS_PER_CLASS;
          class_count_[class_id++] += pruner_word & CLASS_PRUNER_CLASS_MASK;
          pruner_word >>= NUM_BITS_PER_CLASS;
          class_count_[class_id++] += pruner_word & CLASS_PRUNER_CLASS_MASK;
          pruner_word >>= NUM_BITS_PER_CLASS;
          class_count_[class_id++] += pruner_word & CLASS_PRUNER_CLASS_MASK;
          pruner_word >>= NUM_BITS_PER_CLASS;
          class_count_[class_id++] += pruner_word & CLASS_PRUNER_CLASS_MASK;
          pruner_word >>= NUM_BITS_PER_CLASS;
          class_count_[class_id++] += pruner_word & CLASS_PRUNER_CLASS_MASK;
          pruner_word >>= NUM_BITS_PER_CLASS;
          class_count_[class_id++] += pruner_word & CLASS_PRUNER_CLASS_MASK;
          pruner_word >>= NUM_BITS_PER_CLASS;
          class_count_[class_id++] += pruner_word & CLASS_PRUNER_CLASS_MASK;
        }
      }
    }
  }

  // Adjusts the scores according to the number of expected features. Used
  // in lieu of a constant bias, this penalizes classes that expect more
  // features than there are present. Thus an actual c will score higher for c
  // than e, even though almost all the features match e as well as c, because
  // e expects more features to be present.
  void AdjustForExpectedNumFeatures(const uinT16* expected_num_features,
                                    int cutoff_strength) {
    for (int class_id = 0; class_id < max_classes_; ++class_id) {
      if (num_features_ < expected_num_features[class_id]) {
        int deficit = expected_num_features[class_id] - num_features_;
        class_count_[class_id] -= class_count_[class_id] * deficit /
                                  (num_features_ * cutoff_strength + deficit);
      }
    }
  }

  // Zeros the scores for classes disabled in the unicharset.
  // Implements the black-list to recognize a subset of the character set.
  void DisableDisabledClasses(const UNICHARSET& unicharset) {
    for (int class_id = 0; class_id < max_classes_; ++class_id) {
      if (!unicharset.get_enabled(class_id))
        class_count_[class_id] = 0;  // This char is disabled!
    }
  }

  // Zeros the scores of fragments.
  void DisableFragments(const UNICHARSET& unicharset) {
    for (int class_id = 0; class_id < max_classes_; ++class_id) {
      // Do not include character fragments in the class pruner
      // results if disable_character_fragments is true.
      if (unicharset.get_fragment(class_id)) {
        class_count_[class_id] = 0;
      }
    }
  }

  // Normalizes the counts for xheight, putting the normalized result in
  // norm_count_. Applies a simple subtractive penalty for incorrect vertical
  // position provided by the normalization_factors array, indexed by
  // character class, and scaled by the norm_multiplier.
  void NormalizeForXheight(int norm_multiplier,
                           const uinT8* normalization_factors) {
    for (int class_id = 0; class_id < max_classes_; class_id++) {
      norm_count_[class_id] = class_count_[class_id] -
          ((norm_multiplier * normalization_factors[class_id]) >> 8);
    }
  }

  // The nop normalization copies the class_count_ array to norm_count_.
  void NoNormalization() {
    for (int class_id = 0; class_id < max_classes_; class_id++) {
      norm_count_[class_id] = class_count_[class_id];
    }
  }

  // Prunes the classes using <the maximum count> * pruning_factor/256 as a
  // threshold for keeping classes. If max_of_non_fragments, then ignore
  // fragments in computing the maximum count.
  void PruneAndSort(int pruning_factor, bool max_of_non_fragments,
                    const UNICHARSET& unicharset) {
    int max_count = 0;
    for (int c = 0; c < max_classes_; ++c) {
      if (norm_count_[c] > max_count &&
          // This additional check is added in order to ensure that
          // the classifier will return at least one non-fragmented
          // character match.
          // TODO(daria): verify that this helps accuracy and does not
          // hurt performance.
          (!max_of_non_fragments || !unicharset.get_fragment(c))) {
        max_count = norm_count_[c];
      }
    }
    // Prune Classes.
    pruning_threshold_ = (max_count * pruning_factor) >> 8;
    // Select Classes.
    if (pruning_threshold_ < 1)
      pruning_threshold_ = 1;
    num_classes_ = 0;
    for (int class_id = 0; class_id < max_classes_; class_id++) {
      if (norm_count_[class_id] >= pruning_threshold_) {
        // Note: pre-increment makes these arrays 1-based, matching HeapSort.
        ++num_classes_;
        sort_index_[num_classes_] = class_id;
        sort_key_[num_classes_] = norm_count_[class_id];
      }
    }

    // Sort Classes using Heapsort Algorithm.
    if (num_classes_ > 1)
      HeapSort(num_classes_, sort_key_, sort_index_);
  }

  // Prints debug info on the class pruner matches for the pruned classes only.
  void DebugMatch(const Classify& classify,
                  const INT_TEMPLATES_STRUCT* int_templates,
                  const INT_FEATURE_STRUCT* features) const {
    int num_pruners = int_templates->NumClassPruners;
    int max_num_classes = int_templates->NumClasses;
    for (int f = 0; f < num_features_; ++f) {
      const INT_FEATURE_STRUCT* feature = &features[f];
      tprintf("F=%3d(%d,%d,%d),", f, feature->X, feature->Y, feature->Theta);
      // Quantize the feature to NUM_CP_BUCKETS*NUM_CP_BUCKETS*NUM_CP_BUCKETS.
      int x = feature->X * NUM_CP_BUCKETS >> 8;
      int y = feature->Y * NUM_CP_BUCKETS >> 8;
      int theta = feature->Theta * NUM_CP_BUCKETS >> 8;
      int class_id = 0;
      for (int pruner_set = 0; pruner_set < num_pruners; ++pruner_set) {
        // Look up quantized feature in a 3-D array, an array of weights for
        // each class.
        const uinT32* pruner_word_ptr =
            int_templates->ClassPruners[pruner_set]->p[x][y][theta];
        for (int word = 0; word < WERDS_PER_CP_VECTOR; ++word) {
          uinT32 pruner_word = *pruner_word_ptr++;
          for (int word_class = 0; word_class < 16 &&
               class_id < max_num_classes; ++word_class, ++class_id) {
            if (norm_count_[class_id] >= pruning_threshold_) {
              tprintf(" %s=%d,",
                      classify.ClassIDToDebugStr(int_templates,
                                                 class_id, 0).string(),
                      pruner_word & CLASS_PRUNER_CLASS_MASK);
            }
            pruner_word >>= NUM_BITS_PER_CLASS;
          }
        }
        tprintf("\n");
      }
    }
  }

  // Prints a summary of the pruner result.
  void SummarizeResult(const Classify& classify,
                       const INT_TEMPLATES_STRUCT* int_templates,
                       const uinT16* expected_num_features,
                       int norm_multiplier,
                       const uinT8* normalization_factors) const {
    tprintf("CP:%d classes, %d features:\n", num_classes_, num_features_);
    for (int i = 0; i < num_classes_; ++i) {
      // sort_index_ is 1-based and ascending, so walk it backwards to print
      // in best-first order.
      int class_id = sort_index_[num_classes_ - i];
      STRING class_string = classify.ClassIDToDebugStr(int_templates,
                                                       class_id, 0);
      tprintf("%s:Initial=%d, E=%d, Xht-adj=%d, N=%d, Rat=%.2f\n",
              class_string.string(),
              class_count_[class_id],
              expected_num_features[class_id],
              (norm_multiplier * normalization_factors[class_id]) >> 8,
              sort_key_[num_classes_ - i],
              100.0 - 100.0 * sort_key_[num_classes_ - i] /
                (CLASS_PRUNER_CLASS_MASK * num_features_));
    }
  }

  // Copies the pruned, sorted classes into the output results and returns
  // the number of classes.
  int SetupResults(GenericVector<CP_RESULT_STRUCT>* results) const {
    CP_RESULT_STRUCT empty;
    results->init_to_size(num_classes_, empty);
    for (int c = 0; c < num_classes_; ++c) {
      (*results)[c].Class = sort_index_[num_classes_ - c];
      // Rating is a normalized badness: 0 = perfect score, 1 = zero score.
      (*results)[c].Rating = 1.0 - sort_key_[num_classes_ - c] /
        (static_cast<float>(CLASS_PRUNER_CLASS_MASK) * num_features_);
    }
    return num_classes_;
  }

 private:
  // Array[rounded_classes_] of initial counts for each class.
  int *class_count_;
  // Array[rounded_classes_] of modified counts for each class after normalizing
  // for expected number of features, disabled classes, fragments, and xheights.
  int *norm_count_;
  // Array[rounded_classes_ +1] of pruned counts that gets sorted
  int *sort_key_;
  // Array[rounded_classes_ +1] of classes corresponding to sort_key_.
  int *sort_index_;
  // Number of classes in this class pruner.
  int max_classes_;
  // Rounded up number of classes used for array sizes.
  int rounded_classes_;
  // Threshold count applied to prune classes.
  int pruning_threshold_;
  // The number of features used to compute the scores.
  int num_features_;
  // Final number of pruned classes.
  int num_classes_;
};

/*----------------------------------------------------------------------------
                Public Code
----------------------------------------------------------------------------*/
/*---------------------------------------------------------------------------*/
// Runs the class pruner from int_templates on the given features, returning
// the number of classes output in results.
// int_templates          Class pruner tables
// num_features           Number of features in blob
// features               Array of features
// normalization_factors  Array of fudge factors from blob
//                        normalization process (by CLASS_INDEX)
// expected_num_features  Array of expected number of features
//                        for each class (by CLASS_INDEX)
// results                Sorted Array of pruned classes. Must be an array
//                        of size at least int_templates->NumClasses.
int Classify::PruneClasses(const INT_TEMPLATES_STRUCT* int_templates,
                           int num_features,
                           const INT_FEATURE_STRUCT* features,
                           const uinT8* normalization_factors,
                           const uinT16* expected_num_features,
                           GenericVector<CP_RESULT_STRUCT>* results) {
/*
 **  Operation:
 **    Prunes the classes using a modified fast match table.
 **    Returns a sorted list of classes along with the number
 **    of pruned classes in that list.
 **  Return: Number of pruned classes.
 **  Exceptions: none
 **  History: Tue Feb 19 10:24:24 MST 1991, RWM, Created.
 */
  ClassPruner pruner(int_templates->NumClasses);
  // Compute initial match scores for all classes.
  pruner.ComputeScores(int_templates, num_features, features);
  // Adjust match scores for number of expected features.
  pruner.AdjustForExpectedNumFeatures(expected_num_features,
                                      classify_cp_cutoff_strength);
  // Apply disabled classes in unicharset - only works without a shape_table.
  if (shape_table_ == NULL)
    pruner.DisableDisabledClasses(unicharset);
  // If fragments are disabled, remove them, also only without a shape table.
  if (disable_character_fragments && shape_table_ == NULL)
    pruner.DisableFragments(unicharset);

  // If we have good x-heights, apply the given normalization factors.
  if (normalization_factors != NULL) {
    pruner.NormalizeForXheight(classify_class_pruner_multiplier,
                               normalization_factors);
  } else {
    pruner.NoNormalization();
  }
  // Do the actual pruning and sort the short-list.
  pruner.PruneAndSort(classify_class_pruner_threshold,
                      shape_table_ == NULL, unicharset);

  if (classify_debug_level > 2) {
    pruner.DebugMatch(*this, int_templates, features);
  }
  if (classify_debug_level > 1) {
    pruner.SummarizeResult(*this, int_templates, expected_num_features,
                           classify_class_pruner_multiplier,
                           normalization_factors);
  }
  // Convert to the expected output format.
  return pruner.SetupResults(results);
}

}  // namespace tesseract

/*---------------------------------------------------------------------------*/
// Matches all protos and configs of a single class against the given
// features, filling in Result with the best configuration and rating.
void IntegerMatcher::Match(INT_CLASS ClassTemplate,
                           BIT_VECTOR ProtoMask,
                           BIT_VECTOR ConfigMask,
                           inT16 NumFeatures,
                           const INT_FEATURE_STRUCT* Features,
                           INT_RESULT Result,
                           int AdaptFeatureThreshold,
                           int Debug,
                           bool SeparateDebugWindows) {
/*
 **  Parameters:
 **    ClassTemplate             Prototypes & tables for a class
 **    BlobLength                Length of unormalized blob
 **    NumFeatures               Number of features in blob
 **    Features                  Array of features
 **    NormalizationFactor       Fudge factor from blob
 **                              normalization process
 **    Result                    Class rating & configuration:
 **                              (0.0 -> 1.0), 0=good, 1=bad
 **    Debug                     Debugger flag: 1=debugger on
 **  Globals:
 **    local_matcher_multiplier_ Normalization factor multiplier
 **  Operation:
 **    IntegerMatcher returns the best configuration and rating
 **    for a single class.  The class matched against is determined
 **    by the uniqueness of the ClassTemplate parameter.  The
 **    best rating and its associated configuration are returned.
 **  Return:
 **  Exceptions: none
 **  History: Tue Feb 19 16:36:23 MST 1991, RWM, Created.
 */
  // Scratch tables are heap-allocated; freed by the delete at the end.
  ScratchEvidence *tables = new ScratchEvidence();
  int Feature;
  int BestMatch;

  if (MatchDebuggingOn (Debug))
    cprintf ("Integer Matcher -------------------------------------------\n");

  tables->Clear(ClassTemplate);
  Result->FeatureMisses = 0;

  for (Feature = 0; Feature < NumFeatures; Feature++) {
    int csum = UpdateTablesForFeature(ClassTemplate, ProtoMask, ConfigMask,
                                      Feature, &Features[Feature],
                                      tables, Debug);
    // Count features that were missed over all configs.
    if (csum == 0)
      Result->FeatureMisses++;
  }

#ifndef GRAPHICS_DISABLED
  if (PrintProtoMatchesOn(Debug) || PrintMatchSummaryOn(Debug)) {
    DebugFeatureProtoError(ClassTemplate, ProtoMask, ConfigMask, *tables,
                           NumFeatures, Debug);
  }

  if (DisplayProtoMatchesOn(Debug)) {
    DisplayProtoDebugInfo(ClassTemplate, ProtoMask, ConfigMask,
                          *tables, SeparateDebugWindows);
  }

  if (DisplayFeatureMatchesOn(Debug)) {
    DisplayFeatureDebugInfo(ClassTemplate, ProtoMask, ConfigMask, NumFeatures,
                            Features, AdaptFeatureThreshold, Debug,
                            SeparateDebugWindows);
  }
#endif

  tables->UpdateSumOfProtoEvidences(ClassTemplate, ConfigMask, NumFeatures);
  tables->NormalizeSums(ClassTemplate, NumFeatures, NumFeatures);

  BestMatch = FindBestMatch(ClassTemplate, *tables, Result);

#ifndef GRAPHICS_DISABLED
  if (PrintMatchSummaryOn(Debug))
    DebugBestMatch(BestMatch, Result);

  if (MatchDebuggingOn(Debug))
    cprintf("Match Complete --------------------------------------------\n");
#endif

  delete tables;
}


/*---------------------------------------------------------------------------*/
int IntegerMatcher::FindGoodProtos(
INT_CLASS ClassTemplate,
    BIT_VECTOR ProtoMask,
    BIT_VECTOR ConfigMask,
    uinT16 BlobLength,
    inT16 NumFeatures,
    INT_FEATURE_ARRAY Features,
    PROTO_ID *ProtoArray,
    int AdaptProtoThreshold,
    int Debug) {
/*
 **  Parameters:
 **    ClassTemplate             Prototypes & tables for a class
 **    ProtoMask                 AND Mask for proto word
 **    ConfigMask                AND Mask for config word
 **    BlobLength                Length of unormalized blob
 **    NumFeatures               Number of features in blob
 **    Features                  Array of features
 **    ProtoArray                Array of good protos
 **    AdaptProtoThreshold       Threshold for good protos
 **    Debug                     Debugger flag: 1=debugger on
 **  Globals:
 **    local_matcher_multiplier_ Normalization factor multiplier
 **  Operation:
 **    FindGoodProtos finds all protos whose normalized proto-evidence
 **    exceed classify_adapt_proto_thresh.  The list is ordered by increasing
 **    proto id number.
 **  Return:
 **    Number of good protos in ProtoArray.
 **  Exceptions: none
 **  History: Tue Mar 12 17:09:26 MST 1991, RWM, Created
 */
  ScratchEvidence *tables = new ScratchEvidence();
  int NumGoodProtos = 0;

  /* DEBUG opening heading */
  if (MatchDebuggingOn (Debug))
    cprintf
      ("Find Good Protos -------------------------------------------\n");

  tables->Clear(ClassTemplate);

  // Accumulate evidence for every feature into the proto-evidence tables.
  for (int Feature = 0; Feature < NumFeatures; Feature++)
    UpdateTablesForFeature(
        ClassTemplate, ProtoMask, ConfigMask, Feature, &(Features[Feature]),
        tables, Debug);

#ifndef GRAPHICS_DISABLED
  if (PrintProtoMatchesOn (Debug) || PrintMatchSummaryOn (Debug))
    DebugFeatureProtoError(ClassTemplate, ProtoMask, ConfigMask, *tables,
                           NumFeatures, Debug);
#endif

  /* Average Proto Evidences & Find Good Protos */
  for (int proto = 0; proto < ClassTemplate->NumProtos; proto++) {
    /* Compute Average for Actual Proto */
    int Temp = 0;
    for (int i = 0; i < ClassTemplate->ProtoLengths[proto]; i++)
      Temp += tables->proto_evidence_[proto][i];

    Temp /= ClassTemplate->ProtoLengths[proto];

    /* Find Good Protos */
    // Protos are visited in increasing id order, so ProtoArray stays sorted.
    if (Temp >= AdaptProtoThreshold) {
      *ProtoArray = proto;
      ProtoArray++;
      NumGoodProtos++;
    }
  }

  if (MatchDebuggingOn (Debug))
    cprintf ("Match Complete --------------------------------------------\n");
  delete tables;

  return NumGoodProtos;
}


/*---------------------------------------------------------------------------*/
int IntegerMatcher::FindBadFeatures(
    INT_CLASS ClassTemplate,
    BIT_VECTOR ProtoMask,
    BIT_VECTOR ConfigMask,
    uinT16 BlobLength,
    inT16 NumFeatures,
    INT_FEATURE_ARRAY Features,
    FEATURE_ID *FeatureArray,
    int AdaptFeatureThreshold,
    int Debug) {
/*
 **  Parameters:
 **    ClassTemplate         Prototypes & tables for a class
 **    ProtoMask             AND Mask for proto word
 **    ConfigMask            AND Mask for config word
 **    BlobLength            Length of unormalized blob
 **    NumFeatures           Number of features in blob
 **    Features              Array of features
 **    FeatureArray          Array of bad features
 **    AdaptFeatureThreshold Threshold for bad features
 **    Debug                 Debugger flag: 1=debugger on
 **  Operation:
 **    FindBadFeatures finds all features with maximum feature-evidence <
 **    AdaptFeatureThresh. The list is ordered by increasing feature number.
 **  Return:
 **    Number of bad features in FeatureArray.
 **  History: Tue Mar 12 17:09:26 MST 1991, RWM, Created
 */
  ScratchEvidence *tables = new ScratchEvidence();
  int NumBadFeatures = 0;

  /* DEBUG opening heading */
  if (MatchDebuggingOn(Debug))
    cprintf("Find Bad Features -------------------------------------------\n");

  tables->Clear(ClassTemplate);

  for (int Feature = 0; Feature < NumFeatures; Feature++) {
    UpdateTablesForFeature(
        ClassTemplate, ProtoMask, ConfigMask, Feature, &Features[Feature],
        tables, Debug);

    /* Find Best Evidence for Current Feature */
    int best = 0;
    for (int i = 0; i < ClassTemplate->NumConfigs; i++)
      if (tables->feature_evidence_[i] > best)
        best = tables->feature_evidence_[i];

    /* Find Bad Features */
    // Features are visited in order, so FeatureArray stays sorted.
    if (best < AdaptFeatureThreshold) {
      *FeatureArray = Feature;
      FeatureArray++;
      NumBadFeatures++;
    }
  }

#ifndef GRAPHICS_DISABLED
  if (PrintProtoMatchesOn(Debug) || PrintMatchSummaryOn(Debug))
    DebugFeatureProtoError(ClassTemplate, ProtoMask, ConfigMask, *tables,
                           NumFeatures, Debug);
#endif

  if (MatchDebuggingOn(Debug))
    cprintf("Match Complete --------------------------------------------\n");

  delete tables;
  return NumBadFeatures;
}

/*---------------------------------------------------------------------------*/
// Initializes the similarity-to-evidence lookup table and the shift/mask
// constants used by the integer evidence computation in
// UpdateTablesForFeature.
void IntegerMatcher::Init(tesseract::IntParam *classify_debug_level) {
  classify_debug_level_ = classify_debug_level;

  /* Initialize table for evidence to similarity lookup */
  for (int i = 0; i < SE_TABLE_SIZE; i++) {
    uinT32 IntSimilarity = i << (27 - SE_TABLE_BITS);
    double Similarity = ((double) IntSimilarity) / 65536.0 / 65536.0;
    double evidence = Similarity / kSimilarityCenter;
    // Bell-shaped (reciprocal-quadratic) mapping: evidence peaks at 255 for
    // zero similarity distance and falls off as distance grows.
    evidence = 255.0 / (evidence * evidence + 1.0);

    if (kSEExponentialMultiplier > 0.0) {
      // Optional exponential roll-off applied towards the end of the table.
      double scale = 1.0 - exp(-kSEExponentialMultiplier) *
        exp(kSEExponentialMultiplier * ((double) i / SE_TABLE_SIZE));
      evidence *= ClipToRange(scale, 0.0, 1.0);
    }

    similarity_evidence_table_[i] = (uinT8) (evidence + 0.5);
  }

  /* Initialize evidence computation variables */
  evidence_table_mask_ =
    ((1 << kEvidenceTableBits) - 1) << (9 - kEvidenceTableBits);
  mult_trunc_shift_bits_ = (14 - kIntEvidenceTruncBits);
  table_trunc_shift_bits_ = (27 - SE_TABLE_BITS - (mult_trunc_shift_bits_ << 1));
  evidence_mult_mask_ = ((1 << kIntEvidenceTruncBits) - 1);
}


// Zeroes both accumulator tables (per-config sums and per-proto evidence
// lists) for the given class template.
void ScratchEvidence::Clear(const INT_CLASS class_template) {
  memset(sum_feature_evidence_, 0,
         class_template->NumConfigs * sizeof(sum_feature_evidence_[0]));
  memset(proto_evidence_, 0,
         class_template->NumProtos * sizeof(proto_evidence_[0]));
}

// Zeroes only the per-feature, per-config evidence scratch array; called
// once per feature by UpdateTablesForFeature.
void ScratchEvidence::ClearFeatureEvidence(const INT_CLASS class_template) {
  memset(feature_evidence_, 0,
         class_template->NumConfigs * sizeof(feature_evidence_[0]));
}



/*---------------------------------------------------------------------------*/
// Prints one feature/proto evidence line plus the proto's config membership
// bits (LSB first) for debugging.  ConfigMask is accepted but not used.
void IMDebugConfiguration(int FeatureNum,
                          uinT16 ActualProtoNum,
                          uinT8 Evidence,
                          BIT_VECTOR ConfigMask,
                          uinT32 ConfigWord) {
  cprintf ("F = %3d, P = %3d, E = %3d, Configs = ",
           FeatureNum, (int) ActualProtoNum, (int) Evidence);
  while (ConfigWord) {
    if (ConfigWord & 1)
      cprintf ("1");
    else
      cprintf ("0");
    ConfigWord >>= 1;
  }
  cprintf ("\n");
}


/*---------------------------------------------------------------------------*/
// Prints the per-config evidence accumulated for one feature.
void IMDebugConfigurationSum(int FeatureNum,
                             uinT8 *FeatureEvidence,
                             inT32 ConfigCount) {
  cprintf("F=%3d, C=", FeatureNum);
  for (int ConfigNum = 0; ConfigNum < ConfigCount; ConfigNum++) {
    cprintf("%4d", FeatureEvidence[ConfigNum]);
  }
  cprintf("\n");
}



/*---------------------------------------------------------------------------*/
int IntegerMatcher::UpdateTablesForFeature(
    INT_CLASS ClassTemplate,
    BIT_VECTOR ProtoMask,
    BIT_VECTOR ConfigMask,
    int FeatureNum,
    const INT_FEATURE_STRUCT* Feature,
    ScratchEvidence *tables,
    int Debug) {
  /*
   ** Parameters:
   **   ClassTemplate  Prototypes & tables for a class.
   **   ProtoMask      AND mask selecting which protos to consider.
   **   ConfigMask     AND mask selecting which configs to consider.
   **   FeatureNum     Current feature number (for DEBUG only).
   **   Feature        Pointer to the feature struct.
   **   tables         Evidence accumulator tables.
   **   Debug          Debugger flag: 1 = debugger on.
   ** Operation:
   **   For the given feature: prune protos via the proto pruner, compute
   **   integer evidence for each surviving proto, and update the Feature
   **   Evidence, Proto Evidence, and Sum of Feature Evidence tables.
   ** Return:
   **   Sum of this feature's evidence over all configs.
   */
  register uinT32 ConfigWord;
  register uinT32 ProtoWord;
  register uinT32 ProtoNum;
  register uinT32 ActualProtoNum;
  uinT8 proto_byte;
  inT32 proto_word_offset;
  inT32 proto_offset;
  uinT8 config_byte;
  inT32 config_offset;
  PROTO_SET ProtoSet;
  uinT32 *ProtoPrunerPtr;
  INT_PROTO Proto;
  int ProtoSetIndex;
  uinT8 Evidence;
  uinT32 XFeatureAddress;
  uinT32 YFeatureAddress;
  uinT32 ThetaFeatureAddress;
  register uinT8 *UINT8Pointer;
  register int ProtoIndex;
  uinT8 Temp;
  register int *IntPointer;
  int ConfigNum;
  register inT32 M3;
  register inT32 A3;
  register uinT32 A4;

  tables->ClearFeatureEvidence(ClassTemplate);

  /* Precompute word offsets into the proto pruner (viewed as uinT32[]) for
     this feature's quantized X, Y and Theta buckets. */
  XFeatureAddress = ((Feature->X >> 2) << 1);
  YFeatureAddress = (NUM_PP_BUCKETS << 1) + ((Feature->Y >> 2) << 1);
  ThetaFeatureAddress = (NUM_PP_BUCKETS << 2) + ((Feature->Theta >> 2) << 1);

  for (ProtoSetIndex = 0, ActualProtoNum = 0;
       ProtoSetIndex < ClassTemplate->NumProtoSets; ProtoSetIndex++) {
    ProtoSet = ClassTemplate->ProtoSets[ProtoSetIndex];
    ProtoPrunerPtr = (uinT32 *) ((*ProtoSet).ProtoPruner);
    // Each pruner word covers half a proto set; ProtoMask advances in step.
    for (ProtoNum = 0; ProtoNum < PROTOS_PER_PROTO_SET;
         ProtoNum += (PROTOS_PER_PROTO_SET >> 1), ActualProtoNum +=
         (PROTOS_PER_PROTO_SET >> 1), ProtoMask++, ProtoPrunerPtr++) {
      /* Prune: a proto survives only if its bit is set in the X, Y and
         Theta pruner words AND in the caller's proto mask. */
      ProtoWord = *(ProtoPrunerPtr + XFeatureAddress);
      ProtoWord &= *(ProtoPrunerPtr + YFeatureAddress);
      ProtoWord &= *(ProtoPrunerPtr + ThetaFeatureAddress);
      ProtoWord &= *ProtoMask;

      if (ProtoWord != 0) {
        // Byte-wise bit scan over ProtoWord: offset_table gives the index
        // of the lowest set bit in a byte, next_table clears it.
        proto_byte = ProtoWord & 0xff;
        ProtoWord >>= 8;
        proto_word_offset = 0;
        while (ProtoWord != 0 || proto_byte != 0) {
          while (proto_byte == 0) {
            proto_byte = ProtoWord & 0xff;
            ProtoWord >>= 8;
            proto_word_offset += 8;
          }
          proto_offset = offset_table[proto_byte] + proto_word_offset;
          proto_byte = next_table[proto_byte];
          Proto = &(ProtoSet->Protos[ProtoNum + proto_offset]);
          ConfigWord = Proto->Configs[0];
          // A3: fixed-point perpendicular-distance term from the proto's
          // (A, B, C) line coefficients; M3: scaled angle difference.
          A3 = (((Proto->A * (Feature->X - 128)) << 1)
                - (Proto->B * (Feature->Y - 128)) + (Proto->C << 9));
          M3 = (((inT8) (Feature->Theta - Proto->Angle)) * kIntThetaFudge) << 1;

          // One's complement as a cheap absolute value (off by one, which
          // is irrelevant after the truncating shifts below).
          if (A3 < 0)
            A3 = ~A3;
          if (M3 < 0)
            M3 = ~M3;
          A3 >>= mult_trunc_shift_bits_;
          M3 >>= mult_trunc_shift_bits_;
          if (A3 > evidence_mult_mask_)
            A3 = evidence_mult_mask_;
          if (M3 > evidence_mult_mask_)
            M3 = evidence_mult_mask_;

          // Squared distance -> table lookup converts it to 8-bit evidence.
          A4 = (A3 * A3) + (M3 * M3);
          A4 >>= table_trunc_shift_bits_;
          if (A4 > evidence_table_mask_)
            Evidence = 0;
          else
            Evidence = similarity_evidence_table_[A4];

          if (PrintFeatureMatchesOn (Debug))
            IMDebugConfiguration (FeatureNum,
                                  ActualProtoNum + proto_offset,
                                  Evidence, ConfigMask, ConfigWord);

          ConfigWord &= *ConfigMask;

          // Pre-offset by -8: the inner scan adds 8 before the first use.
          UINT8Pointer = tables->feature_evidence_ - 8;
          config_byte = 0;
          // For every config this proto belongs to, keep the max evidence
          // seen for this feature (same byte-wise bit scan as above).
          while (ConfigWord != 0 || config_byte != 0) {
            while (config_byte == 0) {
              config_byte = ConfigWord & 0xff;
              ConfigWord >>= 8;
              UINT8Pointer += 8;
            }
            config_offset = offset_table[config_byte];
            config_byte = next_table[config_byte];
            if (Evidence > UINT8Pointer[config_offset])
              UINT8Pointer[config_offset] = Evidence;
          }

          // Insert Evidence into the proto's descending-sorted list of its
          // best ProtoLengths evidences: swap downwards, dropping the
          // smallest; bail out early once the carried value reaches 0.
          UINT8Pointer =
            &(tables->proto_evidence_[ActualProtoNum + proto_offset][0]);
          for (ProtoIndex =
               ClassTemplate->ProtoLengths[ActualProtoNum + proto_offset];
               ProtoIndex > 0; ProtoIndex--, UINT8Pointer++) {
            if (Evidence > *UINT8Pointer) {
              Temp = *UINT8Pointer;
              *UINT8Pointer = Evidence;
              Evidence = Temp;
            }
            else if (Evidence == 0)
              break;
          }
        }
      }
    }
  }

  if (PrintFeatureMatchesOn(Debug)) {
    IMDebugConfigurationSum(FeatureNum, tables->feature_evidence_,
                            ClassTemplate->NumConfigs);
  }

  // Fold this feature's per-config evidence into the running sums and
  // return the total over all configs.
  IntPointer = tables->sum_feature_evidence_;
  UINT8Pointer = tables->feature_evidence_;
  int SumOverConfigs = 0;
  for (ConfigNum = ClassTemplate->NumConfigs; ConfigNum > 0; ConfigNum--) {
    int evidence = *UINT8Pointer++;
    SumOverConfigs += evidence;
    *IntPointer++ += evidence;
  }
  return SumOverConfigs;
}


/*---------------------------------------------------------------------------*/
#ifndef GRAPHICS_DISABLED
void IntegerMatcher::DebugFeatureProtoError(
    INT_CLASS ClassTemplate,
    BIT_VECTOR ProtoMask,
    BIT_VECTOR ConfigMask,
    const ScratchEvidence& tables,
    inT16 NumFeatures,
    int Debug) {
  /*
   ** Operation:
   **   Print debugging information (masks, per-config feature error, and
   **   per-proto evidence) for the current match.
   ** History: Wed Feb 27 14:12:28 MST 1991, RWM, Created.
   */
  FLOAT32 ProtoConfigs[MAX_NUM_CONFIGS];
  int ConfigNum;
  uinT32 ConfigWord;
  int ProtoSetIndex;
  uinT16 ProtoNum;
  uinT8 ProtoWordNum;
  PROTO_SET ProtoSet;
  uinT16 ActualProtoNum;

  if (PrintMatchSummaryOn(Debug)) {
    cprintf("Configuration Mask:\n");
    for (ConfigNum = 0; ConfigNum < ClassTemplate->NumConfigs; ConfigNum++)
      cprintf("%1d", (((*ConfigMask) >> ConfigNum) & 1));
    cprintf("\n");

    // Error = 100 * (1 - normalized evidence); 256 is the max evidence per
    // feature, so a perfect match prints 0.0.
    cprintf("Feature Error for Configurations:\n");
    for (ConfigNum = 0; ConfigNum < ClassTemplate->NumConfigs; ConfigNum++) {
      cprintf(
          " %5.1f",
          100.0 * (1.0 -
                   (FLOAT32) tables.sum_feature_evidence_[ConfigNum]
                   / NumFeatures / 256.0));
    }
    cprintf("\n\n\n");
  }

  if (PrintMatchSummaryOn (Debug)) {
    cprintf ("Proto Mask:\n");
    for (ProtoSetIndex = 0; ProtoSetIndex < ClassTemplate->NumProtoSets;
         ProtoSetIndex++) {
      ActualProtoNum = (ProtoSetIndex * PROTOS_PER_PROTO_SET);
      for (ProtoWordNum = 0; ProtoWordNum < 2;
           ProtoWordNum++, ProtoMask++) {
        // NOTE(review): ActualProtoNum is reset to the set's base for the
        // second mask word too, so the NumProtos bound is checked against
        // the first half's numbering — looks suspicious; confirm intent.
        ActualProtoNum = (ProtoSetIndex * PROTOS_PER_PROTO_SET);
        for (ProtoNum = 0;
             ((ProtoNum < (PROTOS_PER_PROTO_SET >> 1))
              && (ActualProtoNum < ClassTemplate->NumProtos));
             ProtoNum++, ActualProtoNum++)
          cprintf ("%1d", (((*ProtoMask) >> ProtoNum) & 1));
        cprintf ("\n");
      }
    }
    cprintf ("\n");
  }

  for (int i = 0; i < ClassTemplate->NumConfigs; i++)
    ProtoConfigs[i] = 0;

  if (PrintProtoMatchesOn (Debug)) {
    cprintf ("Proto Evidence:\n");
    for (ProtoSetIndex = 0; ProtoSetIndex < ClassTemplate->NumProtoSets;
         ProtoSetIndex++) {
      ProtoSet = ClassTemplate->ProtoSets[ProtoSetIndex];
      ActualProtoNum = (ProtoSetIndex * PROTOS_PER_PROTO_SET);
      for (ProtoNum = 0;
           ((ProtoNum < PROTOS_PER_PROTO_SET) &&
            (ActualProtoNum < ClassTemplate->NumProtos));
           ProtoNum++, ActualProtoNum++) {
        cprintf ("P %3d =", ActualProtoNum);
        int temp = 0;
        for (int j = 0; j < ClassTemplate->ProtoLengths[ActualProtoNum]; j++) {
          uinT8 data = tables.proto_evidence_[ActualProtoNum][j];
          cprintf(" %d", data);
          temp += data;
        }

        cprintf(" = %6.4f%%\n",
                temp / 256.0 / ClassTemplate->ProtoLengths[ActualProtoNum]);

        // Credit this proto's summed evidence to every config it belongs to.
        ConfigWord = ProtoSet->Protos[ProtoNum].Configs[0];
        ConfigNum = 0;
        while (ConfigWord) {
          cprintf ("%5d", ConfigWord & 1 ? temp : 0);
          if (ConfigWord & 1)
            ProtoConfigs[ConfigNum] += temp;
          ConfigNum++;
          ConfigWord >>= 1;
        }
        cprintf("\n");
      }
    }
  }

  if (PrintMatchSummaryOn (Debug)) {
    cprintf ("Proto Error for Configurations:\n");
    for (ConfigNum = 0; ConfigNum < ClassTemplate->NumConfigs; ConfigNum++)
      cprintf (" %5.1f",
               100.0 * (1.0 -
                        ProtoConfigs[ConfigNum] /
                        ClassTemplate->ConfigLengths[ConfigNum] / 256.0));
    cprintf ("\n\n");
  }

  if (PrintProtoMatchesOn (Debug)) {
    cprintf ("Proto Sum for Configurations:\n");
    for (ConfigNum = 0; ConfigNum < ClassTemplate->NumConfigs; ConfigNum++)
      cprintf (" %4.1f", ProtoConfigs[ConfigNum] / 256.0);
    cprintf ("\n\n");

    cprintf ("Proto Length for Configurations:\n");
    for (ConfigNum = 0; ConfigNum < ClassTemplate->NumConfigs; ConfigNum++)
      cprintf (" %4.1f",
               (float) ClassTemplate->ConfigLengths[ConfigNum]);
    cprintf ("\n\n");
  }

}


/*---------------------------------------------------------------------------*/
// Displays each proto whose configs intersect ConfigMask, shaded by its
// average accumulated evidence.  ProtoMask is accepted but not used here.
void IntegerMatcher::DisplayProtoDebugInfo(
    INT_CLASS ClassTemplate,
    BIT_VECTOR ProtoMask,
    BIT_VECTOR ConfigMask,
    const ScratchEvidence& tables,
    bool SeparateDebugWindows) {
  uinT16 ProtoNum;
  uinT16 ActualProtoNum;
  PROTO_SET ProtoSet;
  int ProtoSetIndex;

  InitIntMatchWindowIfReqd();
  if (SeparateDebugWindows) {
    InitFeatureDisplayWindowIfReqd();
    InitProtoDisplayWindowIfReqd();
  }


  for (ProtoSetIndex = 0; ProtoSetIndex < ClassTemplate->NumProtoSets;
       ProtoSetIndex++) {
    ProtoSet = ClassTemplate->ProtoSets[ProtoSetIndex];
    ActualProtoNum = ProtoSetIndex * PROTOS_PER_PROTO_SET;
    for (ProtoNum = 0;
         ((ProtoNum < PROTOS_PER_PROTO_SET) &&
          (ActualProtoNum < ClassTemplate->NumProtos));
         ProtoNum++, ActualProtoNum++) {
      /* Compute average evidence for the current proto. */
      int temp = 0;
      for (int i = 0; i < ClassTemplate->ProtoLengths[ActualProtoNum]; i++)
        temp += tables.proto_evidence_[ActualProtoNum][i];

      temp /= ClassTemplate->ProtoLengths[ActualProtoNum];

      if ((ProtoSet->Protos[ProtoNum]).Configs[0] & (*ConfigMask)) {
        DisplayIntProto(ClassTemplate, ActualProtoNum, temp / 255.0);
      }
    }
  }
}


/*---------------------------------------------------------------------------*/
// Displays each feature shaded by its best per-config evidence (or as a
// binary good/bad indication when ClipMatchEvidenceOn is set).
void IntegerMatcher::DisplayFeatureDebugInfo(
    INT_CLASS ClassTemplate,
    BIT_VECTOR ProtoMask,
    BIT_VECTOR ConfigMask,
    inT16 NumFeatures,
    const INT_FEATURE_STRUCT* Features,
    int AdaptFeatureThreshold,
    int Debug,
    bool SeparateDebugWindows) {
  ScratchEvidence *tables = new ScratchEvidence();

  tables->Clear(ClassTemplate);

  InitIntMatchWindowIfReqd();
  if (SeparateDebugWindows) {
    InitFeatureDisplayWindowIfReqd();
    InitProtoDisplayWindowIfReqd();
  }

  for (int Feature = 0; Feature < NumFeatures; Feature++) {
    UpdateTablesForFeature(
        ClassTemplate, ProtoMask, ConfigMask, Feature, &Features[Feature],
        tables, 0);

    /* Find best evidence for the current feature over all configs. */
    int best = 0;
    for (int i = 0; i < ClassTemplate->NumConfigs; i++)
      if (tables->feature_evidence_[i] > best)
        best = tables->feature_evidence_[i];

    /* Update display for the current feature. */
    if (ClipMatchEvidenceOn(Debug)) {
      if (best < AdaptFeatureThreshold)
        DisplayIntFeature(&Features[Feature], 0.0);
      else
        DisplayIntFeature(&Features[Feature], 1.0);
    } else {
      DisplayIntFeature(&Features[Feature], best / 255.0);
    }
  }

  delete tables;
}
#endif

/*---------------------------------------------------------------------------*/
// Add sum of Proto Evidences into Sum Of Feature Evidence Array
void ScratchEvidence::UpdateSumOfProtoEvidences(
    INT_CLASS ClassTemplate, BIT_VECTOR ConfigMask, inT16 NumFeatures) {

  int *IntPointer;
  uinT32 ConfigWord;
  int ProtoSetIndex;
  uinT16 ProtoNum;
  PROTO_SET ProtoSet;
  int NumProtos;
  uinT16 ActualProtoNum;

  NumProtos = ClassTemplate->NumProtos;

  for (ProtoSetIndex = 0; ProtoSetIndex < ClassTemplate->NumProtoSets;
       ProtoSetIndex++) {
    ProtoSet = ClassTemplate->ProtoSets[ProtoSetIndex];
    ActualProtoNum = (ProtoSetIndex * PROTOS_PER_PROTO_SET);
    for (ProtoNum = 0;
         ((ProtoNum < PROTOS_PER_PROTO_SET) && (ActualProtoNum < NumProtos));
         ProtoNum++, ActualProtoNum++) {
      // Sum this proto's kept evidences, then credit the sum to every
      // config the proto belongs to (subject to ConfigMask).
      int temp = 0;
      for (int i = 0; i < ClassTemplate->ProtoLengths[ActualProtoNum]; i++)
        temp += proto_evidence_[ActualProtoNum] [i];

      ConfigWord = ProtoSet->Protos[ProtoNum].Configs[0];
      ConfigWord &= *ConfigMask;
      IntPointer = sum_feature_evidence_;
      while (ConfigWord) {
        if (ConfigWord & 1)
          *IntPointer += temp;
        IntPointer++;
        ConfigWord >>= 1;
      }
    }
  }
}



/*---------------------------------------------------------------------------*/
// Normalize Sum of Proto and Feature Evidence by dividing by the sum of
// the Feature Lengths and the Proto Lengths for
each configuration. 01213 void ScratchEvidence::NormalizeSums( 01214 INT_CLASS ClassTemplate, inT16 NumFeatures, inT32 used_features) { 01215 01216 for (int i = 0; i < ClassTemplate->NumConfigs; i++) { 01217 sum_feature_evidence_[i] = (sum_feature_evidence_[i] << 8) / 01218 (NumFeatures + ClassTemplate->ConfigLengths[i]); 01219 } 01220 } 01221 01222 01223 /*---------------------------------------------------------------------------*/ 01224 int IntegerMatcher::FindBestMatch( 01225 INT_CLASS ClassTemplate, 01226 const ScratchEvidence &tables, 01227 INT_RESULT Result) { 01228 /* 01229 ** Parameters: 01230 ** Globals: 01231 ** Operation: 01232 ** Find the best match for the current class and update the Result 01233 ** with the configuration and match rating. 01234 ** Return: 01235 ** The best normalized sum of evidences 01236 ** Exceptions: none 01237 ** History: Wed Feb 27 14:12:28 MST 1991, RWM, Created. 01238 */ 01239 int BestMatch = 0; 01240 int Best2Match = 0; 01241 Result->Config = 0; 01242 Result->Config2 = 0; 01243 01244 /* Find best match */ 01245 for (int ConfigNum = 0; ConfigNum < ClassTemplate->NumConfigs; ConfigNum++) { 01246 int rating = tables.sum_feature_evidence_[ConfigNum]; 01247 if (*classify_debug_level_ > 2) 01248 cprintf("Config %d, rating=%d\n", ConfigNum, rating); 01249 if (rating > BestMatch) { 01250 if (BestMatch > 0) { 01251 Result->Config2 = Result->Config; 01252 Best2Match = BestMatch; 01253 } else { 01254 Result->Config2 = ConfigNum; 01255 } 01256 Result->Config = ConfigNum; 01257 BestMatch = rating; 01258 } else if (rating > Best2Match) { 01259 Result->Config2 = ConfigNum; 01260 Best2Match = rating; 01261 } 01262 } 01263 01264 /* Compute Certainty Rating */ 01265 Result->Rating = (65536.0 - BestMatch) / 65536.0; 01266 01267 return BestMatch; 01268 } 01269 01270 // Applies the CN normalization factor to the given rating and returns 01271 // the modified rating. 
01272 float IntegerMatcher::ApplyCNCorrection(float rating, int blob_length, 01273 int normalization_factor, 01274 int matcher_multiplier) { 01275 return (rating * blob_length + 01276 matcher_multiplier * normalization_factor / 256.0) / 01277 (blob_length + matcher_multiplier); 01278 } 01279 01280 /*---------------------------------------------------------------------------*/ 01281 #ifndef GRAPHICS_DISABLED 01282 // Print debug information about the best match for the current class. 01283 void IntegerMatcher::DebugBestMatch( 01284 int BestMatch, INT_RESULT Result) { 01285 tprintf("Rating = %5.1f%% Best Config = %3d, Distance = %5.1f\n", 01286 100.0 * Result->Rating, Result->Config, 01287 100.0 * (65536.0 - BestMatch) / 65536.0); 01288 } 01289 #endif 01290 01291 /*---------------------------------------------------------------------------*/ 01292 void 01293 HeapSort (int n, register int ra[], register int rb[]) { 01294 /* 01295 ** Parameters: 01296 ** n Number of elements to sort 01297 ** ra Key array [1..n] 01298 ** rb Index array [1..n] 01299 ** Globals: 01300 ** Operation: 01301 ** Sort Key array in ascending order using heap sort 01302 ** algorithm. Also sort Index array that is tied to 01303 ** the key array. 01304 ** Return: 01305 ** Exceptions: none 01306 ** History: Tue Feb 19 10:24:24 MST 1991, RWM, Created. 
01307 */ 01308 register int i, rra, rrb; 01309 int l, j, ir; 01310 01311 l = (n >> 1) + 1; 01312 ir = n; 01313 for (;;) { 01314 if (l > 1) { 01315 rra = ra[--l]; 01316 rrb = rb[l]; 01317 } 01318 else { 01319 rra = ra[ir]; 01320 rrb = rb[ir]; 01321 ra[ir] = ra[1]; 01322 rb[ir] = rb[1]; 01323 if (--ir == 1) { 01324 ra[1] = rra; 01325 rb[1] = rrb; 01326 return; 01327 } 01328 } 01329 i = l; 01330 j = l << 1; 01331 while (j <= ir) { 01332 if (j < ir && ra[j] < ra[j + 1]) 01333 ++j; 01334 if (rra < ra[j]) { 01335 ra[i] = ra[j]; 01336 rb[i] = rb[j]; 01337 j += (i = j); 01338 } 01339 else 01340 j = ir + 1; 01341 } 01342 ra[i] = rra; 01343 rb[i] = rrb; 01344 } 01345 }