tesseract
3.03
|
00001 /****************************************************************************** 00002 ** Filename: intmatcher.h 00003 ** Purpose: Interface to high level generic classifier routines. 00004 ** Author: Robert Moss 00005 ** History: Wed Feb 13 15:24:15 MST 1991, RWM, Created. 00006 ** 00007 ** (c) Copyright Hewlett-Packard Company, 1988. 00008 ** Licensed under the Apache License, Version 2.0 (the "License"); 00009 ** you may not use this file except in compliance with the License. 00010 ** You may obtain a copy of the License at 00011 ** http://www.apache.org/licenses/LICENSE-2.0 00012 ** Unless required by applicable law or agreed to in writing, software 00013 ** distributed under the License is distributed on an "AS IS" BASIS, 00014 ** WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 00015 ** See the License for the specific language governing permissions and 00016 ** limitations under the License. 00017 ******************************************************************************/ 00018 #ifndef INTMATCHER_H 00019 #define INTMATCHER_H 00020 00021 #include "params.h" 00022 00023 // Character fragments could be present in the trained templates 00024 // but turned on/off on a language-by-language basis or depending 00025 // on particular properties of the corpus (e.g. when we expect the 00026 // images to have low exposure). 
00027 extern BOOL_VAR_H(disable_character_fragments, FALSE, 00028 "Do not include character fragments in the" 00029 " results of the classifier"); 00030 00031 extern INT_VAR_H(classify_integer_matcher_multiplier, 10, 00032 "Integer Matcher Multiplier 0-255: "); 00033 00034 00038 #include "intproto.h" 00039 #include "cutoffs.h" 00040 00041 struct INT_RESULT_STRUCT { 00042 INT_RESULT_STRUCT() : Rating(0.0f), Config(0), Config2(0), FeatureMisses(0) {} 00043 00044 FLOAT32 Rating; 00045 uinT8 Config; 00046 uinT8 Config2; 00047 uinT16 FeatureMisses; 00048 }; 00049 00050 typedef INT_RESULT_STRUCT *INT_RESULT; 00051 00052 00053 struct CP_RESULT_STRUCT { 00054 CP_RESULT_STRUCT() : Rating(0.0f), Class(0) {} 00055 00056 FLOAT32 Rating; 00057 INT_RESULT_STRUCT IMResult; 00058 CLASS_ID Class; 00059 }; 00060 00061 /*---------------------------------------------------------------------------- 00062 Variables 00063 -----------------------------------------------------------------------------*/ 00064 00065 extern INT_VAR_H(classify_adapt_proto_thresh, 230, 00066 "Threshold for good protos during adaptive 0-255: "); 00067 00068 extern INT_VAR_H(classify_adapt_feature_thresh, 230, 00069 "Threshold for good features during adaptive 0-255: "); 00070 00075 #define SE_TABLE_BITS 9 00076 #define SE_TABLE_SIZE 512 00077 00078 struct ScratchEvidence { 00079 uinT8 feature_evidence_[MAX_NUM_CONFIGS]; 00080 int sum_feature_evidence_[MAX_NUM_CONFIGS]; 00081 uinT8 proto_evidence_[MAX_NUM_PROTOS][MAX_PROTO_INDEX]; 00082 00083 void Clear(const INT_CLASS class_template); 00084 void ClearFeatureEvidence(const INT_CLASS class_template); 00085 void NormalizeSums(INT_CLASS ClassTemplate, inT16 NumFeatures, 00086 inT32 used_features); 00087 void UpdateSumOfProtoEvidences( 00088 INT_CLASS ClassTemplate, BIT_VECTOR ConfigMask, inT16 NumFeatures); 00089 }; 00090 00091 00092 class IntegerMatcher { 00093 public: 00094 // Integer Matcher Theta Fudge (0-255). 
00095 static const int kIntThetaFudge = 128; 00096 // Bits in Similarity to Evidence Lookup (8-9). 00097 static const int kEvidenceTableBits = 9; 00098 // Integer Evidence Truncation Bits (8-14). 00099 static const int kIntEvidenceTruncBits = 14; 00100 // Similarity to Evidence Table Exponential Multiplier. 00101 static const float kSEExponentialMultiplier; 00102 // Center of Similarity Curve. 00103 static const float kSimilarityCenter; 00104 00105 IntegerMatcher() : classify_debug_level_(0) {} 00106 00107 void Init(tesseract::IntParam *classify_debug_level); 00108 00109 void Match(INT_CLASS ClassTemplate, 00110 BIT_VECTOR ProtoMask, 00111 BIT_VECTOR ConfigMask, 00112 inT16 NumFeatures, 00113 const INT_FEATURE_STRUCT* Features, 00114 INT_RESULT Result, 00115 int AdaptFeatureThreshold, 00116 int Debug, 00117 bool SeparateDebugWindows); 00118 00119 // Applies the CN normalization factor to the given rating and returns 00120 // the modified rating. 00121 float ApplyCNCorrection(float rating, int blob_length, 00122 int normalization_factor, int matcher_multiplier); 00123 00124 int FindGoodProtos(INT_CLASS ClassTemplate, 00125 BIT_VECTOR ProtoMask, 00126 BIT_VECTOR ConfigMask, 00127 uinT16 BlobLength, 00128 inT16 NumFeatures, 00129 INT_FEATURE_ARRAY Features, 00130 PROTO_ID *ProtoArray, 00131 int AdaptProtoThreshold, 00132 int Debug); 00133 00134 int FindBadFeatures(INT_CLASS ClassTemplate, 00135 BIT_VECTOR ProtoMask, 00136 BIT_VECTOR ConfigMask, 00137 uinT16 BlobLength, 00138 inT16 NumFeatures, 00139 INT_FEATURE_ARRAY Features, 00140 FEATURE_ID *FeatureArray, 00141 int AdaptFeatureThreshold, 00142 int Debug); 00143 00144 private: 00145 int UpdateTablesForFeature( 00146 INT_CLASS ClassTemplate, 00147 BIT_VECTOR ProtoMask, 00148 BIT_VECTOR ConfigMask, 00149 int FeatureNum, 00150 const INT_FEATURE_STRUCT* Feature, 00151 ScratchEvidence *evidence, 00152 int Debug); 00153 00154 int FindBestMatch(INT_CLASS ClassTemplate, 00155 const ScratchEvidence &tables, 00156 INT_RESULT 
Result); 00157 00158 #ifndef GRAPHICS_DISABLED 00159 void DebugFeatureProtoError( 00160 INT_CLASS ClassTemplate, 00161 BIT_VECTOR ProtoMask, 00162 BIT_VECTOR ConfigMask, 00163 const ScratchEvidence &tables, 00164 inT16 NumFeatures, 00165 int Debug); 00166 00167 void DisplayProtoDebugInfo( 00168 INT_CLASS ClassTemplate, 00169 BIT_VECTOR ProtoMask, 00170 BIT_VECTOR ConfigMask, 00171 const ScratchEvidence &tables, 00172 bool SeparateDebugWindows); 00173 00174 void DisplayFeatureDebugInfo( 00175 INT_CLASS ClassTemplate, 00176 BIT_VECTOR ProtoMask, 00177 BIT_VECTOR ConfigMask, 00178 inT16 NumFeatures, 00179 const INT_FEATURE_STRUCT* Features, 00180 int AdaptFeatureThreshold, 00181 int Debug, 00182 bool SeparateDebugWindows); 00183 00184 void DebugBestMatch(int BestMatch, INT_RESULT Result); 00185 #endif 00186 00187 00188 private: 00189 uinT8 similarity_evidence_table_[SE_TABLE_SIZE]; 00190 uinT32 evidence_table_mask_; 00191 uinT32 mult_trunc_shift_bits_; 00192 uinT32 table_trunc_shift_bits_; 00193 tesseract::IntParam *classify_debug_level_; 00194 uinT32 evidence_mult_mask_; 00195 }; 00196 00200 void IMDebugConfiguration(INT_FEATURE FeatureNum, 00201 uinT16 ActualProtoNum, 00202 uinT8 Evidence, 00203 BIT_VECTOR ConfigMask, 00204 uinT32 ConfigWord); 00205 00206 void IMDebugConfigurationSum(INT_FEATURE FeatureNum, 00207 uinT8 *FeatureEvidence, 00208 inT32 ConfigCount); 00209 00210 void HeapSort (int n, register int ra[], register int rb[]); 00211 00215 #endif