tesseract  3.03
/usr/local/google/home/jbreiden/tesseract-ocr-read-only/classify/intmatcher.h
Go to the documentation of this file.
00001 /******************************************************************************
00002  **     Filename:    intmatcher.h
00003  **     Purpose:     Interface to high level generic classifier routines.
00004  **     Author:      Robert Moss
00005  **     History:     Wed Feb 13 15:24:15 MST 1991, RWM, Created.
00006  **
00007  **     (c) Copyright Hewlett-Packard Company, 1988.
00008  ** Licensed under the Apache License, Version 2.0 (the "License");
00009  ** you may not use this file except in compliance with the License.
00010  ** You may obtain a copy of the License at
00011  ** http://www.apache.org/licenses/LICENSE-2.0
00012  ** Unless required by applicable law or agreed to in writing, software
00013  ** distributed under the License is distributed on an "AS IS" BASIS,
00014  ** WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
00015  ** See the License for the specific language governing permissions and
00016  ** limitations under the License.
00017  ******************************************************************************/
00018 #ifndef   INTMATCHER_H
00019 #define   INTMATCHER_H
00020 
00021 #include "params.h"
00022 
00023 // Character fragments could be present in the trained templates
00024 // but turned on/off on a language-by-language basis or depending
00025 // on particular properties of the corpus (e.g. when we expect the
00026 // images to have low exposure).
00027 extern BOOL_VAR_H(disable_character_fragments, FALSE,
00028                   "Do not include character fragments in the"
00029                   " results of the classifier");
00030 
      // Declares the integer parameter classify_integer_matcher_multiplier.
      // The macro is given the value 10 and a help string that describes a
      // 0-255 range.
      // NOTE(review): INT_VAR_H is provided by params.h; presumably its second
      // argument is the parameter's default value - confirm there.
00031 extern INT_VAR_H(classify_integer_matcher_multiplier, 10,
00032                  "Integer Matcher Multiplier  0-255:   ");
00033 
00034 
00038 #include "intproto.h"
00039 #include "cutoffs.h"
00040 
      // Result record for a single integer match. A pointer to this struct
      // (INT_RESULT, declared just below) is the Result argument of
      // IntegerMatcher::Match, FindBestMatch and DebugBestMatch.
00041 struct INT_RESULT_STRUCT {
      // Zero-initializes every field.
00042   INT_RESULT_STRUCT() : Rating(0.0f), Config(0), Config2(0), FeatureMisses(0) {}
00043 
      // NOTE(review): scale and direction (higher-is-better vs. lower-is-better)
      // of Rating are not visible in this header - confirm in the implementation.
00044   FLOAT32 Rating;
      // NOTE(review): presumably the best and second-best configuration
      // indices for the matched class - confirm against FindBestMatch.
00045   uinT8 Config;
00046   uinT8 Config2;
      // NOTE(review): presumably a count of features that did not match -
      // confirm in the implementation.
00047   uinT16 FeatureMisses;
00048 };
00049 
      // Pointer alias used in the matcher's function signatures.
00050 typedef INT_RESULT_STRUCT *INT_RESULT;
00051 
00052 
      // Bundles a class id and a rating with an embedded INT_RESULT_STRUCT.
      // NOTE(review): "CP" presumably stands for "class pruner" - confirm
      // against the code that produces these records.
00053 struct CP_RESULT_STRUCT {
      // Sets Rating and Class to zero. IMResult is not listed here, so it is
      // default-constructed; INT_RESULT_STRUCT's own constructor zeroes it.
00054   CP_RESULT_STRUCT() : Rating(0.0f), Class(0) {}
00055 
00056   FLOAT32 Rating;
00057   INT_RESULT_STRUCT IMResult;
00058   CLASS_ID Class;
00059 };
00060 
00061 /*----------------------------------------------------------------------------
00062             Variables
00063 -----------------------------------------------------------------------------*/
00064 
      // Declares the integer parameter classify_adapt_proto_thresh; the macro
      // is given the value 230 and a help string describing a 0-255 range.
      // NOTE(review): presumably the source of the AdaptProtoThreshold argument
      // of IntegerMatcher::FindGoodProtos - confirm at the call sites.
00065 extern INT_VAR_H(classify_adapt_proto_thresh, 230,
00066                  "Threshold for good protos during adaptive 0-255:   ");
00067 
      // Declares the integer parameter classify_adapt_feature_thresh; the macro
      // is given the value 230 and a help string describing a 0-255 range.
      // NOTE(review): presumably the source of the AdaptFeatureThreshold
      // argument of IntegerMatcher::Match / FindBadFeatures - confirm at the
      // call sites.
00068 extern INT_VAR_H(classify_adapt_feature_thresh, 230,
00069                  "Threshold for good features during adaptive 0-255:   ");
00070 
      // Dimensions of IntegerMatcher::similarity_evidence_table_, which is
      // declared with SE_TABLE_SIZE entries. 512 == 1 << 9, so the two values
      // must be changed together. IntegerMatcher::kEvidenceTableBits carries
      // the same value (9) as SE_TABLE_BITS.
00075 #define  SE_TABLE_BITS    9
00076 #define  SE_TABLE_SIZE  512
00077 
      // Working storage for evidence values. IntegerMatcher passes it by
      // pointer to UpdateTablesForFeature and by const reference to
      // FindBestMatch and the debug display routines.
00078 struct ScratchEvidence {
      // One byte of evidence per configuration (MAX_NUM_CONFIGS entries).
00079   uinT8 feature_evidence_[MAX_NUM_CONFIGS];
      // One int accumulator per configuration (MAX_NUM_CONFIGS entries).
00080   int sum_feature_evidence_[MAX_NUM_CONFIGS];
      // Evidence bytes indexed [proto][index], sized
      // MAX_NUM_PROTOS x MAX_PROTO_INDEX.
00081   uinT8 proto_evidence_[MAX_NUM_PROTOS][MAX_PROTO_INDEX];
00082 
      // NOTE(review): the four methods below are only declared here; the
      // descriptions of what they reset or accumulate should be confirmed
      // against their definitions. By name, Clear / ClearFeatureEvidence reset
      // the arrays for class_template, NormalizeSums rescales
      // sum_feature_evidence_, and UpdateSumOfProtoEvidences folds
      // proto_evidence_ into the sums for the configs selected by ConfigMask.
00083   void Clear(const INT_CLASS class_template);
00084   void ClearFeatureEvidence(const INT_CLASS class_template);
00085   void NormalizeSums(INT_CLASS ClassTemplate, inT16 NumFeatures,
00086                      inT32 used_features);
00087   void UpdateSumOfProtoEvidences(
00088     INT_CLASS ClassTemplate, BIT_VECTOR ConfigMask, inT16 NumFeatures);
00089 };
00090 
00091 
      // Integer matcher interface. Public entry points are Init, Match,
      // ApplyCNCorrection, FindGoodProtos and FindBadFeatures; everything else
      // is a private helper or private state.
00092 class IntegerMatcher {
00093  public:
00094   // Integer Matcher Theta Fudge (0-255).
00095   static const int kIntThetaFudge = 128;
00096   // Bits in Similarity to Evidence Lookup (8-9).
      // Same value as the SE_TABLE_BITS macro defined earlier in this header.
00097   static const int kEvidenceTableBits = 9;
00098   // Integer Evidence Truncation Bits (8-14).
00099   static const int kIntEvidenceTruncBits = 14;
00100   // Similarity to Evidence Table Exponential Multiplier.
      // Declared without an in-class initializer; the value is defined
      // out of line.
00101   static const float kSEExponentialMultiplier;
00102   // Center of Similarity Curve.
      // Declared without an in-class initializer; the value is defined
      // out of line.
00103   static const float kSimilarityCenter;
00104 
      // Only classify_debug_level_ is initialized (to a null pointer). The
      // lookup table and the mask/shift members are left uninitialized here.
      // NOTE(review): presumably Init() must run before any matching call -
      // confirm, and consider zero-initializing the remaining members.
00105   IntegerMatcher() : classify_debug_level_(0) {}
00106 
      // NOTE(review): presumably stores classify_debug_level and fills the
      // private table/mask/shift members - confirm in the definition.
00107   void Init(tesseract::IntParam *classify_debug_level);
00108 
      // Takes a class template, proto and config bit vectors, NumFeatures
      // features (read-only), and an INT_RESULT (pointer to INT_RESULT_STRUCT).
      // NOTE(review): presumably writes the match outcome through Result -
      // confirm in the definition.
00109   void Match(INT_CLASS ClassTemplate,
00110              BIT_VECTOR ProtoMask,
00111              BIT_VECTOR ConfigMask,
00112              inT16 NumFeatures,
00113              const INT_FEATURE_STRUCT* Features,
00114              INT_RESULT Result,
00115              int AdaptFeatureThreshold,
00116              int Debug,
00117              bool SeparateDebugWindows);
00118 
00119   // Applies the CN normalization factor to the given rating and returns
00120   // the modified rating.
00121   float ApplyCNCorrection(float rating, int blob_length,
00122                           int normalization_factor, int matcher_multiplier);
00123 
      // NOTE(review): presumably writes the ids of well-matched protos into
      // ProtoArray and returns how many were written; caller-side capacity
      // requirements for ProtoArray are not visible here - confirm.
00124   int FindGoodProtos(INT_CLASS ClassTemplate,
00125                      BIT_VECTOR ProtoMask,
00126                      BIT_VECTOR ConfigMask,
00127                      uinT16 BlobLength,
00128                      inT16 NumFeatures,
00129                      INT_FEATURE_ARRAY Features,
00130                      PROTO_ID *ProtoArray,
00131                      int AdaptProtoThreshold,
00132                      int Debug);
00133 
      // Same parameter shape as FindGoodProtos, with a FEATURE_ID output
      // array and a feature threshold instead.
      // NOTE(review): presumably writes the ids of poorly-matched features
      // into FeatureArray and returns how many were written - confirm.
00134   int FindBadFeatures(INT_CLASS ClassTemplate,
00135                       BIT_VECTOR ProtoMask,
00136                       BIT_VECTOR ConfigMask,
00137                       uinT16 BlobLength,
00138                       inT16 NumFeatures,
00139                       INT_FEATURE_ARRAY Features,
00140                       FEATURE_ID *FeatureArray,
00141                       int AdaptFeatureThreshold,
00142                       int Debug);
00143 
00144  private:
      // Processes one feature (FeatureNum / Feature); evidence is passed by
      // non-const pointer, so it is the mutable working state.
      // NOTE(review): meaning of the int return value is not visible here.
00145   int UpdateTablesForFeature(
00146       INT_CLASS ClassTemplate,
00147       BIT_VECTOR ProtoMask,
00148       BIT_VECTOR ConfigMask,
00149       int FeatureNum,
00150       const INT_FEATURE_STRUCT* Feature,
00151       ScratchEvidence *evidence,
00152       int Debug);
00153 
      // Reads the evidence tables (const reference) and takes an INT_RESULT.
      // NOTE(review): presumably fills Result and returns the best rating or
      // config - confirm in the definition.
00154   int FindBestMatch(INT_CLASS ClassTemplate,
00155                     const ScratchEvidence &tables,
00156                     INT_RESULT Result);
00157 
      // The four debug/display helpers below are declared only when
      // GRAPHICS_DISABLED is not defined.
00158 #ifndef GRAPHICS_DISABLED
00159   void DebugFeatureProtoError(
00160       INT_CLASS ClassTemplate,
00161       BIT_VECTOR ProtoMask,
00162       BIT_VECTOR ConfigMask,
00163       const ScratchEvidence &tables,
00164       inT16 NumFeatures,
00165       int Debug);
00166 
00167   void DisplayProtoDebugInfo(
00168       INT_CLASS ClassTemplate,
00169       BIT_VECTOR ProtoMask,
00170       BIT_VECTOR ConfigMask,
00171       const ScratchEvidence &tables,
00172       bool SeparateDebugWindows);
00173 
00174   void DisplayFeatureDebugInfo(
00175       INT_CLASS ClassTemplate,
00176       BIT_VECTOR ProtoMask,
00177       BIT_VECTOR ConfigMask,
00178       inT16 NumFeatures,
00179       const INT_FEATURE_STRUCT* Features,
00180       int AdaptFeatureThreshold,
00181       int Debug,
00182       bool SeparateDebugWindows);
00183 
00184   void DebugBestMatch(int BestMatch, INT_RESULT Result);
00185 #endif
00186 
00187 
      // Redundant access specifier: this section is already private. It
      // separates the data members from the private helper methods above.
00188  private:
      // Lookup table with SE_TABLE_SIZE (512) byte entries.
00189   uinT8 similarity_evidence_table_[SE_TABLE_SIZE];
      // NOTE(review): the mask and shift members below are presumably derived
      // from the k* constants in Init() - confirm. None of them is set by the
      // constructor.
00190   uinT32 evidence_table_mask_;
00191   uinT32 mult_trunc_shift_bits_;
00192   uinT32 table_trunc_shift_bits_;
      // Null after construction; the constructor initializes it with 0.
00193   tesseract::IntParam *classify_debug_level_;
00194   uinT32 evidence_mult_mask_;
00195 };
00196 
      // Free-function debug helper taking a feature, a proto number, one
      // evidence byte, the config mask and a 32-bit config word.
      // NOTE(review): presumably prints per-configuration evidence for one
      // feature/proto pair - confirm in the definition. The first parameter is
      // named FeatureNum but typed INT_FEATURE, whereas
      // IntegerMatcher::UpdateTablesForFeature takes `int FeatureNum`; confirm
      // which is intended.
00200 void IMDebugConfiguration(INT_FEATURE FeatureNum,
00201                           uinT16 ActualProtoNum,
00202                           uinT8 Evidence,
00203                           BIT_VECTOR ConfigMask,
00204                           uinT32 ConfigWord);
00205 
      // Free-function debug helper taking a feature, a pointer to evidence
      // bytes and a configuration count.
      // NOTE(review): presumably prints FeatureEvidence[0..ConfigCount) for one
      // feature - confirm in the definition, including whether FeatureEvidence
      // must hold at least ConfigCount entries.
00206 void IMDebugConfigurationSum(INT_FEATURE FeatureNum,
00207                              uinT8 *FeatureEvidence,
00208                              inT32 ConfigCount);
00209 
00210 void HeapSort (int n, register int ra[], register int rb[]);
00211 
00215 #endif
 All Classes Namespaces Files Functions Variables Typedefs Enumerations Enumerator Friends Defines