tesseract
3.03
|
00001 00002 // File: lm_consistency.h 00003 // Description: Struct for recording consistency of the paths representing 00004 // OCR hypotheses. 00005 // Author: Rika Antonova 00006 // Created: Mon Jun 20 11:26:43 PST 2012 00007 // 00008 // (C) Copyright 2012, Google Inc. 00009 // Licensed under the Apache License, Version 2.0 (the "License"); 00010 // you may not use this file except in compliance with the License. 00011 // You may obtain a copy of the License at 00012 // http://www.apache.org/licenses/LICENSE-2.0 00013 // Unless required by applicable law or agreed to in writing, software 00014 // distributed under the License is distributed on an "AS IS" BASIS, 00015 // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 00016 // See the License for the specific language governing permissions and 00017 // limitations under the License. 00018 // 00020 00021 #include "dawg.h" 00022 #include "dict.h" 00023 #include "host.h" 00024 #include "ratngs.h" 00025 00026 #ifndef TESSERACT_WORDREC_CONSISTENCY_H_ 00027 #define TESSERACT_WORDREC_CONSISTENCY_H_ 00028 00029 namespace tesseract { 00030 00031 static const char * const XHeightConsistencyEnumName[] = { 00032 "XH_GOOD", 00033 "XH_SUBNORMAL", 00034 "XH_INCONSISTENT", 00035 }; 00036 00037 // Struct for keeping track of the consistency of the path. 00038 struct LMConsistencyInfo { 00039 enum ChartypeEnum { CT_NONE, CT_ALPHA, CT_DIGIT, CT_OTHER}; 00040 00041 // How much do characters have to be shifted away from normal parameters 00042 // before we say they're not normal? 00043 static const int kShiftThresh = 1; 00044 00045 // How much shifting from subscript to superscript and back 00046 // before we declare shenanigans? 00047 static const int kMaxEntropy = 1; 00048 00049 // Script positions - order important for entropy calculation. 00050 static const int kSUB = 0, kNORM = 1, kSUP = 2; 00051 static const int kNumPos = 3; 00052 00053 explicit LMConsistencyInfo(const LMConsistencyInfo* parent_info) { 00054 if (parent_info == NULL) { 00055 // Initialize from scratch. 00056 num_alphas = 0; 00057 num_digits = 0; 00058 num_punc = 0; 00059 num_other = 0; 00060 chartype = CT_NONE; 00061 punc_ref = NO_EDGE; 00062 invalid_punc = false; 00063 num_non_first_upper = 0; 00064 num_lower = 0; 00065 script_id = 0; 00066 inconsistent_script = false; 00067 num_inconsistent_spaces = 0; 00068 inconsistent_font = false; 00069 // Initialize XHeight stats. 00070 for (int i = 0; i < kNumPos; i++) { 00071 xht_count[i] = 0; 00072 xht_count_punc[i] = 0; 00073 xht_lo[i] = 0; 00074 xht_hi[i] = 256; // kBlnCellHeight 00075 } 00076 xht_sp = -1; // This invalid value indicates that there was no parent. 00077 xpos_entropy = 0; 00078 xht_decision = XH_GOOD; 00079 } else { 00080 // Copy parent info 00081 *this = *parent_info; 00082 } 00083 } 00084 inline int NumInconsistentPunc() const { 00085 return invalid_punc ? num_punc : 0; 00086 } 00087 inline int NumInconsistentCase() const { 00088 return (num_non_first_upper > num_lower) ? num_lower : num_non_first_upper; 00089 } 00090 inline int NumInconsistentChartype() const { 00091 return (NumInconsistentPunc() + num_other + 00092 ((num_alphas > num_digits) ? num_digits : num_alphas)); 00093 } 00094 inline bool Consistent() const { 00095 return (NumInconsistentPunc() == 0 && NumInconsistentCase() == 0 && 00096 NumInconsistentChartype() == 0 && !inconsistent_script && 00097 !inconsistent_font && !InconsistentXHeight()); 00098 } 00099 inline int NumInconsistentSpaces() const { 00100 return num_inconsistent_spaces; 00101 } 00102 inline int InconsistentXHeight() const { 00103 return xht_decision == XH_INCONSISTENT; 00104 } 00105 void ComputeXheightConsistency(const BLOB_CHOICE *b, bool is_punc); 00106 float BodyMinXHeight() const { 00107 if (InconsistentXHeight()) 00108 return 0.0f; 00109 return xht_lo[kNORM]; 00110 } 00111 float BodyMaxXHeight() const { 00112 if (InconsistentXHeight()) 00113 return static_cast<float>(MAX_INT16); 00114 return xht_hi[kNORM]; 00115 } 00116 00117 int num_alphas; 00118 int num_digits; 00119 int num_punc; 00120 int num_other; 00121 ChartypeEnum chartype; 00122 EDGE_REF punc_ref; 00123 bool invalid_punc; 00124 int num_non_first_upper; 00125 int num_lower; 00126 int script_id; 00127 bool inconsistent_script; 00128 int num_inconsistent_spaces; 00129 bool inconsistent_font; 00130 // Metrics clumped by position. 00131 float xht_lo[kNumPos]; 00132 float xht_hi[kNumPos]; 00133 inT16 xht_count[kNumPos]; 00134 inT16 xht_count_punc[kNumPos]; 00135 inT16 xht_sp; 00136 inT16 xpos_entropy; 00137 XHeightConsistencyEnum xht_decision; 00138 }; 00139 00140 00141 } // namespace tesseract 00142 00143 #endif // TESSERACT_WORDREC_CONSISTENCY_H_