tesseract
3.03
|
00001 00002 // File: lm_state.h 00003 // Description: Structures and functionality for capturing the state of 00004 // segmentation search guided by the language model. 00005 // 00006 // Author: Rika Antonova 00007 // Created: Mon Jun 20 11:26:43 PST 2012 00008 // 00009 // (C) Copyright 2012, Google Inc. 00010 // Licensed under the Apache License, Version 2.0 (the "License"); 00011 // you may not use this file except in compliance with the License. 00012 // You may obtain a copy of the License at 00013 // http://www.apache.org/licenses/LICENSE-2.0 00014 // Unless required by applicable law or agreed to in writing, software 00015 // distributed under the License is distributed on an "AS IS" BASIS, 00016 // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 00017 // See the License for the specific language governing permissions and 00018 // limitations under the License. 00019 // 00021 00022 #ifndef TESSERACT_WORDREC_LANGUAGE_MODEL_DEFS_H_ 00023 #define TESSERACT_WORDREC_LANGUAGE_MODEL_DEFS_H_ 00024 00025 #include "associate.h" 00026 #include "elst.h" 00027 #include "dawg.h" 00028 #include "lm_consistency.h" 00029 #include "matrix.h" 00030 #include "ratngs.h" 00031 #include "stopper.h" 00032 #include "strngs.h" 00033 00034 namespace tesseract { 00035 00036 // Used for expressing various language model flags. 00037 typedef unsigned char LanguageModelFlagsType; 00038 00039 // The following structs are used for storing the state of the language model 00040 // in the segmentation search graph. In this graph the nodes are BLOB_CHOICEs 00041 // and the links are the relationships between the underlying blobs (see 00042 // segsearch.h for a more detailed description). 00043 // Each of the BLOB_CHOICEs contains LanguageModelState struct, which has 00044 // a list of N best paths (list of ViterbiStateEntry) explored by the Viterbi 00045 // search leading up to and including this BLOB_CHOICE. 00046 // Each ViterbiStateEntry contains information from various components of the 00047 // language model: dawgs in which the path is found, character ngram model 00048 // probability of the path, script/chartype/font consistency info, state for 00049 // language-specific heuristics (e.g. hyphenated and compound words, lower/upper 00050 // case preferences, etc). 00051 // Each ViterbiStateEntry also contains the parent pointer, so that the path 00052 // that it represents (WERD_CHOICE) can be constructed by following these 00053 // parent pointers. 00054 00055 // Struct for storing additional information used by Dawg language model 00056 // component. It stores the set of active dawgs in which the sequence of 00057 // letters on a path can be found. 00058 struct LanguageModelDawgInfo { 00059 LanguageModelDawgInfo(DawgPositionVector *a, PermuterType pt) : permuter(pt) { 00060 active_dawgs = new DawgPositionVector(*a); 00061 } 00062 ~LanguageModelDawgInfo() { 00063 delete active_dawgs; 00064 } 00065 DawgPositionVector *active_dawgs; 00066 PermuterType permuter; 00067 }; 00068 00069 // Struct for storing additional information used by Ngram language model 00070 // component. 00071 struct LanguageModelNgramInfo { 00072 LanguageModelNgramInfo(const char *c, int l, bool p, float nc, float ncc) 00073 : context(c), context_unichar_step_len(l), pruned(p), ngram_cost(nc), 00074 ngram_and_classifier_cost(ncc) {} 00075 STRING context; // context string 00076 // Length of the context measured by advancing using UNICHAR::utf8_step() 00077 // (should be at most the order of the character ngram model used). 00078 int context_unichar_step_len; 00079 // The paths with pruned set are pruned out from the perspective of the 00080 // character ngram model. They are explored further because they represent 00081 // a dictionary match or a top choice. Thus ngram_info is still computed 00082 // for them in order to calculate the combined cost. 00083 bool pruned; 00084 // -ln(P_ngram_model(path)) 00085 float ngram_cost; 00086 // -[ ln(P_classifier(path)) + scale_factor * ln(P_ngram_model(path)) ] 00087 float ngram_and_classifier_cost; 00088 }; 00089 00090 // Struct for storing the information about a path in the segmentation graph 00091 // explored by Viterbi search. 00092 struct ViterbiStateEntry : public ELIST_LINK { 00093 ViterbiStateEntry(ViterbiStateEntry *pe, 00094 BLOB_CHOICE *b, float c, float ol, 00095 const LMConsistencyInfo &ci, 00096 const AssociateStats &as, 00097 LanguageModelFlagsType tcf, 00098 LanguageModelDawgInfo *d, 00099 LanguageModelNgramInfo *n, 00100 const char *debug_uch) 00101 : cost(c), curr_b(b), parent_vse(pe), competing_vse(NULL), 00102 ratings_sum(b->rating()), 00103 min_certainty(b->certainty()), adapted(b->IsAdapted()), length(1), 00104 outline_length(ol), consistency_info(ci), associate_stats(as), 00105 top_choice_flags(tcf), dawg_info(d), ngram_info(n), 00106 updated(true) { 00107 debug_str = (debug_uch == NULL) ? NULL : new STRING(); 00108 if (pe != NULL) { 00109 ratings_sum += pe->ratings_sum; 00110 if (pe->min_certainty < min_certainty) { 00111 min_certainty = pe->min_certainty; 00112 } 00113 adapted += pe->adapted; 00114 length += pe->length; 00115 outline_length += pe->outline_length; 00116 if (debug_uch != NULL) *debug_str += *(pe->debug_str); 00117 } 00118 if (debug_str != NULL && debug_uch != NULL) *debug_str += debug_uch; 00119 } 00120 ~ViterbiStateEntry() { 00121 delete dawg_info; 00122 delete ngram_info; 00123 delete debug_str; 00124 } 00125 // Comparator function for sorting ViterbiStateEntry_LISTs in 00126 // non-increasing order of costs. 00127 static int Compare(const void *e1, const void *e2) { 00128 const ViterbiStateEntry *ve1 = 00129 *reinterpret_cast<const ViterbiStateEntry * const *>(e1); 00130 const ViterbiStateEntry *ve2 = 00131 *reinterpret_cast<const ViterbiStateEntry * const *>(e2); 00132 return (ve1->cost < ve2->cost) ? -1 : 1; 00133 } 00134 inline bool Consistent() const { 00135 if (dawg_info != NULL && consistency_info.NumInconsistentCase() == 0) { 00136 return true; 00137 } 00138 return consistency_info.Consistent(); 00139 } 00140 // Returns true if this VSE has an alphanumeric character as its classifier 00141 // result. 00142 bool HasAlnumChoice(const UNICHARSET& unicharset) { 00143 if (curr_b == NULL) return false; 00144 UNICHAR_ID unichar_id = curr_b->unichar_id(); 00145 if (unicharset.get_isalpha(unichar_id) || 00146 unicharset.get_isdigit(unichar_id)) 00147 return true; 00148 return false; 00149 } 00150 void Print(const char *msg) const; 00151 00152 // The cost is an adjusted ratings sum, that is adjusted by all the language 00153 // model components that use Viterbi search. 00154 float cost; 00155 00156 // Pointers to BLOB_CHOICE and parent ViterbiStateEntry (not owned by this). 00157 BLOB_CHOICE *curr_b; 00158 ViterbiStateEntry *parent_vse; 00159 // Pointer to a case-competing ViterbiStateEntry in the same list that 00160 // represents a path ending in the same letter of the opposite case. 00161 ViterbiStateEntry *competing_vse; 00162 00163 // Various information about the characters on the path represented 00164 // by this ViterbiStateEntry. 00165 float ratings_sum; // sum of ratings of character on the path 00166 float min_certainty; // minimum certainty on the path 00167 int adapted; // number of BLOB_CHOICES from adapted templates 00168 int length; // number of characters on the path 00169 float outline_length; // length of the outline so far 00170 LMConsistencyInfo consistency_info; // path consistency info 00171 AssociateStats associate_stats; // character widths/gaps/seams 00172 00173 // Flags for marking the entry as a top choice path with 00174 // the smallest rating or lower/upper case letters). 00175 LanguageModelFlagsType top_choice_flags; 00176 00177 // Extra information maintained by Dawg laguage model component 00178 // (owned by ViterbiStateEntry). 00179 LanguageModelDawgInfo *dawg_info; 00180 00181 // Extra information maintained by Ngram laguage model component 00182 // (owned by ViterbiStateEntry). 00183 LanguageModelNgramInfo *ngram_info; 00184 00185 bool updated; // set to true if the entry has just been created/updated 00186 // UTF8 string representing the path corresponding to this vse. 00187 // Populated only in when language_model_debug_level > 0. 00188 STRING *debug_str; 00189 }; 00190 00191 ELISTIZEH(ViterbiStateEntry); 00192 00193 // Struct to store information maintained by various language model components. 00194 struct LanguageModelState { 00195 LanguageModelState() : 00196 viterbi_state_entries_prunable_length(0), 00197 viterbi_state_entries_prunable_max_cost(MAX_FLOAT32), 00198 viterbi_state_entries_length(0) {} 00199 ~LanguageModelState() {} 00200 00201 // Clears the viterbi search state back to its initial conditions. 00202 void Clear(); 00203 00204 void Print(const char *msg); 00205 00206 // Storage for the Viterbi state. 00207 ViterbiStateEntry_LIST viterbi_state_entries; 00208 // Number and max cost of prunable paths in viterbi_state_entries. 00209 int viterbi_state_entries_prunable_length; 00210 float viterbi_state_entries_prunable_max_cost; 00211 // Total number of entries in viterbi_state_entries. 00212 int viterbi_state_entries_length; 00213 }; 00214 00215 // Bundle together all the things pertaining to the best choice/state. 00216 struct BestChoiceBundle { 00217 explicit BestChoiceBundle(int matrix_dimension) 00218 : updated(false), best_vse(NULL) { 00219 beam.reserve(matrix_dimension); 00220 for (int i = 0; i < matrix_dimension; ++i) 00221 beam.push_back(new LanguageModelState); 00222 } 00223 ~BestChoiceBundle() {} 00224 00225 // Flag to indicate whether anything was changed. 00226 bool updated; 00227 // Places to try to fix the word suggested by ambiguity checking. 00228 DANGERR fixpt; 00229 // The beam. One LanguageModelState containing a list of ViterbiStateEntry per 00230 // row in the ratings matrix containing all VSEs whose BLOB_CHOICE is 00231 // somewhere in the corresponding row. 00232 PointerVector<LanguageModelState> beam; 00233 // Best ViterbiStateEntry and BLOB_CHOICE. 00234 ViterbiStateEntry *best_vse; 00235 }; 00236 00237 } // namespace tesseract 00238 00239 #endif // TESSERACT_WORDREC_LANGUAGE_MODEL_DEFS_H_