tesseract  3.03
/usr/local/google/home/jbreiden/tesseract-ocr-read-only/wordrec/lm_state.h
Go to the documentation of this file.
00001 
00002 // File:        lm_state.h
00003 // Description: Structures and functionality for capturing the state of
00004 //              segmentation search guided by the language model.
00005 //
00006 // Author:      Rika Antonova
00007 // Created:     Mon Jun 20 11:26:43 PST 2012
00008 //
00009 // (C) Copyright 2012, Google Inc.
00010 // Licensed under the Apache License, Version 2.0 (the "License");
00011 // you may not use this file except in compliance with the License.
00012 // You may obtain a copy of the License at
00013 // http://www.apache.org/licenses/LICENSE-2.0
00014 // Unless required by applicable law or agreed to in writing, software
00015 // distributed under the License is distributed on an "AS IS" BASIS,
00016 // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
00017 // See the License for the specific language governing permissions and
00018 // limitations under the License.
00019 //
00021 
00022 #ifndef TESSERACT_WORDREC_LANGUAGE_MODEL_DEFS_H_
00023 #define TESSERACT_WORDREC_LANGUAGE_MODEL_DEFS_H_
00024 
00025 #include "associate.h"
00026 #include "elst.h"
00027 #include "dawg.h"
00028 #include "lm_consistency.h"
00029 #include "matrix.h"
00030 #include "ratngs.h"
00031 #include "stopper.h"
00032 #include "strngs.h"
00033 
00034 namespace tesseract {
00035 
00036 // Used for expressing various language model flags.
00037 typedef unsigned char LanguageModelFlagsType;
00038 
00039 // The following structs are used for storing the state of the language model
00040 // in the segmentation search graph. In this graph the nodes are BLOB_CHOICEs
00041 // and the links are the relationships between the underlying blobs (see
00042 // segsearch.h for a more detailed description).
00043 // Each of the BLOB_CHOICEs contains LanguageModelState struct, which has
00044 // a list of N best paths (list of ViterbiStateEntry) explored by the Viterbi
00045 // search leading up to and including this BLOB_CHOICE.
00046 // Each ViterbiStateEntry contains information from various components of the
00047 // language model: dawgs in which the path is found, character ngram model
00048 // probability of the path, script/chartype/font consistency info, state for
00049 // language-specific heuristics (e.g. hyphenated and compound words, lower/upper
00050 // case preferences, etc).
00051 // Each ViterbiStateEntry also contains the parent pointer, so that the path
00052 // that it represents (WERD_CHOICE) can be constructed by following these
00053 // parent pointers.
00054 
00055 // Struct for storing additional information used by Dawg language model
00056 // component. It stores the set of active dawgs in which the sequence of
00057 // letters on a path can be found.
00058 struct LanguageModelDawgInfo {
00059   LanguageModelDawgInfo(DawgPositionVector *a, PermuterType pt) : permuter(pt) {
00060     active_dawgs = new DawgPositionVector(*a);
00061   }
00062   ~LanguageModelDawgInfo() {
00063     delete active_dawgs;
00064   }
00065   DawgPositionVector *active_dawgs;
00066   PermuterType permuter;
00067 };
00068 
00069 // Struct for storing additional information used by Ngram language model
00070 // component.
00071 struct LanguageModelNgramInfo {
00072   LanguageModelNgramInfo(const char *c, int l, bool p, float nc, float ncc)
00073     : context(c), context_unichar_step_len(l), pruned(p), ngram_cost(nc),
00074       ngram_and_classifier_cost(ncc) {}
00075   STRING context;  // context string
00076   // Length of the context measured by advancing using UNICHAR::utf8_step()
00077   // (should be at most the order of the character ngram model used).
00078   int context_unichar_step_len;
00079   // The paths with pruned set are pruned out from the perspective of the
00080   // character ngram model. They are explored further because they represent
00081   // a dictionary match or a top choice. Thus ngram_info is still computed
00082   // for them in order to calculate the combined cost.
00083   bool pruned;
00084   // -ln(P_ngram_model(path))
00085   float ngram_cost;
00086   // -[ ln(P_classifier(path)) + scale_factor * ln(P_ngram_model(path)) ]
00087   float ngram_and_classifier_cost;
00088 };
00089 
00090 // Struct for storing the information about a path in the segmentation graph
00091 // explored by Viterbi search.
00092 struct ViterbiStateEntry : public ELIST_LINK {
00093   ViterbiStateEntry(ViterbiStateEntry *pe,
00094                     BLOB_CHOICE *b, float c, float ol,
00095                     const LMConsistencyInfo &ci,
00096                     const AssociateStats &as,
00097                     LanguageModelFlagsType tcf,
00098                     LanguageModelDawgInfo *d,
00099                     LanguageModelNgramInfo *n,
00100                     const char *debug_uch)
00101     : cost(c), curr_b(b), parent_vse(pe), competing_vse(NULL),
00102       ratings_sum(b->rating()),
00103       min_certainty(b->certainty()), adapted(b->IsAdapted()), length(1),
00104       outline_length(ol), consistency_info(ci), associate_stats(as),
00105       top_choice_flags(tcf), dawg_info(d), ngram_info(n),
00106       updated(true) {
00107     debug_str = (debug_uch == NULL) ? NULL : new STRING();
00108     if (pe != NULL) {
00109       ratings_sum += pe->ratings_sum;
00110       if (pe->min_certainty < min_certainty) {
00111         min_certainty = pe->min_certainty;
00112       }
00113       adapted += pe->adapted;
00114       length += pe->length;
00115       outline_length += pe->outline_length;
00116       if (debug_uch != NULL) *debug_str += *(pe->debug_str);
00117     }
00118     if (debug_str != NULL && debug_uch != NULL) *debug_str += debug_uch;
00119   }
00120   ~ViterbiStateEntry() {
00121     delete dawg_info;
00122     delete ngram_info;
00123     delete debug_str;
00124   }
00125   // Comparator function for sorting ViterbiStateEntry_LISTs in
00126   // non-increasing order of costs.
00127   static int Compare(const void *e1, const void *e2) {
00128     const ViterbiStateEntry *ve1 =
00129       *reinterpret_cast<const ViterbiStateEntry * const *>(e1);
00130     const ViterbiStateEntry *ve2 =
00131       *reinterpret_cast<const ViterbiStateEntry * const *>(e2);
00132     return (ve1->cost < ve2->cost) ? -1 : 1;
00133   }
00134   inline bool Consistent() const {
00135     if (dawg_info != NULL && consistency_info.NumInconsistentCase() == 0) {
00136       return true;
00137     }
00138     return consistency_info.Consistent();
00139   }
00140   // Returns true if this VSE has an alphanumeric character as its classifier
00141   // result.
00142   bool HasAlnumChoice(const UNICHARSET& unicharset) {
00143     if (curr_b == NULL) return false;
00144     UNICHAR_ID unichar_id =  curr_b->unichar_id();
00145     if (unicharset.get_isalpha(unichar_id) ||
00146         unicharset.get_isdigit(unichar_id))
00147       return true;
00148     return false;
00149   }
00150   void Print(const char *msg) const;
00151 
00152   // The cost is an adjusted ratings sum, that is adjusted by all the language
00153   // model components that use Viterbi search.
00154   float cost;
00155 
00156   // Pointers to BLOB_CHOICE and parent ViterbiStateEntry (not owned by this).
00157   BLOB_CHOICE *curr_b;
00158   ViterbiStateEntry *parent_vse;
00159   // Pointer to a case-competing ViterbiStateEntry in the same list that
00160   // represents a path ending in the same letter of the opposite case.
00161   ViterbiStateEntry *competing_vse;
00162 
00163   // Various information about the characters on the path represented
00164   // by this ViterbiStateEntry.
00165   float ratings_sum;  // sum of ratings of character on the path
00166   float min_certainty;  // minimum certainty on the path
00167   int adapted;  // number of BLOB_CHOICES from adapted templates
00168   int length;  // number of characters on the path
00169   float outline_length;  // length of the outline so far
00170   LMConsistencyInfo consistency_info;  // path consistency info
00171   AssociateStats associate_stats;  // character widths/gaps/seams
00172 
00173   // Flags for marking the entry as a top choice path with
00174   // the smallest rating or lower/upper case letters).
00175   LanguageModelFlagsType top_choice_flags;
00176 
00177   // Extra information maintained by Dawg laguage model component
00178   // (owned by ViterbiStateEntry).
00179   LanguageModelDawgInfo *dawg_info;
00180 
00181   // Extra information maintained by Ngram laguage model component
00182   // (owned by ViterbiStateEntry).
00183   LanguageModelNgramInfo *ngram_info;
00184 
00185   bool updated;  // set to true if the entry has just been created/updated
00186   // UTF8 string representing the path corresponding to this vse.
00187   // Populated only in when language_model_debug_level > 0.
00188   STRING *debug_str;
00189 };
00190 
00191 ELISTIZEH(ViterbiStateEntry);
00192 
00193 // Struct to store information maintained by various language model components.
00194 struct LanguageModelState {
00195   LanguageModelState() :
00196      viterbi_state_entries_prunable_length(0),
00197     viterbi_state_entries_prunable_max_cost(MAX_FLOAT32),
00198     viterbi_state_entries_length(0) {}
00199   ~LanguageModelState() {}
00200 
00201   // Clears the viterbi search state back to its initial conditions.
00202   void Clear();
00203 
00204   void Print(const char *msg);
00205 
00206   // Storage for the Viterbi state.
00207   ViterbiStateEntry_LIST viterbi_state_entries;
00208   // Number and max cost of prunable paths in viterbi_state_entries.
00209   int viterbi_state_entries_prunable_length;
00210   float viterbi_state_entries_prunable_max_cost;
00211   // Total number of entries in viterbi_state_entries.
00212   int viterbi_state_entries_length;
00213 };
00214 
00215 // Bundle together all the things pertaining to the best choice/state.
00216 struct BestChoiceBundle {
00217   explicit BestChoiceBundle(int matrix_dimension)
00218     : updated(false), best_vse(NULL) {
00219     beam.reserve(matrix_dimension);
00220     for (int i = 0; i < matrix_dimension; ++i)
00221       beam.push_back(new LanguageModelState);
00222   }
00223   ~BestChoiceBundle() {}
00224 
00225   // Flag to indicate whether anything was changed.
00226   bool updated;
00227   // Places to try to fix the word suggested by ambiguity checking.
00228   DANGERR fixpt;
00229   // The beam. One LanguageModelState containing a list of ViterbiStateEntry per
00230   // row in the ratings matrix containing all VSEs whose BLOB_CHOICE is
00231   // somewhere in the corresponding row.
00232   PointerVector<LanguageModelState> beam;
00233   // Best ViterbiStateEntry and BLOB_CHOICE.
00234   ViterbiStateEntry *best_vse;
00235 };
00236 
00237 }  // namespace tesseract
00238 
00239 #endif  // TESSERACT_WORDREC_LANGUAGE_MODEL_DEFS_H_
 All Classes Namespaces Files Functions Variables Typedefs Enumerations Enumerator Friends Defines