tesseract  3.03
/usr/local/google/home/jbreiden/tesseract-ocr-read-only/ccstruct/ratngs.h
Go to the documentation of this file.
00001 /**********************************************************************
00002  * File:        ratngs.h  (Formerly ratings.h)
00003  * Description: Definition of the WERD_CHOICE and BLOB_CHOICE classes.
00004  * Author:      Ray Smith
00005  * Created:     Thu Apr 23 11:40:38 BST 1992
00006  *
00007  * (C) Copyright 1992, Hewlett-Packard Ltd.
00008  ** Licensed under the Apache License, Version 2.0 (the "License");
00009  ** you may not use this file except in compliance with the License.
00010  ** You may obtain a copy of the License at
00011  ** http://www.apache.org/licenses/LICENSE-2.0
00012  ** Unless required by applicable law or agreed to in writing, software
00013  ** distributed under the License is distributed on an "AS IS" BASIS,
00014  ** WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
00015  ** See the License for the specific language governing permissions and
00016  ** limitations under the License.
00017  *
00018  **********************************************************************/
00019 
00020 #ifndef           RATNGS_H
00021 #define           RATNGS_H
00022 
00023 #include <assert.h>
00024 
00025 #include "clst.h"
00026 #include "elst.h"
00027 #include "genericvector.h"
00028 #include "matrix.h"
00029 #include "unichar.h"
00030 #include "unicharset.h"
00031 #include "werd.h"
00032 
00033 class MATRIX;
00034 struct TBLOB;
00035 struct TWERD;
00036 
00037 // Identifies which process produced a BLOB_CHOICE, so that inspecting a
00038 // blob's BLOB_CHOICEs reveals whether the blob has been classified.
00039 enum BlobChoiceClassifier {
00040   BCC_STATIC_CLASSIFIER = 0,   // Produced by the char_norm classifier.
00041   BCC_ADAPTED_CLASSIFIER = 1,  // Produced by the adaptive classifier.
00042   BCC_SPECKLE_CLASSIFIER = 2,  // Fallback used when classification failed.
00043   BCC_AMBIG = 3,               // Produced by ambiguity detection.
00044   BCC_FAKE = 4                 // Produced by some other process.
00045 };
00046 
00047 class BLOB_CHOICE: public ELIST_LINK
00048 {
00049   public:
00050     BLOB_CHOICE() {
00051       unichar_id_ = INVALID_UNICHAR_ID;
00052       fontinfo_id_ = -1;
00053       fontinfo_id2_ = -1;
00054       rating_ = MAX_FLOAT32;
00055       certainty_ = -MAX_FLOAT32;
00056       script_id_ = -1;
00057       xgap_before_ = 0;
00058       xgap_after_ = 0;
00059       min_xheight_ = 0.0f;
00060       max_xheight_ = 0.0f;
00061       yshift_ = 0.0f;
00062       classifier_ = BCC_FAKE;
00063     }
00064     BLOB_CHOICE(UNICHAR_ID src_unichar_id,  // character id
00065                 float src_rating,          // rating
00066                 float src_cert,            // certainty
00067                 inT16 src_fontinfo_id,     // font
00068                 inT16 src_fontinfo_id2,    // 2nd choice font
00069                 int script_id,             // script
00070                 float min_xheight,         // min xheight in image pixel units
00071                 float max_xheight,         // max xheight allowed by this char
00072                 float yshift,           // the larger of y shift (top or bottom)
00073                 BlobChoiceClassifier c);   // adapted match or other
00074     BLOB_CHOICE(const BLOB_CHOICE &other);
00075     ~BLOB_CHOICE() {}
00076 
00077     UNICHAR_ID unichar_id() const {
00078       return unichar_id_;
00079     }
00080     float rating() const {
00081       return rating_;
00082     }
00083     float certainty() const {
00084       return certainty_;
00085     }
00086     inT16 fontinfo_id() const {
00087       return fontinfo_id_;
00088     }
00089     inT16 fontinfo_id2() const {
00090       return fontinfo_id2_;
00091     }
00092     int script_id() const {
00093       return script_id_;
00094     }
00095     const MATRIX_COORD& matrix_cell() {
00096       return matrix_cell_;
00097     }
00098     inT16 xgap_before() const {
00099       return xgap_before_;
00100     }
00101     inT16 xgap_after() const {
00102       return xgap_after_;
00103     }
00104     float min_xheight() const {
00105       return min_xheight_;
00106     }
00107     float max_xheight() const {
00108       return max_xheight_;
00109     }
00110     float yshift() const {
00111       return yshift_;
00112     }
00113     BlobChoiceClassifier classifier() const {
00114       return classifier_;
00115     }
00116     bool IsAdapted() const {
00117       return classifier_ == BCC_ADAPTED_CLASSIFIER;
00118     }
00119     bool IsClassified() const {
00120       return classifier_ == BCC_STATIC_CLASSIFIER ||
00121              classifier_ == BCC_ADAPTED_CLASSIFIER ||
00122              classifier_ == BCC_SPECKLE_CLASSIFIER;
00123     }
00124 
00125     void set_unichar_id(UNICHAR_ID newunichar_id) {
00126       unichar_id_ = newunichar_id;
00127     }
00128     void set_rating(float newrat) {
00129       rating_ = newrat;
00130     }
00131     void set_certainty(float newrat) {
00132       certainty_ = newrat;
00133     }
00134     void set_fontinfo_id(inT16 newfont) {
00135       fontinfo_id_ = newfont;
00136     }
00137     void set_fontinfo_id2(inT16 newfont) {
00138       fontinfo_id2_ = newfont;
00139     }
00140     void set_script(int newscript_id) {
00141       script_id_ = newscript_id;
00142     }
00143     void set_matrix_cell(int col, int row) {
00144       matrix_cell_.col = col;
00145       matrix_cell_.row = row;
00146     }
00147     void set_xgap_before(inT16 gap) {
00148       xgap_before_ = gap;
00149     }
00150     void set_xgap_after(inT16 gap) {
00151       xgap_after_ = gap;
00152     }
00153     void set_classifier(BlobChoiceClassifier classifier) {
00154       classifier_ = classifier;
00155     }
00156     static BLOB_CHOICE* deep_copy(const BLOB_CHOICE* src) {
00157       BLOB_CHOICE* choice = new BLOB_CHOICE;
00158       *choice = *src;
00159       return choice;
00160     }
00161     // Returns true if *this and other agree on the baseline and x-height
00162     // to within some tolerance based on a given estimate of the x-height.
00163     bool PosAndSizeAgree(const BLOB_CHOICE& other, float x_height,
00164                          bool debug) const;
00165 
00166     void print(const UNICHARSET *unicharset) const {
00167       tprintf("r%.2f c%.2f x[%g,%g]: %d %s",
00168               rating_, certainty_,
00169               min_xheight_, max_xheight_, unichar_id_,
00170               (unicharset == NULL) ? "" :
00171               unicharset->debug_str(unichar_id_).string());
00172     }
00173     void print_full() const {
00174       print(NULL);
00175       tprintf(" script=%d, font1=%d, font2=%d, yshift=%g, classifier=%d\n",
00176               script_id_, fontinfo_id_, fontinfo_id2_, yshift_, classifier_);
00177     }
00178     // Sort function for sorting BLOB_CHOICEs in increasing order of rating.
00179     static int SortByRating(const void *p1, const void *p2) {
00180       const BLOB_CHOICE *bc1 =
00181           *reinterpret_cast<const BLOB_CHOICE * const *>(p1);
00182       const BLOB_CHOICE *bc2 =
00183           *reinterpret_cast<const BLOB_CHOICE * const *>(p2);
00184       return (bc1->rating_ < bc2->rating_) ? -1 : 1;
00185     }
00186 
00187  private:
00188   UNICHAR_ID unichar_id_;          // unichar id
00189   inT16 fontinfo_id_;              // char font information
00190   inT16 fontinfo_id2_;             // 2nd choice font information
00191   // Rating is the classifier distance weighted by the length of the outline
00192   // in the blob. In terms of probability, classifier distance is -klog p such
00193   // that the resulting distance is in the range [0, 1] and then
00194   // rating = w (-k log p) where w is the weight for the length of the outline.
00195   // Sums of ratings may be compared meaningfully for words of different
00196   // segmentation.
00197   float rating_;                  // size related
00198   // Certainty is a number in [-20, 0] indicating the classifier certainty
00199   // of the choice. In terms of probability, certainty = 20 (k log p) where
00200   // k is defined as above to normalize -klog p to the range [0, 1].
00201   float certainty_;               // absolute
00202   int script_id_;
00203   // Holds the position of this choice in the ratings matrix.
00204   // Used to location position in the matrix during path backtracking.
00205   MATRIX_COORD matrix_cell_;
00206   inT16 xgap_before_;
00207   inT16 xgap_after_;
00208   // X-height range (in image pixels) that this classification supports.
00209   float min_xheight_;
00210   float max_xheight_;
00211   // yshift_ - The vertical distance (in image pixels) the character is
00212   //           shifted (up or down) from an acceptable y position.
00213   float yshift_;
00214   BlobChoiceClassifier classifier_;  // What generated *this.
00215 };
00216 
00217 // Make BLOB_CHOICE listable.
      // NOTE(review): presumably this is what declares BLOB_CHOICE_LIST, the list
      // type taken by FindMatchingChoice below -- confirm against elst.h.
00218 ELISTIZEH(BLOB_CHOICE)
00219 
00220 // Return the BLOB_CHOICE in bc_list matching a given unichar_id,
00221 // or NULL if there is no match.
      // Only declared here; the definition lives outside this header.
00222 BLOB_CHOICE *FindMatchingChoice(UNICHAR_ID char_id, BLOB_CHOICE_LIST *bc_list);
00223 
00224 // Codes stored in a WERD_CHOICE to record which permuter produced it.
00225 enum PermuterType {
00226   NO_PERM = 0,
00227   PUNC_PERM = 1,
00228   TOP_CHOICE_PERM = 2,
00229   LOWER_CASE_PERM = 3,
00230   UPPER_CASE_PERM = 4,
00231   NGRAM_PERM = 5,
00232   NUMBER_PERM = 6,
00233   USER_PATTERN_PERM = 7,
00234   SYSTEM_DAWG_PERM = 8,
00235   DOC_DAWG_PERM = 9,
00236   USER_DAWG_PERM = 10,
00237   FREQ_DAWG_PERM = 11,
00238   COMPOUND_PERM = 12,
00239 
00240   NUM_PERMUTER_TYPES  // One past the last code; keep as the final entry.
00241 };
00242 
00243 namespace tesseract {
00244 // Vertical placement class of a character: normal, sub/superscript or dropcap.
00245 enum ScriptPos {
00246   SP_NORMAL = 0,
00247   SP_SUBSCRIPT = 1,
00248   SP_SUPERSCRIPT = 2,
00249   SP_DROPCAP = 3
00250 };
00251 
00252 const char *ScriptPosToString(ScriptPos script_pos);
00253 
00254 }  // namespace tesseract.
00255 
00256 class WERD_CHOICE : public ELIST_LINK {
      // A candidate reading of a word: parallel arrays of unichar ids, script
      // positions, blob counts (state) and certainties, plus the word-level
      // rating, certainty, x-height range and permuter code.
      // The arrays are owned by the object and sized by reserved_.
00257  public:
        // Rating assigned by make_bad(). The value is defined outside this header.
00258   static const float kBadRating;
        // Maps a permuter code to a string; defined outside this header.
00259   static const char *permuter_name(uinT8 permuter);
00260 
        // Creates an empty word with room reserved for 8 unichars.
00261   WERD_CHOICE(const UNICHARSET *unicharset)
00262     : unicharset_(unicharset) { this->init(8); }
        // Creates an empty word with room reserved for `reserved` unichars.
00263   WERD_CHOICE(const UNICHARSET *unicharset, int reserved)
00264     : unicharset_(unicharset) { this->init(reserved); }
        // Creates a word by forwarding the string, lengths, rating, certainty and
        // permuter to the string-based init() declared further down.
00265   WERD_CHOICE(const char *src_string,
00266               const char *src_lengths,
00267               float src_rating,
00268               float src_certainty,
00269               uinT8 src_permuter,
00270               const UNICHARSET &unicharset)
00271     : unicharset_(&unicharset) {
00272     this->init(src_string, src_lengths, src_rating,
00273                src_certainty, src_permuter);
00274   }
00275   WERD_CHOICE(const char *src_string, const UNICHARSET &unicharset);
        // Copy constructor: reserves space for word.length() entries, then
        // delegates the actual copy to operator=.
00276   WERD_CHOICE(const WERD_CHOICE &word) : unicharset_(word.unicharset_) {
00277     this->init(word.length());
00278     this->operator=(word);
00279   }
00280   ~WERD_CHOICE();
00281 
00282   const UNICHARSET *unicharset() const {
00283     return unicharset_;
00284   }
00285   inline int length() const {
00286     return length_;
00287   }
00288   float adjust_factor() const {
00289     return adjust_factor_;
00290   }
00291   void set_adjust_factor(float factor) {
00292     adjust_factor_ = factor;
00293   }
        // Borrowed pointer to the internal array; only the first length()
        // entries are meaningful.
00294   inline const UNICHAR_ID *unichar_ids() const {
00295     return unichar_ids_;
00296   }
        // Returns the unichar id at index. index must lie in [0, length_).
        // Both bounds are asserted, so a negative index is caught as well.
00297   inline const UNICHAR_ID unichar_id(int index) const {
00298     assert(0 <= index && index < length_);
00299     return unichar_ids_[index];
00300   }
        // Returns the number of blobs for the unichar at index. Not range-checked.
00301   inline int state(int index) const {
00302     return state_[index];
00303   }
        // Returns the script position at index, or SP_NORMAL when index is
        // outside [0, length_).
00304   tesseract::ScriptPos BlobPosition(int index) const {
00305     if (index < 0 || index >= length_)
00306       return tesseract::SP_NORMAL;
00307     return script_pos_[index];
00308   }
00309   inline float rating() const {
00310     return rating_;
00311   }
        // Word-level certainty.
00312   inline float certainty() const {
00313     return certainty_;
00314   }
        // Certainty of the unichar at index. Not range-checked.
00315   inline float certainty(int index) const {
00316     return certainties_[index];
00317   }
00318   inline float min_x_height() const {
00319     return min_x_height_;
00320   }
00321   inline float max_x_height() const {
00322     return max_x_height_;
00323   }
00324   inline void set_x_heights(float min_height, float max_height) {
00325     min_x_height_ = min_height;
00326     max_x_height_ = max_height;
00327   }
00328   inline uinT8 permuter() const {
00329     return permuter_;
00330   }
00331   const char *permuter_name() const;
00332   // Returns the BLOB_CHOICE_LIST corresponding to the given index in the word,
00333   // taken from the appropriate cell in the ratings MATRIX.
00334   // Borrowed pointer, so do not delete.
00335   BLOB_CHOICE_LIST* blob_choices(int index, MATRIX* ratings) const;
00336 
00337   // Returns the MATRIX_COORD corresponding to the location in the ratings
00338   // MATRIX for the given index into the word.
00339   MATRIX_COORD MatrixCoord(int index) const;
00340 
        // Overwrites only the unichar id at index; rating, certainty and state
        // are left alone. index must lie in [0, length_), which is asserted.
00341   inline void set_unichar_id(UNICHAR_ID unichar_id, int index) {
00342     assert(0 <= index && index < length_);
00343     unichar_ids_[index] = unichar_id;
00344   }
00345   bool dangerous_ambig_found() const {
00346     return dangerous_ambig_found_;
00347   }
00348   void set_dangerous_ambig_found_(bool value) {
00349     dangerous_ambig_found_ = value;
00350   }
00351   inline void set_rating(float new_val) {
00352     rating_ = new_val;
00353   }
00354   inline void set_certainty(float new_val) {
00355     certainty_ = new_val;
00356   }
00357   inline void set_permuter(uinT8 perm) {
00358     permuter_ = perm;
00359   }
00360   // Note: this function should only be used if all the fields
00361   // are populated manually with set_* functions (rather than
00362   // (copy)constructors and append_* functions).
00363   inline void set_length(int len) {
00364     ASSERT_HOST(reserved_ >= len);
00365     length_ = len;
00366   }
00367 
        // Grows the four parallel arrays. With an existing reservation each array
        // is replaced by the result of GenericVector<T>::double_the_size_memcpy
        // and reserved_ is doubled; with none, single-element arrays are
        // allocated and reserved_ becomes 1.
00369   inline void double_the_size() {
00370     if (reserved_ > 0) {
00371       unichar_ids_ = GenericVector<UNICHAR_ID>::double_the_size_memcpy(
00372           reserved_, unichar_ids_);
00373       script_pos_ = GenericVector<tesseract::ScriptPos>::double_the_size_memcpy(
00374           reserved_, script_pos_);
00375       state_ = GenericVector<int>::double_the_size_memcpy(
00376           reserved_, state_);
00377       certainties_ = GenericVector<float>::double_the_size_memcpy(
00378           reserved_, certainties_);
00379       reserved_ *= 2;
00380     } else {
00381       unichar_ids_ = new UNICHAR_ID[1];
00382       script_pos_ = new tesseract::ScriptPos[1];
00383       state_ = new int[1];
00384       certainties_ = new float[1];
00385       reserved_ = 1;
00386     }
00387   }
00388 
        // Resets the word to empty with `reserved` slots in each parallel array
        // (NULL arrays when reserved <= 0) and default scalar fields: adjust
        // factor 1, rating 0, certainty MAX_FLOAT32, x-height range
        // [0, MAX_FLOAT32], NO_PERM and both flags false.
        // Previously held arrays are not freed here.
00391   inline void init(int reserved) {
00392     reserved_ = reserved;
00393     if (reserved > 0) {
00394       unichar_ids_ = new UNICHAR_ID[reserved];
00395       script_pos_ = new tesseract::ScriptPos[reserved];
00396       state_ = new int[reserved];
00397       certainties_ = new float[reserved];
00398     } else {
00399       unichar_ids_ = NULL;
00400       script_pos_ = NULL;
00401       state_ = NULL;
00402       certainties_ = NULL;
00403     }
00404     length_ = 0;
00405     adjust_factor_ = 1.0f;
00406     rating_ = 0.0;
00407     certainty_ = MAX_FLOAT32;
00408     min_x_height_ = 0.0f;
00409     max_x_height_ = MAX_FLOAT32;
00410     permuter_ = NO_PERM;
00411     unichars_in_script_order_ = false;  // Tesseract is strict left-to-right.
00412     dangerous_ambig_found_ = false;
00413   }
00414 
        // String-based initialisation used by the string constructor above;
        // defined outside this header.
00420   void init(const char *src_string, const char *src_lengths,
00421             float src_rating, float src_certainty,
00422             uinT8 src_permuter);
00423 
        // Marks the word as bad: zero length, rating kBadRating and the lowest
        // possible certainty. The arrays and reserved_ are left untouched.
00425   inline void make_bad() {
00426     length_ = 0;
00427     rating_ = kBadRating;
00428     certainty_ = -MAX_FLOAT32;
00429   }
00430 
        // Appends one unichar without growing the arrays: the caller must have
        // ensured reserved_ > length_ (asserted). The new entry is filled in by
        // the five-argument set_unichar_id below.
00434   inline void append_unichar_id_space_allocated(
00435       UNICHAR_ID unichar_id, int blob_count,
00436       float rating, float certainty) {
00437     assert(reserved_ > length_);
00438     length_++;
00439     this->set_unichar_id(unichar_id, blob_count,
00440                          rating, certainty, length_-1);
00441   }
00442 
00443   void append_unichar_id(UNICHAR_ID unichar_id, int blob_count,
00444                          float rating, float certainty);
00445 
        // Fills the entry at index with the unichar id, blob count and
        // certainty, and resets its script position to SP_NORMAL.
        // Note that rating is ADDED to the word rating rather than replacing a
        // previous contribution, and the word certainty only ever decreases.
        // index must lie in [0, length_), which is asserted.
00446   inline void set_unichar_id(UNICHAR_ID unichar_id, int blob_count,
00447                              float rating, float certainty, int index) {
00448     assert(0 <= index && index < length_);
00449     unichar_ids_[index] = unichar_id;
00450     state_[index] = blob_count;
00451     certainties_[index] = certainty;
00452     script_pos_[index] = tesseract::SP_NORMAL;
00453     rating_ += rating;
00454     if (certainty < certainty_) {
00455       certainty_ = certainty;
00456     }
00457   }
00458   // Sets the entries for the given index from the BLOB_CHOICE, assuming
00459   // unit fragment lengths, but setting the state for this index to blob_count.
00460   void set_blob_choice(int index, int blob_count,
00461                        const BLOB_CHOICE* blob_choice);
00462 
00463   bool contains_unichar_id(UNICHAR_ID unichar_id) const;
00464   void remove_unichar_ids(int index, int num);
        // Drops the last unichar by shrinking length_. The word must not be empty
        // (asserted); rating_ and certainty_ are not adjusted.
00465   inline void remove_last_unichar_id() { assert(length_ > 0); --length_; }
00466   inline void remove_unichar_id(int index) {
00467     this->remove_unichar_ids(index, 1);
00468   }
00469   bool has_rtl_unichar_id() const;
00470   void reverse_and_mirror_unichar_ids();
00471 
00472   // Returns the half-open interval of unichar_id indices [start, end) which
00473   // enclose the core portion of this word -- the part after stripping
00474   // punctuation from the left and right.
00475   void punct_stripped(int *start_core, int *end_core) const;
00476 
00477   // Returns the indices [start, end) containing the core of the word, stripped
00478   // of any superscript digits on either side. (i.e., the non-footnote part
00479   // of the word). There is no guarantee that the output range is non-empty.
00480   void GetNonSuperscriptSpan(int *start, int *end) const;
00481 
00482   // Return a copy of this WERD_CHOICE with the choices [start, end).
00483   // The result is useful only for checking against a dictionary.
00484   WERD_CHOICE shallow_copy(int start, int end) const;
00485 
00486   void string_and_lengths(STRING *word_str, STRING *word_lengths_str) const;
        // Returns the unicharset debug strings of all unichars, each followed by
        // a single space.
00487   const STRING debug_string() const {
00488     STRING word_str;
00489     for (int i = 0; i < length_; ++i) {
00490       word_str += unicharset_->debug_str(unichar_ids_[i]);
00491       word_str += " ";
00492     }
00493     return word_str;
00494   }
00495 
00496   // Call this to override the default (strict left to right graphemes)
00497   // with the fact that some engine produces a "reading order" set of
00498   // Graphemes for each word.
        // Returns the value that was just stored.
00499   bool set_unichars_in_script_order(bool in_script_order) {
00500     return unichars_in_script_order_ = in_script_order;
00501   }
00502 
00503   bool unichars_in_script_order() const {
00504     return unichars_in_script_order_;
00505   }
00506 
00507   // Returns a UTF-8 string equivalent to the current choice
00508   // of UNICHAR IDs.
        // The string is rebuilt into a mutable member on every call, so the
        // returned reference is overwritten by the next call to
        // unichar_string() or unichar_lengths().
00509   const STRING &unichar_string() const {
00510     this->string_and_lengths(&unichar_string_, &unichar_lengths_);
00511     return unichar_string_;
00512   }
00513 
00514   // Returns the lengths, one byte each, representing the number of bytes
00515   // required in the unichar_string for each UNICHAR_ID.
        // Rebuilt on every call, exactly like unichar_string().
00516   const STRING &unichar_lengths() const {
00517     this->string_and_lengths(&unichar_string_, &unichar_lengths_);
00518     return unichar_lengths_;
00519   }
00520 
00521   // Sets up the script_pos_ member using the blobs_list to get the bln
00522   // bounding boxes, *this to get the unichars, and this->unicharset
00523   // to get the target positions. If small_caps is true, sub/super are not
00524   // considered, but dropcaps are.
00525   // NOTE: blobs_list should be the chopped_word blobs. (Fully segmented.)
00526   void SetScriptPositions(bool small_caps, TWERD* word);
00527   // Sets the script_pos_ member from some source positions with a given length.
00528   void SetScriptPositions(const tesseract::ScriptPos* positions, int length);
00529   // Sets all the script_pos_ positions to the given position.
00530   void SetAllScriptPositions(tesseract::ScriptPos position);
00531 
00532   static tesseract::ScriptPos ScriptPositionOf(bool print_debug,
00533                                                const UNICHARSET& unicharset,
00534                                                const TBOX& blob_box,
00535                                                UNICHAR_ID unichar_id);
00536 
00537   // Returns the "dominant" script ID for the word.  By "dominant", the script
00538   // must account for at least half the characters.  Otherwise, it returns 0.
00539   // Note that for Japanese, Hiragana and Katakana are simply treated as Han.
00540   int GetTopScriptID() const;
00541 
00542   // Fixes the state_ for a chop at the given blob_position.
00543   void UpdateStateForSplit(int blob_position);
00544 
00545   // Returns the sum of all the state elements, being the total number of blobs.
00546   int TotalOfStates() const;
00547 
        // Prints the word with an empty introductory message.
00548   void print() const { this->print(""); }
00549   void print(const char *msg) const;
00550   // Prints the segmentation state with an introductory message.
00551   void print_state(const char *msg) const;
00552 
00553   // Displays the segmentation state of *this (if not the same as the last
00554   // one displayed) and waits for a click in the window.
00555   void DisplaySegmentation(TWERD* word);
00556 
00557   WERD_CHOICE& operator+= (     // concatenate
00558     const WERD_CHOICE & second);// second on first
00559 
00560   WERD_CHOICE& operator= (const WERD_CHOICE& source);
00561 
00562  private:
00563   const UNICHARSET *unicharset_;
00564   // TODO(rays) Perhaps replace the multiple arrays with an array of structs?
00565   // unichar_ids_ is an array of classifier "results" that make up a word.
00566   // For each unichar_ids_[i], script_pos_[i] has the sub/super/normal position
00567   // of each unichar_id.
00568   // state_[i] indicates the number of blobs in WERD_RES::chopped_word that
00569   // were put together to make the classification results in the ith position
00570   // in unichar_ids_, and certainties_[i] is the certainty of the choice that
00571   // was used in this word.
00572   // == Change from before ==
00573   // Previously there was fragment_lengths_ that allowed a word to be
00574   // artificially composed of multiple fragment results. Since the new
00575   // segmentation search doesn't do fragments, treatment of fragments has
00576   // been moved to a lower level, augmenting the ratings matrix with the
00577   // combined fragments, and allowing the language-model/segmentation-search
00578   // to deal with only the combined unichar_ids.
00579   UNICHAR_ID *unichar_ids_;  // unichar ids that represent the text of the word
00580   tesseract::ScriptPos* script_pos_;  // Normal/Sub/Superscript of each unichar.
00581   int* state_;               // Number of blobs in each unichar.
00582   float* certainties_;       // Certainty of each unichar.
00583   int reserved_;             // size of the above arrays
00584   int length_;               // word length
00585   // Factor that was used to adjust the rating.
00586   float adjust_factor_;
00587   // Rating is the sum of the ratings of the individual blobs in the word.
00588   float rating_;             // size related
00589   // certainty is the min (worst) certainty of the individual blobs in the word.
00590   float certainty_;          // absolute
00591   // xheight computed from the result, or 0 if inconsistent.
00592   float min_x_height_;
00593   float max_x_height_;
00594   uinT8 permuter_;           // permuter code
00595 
00596   // Normally, the ratings_ matrix represents the recognition results in order
00597   // from left-to-right.  However, some engines (say Cube) may return
00598   // recognition results in the order of the script's major reading direction
00599   // (for Arabic, that is right-to-left).
00600   bool unichars_in_script_order_;
00601   // True if NoDangerousAmbig found an ambiguity.
00602   bool dangerous_ambig_found_;
00603 
00604   // The following variables are populated and passed by reference any
00605   // time unichar_string() or unichar_lengths() are called.
00606   mutable STRING unichar_string_;
00607   mutable STRING unichar_lengths_;
00608 };
00609 
00610 // Make WERD_CHOICE listable.
00611 ELISTIZEH(WERD_CHOICE)
      // A GenericVector whose elements are pointers to BLOB_CHOICE_LISTs.
00612 typedef GenericVector<BLOB_CHOICE_LIST *> BLOB_CHOICE_LIST_VECTOR;
00613 
00614 // Utilities for comparing WERD_CHOICEs
00615 
      // NOTE(review): going by the name, compares two words while ignoring case
      // and trailing punctuation. The definition is outside this header, so
      // confirm the exact rules there.
00616 bool EqualIgnoringCaseAndTerminalPunct(const WERD_CHOICE &word1,
00617                                        const WERD_CHOICE &word2);
00618 
00619 // Utilities for debug printing.
      // Declared here only; the definition lives outside this header.
00620 void print_ratings_list(
00621     const char *msg,                      // intro message
00622     BLOB_CHOICE_LIST *ratings,            // list of results
00623     const UNICHARSET &current_unicharset  // unicharset that can be used
00624                                           // for id-to-unichar conversion
00625     );
00626 
00627 #endif
 All Classes Namespaces Files Functions Variables Typedefs Enumerations Enumerator Friends Defines