tesseract  3.03
/usr/local/google/home/jbreiden/tesseract-ocr-read-only/ccmain/ltrresultiterator.cpp
Go to the documentation of this file.
00001 
00002 // File:        ltrresultiterator.cpp
00003 // Description: Iterator for tesseract results in strict left-to-right
00004 //              order that avoids using tesseract internal data structures.
00005 // Author:      Ray Smith
00006 // Created:     Fri Feb 26 14:32:09 PST 2010
00007 //
00008 // (C) Copyright 2010, Google Inc.
00009 // Licensed under the Apache License, Version 2.0 (the "License");
00010 // you may not use this file except in compliance with the License.
00011 // You may obtain a copy of the License at
00012 // http://www.apache.org/licenses/LICENSE-2.0
00013 // Unless required by applicable law or agreed to in writing, software
00014 // distributed under the License is distributed on an "AS IS" BASIS,
00015 // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
00016 // See the License for the specific language governing permissions and
00017 // limitations under the License.
00018 //
00020 
00021 #include "ltrresultiterator.h"
00022 
00023 #include "allheaders.h"
00024 #include "pageres.h"
00025 #include "strngs.h"
00026 #include "tesseractclass.h"
00027 
00028 namespace tesseract {
00029 
00030 LTRResultIterator::LTRResultIterator(PAGE_RES* page_res, Tesseract* tesseract,
00031                                      int scale, int scaled_yres,
00032                                      int rect_left, int rect_top,
00033                                      int rect_width, int rect_height)
00034   : PageIterator(page_res, tesseract, scale, scaled_yres,
00035                  rect_left, rect_top, rect_width, rect_height),
00036     line_separator_("\n"),
00037     paragraph_separator_("\n") {
00038 }
00039 
00040 LTRResultIterator::~LTRResultIterator() {
00041 }
00042 
00043 // Returns the null terminated UTF-8 encoded text string for the current
00044 // object at the given level. Use delete [] to free after use.
00045 char* LTRResultIterator::GetUTF8Text(PageIteratorLevel level) const {
00046   if (it_->word() == NULL) return NULL;  // Already at the end!
00047   STRING text;
00048   PAGE_RES_IT res_it(*it_);
00049   WERD_CHOICE* best_choice = res_it.word()->best_choice;
00050   ASSERT_HOST(best_choice != NULL);
00051   if (level == RIL_SYMBOL) {
00052     text = res_it.word()->BestUTF8(blob_index_, false);
00053   } else if (level == RIL_WORD) {
00054     text = best_choice->unichar_string();
00055   } else {
00056     bool eol = false;  // end of line?
00057     bool eop = false;  // end of paragraph?
00058     do {  // for each paragraph in a block
00059       do {  // for each text line in a paragraph
00060         do {  // for each word in a text line
00061           best_choice = res_it.word()->best_choice;
00062           ASSERT_HOST(best_choice != NULL);
00063           text += best_choice->unichar_string();
00064           text += " ";
00065           res_it.forward();
00066           eol = res_it.row() != res_it.prev_row();
00067         } while (!eol);
00068         text.truncate_at(text.length() - 1);
00069         text += line_separator_;
00070         eop = res_it.block() != res_it.prev_block() ||
00071             res_it.row()->row->para() != res_it.prev_row()->row->para();
00072       } while (level != RIL_TEXTLINE && !eop);
00073       if (eop) text += paragraph_separator_;
00074     } while (level == RIL_BLOCK && res_it.block() == res_it.prev_block());
00075   }
00076   int length = text.length() + 1;
00077   char* result = new char[length];
00078   strncpy(result, text.string(), length);
00079   return result;
00080 }
00081 
00082 // Set the string inserted at the end of each text line. "\n" by default.
00083 void LTRResultIterator::SetLineSeparator(const char *new_line) {
00084   line_separator_ = new_line;
00085 }
00086 
00087 // Set the string inserted at the end of each paragraph. "\n" by default.
00088 void LTRResultIterator::SetParagraphSeparator(const char *new_para) {
00089   paragraph_separator_ = new_para;
00090 }
00091 
00092 // Returns the mean confidence of the current object at the given level.
00093 // The number should be interpreted as a percent probability. (0.0f-100.0f)
00094 float LTRResultIterator::Confidence(PageIteratorLevel level) const {
00095   if (it_->word() == NULL) return 0.0f;  // Already at the end!
00096   float mean_certainty = 0.0f;
00097   int certainty_count = 0;
00098   PAGE_RES_IT res_it(*it_);
00099   WERD_CHOICE* best_choice = res_it.word()->best_choice;
00100   ASSERT_HOST(best_choice != NULL);
00101   switch (level) {
00102     case RIL_BLOCK:
00103       do {
00104         best_choice = res_it.word()->best_choice;
00105         ASSERT_HOST(best_choice != NULL);
00106         mean_certainty += best_choice->certainty();
00107         ++certainty_count;
00108         res_it.forward();
00109       } while (res_it.block() == res_it.prev_block());
00110       break;
00111     case RIL_PARA:
00112       do {
00113         best_choice = res_it.word()->best_choice;
00114         ASSERT_HOST(best_choice != NULL);
00115         mean_certainty += best_choice->certainty();
00116         ++certainty_count;
00117         res_it.forward();
00118       } while (res_it.block() == res_it.prev_block() &&
00119                res_it.row()->row->para() == res_it.prev_row()->row->para());
00120       break;
00121     case RIL_TEXTLINE:
00122       do {
00123         best_choice = res_it.word()->best_choice;
00124         ASSERT_HOST(best_choice != NULL);
00125         mean_certainty += best_choice->certainty();
00126         ++certainty_count;
00127         res_it.forward();
00128       } while (res_it.row() == res_it.prev_row());
00129       break;
00130     case RIL_WORD:
00131       mean_certainty += best_choice->certainty();
00132      ++certainty_count;
00133       break;
00134     case RIL_SYMBOL:
00135       mean_certainty += best_choice->certainty(blob_index_);
00136       ++certainty_count;
00137   }
00138   if (certainty_count > 0) {
00139     mean_certainty /= certainty_count;
00140     float confidence = 100 + 5 * mean_certainty;
00141     if (confidence < 0.0f) confidence = 0.0f;
00142     if (confidence > 100.0f) confidence = 100.0f;
00143     return confidence;
00144   }
00145   return 0.0f;
00146 }
00147 
00148 // Returns the font attributes of the current word. If iterating at a higher
00149 // level object than words, eg textlines, then this will return the
00150 // attributes of the first word in that textline.
00151 // The actual return value is a string representing a font name. It points
00152 // to an internal table and SHOULD NOT BE DELETED. Lifespan is the same as
00153 // the iterator itself, ie rendered invalid by various members of
00154 // TessBaseAPI, including Init, SetImage, End or deleting the TessBaseAPI.
00155 // Pointsize is returned in printers points (1/72 inch.)
00156 const char* LTRResultIterator::WordFontAttributes(bool* is_bold,
00157                                                   bool* is_italic,
00158                                                   bool* is_underlined,
00159                                                   bool* is_monospace,
00160                                                   bool* is_serif,
00161                                                   bool* is_smallcaps,
00162                                                   int* pointsize,
00163                                                   int* font_id) const {
00164   if (it_->word() == NULL) return NULL;  // Already at the end!
00165   if (it_->word()->fontinfo == NULL) {
00166     *font_id = -1;
00167     return NULL;  // No font information.
00168   }
00169   const FontInfo& font_info = *it_->word()->fontinfo;
00170   *font_id = font_info.universal_id;
00171   *is_bold = font_info.is_bold();
00172   *is_italic = font_info.is_italic();
00173   *is_underlined = false;  // TODO(rays) fix this!
00174   *is_monospace = font_info.is_fixed_pitch();
00175   *is_serif = font_info.is_serif();
00176   *is_smallcaps = it_->word()->small_caps;
00177   float row_height = it_->row()->row->x_height() +
00178       it_->row()->row->ascenders() - it_->row()->row->descenders();
00179   // Convert from pixels to printers points.
00180   *pointsize = scaled_yres_ > 0
00181       ? static_cast<int>(row_height * kPointsPerInch / scaled_yres_ + 0.5)
00182       : 0;
00183 
00184   return font_info.name;
00185 }
00186 
00187 // Returns the name of the language used to recognize this word.
00188 const char* LTRResultIterator::WordRecognitionLanguage() const {
00189   if (it_->word() == NULL || it_->word()->tesseract == NULL) return NULL;
00190   return it_->word()->tesseract->lang.string();
00191 }
00192 
00193 // Return the overall directionality of this word.
00194 StrongScriptDirection LTRResultIterator::WordDirection() const {
00195   if (it_->word() == NULL) return DIR_NEUTRAL;
00196   bool has_rtl = it_->word()->AnyRtlCharsInWord();
00197   bool has_ltr = it_->word()->AnyLtrCharsInWord();
00198   if (has_rtl && !has_ltr)
00199     return DIR_RIGHT_TO_LEFT;
00200   if (has_ltr && !has_rtl)
00201     return DIR_LEFT_TO_RIGHT;
00202   if (!has_ltr && !has_rtl)
00203     return DIR_NEUTRAL;
00204   return DIR_MIX;
00205 }
00206 
00207 // Returns true if the current word was found in a dictionary.
00208 bool LTRResultIterator::WordIsFromDictionary() const {
00209   if (it_->word() == NULL) return false;  // Already at the end!
00210   int permuter = it_->word()->best_choice->permuter();
00211   return permuter == SYSTEM_DAWG_PERM || permuter == FREQ_DAWG_PERM ||
00212          permuter == USER_DAWG_PERM;
00213 }
00214 
00215 // Returns true if the current word is numeric.
00216 bool LTRResultIterator::WordIsNumeric() const {
00217   if (it_->word() == NULL) return false;  // Already at the end!
00218   int permuter = it_->word()->best_choice->permuter();
00219   return permuter == NUMBER_PERM;
00220 }
00221 
00222 // Returns true if the word contains blamer information.
00223 bool LTRResultIterator::HasBlamerInfo() const {
00224   return it_->word() != NULL && it_->word()->blamer_bundle != NULL &&
00225          it_->word()->blamer_bundle->HasDebugInfo();
00226 }
00227 
00228 // Returns the pointer to ParamsTrainingBundle stored in the BlamerBundle
00229 // of the current word.
00230 const void *LTRResultIterator::GetParamsTrainingBundle() const {
00231   return (it_->word() != NULL && it_->word()->blamer_bundle != NULL) ?
00232       &(it_->word()->blamer_bundle->params_training_bundle()) : NULL;
00233 }
00234 
00235 // Returns the pointer to the string with blamer information for this word.
00236 // Assumes that the word's blamer_bundle is not NULL.
00237 const char *LTRResultIterator::GetBlamerDebug() const {
00238   return it_->word()->blamer_bundle->debug().string();
00239 }
00240 
00241 // Returns the pointer to the string with misadaption information for this word.
00242 // Assumes that the word's blamer_bundle is not NULL.
00243 const char *LTRResultIterator::GetBlamerMisadaptionDebug() const {
00244   return it_->word()->blamer_bundle->misadaption_debug().string();
00245 }
00246 
00247 // Returns true if a truth string was recorded for the current word.
00248 bool LTRResultIterator::HasTruthString() const {
00249   if (it_->word() == NULL) return false;  // Already at the end!
00250   if (it_->word()->blamer_bundle == NULL ||
00251       it_->word()->blamer_bundle->NoTruth()) {
00252     return false;  // no truth information for this word
00253   }
00254   return true;
00255 }
00256 
00257 // Returns true if the given string is equivalent to the truth string for
00258 // the current word.
00259 bool LTRResultIterator::EquivalentToTruth(const char *str) const {
00260   if (!HasTruthString()) return false;
00261   ASSERT_HOST(it_->word()->uch_set != NULL);
00262   WERD_CHOICE str_wd(str, *(it_->word()->uch_set));
00263   return it_->word()->blamer_bundle->ChoiceIsCorrect(&str_wd);
00264 }
00265 
00266 // Returns the null terminated UTF-8 encoded truth string for the current word.
00267 // Use delete [] to free after use.
00268 char* LTRResultIterator::WordTruthUTF8Text() const {
00269   if (!HasTruthString()) return NULL;
00270   STRING truth_text = it_->word()->blamer_bundle->TruthString();
00271   int length = truth_text.length() + 1;
00272   char* result = new char[length];
00273   strncpy(result, truth_text.string(), length);
00274   return result;
00275 }
00276 
00277 // Returns the null terminated UTF-8 encoded normalized OCR string for the
00278 // current word. Use delete [] to free after use.
00279 char* LTRResultIterator::WordNormedUTF8Text() const {
00280   if (it_->word() == NULL) return NULL;  // Already at the end!
00281   STRING ocr_text;
00282   WERD_CHOICE* best_choice = it_->word()->best_choice;
00283   const UNICHARSET *unicharset = it_->word()->uch_set;
00284   ASSERT_HOST(best_choice != NULL);
00285   for (int i = 0; i < best_choice->length(); ++i) {
00286     ocr_text += unicharset->get_normed_unichar(best_choice->unichar_id(i));
00287   }
00288   int length = ocr_text.length() + 1;
00289   char* result = new char[length];
00290   strncpy(result, ocr_text.string(), length);
00291   return result;
00292 }
00293 
00294 // Returns a pointer to serialized choice lattice.
00295 // Fills lattice_size with the number of bytes in lattice data.
00296 const char *LTRResultIterator::WordLattice(int *lattice_size) const {
00297   if (it_->word() == NULL) return NULL;  // Already at the end!
00298   if (it_->word()->blamer_bundle == NULL) return NULL;
00299   *lattice_size = it_->word()->blamer_bundle->lattice_size();
00300   return it_->word()->blamer_bundle->lattice_data();
00301 }
00302 
00303 // Returns true if the current symbol is a superscript.
00304 // If iterating at a higher level object than symbols, eg words, then
00305 // this will return the attributes of the first symbol in that word.
00306 bool LTRResultIterator::SymbolIsSuperscript() const {
00307   if (cblob_it_ == NULL && it_->word() != NULL)
00308     return it_->word()->best_choice->BlobPosition(blob_index_) ==
00309         SP_SUPERSCRIPT;
00310   return false;
00311 }
00312 
00313 // Returns true if the current symbol is a subscript.
00314 // If iterating at a higher level object than symbols, eg words, then
00315 // this will return the attributes of the first symbol in that word.
00316 bool LTRResultIterator::SymbolIsSubscript() const {
00317   if (cblob_it_ == NULL && it_->word() != NULL)
00318     return it_->word()->best_choice->BlobPosition(blob_index_) == SP_SUBSCRIPT;
00319   return false;
00320 }
00321 
00322 // Returns true if the current symbol is a dropcap.
00323 // If iterating at a higher level object than symbols, eg words, then
00324 // this will return the attributes of the first symbol in that word.
00325 bool LTRResultIterator::SymbolIsDropcap() const {
00326   if (cblob_it_ == NULL && it_->word() != NULL)
00327     return it_->word()->best_choice->BlobPosition(blob_index_) == SP_DROPCAP;
00328   return false;
00329 }
00330 
00331 ChoiceIterator::ChoiceIterator(const LTRResultIterator& result_it) {
00332   ASSERT_HOST(result_it.it_->word() != NULL);
00333   word_res_ = result_it.it_->word();
00334   BLOB_CHOICE_LIST* choices = NULL;
00335   if (word_res_->ratings != NULL)
00336     choices = word_res_->GetBlobChoices(result_it.blob_index_);
00337   if (choices != NULL && !choices->empty()) {
00338     choice_it_ = new BLOB_CHOICE_IT(choices);
00339     choice_it_->mark_cycle_pt();
00340   } else {
00341     choice_it_ = NULL;
00342   }
00343 }
00344 
00345 ChoiceIterator::~ChoiceIterator() {
00346   delete choice_it_;
00347 }
00348 
00349 // Moves to the next choice for the symbol and returns false if there
00350 // are none left.
00351 bool ChoiceIterator::Next() {
00352   if (choice_it_ == NULL)
00353     return false;
00354   choice_it_->forward();
00355   return !choice_it_->cycled_list();
00356 }
00357 
00358 // Returns the null terminated UTF-8 encoded text string for the current
00359 // choice. Use delete [] to free after use.
00360 const char* ChoiceIterator::GetUTF8Text() const {
00361   if (choice_it_ == NULL)
00362     return NULL;
00363   UNICHAR_ID id = choice_it_->data()->unichar_id();
00364   return word_res_->uch_set->id_to_unichar_ext(id);
00365 }
00366 
00367 // Returns the confidence of the current choice.
00368 // The number should be interpreted as a percent probability. (0.0f-100.0f)
00369 float ChoiceIterator::Confidence() const {
00370   if (choice_it_ == NULL)
00371     return 0.0f;
00372   float confidence = 100 + 5 * choice_it_->data()->certainty();
00373   if (confidence < 0.0f) confidence = 0.0f;
00374   if (confidence > 100.0f) confidence = 100.0f;
00375   return confidence;
00376 }
00377 
00378 
00379 }  // namespace tesseract.
 All Classes Namespaces Files Functions Variables Typedefs Enumerations Enumerator Friends Defines