tesseract
3.03
|
00001 00002 // File: ltrresultiterator.cpp 00003 // Description: Iterator for tesseract results in strict left-to-right 00004 // order that avoids using tesseract internal data structures. 00005 // Author: Ray Smith 00006 // Created: Fri Feb 26 14:32:09 PST 2010 00007 // 00008 // (C) Copyright 2010, Google Inc. 00009 // Licensed under the Apache License, Version 2.0 (the "License"); 00010 // you may not use this file except in compliance with the License. 00011 // You may obtain a copy of the License at 00012 // http://www.apache.org/licenses/LICENSE-2.0 00013 // Unless required by applicable law or agreed to in writing, software 00014 // distributed under the License is distributed on an "AS IS" BASIS, 00015 // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 00016 // See the License for the specific language governing permissions and 00017 // limitations under the License. 00018 // 00020 00021 #include "ltrresultiterator.h" 00022 00023 #include "allheaders.h" 00024 #include "pageres.h" 00025 #include "strngs.h" 00026 #include "tesseractclass.h" 00027 00028 namespace tesseract { 00029 00030 LTRResultIterator::LTRResultIterator(PAGE_RES* page_res, Tesseract* tesseract, 00031 int scale, int scaled_yres, 00032 int rect_left, int rect_top, 00033 int rect_width, int rect_height) 00034 : PageIterator(page_res, tesseract, scale, scaled_yres, 00035 rect_left, rect_top, rect_width, rect_height), 00036 line_separator_("\n"), 00037 paragraph_separator_("\n") { 00038 } 00039 00040 LTRResultIterator::~LTRResultIterator() { 00041 } 00042 00043 // Returns the null terminated UTF-8 encoded text string for the current 00044 // object at the given level. Use delete [] to free after use. 00045 char* LTRResultIterator::GetUTF8Text(PageIteratorLevel level) const { 00046 if (it_->word() == NULL) return NULL; // Already at the end! 00047 STRING text; 00048 PAGE_RES_IT res_it(*it_); 00049 WERD_CHOICE* best_choice = res_it.word()->best_choice; 00050 ASSERT_HOST(best_choice != NULL); 00051 if (level == RIL_SYMBOL) { 00052 text = res_it.word()->BestUTF8(blob_index_, false); 00053 } else if (level == RIL_WORD) { 00054 text = best_choice->unichar_string(); 00055 } else { 00056 bool eol = false; // end of line? 00057 bool eop = false; // end of paragraph? 00058 do { // for each paragraph in a block 00059 do { // for each text line in a paragraph 00060 do { // for each word in a text line 00061 best_choice = res_it.word()->best_choice; 00062 ASSERT_HOST(best_choice != NULL); 00063 text += best_choice->unichar_string(); 00064 text += " "; 00065 res_it.forward(); 00066 eol = res_it.row() != res_it.prev_row(); 00067 } while (!eol); 00068 text.truncate_at(text.length() - 1); 00069 text += line_separator_; 00070 eop = res_it.block() != res_it.prev_block() || 00071 res_it.row()->row->para() != res_it.prev_row()->row->para(); 00072 } while (level != RIL_TEXTLINE && !eop); 00073 if (eop) text += paragraph_separator_; 00074 } while (level == RIL_BLOCK && res_it.block() == res_it.prev_block()); 00075 } 00076 int length = text.length() + 1; 00077 char* result = new char[length]; 00078 strncpy(result, text.string(), length); 00079 return result; 00080 } 00081 00082 // Set the string inserted at the end of each text line. "\n" by default. 00083 void LTRResultIterator::SetLineSeparator(const char *new_line) { 00084 line_separator_ = new_line; 00085 } 00086 00087 // Set the string inserted at the end of each paragraph. "\n" by default. 00088 void LTRResultIterator::SetParagraphSeparator(const char *new_para) { 00089 paragraph_separator_ = new_para; 00090 } 00091 00092 // Returns the mean confidence of the current object at the given level. 00093 // The number should be interpreted as a percent probability. (0.0f-100.0f) 00094 float LTRResultIterator::Confidence(PageIteratorLevel level) const { 00095 if (it_->word() == NULL) return 0.0f; // Already at the end! 00096 float mean_certainty = 0.0f; 00097 int certainty_count = 0; 00098 PAGE_RES_IT res_it(*it_); 00099 WERD_CHOICE* best_choice = res_it.word()->best_choice; 00100 ASSERT_HOST(best_choice != NULL); 00101 switch (level) { 00102 case RIL_BLOCK: 00103 do { 00104 best_choice = res_it.word()->best_choice; 00105 ASSERT_HOST(best_choice != NULL); 00106 mean_certainty += best_choice->certainty(); 00107 ++certainty_count; 00108 res_it.forward(); 00109 } while (res_it.block() == res_it.prev_block()); 00110 break; 00111 case RIL_PARA: 00112 do { 00113 best_choice = res_it.word()->best_choice; 00114 ASSERT_HOST(best_choice != NULL); 00115 mean_certainty += best_choice->certainty(); 00116 ++certainty_count; 00117 res_it.forward(); 00118 } while (res_it.block() == res_it.prev_block() && 00119 res_it.row()->row->para() == res_it.prev_row()->row->para()); 00120 break; 00121 case RIL_TEXTLINE: 00122 do { 00123 best_choice = res_it.word()->best_choice; 00124 ASSERT_HOST(best_choice != NULL); 00125 mean_certainty += best_choice->certainty(); 00126 ++certainty_count; 00127 res_it.forward(); 00128 } while (res_it.row() == res_it.prev_row()); 00129 break; 00130 case RIL_WORD: 00131 mean_certainty += best_choice->certainty(); 00132 ++certainty_count; 00133 break; 00134 case RIL_SYMBOL: 00135 mean_certainty += best_choice->certainty(blob_index_); 00136 ++certainty_count; 00137 } 00138 if (certainty_count > 0) { 00139 mean_certainty /= certainty_count; 00140 float confidence = 100 + 5 * mean_certainty; 00141 if (confidence < 0.0f) confidence = 0.0f; 00142 if (confidence > 100.0f) confidence = 100.0f; 00143 return confidence; 00144 } 00145 return 0.0f; 00146 } 00147 00148 // Returns the font attributes of the current word. If iterating at a higher 00149 // level object than words, eg textlines, then this will return the 00150 // attributes of the first word in that textline. 00151 // The actual return value is a string representing a font name. It points 00152 // to an internal table and SHOULD NOT BE DELETED. Lifespan is the same as 00153 // the iterator itself, ie rendered invalid by various members of 00154 // TessBaseAPI, including Init, SetImage, End or deleting the TessBaseAPI. 00155 // Pointsize is returned in printers points (1/72 inch.) 00156 const char* LTRResultIterator::WordFontAttributes(bool* is_bold, 00157 bool* is_italic, 00158 bool* is_underlined, 00159 bool* is_monospace, 00160 bool* is_serif, 00161 bool* is_smallcaps, 00162 int* pointsize, 00163 int* font_id) const { 00164 if (it_->word() == NULL) return NULL; // Already at the end! 00165 if (it_->word()->fontinfo == NULL) { 00166 *font_id = -1; 00167 return NULL; // No font information. 00168 } 00169 const FontInfo& font_info = *it_->word()->fontinfo; 00170 *font_id = font_info.universal_id; 00171 *is_bold = font_info.is_bold(); 00172 *is_italic = font_info.is_italic(); 00173 *is_underlined = false; // TODO(rays) fix this! 00174 *is_monospace = font_info.is_fixed_pitch(); 00175 *is_serif = font_info.is_serif(); 00176 *is_smallcaps = it_->word()->small_caps; 00177 float row_height = it_->row()->row->x_height() + 00178 it_->row()->row->ascenders() - it_->row()->row->descenders(); 00179 // Convert from pixels to printers points. 00180 *pointsize = scaled_yres_ > 0 00181 ? static_cast<int>(row_height * kPointsPerInch / scaled_yres_ + 0.5) 00182 : 0; 00183 00184 return font_info.name; 00185 } 00186 00187 // Returns the name of the language used to recognize this word. 00188 const char* LTRResultIterator::WordRecognitionLanguage() const { 00189 if (it_->word() == NULL || it_->word()->tesseract == NULL) return NULL; 00190 return it_->word()->tesseract->lang.string(); 00191 } 00192 00193 // Return the overall directionality of this word. 00194 StrongScriptDirection LTRResultIterator::WordDirection() const { 00195 if (it_->word() == NULL) return DIR_NEUTRAL; 00196 bool has_rtl = it_->word()->AnyRtlCharsInWord(); 00197 bool has_ltr = it_->word()->AnyLtrCharsInWord(); 00198 if (has_rtl && !has_ltr) 00199 return DIR_RIGHT_TO_LEFT; 00200 if (has_ltr && !has_rtl) 00201 return DIR_LEFT_TO_RIGHT; 00202 if (!has_ltr && !has_rtl) 00203 return DIR_NEUTRAL; 00204 return DIR_MIX; 00205 } 00206 00207 // Returns true if the current word was found in a dictionary. 00208 bool LTRResultIterator::WordIsFromDictionary() const { 00209 if (it_->word() == NULL) return false; // Already at the end! 00210 int permuter = it_->word()->best_choice->permuter(); 00211 return permuter == SYSTEM_DAWG_PERM || permuter == FREQ_DAWG_PERM || 00212 permuter == USER_DAWG_PERM; 00213 } 00214 00215 // Returns true if the current word is numeric. 00216 bool LTRResultIterator::WordIsNumeric() const { 00217 if (it_->word() == NULL) return false; // Already at the end! 00218 int permuter = it_->word()->best_choice->permuter(); 00219 return permuter == NUMBER_PERM; 00220 } 00221 00222 // Returns true if the word contains blamer information. 00223 bool LTRResultIterator::HasBlamerInfo() const { 00224 return it_->word() != NULL && it_->word()->blamer_bundle != NULL && 00225 it_->word()->blamer_bundle->HasDebugInfo(); 00226 } 00227 00228 // Returns the pointer to ParamsTrainingBundle stored in the BlamerBundle 00229 // of the current word. 00230 const void *LTRResultIterator::GetParamsTrainingBundle() const { 00231 return (it_->word() != NULL && it_->word()->blamer_bundle != NULL) ? 00232 &(it_->word()->blamer_bundle->params_training_bundle()) : NULL; 00233 } 00234 00235 // Returns the pointer to the string with blamer information for this word. 00236 // Assumes that the word's blamer_bundle is not NULL. 00237 const char *LTRResultIterator::GetBlamerDebug() const { 00238 return it_->word()->blamer_bundle->debug().string(); 00239 } 00240 00241 // Returns the pointer to the string with misadaption information for this word. 00242 // Assumes that the word's blamer_bundle is not NULL. 00243 const char *LTRResultIterator::GetBlamerMisadaptionDebug() const { 00244 return it_->word()->blamer_bundle->misadaption_debug().string(); 00245 } 00246 00247 // Returns true if a truth string was recorded for the current word. 00248 bool LTRResultIterator::HasTruthString() const { 00249 if (it_->word() == NULL) return false; // Already at the end! 00250 if (it_->word()->blamer_bundle == NULL || 00251 it_->word()->blamer_bundle->NoTruth()) { 00252 return false; // no truth information for this word 00253 } 00254 return true; 00255 } 00256 00257 // Returns true if the given string is equivalent to the truth string for 00258 // the current word. 00259 bool LTRResultIterator::EquivalentToTruth(const char *str) const { 00260 if (!HasTruthString()) return false; 00261 ASSERT_HOST(it_->word()->uch_set != NULL); 00262 WERD_CHOICE str_wd(str, *(it_->word()->uch_set)); 00263 return it_->word()->blamer_bundle->ChoiceIsCorrect(&str_wd); 00264 } 00265 00266 // Returns the null terminated UTF-8 encoded truth string for the current word. 00267 // Use delete [] to free after use. 00268 char* LTRResultIterator::WordTruthUTF8Text() const { 00269 if (!HasTruthString()) return NULL; 00270 STRING truth_text = it_->word()->blamer_bundle->TruthString(); 00271 int length = truth_text.length() + 1; 00272 char* result = new char[length]; 00273 strncpy(result, truth_text.string(), length); 00274 return result; 00275 } 00276 00277 // Returns the null terminated UTF-8 encoded normalized OCR string for the 00278 // current word. Use delete [] to free after use. 00279 char* LTRResultIterator::WordNormedUTF8Text() const { 00280 if (it_->word() == NULL) return NULL; // Already at the end! 00281 STRING ocr_text; 00282 WERD_CHOICE* best_choice = it_->word()->best_choice; 00283 const UNICHARSET *unicharset = it_->word()->uch_set; 00284 ASSERT_HOST(best_choice != NULL); 00285 for (int i = 0; i < best_choice->length(); ++i) { 00286 ocr_text += unicharset->get_normed_unichar(best_choice->unichar_id(i)); 00287 } 00288 int length = ocr_text.length() + 1; 00289 char* result = new char[length]; 00290 strncpy(result, ocr_text.string(), length); 00291 return result; 00292 } 00293 00294 // Returns a pointer to serialized choice lattice. 00295 // Fills lattice_size with the number of bytes in lattice data. 00296 const char *LTRResultIterator::WordLattice(int *lattice_size) const { 00297 if (it_->word() == NULL) return NULL; // Already at the end! 00298 if (it_->word()->blamer_bundle == NULL) return NULL; 00299 *lattice_size = it_->word()->blamer_bundle->lattice_size(); 00300 return it_->word()->blamer_bundle->lattice_data(); 00301 } 00302 00303 // Returns true if the current symbol is a superscript. 00304 // If iterating at a higher level object than symbols, eg words, then 00305 // this will return the attributes of the first symbol in that word. 00306 bool LTRResultIterator::SymbolIsSuperscript() const { 00307 if (cblob_it_ == NULL && it_->word() != NULL) 00308 return it_->word()->best_choice->BlobPosition(blob_index_) == 00309 SP_SUPERSCRIPT; 00310 return false; 00311 } 00312 00313 // Returns true if the current symbol is a subscript. 00314 // If iterating at a higher level object than symbols, eg words, then 00315 // this will return the attributes of the first symbol in that word. 00316 bool LTRResultIterator::SymbolIsSubscript() const { 00317 if (cblob_it_ == NULL && it_->word() != NULL) 00318 return it_->word()->best_choice->BlobPosition(blob_index_) == SP_SUBSCRIPT; 00319 return false; 00320 } 00321 00322 // Returns true if the current symbol is a dropcap. 00323 // If iterating at a higher level object than symbols, eg words, then 00324 // this will return the attributes of the first symbol in that word. 00325 bool LTRResultIterator::SymbolIsDropcap() const { 00326 if (cblob_it_ == NULL && it_->word() != NULL) 00327 return it_->word()->best_choice->BlobPosition(blob_index_) == SP_DROPCAP; 00328 return false; 00329 } 00330 00331 ChoiceIterator::ChoiceIterator(const LTRResultIterator& result_it) { 00332 ASSERT_HOST(result_it.it_->word() != NULL); 00333 word_res_ = result_it.it_->word(); 00334 BLOB_CHOICE_LIST* choices = NULL; 00335 if (word_res_->ratings != NULL) 00336 choices = word_res_->GetBlobChoices(result_it.blob_index_); 00337 if (choices != NULL && !choices->empty()) { 00338 choice_it_ = new BLOB_CHOICE_IT(choices); 00339 choice_it_->mark_cycle_pt(); 00340 } else { 00341 choice_it_ = NULL; 00342 } 00343 } 00344 00345 ChoiceIterator::~ChoiceIterator() { 00346 delete choice_it_; 00347 } 00348 00349 // Moves to the next choice for the symbol and returns false if there 00350 // are none left. 00351 bool ChoiceIterator::Next() { 00352 if (choice_it_ == NULL) 00353 return false; 00354 choice_it_->forward(); 00355 return !choice_it_->cycled_list(); 00356 } 00357 00358 // Returns the null terminated UTF-8 encoded text string for the current 00359 // choice. Use delete [] to free after use. 00360 const char* ChoiceIterator::GetUTF8Text() const { 00361 if (choice_it_ == NULL) 00362 return NULL; 00363 UNICHAR_ID id = choice_it_->data()->unichar_id(); 00364 return word_res_->uch_set->id_to_unichar_ext(id); 00365 } 00366 00367 // Returns the confidence of the current choice. 00368 // The number should be interpreted as a percent probability. (0.0f-100.0f) 00369 float ChoiceIterator::Confidence() const { 00370 if (choice_it_ == NULL) 00371 return 0.0f; 00372 float confidence = 100 + 5 * choice_it_->data()->certainty(); 00373 if (confidence < 0.0f) confidence = 0.0f; 00374 if (confidence > 100.0f) confidence = 100.0f; 00375 return confidence; 00376 } 00377 00378 00379 } // namespace tesseract.