tesseract
3.03
|
#include <ltrresultiterator.h>
Public Member Functions | |
LTRResultIterator (PAGE_RES *page_res, Tesseract *tesseract, int scale, int scaled_yres, int rect_left, int rect_top, int rect_width, int rect_height) | |
virtual | ~LTRResultIterator () |
char * | GetUTF8Text (PageIteratorLevel level) const |
void | SetLineSeparator (const char *new_line) |
void | SetParagraphSeparator (const char *new_para) |
float | Confidence (PageIteratorLevel level) const |
const char * | WordFontAttributes (bool *is_bold, bool *is_italic, bool *is_underlined, bool *is_monospace, bool *is_serif, bool *is_smallcaps, int *pointsize, int *font_id) const |
const char * | WordRecognitionLanguage () const |
StrongScriptDirection | WordDirection () const |
bool | WordIsFromDictionary () const |
bool | WordIsNumeric () const |
bool | HasBlamerInfo () const |
const void * | GetParamsTrainingBundle () const |
const char * | GetBlamerDebug () const |
const char * | GetBlamerMisadaptionDebug () const |
bool | HasTruthString () const |
bool | EquivalentToTruth (const char *str) const |
char * | WordTruthUTF8Text () const |
char * | WordNormedUTF8Text () const |
const char * | WordLattice (int *lattice_size) const |
bool | SymbolIsSuperscript () const |
bool | SymbolIsSubscript () const |
bool | SymbolIsDropcap () const |
Protected Attributes | |
const char * | line_separator_ |
const char * | paragraph_separator_ |
Friends | |
class | ChoiceIterator |
Definition at line 46 of file ltrresultiterator.h.
// Definition at line 30 of file ltrresultiterator.cpp.
//
// Constructs the iterator. All eight arguments are forwarded, in the same
// order, to the PageIterator base-class constructor. Both the line separator
// and the paragraph separator start out as "\n".
tesseract::LTRResultIterator::LTRResultIterator(PAGE_RES* page_res,
                                                Tesseract* tesseract,
                                                int scale,
                                                int scaled_yres,
                                                int rect_left,
                                                int rect_top,
                                                int rect_width,
                                                int rect_height)
    : PageIterator(page_res, tesseract, scale, scaled_yres,
                   rect_left, rect_top, rect_width, rect_height),
      line_separator_("\n"),
      paragraph_separator_("\n") {
}
// Definition at line 40 of file ltrresultiterator.cpp.
//
// Virtual destructor. The body is empty: nothing is released here, and in
// particular the separator strings are not freed by this class.
tesseract::LTRResultIterator::~LTRResultIterator() {
}
float tesseract::LTRResultIterator::Confidence | ( | PageIteratorLevel | level | ) | const |
Definition at line 94 of file ltrresultiterator.cpp.
{ if (it_->word() == NULL) return 0.0f; // Already at the end! float mean_certainty = 0.0f; int certainty_count = 0; PAGE_RES_IT res_it(*it_); WERD_CHOICE* best_choice = res_it.word()->best_choice; ASSERT_HOST(best_choice != NULL); switch (level) { case RIL_BLOCK: do { best_choice = res_it.word()->best_choice; ASSERT_HOST(best_choice != NULL); mean_certainty += best_choice->certainty(); ++certainty_count; res_it.forward(); } while (res_it.block() == res_it.prev_block()); break; case RIL_PARA: do { best_choice = res_it.word()->best_choice; ASSERT_HOST(best_choice != NULL); mean_certainty += best_choice->certainty(); ++certainty_count; res_it.forward(); } while (res_it.block() == res_it.prev_block() && res_it.row()->row->para() == res_it.prev_row()->row->para()); break; case RIL_TEXTLINE: do { best_choice = res_it.word()->best_choice; ASSERT_HOST(best_choice != NULL); mean_certainty += best_choice->certainty(); ++certainty_count; res_it.forward(); } while (res_it.row() == res_it.prev_row()); break; case RIL_WORD: mean_certainty += best_choice->certainty(); ++certainty_count; break; case RIL_SYMBOL: mean_certainty += best_choice->certainty(blob_index_); ++certainty_count; } if (certainty_count > 0) { mean_certainty /= certainty_count; float confidence = 100 + 5 * mean_certainty; if (confidence < 0.0f) confidence = 0.0f; if (confidence > 100.0f) confidence = 100.0f; return confidence; } return 0.0f; }
bool tesseract::LTRResultIterator::EquivalentToTruth | ( | const char * | str | ) | const |
Definition at line 259 of file ltrresultiterator.cpp.
{ if (!HasTruthString()) return false; ASSERT_HOST(it_->word()->uch_set != NULL); WERD_CHOICE str_wd(str, *(it_->word()->uch_set)); return it_->word()->blamer_bundle->ChoiceIsCorrect(&str_wd); }
const char * tesseract::LTRResultIterator::GetBlamerDebug | ( | ) | const |
Definition at line 237 of file ltrresultiterator.cpp.
{ return it_->word()->blamer_bundle->debug().string(); }
const char * tesseract::LTRResultIterator::GetBlamerMisadaptionDebug | ( | ) | const |
Definition at line 243 of file ltrresultiterator.cpp.
{ return it_->word()->blamer_bundle->misadaption_debug().string(); }
const void * tesseract::LTRResultIterator::GetParamsTrainingBundle | ( | ) | const |
Definition at line 230 of file ltrresultiterator.cpp.
{ return (it_->word() != NULL && it_->word()->blamer_bundle != NULL) ? &(it_->word()->blamer_bundle->params_training_bundle()) : NULL; }
char * tesseract::LTRResultIterator::GetUTF8Text | ( | PageIteratorLevel | level | ) | const |
Reimplemented in tesseract::ResultIterator.
Definition at line 45 of file ltrresultiterator.cpp.
{ if (it_->word() == NULL) return NULL; // Already at the end! STRING text; PAGE_RES_IT res_it(*it_); WERD_CHOICE* best_choice = res_it.word()->best_choice; ASSERT_HOST(best_choice != NULL); if (level == RIL_SYMBOL) { text = res_it.word()->BestUTF8(blob_index_, false); } else if (level == RIL_WORD) { text = best_choice->unichar_string(); } else { bool eol = false; // end of line? bool eop = false; // end of paragraph? do { // for each paragraph in a block do { // for each text line in a paragraph do { // for each word in a text line best_choice = res_it.word()->best_choice; ASSERT_HOST(best_choice != NULL); text += best_choice->unichar_string(); text += " "; res_it.forward(); eol = res_it.row() != res_it.prev_row(); } while (!eol); text.truncate_at(text.length() - 1); text += line_separator_; eop = res_it.block() != res_it.prev_block() || res_it.row()->row->para() != res_it.prev_row()->row->para(); } while (level != RIL_TEXTLINE && !eop); if (eop) text += paragraph_separator_; } while (level == RIL_BLOCK && res_it.block() == res_it.prev_block()); } int length = text.length() + 1; char* result = new char[length]; strncpy(result, text.string(), length); return result; }
bool tesseract::LTRResultIterator::HasBlamerInfo | ( | ) | const |
Definition at line 223 of file ltrresultiterator.cpp.
{ return it_->word() != NULL && it_->word()->blamer_bundle != NULL && it_->word()->blamer_bundle->HasDebugInfo(); }
bool tesseract::LTRResultIterator::HasTruthString | ( | ) | const |
Definition at line 248 of file ltrresultiterator.cpp.
{ if (it_->word() == NULL) return false; // Already at the end! if (it_->word()->blamer_bundle == NULL || it_->word()->blamer_bundle->NoTruth()) { return false; // no truth information for this word } return true; }
void tesseract::LTRResultIterator::SetLineSeparator | ( | const char * | new_line | ) |
Definition at line 83 of file ltrresultiterator.cpp.
{ line_separator_ = new_line; }
void tesseract::LTRResultIterator::SetParagraphSeparator | ( | const char * | new_para | ) |
Definition at line 88 of file ltrresultiterator.cpp.
{ paragraph_separator_ = new_para; }
bool tesseract::LTRResultIterator::SymbolIsDropcap | ( | ) | const |
Definition at line 325 of file ltrresultiterator.cpp.
{ if (cblob_it_ == NULL && it_->word() != NULL) return it_->word()->best_choice->BlobPosition(blob_index_) == SP_DROPCAP; return false; }
bool tesseract::LTRResultIterator::SymbolIsSubscript | ( | ) | const |
Definition at line 316 of file ltrresultiterator.cpp.
{ if (cblob_it_ == NULL && it_->word() != NULL) return it_->word()->best_choice->BlobPosition(blob_index_) == SP_SUBSCRIPT; return false; }
bool tesseract::LTRResultIterator::SymbolIsSuperscript | ( | ) | const |
Definition at line 306 of file ltrresultiterator.cpp.
{ if (cblob_it_ == NULL && it_->word() != NULL) return it_->word()->best_choice->BlobPosition(blob_index_) == SP_SUPERSCRIPT; return false; }
Definition at line 194 of file ltrresultiterator.cpp.
{ if (it_->word() == NULL) return DIR_NEUTRAL; bool has_rtl = it_->word()->AnyRtlCharsInWord(); bool has_ltr = it_->word()->AnyLtrCharsInWord(); if (has_rtl && !has_ltr) return DIR_RIGHT_TO_LEFT; if (has_ltr && !has_rtl) return DIR_LEFT_TO_RIGHT; if (!has_ltr && !has_rtl) return DIR_NEUTRAL; return DIR_MIX; }
const char * tesseract::LTRResultIterator::WordFontAttributes | ( | bool * | is_bold, |
bool * | is_italic, | ||
bool * | is_underlined, | ||
bool * | is_monospace, | ||
bool * | is_serif, | ||
bool * | is_smallcaps, | ||
int * | pointsize, | ||
int * | font_id | ||
) | const |
Definition at line 156 of file ltrresultiterator.cpp.
{ if (it_->word() == NULL) return NULL; // Already at the end! if (it_->word()->fontinfo == NULL) { *font_id = -1; return NULL; // No font information. } const FontInfo& font_info = *it_->word()->fontinfo; *font_id = font_info.universal_id; *is_bold = font_info.is_bold(); *is_italic = font_info.is_italic(); *is_underlined = false; // TODO(rays) fix this! *is_monospace = font_info.is_fixed_pitch(); *is_serif = font_info.is_serif(); *is_smallcaps = it_->word()->small_caps; float row_height = it_->row()->row->x_height() + it_->row()->row->ascenders() - it_->row()->row->descenders(); // Convert from pixels to printers points. *pointsize = scaled_yres_ > 0 ? static_cast<int>(row_height * kPointsPerInch / scaled_yres_ + 0.5) : 0; return font_info.name; }
bool tesseract::LTRResultIterator::WordIsFromDictionary | ( | ) | const |
Definition at line 208 of file ltrresultiterator.cpp.
{ if (it_->word() == NULL) return false; // Already at the end! int permuter = it_->word()->best_choice->permuter(); return permuter == SYSTEM_DAWG_PERM || permuter == FREQ_DAWG_PERM || permuter == USER_DAWG_PERM; }
bool tesseract::LTRResultIterator::WordIsNumeric | ( | ) | const |
Definition at line 216 of file ltrresultiterator.cpp.
{ if (it_->word() == NULL) return false; // Already at the end! int permuter = it_->word()->best_choice->permuter(); return permuter == NUMBER_PERM; }
const char * tesseract::LTRResultIterator::WordLattice | ( | int * | lattice_size | ) | const |
Definition at line 296 of file ltrresultiterator.cpp.
{ if (it_->word() == NULL) return NULL; // Already at the end! if (it_->word()->blamer_bundle == NULL) return NULL; *lattice_size = it_->word()->blamer_bundle->lattice_size(); return it_->word()->blamer_bundle->lattice_data(); }
char * tesseract::LTRResultIterator::WordNormedUTF8Text | ( | ) | const |
Definition at line 279 of file ltrresultiterator.cpp.
{ if (it_->word() == NULL) return NULL; // Already at the end! STRING ocr_text; WERD_CHOICE* best_choice = it_->word()->best_choice; const UNICHARSET *unicharset = it_->word()->uch_set; ASSERT_HOST(best_choice != NULL); for (int i = 0; i < best_choice->length(); ++i) { ocr_text += unicharset->get_normed_unichar(best_choice->unichar_id(i)); } int length = ocr_text.length() + 1; char* result = new char[length]; strncpy(result, ocr_text.string(), length); return result; }
const char * tesseract::LTRResultIterator::WordRecognitionLanguage | ( | ) | const |
char * tesseract::LTRResultIterator::WordTruthUTF8Text | ( | ) | const |
Definition at line 268 of file ltrresultiterator.cpp.
{ if (!HasTruthString()) return NULL; STRING truth_text = it_->word()->blamer_bundle->TruthString(); int length = truth_text.length() + 1; char* result = new char[length]; strncpy(result, truth_text.string(), length); return result; }
friend class ChoiceIterator [friend] |
Definition at line 47 of file ltrresultiterator.h.
const char* tesseract::LTRResultIterator::line_separator_ [protected] |
Definition at line 176 of file ltrresultiterator.h.
const char* tesseract::LTRResultIterator::paragraph_separator_ [protected] |
Definition at line 177 of file ltrresultiterator.h.