tesseract  3.03
tesseract::LTRResultIterator Class Reference

#include <ltrresultiterator.h>

Inheritance diagram for tesseract::LTRResultIterator:
tesseract::PageIterator tesseract::ResultIterator tesseract::MutableIterator

List of all members.

Public Member Functions

 LTRResultIterator (PAGE_RES *page_res, Tesseract *tesseract, int scale, int scaled_yres, int rect_left, int rect_top, int rect_width, int rect_height)
virtual ~LTRResultIterator ()
char * GetUTF8Text (PageIteratorLevel level) const
void SetLineSeparator (const char *new_line)
void SetParagraphSeparator (const char *new_para)
float Confidence (PageIteratorLevel level) const
const char * WordFontAttributes (bool *is_bold, bool *is_italic, bool *is_underlined, bool *is_monospace, bool *is_serif, bool *is_smallcaps, int *pointsize, int *font_id) const
const char * WordRecognitionLanguage () const
StrongScriptDirection WordDirection () const
bool WordIsFromDictionary () const
bool WordIsNumeric () const
bool HasBlamerInfo () const
const void * GetParamsTrainingBundle () const
const char * GetBlamerDebug () const
const char * GetBlamerMisadaptionDebug () const
bool HasTruthString () const
bool EquivalentToTruth (const char *str) const
char * WordTruthUTF8Text () const
char * WordNormedUTF8Text () const
const char * WordLattice (int *lattice_size) const
bool SymbolIsSuperscript () const
bool SymbolIsSubscript () const
bool SymbolIsDropcap () const

Protected Attributes

const char * line_separator_
const char * paragraph_separator_

Friends

class ChoiceIterator

Detailed Description

Definition at line 46 of file ltrresultiterator.h.


Constructor & Destructor Documentation

tesseract::LTRResultIterator::LTRResultIterator ( PAGE_RES *  page_res,
Tesseract *  tesseract,
int  scale,
int  scaled_yres,
int  rect_left,
int  rect_top,
int  rect_width,
int  rect_height 
)

Definition at line 30 of file ltrresultiterator.cpp.

  : PageIterator(page_res, tesseract, scale, scaled_yres,
                 rect_left, rect_top, rect_width, rect_height),
    // All constructor arguments are forwarded unchanged to the PageIterator
    // base. Both separators start out as a single newline.
    line_separator_("\n"),
    paragraph_separator_("\n") {
}

tesseract::LTRResultIterator::~LTRResultIterator ( ) [virtual]

Definition at line 40 of file ltrresultiterator.cpp.

                                      {
  // Empty body: no cleanup is performed by this class here.
}

Member Function Documentation

float tesseract::LTRResultIterator::Confidence ( PageIteratorLevel  level) const

Definition at line 94 of file ltrresultiterator.cpp.

                                                                 {
  // Returns a confidence value clamped to [0, 100] for the element at the
  // requested level, or 0 when the iterator has no current word.
  if (it_->word() == NULL) return 0.0f;  // Already at the end!
  float mean_certainty = 0.0f;
  int certainty_count = 0;
  // Walk a copy of the iterator so the forward() calls below do not move it_.
  PAGE_RES_IT res_it(*it_);
  WERD_CHOICE* best_choice = res_it.word()->best_choice;
  ASSERT_HOST(best_choice != NULL);
  switch (level) {
    case RIL_BLOCK:
      // Accumulate word certainties until forward() lands in another block.
      do {
        best_choice = res_it.word()->best_choice;
        ASSERT_HOST(best_choice != NULL);
        mean_certainty += best_choice->certainty();
        ++certainty_count;
        res_it.forward();
      } while (res_it.block() == res_it.prev_block());
      break;
    case RIL_PARA:
      // Accumulate while still in the same block AND the same paragraph.
      // The block test comes first, so the row dereferences are skipped as
      // soon as the block changes.
      do {
        best_choice = res_it.word()->best_choice;
        ASSERT_HOST(best_choice != NULL);
        mean_certainty += best_choice->certainty();
        ++certainty_count;
        res_it.forward();
      } while (res_it.block() == res_it.prev_block() &&
               res_it.row()->row->para() == res_it.prev_row()->row->para());
      break;
    case RIL_TEXTLINE:
      // Accumulate word certainties until forward() lands on another row.
      do {
        best_choice = res_it.word()->best_choice;
        ASSERT_HOST(best_choice != NULL);
        mean_certainty += best_choice->certainty();
        ++certainty_count;
        res_it.forward();
      } while (res_it.row() == res_it.prev_row());
      break;
    case RIL_WORD:
      // Single sample: the certainty of the current word's best choice.
      mean_certainty += best_choice->certainty();
     ++certainty_count;
      break;
    case RIL_SYMBOL:
      // Single sample: the certainty at blob_index_ within the current word.
      // This is the last case, so the missing break has no effect.
      mean_certainty += best_choice->certainty(blob_index_);
      ++certainty_count;
  }
  if (certainty_count > 0) {
    mean_certainty /= certainty_count;
    // Linear mapping of the mean certainty to a percentage, then clamping.
    float confidence = 100 + 5 * mean_certainty;
    if (confidence < 0.0f) confidence = 0.0f;
    if (confidence > 100.0f) confidence = 100.0f;
    return confidence;
  }
  // Reached only when no sample was counted (e.g. a level value that matches
  // none of the cases above).
  return 0.0f;
}
bool tesseract::LTRResultIterator::EquivalentToTruth ( const char *  str) const

Definition at line 259 of file ltrresultiterator.cpp.

                                                               {
  // Without a truth string there is nothing to compare against.
  if (!HasTruthString()) return false;
  ASSERT_HOST(it_->word()->uch_set != NULL);
  // Build a WERD_CHOICE from str using the current word's unicharset and let
  // the word's blamer bundle decide whether it is correct.
  WERD_CHOICE str_wd(str, *(it_->word()->uch_set));
  return it_->word()->blamer_bundle->ChoiceIsCorrect(&str_wd);
}

const char * tesseract::LTRResultIterator::GetBlamerDebug ( ) const

Definition at line 237 of file ltrresultiterator.cpp.

                                                    {
  // Returns the debug string held by the current word's blamer bundle.
  // NOTE(review): neither it_->word() nor blamer_bundle is checked for NULL
  // here; presumably callers are expected to check HasBlamerInfo() first --
  // confirm against callers.
  return it_->word()->blamer_bundle->debug().string();
}

const void * tesseract::LTRResultIterator::GetParamsTrainingBundle ( ) const

Definition at line 230 of file ltrresultiterator.cpp.

                                                             {
  // Returns the address of the blamer bundle's params training bundle, or
  // NULL when there is no current word or the word has no blamer bundle.
  return (it_->word() != NULL && it_->word()->blamer_bundle != NULL) ?
      &(it_->word()->blamer_bundle->params_training_bundle()) : NULL;
}

char * tesseract::LTRResultIterator::GetUTF8Text ( PageIteratorLevel  level) const

Reimplemented in tesseract::ResultIterator.

Definition at line 45 of file ltrresultiterator.cpp.

                                                                  {
  // Returns the text at the requested level in a buffer allocated with
  // new char[], or NULL when the iterator has no current word. The buffer is
  // handed to the caller, so the caller is presumably expected to delete[] it.
  if (it_->word() == NULL) return NULL;  // Already at the end!
  STRING text;
  // Walk a copy of the iterator so the forward() calls below do not move it_.
  PAGE_RES_IT res_it(*it_);
  WERD_CHOICE* best_choice = res_it.word()->best_choice;
  ASSERT_HOST(best_choice != NULL);
  if (level == RIL_SYMBOL) {
    // Text of the single symbol at blob_index_ in the current word.
    text = res_it.word()->BestUTF8(blob_index_, false);
  } else if (level == RIL_WORD) {
    // Text of the current word's best choice.
    text = best_choice->unichar_string();
  } else {
    // Text line, paragraph or block: concatenate words, inserting the
    // configured separators at line and paragraph boundaries.
    bool eol = false;  // end of line?
    bool eop = false;  // end of paragraph?
    do {  // for each paragraph in a block
      do {  // for each text line in a paragraph
        do {  // for each word in a text line
          best_choice = res_it.word()->best_choice;
          ASSERT_HOST(best_choice != NULL);
          text += best_choice->unichar_string();
          text += " ";
          res_it.forward();
          eol = res_it.row() != res_it.prev_row();
        } while (!eol);
        // Drop the space appended after the last word of the line before
        // adding the line separator.
        text.truncate_at(text.length() - 1);
        text += line_separator_;
        // A paragraph ends when the block changes or the paragraph changes.
        // The block test comes first, so the row dereferences are skipped
        // when the block differs.
        eop = res_it.block() != res_it.prev_block() ||
            res_it.row()->row->para() != res_it.prev_row()->row->para();
      } while (level != RIL_TEXTLINE && !eop);
      if (eop) text += paragraph_separator_;
    } while (level == RIL_BLOCK && res_it.block() == res_it.prev_block());
  }
  // length counts the terminating NUL, so strncpy copies it as well.
  int length = text.length() + 1;
  char* result = new char[length];
  strncpy(result, text.string(), length);
  return result;
}

bool tesseract::LTRResultIterator::HasBlamerInfo ( ) const

Definition at line 223 of file ltrresultiterator.cpp.

                                            {
  // True only when there is a current word, it carries a blamer bundle, and
  // that bundle reports having debug info.
  return it_->word() != NULL && it_->word()->blamer_bundle != NULL &&
         it_->word()->blamer_bundle->HasDebugInfo();
}

bool tesseract::LTRResultIterator::HasTruthString ( ) const

Definition at line 248 of file ltrresultiterator.cpp.

                                             {
  // Returns true when the current word has a blamer bundle that does not
  // report NoTruth(); returns false when there is no current word.
  if (it_->word() == NULL) return false;  // Already at the end!
  if (it_->word()->blamer_bundle == NULL ||
      it_->word()->blamer_bundle->NoTruth()) {
    return false;  // no truth information for this word
  }
  return true;
}
void tesseract::LTRResultIterator::SetLineSeparator ( const char *  new_line)

Definition at line 83 of file ltrresultiterator.cpp.

void tesseract::LTRResultIterator::SetParagraphSeparator ( const char *  new_para)

Definition at line 88 of file ltrresultiterator.cpp.

                                                                  {
  // Stores the pointer as-is; no copy of the string is made here.
  paragraph_separator_ = new_para;
}

bool tesseract::LTRResultIterator::SymbolIsDropcap ( ) const

Definition at line 325 of file ltrresultiterator.cpp.

                                              {
  // True when the blob at blob_index_ in the current word's best choice has
  // position SP_DROPCAP. Returns false when there is no current word or when
  // cblob_it_ is non-NULL.
  // NOTE(review): cblob_it_ is not defined in this excerpt -- confirm what a
  // non-NULL value means before relying on the false result.
  if (cblob_it_ == NULL && it_->word() != NULL)
    return it_->word()->best_choice->BlobPosition(blob_index_) == SP_DROPCAP;
  return false;
}

bool tesseract::LTRResultIterator::SymbolIsSubscript ( ) const

Definition at line 316 of file ltrresultiterator.cpp.

                                                {
  // True when the blob at blob_index_ in the current word's best choice has
  // position SP_SUBSCRIPT. Returns false when there is no current word or
  // when cblob_it_ is non-NULL.
  // NOTE(review): cblob_it_ is not defined in this excerpt -- confirm what a
  // non-NULL value means before relying on the false result.
  if (cblob_it_ == NULL && it_->word() != NULL)
    return it_->word()->best_choice->BlobPosition(blob_index_) == SP_SUBSCRIPT;
  return false;
}

bool tesseract::LTRResultIterator::SymbolIsSuperscript ( ) const

Definition at line 306 of file ltrresultiterator.cpp.

                                                  {
  // True when the blob at blob_index_ in the current word's best choice has
  // position SP_SUPERSCRIPT. Returns false when there is no current word or
  // when cblob_it_ is non-NULL.
  // NOTE(review): cblob_it_ is not defined in this excerpt -- confirm what a
  // non-NULL value means before relying on the false result.
  if (cblob_it_ == NULL && it_->word() != NULL)
    return it_->word()->best_choice->BlobPosition(blob_index_) ==
        SP_SUPERSCRIPT;
  return false;
}

StrongScriptDirection tesseract::LTRResultIterator::WordDirection ( ) const

Definition at line 194 of file ltrresultiterator.cpp.

                                                             {
  // Classifies the current word by which character directions it contains.
  // With no current word the result is DIR_NEUTRAL.
  if (it_->word() == NULL) return DIR_NEUTRAL;
  bool has_rtl = it_->word()->AnyRtlCharsInWord();
  bool has_ltr = it_->word()->AnyLtrCharsInWord();
  // Only right-to-left characters present.
  if (has_rtl && !has_ltr)
    return DIR_RIGHT_TO_LEFT;
  // Only left-to-right characters present.
  if (has_ltr && !has_rtl)
    return DIR_LEFT_TO_RIGHT;
  // Neither kind present.
  if (!has_ltr && !has_rtl)
    return DIR_NEUTRAL;
  // Both kinds present.
  return DIR_MIX;
}
const char * tesseract::LTRResultIterator::WordFontAttributes ( bool *  is_bold,
bool *  is_italic,
bool *  is_underlined,
bool *  is_monospace,
bool *  is_serif,
bool *  is_smallcaps,
int *  pointsize,
int *  font_id 
) const

Definition at line 156 of file ltrresultiterator.cpp.

                                                                      {
  // Fills the output parameters with the font attributes of the current word
  // and returns the font name.
  // - No current word: returns NULL and writes none of the outputs.
  // - Word without font info: sets only *font_id (to -1) and returns NULL.
  // NOTE(review): every output pointer is dereferenced without a NULL check,
  // so callers presumably must pass valid pointers for all of them -- confirm.
  if (it_->word() == NULL) return NULL;  // Already at the end!
  if (it_->word()->fontinfo == NULL) {
    *font_id = -1;
    return NULL;  // No font information.
  }
  const FontInfo& font_info = *it_->word()->fontinfo;
  *font_id = font_info.universal_id;
  *is_bold = font_info.is_bold();
  *is_italic = font_info.is_italic();
  *is_underlined = false;  // TODO(rays) fix this!
  *is_monospace = font_info.is_fixed_pitch();
  *is_serif = font_info.is_serif();
  *is_smallcaps = it_->word()->small_caps;
  // Row height is the x-height plus ascenders minus descenders of the
  // current row.
  float row_height = it_->row()->row->x_height() +
      it_->row()->row->ascenders() - it_->row()->row->descenders();
  // Convert from pixels to printers points.
  // The + 0.5 rounds to nearest before the integer cast; a non-positive
  // scaled_yres_ yields 0 instead of dividing by it.
  *pointsize = scaled_yres_ > 0
      ? static_cast<int>(row_height * kPointsPerInch / scaled_yres_ + 0.5)
      : 0;

  return font_info.name;
}

bool tesseract::LTRResultIterator::WordIsFromDictionary ( ) const

Definition at line 208 of file ltrresultiterator.cpp.

                                                   {
  // True when the current word's best choice was produced by one of the
  // system, frequent-word or user DAWG permuters; false with no current word.
  // NOTE(review): best_choice is dereferenced without a NULL check here.
  if (it_->word() == NULL) return false;  // Already at the end!
  int permuter = it_->word()->best_choice->permuter();
  return permuter == SYSTEM_DAWG_PERM || permuter == FREQ_DAWG_PERM ||
         permuter == USER_DAWG_PERM;
}

bool tesseract::LTRResultIterator::WordIsNumeric ( ) const

Definition at line 216 of file ltrresultiterator.cpp.

                                            {
  // True when the current word's best choice has permuter NUMBER_PERM; false
  // with no current word.
  // NOTE(review): best_choice is dereferenced without a NULL check here.
  if (it_->word() == NULL) return false;  // Already at the end!
  int permuter = it_->word()->best_choice->permuter();
  return permuter == NUMBER_PERM;
}
const char * tesseract::LTRResultIterator::WordLattice ( int *  lattice_size) const

Definition at line 296 of file ltrresultiterator.cpp.

                                                                  {
  // Returns the lattice data held by the current word's blamer bundle and
  // stores its size in *lattice_size. On either NULL return below,
  // *lattice_size is left unwritten.
  if (it_->word() == NULL) return NULL;  // Already at the end!
  if (it_->word()->blamer_bundle == NULL) return NULL;
  *lattice_size = it_->word()->blamer_bundle->lattice_size();
  return it_->word()->blamer_bundle->lattice_data();
}

char * tesseract::LTRResultIterator::WordNormedUTF8Text ( ) const

Definition at line 279 of file ltrresultiterator.cpp.

                                                  {
  // Returns the normalized form of the current word in a buffer allocated
  // with new char[], or NULL when there is no current word. The buffer is
  // handed to the caller, so the caller is presumably expected to delete[] it.
  if (it_->word() == NULL) return NULL;  // Already at the end!
  STRING ocr_text;
  WERD_CHOICE* best_choice = it_->word()->best_choice;
  const UNICHARSET *unicharset = it_->word()->uch_set;
  ASSERT_HOST(best_choice != NULL);
  // Append the normalized unichar of every unichar id in the best choice.
  for (int i = 0; i < best_choice->length(); ++i) {
    ocr_text += unicharset->get_normed_unichar(best_choice->unichar_id(i));
  }
  // length counts the terminating NUL, so strncpy copies it as well.
  int length = ocr_text.length() + 1;
  char* result = new char[length];
  strncpy(result, ocr_text.string(), length);
  return result;
}

const char * tesseract::LTRResultIterator::WordRecognitionLanguage ( ) const

Definition at line 188 of file ltrresultiterator.cpp.

                                                             {
  // Returns the lang string of the Tesseract instance attached to the current
  // word, or NULL when there is no current word or no attached instance.
  if (it_->word() == NULL || it_->word()->tesseract == NULL) return NULL;
  return it_->word()->tesseract->lang.string();
}

char * tesseract::LTRResultIterator::WordTruthUTF8Text ( ) const

Definition at line 268 of file ltrresultiterator.cpp.

                                                 {
  // Returns a copy of the blamer bundle's truth string in a buffer allocated
  // with new char[], or NULL when HasTruthString() is false. The buffer is
  // handed to the caller, so the caller is presumably expected to delete[] it.
  if (!HasTruthString()) return NULL;
  STRING truth_text = it_->word()->blamer_bundle->TruthString();
  // length counts the terminating NUL, so strncpy copies it as well.
  int length = truth_text.length() + 1;
  char* result = new char[length];
  strncpy(result, truth_text.string(), length);
  return result;
}

Friends And Related Function Documentation

friend class ChoiceIterator [friend]

Definition at line 47 of file ltrresultiterator.h.


Member Data Documentation

const char * tesseract::LTRResultIterator::line_separator_ [protected]

Definition at line 176 of file ltrresultiterator.h.

const char * tesseract::LTRResultIterator::paragraph_separator_ [protected]

Definition at line 177 of file ltrresultiterator.h.


The documentation for this class was generated from the following files:
ltrresultiterator.h
ltrresultiterator.cpp
 All Classes Namespaces Files Functions Variables Typedefs Enumerations Enumerator Friends Defines