tesseract  3.03
WERD_RES Class Reference

#include <pageres.h>

Inheritance diagram for WERD_RES:
ELIST_LINK

List of all members.

Public Member Functions

 WERD_RES ()
 WERD_RES (WERD *the_word)
 WERD_RES (const WERD_RES &source)
 ~WERD_RES ()
const char *const BestUTF8 (int blob_index, bool in_rtl_context) const
const char *const RawUTF8 (int blob_index) const
UNICHARSET::Direction SymbolDirection (int blob_index) const
bool AnyRtlCharsInWord () const
bool AnyLtrCharsInWord () const
bool UnicharsInReadingOrder () const
void InitNonPointers ()
void InitPointers ()
void Clear ()
void ClearResults ()
void ClearWordChoices ()
void ClearRatings ()
WERD_RES & operator= (const WERD_RES &source)
void CopySimpleFields (const WERD_RES &source)
void InitForRetryRecognition (const WERD_RES &source)
bool SetupForRecognition (const UNICHARSET &unicharset_in, tesseract::Tesseract *tesseract, Pix *pix, int norm_mode, const TBOX *norm_box, bool numeric_mode, bool use_body_size, bool allow_detailed_fx, ROW *row, const BLOCK *block)
void SetupBasicsFromChoppedWord (const UNICHARSET &unicharset_in)
void SetupFake (const UNICHARSET &uch)
void SetupWordScript (const UNICHARSET &unicharset_in)
void SetupBlamerBundle ()
void SetupBlobWidthsAndGaps ()
void InsertSeam (int blob_number, SEAM *seam)
bool AlternativeChoiceAdjustmentsWorseThan (float threshold) const
bool IsAmbiguous ()
bool StatesAllValid ()
void DebugWordChoices (bool debug, const char *word_to_debug)
void FilterWordChoices (int debug_level)
void ComputeAdaptionThresholds (float certainty_scale, float min_rating, float max_rating, float rating_margin, float *thresholds)
bool LogNewRawChoice (WERD_CHOICE *word_choice)
bool LogNewCookedChoice (int max_num_choices, bool debug, WERD_CHOICE *word_choice)
void PrintBestChoices () const
int GetBlobsWidth (int start_blob, int last_blob)
int GetBlobsGap (int blob_index)
BLOB_CHOICE * GetBlobChoice (int index) const
BLOB_CHOICE_LIST * GetBlobChoices (int index) const
void ConsumeWordResults (WERD_RES *word)
void ReplaceBestChoice (WERD_CHOICE *choice)
void RebuildBestState ()
void CloneChoppedToRebuild ()
void SetupBoxWord ()
void SetScriptPositions ()
void SetAllScriptPositions (tesseract::ScriptPos position)
void FakeClassifyWord (int blob_count, BLOB_CHOICE **choices)
void FakeWordFromRatings ()
void BestChoiceToCorrectText ()
bool ConditionalBlobMerge (TessResultCallback2< UNICHAR_ID, UNICHAR_ID, UNICHAR_ID > *class_cb, TessResultCallback2< bool, const TBOX &, const TBOX & > *box_cb)
void MergeAdjacentBlobs (int index)
UNICHAR_ID BothQuotes (UNICHAR_ID id1, UNICHAR_ID id2)
void fix_quotes ()
UNICHAR_ID BothHyphens (UNICHAR_ID id1, UNICHAR_ID id2)
bool HyphenBoxesOverlap (const TBOX &box1, const TBOX &box2)
void fix_hyphens ()
UNICHAR_ID BothSpaces (UNICHAR_ID id1, UNICHAR_ID id2)
void merge_tess_fails ()
void copy_on (WERD_RES *word_res)
bool PiecesAllNatural (int start, int count) const

Static Public Member Functions

static WERD_RES * deep_copy (const WERD_RES *src)

Public Attributes

WERD * word
tesseract::BoxWord * bln_boxes
ROW * blob_row
DENORM denorm
const UNICHARSET * uch_set
TWERD * chopped_word
GenericVector< SEAM * > seam_array
GenericVector< int > blob_widths
GenericVector< int > blob_gaps
MATRIX * ratings
WERD_CHOICE * best_choice
WERD_CHOICE * raw_choice
WERD_CHOICE_LIST best_choices
BlamerBundle * blamer_bundle
TWERD * rebuild_word
tesseract::BoxWord * box_word
GenericVector< int > best_state
GenericVector< STRING > correct_text
tesseract::Tesseract * tesseract
WERD_CHOICE * ep_choice
REJMAP reject_map
BOOL8 tess_failed
BOOL8 tess_accepted
BOOL8 tess_would_adapt
BOOL8 done
bool small_caps
inT8 italic
inT8 bold
const FontInfo * fontinfo
const FontInfo * fontinfo2
inT8 fontinfo_id_count
inT8 fontinfo_id2_count
BOOL8 guessed_x_ht
BOOL8 guessed_caps_ht
CRUNCH_MODE unlv_crunch_mode
float x_height
float caps_height
BOOL8 combination
BOOL8 part_of_combo
BOOL8 reject_spaces
GenericVector< inT8 > best_choice_fontinfo_ids

Detailed Description

Definition at line 154 of file pageres.h.


Constructor & Destructor Documentation

WERD_RES::WERD_RES ( ) [inline]

Definition at line 318 of file pageres.h.

WERD_RES::WERD_RES ( WERD *  the_word) [inline]

Definition at line 322 of file pageres.h.

                           {
    InitNonPointers();
    InitPointers();
    word = the_word;
  }
WERD_RES::WERD_RES ( const WERD_RES &  source) [inline]

Definition at line 329 of file pageres.h.

                                   {
    InitPointers();
    *this = source;            // see operator=
  }

WERD_RES::~WERD_RES ( )

Definition at line 1030 of file pageres.cpp.

                     {
  Clear();
}

Member Function Documentation

bool WERD_RES::AlternativeChoiceAdjustmentsWorseThan ( float  threshold) const

Definition at line 390 of file pageres.cpp.

                                                                          {
  // The choices are not changed by this iteration.
  WERD_CHOICE_IT wc_it(const_cast<WERD_CHOICE_LIST*>(&best_choices));
  for (wc_it.forward(); !wc_it.at_first(); wc_it.forward()) {
    WERD_CHOICE* choice = wc_it.data();
    if (choice->adjust_factor() <= threshold)
      return false;
  }
  return true;
}
bool WERD_RES::AnyLtrCharsInWord ( ) const [inline]

Definition at line 388 of file pageres.h.

                                 {
    if (uch_set == NULL || best_choice == NULL || best_choice->length() < 1)
      return false;
    for (int id = 0; id < best_choice->length(); id++) {
      int unichar_id = best_choice->unichar_id(id);
      if (unichar_id < 0 || unichar_id >= uch_set->size())
        continue;  // Ignore illegal chars.
      UNICHARSET::Direction dir = uch_set->get_direction(unichar_id);
      if (dir == UNICHARSET::U_LEFT_TO_RIGHT)
        return true;
    }
    return false;
  }
bool WERD_RES::AnyRtlCharsInWord ( ) const [inline]

Definition at line 371 of file pageres.h.

                                 {
    if (uch_set == NULL || best_choice == NULL || best_choice->length() < 1)
      return false;
    for (int id = 0; id < best_choice->length(); id++) {
      int unichar_id = best_choice->unichar_id(id);
      if (unichar_id < 0 || unichar_id >= uch_set->size())
        continue;  // Ignore illegal chars.
      UNICHARSET::Direction dir =
          uch_set->get_direction(unichar_id);
      if (dir == UNICHARSET::U_RIGHT_TO_LEFT ||
          dir == UNICHARSET::U_RIGHT_TO_LEFT_ARABIC ||
          dir == UNICHARSET::U_ARABIC_NUMBER)
        return true;
    }
    return false;
  }

void WERD_RES::BestChoiceToCorrectText ( )

Definition at line 862 of file pageres.cpp.

                                       {
  correct_text.clear();
  ASSERT_HOST(best_choice != NULL);
  for (int i = 0; i < best_choice->length(); ++i) {
    UNICHAR_ID choice_id = best_choice->unichar_id(i);
    const char* blob_choice = uch_set->id_to_unichar(choice_id);
    correct_text.push_back(STRING(blob_choice));
  }
}
const char* const WERD_RES::BestUTF8 ( int  blob_index,
bool  in_rtl_context 
) const [inline]

Definition at line 341 of file pageres.h.

                                                                        {
    if (blob_index < 0 || best_choice == NULL ||
        blob_index >= best_choice->length())
      return NULL;
    UNICHAR_ID id = best_choice->unichar_id(blob_index);
    if (id < 0 || id >= uch_set->size() || id == INVALID_UNICHAR_ID)
      return NULL;
    UNICHAR_ID mirrored = uch_set->get_mirror(id);
    if (in_rtl_context && mirrored > 0 && mirrored != INVALID_UNICHAR_ID)
      id = mirrored;
    return uch_set->id_to_unichar_ext(id);
  }

UNICHAR_ID WERD_RES::BothHyphens ( UNICHAR_ID  id1,
UNICHAR_ID  id2 
)

Definition at line 969 of file pageres.cpp.

                                                               {
  const char *ch = uch_set->id_to_unichar(id1);
  const char *next_ch = uch_set->id_to_unichar(id2);
  if (strlen(ch) == 1 && strlen(next_ch) == 1 &&
      (*ch == '-' || *ch == '~') && (*next_ch == '-' || *next_ch == '~'))
    return uch_set->unichar_to_id("-");
  return INVALID_UNICHAR_ID;
}

UNICHAR_ID WERD_RES::BothQuotes ( UNICHAR_ID  id1,
UNICHAR_ID  id2 
)

Definition at line 947 of file pageres.cpp.

                                                              {
  const char *ch = uch_set->id_to_unichar(id1);
  const char *next_ch = uch_set->id_to_unichar(id2);
  if (is_simple_quote(ch, strlen(ch)) &&
      is_simple_quote(next_ch, strlen(next_ch)))
    return uch_set->unichar_to_id("\"");
  return INVALID_UNICHAR_ID;
}

UNICHAR_ID WERD_RES::BothSpaces ( UNICHAR_ID  id1,
UNICHAR_ID  id2 
)

Definition at line 998 of file pageres.cpp.

                                                              {
  if (id1 == id2 && id1 == uch_set->unichar_to_id(" "))
    return id1;
  else
    return INVALID_UNICHAR_ID;
}
void WERD_RES::Clear ( )

Definition at line 1074 of file pageres.cpp.

                     {
  if (word != NULL && combination) {
    delete word;
  }
  word = NULL;
  delete blamer_bundle;
  blamer_bundle = NULL;
  ClearResults();
}

void WERD_RES::ClearRatings ( )

Definition at line 1129 of file pageres.cpp.

                            {
  if (ratings != NULL) {
    ratings->delete_matrix_pointers();
    delete ratings;
    ratings = NULL;
  }
}

void WERD_RES::ClearResults ( )

Definition at line 1084 of file pageres.cpp.

                            {
  done = false;
  fontinfo = NULL;
  fontinfo2 = NULL;
  fontinfo_id_count = 0;
  fontinfo_id2_count = 0;
  if (bln_boxes != NULL) {
    delete bln_boxes;
    bln_boxes = NULL;
  }
  blob_row = NULL;
  if (chopped_word != NULL) {
    delete chopped_word;
    chopped_word = NULL;
  }
  if (rebuild_word != NULL) {
    delete rebuild_word;
    rebuild_word = NULL;
  }
  if (box_word != NULL) {
    delete box_word;
    box_word = NULL;
  }
  best_state.clear();
  correct_text.clear();
  seam_array.delete_data_pointers();
  seam_array.clear();
  blob_widths.clear();
  blob_gaps.clear();
  ClearRatings();
  ClearWordChoices();
  if (blamer_bundle != NULL) blamer_bundle->ClearResults();
}

void WERD_RES::ClearWordChoices ( )

Definition at line 1117 of file pageres.cpp.

                                {
  best_choice = NULL;
  if (raw_choice != NULL) {
    delete raw_choice;
    raw_choice = NULL;
  }
  best_choices.clear();
  if (ep_choice != NULL) {
    delete ep_choice;
    ep_choice = NULL;
  }
}

void WERD_RES::CloneChoppedToRebuild ( )

Definition at line 774 of file pageres.cpp.

                                     {
  if (rebuild_word != NULL)
    delete rebuild_word;
  rebuild_word = new TWERD(*chopped_word);
  SetupBoxWord();
  int word_len = box_word->length();
  best_state.reserve(word_len);
  correct_text.reserve(word_len);
  for (int i = 0; i < word_len; ++i) {
    best_state.push_back(1);
    correct_text.push_back(STRING(""));
  }
}
void WERD_RES::ComputeAdaptionThresholds ( float  certainty_scale,
float  min_rating,
float  max_rating,
float  rating_margin,
float *  thresholds 
)

Definition at line 503 of file pageres.cpp.

                                                            {
  int chunk = 0;
  int end_chunk = best_choice->state(0);
  int end_raw_chunk = raw_choice->state(0);
  int raw_blob = 0;
  for (int i = 0; i < best_choice->length(); i++, thresholds++) {
    float avg_rating = 0.0f;
    int num_error_chunks = 0;

    // For each chunk in best choice blob i, count non-matching raw results.
    while (chunk < end_chunk) {
      if (chunk >= end_raw_chunk) {
        ++raw_blob;
        end_raw_chunk += raw_choice->state(raw_blob);
      }
      if (best_choice->unichar_id(i) !=
          raw_choice->unichar_id(raw_blob)) {
        avg_rating += raw_choice->certainty(raw_blob);
        ++num_error_chunks;
      }
      ++chunk;
    }

    if (num_error_chunks > 0) {
      avg_rating /= num_error_chunks;
      *thresholds = (avg_rating / -certainty_scale) * (1.0 - rating_margin);
    } else {
      *thresholds = max_rating;
    }

    if (*thresholds > max_rating)
      *thresholds = max_rating;
    if (*thresholds < min_rating)
      *thresholds = min_rating;
  }
}
bool WERD_RES::ConditionalBlobMerge ( TessResultCallback2< UNICHAR_ID, UNICHAR_ID, UNICHAR_ID > *  class_cb,
TessResultCallback2< bool, const TBOX &, const TBOX & > *  box_cb 
)

Definition at line 877 of file pageres.cpp.

                                                                 {
  ASSERT_HOST(best_choice->length() == 0 || ratings != NULL);
  bool modified = false;
  for (int i = 0; i + 1 < best_choice->length(); ++i) {
    UNICHAR_ID new_id = class_cb->Run(best_choice->unichar_id(i),
                                      best_choice->unichar_id(i+1));
    if (new_id != INVALID_UNICHAR_ID &&
        (box_cb == NULL || box_cb->Run(box_word->BlobBox(i),
                                       box_word->BlobBox(i + 1)))) {
      // Raw choice should not be fixed.
      best_choice->set_unichar_id(new_id, i);
      modified = true;
      MergeAdjacentBlobs(i);
      const MATRIX_COORD& coord = best_choice->MatrixCoord(i);
      if (!coord.Valid(*ratings)) {
        ratings->IncreaseBandSize(coord.row + 1 - coord.col);
      }
      BLOB_CHOICE_LIST* blob_choices = GetBlobChoices(i);
      if (FindMatchingChoice(new_id, blob_choices) == NULL) {
        // Insert a fake result.
        BLOB_CHOICE* blob_choice = new BLOB_CHOICE;
        blob_choice->set_unichar_id(new_id);
        BLOB_CHOICE_IT bc_it(blob_choices);
        bc_it.add_before_then_move(blob_choice);
      }
    }
  }
  delete class_cb;
  delete box_cb;
  return modified;
}

void WERD_RES::ConsumeWordResults ( WERD_RES *  word)

Definition at line 707 of file pageres.cpp.

                                                {
  denorm = word->denorm;
  blob_row = word->blob_row;
  MovePointerData(&chopped_word, &word->chopped_word);
  MovePointerData(&rebuild_word, &word->rebuild_word);
  MovePointerData(&box_word, &word->box_word);
  seam_array.delete_data_pointers();
  seam_array = word->seam_array;
  word->seam_array.clear();
  best_state.move(&word->best_state);
  correct_text.move(&word->correct_text);
  blob_widths.move(&word->blob_widths);
  blob_gaps.move(&word->blob_gaps);
  if (ratings != NULL) ratings->delete_matrix_pointers();
  MovePointerData(&ratings, &word->ratings);
  best_choice = word->best_choice;
  MovePointerData(&raw_choice, &word->raw_choice);
  best_choices.clear();
  WERD_CHOICE_IT wc_it(&best_choices);
  wc_it.add_list_after(&word->best_choices);
  reject_map = word->reject_map;
  if (word->blamer_bundle != NULL) {
    assert(blamer_bundle != NULL);
    blamer_bundle->CopyResults(*(word->blamer_bundle));
  }
  CopySimpleFields(*word);
}
void WERD_RES::copy_on ( WERD_RES *  word_res) [inline]

Definition at line 637 of file pageres.h.

                                   {  //from this word
    word->set_flag(W_BOL, word->flag(W_BOL) || word_res->word->flag(W_BOL));
    word->set_flag(W_EOL, word->flag(W_EOL) || word_res->word->flag(W_EOL));
    word->copy_on(word_res->word);
  }
void WERD_RES::DebugWordChoices ( bool  debug,
const char *  word_to_debug 
)

Definition at line 431 of file pageres.cpp.

                                                                     {
  if (debug ||
      (word_to_debug != NULL && *word_to_debug != '\0' && best_choice != NULL &&
       best_choice->unichar_string() == STRING(word_to_debug))) {
    if (raw_choice != NULL)
      raw_choice->print("\nBest Raw Choice");

    WERD_CHOICE_IT it(&best_choices);
    int index = 0;
    for (it.mark_cycle_pt(); !it.cycled_list(); it.forward(), ++index) {
      WERD_CHOICE* choice = it.data();
      STRING label;
      label.add_str_int("\nCooked Choice #", index);
      choice->print(label.string());
    }
  }
}
static WERD_RES* WERD_RES::deep_copy ( const WERD_RES *  src) [inline, static]

Definition at line 626 of file pageres.h.

                                                  {
    WERD_RES* result = new WERD_RES(*src);
    // That didn't copy the ratings, but we want a copy if there is one to
    // begin width.
    if (src->ratings != NULL)
      result->ratings = src->ratings->DeepCopy();
    return result;
  }
void WERD_RES::FakeClassifyWord ( int  blob_count,
BLOB_CHOICE **  choices 
)

Definition at line 818 of file pageres.cpp.

                                                                     {
  // Setup the WERD_RES.
  ASSERT_HOST(box_word != NULL);
  ASSERT_HOST(blob_count == box_word->length());
  ClearWordChoices();
  ClearRatings();
  ratings = new MATRIX(blob_count, 1);
  for (int c = 0; c < blob_count; ++c) {
    BLOB_CHOICE_LIST* choice_list = new BLOB_CHOICE_LIST;
    BLOB_CHOICE_IT choice_it(choice_list);
    choice_it.add_after_then_move(choices[c]);
    ratings->put(c, c, choice_list);
  }
  FakeWordFromRatings();
  reject_map.initialise(blob_count);
}

Definition at line 837 of file pageres.cpp.

                                   {
  int num_blobs = ratings->dimension();
  WERD_CHOICE* word_choice = new WERD_CHOICE(uch_set, num_blobs);
  word_choice->set_permuter(TOP_CHOICE_PERM);
  for (int b = 0; b < num_blobs; ++b) {
    UNICHAR_ID unichar_id = UNICHAR_SPACE;
    float rating = MAX_INT32;
    float certainty = -MAX_INT32;
    BLOB_CHOICE_LIST* choices = ratings->get(b, b);
    if (choices != NULL && !choices->empty()) {
      BLOB_CHOICE_IT bc_it(choices);
      BLOB_CHOICE* choice = bc_it.data();
      unichar_id = choice->unichar_id();
      rating = choice->rating();
      certainty = choice->certainty();
    }
    word_choice->append_unichar_id_space_allocated(unichar_id, 1, rating,
                                                   certainty);
  }
  LogNewRawChoice(word_choice);
  // Ownership of word_choice taken by word here.
  LogNewCookedChoice(1, false, word_choice);
}
void WERD_RES::FilterWordChoices ( int  debug_level)

Definition at line 454 of file pageres.cpp.

                                                {
  if (best_choice == NULL || best_choices.singleton())
    return;

  if (debug_level >= 2)
    best_choice->print("\nFiltering against best choice");
  WERD_CHOICE_IT it(&best_choices);
  int index = 0;
  for (it.forward(); !it.at_first(); it.forward(), ++index) {
    WERD_CHOICE* choice = it.data();
    float threshold = StopperAmbigThreshold(best_choice->adjust_factor(),
                                            choice->adjust_factor());
    // i, j index the blob choice in choice, best_choice.
    // chunk is an index into the chopped_word blobs (AKA chunks).
    // Since the two words may use different segmentations of the chunks, we
    // iterate over the chunks to find out whether a comparable blob
    // classification is much worse than the best result.
    int i = 0, j = 0, chunk = 0;
    // Each iteration of the while deals with 1 chunk. On entry choice_chunk
    // and best_chunk are the indices of the first chunk in the NEXT blob,
    // i.e. we don't have to increment i, j while chunk < choice_chunk and
    // best_chunk respectively.
    int choice_chunk = choice->state(0), best_chunk = best_choice->state(0);
    while (i < choice->length() && j < best_choice->length()) {
      if (choice->unichar_id(i) != best_choice->unichar_id(j) &&
          choice->certainty(i) - best_choice->certainty(j) < threshold) {
        if (debug_level >= 2) {
          STRING label;
          label.add_str_int("\nDiscarding bad choice #", index);
          choice->print(label.string());
          tprintf("i %d j %d Chunk %d Choice->Blob[i].Certainty %.4g"
              " BestChoice->ChunkCertainty[Chunk] %g Threshold %g\n",
              i, j, chunk, choice->certainty(i),
              best_choice->certainty(j), threshold);
        }
        delete it.extract();
        break;
      }
      ++chunk;
      // If needed, advance choice_chunk to keep up with chunk.
      while (choice_chunk < chunk && ++i < choice->length())
        choice_chunk += choice->state(i);
      // If needed, advance best_chunk to keep up with chunk.
      while (best_chunk < chunk && ++j < best_choice->length())
        best_chunk += best_choice->state(j);
    }
  }
}

void WERD_RES::fix_hyphens ( )

Definition at line 986 of file pageres.cpp.

void WERD_RES::fix_quotes ( )

Definition at line 957 of file pageres.cpp.

                          {
  if (!uch_set->contains_unichar("\"") ||
      !uch_set->get_enabled(uch_set->unichar_to_id("\"")))
    return;  // Don't create it if it is disallowed.

  ConditionalBlobMerge(
      NewPermanentTessCallback(this, &WERD_RES::BothQuotes),
      NULL);
}
BLOB_CHOICE * WERD_RES::GetBlobChoice ( int  index) const

Definition at line 692 of file pageres.cpp.

                                                    {
  if (index < 0 || index >= best_choice->length()) return NULL;
  BLOB_CHOICE_LIST* choices = GetBlobChoices(index);
  return FindMatchingChoice(best_choice->unichar_id(index), choices);
}
BLOB_CHOICE_LIST * WERD_RES::GetBlobChoices ( int  index) const

Definition at line 701 of file pageres.cpp.

                                                          {
  return best_choice->blob_choices(index, ratings);
}
int WERD_RES::GetBlobsGap ( int  blob_index)

Definition at line 682 of file pageres.cpp.

                                        {
  if (blob_index < 0 || blob_index >= blob_gaps.size())
    return 0;
  return blob_gaps[blob_index];
}
int WERD_RES::GetBlobsWidth ( int  start_blob,
int  last_blob 
)

Definition at line 672 of file pageres.cpp.

                                                         {
  int result = 0;
  for (int b = start_blob; b <= last_blob; ++b) {
    result += blob_widths[b];
    if (b < last_blob)
      result += blob_gaps[b];
  }
  return result;
}
bool WERD_RES::HyphenBoxesOverlap ( const TBOX &  box1,
const TBOX &  box2 
)

Definition at line 980 of file pageres.cpp.

                                                                    {
  return box1.right() >= box2.left();
}
void WERD_RES::InitForRetryRecognition ( const WERD_RES &  source)

Definition at line 230 of file pageres.cpp.

                                                             {
  word = source.word;
  CopySimpleFields(source);
  if (source.blamer_bundle != NULL) {
    blamer_bundle = new BlamerBundle();
    blamer_bundle->CopyTruth(*source.blamer_bundle);
  }
}

void WERD_RES::InitNonPointers ( )

Definition at line 1034 of file pageres.cpp.

                               {
  tess_failed = FALSE;
  tess_accepted = FALSE;
  tess_would_adapt = FALSE;
  done = FALSE;
  unlv_crunch_mode = CR_NONE;
  small_caps = false;
  italic = FALSE;
  bold = FALSE;
  // The fontinfos and tesseract count as non-pointers as they point to
  // data owned elsewhere.
  fontinfo = NULL;
  fontinfo2 = NULL;
  tesseract = NULL;
  fontinfo_id_count = 0;
  fontinfo_id2_count = 0;
  x_height = 0.0;
  caps_height = 0.0;
  guessed_x_ht = TRUE;
  guessed_caps_ht = TRUE;
  combination = FALSE;
  part_of_combo = FALSE;
  reject_spaces = FALSE;
}

void WERD_RES::InitPointers ( )

Definition at line 1059 of file pageres.cpp.

                            {
  word = NULL;
  bln_boxes = NULL;
  blob_row = NULL;
  uch_set = NULL;
  chopped_word = NULL;
  rebuild_word = NULL;
  box_word = NULL;
  ratings = NULL;
  best_choice = NULL;
  raw_choice = NULL;
  ep_choice = NULL;
  blamer_bundle = NULL;
}
void WERD_RES::InsertSeam ( int  blob_number,
SEAM *  seam 
)

Definition at line 370 of file pageres.cpp.

                                                     {
  // Insert the seam into the SEAMS array.
  insert_seam(chopped_word, blob_number, seam, &seam_array);
  if (ratings != NULL) {
    // Expand the ratings matrix.
    ratings = ratings->ConsumeAndMakeBigger(blob_number);
    // Fix all the segmentation states.
    if (raw_choice != NULL)
      raw_choice->UpdateStateForSplit(blob_number);
    WERD_CHOICE_IT wc_it(&best_choices);
    for (wc_it.mark_cycle_pt(); !wc_it.cycled_list(); wc_it.forward()) {
      WERD_CHOICE* choice = wc_it.data();
      choice->UpdateStateForSplit(blob_number);
    }
    SetupBlobWidthsAndGaps();
  }
}

bool WERD_RES::IsAmbiguous ( )

Definition at line 403 of file pageres.cpp.

                           {
  return !best_choices.singleton() || best_choice->dangerous_ambig_found();
}
bool WERD_RES::LogNewCookedChoice ( int  max_num_choices,
bool  debug,
WERD_CHOICE *  word_choice 
)

Definition at line 562 of file pageres.cpp.

                                                            {
  if (best_choice != NULL) {
    // Throw out obviously bad choices to save some work.
    // TODO(rays) Get rid of this! This piece of code produces different
    // results according to the order in which words are found, which is an
    // undesirable behavior. It would be better to keep all the choices and
    // prune them later when more information is available.
    float max_certainty_delta =
        StopperAmbigThreshold(best_choice->adjust_factor(),
                              word_choice->adjust_factor());
    if (max_certainty_delta > -kStopperAmbiguityThresholdOffset)
      max_certainty_delta = -kStopperAmbiguityThresholdOffset;
    if (word_choice->certainty() - best_choice->certainty() <
        max_certainty_delta) {
      if (debug) {
        STRING bad_string;
        word_choice->string_and_lengths(&bad_string, NULL);
        tprintf("Discarding choice \"%s\" with an overly low certainty"
                " %.3f vs best choice certainty %.3f (Threshold: %.3f)\n",
                bad_string.string(), word_choice->certainty(),
                best_choice->certainty(),
                max_certainty_delta + best_choice->certainty());
      }
      delete word_choice;
      return false;
    }
  }

  // Insert in the list in order of increasing rating, but knock out worse
  // string duplicates.
  WERD_CHOICE_IT it(&best_choices);
  const STRING& new_str = word_choice->unichar_string();
  bool inserted = false;
  int num_choices = 0;
  if (!it.empty()) {
    do {
      WERD_CHOICE* choice = it.data();
      if (choice->rating() > word_choice->rating() && !inserted) {
        // Time to insert.
        it.add_before_stay_put(word_choice);
        inserted = true;
        if (num_choices == 0)
          best_choice = word_choice;  // This is the new best.
        ++num_choices;
      }
      if (choice->unichar_string() == new_str) {
        if (inserted) {
          // New is better.
          delete it.extract();
        } else {
          // Old is better.
          if (debug) {
            tprintf("Discarding duplicate choice \"%s\", rating %g vs %g\n",
                    new_str.string(), word_choice->rating(), choice->rating());
          }
          delete word_choice;
          return false;
        }
      } else {
        ++num_choices;
        if (num_choices > max_num_choices)
          delete it.extract();
      }
      it.forward();
    } while (!it.at_first());
  }
  if (!inserted && num_choices < max_num_choices) {
    it.add_to_end(word_choice);
    inserted = true;
    if (num_choices == 0)
      best_choice = word_choice;  // This is the new best.
  }
  if (debug) {
    if (inserted)
      tprintf("New %s", best_choice == word_choice ? "Best" : "Secondary");
    else
      tprintf("Poor");
    word_choice->print(" Word Choice");
  }
  if (!inserted) {
    delete word_choice;
    return false;
  }
  return true;
}
bool WERD_RES::LogNewRawChoice ( WERD_CHOICE *  word_choice)

Definition at line 546 of file pageres.cpp.

                                                       {
  if (raw_choice == NULL || word_choice->rating() < raw_choice->rating()) {
    delete raw_choice;
    raw_choice = new WERD_CHOICE(*word_choice);
    raw_choice->set_permuter(TOP_CHOICE_PERM);
    return true;
  }
  return false;
}

void WERD_RES::merge_tess_fails ( )

Definition at line 1006 of file pageres.cpp.

void WERD_RES::MergeAdjacentBlobs ( int  index)

Definition at line 913 of file pageres.cpp.

                                           {
  if (reject_map.length() == best_choice->length())
    reject_map.remove_pos(index);
  best_choice->remove_unichar_id(index + 1);
  rebuild_word->MergeBlobs(index, index + 2);
  box_word->MergeBoxes(index, index + 2);
  if (index + 1 < best_state.length()) {
    best_state[index] += best_state[index + 1];
    best_state.remove(index + 1);
  }
}
WERD_RES & WERD_RES::operator= ( const WERD_RES &  source)

Definition at line 137 of file pageres.cpp.

                                                     {
  this->ELIST_LINK::operator=(source);
  Clear();
  if (source.combination) {
    word = new WERD;
    *word = *(source.word);      // deep copy
  } else {
    word = source.word;          // pt to same word
  }
  if (source.bln_boxes != NULL)
    bln_boxes = new tesseract::BoxWord(*source.bln_boxes);
  if (source.chopped_word != NULL)
    chopped_word = new TWERD(*source.chopped_word);
  if (source.rebuild_word != NULL)
    rebuild_word = new TWERD(*source.rebuild_word);
  // TODO(rays) Do we ever need to copy the seam_array?
  blob_row = source.blob_row;
  denorm = source.denorm;
  if (source.box_word != NULL)
    box_word = new tesseract::BoxWord(*source.box_word);
  best_state = source.best_state;
  correct_text = source.correct_text;
  blob_widths = source.blob_widths;
  blob_gaps = source.blob_gaps;
  // None of the uses of operator= require the ratings matrix to be copied,
  // so don't as it would be really slow.

  // Copy the cooked choices.
  WERD_CHOICE_IT wc_it(const_cast<WERD_CHOICE_LIST*>(&source.best_choices));
  WERD_CHOICE_IT wc_dest_it(&best_choices);
  for (wc_it.mark_cycle_pt(); !wc_it.cycled_list(); wc_it.forward()) {
    const WERD_CHOICE *choice = wc_it.data();
    wc_dest_it.add_after_then_move(new WERD_CHOICE(*choice));
  }
  if (!wc_dest_it.empty()) {
    wc_dest_it.move_to_first();
    best_choice = wc_dest_it.data();
    best_choice_fontinfo_ids = source.best_choice_fontinfo_ids;
  } else {
    best_choice = NULL;
    if (!best_choice_fontinfo_ids.empty()) {
      best_choice_fontinfo_ids.clear();
    }
  }

  if (source.raw_choice != NULL) {
    raw_choice = new WERD_CHOICE(*source.raw_choice);
  } else {
    raw_choice = NULL;
  }
  if (source.ep_choice != NULL) {
    ep_choice = new WERD_CHOICE(*source.ep_choice);
  } else {
    ep_choice = NULL;
  }
  reject_map = source.reject_map;
  combination = source.combination;
  part_of_combo = source.part_of_combo;
  CopySimpleFields(source);
  if (source.blamer_bundle != NULL) {
    blamer_bundle =  new BlamerBundle(*(source.blamer_bundle));
  }
  return *this;
}
bool WERD_RES::PiecesAllNatural ( int  start,
int  count 
) const

Definition at line 1017 of file pageres.cpp.

                                                          {
  // all seams must have no splits.
  for (int index = start; index < start + count - 1; ++index) {
    if (index >= 0 && index < seam_array.size()) {
      SEAM* seam = seam_array[index];
      if (seam != NULL && seam->split1 != NULL)
        return false;
    }
  }
  return true;
}

void WERD_RES::PrintBestChoices ( ) const

Definition at line 659 of file pageres.cpp.

                                      {
  STRING alternates_str;
  WERD_CHOICE_IT it(const_cast<WERD_CHOICE_LIST*>(&best_choices));
  for (it.mark_cycle_pt(); !it.cycled_list(); it.forward()) {
    if (!it.at_first()) alternates_str += "\", \"";
    alternates_str += it.data()->unichar_string();
  }
  tprintf("Alternates for \"%s\": {\"%s\"}\n",
          best_choice->unichar_string().string(), alternates_str.string());
}
const char* const WERD_RES::RawUTF8 ( int  blob_index) const [inline]

Definition at line 354 of file pageres.h.

                                                  {
    if (blob_index < 0 || blob_index >= raw_choice->length())
      return NULL;
    UNICHAR_ID id = raw_choice->unichar_id(blob_index);
    if (id < 0 || id >= uch_set->size() || id == INVALID_UNICHAR_ID)
      return NULL;
    return uch_set->id_to_unichar(id);
  }

void WERD_RES::RebuildBestState ( )

Definition at line 750 of file pageres.cpp.

                                {
  ASSERT_HOST(best_choice != NULL);
  if (rebuild_word != NULL)
    delete rebuild_word;
  rebuild_word = new TWERD;
  if (seam_array.empty())
    start_seam_list(chopped_word, &seam_array);
  best_state.truncate(0);
  int start = 0;
  for (int i = 0; i < best_choice->length(); ++i) {
    int length = best_choice->state(i);
    best_state.push_back(length);
    if (length > 1)
      join_pieces(seam_array, start, start + length - 1, chopped_word);
    TBLOB* blob = chopped_word->blobs[start];
    rebuild_word->blobs.push_back(new TBLOB(*blob));
    if (length > 1)
      break_pieces(seam_array, start, start + length - 1, chopped_word);
    start += length;
  }
}

void WERD_RES::ReplaceBestChoice ( WERD_CHOICE *  choice)

Definition at line 737 of file pageres.cpp.

                                                    {
  best_choice = choice;
  RebuildBestState();
  SetupBoxWord();
  // Make up a fake reject map of the right length to keep the
  // rejection pass happy.
  reject_map.initialise(best_state.length());
  done = tess_accepted = tess_would_adapt = true;
  SetScriptPositions();
}

void WERD_RES::SetAllScriptPositions ( tesseract::ScriptPos  position)

Definition at line 806 of file pageres.cpp.

                                                                {
  raw_choice->SetAllScriptPositions(position);
  WERD_CHOICE_IT wc_it(&best_choices);
  for (wc_it.mark_cycle_pt(); !wc_it.cycled_list(); wc_it.forward())
    wc_it.data()->SetAllScriptPositions(position);
}

void WERD_RES::SetupBlamerBundle ( )

Definition at line 345 of file pageres.cpp.

void WERD_RES::SetupBlobWidthsAndGaps ( )

Definition at line 352 of file pageres.cpp.

                                      {
  blob_widths.truncate(0);
  blob_gaps.truncate(0);
  int num_blobs = chopped_word->NumBlobs();
  for (int b = 0; b < num_blobs; ++b) {
    TBLOB *blob = chopped_word->blobs[b];
    TBOX box = blob->bounding_box();
    blob_widths.push_back(box.width());
    if (b + 1 < num_blobs) {
      blob_gaps.push_back(
          chopped_word->blobs[b + 1]->bounding_box().left() - box.right());
    }
  }
}
void WERD_RES::SetupFake ( const UNICHARSET & uch)

Definition at line 304 of file pageres.cpp.

                                                        {
  // Start from a clean result with freshly allocated, empty words and
  // box lists; the word is always marked as failed on exit.
  ClearResults();
  SetupWordScript(unicharset_in);
  chopped_word = new TWERD;
  rebuild_word = new TWERD;
  bln_boxes = new tesseract::BoxWord;
  box_word = new tesseract::BoxWord;
  const int blob_count = word->cblob_list()->length();
  if (blob_count <= 0) {
    // No blobs at all: log one bad choice as both the raw and the cooked
    // choice.
    WERD_CHOICE* bad_choice = new WERD_CHOICE(&unicharset_in);
    bad_choice->make_bad();
    LogNewRawChoice(bad_choice);
    // Ownership of bad_choice is taken by *this WERD_RES in
    // LogNewCookedChoice.
    LogNewCookedChoice(1, false, bad_choice);
  } else {
    // For non-text blocks, just pass any blobs through to the box_word
    // and call the word failed with a fake classification.
    BLOB_CHOICE** fake_choices = new BLOB_CHOICE*[blob_count];
    C_BLOB_IT blob_it(word->cblob_list());
    int next_slot = 0;
    blob_it.mark_cycle_pt();
    while (!blob_it.cycled_list()) {
      TBOX blob_box = blob_it.data()->bounding_box();
      box_word->InsertBox(box_word->length(), blob_box);
      fake_choices[next_slot++] = new BLOB_CHOICE(0, 10.0f, -1.0f,
                                                  -1, -1, -1, 0, 0, 0,
                                                  BCC_FAKE);
      blob_it.forward();
    }
    FakeClassifyWord(blob_count, fake_choices);
    // Only the pointer array is released here.
    delete [] fake_choices;
  }
  tess_failed = true;
}
bool WERD_RES::SetupForRecognition ( const UNICHARSET & unicharset_in,
tesseract::Tesseract * tesseract,
Pix *  pix,
int  norm_mode,
const TBOX * norm_box,
bool  numeric_mode,
bool  use_body_size,
bool  allow_detailed_fx,
ROW * row,
const BLOCK * block 
)

Definition at line 255 of file pageres.cpp.

                                                                 {
  // norm_mode arrives as a plain int; view it as an OcrEngineMode.
  tesseract::OcrEngineMode norm_mode_hint =
      static_cast<tesseract::OcrEngineMode>(norm_mode);
  tesseract = tess;
  POLY_BLOCK* pb = NULL;
  if (block != NULL)
    pb = block->poly_block();
  if ((norm_mode_hint != tesseract::OEM_CUBE_ONLY &&
       word->cblob_list()->empty()) || (pb != NULL && !pb->IsText())) {
    // Either the word has no blobs (this happens when all the blobs have
    // been moved to the rej_blobs list, which seems to occur frequently in
    // junk) or the block is not text. Set up a fake result and report
    // failure.
    SetupFake(unicharset_in);
    word->set_flag(W_REP_CHAR, false);
    return false;
  }
  ClearResults();
  SetupWordScript(unicharset_in);
  chopped_word = TWERD::PolygonalCopy(allow_detailed_fx, word);
  // Default to x_height; use the row's body size instead only when that is
  // requested and the row provides a positive value.
  float word_xheight = x_height;
  if (use_body_size && row != NULL && row->body_size() > 0.0f)
    word_xheight = row->body_size();
  chopped_word->BLNormalize(block, row, pix, word->flag(W_INVERSE),
                            word_xheight, numeric_mode, norm_mode_hint,
                            norm_box, &denorm);
  blob_row = row;
  SetupBasicsFromChoppedWord(unicharset_in);
  SetupBlamerBundle();
  // One ratings row per blob of the chopped word.
  ratings = new MATRIX(chopped_word->NumBlobs(), kWordrecMaxNumJoinChunks);
  tess_failed = false;
  return true;
}
void WERD_RES::SetupWordScript ( const UNICHARSET & unicharset_in)

Definition at line 336 of file pageres.cpp.

                                                    {
  // Remember the unicharset, then stamp the word with its default script
  // id and the two script-dependent flags.
  uch_set = &uch;
  const int default_script = uch.default_sid();
  word->set_script_id(default_script);
  word->set_flag(W_SCRIPT_HAS_XHEIGHT, uch.script_has_xheight());
  const bool is_latin = (default_script == uch.latin_sid());
  word->set_flag(W_SCRIPT_IS_LATIN, is_latin);
}

Definition at line 409 of file pageres.cpp.

                              {
  // Checks that raw_choice and every entry of best_choices report a
  // TotalOfStates() equal to ratings->dimension(). On the first mismatch a
  // diagnostic is printed and false is returned; otherwise returns true.
  int ratings_dim = ratings->dimension();
  if (raw_choice->TotalOfStates() != ratings_dim) {
    tprintf("raw_choice has total of states = %d vs ratings dim of %d\n",
            raw_choice->TotalOfStates(), ratings_dim);
    return false;
  }
  WERD_CHOICE_IT it(&best_choices);
  // index counts the position within best_choices, for the diagnostic only.
  int index = 0;
  for (it.mark_cycle_pt(); !it.cycled_list(); it.forward(), ++index) {
    WERD_CHOICE* choice = it.data();
    if (choice->TotalOfStates() != ratings_dim) {
      // The format has three %d conversions, so all three values must be
      // passed: the list position, the choice's total and the dimension.
      tprintf("Cooked #%d has total of states = %d vs ratings dim of %d\n",
              index, choice->TotalOfStates(), ratings_dim);
      return false;
    }
  }
  return true;
}
UNICHARSET::Direction WERD_RES::SymbolDirection ( int  blob_index) const [inline]

Definition at line 363 of file pageres.h.

                                                            {
    // Without a best_choice there is nothing to look up.
    if (best_choice == NULL)
      return UNICHARSET::U_OTHER_NEUTRAL;
    // An index outside [0, best_choice->length()) is also reported as
    // neutral.
    if (blob_index < 0 || blob_index >= best_choice->length())
      return UNICHARSET::U_OTHER_NEUTRAL;
    return uch_set->get_direction(best_choice->unichar_id(blob_index));
  }
bool WERD_RES::UnicharsInReadingOrder ( ) const [inline]

Definition at line 405 of file pageres.h.


Member Data Documentation

Definition at line 218 of file pageres.h.

WERD_CHOICE_LIST WERD_RES::best_choices

Definition at line 226 of file pageres.h.

Definition at line 254 of file pageres.h.

Definition at line 229 of file pageres.h.

Definition at line 183 of file pageres.h.

Definition at line 207 of file pageres.h.

Definition at line 185 of file pageres.h.

Definition at line 204 of file pageres.h.

Definition at line 284 of file pageres.h.

Definition at line 249 of file pageres.h.

Definition at line 294 of file pageres.h.

Definition at line 200 of file pageres.h.

Definition at line 312 of file pageres.h.

Definition at line 189 of file pageres.h.

Definition at line 281 of file pageres.h.

Definition at line 269 of file pageres.h.

Definition at line 286 of file pageres.h.

Definition at line 287 of file pageres.h.

Definition at line 289 of file pageres.h.

Definition at line 288 of file pageres.h.

Definition at line 291 of file pageres.h.

Definition at line 290 of file pageres.h.

Definition at line 283 of file pageres.h.

Definition at line 313 of file pageres.h.

Definition at line 214 of file pageres.h.

Definition at line 223 of file pageres.h.

Definition at line 243 of file pageres.h.

Definition at line 270 of file pageres.h.

Definition at line 314 of file pageres.h.

Definition at line 202 of file pageres.h.

Definition at line 282 of file pageres.h.

Definition at line 279 of file pageres.h.

Definition at line 271 of file pageres.h.

Definition at line 280 of file pageres.h.

Definition at line 265 of file pageres.h.

Definition at line 191 of file pageres.h.

Definition at line 292 of file pageres.h.

Definition at line 174 of file pageres.h.

Definition at line 293 of file pageres.h.


The documentation for this class was generated from the following files:
 All Classes Namespaces Files Functions Variables Typedefs Enumerations Enumerator Friends Defines