tesseract  3.03
tesseract::LMPainPoints Class Reference

#include <lm_pain_points.h>

List of all members.

Public Member Functions

 LMPainPoints (int max, float rat, bool fp, const Dict *d, int deb)
 ~LMPainPoints ()
bool HasPainPoints (LMPainPointsType pp_type) const
LMPainPointsType Deque (MATRIX_COORD *pp, float *priority)
void Clear ()
void GenerateInitial (WERD_RES *word_res)
void GenerateFromPath (float rating_cert_scale, ViterbiStateEntry *vse, WERD_RES *word_res)
void GenerateFromAmbigs (const DANGERR &fixpt, ViterbiStateEntry *vse, WERD_RES *word_res)
bool GenerateForBlamer (double max_char_wh_ratio, WERD_RES *word_res, int col, int row)
bool GeneratePainPoint (int col, int row, LMPainPointsType pp_type, float special_priority, bool ok_to_extend, float max_char_wh_ratio, WERD_RES *word_res)
void RemapForSplit (int index)

Static Public Member Functions

static const char * PainPointDescription (LMPainPointsType type)

Static Public Attributes

static const float kDefaultPainPointPriorityAdjustment = 2.0f
static const float kLooseMaxCharWhRatio = 2.5f

Detailed Description

Definition at line 53 of file lm_pain_points.h.


Constructor & Destructor Documentation

tesseract::LMPainPoints::LMPainPoints ( int  max,
float  rat,
bool  fp,
const Dict *  d,
int  deb 
) [inline]

Definition at line 66 of file lm_pain_points.h.

                                                                    :
    // Each constructor argument is stored in the matching member; the
    // constructor body itself is empty.
    max_heap_size_(max), max_char_wh_ratio_(rat), fixed_pitch_(fp),
    dict_(d), debug_level_(deb) {}

tesseract::LMPainPoints::~LMPainPoints ( ) [inline]

Definition at line 69 of file lm_pain_points.h.

{}  // Empty destructor body: no explicit cleanup is performed here.

Member Function Documentation

void tesseract::LMPainPoints::Clear ( ) [inline]

Definition at line 82 of file lm_pain_points.h.

               {
    // Empties the pain point heap of every type (indices 0 to
    // LM_PPTYPE_NUM - 1).
    for (int h = 0; h < LM_PPTYPE_NUM; ++h) pain_points_heaps_[h].clear();
  }

LMPainPointsType tesseract::LMPainPoints::Deque ( MATRIX_COORD *  pp,
float *  priority 
)

Definition at line 37 of file lm_pain_points.cpp.

                                                                      {
  // Removes one pain point from the first non-empty heap, scanning the heaps
  // in increasing type index order. On success the key of the removed entry
  // is written to *priority, its coordinate to *pp, and the heap's type is
  // returned. If every heap is empty, *pp and *priority are left untouched
  // and LM_PPTYPE_NUM is returned.
  for (int h = 0; h < LM_PPTYPE_NUM; ++h) {
    if (pain_points_heaps_[h].empty()) continue;
    // Copy out the top entry before popping it.
    *priority = pain_points_heaps_[h].PeekTop().key;
    *pp = pain_points_heaps_[h].PeekTop().data;
    pain_points_heaps_[h].Pop(NULL);
    return static_cast<LMPainPointsType>(h);
  }
  return LM_PPTYPE_NUM;
}
bool tesseract::LMPainPoints::GenerateForBlamer ( double  max_char_wh_ratio,
WERD_RES *  word_res,
int  col,
int  row 
) [inline]

Definition at line 99 of file lm_pain_points.h.

                                           {
    // Thin wrapper: requests a pain point of type LM_PPTYPE_BLAMER at
    // (col, row) with special priority 0.0 and ok_to_extend set to false,
    // and returns whatever GeneratePainPoint() returns.
    return GeneratePainPoint(col, row, LM_PPTYPE_BLAMER, 0.0, false,
                             max_char_wh_ratio, word_res);
  }
void tesseract::LMPainPoints::GenerateFromAmbigs ( const DANGERR &  fixpt,
ViterbiStateEntry *  vse,
WERD_RES *  word_res 
)

Definition at line 130 of file lm_pain_points.cpp.

                                                          {
  // Requests an LM_PPTYPE_AMBIG pain point for every entry of fixpt that is
  // flagged as dangerous. The return value of GeneratePainPoint() is ignored.
  //
  // Begins and ends in DANGERR vector now record the blob indices as used
  // by the ratings matrix.
  for (int d = 0; d < fixpt.size(); ++d) {
    const DANGERR_INFO &danger = fixpt[d];
    // Only use dangerous ambiguities.
    if (danger.dangerous) {
      // The pain point spans columns/rows danger.begin .. danger.end - 1.
      // vse->cost is passed as special_priority, but GeneratePainPoint() only
      // uses special_priority for LM_PPTYPE_PATH, so it has no effect here.
      GeneratePainPoint(danger.begin, danger.end - 1,
                        LM_PPTYPE_AMBIG, vse->cost, true,
                        kLooseMaxCharWhRatio, word_res);
    }
  }
}
void tesseract::LMPainPoints::GenerateFromPath ( float  rating_cert_scale,
ViterbiStateEntry *  vse,
WERD_RES *  word_res 
)

Definition at line 68 of file lm_pain_points.cpp.

                                                        {
  // Walks the path from vse back through its parent_vse links and, for each
  // pair of consecutive entries, requests an LM_PPTYPE_PATH pain point at
  // (parent's column, current entry's row).
  ViterbiStateEntry *curr_vse = vse;
  BLOB_CHOICE *curr_b = vse->curr_b;
  // The following pain point generation and priority calculation approaches
  // prioritize exploring paths with low average rating of the known part of
  // the path, while not relying on the ratings of the pieces to be combined.
  //
  // A pain point to combine the neighbors is generated for each pair of
  // neighboring blobs on the path (the path is represented by vse argument
  // given to GenerateFromPath()). The priority of each pain point is set to
  // the average rating (per outline length) of the path, not including the
  // ratings of the blobs to be combined.
  // The ratings of the blobs to be combined are not used to calculate the
  // priority, since it is not possible to determine from their magnitude
  // whether it will be beneficial to combine the blobs. The reason is that
  // chopped junk blobs (/ | - ') can have very good (low) ratings, however
  // combining them will be beneficial. Blobs with high ratings might be
  // over-joined pieces of characters, but also could be blobs from an unseen
  // font or chopped pieces of complex characters.
  while (curr_vse->parent_vse != NULL) {
    ViterbiStateEntry* parent_vse = curr_vse->parent_vse;
    const MATRIX_COORD& curr_cell = curr_b->matrix_cell();
    const MATRIX_COORD& parent_cell = parent_vse->curr_b->matrix_cell();
    // Candidate cell covering both the parent blob and the current blob.
    MATRIX_COORD pain_coord(parent_cell.col, curr_cell.row);
    // Request a pain point when the cell is not valid for the ratings matrix
    // or has not been classified yet.
    if (!pain_coord.Valid(*word_res->ratings) ||
        !word_res->ratings->Classified(parent_cell.col, curr_cell.row,
                                       dict_->WildcardID())) {
      // rat_subtr contains ratings sum of the two adjacent blobs to be merged.
      // rat_subtr will be subtracted from the ratings sum of the path, since
      // the blobs will be joined into a new blob, whose rating is yet unknown.
      float rat_subtr = curr_b->rating() + parent_vse->curr_b->rating();
      // ol_subtr contains the outline length of the blobs that will be joined.
      float ol_subtr =
          AssociateUtils::ComputeOutlineLength(rating_cert_scale, *curr_b) +
          AssociateUtils::ComputeOutlineLength(rating_cert_scale,
                                               *(parent_vse->curr_b));
      // ol_dif is the outline of the path without the two blobs to be joined.
      // Note: outline_length and ratings_sum are read from vse (the entry
      // passed in), not from curr_vse.
      float ol_dif = vse->outline_length - ol_subtr;
      // priority is set to the average rating of the path per unit of outline,
      // not counting the ratings of the pieces to be joined.
      // A non-positive ol_dif yields priority 0.0 instead of dividing by it.
      float priority = ol_dif > 0 ? (vse->ratings_sum-rat_subtr)/ol_dif : 0.0;
      GeneratePainPoint(pain_coord.col, pain_coord.row, LM_PPTYPE_PATH,
                        priority, true, max_char_wh_ratio_, word_res);
    } else if (debug_level_ > 3) {
      // The cell is already classified: report it and print its choices.
      tprintf("NO pain point (Classified) for col=%d row=%d type=%s\n",
              pain_coord.col, pain_coord.row,
              LMPainPointsTypeName[LM_PPTYPE_PATH]);
      BLOB_CHOICE_IT b_it(word_res->ratings->get(pain_coord.col,
                                                 pain_coord.row));
      for (b_it.mark_cycle_pt(); !b_it.cycled_list(); b_it.forward()) {
        BLOB_CHOICE* choice = b_it.data();
        choice->print_full();
      }
    }

    // Step one entry back along the path.
    curr_vse = parent_vse;
    curr_b = curr_vse->curr_b;
  }
}

void tesseract::LMPainPoints::GenerateInitial ( WERD_RES *  word_res) 

Definition at line 48 of file lm_pain_points.cpp.

                                                     {
  // Scans the off-diagonal cells (row > col) of the ratings matrix that lie
  // within its bandwidth and requests an LM_PPTYPE_SHAPE pain point for
  // each cell that has no entry yet but has a classified neighbour.
  MATRIX *ratings = word_res->ratings;
  // NOTE(review): associate_stats is declared but never used in this body.
  AssociateStats associate_stats;
  for (int col = 0; col < ratings->dimension(); ++col) {
    // Rows are limited to the band: at most bandwidth() cells past col, and
    // never beyond the matrix dimension.
    int row_end = MIN(ratings->dimension(), col + ratings->bandwidth() + 1);
    for (int row = col + 1; row < row_end; ++row) {
      MATRIX_COORD coord(col, row);
      // Skip cells that are valid and already hold an entry.
      if (coord.Valid(*ratings) &&
          ratings->get(col, row) != NOT_CLASSIFIED) continue;
      // Add an initial pain point if needed.
      // "Needed" means (col, row - 1) is classified, or (col + 1, row) exists
      // and is classified.
      if (ratings->Classified(col, row - 1, dict_->WildcardID()) ||
          (col + 1 < ratings->dimension() &&
              ratings->Classified(col + 1, row, dict_->WildcardID()))) {
        GeneratePainPoint(col, row, LM_PPTYPE_SHAPE, 0.0,
                          true, max_char_wh_ratio_, word_res);
      }
    }
  }
}
bool tesseract::LMPainPoints::GeneratePainPoint ( int  col,
int  row,
LMPainPointsType  pp_type,
float  special_priority,
bool  ok_to_extend,
float  max_char_wh_ratio,
WERD_RES *  word_res 
)

Definition at line 146 of file lm_pain_points.cpp.

                        {
  // Tries to add a pain point of type pp_type for the cell (col, row).
  // Returns true only if an entry was pushed onto the heap for pp_type.
  // Returns false if the cell is valid and already classified, if the
  // computed stats flag a bad shape, or if that heap already holds
  // max_heap_size_ entries.
  MATRIX_COORD coord(col, row);
  if (coord.Valid(*word_res->ratings) &&
      word_res->ratings->Classified(col, row, dict_->WildcardID())) {
    return false;
  }
  if (debug_level_ > 3) {
    tprintf("Generating pain point for col=%d row=%d type=%s\n",
            col, row, LMPainPointsTypeName[pp_type]);
  }
  // Compute associate stats.
  AssociateStats associate_stats;
  AssociateUtils::ComputeStats(col, row, NULL, 0, fixed_pitch_,
                               max_char_wh_ratio, word_res, debug_level_,
                               &associate_stats);
  // For fixed-pitch fonts/languages: if the current combined blob overlaps
  // the next blob on the right and it is ok to extend the blob, try extending
  // the blob until there is no overlap with the next blob on the right or
  // until the width-to-height ratio becomes too large.
  if (ok_to_extend) {
    // Each iteration advances row by one and recomputes the stats, so the
    // coordinate pushed below uses the extended row.
    while (associate_stats.bad_fixed_pitch_right_gap &&
           row + 1 < word_res->ratings->dimension() &&
           !associate_stats.bad_fixed_pitch_wh_ratio) {
      AssociateUtils::ComputeStats(col, ++row, NULL, 0, fixed_pitch_,
                                   max_char_wh_ratio, word_res, debug_level_,
                                   &associate_stats);
    }
  }
  if (associate_stats.bad_shape) {
    if (debug_level_ > 3) {
      tprintf("Discarded pain point with a bad shape\n");
    }
    return false;
  }

  // Insert the new pain point into pain_points_heap_.
  if (pain_points_heaps_[pp_type].size() < max_heap_size_) {
    // Compute pain point priority.
    // special_priority is used only for LM_PPTYPE_PATH; every other type
    // uses the gap_sum from the computed stats.
    float priority;
    if (pp_type == LM_PPTYPE_PATH) {
      priority = special_priority;
    } else {
      priority = associate_stats.gap_sum;
    }
    MatrixCoordPair pain_point(priority, MATRIX_COORD(col, row));
    pain_points_heaps_[pp_type].Push(&pain_point);
    // Printed at any non-zero debug level, unlike the messages above, which
    // require debug_level_ > 3.
    if (debug_level_) {
      tprintf("Added pain point with priority %g\n", priority);
    }
    return true;
  } else {
    if (debug_level_) tprintf("Pain points heap is full\n");
    return false;
  }
}
bool tesseract::LMPainPoints::HasPainPoints ( LMPainPointsType  pp_type) const [inline]

Definition at line 72 of file lm_pain_points.h.

                                                            {
    // True when the heap for pp_type holds at least one pain point.
    // pp_type is used directly as an index; no range check is done here.
    return !pain_points_heaps_[pp_type].empty();
  }
static const char* tesseract::LMPainPoints::PainPointDescription ( LMPainPointsType  type) [inline, static]

Definition at line 62 of file lm_pain_points.h.

                                                                 {
    // Looks up the entry of the LMPainPointsTypeName table for the given
    // type; type is used directly as an index, with no range check here.
    return LMPainPointsTypeName[type];
  }

void tesseract::LMPainPoints::RemapForSplit ( int  index) 

Definition at line 207 of file lm_pain_points.cpp.

                                          {
  // Applies MATRIX_COORD::MapForSplit(index) to the coordinate of every
  // pain point stored in every heap. Entries are updated in place through
  // the heap's underlying vector; keys (priorities) are not touched.
  for (int i = 0; i < LM_PPTYPE_NUM; ++i) {
    GenericVector<MatrixCoordPair>* heap = pain_points_heaps_[i].heap();
    for (int j = 0; j < heap->size(); ++j)
      (*heap)[j].data.MapForSplit(index);
  }
}

Member Data Documentation

const float tesseract::LMPainPoints::kDefaultPainPointPriorityAdjustment = 2.0f [static]

Definition at line 56 of file lm_pain_points.h.

const float tesseract::LMPainPoints::kLooseMaxCharWhRatio = 2.5f [static]

Definition at line 60 of file lm_pain_points.h.


The documentation for this class was generated from the following files:
lm_pain_points.h
lm_pain_points.cpp
 All Classes Namespaces Files Functions Variables Typedefs Enumerations Enumerator Friends Defines