tesseract  3.03
tesseract::Wordrec Class Reference

#include <wordrec.h>

Inheritance diagram for tesseract::Wordrec:
tesseract::Classify tesseract::CCStruct tesseract::CUtil tesseract::CCUtil tesseract::Tesseract

List of all members.

Public Member Functions

 Wordrec ()
virtual ~Wordrec ()
void SaveAltChoices (const LIST &best_choices, WERD_RES *word)
void FillLattice (const MATRIX &ratings, const WERD_CHOICE_LIST &best_choices, const UNICHARSET &unicharset, BlamerBundle *blamer_bundle)
void CallFillLattice (const MATRIX &ratings, const WERD_CHOICE_LIST &best_choices, const UNICHARSET &unicharset, BlamerBundle *blamer_bundle)
void SegSearch (WERD_RES *word_res, BestChoiceBundle *best_choice_bundle, BlamerBundle *blamer_bundle)
void DoSegSearch (WERD_RES *word_res)
SEAMattempt_blob_chop (TWERD *word, TBLOB *blob, inT32 blob_number, bool italic_blob, const GenericVector< SEAM * > &seams)
SEAMchop_numbered_blob (TWERD *word, inT32 blob_number, bool italic_blob, const GenericVector< SEAM * > &seams)
SEAMchop_overlapping_blob (const GenericVector< TBOX > &boxes, bool italic_blob, WERD_RES *word_res, int *blob_number)
void add_seam_to_queue (float new_priority, SEAM *new_seam, SeamQueue *seams)
void choose_best_seam (SeamQueue *seam_queue, SPLIT *split, PRIORITY priority, SEAM **seam_result, TBLOB *blob, SeamPile *seam_pile)
void combine_seam (const SeamPile &seam_pile, const SEAM *seam, SeamQueue *seam_queue)
inT16 constrained_split (SPLIT *split, TBLOB *blob)
SEAMpick_good_seam (TBLOB *blob)
PRIORITY seam_priority (SEAM *seam, inT16 xmin, inT16 xmax)
void try_point_pairs (EDGEPT *points[MAX_NUM_POINTS], inT16 num_points, SeamQueue *seam_queue, SeamPile *seam_pile, SEAM **seam, TBLOB *blob)
void try_vertical_splits (EDGEPT *points[MAX_NUM_POINTS], inT16 num_points, EDGEPT_CLIST *new_points, SeamQueue *seam_queue, SeamPile *seam_pile, SEAM **seam, TBLOB *blob)
PRIORITY full_split_priority (SPLIT *split, inT16 xmin, inT16 xmax)
PRIORITY grade_center_of_blob (register BOUNDS_RECT rect)
PRIORITY grade_overlap (register BOUNDS_RECT rect)
PRIORITY grade_split_length (register SPLIT *split)
PRIORITY grade_sharpness (register SPLIT *split)
PRIORITY grade_width_change (register BOUNDS_RECT rect)
void set_outline_bounds (register EDGEPT *point1, register EDGEPT *point2, BOUNDS_RECT rect)
int crosses_outline (EDGEPT *p0, EDGEPT *p1, EDGEPT *outline)
int is_crossed (TPOINT a0, TPOINT a1, TPOINT b0, TPOINT b1)
int is_same_edgept (EDGEPT *p1, EDGEPT *p2)
bool near_point (EDGEPT *point, EDGEPT *line_pt_0, EDGEPT *line_pt_1, EDGEPT **near_pt)
void reverse_outline (EDGEPT *outline)
virtual BLOB_CHOICE_LIST * classify_piece (const GenericVector< SEAM * > &seams, inT16 start, inT16 end, const char *description, TWERD *word, BlamerBundle *blamer_bundle)
void merge_fragments (MATRIX *ratings, inT16 num_blobs)
void get_fragment_lists (inT16 current_frag, inT16 current_row, inT16 start, inT16 num_frag_parts, inT16 num_blobs, MATRIX *ratings, BLOB_CHOICE_LIST *choice_lists)
void merge_and_put_fragment_lists (inT16 row, inT16 column, inT16 num_frag_parts, BLOB_CHOICE_LIST *choice_lists, MATRIX *ratings)
void fill_filtered_fragment_list (BLOB_CHOICE_LIST *choices, int fragment_pos, int num_frag_parts, BLOB_CHOICE_LIST *filtered_choices)
program_editup

Initialize all the things in the program that need to be initialized. init_permute determines whether to initialize the permute functions and Dawg models.

void program_editup (const char *textbase, bool init_classifier, bool init_permute)
cc_recog

Recognize a word.

void cc_recog (WERD_RES *word)
program_editdown

This function holds any nessessary post processing for the Wise Owl program.

void program_editdown (inT32 elasped_time)
set_pass1

Get ready to do some pass 1 stuff.

void set_pass1 ()
set_pass2

Get ready to do some pass 2 stuff.

void set_pass2 ()
end_recog

Cleanup and exit the recog program.

int end_recog ()
call_matcher

Called from Tess with a blob in tess form. The blob may need rotating to the correct orientation for classification.

BLOB_CHOICE_LIST * call_matcher (TBLOB *blob)
dict_word()

Test the dictionaries, returning NO_PERM (0) if not found, or one of the PermuterType values if found, according to the dictionary.

int dict_word (const WERD_CHOICE &word)
classify_blob

Classify the this blob if it is not already recorded in the match table. Attempt to recognize this blob as a character. The recognition rating for this blob will be stored as a part of the blob. This value will also be returned to the caller.

Parameters:
blobCurrent blob
stringThe string to display in ScrollView
colorThe colour to use when displayed with ScrollView
BLOB_CHOICE_LIST * classify_blob (TBLOB *blob, const char *string, C_COL color, BlamerBundle *blamer_bundle)
point_priority

Assign a priority to and edge point that might be used as part of a split. The argument should be of type EDGEPT.

PRIORITY point_priority (EDGEPT *point)
add_point_to_list

Add an edge point to a POINT_GROUP containg a list of other points.

void add_point_to_list (PointHeap *point_heap, EDGEPT *point)
angle_change

Return the change in angle (degrees) of the line segments between points one and two, and two and three.

int angle_change (EDGEPT *point1, EDGEPT *point2, EDGEPT *point3)
is_little_chunk

Return TRUE if one of the pieces resulting from this split would less than some number of edge points.

int is_little_chunk (EDGEPT *point1, EDGEPT *point2)
is_small_area

Test the area defined by a split accross this outline.

int is_small_area (EDGEPT *point1, EDGEPT *point2)
pick_close_point

Choose the edge point that is closest to the critical point. This point may not be exactly vertical from the critical point.

EDGEPTpick_close_point (EDGEPT *critical_point, EDGEPT *vertical_point, int *best_dist)
prioritize_points

Find a list of edge points from the outer outline of this blob. For each of these points assign a priority. Sort these points using a heap structure so that they can be visited in order.

void prioritize_points (TESSLINE *outline, PointHeap *points)
new_min_point

Found a new minimum point try to decide whether to save it or not. Return the new value for the local minimum. If a point is saved then the local minimum is reset to NULL.

void new_min_point (EDGEPT *local_min, PointHeap *points)
new_max_point

Found a new minimum point try to decide whether to save it or not. Return the new value for the local minimum. If a point is saved then the local minimum is reset to NULL.

void new_max_point (EDGEPT *local_max, PointHeap *points)
vertical_projection_point

For one point on the outline, find the corresponding point on the other side of the outline that is a likely projection for a split point. This is done by iterating through the edge points until the X value of the point being looked at is greater than the X value of the split point. Ensure that the point being returned is not right next to the split point. Return the edge point in *best_point as a result, and any points that were newly created are also saved on the new_points list.

void vertical_projection_point (EDGEPT *split_point, EDGEPT *target_point, EDGEPT **best_point, EDGEPT_CLIST *new_points)
improve_one_blob

Finds the best place to chop, based on the worst blob, fixpt, or next to a fragment, according to the input. Returns the SEAM corresponding to the chop point, if any is found, and the index in the ratings_matrix of the chopped blob. Note that blob_choices is just a copy of the pointers in the leading diagonal of the ratings MATRIX. Although the blob is chopped, the returned SEAM is yet to be inserted into word->seam_array and the resulting blobs are unclassified, so this function can be used by ApplyBox as well as during recognition.

SEAMimprove_one_blob (const GenericVector< BLOB_CHOICE * > &blob_choices, DANGERR *fixpt, bool split_next_to_fragment, bool italic_blob, WERD_RES *word, int *blob_number)
chop_one_blob

Start with the current one-blob word and its classification. Find the worst blobs and try to divide it up to improve the ratings. Used for testing chopper.

SEAMchop_one_blob (const GenericVector< TBOX > &boxes, const GenericVector< BLOB_CHOICE * > &blob_choices, WERD_RES *word_res, int *blob_number)
chop_word_main

Classify the blobs in this word and permute the results. Find the worst blob in the word and chop it up. Continue this process until a good answer has been found or all the blobs have been chopped up enough. The results are returned in the WERD_RES.

void chop_word_main (WERD_RES *word)
improve_by_chopping

Repeatedly chops the worst blob, classifying the new blobs fixing up all the data, and incrementally runs the segmentation search until a good word is found, or no more chops can be found.

void improve_by_chopping (float rating_cert_scale, WERD_RES *word, BestChoiceBundle *best_choice_bundle, BlamerBundle *blamer_bundle, LMPainPoints *pain_points, GenericVector< SegSearchPending > *pending)
int select_blob_to_split (const GenericVector< BLOB_CHOICE * > &blob_choices, float rating_ceiling, bool split_next_to_fragment)
int select_blob_to_split_from_fixpt (DANGERR *fixpt)

Public Attributes

bool merge_fragments_in_matrix = TRUE
bool wordrec_no_block = FALSE
bool wordrec_enable_assoc = TRUE
bool force_word_assoc = FALSE
double wordrec_worst_state = 1
bool fragments_guide_chopper = FALSE
int repair_unchopped_blobs = 1
double tessedit_certainty_threshold = -2.25
int chop_debug = 0
bool chop_enable = 1
bool chop_vertical_creep = 0
int chop_split_length = 10000
int chop_same_distance = 2
int chop_min_outline_points = 6
int chop_seam_pile_size = 150
bool chop_new_seam_pile = 1
int chop_inside_angle = -50
int chop_min_outline_area = 2000
double chop_split_dist_knob = 0.5
double chop_overlap_knob = 0.9
double chop_center_knob = 0.15
int chop_centered_maxwidth = 90
double chop_sharpness_knob = 0.06
double chop_width_change_knob = 5.0
double chop_ok_split = 100.0
double chop_good_split = 50.0
int chop_x_y_weight = 3
int segment_adjust_debug = 0
bool assume_fixed_pitch_char_segment = FALSE
int wordrec_debug_level = 0
int wordrec_max_join_chunks = 4
bool wordrec_skip_no_truth_words = false
bool wordrec_debug_blamer = false
bool wordrec_run_blamer = false
int segsearch_debug_level = 0
int segsearch_max_pain_points = 2000
int segsearch_max_futile_classifications = 10
double segsearch_max_char_wh_ratio = 2.0
bool save_alt_choices = true
LanguageModellanguage_model_
PRIORITY pass2_ok_split
WERD_CHOICEprev_word_best_choice_
GenericVector< int > blame_reasons_
void(Wordrec::* fill_lattice_ )(const MATRIX &ratings, const WERD_CHOICE_LIST &best_choices, const UNICHARSET &unicharset, BlamerBundle *blamer_bundle)

Protected Member Functions

bool SegSearchDone (int num_futile_classifications)
void UpdateSegSearchNodes (float rating_cert_scale, int starting_col, GenericVector< SegSearchPending > *pending, WERD_RES *word_res, LMPainPoints *pain_points, BestChoiceBundle *best_choice_bundle, BlamerBundle *blamer_bundle)
void ProcessSegSearchPainPoint (float pain_point_priority, const MATRIX_COORD &pain_point, const char *pain_point_type, GenericVector< SegSearchPending > *pending, WERD_RES *word_res, LMPainPoints *pain_points, BlamerBundle *blamer_bundle)
void ResetNGramSearch (WERD_RES *word_res, BestChoiceBundle *best_choice_bundle, GenericVector< SegSearchPending > *pending)
void InitBlamerForSegSearch (WERD_RES *word_res, LMPainPoints *pain_points, BlamerBundle *blamer_bundle, STRING *blamer_debug)

Detailed Description

Definition at line 123 of file wordrec.h.


Constructor & Destructor Documentation

Definition at line 26 of file wordrec.cpp.

                 :
  // control parameters
  BOOL_MEMBER(merge_fragments_in_matrix, TRUE,
              "Merge the fragments in the ratings matrix and delete them"
              " after merging", params()),
  BOOL_MEMBER(wordrec_no_block, FALSE, "Don't output block information",
              params()),
  BOOL_MEMBER(wordrec_enable_assoc, TRUE, "Associator Enable",
              params()),
  BOOL_MEMBER(force_word_assoc, FALSE,
              "force associator to run regardless of what enable_assoc is."
              "This is used for CJK where component grouping is necessary.",
              CCUtil::params()),
  double_MEMBER(wordrec_worst_state, 1.0, "Worst segmentation state",
                params()),
  BOOL_MEMBER(fragments_guide_chopper, FALSE,
              "Use information from fragments to guide chopping process",
              params()),
  INT_MEMBER(repair_unchopped_blobs, 1, "Fix blobs that aren't chopped",
             params()),
  double_MEMBER(tessedit_certainty_threshold, -2.25, "Good blob limit",
                params()),
  INT_MEMBER(chop_debug, 0, "Chop debug",
             params()),
  BOOL_MEMBER(chop_enable, 1, "Chop enable",
              params()),
  BOOL_MEMBER(chop_vertical_creep, 0, "Vertical creep",
            params()),
  INT_MEMBER(chop_split_length, 10000, "Split Length",
             params()),
  INT_MEMBER(chop_same_distance, 2, "Same distance",
             params()),
  INT_MEMBER(chop_min_outline_points, 6, "Min Number of Points on Outline",
             params()),
  INT_MEMBER(chop_seam_pile_size, 150, "Max number of seams in seam_pile",
             params()),
  BOOL_MEMBER(chop_new_seam_pile, 1, "Use new seam_pile", params()),
  INT_MEMBER(chop_inside_angle, -50, "Min Inside Angle Bend",
             params()),
  INT_MEMBER(chop_min_outline_area, 2000, "Min Outline Area",
             params()),
  double_MEMBER(chop_split_dist_knob, 0.5, "Split length adjustment",
                params()),
  double_MEMBER(chop_overlap_knob, 0.9, "Split overlap adjustment",
                params()),
  double_MEMBER(chop_center_knob, 0.15, "Split center adjustment",
                params()),
  INT_MEMBER(chop_centered_maxwidth, 90, "Width of (smaller) chopped blobs "
             "above which we don't care that a chop is not near the center.",
             params()),
  double_MEMBER(chop_sharpness_knob, 0.06, "Split sharpness adjustment",
                params()),
  double_MEMBER(chop_width_change_knob, 5.0, "Width change adjustment",
                params()),
  double_MEMBER(chop_ok_split, 100.0, "OK split limit",
                params()),
  double_MEMBER(chop_good_split, 50.0, "Good split limit",
                params()),
  INT_MEMBER(chop_x_y_weight, 3, "X / Y  length weight",
             params()),
  INT_MEMBER(segment_adjust_debug, 0, "Segmentation adjustment debug",
             params()),
  BOOL_MEMBER(assume_fixed_pitch_char_segment, FALSE,
              "include fixed-pitch heuristics in char segmentation",
              params()),
  INT_MEMBER(wordrec_debug_level, 0,
             "Debug level for wordrec", params()),
  INT_MEMBER(wordrec_max_join_chunks, 4,
             "Max number of broken pieces to associate", params()),
  BOOL_MEMBER(wordrec_skip_no_truth_words, false,
              "Only run OCR for words that had truth recorded in BlamerBundle",
              params()),
  BOOL_MEMBER(wordrec_debug_blamer, false,
              "Print blamer debug messages", params()),
  BOOL_MEMBER(wordrec_run_blamer, false,
              "Try to set the blame for errors", params()),
  INT_MEMBER(segsearch_debug_level, 0,
             "SegSearch debug level", params()),
  INT_MEMBER(segsearch_max_pain_points, 2000,
             "Maximum number of pain points stored in the queue",
             params()),
  INT_MEMBER(segsearch_max_futile_classifications, 20,
             "Maximum number of pain point classifications per chunk that"
             "did not result in finding a better word choice.",
             params()),
  double_MEMBER(segsearch_max_char_wh_ratio, 2.0,
                "Maximum character width-to-height ratio", params()),
  BOOL_MEMBER(save_alt_choices, true,
              "Save alternative paths found during chopping"
              " and segmentation search",
              params()) {
  prev_word_best_choice_ = NULL;
  language_model_ = new LanguageModel(&get_fontinfo_table(),
                                      &(getDict()));
  fill_lattice_ = NULL;
}

Definition at line 123 of file wordrec.cpp.

                  {
  delete language_model_;
}

Member Function Documentation

void tesseract::Wordrec::add_point_to_list ( PointHeap point_heap,
EDGEPT point 
)

Definition at line 65 of file chop.cpp.

                                                                    {
  if (point_heap->size() < MAX_NUM_POINTS - 2) {
    PointPair pair(point_priority(point), point);
    point_heap->Push(&pair);
  }

#ifndef GRAPHICS_DISABLED
  if (chop_debug > 2)
    mark_outline(point);
#endif
}
void tesseract::Wordrec::add_seam_to_queue ( float  new_priority,
SEAM new_seam,
SeamQueue seams 
)

Definition at line 65 of file findseam.cpp.

                                                  {
  if (new_seam == NULL) return;
  if (chop_debug) {
    tprintf("Pushing new seam with priority %g :", new_priority);
    print_seam("seam: ", new_seam);
  }
  if (seams->size() >= MAX_NUM_SEAMS) {
    SeamPair old_pair(0, NULL);
    if (seams->PopWorst(&old_pair) && old_pair.key() <= new_priority) {
      if (chop_debug) {
        tprintf("Old seam staying with priority %g\n", old_pair.key());
      }
      delete new_seam;
      seams->Push(&old_pair);
      return;
    } else if (chop_debug) {
      tprintf("New seam with priority %g beats old worst seam with %g\n",
              new_priority, old_pair.key());
    }
  }
  SeamPair new_pair(new_priority, new_seam);
  seams->Push(&new_pair);
}
int tesseract::Wordrec::angle_change ( EDGEPT point1,
EDGEPT point2,
EDGEPT point3 
)

Definition at line 84 of file chop.cpp.

                                                                        {
  VECTOR vector1;
  VECTOR vector2;

  int angle;
  float length;

  /* Compute angle */
  vector1.x = point2->pos.x - point1->pos.x;
  vector1.y = point2->pos.y - point1->pos.y;
  vector2.x = point3->pos.x - point2->pos.x;
  vector2.y = point3->pos.y - point2->pos.y;
  /* Use cross product */
  length = (float)sqrt((float)LENGTH(vector1) * LENGTH(vector2));
  if ((int) length == 0)
    return (0);
  angle = static_cast<int>(floor(asin(CROSS (vector1, vector2) /
                                      length) / PI * 180.0 + 0.5));

  /* Use dot product */
  if (SCALAR (vector1, vector2) < 0)
    angle = 180 - angle;
  /* Adjust angle */
  if (angle > 180)
    angle -= 360;
  if (angle <= -180)
    angle += 360;
  return (angle);
}
SEAM * tesseract::Wordrec::attempt_blob_chop ( TWERD word,
TBLOB blob,
inT32  blob_number,
bool  italic_blob,
const GenericVector< SEAM * > &  seams 
)

Definition at line 175 of file chopper.cpp.

                                                                    {
  if (repair_unchopped_blobs)
    preserve_outline_tree (blob->outlines);
  TBLOB *other_blob = TBLOB::ShallowCopy(*blob);       /* Make new blob */
  // Insert it into the word.
  word->blobs.insert(other_blob, blob_number + 1);

  SEAM *seam = NULL;
  if (prioritize_division) {
    TPOINT location;
    if (divisible_blob(blob, italic_blob, &location)) {
      seam = new SEAM(0.0f, location, NULL, NULL, NULL);
    }
  }
  if (seam == NULL)
    seam = pick_good_seam(blob);
  if (chop_debug) {
    if (seam != NULL)
      print_seam("Good seam picked=", seam);
    else
      tprintf("\n** no seam picked *** \n");
  }
  if (seam) {
    apply_seam(blob, other_blob, italic_blob, seam);
  }

  seam = CheckSeam(chop_debug, blob_number, word, blob, other_blob,
                   seams, seam);
  if (seam == NULL) {
    if (repair_unchopped_blobs)
      restore_outline_tree(blob->outlines);
    if (word->latin_script) {
      // If the blob can simply be divided into outlines, then do that.
      TPOINT location;
      if (divisible_blob(blob, italic_blob, &location)) {
        other_blob = TBLOB::ShallowCopy(*blob);       /* Make new blob */
        word->blobs.insert(other_blob, blob_number + 1);
        seam = new SEAM(0.0f, location, NULL, NULL, NULL);
        apply_seam(blob, other_blob, italic_blob, seam);
        seam = CheckSeam(chop_debug, blob_number, word, blob, other_blob,
                         seams, seam);
      }
    }
  }
  return seam;
}
BLOB_CHOICE_LIST * tesseract::Wordrec::call_matcher ( TBLOB blob)

Definition at line 136 of file tface.cpp.

                                                       {
  // Rotate the blob for classification if necessary.
  TBLOB* rotated_blob = tessblob->ClassifyNormalizeIfNeeded();
  if (rotated_blob == NULL) {
    rotated_blob = tessblob;
  }
  BLOB_CHOICE_LIST *ratings = new BLOB_CHOICE_LIST();  // matcher result
  AdaptiveClassifier(rotated_blob, ratings);
  if (rotated_blob != tessblob) {
    delete rotated_blob;
  }
  return ratings;
}
void tesseract::Wordrec::CallFillLattice ( const MATRIX ratings,
const WERD_CHOICE_LIST &  best_choices,
const UNICHARSET unicharset,
BlamerBundle blamer_bundle 
) [inline]

Definition at line 195 of file wordrec.h.

                                                    {
    (this->*fill_lattice_)(ratings, best_choices, unicharset, blamer_bundle);
  }

Definition at line 111 of file tface.cpp.

                                     {
  getDict().reset_hyphen_vars(word->word->flag(W_EOL));
  chop_word_main(word);
  word->DebugWordChoices(getDict().stopper_debug_level >= 1,
                         getDict().word_to_debug.string());
  ASSERT_HOST(word->StatesAllValid());
}
void tesseract::Wordrec::choose_best_seam ( SeamQueue seam_queue,
SPLIT split,
PRIORITY  priority,
SEAM **  seam_result,
TBLOB blob,
SeamPile seam_pile 
)

Definition at line 104 of file findseam.cpp.

                                                    {
  SEAM *seam;
  char str[80];
  float my_priority;
  /* Add seam of split */
  my_priority = priority;
  if (split != NULL) {
    TPOINT split_point = split->point1->pos;
    split_point += split->point2->pos;
    split_point /= 2;
    seam = new SEAM(my_priority, split_point, split, NULL, NULL);
    if (chop_debug > 1)
      print_seam ("Partial priority    ", seam);
    add_seam_to_queue(my_priority, seam, seam_queue);

    if (my_priority > chop_good_split)
      return;
  }

  TBOX bbox = blob->bounding_box();
  /* Queue loop */
  while (!seam_queue->empty()) {
    SeamPair seam_pair;
    seam_queue->Pop(&seam_pair);
    seam = seam_pair.extract_data();
    /* Set full priority */
    my_priority = seam_priority(seam, bbox.left(), bbox.right());
    if (chop_debug) {
      sprintf (str, "Full my_priority %0.0f,  ", my_priority);
      print_seam(str, seam);
    }

    if ((*seam_result == NULL || (*seam_result)->priority > my_priority) &&
        my_priority < chop_ok_split) {
      /* No crossing */
      if (constrained_split(seam->split1, blob)) {
        delete *seam_result;
        *seam_result = new SEAM(*seam);
        (*seam_result)->priority = my_priority;
      } else {
        delete seam;
        seam = NULL;
        my_priority = BAD_PRIORITY;
      }
    }

    if (my_priority < chop_good_split) {
      if (seam)
        delete seam;
      return;                    /* Made good answer */
    }

    if (seam) {
      /* Combine with others */
      if (seam_pile->size() < chop_seam_pile_size) {
        combine_seam(*seam_pile, seam, seam_queue);
        SeamDecPair pair(seam_pair.key(), seam);
        seam_pile->Push(&pair);
      } else if (chop_new_seam_pile &&
                 seam_pile->size() == chop_seam_pile_size &&
                 seam_pile->PeekTop().key() > seam_pair.key()) {
        combine_seam(*seam_pile, seam, seam_queue);
        SeamDecPair pair;
        seam_pile->Pop(&pair);  // pop the worst.
        // Replace the seam in pair (deleting the old one) with
        // the new seam and score, then push back into the heap.
        pair.set_key(seam_pair.key());
        pair.set_data(seam);
        seam_pile->Push(&pair);
      } else {
        delete seam;
      }
    }

    my_priority = seam_queue->empty() ? NO_FULL_PRIORITY
                                      : seam_queue->PeekTop().key();
    if ((my_priority > chop_ok_split) ||
      (my_priority > chop_good_split && split))
      return;
  }
}
SEAM * tesseract::Wordrec::chop_numbered_blob ( TWERD word,
inT32  blob_number,
bool  italic_blob,
const GenericVector< SEAM * > &  seams 
)

Definition at line 225 of file chopper.cpp.

                                                                     {
  return attempt_blob_chop(word, word->blobs[blob_number], blob_number,
                           italic_blob, seams);
}
SEAM * tesseract::Wordrec::chop_one_blob ( const GenericVector< TBOX > &  boxes,
const GenericVector< BLOB_CHOICE * > &  blob_choices,
WERD_RES word_res,
int *  blob_number 
)

Definition at line 376 of file chopper.cpp.

                                               {
  if (prioritize_division) {
    return chop_overlapping_blob(boxes, true, word_res, blob_number);
  } else {
    return improve_one_blob(blob_choices, NULL, false, true, word_res,
                            blob_number);
  }
}
SEAM * tesseract::Wordrec::chop_overlapping_blob ( const GenericVector< TBOX > &  boxes,
bool  italic_blob,
WERD_RES word_res,
int *  blob_number 
)

Definition at line 233 of file chopper.cpp.

                                                       {
  TWERD *word = word_res->chopped_word;
  for (*blob_number = 0; *blob_number < word->NumBlobs(); ++*blob_number) {
    TBLOB *blob = word->blobs[*blob_number];
    TPOINT topleft, botright;
    topleft.x = blob->bounding_box().left();
    topleft.y = blob->bounding_box().top();
    botright.x = blob->bounding_box().right();
    botright.y = blob->bounding_box().bottom();

    TPOINT original_topleft, original_botright;
    word_res->denorm.DenormTransform(NULL, topleft, &original_topleft);
    word_res->denorm.DenormTransform(NULL, botright, &original_botright);

    TBOX original_box = TBOX(original_topleft.x, original_botright.y,
                             original_botright.x, original_topleft.y);

    bool almost_equal_box = false;
    int num_overlap = 0;
    for (int i = 0; i < boxes.size(); i++) {
      if (original_box.overlap_fraction(boxes[i]) > 0.125)
        num_overlap++;
      if (original_box.almost_equal(boxes[i], 3))
        almost_equal_box = true;
    }

    TPOINT location;
    if (divisible_blob(blob, italic_blob, &location) ||
        (!almost_equal_box && num_overlap > 1)) {
      SEAM *seam = attempt_blob_chop(word, blob, *blob_number,
                                     italic_blob, word_res->seam_array);
      if (seam != NULL)
        return seam;
    }
  }

  *blob_number = -1;
  return NULL;
}

Definition at line 440 of file chopper.cpp.

                                           {
  int num_blobs = word->chopped_word->NumBlobs();
  if (word->ratings == NULL) {
    word->ratings = new MATRIX(num_blobs, wordrec_max_join_chunks);
  }
  if (word->ratings->get(0, 0) == NULL) {
    // Run initial classification.
    for (int b = 0; b < num_blobs; ++b) {
      BLOB_CHOICE_LIST* choices = classify_piece(word->seam_array, b, b,
                                                 "Initial:", word->chopped_word,
                                                 word->blamer_bundle);
      word->ratings->put(b, b, choices);
    }
  } else {
    // Blobs have been pre-classified. Set matrix cell for all blob choices
    for (int col = 0; col < word->ratings->dimension(); ++col) {
      for (int row = col; row < word->ratings->dimension() &&
           row < col + word->ratings->bandwidth(); ++row) {
        BLOB_CHOICE_LIST* choices = word->ratings->get(col, row);
        if (choices != NULL) {
          BLOB_CHOICE_IT bc_it(choices);
          for (bc_it.mark_cycle_pt(); !bc_it.cycled_list(); bc_it.forward()) {
            bc_it.data()->set_matrix_cell(col, row);
          }
        }
      }
    }
  }

  // Run Segmentation Search.
  BestChoiceBundle best_choice_bundle(word->ratings->dimension());
  SegSearch(word, &best_choice_bundle, word->blamer_bundle);

  if (word->best_choice == NULL) {
    // SegSearch found no valid paths, so just use the leading diagonal.
    word->FakeWordFromRatings();
  }
  word->RebuildBestState();
  // If we finished without a hyphen at the end of the word, let the next word
  // be found in the dictionary.
  if (word->word->flag(W_EOL) &&
      !getDict().has_hyphen_end(*word->best_choice)) {
    getDict().reset_hyphen_vars(true);
  }

  if (word->blamer_bundle != NULL && this->fill_lattice_ != NULL) {
    CallFillLattice(*word->ratings, word->best_choices,
                    *word->uch_set, word->blamer_bundle);
  }
  if (wordrec_debug_level > 0) {
    tprintf("Final Ratings Matrix:\n");
    word->ratings->print(getDict().getUnicharset());
  }
  word->FilterWordChoices(getDict().stopper_debug_level);
}
BLOB_CHOICE_LIST * tesseract::Wordrec::classify_blob ( TBLOB blob,
const char *  string,
C_COL  color,
BlamerBundle blamer_bundle 
)

Definition at line 56 of file wordclass.cpp.

                                                                      {
#ifndef GRAPHICS_DISABLED
  if (wordrec_display_all_blobs)
    display_blob(blob, color);
#endif
  // TODO(rays) collapse with call_matcher and move all to wordrec.cpp.
  BLOB_CHOICE_LIST* choices = call_matcher(blob);
  // If a blob with the same bounding box as one of the truth character
  // bounding boxes is not classified as the corresponding truth character
  // blame character classifier for incorrect answer.
  if (blamer_bundle != NULL) {
    blamer_bundle->BlameClassifier(getDict().getUnicharset(),
                                   blob->bounding_box(),
                                   *choices,
                                   wordrec_debug_blamer);
  }
  #ifndef GRAPHICS_DISABLED
  if (classify_debug_level && string)
    print_ratings_list(string, choices, getDict().getUnicharset());

  if (wordrec_blob_pause)
    window_wait(blob_window);
#endif

  return choices;
}
BLOB_CHOICE_LIST * tesseract::Wordrec::classify_piece ( const GenericVector< SEAM * > &  seams,
inT16  start,
inT16  end,
const char *  description,
TWERD word,
BlamerBundle blamer_bundle 
) [virtual]

Definition at line 55 of file pieces.cpp.

                                                                       {
  if (end > start) join_pieces(seams, start, end, word);
  BLOB_CHOICE_LIST *choices = classify_blob(word->blobs[start], description,
                                            White, blamer_bundle);
  // Set the matrix_cell_ entries in all the BLOB_CHOICES.
  BLOB_CHOICE_IT bc_it(choices);
  for (bc_it.mark_cycle_pt(); !bc_it.cycled_list(); bc_it.forward()) {
    bc_it.data()->set_matrix_cell(start, end);
  }

  if (end > start) break_pieces(seams, start, end, word);

  return (choices);
}
void tesseract::Wordrec::combine_seam ( const SeamPile seam_pile,
const SEAM seam,
SeamQueue seam_queue 
)

tessedit_fix_sideways_chops ||

Definition at line 199 of file findseam.cpp.

                                                                    {
  register inT16 dist;
  inT16 bottom1, top1;
  inT16 bottom2, top2;

  SEAM *new_one;
  const SEAM *this_one;

  bottom1 = seam->split1->point1->pos.y;
  if (seam->split1->point2->pos.y >= bottom1)
    top1 = seam->split1->point2->pos.y;
  else {
    top1 = bottom1;
    bottom1 = seam->split1->point2->pos.y;
  }
  if (seam->split2 != NULL) {
    bottom2 = seam->split2->point1->pos.y;
    if (seam->split2->point2->pos.y >= bottom2)
      top2 = seam->split2->point2->pos.y;
    else {
      top2 = bottom2;
      bottom2 = seam->split2->point2->pos.y;
    }
  }
  else {
    bottom2 = bottom1;
    top2 = top1;
  }
  for (int x = 0; x < seam_pile.size(); ++x) {
    this_one = seam_pile.get(x).data();
    dist = seam->location.x - this_one->location.x;
    if (-SPLIT_CLOSENESS < dist &&
      dist < SPLIT_CLOSENESS &&
    seam->priority + this_one->priority < chop_ok_split) {
      inT16 split1_point1_y = this_one->split1->point1->pos.y;
      inT16 split1_point2_y = this_one->split1->point2->pos.y;
      inT16 split2_point1_y = 0;
      inT16 split2_point2_y = 0;
      if (this_one->split2) {
        split2_point1_y = this_one->split2->point1->pos.y;
        split2_point2_y = this_one->split2->point2->pos.y;
      }
      if (
        (
          /* this_one->split1 always exists */
          (
            ((split1_point1_y >= top1 && split1_point2_y >= top1) ||
             (split1_point1_y <= bottom1 && split1_point2_y <= bottom1))
            &&
            ((split1_point1_y >= top2 && split1_point2_y >= top2) ||
             (split1_point1_y <= bottom2 && split1_point2_y <= bottom2))
          )
        )
        &&
        (
          this_one->split2 == NULL ||
          (
            ((split2_point1_y >= top1 && split2_point2_y >= top1) ||
             (split2_point1_y <= bottom1 && split2_point2_y <= bottom1))
            &&
            ((split2_point1_y >= top2 && split2_point2_y >= top2) ||
             (split2_point1_y <= bottom2 && split2_point2_y <= bottom2))
          )
        )
      ) {
        new_one = join_two_seams (seam, this_one);
        if (new_one != NULL) {
          if (chop_debug > 1)
            print_seam ("Combo priority       ", new_one);
          add_seam_to_queue(new_one->priority, new_one, seam_queue);
        }
      }
    }
  }
}

Definition at line 284 of file findseam.cpp.

                                                          {
  TESSLINE *outline;

  if (is_little_chunk (split->point1, split->point2))
    return (FALSE);

  for (outline = blob->outlines; outline; outline = outline->next) {
    if (split_bounds_overlap (split, outline) &&
    crosses_outline (split->point1, split->point2, outline->loop)) {
      return (FALSE);
    }
  }
  return (TRUE);
}
int tesseract::Wordrec::crosses_outline ( EDGEPT p0,
EDGEPT p1,
EDGEPT outline 
)

Definition at line 48 of file outlines.cpp.

                                              {  /* Outline to check */
  EDGEPT *pt = outline;
  do {
    if (is_crossed (p0->pos, p1->pos, pt->pos, pt->next->pos))
      return (TRUE);
    pt = pt->next;
  }
  while (pt != outline);
  return (FALSE);
}

Definition at line 126 of file tface.cpp.

                                              {
  return getDict().valid_word(word);
}

Definition at line 31 of file segsearch.cpp.

                                            {
  BestChoiceBundle best_choice_bundle(word_res->ratings->dimension());
  // Run Segmentation Search.
  SegSearch(word_res, &best_choice_bundle, NULL);
}

Definition at line 63 of file tface.cpp.

                       {
  program_editdown (0);

  return (0);
}
void tesseract::Wordrec::fill_filtered_fragment_list ( BLOB_CHOICE_LIST *  choices,
int  fragment_pos,
int  num_frag_parts,
BLOB_CHOICE_LIST *  filtered_choices 
)

Definition at line 104 of file pieces.cpp.

                                                                              {
  BLOB_CHOICE_IT filtered_choices_it(filtered_choices);
  BLOB_CHOICE_IT choices_it(choices);

  for (choices_it.mark_cycle_pt(); !choices_it.cycled_list();
       choices_it.forward()) {
    UNICHAR_ID choice_unichar_id = choices_it.data()->unichar_id();
    const CHAR_FRAGMENT *frag = unicharset.get_fragment(choice_unichar_id);

    if (frag != NULL && frag->get_pos() == fragment_pos &&
        frag->get_total() == num_frag_parts) {
      // Recover the unichar_id of the unichar that this fragment is
      // a part of
      BLOB_CHOICE *b = new BLOB_CHOICE(*choices_it.data());
      int original_unichar = unicharset.unichar_to_id(frag->get_unichar());
      b->set_unichar_id(original_unichar);
      filtered_choices_it.add_to_end(b);
    }
  }

  filtered_choices->sort(SortByUnicharID<BLOB_CHOICE>);
}
void tesseract::Wordrec::FillLattice ( const MATRIX ratings,
const WERD_CHOICE_LIST &  best_choices,
const UNICHARSET unicharset,
BlamerBundle blamer_bundle 
)

Definition at line 74 of file gradechop.cpp.

                                                                          {
  BOUNDS_RECT rect;

  set_outline_bounds (split->point1, split->point2, rect);

  if (xmin < MIN (rect[0], rect[2]) && xmax > MAX (rect[1], rect[3]))
    return (999.0);

  return (grade_overlap (rect) +
    grade_center_of_blob (rect) + grade_width_change (rect));
}
void tesseract::Wordrec::get_fragment_lists ( inT16  current_frag,
inT16  current_row,
inT16  start,
inT16  num_frag_parts,
inT16  num_blobs,
MATRIX ratings,
BLOB_CHOICE_LIST *  choice_lists 
)

Definition at line 266 of file pieces.cpp.

                                                                 {
  if (current_frag == num_frag_parts) {
    merge_and_put_fragment_lists(start, current_row - 1, num_frag_parts,
                                 choice_lists, ratings);
    return;
  }

  for (inT16 x = current_row; x < num_blobs; x++) {
    BLOB_CHOICE_LIST *choices = ratings->get(current_row, x);
    if (choices == NULL)
      continue;

    fill_filtered_fragment_list(choices, current_frag, num_frag_parts,
                                &choice_lists[current_frag]);
    if (!choice_lists[current_frag].empty()) {
      get_fragment_lists(current_frag + 1, x + 1, start, num_frag_parts,
                         num_blobs, ratings, choice_lists);
      choice_lists[current_frag].clear();
    }
  }
}

Definition at line 95 of file gradechop.cpp.

                                                                {
  register PRIORITY grade;
  int width1 = rect[1] - rect[0];
  int width2 = rect[3] - rect[2];

  if (width1 > chop_centered_maxwidth &&
      width2 > chop_centered_maxwidth) {
    return 0.0;
  }

  grade = width1 - width2;
  if (grade < 0)
    grade = -grade;

  grade *= chop_center_knob;
  grade = MIN (CENTER_GRADE_CAP, grade);
  return (MAX (0.0, grade));
}

Definition at line 122 of file gradechop.cpp.

                                                         {
  register PRIORITY grade;
  register inT16 width1;
  register inT16 width2;
  register inT16 overlap;

  width1 = rect[3] - rect[2];
  width2 = rect[1] - rect[0];

  overlap = MIN (rect[1], rect[3]) - MAX (rect[0], rect[2]);
  width1 = MIN (width1, width2);
  if (overlap == width1)
    return (100.0);              /* Total overlap */

  width1 = 2 * overlap - width1; /* Extra penalty for too */
  overlap += MAX (0, width1);    /* much overlap */

  grade = overlap * chop_overlap_knob;

  return (MAX (0.0, grade));
}

Definition at line 175 of file gradechop.cpp.

                                                       {
  register PRIORITY grade;

  grade = point_priority (split->point1) + point_priority (split->point2);

  if (grade < -360.0)
    grade = 0;
  else
    grade += 360.0;

  grade *= chop_sharpness_knob;       /* Values 0 to -360 */

  return (grade);
}

Definition at line 152 of file gradechop.cpp.

                                                          {
  register PRIORITY grade;
  register float split_length;

  split_length = weighted_edgept_dist (split->point1, split->point2,
    chop_x_y_weight);

  if (split_length <= 0)
    grade = 0;
  else
    grade = sqrt (split_length) * chop_split_dist_knob;

  return (MAX (0.0, grade));
}

Definition at line 198 of file gradechop.cpp.

                                                              {
  register PRIORITY grade;
  register inT32 width1;
  register inT32 width2;

  width1 = rect[3] - rect[2];
  width2 = rect[1] - rect[0];

  grade = 20 - (MAX (rect[1], rect[3])
    - MIN (rect[0], rect[2]) - MAX (width1, width2));

  grade *= chop_width_change_knob;

  return (MAX (0.0, grade));
}
void tesseract::Wordrec::improve_by_chopping ( float  rating_cert_scale,
WERD_RES word,
BestChoiceBundle best_choice_bundle,
BlamerBundle blamer_bundle,
LMPainPoints pain_points,
GenericVector< SegSearchPending > *  pending 
)

Definition at line 503 of file chopper.cpp.

                                                                            {
  int blob_number;
  do {  // improvement loop.
    // Make a simple vector of BLOB_CHOICEs to make it easy to pick which
    // one to chop.
    GenericVector<BLOB_CHOICE*> blob_choices;
    int num_blobs = word->ratings->dimension();
    for (int i = 0; i < num_blobs; ++i) {
      BLOB_CHOICE_LIST* choices = word->ratings->get(i, i);
      if (choices == NULL || choices->empty()) {
        blob_choices.push_back(NULL);
      } else {
        BLOB_CHOICE_IT bc_it(choices);
        blob_choices.push_back(bc_it.data());
      }
    }
    SEAM* seam = improve_one_blob(blob_choices, &best_choice_bundle->fixpt,
                                  false, false, word, &blob_number);
    if (seam == NULL) break;
    // A chop has been made. We have to correct all the data structures to
    // take into account the extra bottom-level blob.
    // Put the seam into the seam_array and correct everything else on the
    // word: ratings matrix (including matrix location in the BLOB_CHOICES),
    // states in WERD_CHOICEs, and blob widths.
    word->InsertSeam(blob_number, seam);
    // Insert a new entry in the beam array.
    best_choice_bundle->beam.insert(new LanguageModelState, blob_number);
    // Fixpts are outdated, but will get recalculated.
    best_choice_bundle->fixpt.clear();
    // Remap existing pain points.
    pain_points->RemapForSplit(blob_number);
    // Insert a new pending at the chop point.
    pending->insert(SegSearchPending(), blob_number);

    // Classify the two newly created blobs using ProcessSegSearchPainPoint,
    // as that updates the pending correctly and adds new pain points.
    MATRIX_COORD pain_point(blob_number, blob_number);
    ProcessSegSearchPainPoint(0.0f, pain_point, "Chop1", pending, word,
                              pain_points, blamer_bundle);
    pain_point.col = blob_number + 1;
    pain_point.row = blob_number + 1;
    ProcessSegSearchPainPoint(0.0f, pain_point, "Chop2", pending, word,
                              pain_points, blamer_bundle);
    if (language_model_->language_model_ngram_on) {
      // N-gram evaluation depends on the number of blobs in a chunk, so we
      // have to re-evaluate everything in the word.
      ResetNGramSearch(word, best_choice_bundle, pending);
      blob_number = 0;
    }
    // Run language model incrementally. (Except with the n-gram model on.)
    UpdateSegSearchNodes(rating_cert_scale, blob_number, pending,
                         word, pain_points, best_choice_bundle, blamer_bundle);
  } while (!language_model_->AcceptableChoiceFound() &&
           word->ratings->dimension() < kMaxNumChunks);

  // If after running only the chopper best_choice is incorrect and no blame
  // has been yet set, blame the classifier if best_choice is classifier's
  // top choice and is a dictionary word (i.e. language model could not have
  // helped). Otherwise blame the tradeoff between the classifier and
  // the old language model (permuters).
  if (word->blamer_bundle != NULL &&
      word->blamer_bundle->incorrect_result_reason() == IRR_CORRECT &&
      !word->blamer_bundle->ChoiceIsCorrect(word->best_choice)) {
    bool valid_permuter = word->best_choice != NULL &&
        Dict::valid_word_permuter(word->best_choice->permuter(), false);
    word->blamer_bundle->BlameClassifierOrLangModel(word,
                                                    getDict().getUnicharset(),
                                                    valid_permuter,
                                                    wordrec_debug_blamer);
  }
}
SEAM * tesseract::Wordrec::improve_one_blob ( const GenericVector< BLOB_CHOICE * > &  blob_choices,
DANGERR fixpt,
bool  split_next_to_fragment,
bool  italic_blob,
WERD_RES word,
int *  blob_number 
)

Definition at line 332 of file chopper.cpp.

                                                  {
  float rating_ceiling = MAX_FLOAT32;
  SEAM *seam = NULL;
  do {
    *blob_number = select_blob_to_split_from_fixpt(fixpt);
    if (chop_debug) tprintf("blob_number from fixpt = %d\n", *blob_number);
    bool split_point_from_dict = (*blob_number != -1);
    if (split_point_from_dict) {
      fixpt->clear();
    } else {
      *blob_number = select_blob_to_split(blob_choices, rating_ceiling,
                                          split_next_to_fragment);
    }
    if (chop_debug) tprintf("blob_number = %d\n", *blob_number);
    if (*blob_number == -1)
      return NULL;

    // TODO(rays) it may eventually help to allow italic_blob to be true,
    seam = chop_numbered_blob(word->chopped_word, *blob_number, italic_blob,
                              word->seam_array);
    if (seam != NULL)
      return seam;  // Success!
    if (blob_choices[*blob_number] == NULL)
      return NULL;
    if (!split_point_from_dict) {
      // We chopped the worst rated blob, try something else next time.
      rating_ceiling = blob_choices[*blob_number]->rating();
    }
  } while (true);
  return seam;
}
void tesseract::Wordrec::InitBlamerForSegSearch ( WERD_RES word_res,
LMPainPoints pain_points,
BlamerBundle blamer_bundle,
STRING blamer_debug 
) [protected]

Definition at line 310 of file segsearch.cpp.

                                                           {
  pain_points->Clear();  // Clear pain points heap.
  TessResultCallback2<bool, int, int>* pp_cb = NewPermanentTessCallback(
      pain_points, &LMPainPoints::GenerateForBlamer,
      static_cast<double>(segsearch_max_char_wh_ratio), word_res);
  blamer_bundle->InitForSegSearch(word_res->best_choice, word_res->ratings,
                                  getDict().WildcardID(), wordrec_debug_blamer,
                                  blamer_debug, pp_cb);
  delete pp_cb;
}
int tesseract::Wordrec::is_crossed ( TPOINT  a0,
TPOINT  a1,
TPOINT  b0,
TPOINT  b1 
)

Definition at line 70 of file outlines.cpp.

                                                                  {
  int b0a1xb0b1, b0b1xb0a0;
  int a1b1xa1a0, a1a0xa1b0;

  TPOINT b0a1, b0a0, a1b1, b0b1, a1a0;

  b0a1.x = a1.x - b0.x;
  b0a0.x = a0.x - b0.x;
  a1b1.x = b1.x - a1.x;
  b0b1.x = b1.x - b0.x;
  a1a0.x = a0.x - a1.x;
  b0a1.y = a1.y - b0.y;
  b0a0.y = a0.y - b0.y;
  a1b1.y = b1.y - a1.y;
  b0b1.y = b1.y - b0.y;
  a1a0.y = a0.y - a1.y;

  b0a1xb0b1 = CROSS (b0a1, b0b1);
  b0b1xb0a0 = CROSS (b0b1, b0a0);
  a1b1xa1a0 = CROSS (a1b1, a1a0);
                                 /*a1a0xa1b0=CROSS(a1a0,a1b0); */
  a1a0xa1b0 = -CROSS (a1a0, b0a1);

  return ((b0a1xb0b1 > 0 && b0b1xb0a0 > 0)
    || (b0a1xb0b1 < 0 && b0b1xb0a0 < 0))
    && ((a1b1xa1a0 > 0 && a1a0xa1b0 > 0) || (a1b1xa1a0 < 0 && a1a0xa1b0 < 0));
}
int tesseract::Wordrec::is_little_chunk ( EDGEPT point1,
EDGEPT point2 
)

Definition at line 120 of file chop.cpp.

                                                           {
  EDGEPT *p = point1;            /* Iterator */
  int counter = 0;

  do {
                                 /* Go from P1 to P2 */
    if (is_same_edgept (point2, p)) {
      if (is_small_area (point1, point2))
        return (TRUE);
      else
        break;
    }
    p = p->next;
  }
  while ((p != point1) && (counter++ < chop_min_outline_points));
  /* Go from P2 to P1 */
  p = point2;
  counter = 0;
  do {
    if (is_same_edgept (point1, p)) {
      return (is_small_area (point2, point1));
    }
    p = p->next;
  }
  while ((p != point2) && (counter++ < chop_min_outline_points));

  return (FALSE);
}

Definition at line 104 of file outlines.cpp.

                                                  {
  return (p1 == p2);
}
int tesseract::Wordrec::is_small_area ( EDGEPT point1,
EDGEPT point2 
)

Definition at line 155 of file chop.cpp.

                                                         {
  EDGEPT *p = point1->next;      /* Iterator */
  int area = 0;
  TPOINT origin;

  do {
                                 /* Go from P1 to P2 */
    origin.x = p->pos.x - point1->pos.x;
    origin.y = p->pos.y - point1->pos.y;
    area += CROSS (origin, p->vec);
    p = p->next;
  }
  while (!is_same_edgept (point2, p));

  return (area < chop_min_outline_area);
}
void tesseract::Wordrec::merge_and_put_fragment_lists ( inT16  row,
inT16  column,
inT16  num_frag_parts,
BLOB_CHOICE_LIST *  choice_lists,
MATRIX ratings 
)

Definition at line 137 of file pieces.cpp.

                                                            {
  BLOB_CHOICE_IT *choice_lists_it = new BLOB_CHOICE_IT[num_frag_parts];

  for (int i = 0; i < num_frag_parts; i++) {
    choice_lists_it[i].set_to_list(&choice_lists[i]);
    choice_lists_it[i].mark_cycle_pt();
  }

  BLOB_CHOICE_LIST *merged_choice = ratings->get(row, column);
  if (merged_choice == NULL)
    merged_choice = new BLOB_CHOICE_LIST;

  bool end_of_list = false;
  BLOB_CHOICE_IT merged_choice_it(merged_choice);
  while (!end_of_list) {
    // Find the maximum unichar_id of the current entry the iterators
    // are pointing at
    UNICHAR_ID max_unichar_id = choice_lists_it[0].data()->unichar_id();
    for (int i = 0; i < num_frag_parts; i++) {
      UNICHAR_ID unichar_id = choice_lists_it[i].data()->unichar_id();
      if (max_unichar_id < unichar_id) {
        max_unichar_id = unichar_id;
      }
    }

    // Move the each iterators until it gets to an entry that has a
    // value greater than or equal to max_unichar_id
    for (int i = 0; i < num_frag_parts; i++) {
      UNICHAR_ID unichar_id = choice_lists_it[i].data()->unichar_id();
      while (!choice_lists_it[i].cycled_list() &&
             unichar_id < max_unichar_id) {
        choice_lists_it[i].forward();
        unichar_id = choice_lists_it[i].data()->unichar_id();
      }
      if (choice_lists_it[i].cycled_list()) {
        end_of_list = true;
        break;
      }
    }

    if (end_of_list)
      break;

    // Checks if the fragments are parts of the same character
    UNICHAR_ID first_unichar_id = choice_lists_it[0].data()->unichar_id();
    bool same_unichar = true;
    for (int i = 1; i < num_frag_parts; i++) {
      UNICHAR_ID unichar_id = choice_lists_it[i].data()->unichar_id();
      if (unichar_id != first_unichar_id) {
        same_unichar = false;
        break;
      }
    }

    if (same_unichar) {
      // Add the merged character to the result
      UNICHAR_ID merged_unichar_id = first_unichar_id;
      inT16 merged_fontinfo_id = choice_lists_it[0].data()->fontinfo_id();
      inT16 merged_fontinfo_id2 = choice_lists_it[0].data()->fontinfo_id2();
      float merged_min_xheight = choice_lists_it[0].data()->min_xheight();
      float merged_max_xheight = choice_lists_it[0].data()->max_xheight();
      float positive_yshift = 0, negative_yshift = 0;
      int merged_script_id = choice_lists_it[0].data()->script_id();
      BlobChoiceClassifier classifier = choice_lists_it[0].data()->classifier();

      float merged_rating = 0, merged_certainty = 0;
      for (int i = 0; i < num_frag_parts; i++) {
        float rating = choice_lists_it[i].data()->rating();
        float certainty = choice_lists_it[i].data()->certainty();

        if (i == 0 || certainty < merged_certainty)
          merged_certainty = certainty;
        merged_rating += rating;

        choice_lists_it[i].forward();
        if (choice_lists_it[i].cycled_list())
          end_of_list = true;
        IntersectRange(choice_lists_it[i].data()->min_xheight(),
                       choice_lists_it[i].data()->max_xheight(),
                       &merged_min_xheight, &merged_max_xheight);
        float yshift = choice_lists_it[i].data()->yshift();
        if (yshift > positive_yshift) positive_yshift = yshift;
        if (yshift < negative_yshift) negative_yshift = yshift;
      }

      float merged_yshift = positive_yshift != 0
          ? (negative_yshift != 0 ? 0 : positive_yshift)
          : negative_yshift;
      merged_choice_it.add_to_end(new BLOB_CHOICE(merged_unichar_id,
                                                  merged_rating,
                                                  merged_certainty,
                                                  merged_fontinfo_id,
                                                  merged_fontinfo_id2,
                                                  merged_script_id,
                                                  merged_min_xheight,
                                                  merged_max_xheight,
                                                  merged_yshift,
                                                  classifier));
    }
  }

  if (classify_debug_level)
    print_ratings_list("Merged Fragments", merged_choice,
                       unicharset);

  if (merged_choice->empty())
    delete merged_choice;
  else
    ratings->put(row, column, merged_choice);

  delete [] choice_lists_it;
}
void tesseract::Wordrec::merge_fragments ( MATRIX ratings,
inT16  num_blobs 
)

Definition at line 298 of file pieces.cpp.

                                                              {
  BLOB_CHOICE_LIST choice_lists[CHAR_FRAGMENT::kMaxChunks];
  for (inT16 start = 0; start < num_blobs; start++) {
    for (int frag_parts = 2; frag_parts <= CHAR_FRAGMENT::kMaxChunks;
         frag_parts++) {
      get_fragment_lists(0, start, start, frag_parts, num_blobs,
                         ratings, choice_lists);
    }
  }

  // Delete fragments from the rating matrix
  for (inT16 x = 0; x < num_blobs; x++) {
    for (inT16 y = x; y < num_blobs; y++) {
      BLOB_CHOICE_LIST *choices = ratings->get(x, y);
      if (choices != NULL) {
        BLOB_CHOICE_IT choices_it(choices);
        for (choices_it.mark_cycle_pt(); !choices_it.cycled_list();
             choices_it.forward()) {
          UNICHAR_ID choice_unichar_id = choices_it.data()->unichar_id();
          const CHAR_FRAGMENT *frag =
              unicharset.get_fragment(choice_unichar_id);
          if (frag != NULL)
            delete choices_it.extract();
        }
      }
    }
  }
}
bool tesseract::Wordrec::near_point ( EDGEPT point,
EDGEPT line_pt_0,
EDGEPT line_pt_1,
EDGEPT **  near_pt 
)

Definition at line 116 of file outlines.cpp.

                                           {
  TPOINT p;

  float slope;
  float intercept;

  float x0 = line_pt_0->pos.x;
  float x1 = line_pt_1->pos.x;
  float y0 = line_pt_0->pos.y;
  float y1 = line_pt_1->pos.y;

  if (x0 == x1) {
                                 /* Handle vertical line */
    p.x = (inT16) x0;
    p.y = point->pos.y;
  }
  else {
    /* Slope and intercept */
    slope = (y0 - y1) / (x0 - x1);
    intercept = y1 - x1 * slope;

    /* Find perpendicular */
    p.x = (inT16) ((point->pos.x + (point->pos.y - intercept) * slope) /
      (slope * slope + 1));
    p.y = (inT16) (slope * p.x + intercept);
  }

  if (is_on_line (p, line_pt_0->pos, line_pt_1->pos) &&
    (!same_point (p, line_pt_0->pos)) && (!same_point (p, line_pt_1->pos))) {
    /* Intersection on line */
    *near_pt = make_edgept(p.x, p.y, line_pt_1, line_pt_0);
    return true;
  } else {                           /* Intersection not on line */
    *near_pt = closest(point, line_pt_0, line_pt_1);
    return false;
  }
}
void tesseract::Wordrec::new_max_point ( EDGEPT local_max,
PointHeap points 
)

Definition at line 300 of file chop.cpp.

                                                                {
  inT16 dir;

  dir = direction (local_max);

  if (dir > 0) {
    add_point_to_list(points, local_max);
    return;
  }

  if (dir == 0 && point_priority (local_max) < 0) {
    add_point_to_list(points, local_max);
    return;
  }
}
void tesseract::Wordrec::new_min_point ( EDGEPT local_min,
PointHeap points 
)

Definition at line 276 of file chop.cpp.

                                                                {
  inT16 dir;

  dir = direction (local_min);

  if (dir < 0) {
    add_point_to_list(points, local_min);
    return;
  }

  if (dir == 0 && point_priority (local_min) < 0) {
    add_point_to_list(points, local_min);
    return;
  }
}
EDGEPT * tesseract::Wordrec::pick_close_point ( EDGEPT critical_point,
EDGEPT vertical_point,
int *  best_dist 
)

Definition at line 179 of file chop.cpp.

                                                  {
  EDGEPT *best_point = NULL;
  int this_distance;
  int found_better;

  do {
    found_better = FALSE;

    this_distance = edgept_dist (critical_point, vertical_point);
    if (this_distance <= *best_dist) {

      if (!(same_point (critical_point->pos, vertical_point->pos) ||
        same_point (critical_point->pos, vertical_point->next->pos) ||
        (best_point && same_point (best_point->pos, vertical_point->pos)) ||
      is_exterior_point (critical_point, vertical_point))) {
        *best_dist = this_distance;
        best_point = vertical_point;
        if (chop_vertical_creep)
          found_better = TRUE;
      }
    }
    vertical_point = vertical_point->next;
  }
  while (found_better == TRUE);

  return (best_point);
}

Definition at line 305 of file findseam.cpp.

                                         {
  SeamPile seam_pile(chop_seam_pile_size);
  EDGEPT *points[MAX_NUM_POINTS];
  EDGEPT_CLIST new_points;
  SEAM *seam = NULL;
  TESSLINE *outline;
  inT16 num_points = 0;

#ifndef GRAPHICS_DISABLED
  if (chop_debug > 2)
    wordrec_display_splits.set_value(true);

  draw_blob_edges(blob);
#endif

  PointHeap point_heap(MAX_NUM_POINTS);
  for (outline = blob->outlines; outline; outline = outline->next)
    prioritize_points(outline, &point_heap);

  while (!point_heap.empty() && num_points < MAX_NUM_POINTS) {
    points[num_points++] = point_heap.PeekTop().data;
    point_heap.Pop(NULL);
  }

  /* Initialize queue */
  SeamQueue seam_queue(MAX_NUM_SEAMS);

  try_point_pairs(points, num_points, &seam_queue, &seam_pile, &seam, blob);
  try_vertical_splits(points, num_points, &new_points,
                      &seam_queue, &seam_pile, &seam, blob);

  if (seam == NULL) {
    choose_best_seam(&seam_queue, NULL, BAD_PRIORITY, &seam, blob, &seam_pile);
  }
  else if (seam->priority > chop_good_split) {
    choose_best_seam(&seam_queue, NULL, seam->priority,
                     &seam, blob, &seam_pile);
  }

  EDGEPT_C_IT it(&new_points);
  for (it.mark_cycle_pt(); !it.cycled_list(); it.forward()) {
    EDGEPT *inserted_point = it.data();
    if (!point_used_by_seam(seam, inserted_point)) {
      for (outline = blob->outlines; outline; outline = outline->next) {
        if (outline->loop == inserted_point) {
          outline->loop = outline->loop->next;
        }
      }
      remove_edgept(inserted_point);
    }
  }

  if (seam) {
    if (seam->priority > chop_ok_split) {
      delete seam;
      seam = NULL;
    }
#ifndef GRAPHICS_DISABLED
    else if (wordrec_display_splits) {
      if (seam->split1)
        mark_split (seam->split1);
      if (seam->split2)
        mark_split (seam->split2);
      if (seam->split3)
        mark_split (seam->split3);
      if (chop_debug > 2) {
        update_edge_window();
        edge_window_wait();
      }
    }
#endif
  }

  if (chop_debug)
    wordrec_display_splits.set_value(false);

  return (seam);
}

Definition at line 55 of file chop.cpp.

                                              {
  return (PRIORITY)angle_change(point->prev, point, point->next);
}
void tesseract::Wordrec::prioritize_points ( TESSLINE outline,
PointHeap points 
)

Definition at line 217 of file chop.cpp.

                                                                    {
  EDGEPT *this_point;
  EDGEPT *local_min = NULL;
  EDGEPT *local_max = NULL;

  this_point = outline->loop;
  local_min = this_point;
  local_max = this_point;
  do {
    if (this_point->vec.y < 0) {
                                 /* Look for minima */
      if (local_max != NULL)
        new_max_point(local_max, points);
      else if (is_inside_angle (this_point))
        add_point_to_list(points, this_point);
      local_max = NULL;
      local_min = this_point->next;
    }
    else if (this_point->vec.y > 0) {
                                 /* Look for maxima */
      if (local_min != NULL)
        new_min_point(local_min, points);
      else if (is_inside_angle (this_point))
        add_point_to_list(points, this_point);
      local_min = NULL;
      local_max = this_point->next;
    }
    else {
      /* Flat area */
      if (local_max != NULL) {
        if (local_max->prev->vec.y != 0) {
          new_max_point(local_max, points);
        }
        local_max = this_point->next;
        local_min = NULL;
      }
      else {
        if (local_min->prev->vec.y != 0) {
          new_min_point(local_min, points);
        }
        local_min = this_point->next;
        local_max = NULL;
      }
    }

                                 /* Next point */
    this_point = this_point->next;
  }
  while (this_point != outline->loop);
}
void tesseract::Wordrec::ProcessSegSearchPainPoint ( float  pain_point_priority,
const MATRIX_COORD pain_point,
const char *  pain_point_type,
GenericVector< SegSearchPending > *  pending,
WERD_RES word_res,
LMPainPoints pain_points,
BlamerBundle blamer_bundle 
) [protected]

Definition at line 230 of file segsearch.cpp.

                                                            {
  if (segsearch_debug_level > 0) {
    tprintf("Classifying pain point %s priority=%.4f, col=%d, row=%d\n",
            pain_point_type, pain_point_priority,
            pain_point.col, pain_point.row);
  }
  ASSERT_HOST(pain_points != NULL);
  MATRIX *ratings = word_res->ratings;
  // Classify blob [pain_point.col pain_point.row]
  if (!pain_point.Valid(*ratings)) {
    ratings->IncreaseBandSize(pain_point.row + 1 - pain_point.col);
  }
  ASSERT_HOST(pain_point.Valid(*ratings));
  BLOB_CHOICE_LIST *classified = classify_piece(word_res->seam_array,
                                                pain_point.col, pain_point.row,
                                                pain_point_type,
                                                word_res->chopped_word,
                                                blamer_bundle);
  BLOB_CHOICE_LIST *lst = ratings->get(pain_point.col, pain_point.row);
  if (lst == NULL) {
    ratings->put(pain_point.col, pain_point.row, classified);
  } else {
    // We can not delete old BLOB_CHOICEs, since they might contain
    // ViterbiStateEntries that are parents of other "active" entries.
    // Thus if the matrix cell already contains classifications we add
    // the new ones to the beginning of the list.
    BLOB_CHOICE_IT it(lst);
    it.add_list_before(classified);
    delete classified;  // safe to delete, since empty after add_list_before()
    classified = NULL;
  }

  if (segsearch_debug_level > 0) {
    print_ratings_list("Updated ratings matrix with a new entry:",
                       ratings->get(pain_point.col, pain_point.row),
                       getDict().getUnicharset());
    ratings->print(getDict().getUnicharset());
  }

  // Insert initial "pain points" to join the newly classified blob
  // with its left and right neighbors.
  if (classified != NULL && !classified->empty()) {
    if (pain_point.col > 0) {
      pain_points->GeneratePainPoint(
          pain_point.col - 1, pain_point.row, LM_PPTYPE_SHAPE, 0.0,
          true, segsearch_max_char_wh_ratio, word_res);
    }
    if (pain_point.row + 1 < ratings->dimension()) {
      pain_points->GeneratePainPoint(
          pain_point.col, pain_point.row + 1, LM_PPTYPE_SHAPE, 0.0,
          true, segsearch_max_char_wh_ratio, word_res);
    }
  }
  (*pending)[pain_point.col].SetBlobClassified(pain_point.row);
}

Definition at line 76 of file tface.cpp.

void tesseract::Wordrec::program_editup ( const char *  textbase,
bool  init_classifier,
bool  init_permute 
)

Definition at line 47 of file tface.cpp.

                                             {
  if (textbase != NULL) imagefile = textbase;
  InitFeatureDefs(&feature_defs_);
  SetupExtractors(&feature_defs_);
  InitAdaptiveClassifier(init_classifier);
  if (init_dict) getDict().Load(Dict::GlobalDawgCache());
  pass2_ok_split = chop_ok_split;
}
void tesseract::Wordrec::ResetNGramSearch ( WERD_RES word_res,
BestChoiceBundle best_choice_bundle,
GenericVector< SegSearchPending > *  pending 
) [protected]

Definition at line 293 of file segsearch.cpp.

                                                                         {
  // TODO(rays) More refactoring required here.
  // Delete existing viterbi states.
  for (int col = 0; col < best_choice_bundle->beam.size(); ++col) {
    best_choice_bundle->beam[col]->Clear();
  }
  // Reset best_choice_bundle.
  word_res->ClearWordChoices();
  best_choice_bundle->best_vse = NULL;
  // Clear out all existing pendings and add a new one for the first column.
  (*pending)[0].SetColumnClassified();
  for (int i = 1; i < pending->size(); ++i)
    (*pending)[i].Clear();
}

Definition at line 164 of file outlines.cpp.

                                             {
  EDGEPT *edgept = outline;
  EDGEPT *temp;

  do {
                                 /* Swap next and prev */
    temp = edgept->prev;
    edgept->prev = edgept->next;
    edgept->next = temp;
    /* Set up vec field */
    edgept->vec.x = edgept->next->pos.x - edgept->pos.x;
    edgept->vec.y = edgept->next->pos.y - edgept->pos.y;

    edgept = edgept->prev;       /* Go to next point */
  }
  while (edgept != outline);
}
void tesseract::Wordrec::SaveAltChoices ( const LIST best_choices,
WERD_RES word 
)
PRIORITY tesseract::Wordrec::seam_priority ( SEAM seam,
inT16  xmin,
inT16  xmax 
)

Definition at line 390 of file findseam.cpp.

                                                                  {
  PRIORITY priority;

  if (seam->split1 == NULL)
    priority = 0;

  else if (seam->split2 == NULL) {
    priority = (seam->priority +
      full_split_priority (seam->split1, xmin, xmax));
  }

  else if (seam->split3 == NULL) {
    split_outline (seam->split2->point1, seam->split2->point2);
    priority = (seam->priority +
      full_split_priority (seam->split1, xmin, xmax));
    unsplit_outlines (seam->split2->point1, seam->split2->point2);
  }

  else {
    split_outline (seam->split2->point1, seam->split2->point2);
    split_outline (seam->split3->point1, seam->split3->point2);
    priority = (seam->priority +
      full_split_priority (seam->split1, xmin, xmax));
    unsplit_outlines (seam->split3->point1, seam->split3->point2);
    unsplit_outlines (seam->split2->point1, seam->split2->point2);
  }

  return (priority);
}
void tesseract::Wordrec::SegSearch ( WERD_RES word_res,
BestChoiceBundle best_choice_bundle,
BlamerBundle blamer_bundle 
)

Definition at line 37 of file segsearch.cpp.

                                                     {
  if (segsearch_debug_level > 0) {
    tprintf("Starting SegSearch on ratings matrix%s:\n",
            wordrec_enable_assoc ? " (with assoc)" : "");
    word_res->ratings->print(getDict().getUnicharset());
  }
  LMPainPoints pain_points(segsearch_max_pain_points,
                           segsearch_max_char_wh_ratio,
                           assume_fixed_pitch_char_segment,
                           &getDict(), segsearch_debug_level);

  pain_points.GenerateInitial(word_res);

  // Compute scaling factor that will help us recover blob outline length
  // from classifier rating and certainty for the blob.
  float rating_cert_scale = -1.0 * getDict().certainty_scale / rating_scale;

  language_model_->InitForWord(prev_word_best_choice_,
                               assume_fixed_pitch_char_segment,
                               segsearch_max_char_wh_ratio, rating_cert_scale);

  // Initialize blamer-related information: map character boxes recorded in
  // blamer_bundle->norm_truth_word to the corresponding i,j indices in the
  // ratings matrix. We expect this step to succeed, since when running the
  // chopper we checked that the correct chops are present.
  if (blamer_bundle != NULL) {
    blamer_bundle->SetupCorrectSegmentation(word_res->chopped_word,
                                            wordrec_debug_blamer);
  }

  MATRIX_COORD pain_point;
  float pain_point_priority;

  // pending[col] tells whether there is update work to do to combine
  // best_choice_bundle->beam[col - 1] with some BLOB_CHOICEs in matrix[col, *].
  // As the language model state is updated, pending entries are modified to
  // minimize duplication of work. It is important that during the update the
  // children are considered in the non-decreasing order of their column, since
  // this guarantees that all the parents would be up to date before an update
  // of a child is done.
  GenericVector<SegSearchPending> pending;
  pending.init_to_size(word_res->ratings->dimension(), SegSearchPending());

  // Search the ratings matrix for the initial best path.
  pending[0].SetColumnClassified();
  UpdateSegSearchNodes(rating_cert_scale, 0, &pending, word_res,
                       &pain_points, best_choice_bundle, blamer_bundle);

  if (!SegSearchDone(0)) {  // find a better choice
    if (chop_enable && word_res->chopped_word != NULL) {
      improve_by_chopping(rating_cert_scale, word_res, best_choice_bundle,
                          blamer_bundle, &pain_points, &pending);
    }
    if (chop_debug)
      print_seams("Final seam list:", word_res->seam_array);

    if (blamer_bundle != NULL &&
        !blamer_bundle->ChoiceIsCorrect(word_res->best_choice)) {
      blamer_bundle->SetChopperBlame(word_res, wordrec_debug_blamer);
    }
  }
  // Keep trying to find a better path by fixing the "pain points".
  int num_futile_classifications = 0;
  STRING blamer_debug;
  while (wordrec_enable_assoc &&
      (!SegSearchDone(num_futile_classifications) ||
          (blamer_bundle != NULL &&
              blamer_bundle->GuidedSegsearchStillGoing()))) {
    // Get the next valid "pain point".
    bool found_nothing = true;
    LMPainPointsType pp_type;
    while ((pp_type = pain_points.Deque(&pain_point, &pain_point_priority)) !=
        LM_PPTYPE_NUM) {
      if (!pain_point.Valid(*word_res->ratings)) {
        word_res->ratings->IncreaseBandSize(
            pain_point.row - pain_point.col + 1);
      }
      if (pain_point.Valid(*word_res->ratings) &&
          !word_res->ratings->Classified(pain_point.col, pain_point.row,
                                         getDict().WildcardID())) {
        found_nothing = false;
        break;
      }
    }
    if (found_nothing) {
      if (segsearch_debug_level > 0) tprintf("Pain points queue is empty\n");
      break;
    }
    ProcessSegSearchPainPoint(pain_point_priority, pain_point,
                              LMPainPoints::PainPointDescription(pp_type),
                              &pending, word_res, &pain_points, blamer_bundle);

    UpdateSegSearchNodes(rating_cert_scale, pain_point.col, &pending,
                         word_res, &pain_points, best_choice_bundle,
                         blamer_bundle);
    if (!best_choice_bundle->updated) ++num_futile_classifications;

    if (segsearch_debug_level > 0) {
      tprintf("num_futile_classifications %d\n", num_futile_classifications);
    }

    best_choice_bundle->updated = false;  // reset updated

    // See if it's time to terminate SegSearch or time for starting a guided
    // search for the true path to find the blame for the incorrect best_choice.
    if (SegSearchDone(num_futile_classifications) &&
        blamer_bundle != NULL &&
        blamer_bundle->GuidedSegsearchNeeded(word_res->best_choice)) {
      InitBlamerForSegSearch(word_res, &pain_points, blamer_bundle,
                             &blamer_debug);
    }
  }  // end while loop exploring alternative paths
  if (blamer_bundle != NULL) {
    blamer_bundle->FinishSegSearch(word_res->best_choice,
                                   wordrec_debug_blamer, &blamer_debug);
  }

  if (segsearch_debug_level > 0) {
    tprintf("Done with SegSearch (AcceptableChoiceFound: %d)\n",
            language_model_->AcceptableChoiceFound());
  }
}
bool tesseract::Wordrec::SegSearchDone ( int  num_futile_classifications) [inline, protected]

Definition at line 430 of file wordrec.h.

                                                            {
    return (language_model_->AcceptableChoiceFound() ||
            num_futile_classifications >=
            segsearch_max_futile_classifications);
  }
int tesseract::Wordrec::select_blob_to_split ( const GenericVector< BLOB_CHOICE * > &  blob_choices,
float  rating_ceiling,
bool  split_next_to_fragment 
)

Definition at line 587 of file chopper.cpp.

                                                       {
  BLOB_CHOICE *blob_choice;
  int x;
  float worst = -MAX_FLOAT32;
  int worst_index = -1;
  float worst_near_fragment = -MAX_FLOAT32;
  int worst_index_near_fragment = -1;
  const CHAR_FRAGMENT **fragments = NULL;

  if (chop_debug) {
    if (rating_ceiling < MAX_FLOAT32)
      tprintf("rating_ceiling = %8.4f\n", rating_ceiling);
    else
      tprintf("rating_ceiling = No Limit\n");
  }

  if (split_next_to_fragment && blob_choices.size() > 0) {
    fragments = new const CHAR_FRAGMENT *[blob_choices.length()];
    if (blob_choices[0] != NULL) {
      fragments[0] = getDict().getUnicharset().get_fragment(
          blob_choices[0]->unichar_id());
    } else {
      fragments[0] = NULL;
    }
  }

  for (x = 0; x < blob_choices.size(); ++x) {
    if (blob_choices[x] == NULL) {
      if (fragments != NULL) {
        delete[] fragments;
      }
      return x;
    } else {
      blob_choice = blob_choices[x];
      // Populate fragments for the following position.
      if (split_next_to_fragment && x+1 < blob_choices.size()) {
        if (blob_choices[x + 1] != NULL) {
          fragments[x + 1] = getDict().getUnicharset().get_fragment(
              blob_choices[x + 1]->unichar_id());
        } else {
          fragments[x + 1] = NULL;
        }
      }
      if (blob_choice->rating() < rating_ceiling &&
          blob_choice->certainty() < tessedit_certainty_threshold) {
        // Update worst and worst_index.
        if (blob_choice->rating() > worst) {
          worst_index = x;
          worst = blob_choice->rating();
        }
        if (split_next_to_fragment) {
          // Update worst_near_fragment and worst_index_near_fragment.
          bool expand_following_fragment =
            (x + 1 < blob_choices.size() &&
             fragments[x+1] != NULL && !fragments[x+1]->is_beginning());
          bool expand_preceding_fragment =
            (x > 0 && fragments[x-1] != NULL && !fragments[x-1]->is_ending());
          if ((expand_following_fragment || expand_preceding_fragment) &&
              blob_choice->rating() > worst_near_fragment) {
            worst_index_near_fragment = x;
            worst_near_fragment = blob_choice->rating();
            if (chop_debug) {
              tprintf("worst_index_near_fragment=%d"
                      " expand_following_fragment=%d"
                      " expand_preceding_fragment=%d\n",
                      worst_index_near_fragment,
                      expand_following_fragment,
                      expand_preceding_fragment);
            }
          }
        }
      }
    }
  }
  if (fragments != NULL) {
    delete[] fragments;
  }
  // TODO(daria): maybe a threshold of badness for
  // worst_near_fragment would be useful.
  return worst_index_near_fragment != -1 ?
    worst_index_near_fragment : worst_index;
}

Definition at line 679 of file chopper.cpp.

                                                           {
  if (!fixpt)
    return -1;
  for (int i = 0; i < fixpt->size(); i++) {
    if ((*fixpt)[i].begin + 1 == (*fixpt)[i].end &&
        (*fixpt)[i].dangerous &&
        (*fixpt)[i].correct_is_ngram) {
      return (*fixpt)[i].begin;
    }
  }
  return -1;
}
void tesseract::Wordrec::set_outline_bounds ( register EDGEPT point1,
register EDGEPT point2,
BOUNDS_RECT  rect 
)

Definition at line 220 of file gradechop.cpp.

                                                   {
  register EDGEPT *this_point;
  register inT16 x_min;
  register inT16 x_max;

  find_bounds_loop(point1, point2, x_min, x_max);

  rect[0] = x_min;
  rect[1] = x_max;

  find_bounds_loop(point2, point1, x_min, x_max);

  rect[2] = x_min;
  rect[3] = x_max;
}
void tesseract::Wordrec::try_point_pairs ( EDGEPT points[MAX_NUM_POINTS],
inT16  num_points,
SeamQueue seam_queue,
SeamPile seam_pile,
SEAM **  seam,
TBLOB blob 
)

Definition at line 428 of file findseam.cpp.

                                            {
  inT16 x;
  inT16 y;
  SPLIT *split;
  PRIORITY priority;

  for (x = 0; x < num_points; x++) {
    for (y = x + 1; y < num_points; y++) {

      if (points[y] &&
          weighted_edgept_dist(points[x], points[y],
                               chop_x_y_weight) < chop_split_length &&
          points[x] != points[y]->next &&
          points[y] != points[x]->next &&
          !is_exterior_point(points[x], points[y]) &&
          !is_exterior_point(points[y], points[x])) {
        split = new_split (points[x], points[y]);
        priority = partial_split_priority (split);

        choose_best_seam(seam_queue, split, priority, seam, blob, seam_pile);
      }
    }
  }
}
void tesseract::Wordrec::try_vertical_splits ( EDGEPT points[MAX_NUM_POINTS],
inT16  num_points,
EDGEPT_CLIST *  new_points,
SeamQueue seam_queue,
SeamPile seam_pile,
SEAM **  seam,
TBLOB blob 
)

Definition at line 469 of file findseam.cpp.

                                                {
  EDGEPT *vertical_point = NULL;
  SPLIT *split;
  inT16 x;
  PRIORITY priority;
  TESSLINE *outline;

  for (x = 0; x < num_points; x++) {
    vertical_point = NULL;
    for (outline = blob->outlines; outline; outline = outline->next) {
      vertical_projection_point(points[x], outline->loop,
                                &vertical_point, new_points);
    }

    if (vertical_point &&
      points[x] != vertical_point->next &&
      vertical_point != points[x]->next &&
      weighted_edgept_dist(points[x], vertical_point,
                           chop_x_y_weight) < chop_split_length) {

      split = new_split (points[x], vertical_point);
      priority = partial_split_priority (split);

      choose_best_seam(seam_queue, split, priority, seam, blob, seam_pile);
    }
  }
}
void tesseract::Wordrec::UpdateSegSearchNodes ( float  rating_cert_scale,
int  starting_col,
GenericVector< SegSearchPending > *  pending,
WERD_RES word_res,
LMPainPoints pain_points,
BestChoiceBundle best_choice_bundle,
BlamerBundle blamer_bundle 
) [protected]

Definition at line 162 of file segsearch.cpp.

                                 {
  MATRIX *ratings = word_res->ratings;
  ASSERT_HOST(ratings->dimension() == pending->size());
  ASSERT_HOST(ratings->dimension() == best_choice_bundle->beam.size());
  for (int col = starting_col; col < ratings->dimension(); ++col) {
    if (!(*pending)[col].WorkToDo()) continue;
    int first_row = col;
    int last_row = MIN(ratings->dimension() - 1,
                       col + ratings->bandwidth() - 1);
    if ((*pending)[col].SingleRow() >= 0) {
      first_row = last_row = (*pending)[col].SingleRow();
    }
    if (segsearch_debug_level > 0) {
      tprintf("\n\nUpdateSegSearchNodes: col=%d, rows=[%d,%d], alljust=%d\n",
              col, first_row, last_row,
              (*pending)[col].IsRowJustClassified(MAX_INT32));
    }
    // Iterate over the pending list for this column.
    for (int row = first_row; row <= last_row; ++row) {
      // Update language model state of this child+parent pair.
      BLOB_CHOICE_LIST *current_node = ratings->get(col, row);
      LanguageModelState *parent_node =
          col == 0 ? NULL : best_choice_bundle->beam[col - 1];
      if (current_node != NULL &&
          language_model_->UpdateState((*pending)[col].IsRowJustClassified(row),
                                       col, row, current_node, parent_node,
                                       pain_points, word_res,
                                       best_choice_bundle, blamer_bundle) &&
          row + 1 < ratings->dimension()) {
        // Since the language model state of this entry changed, process all
        // the child column.
        (*pending)[row + 1].RevisitWholeColumn();
        if (segsearch_debug_level > 0) {
          tprintf("Added child col=%d to pending\n", row + 1);
        }
      }  // end if UpdateState.
    }  // end for row.
  }  // end for col.
  if (best_choice_bundle->best_vse != NULL) {
    ASSERT_HOST(word_res->StatesAllValid());
    if (best_choice_bundle->best_vse->updated) {
      pain_points->GenerateFromPath(rating_cert_scale,
                                    best_choice_bundle->best_vse, word_res);
      if (!best_choice_bundle->fixpt.empty()) {
        pain_points->GenerateFromAmbigs(best_choice_bundle->fixpt,
                                        best_choice_bundle->best_vse, word_res);
      }
    }
  }
  // The segsearch is completed. Reset all updated flags on all VSEs and reset
  // all pendings.
  for (int col = 0; col < pending->size(); ++col) {
    (*pending)[col].Clear();
    ViterbiStateEntry_IT
        vse_it(&best_choice_bundle->beam[col]->viterbi_state_entries);
    for (vse_it.mark_cycle_pt(); !vse_it.cycled_list(); vse_it.forward()) {
      vse_it.data()->updated = false;
    }
  }
}
void tesseract::Wordrec::vertical_projection_point ( EDGEPT split_point,
EDGEPT target_point,
EDGEPT **  best_point,
EDGEPT_CLIST *  new_points 
)

Definition at line 329 of file chop.cpp.

                                                                  {
  EDGEPT *p;                     /* Iterator */
  EDGEPT *this_edgept;           /* Iterator */
  EDGEPT_C_IT new_point_it(new_points);
  int x = split_point->pos.x;    /* X value of vertical */
  int best_dist = LARGE_DISTANCE;/* Best point found */

  if (*best_point != NULL)
    best_dist = edgept_dist(split_point, *best_point);

  p = target_point;
  /* Look at each edge point */
  do {
    if (((p->pos.x <= x && x <= p->next->pos.x) ||
         (p->next->pos.x <= x && x <= p->pos.x)) &&
        !same_point(split_point->pos, p->pos) &&
        !same_point(split_point->pos, p->next->pos) &&
        !p->IsChopPt() &&
        (*best_point == NULL || !same_point((*best_point)->pos, p->pos))) {

      if (near_point(split_point, p, p->next, &this_edgept)) {
        new_point_it.add_before_then_move(this_edgept);
      }

      if (*best_point == NULL)
        best_dist = edgept_dist (split_point, this_edgept);

      this_edgept =
        pick_close_point(split_point, this_edgept, &best_dist);
      if (this_edgept)
        *best_point = this_edgept;
    }

    p = p->next;
  }
  while (p != target_point);
}

Member Data Documentation

"include fixed-pitch heuristics in char segmentation"

Definition at line 161 of file wordrec.h.

"Split center adjustment"

Definition at line 151 of file wordrec.h.

"Width of (smaller) chopped blobs " "above which we don't care that a chop is not near the center."

Definition at line 153 of file wordrec.h.

"Chop debug"

Definition at line 139 of file wordrec.h.

"Chop enable"

Definition at line 140 of file wordrec.h.

"Good split limit"

Definition at line 157 of file wordrec.h.

"Min Inside Angle Bend"

Definition at line 147 of file wordrec.h.

"Min Outline Area"

Definition at line 148 of file wordrec.h.

"Min Number of Points on Outline"

Definition at line 144 of file wordrec.h.

"Use new seam_pile"

Definition at line 146 of file wordrec.h.

"OK split limit"

Definition at line 156 of file wordrec.h.

"Split overlap adjustment"

Definition at line 150 of file wordrec.h.

"Same distance"

Definition at line 143 of file wordrec.h.

"Max number of seams in seam_pile"

Definition at line 145 of file wordrec.h.

"Split sharpness adjustment"

Definition at line 154 of file wordrec.h.

"Split length adjustment"

Definition at line 149 of file wordrec.h.

"Split Length"

Definition at line 142 of file wordrec.h.

"Vertical creep"

Definition at line 141 of file wordrec.h.

"Width change adjustment"

Definition at line 155 of file wordrec.h.

"X / Y length weight"

Definition at line 158 of file wordrec.h.

void(Wordrec::* tesseract::Wordrec::fill_lattice_)(const MATRIX &ratings, const WERD_CHOICE_LIST &best_choices, const UNICHARSET &unicharset, BlamerBundle *blamer_bundle)

Definition at line 424 of file wordrec.h.

"force associator to run regardless of what enable_assoc is." "This is used for CJK where component grouping is necessary."

Definition at line 133 of file wordrec.h.

"Use information from fragments to guide chopping process"

Definition at line 136 of file wordrec.h.

"Merge the fragments in the ratings matrix and delete them " "after merging"

Definition at line 128 of file wordrec.h.

"Fix blobs that aren't chopped"

Definition at line 137 of file wordrec.h.

"Save alternative paths found during chopping " "and segmentation search"

Definition at line 178 of file wordrec.h.

"Segmentation adjustment debug"

Definition at line 159 of file wordrec.h.

"SegSearch debug level"

Definition at line 169 of file wordrec.h.

"Maximum character width-to-height ratio"

Definition at line 175 of file wordrec.h.

"Maximum number of pain point classifications per word."

Definition at line 173 of file wordrec.h.

"Maximum number of pain points stored in the queue"

Definition at line 171 of file wordrec.h.

"Good blob limit"

Definition at line 138 of file wordrec.h.

"Print blamer debug messages"

Definition at line 167 of file wordrec.h.

"Debug level for wordrec"

Definition at line 162 of file wordrec.h.

"Associator Enable"

Definition at line 130 of file wordrec.h.

"Max number of broken pieces to associate"

Definition at line 164 of file wordrec.h.

"Don't output block information"

Definition at line 129 of file wordrec.h.

"Try to set the blame for errors"

Definition at line 168 of file wordrec.h.

"Only run OCR for words that had truth recorded in BlamerBundle"

Definition at line 166 of file wordrec.h.

"Worst segmentation state"

Definition at line 134 of file wordrec.h.


The documentation for this class was generated from the following files:
 All Classes Namespaces Files Functions Variables Typedefs Enumerations Enumerator Friends Defines