tesseract
3.03
|
#include <wordrec.h>
Public Member Functions | |||||||
Wordrec () | |||||||
virtual | ~Wordrec () | ||||||
void | SaveAltChoices (const LIST &best_choices, WERD_RES *word) | ||||||
void | FillLattice (const MATRIX &ratings, const WERD_CHOICE_LIST &best_choices, const UNICHARSET &unicharset, BlamerBundle *blamer_bundle) | ||||||
void | CallFillLattice (const MATRIX &ratings, const WERD_CHOICE_LIST &best_choices, const UNICHARSET &unicharset, BlamerBundle *blamer_bundle) | ||||||
void | SegSearch (WERD_RES *word_res, BestChoiceBundle *best_choice_bundle, BlamerBundle *blamer_bundle) | ||||||
void | DoSegSearch (WERD_RES *word_res) | ||||||
SEAM * | attempt_blob_chop (TWERD *word, TBLOB *blob, inT32 blob_number, bool italic_blob, const GenericVector< SEAM * > &seams) | ||||||
SEAM * | chop_numbered_blob (TWERD *word, inT32 blob_number, bool italic_blob, const GenericVector< SEAM * > &seams) | ||||||
SEAM * | chop_overlapping_blob (const GenericVector< TBOX > &boxes, bool italic_blob, WERD_RES *word_res, int *blob_number) | ||||||
void | add_seam_to_queue (float new_priority, SEAM *new_seam, SeamQueue *seams) | ||||||
void | choose_best_seam (SeamQueue *seam_queue, SPLIT *split, PRIORITY priority, SEAM **seam_result, TBLOB *blob, SeamPile *seam_pile) | ||||||
void | combine_seam (const SeamPile &seam_pile, const SEAM *seam, SeamQueue *seam_queue) | ||||||
inT16 | constrained_split (SPLIT *split, TBLOB *blob) | ||||||
SEAM * | pick_good_seam (TBLOB *blob) | ||||||
PRIORITY | seam_priority (SEAM *seam, inT16 xmin, inT16 xmax) | ||||||
void | try_point_pairs (EDGEPT *points[MAX_NUM_POINTS], inT16 num_points, SeamQueue *seam_queue, SeamPile *seam_pile, SEAM **seam, TBLOB *blob) | ||||||
void | try_vertical_splits (EDGEPT *points[MAX_NUM_POINTS], inT16 num_points, EDGEPT_CLIST *new_points, SeamQueue *seam_queue, SeamPile *seam_pile, SEAM **seam, TBLOB *blob) | ||||||
PRIORITY | full_split_priority (SPLIT *split, inT16 xmin, inT16 xmax) | ||||||
PRIORITY | grade_center_of_blob (register BOUNDS_RECT rect) | ||||||
PRIORITY | grade_overlap (register BOUNDS_RECT rect) | ||||||
PRIORITY | grade_split_length (register SPLIT *split) | ||||||
PRIORITY | grade_sharpness (register SPLIT *split) | ||||||
PRIORITY | grade_width_change (register BOUNDS_RECT rect) | ||||||
void | set_outline_bounds (register EDGEPT *point1, register EDGEPT *point2, BOUNDS_RECT rect) | ||||||
int | crosses_outline (EDGEPT *p0, EDGEPT *p1, EDGEPT *outline) | ||||||
int | is_crossed (TPOINT a0, TPOINT a1, TPOINT b0, TPOINT b1) | ||||||
int | is_same_edgept (EDGEPT *p1, EDGEPT *p2) | ||||||
bool | near_point (EDGEPT *point, EDGEPT *line_pt_0, EDGEPT *line_pt_1, EDGEPT **near_pt) | ||||||
void | reverse_outline (EDGEPT *outline) | ||||||
virtual BLOB_CHOICE_LIST * | classify_piece (const GenericVector< SEAM * > &seams, inT16 start, inT16 end, const char *description, TWERD *word, BlamerBundle *blamer_bundle) | ||||||
void | merge_fragments (MATRIX *ratings, inT16 num_blobs) | ||||||
void | get_fragment_lists (inT16 current_frag, inT16 current_row, inT16 start, inT16 num_frag_parts, inT16 num_blobs, MATRIX *ratings, BLOB_CHOICE_LIST *choice_lists) | ||||||
void | merge_and_put_fragment_lists (inT16 row, inT16 column, inT16 num_frag_parts, BLOB_CHOICE_LIST *choice_lists, MATRIX *ratings) | ||||||
void | fill_filtered_fragment_list (BLOB_CHOICE_LIST *choices, int fragment_pos, int num_frag_parts, BLOB_CHOICE_LIST *filtered_choices) | ||||||
program_editup | |||||||
Initialize all the things in the program that need to be initialized. init_permute determines whether to initialize the permute functions and Dawg models. | |||||||
void | program_editup (const char *textbase, bool init_classifier, bool init_permute) | ||||||
cc_recog | |||||||
Recognize a word. | |||||||
void | cc_recog (WERD_RES *word) | ||||||
program_editdown | |||||||
This function holds any necessary post-processing for the Wise Owl program. | |||||||
void | program_editdown (inT32 elasped_time) | ||||||
set_pass1 | |||||||
Get ready to do some pass 1 stuff. | |||||||
void | set_pass1 () | ||||||
set_pass2 | |||||||
Get ready to do some pass 2 stuff. | |||||||
void | set_pass2 () | ||||||
end_recog | |||||||
Cleanup and exit the recog program. | |||||||
int | end_recog () | ||||||
call_matcher | |||||||
Called from Tess with a blob in tess form. The blob may need rotating to the correct orientation for classification. | |||||||
BLOB_CHOICE_LIST * | call_matcher (TBLOB *blob) | ||||||
dict_word() | |||||||
Test the dictionaries, returning NO_PERM (0) if not found, or one of the PermuterType values if found, according to the dictionary. | |||||||
int | dict_word (const WERD_CHOICE &word) | ||||||
classify_blob | |||||||
Classify this blob if it is not already recorded in the match table. Attempt to recognize this blob as a character. The recognition rating for this blob will be stored as a part of the blob. This value will also be returned to the caller.
| |||||||
BLOB_CHOICE_LIST * | classify_blob (TBLOB *blob, const char *string, C_COL color, BlamerBundle *blamer_bundle) | ||||||
point_priority | |||||||
Assign a priority to an edge point that might be used as part of a split. The argument should be of type EDGEPT. | |||||||
PRIORITY | point_priority (EDGEPT *point) | ||||||
add_point_to_list | |||||||
Add an edge point to a POINT_GROUP containing a list of other points. | |||||||
void | add_point_to_list (PointHeap *point_heap, EDGEPT *point) | ||||||
angle_change | |||||||
Return the change in angle (degrees) of the line segments between points one and two, and two and three. | |||||||
int | angle_change (EDGEPT *point1, EDGEPT *point2, EDGEPT *point3) | ||||||
is_little_chunk | |||||||
Return TRUE if one of the pieces resulting from this split would have fewer than some number of edge points. | |||||||
int | is_little_chunk (EDGEPT *point1, EDGEPT *point2) | ||||||
is_small_area | |||||||
Test the area defined by a split across this outline. | |||||||
int | is_small_area (EDGEPT *point1, EDGEPT *point2) | ||||||
pick_close_point | |||||||
Choose the edge point that is closest to the critical point. This point may not be exactly vertical from the critical point. | |||||||
EDGEPT * | pick_close_point (EDGEPT *critical_point, EDGEPT *vertical_point, int *best_dist) | ||||||
prioritize_points | |||||||
Find a list of edge points from the outer outline of this blob. For each of these points assign a priority. Sort these points using a heap structure so that they can be visited in order. | |||||||
void | prioritize_points (TESSLINE *outline, PointHeap *points) | ||||||
new_min_point | |||||||
Found a new minimum point; try to decide whether to save it or not. Return the new value for the local minimum. If a point is saved then the local minimum is reset to NULL. | |||||||
void | new_min_point (EDGEPT *local_min, PointHeap *points) | ||||||
new_max_point | |||||||
Found a new maximum point; try to decide whether to save it or not. Return the new value for the local maximum. If a point is saved then the local maximum is reset to NULL. | |||||||
void | new_max_point (EDGEPT *local_max, PointHeap *points) | ||||||
vertical_projection_point | |||||||
For one point on the outline, find the corresponding point on the other side of the outline that is a likely projection for a split point. This is done by iterating through the edge points until the X value of the point being looked at is greater than the X value of the split point. Ensure that the point being returned is not right next to the split point. Return the edge point in *best_point as a result, and any points that were newly created are also saved on the new_points list. | |||||||
void | vertical_projection_point (EDGEPT *split_point, EDGEPT *target_point, EDGEPT **best_point, EDGEPT_CLIST *new_points) | ||||||
improve_one_blob | |||||||
Finds the best place to chop, based on the worst blob, fixpt, or next to a fragment, according to the input. Returns the SEAM corresponding to the chop point, if any is found, and the index in the ratings_matrix of the chopped blob. Note that blob_choices is just a copy of the pointers in the leading diagonal of the ratings MATRIX. Although the blob is chopped, the returned SEAM is yet to be inserted into word->seam_array and the resulting blobs are unclassified, so this function can be used by ApplyBox as well as during recognition. | |||||||
SEAM * | improve_one_blob (const GenericVector< BLOB_CHOICE * > &blob_choices, DANGERR *fixpt, bool split_next_to_fragment, bool italic_blob, WERD_RES *word, int *blob_number) | ||||||
chop_one_blob | |||||||
Start with the current one-blob word and its classification. Find the worst blob and try to divide it up to improve the ratings. Used for testing the chopper. | |||||||
SEAM * | chop_one_blob (const GenericVector< TBOX > &boxes, const GenericVector< BLOB_CHOICE * > &blob_choices, WERD_RES *word_res, int *blob_number) | ||||||
chop_word_main | |||||||
void | chop_word_main (WERD_RES *word) | ||||||
improve_by_chopping | |||||||
Repeatedly chops the worst blob, classifying the new blobs and fixing up all the data, and incrementally runs the segmentation search until a good word is found or no more chops can be found. | |||||||
void | improve_by_chopping (float rating_cert_scale, WERD_RES *word, BestChoiceBundle *best_choice_bundle, BlamerBundle *blamer_bundle, LMPainPoints *pain_points, GenericVector< SegSearchPending > *pending) | ||||||
int | select_blob_to_split (const GenericVector< BLOB_CHOICE * > &blob_choices, float rating_ceiling, bool split_next_to_fragment) | ||||||
int | select_blob_to_split_from_fixpt (DANGERR *fixpt) | ||||||
Public Attributes | |||||||
bool | merge_fragments_in_matrix = TRUE | ||||||
bool | wordrec_no_block = FALSE | ||||||
bool | wordrec_enable_assoc = TRUE | ||||||
bool | force_word_assoc = FALSE | ||||||
double | wordrec_worst_state = 1 | ||||||
bool | fragments_guide_chopper = FALSE | ||||||
int | repair_unchopped_blobs = 1 | ||||||
double | tessedit_certainty_threshold = -2.25 | ||||||
int | chop_debug = 0 | ||||||
bool | chop_enable = 1 | ||||||
bool | chop_vertical_creep = 0 | ||||||
int | chop_split_length = 10000 | ||||||
int | chop_same_distance = 2 | ||||||
int | chop_min_outline_points = 6 | ||||||
int | chop_seam_pile_size = 150 | ||||||
bool | chop_new_seam_pile = 1 | ||||||
int | chop_inside_angle = -50 | ||||||
int | chop_min_outline_area = 2000 | ||||||
double | chop_split_dist_knob = 0.5 | ||||||
double | chop_overlap_knob = 0.9 | ||||||
double | chop_center_knob = 0.15 | ||||||
int | chop_centered_maxwidth = 90 | ||||||
double | chop_sharpness_knob = 0.06 | ||||||
double | chop_width_change_knob = 5.0 | ||||||
double | chop_ok_split = 100.0 | ||||||
double | chop_good_split = 50.0 | ||||||
int | chop_x_y_weight = 3 | ||||||
int | segment_adjust_debug = 0 | ||||||
bool | assume_fixed_pitch_char_segment = FALSE | ||||||
int | wordrec_debug_level = 0 | ||||||
int | wordrec_max_join_chunks = 4 | ||||||
bool | wordrec_skip_no_truth_words = false | ||||||
bool | wordrec_debug_blamer = false | ||||||
bool | wordrec_run_blamer = false | ||||||
int | segsearch_debug_level = 0 | ||||||
int | segsearch_max_pain_points = 2000 | ||||||
int | segsearch_max_futile_classifications = 20 | ||||||
double | segsearch_max_char_wh_ratio = 2.0 | ||||||
bool | save_alt_choices = true | ||||||
LanguageModel * | language_model_ | ||||||
PRIORITY | pass2_ok_split | ||||||
WERD_CHOICE * | prev_word_best_choice_ | ||||||
GenericVector< int > | blame_reasons_ | ||||||
void(Wordrec::* | fill_lattice_ )(const MATRIX &ratings, const WERD_CHOICE_LIST &best_choices, const UNICHARSET &unicharset, BlamerBundle *blamer_bundle) | ||||||
Protected Member Functions | |||||||
bool | SegSearchDone (int num_futile_classifications) | ||||||
void | UpdateSegSearchNodes (float rating_cert_scale, int starting_col, GenericVector< SegSearchPending > *pending, WERD_RES *word_res, LMPainPoints *pain_points, BestChoiceBundle *best_choice_bundle, BlamerBundle *blamer_bundle) | ||||||
void | ProcessSegSearchPainPoint (float pain_point_priority, const MATRIX_COORD &pain_point, const char *pain_point_type, GenericVector< SegSearchPending > *pending, WERD_RES *word_res, LMPainPoints *pain_points, BlamerBundle *blamer_bundle) | ||||||
void | ResetNGramSearch (WERD_RES *word_res, BestChoiceBundle *best_choice_bundle, GenericVector< SegSearchPending > *pending) | ||||||
void | InitBlamerForSegSearch (WERD_RES *word_res, LMPainPoints *pain_points, BlamerBundle *blamer_bundle, STRING *blamer_debug) |
Definition at line 26 of file wordrec.cpp.
// Member-initializer list and body of the Wordrec constructor.
//
// Every *_MEMBER(name, default, help, vec) entry initialises one tunable
// parameter member with its default value and help text and passes the
// parameter vector it belongs to (presumably registering it there -- confirm
// against the *_MEMBER macro definitions).
//
// Fix relative to the previous text: two help strings were built from adjacent
// string literals with no separating space, so the compiler concatenated them
// into "...enable_assoc is.This is used..." and "...per chunk thatdid not...".
// A space has been added at each join; no parameter name or default changed.
: // control parameters
  BOOL_MEMBER(merge_fragments_in_matrix, TRUE,
              "Merge the fragments in the ratings matrix and delete them"
              " after merging", params()),
  BOOL_MEMBER(wordrec_no_block, FALSE, "Don't output block information",
              params()),
  BOOL_MEMBER(wordrec_enable_assoc, TRUE, "Associator Enable", params()),
  // NOTE(review): this is the only entry that spells the vector as
  // CCUtil::params() instead of params() -- kept exactly as in the original.
  BOOL_MEMBER(force_word_assoc, FALSE,
              "force associator to run regardless of what enable_assoc is. "
              "This is used for CJK where component grouping is necessary.",
              CCUtil::params()),
  double_MEMBER(wordrec_worst_state, 1.0, "Worst segmentation state",
                params()),
  BOOL_MEMBER(fragments_guide_chopper, FALSE,
              "Use information from fragments to guide chopping process",
              params()),
  INT_MEMBER(repair_unchopped_blobs, 1, "Fix blobs that aren't chopped",
             params()),
  double_MEMBER(tessedit_certainty_threshold, -2.25, "Good blob limit",
                params()),
  INT_MEMBER(chop_debug, 0, "Chop debug", params()),
  BOOL_MEMBER(chop_enable, 1, "Chop enable", params()),
  BOOL_MEMBER(chop_vertical_creep, 0, "Vertical creep", params()),
  INT_MEMBER(chop_split_length, 10000, "Split Length", params()),
  INT_MEMBER(chop_same_distance, 2, "Same distance", params()),
  INT_MEMBER(chop_min_outline_points, 6, "Min Number of Points on Outline",
             params()),
  INT_MEMBER(chop_seam_pile_size, 150, "Max number of seams in seam_pile",
             params()),
  BOOL_MEMBER(chop_new_seam_pile, 1, "Use new seam_pile", params()),
  INT_MEMBER(chop_inside_angle, -50, "Min Inside Angle Bend", params()),
  INT_MEMBER(chop_min_outline_area, 2000, "Min Outline Area", params()),
  double_MEMBER(chop_split_dist_knob, 0.5, "Split length adjustment",
                params()),
  double_MEMBER(chop_overlap_knob, 0.9, "Split overlap adjustment", params()),
  double_MEMBER(chop_center_knob, 0.15, "Split center adjustment", params()),
  INT_MEMBER(chop_centered_maxwidth, 90, "Width of (smaller) chopped blobs "
             "above which we don't care that a chop is not near the center.",
             params()),
  double_MEMBER(chop_sharpness_knob, 0.06, "Split sharpness adjustment",
                params()),
  double_MEMBER(chop_width_change_knob, 5.0, "Width change adjustment",
                params()),
  double_MEMBER(chop_ok_split, 100.0, "OK split limit", params()),
  double_MEMBER(chop_good_split, 50.0, "Good split limit", params()),
  INT_MEMBER(chop_x_y_weight, 3, "X / Y length weight", params()),
  INT_MEMBER(segment_adjust_debug, 0, "Segmentation adjustment debug",
             params()),
  BOOL_MEMBER(assume_fixed_pitch_char_segment, FALSE,
              "include fixed-pitch heuristics in char segmentation", params()),
  INT_MEMBER(wordrec_debug_level, 0, "Debug level for wordrec", params()),
  INT_MEMBER(wordrec_max_join_chunks, 4,
             "Max number of broken pieces to associate", params()),
  BOOL_MEMBER(wordrec_skip_no_truth_words, false,
              "Only run OCR for words that had truth recorded in BlamerBundle",
              params()),
  BOOL_MEMBER(wordrec_debug_blamer, false, "Print blamer debug messages",
              params()),
  BOOL_MEMBER(wordrec_run_blamer, false, "Try to set the blame for errors",
              params()),
  INT_MEMBER(segsearch_debug_level, 0, "SegSearch debug level", params()),
  INT_MEMBER(segsearch_max_pain_points, 2000,
             "Maximum number of pain points stored in the queue", params()),
  INT_MEMBER(segsearch_max_futile_classifications, 20,
             "Maximum number of pain point classifications per chunk that "
             "did not result in finding a better word choice.", params()),
  double_MEMBER(segsearch_max_char_wh_ratio, 2.0,
                "Maximum character width-to-height ratio", params()),
  BOOL_MEMBER(save_alt_choices, true,
              "Save alternative paths found during chopping"
              " and segmentation search", params()) {
  // No previous word has been recognised yet.
  prev_word_best_choice_ = NULL;
  // The language model is heap-allocated here; the destructor deletes it.
  language_model_ = new LanguageModel(&get_fontinfo_table(), &(getDict()));
  // No lattice-filling callback is installed by default.
  fill_lattice_ = NULL;
}
tesseract::Wordrec::~Wordrec | ( | ) | [virtual] |
Definition at line 123 of file wordrec.cpp.
{
  // Release the LanguageModel that the constructor allocated with new.
  delete language_model_;
}
void tesseract::Wordrec::add_point_to_list | ( | PointHeap * | point_heap, |
EDGEPT * | point | ||
) |
Definition at line 65 of file chop.cpp.
{
  // Queue the point, keyed by its priority, only while the heap holds fewer
  // than MAX_NUM_POINTS - 2 entries; beyond that the point is not queued.
  const bool heap_has_room = point_heap->size() < MAX_NUM_POINTS - 2;
  if (heap_has_room) {
    PointPair entry(point_priority(point), point);
    point_heap->Push(&entry);
  }

#ifndef GRAPHICS_DISABLED
  // At debug levels above 2 the point is marked whether or not it was queued.
  if (chop_debug > 2) {
    mark_outline(point);
  }
#endif
}
void tesseract::Wordrec::add_seam_to_queue | ( | float | new_priority, |
SEAM * | new_seam, | ||
SeamQueue * | seams | ||
) |
Definition at line 65 of file findseam.cpp.
{
  // Nothing to queue.
  if (new_seam == NULL)
    return;

  if (chop_debug) {
    tprintf("Pushing new seam with priority %g :", new_priority);
    print_seam("seam: ", new_seam);
  }

  // When the queue already holds MAX_NUM_SEAMS entries, the new seam has to
  // beat (have a strictly lower key than) the current worst entry to get in.
  if (seams->size() >= MAX_NUM_SEAMS) {
    SeamPair worst(0, NULL);
    const bool keep_worst =
        seams->PopWorst(&worst) && worst.key() <= new_priority;
    if (keep_worst) {
      if (chop_debug) {
        tprintf("Old seam staying with priority %g\n", worst.key());
      }
      // The new seam loses: discard it and put the popped entry back.
      delete new_seam;
      seams->Push(&worst);
      return;
    }
    if (chop_debug) {
      tprintf("New seam with priority %g beats old worst seam with %g\n",
              new_priority, worst.key());
    }
    // The popped entry is not pushed back; it simply goes out of scope here.
  }

  SeamPair fresh(new_priority, new_seam);
  seams->Push(&fresh);
}
int tesseract::Wordrec::angle_change | ( | EDGEPT * | point1, |
EDGEPT * | point2, | ||
EDGEPT * | point3 | ||
) |
Definition at line 84 of file chop.cpp.
{
  // Returns the signed turn, in whole degrees in the range (-180, 180],
  // between segment point1->point2 and segment point2->point3.
  // Returns 0 when the integer-truncated length product is zero (degenerate
  // segment).
  VECTOR vector1;
  VECTOR vector2;

  int angle;
  float length;
  float sine;

  /* Compute angle */
  vector1.x = point2->pos.x - point1->pos.x;
  vector1.y = point2->pos.y - point1->pos.y;
  vector2.x = point3->pos.x - point2->pos.x;
  vector2.y = point3->pos.y - point2->pos.y;

  /* Use cross product */
  length = (float)sqrt((float)LENGTH(vector1) * LENGTH(vector2));
  if ((int) length == 0)
    return (0);

  // Float rounding in the square root above can leave |cross / length| a hair
  // above 1, which makes asin() return NaN and the int conversion undefined.
  // Clamp the ratio into asin()'s domain before using it.
  sine = CROSS (vector1, vector2) / length;
  if (sine > 1.0f)
    sine = 1.0f;
  else if (sine < -1.0f)
    sine = -1.0f;

  // Convert to degrees and round to the nearest integer.
  angle = static_cast<int>(floor(asin(sine) / PI * 180.0 + 0.5));

  /* Use dot product */
  // A negative dot product means the turn exceeds 90 degrees, where asin()
  // alone cannot distinguish the angle from its supplement.
  if (SCALAR (vector1, vector2) < 0)
    angle = 180 - angle;

  /* Adjust angle */
  if (angle > 180)
    angle -= 360;
  if (angle <= -180)
    angle += 360;
  return (angle);
}
SEAM * tesseract::Wordrec::attempt_blob_chop | ( | TWERD * | word, |
TBLOB * | blob, | ||
inT32 | blob_number, | ||
bool | italic_blob, | ||
const GenericVector< SEAM * > & | seams | ||
) |
Definition at line 175 of file chopper.cpp.
{
  // Tries to chop `blob` in two. A shallow copy of the blob is inserted into
  // the word right after it, a seam is chosen and applied, and CheckSeam
  // decides whether the result stands. Returns the accepted seam or NULL.
  if (repair_unchopped_blobs) {
    preserve_outline_tree (blob->outlines);
  }

  /* Make new blob */
  TBLOB *sibling = TBLOB::ShallowCopy(*blob);
  // Insert it into the word.
  word->blobs.insert(sibling, blob_number + 1);

  // When division is prioritized, first see whether the blob can be divided
  // directly; otherwise (or if that fails) search for a good seam.
  SEAM *chosen = NULL;
  if (prioritize_division) {
    TPOINT division_point;
    if (divisible_blob(blob, italic_blob, &division_point)) {
      chosen = new SEAM(0.0f, division_point, NULL, NULL, NULL);
    }
  }
  if (chosen == NULL) {
    chosen = pick_good_seam(blob);
  }

  if (chop_debug) {
    if (chosen == NULL) {
      tprintf("\n** no seam picked *** \n");
    } else {
      print_seam("Good seam picked=", chosen);
    }
  }

  if (chosen != NULL) {
    apply_seam(blob, sibling, italic_blob, chosen);
  }

  chosen = CheckSeam(chop_debug, blob_number, word, blob, sibling,
                     seams, chosen);
  if (chosen != NULL) {
    return chosen;
  }

  // The first attempt was rejected: undo it if requested, then for Latin
  // script make one more attempt by dividing the blob into outlines.
  if (repair_unchopped_blobs) {
    restore_outline_tree(blob->outlines);
  }
  if (!word->latin_script) {
    return NULL;
  }

  // If the blob can simply be divided into outlines, then do that.
  TPOINT fallback_point;
  if (!divisible_blob(blob, italic_blob, &fallback_point)) {
    return NULL;
  }

  /* Make new blob */
  sibling = TBLOB::ShallowCopy(*blob);
  word->blobs.insert(sibling, blob_number + 1);
  chosen = new SEAM(0.0f, fallback_point, NULL, NULL, NULL);
  apply_seam(blob, sibling, italic_blob, chosen);
  return CheckSeam(chop_debug, blob_number, word, blob, sibling,
                   seams, chosen);
}
BLOB_CHOICE_LIST * tesseract::Wordrec::call_matcher | ( | TBLOB * | blob | ) |
Definition at line 136 of file tface.cpp.
{
  // Rotate the blob for classification if necessary. A NULL result from
  // ClassifyNormalizeIfNeeded() means the blob is classified as it is.
  TBLOB *normalized = tessblob->ClassifyNormalizeIfNeeded();
  TBLOB *to_classify = (normalized == NULL) ? tessblob : normalized;

  // The matcher result list is heap-allocated and returned to the caller.
  BLOB_CHOICE_LIST *results = new BLOB_CHOICE_LIST();
  AdaptiveClassifier(to_classify, results);

  // Only a blob distinct from the input is deleted here.
  if (to_classify != tessblob) {
    delete to_classify;
  }
  return results;
}
void tesseract::Wordrec::CallFillLattice | ( | const MATRIX & | ratings, |
const WERD_CHOICE_LIST & | best_choices, | ||
const UNICHARSET & | unicharset, | ||
BlamerBundle * | blamer_bundle | ||
) | [inline] |
Definition at line 195 of file wordrec.h.
{
  // Forward all four arguments to the member function that fill_lattice_
  // points to. There is no NULL check here: the constructor leaves
  // fill_lattice_ NULL, so callers must test it first (chop_word_main does).
  (this->*fill_lattice_)(ratings, best_choices, unicharset, blamer_bundle);
}
void tesseract::Wordrec::cc_recog | ( | WERD_RES * | word | ) |
Definition at line 111 of file tface.cpp.
{
  // Reset the dictionary's hyphen state, passing the word's end-of-line flag.
  getDict().reset_hyphen_vars(word->word->flag(W_EOL));

  // Run the chopper / segmentation search on the word.
  chop_word_main(word);

  // Debug output of the word choices is enabled from stopper debug level 1.
  const bool show_choices = getDict().stopper_debug_level >= 1;
  word->DebugWordChoices(show_choices, getDict().word_to_debug.string());

  // Sanity check on the result.
  ASSERT_HOST(word->StatesAllValid());
}
void tesseract::Wordrec::choose_best_seam | ( | SeamQueue * | seam_queue, |
SPLIT * | split, | ||
PRIORITY | priority, | ||
SEAM ** | seam_result, | ||
TBLOB * | blob, | ||
SeamPile * | seam_pile | ||
) |
Definition at line 104 of file findseam.cpp.
{
  // Drains seam_queue, scoring each seam with seam_priority() and keeping the
  // best acceptable one in *seam_result. Processed seams are combined with
  // the seams already in seam_pile (combine_seam may queue new combinations)
  // and then stored in the pile. Returns early once a seam scores below
  // chop_good_split or the next queued key is too poor to be worth trying.
  SEAM *seam;
  char str[80];   // Scratch buffer for the debug label passed to print_seam.
  float my_priority;
  /* Add seam of split */
  my_priority = priority;
  if (split != NULL) {
    // Place the new seam at the midpoint of the split's two end points.
    TPOINT split_point = split->point1->pos;
    split_point += split->point2->pos;
    split_point /= 2;
    seam = new SEAM(my_priority, split_point, split, NULL, NULL);
    if (chop_debug > 1)
      print_seam ("Partial priority ", seam);
    add_seam_to_queue(my_priority, seam, seam_queue);

    // The partial priority is already worse than a "good" split: leave the
    // seam queued and return without evaluating the queue.
    if (my_priority > chop_good_split)
      return;
  }

  TBOX bbox = blob->bounding_box();
  /* Queue loop */
  while (!seam_queue->empty()) {
    SeamPair seam_pair;
    seam_queue->Pop(&seam_pair);
    // extract_data() hands the SEAM pointer over to this function.
    seam = seam_pair.extract_data();
    /* Set full priority */
    my_priority = seam_priority(seam, bbox.left(), bbox.right());
    if (chop_debug) {
      sprintf (str, "Full my_priority %0.0f, ", my_priority);
      print_seam(str, seam);
    }

    // Candidate for a new best: better than the current result (or there is
    // none yet) and below the "ok" limit.
    if ((*seam_result == NULL || (*seam_result)->priority > my_priority) &&
        my_priority < chop_ok_split) {
      /* No crossing */
      if (constrained_split(seam->split1, blob)) {
        // Replace the previous best with a copy carrying the full priority.
        delete *seam_result;
        *seam_result = new SEAM(*seam);
        (*seam_result)->priority = my_priority;
      } else {
        // The split is not allowed: drop the seam and mark the score bad.
        delete seam;
        seam = NULL;
        my_priority = BAD_PRIORITY;
      }
    }

    if (my_priority < chop_good_split) {
      if (seam)
        delete seam;
      return;                    /* Made good answer */
    }

    if (seam) {
      /* Combine with others */
      if (seam_pile->size() < chop_seam_pile_size) {
        // The pile has room: combine first, then store the seam in the pile.
        combine_seam(*seam_pile, seam, seam_queue);
        SeamDecPair pair(seam_pair.key(), seam);
        seam_pile->Push(&pair);
      } else if (chop_new_seam_pile &&
                 seam_pile->size() == chop_seam_pile_size &&
                 seam_pile->PeekTop().key() > seam_pair.key()) {
        // The pile is full, but its top entry has a larger key than this
        // seam: replace that entry.
        combine_seam(*seam_pile, seam, seam_queue);
        SeamDecPair pair;
        seam_pile->Pop(&pair);  // pop the worst.
        // Replace the seam in pair (deleting the old one) with
        // the new seam and score, then push back into the heap.
        pair.set_key(seam_pair.key());
        pair.set_data(seam);
        seam_pile->Push(&pair);
      } else {
        delete seam;
      }
    }

    // Peek at the next candidate's key; stop when it is above the "ok" limit,
    // or above the "good" limit while a split was supplied.
    my_priority = seam_queue->empty() ?
        NO_FULL_PRIORITY : seam_queue->PeekTop().key();
    if ((my_priority > chop_ok_split) ||
        (my_priority > chop_good_split && split))
      return;
  }
}
SEAM * tesseract::Wordrec::chop_numbered_blob | ( | TWERD * | word, |
inT32 | blob_number, | ||
bool | italic_blob, | ||
const GenericVector< SEAM * > & | seams | ||
) |
Definition at line 225 of file chopper.cpp.
{
  // Look the blob up by index and delegate to attempt_blob_chop.
  TBLOB *target = word->blobs[blob_number];
  return attempt_blob_chop(word, target, blob_number, italic_blob, seams);
}
SEAM * tesseract::Wordrec::chop_one_blob | ( | const GenericVector< TBOX > & | boxes, |
const GenericVector< BLOB_CHOICE * > & | blob_choices, | ||
WERD_RES * | word_res, | ||
int * | blob_number | ||
) |
Definition at line 376 of file chopper.cpp.
{
  // Without division priority, chop the worst-rated blob; both paths pass
  // true for the italic_blob argument.
  if (!prioritize_division) {
    return improve_one_blob(blob_choices, NULL, false, true, word_res,
                            blob_number);
  }
  // With division priority, chop based on overlap with the given boxes.
  return chop_overlapping_blob(boxes, true, word_res, blob_number);
}
SEAM * tesseract::Wordrec::chop_overlapping_blob | ( | const GenericVector< TBOX > & | boxes, |
bool | italic_blob, | ||
WERD_RES * | word_res, | ||
int * | blob_number | ||
) |
Definition at line 233 of file chopper.cpp.
{
  // Walks the chopped word's blobs and tries to chop the first one that is
  // either divisible or overlaps more than one of `boxes` without closely
  // matching any of them. On success the seam is returned and *blob_number
  // is the index of the chopped blob; otherwise *blob_number is set to -1
  // and NULL is returned.
  TWERD *chopped = word_res->chopped_word;
  for (*blob_number = 0; *blob_number < chopped->NumBlobs(); ++*blob_number) {
    TBLOB *candidate = chopped->blobs[*blob_number];

    // Top-left and bottom-right corners of the blob's bounding box.
    TPOINT norm_topleft, norm_botright;
    norm_topleft.x = candidate->bounding_box().left();
    norm_topleft.y = candidate->bounding_box().top();
    norm_botright.x = candidate->bounding_box().right();
    norm_botright.y = candidate->bounding_box().bottom();

    // Map both corners through the word's denorm transform.
    TPOINT image_topleft, image_botright;
    word_res->denorm.DenormTransform(NULL, norm_topleft, &image_topleft);
    word_res->denorm.DenormTransform(NULL, norm_botright, &image_botright);

    TBOX image_box = TBOX(image_topleft.x, image_botright.y,
                          image_botright.x, image_topleft.y);

    // Count the boxes overlapping by more than 0.125, and note whether any
    // box is almost equal to this one (tolerance 3).
    bool matches_a_box = false;
    int overlap_count = 0;
    for (int box_index = 0; box_index < boxes.size(); box_index++) {
      if (image_box.overlap_fraction(boxes[box_index]) > 0.125) {
        overlap_count++;
      }
      if (image_box.almost_equal(boxes[box_index], 3)) {
        matches_a_box = true;
      }
    }

    TPOINT division_point;
    const bool should_chop =
        divisible_blob(candidate, italic_blob, &division_point) ||
        (!matches_a_box && overlap_count > 1);
    if (!should_chop) {
      continue;
    }

    SEAM *result = attempt_blob_chop(chopped, candidate, *blob_number,
                                     italic_blob, word_res->seam_array);
    if (result != NULL) {
      return result;
    }
  }

  *blob_number = -1;
  return NULL;
}
void tesseract::Wordrec::chop_word_main | ( | WERD_RES * | word | ) |
Definition at line 440 of file chopper.cpp.
{
  // Top-level word recognition: make sure the ratings matrix exists and its
  // diagonal is classified, run the segmentation search, then finalise the
  // word (fallback choice, best state, hyphen handling, optional lattice
  // fill, debug print, choice filtering).
  int num_blobs = word->chopped_word->NumBlobs();
  if (word->ratings == NULL) {
    // No matrix yet: create one sized by blob count, with bandwidth
    // wordrec_max_join_chunks.
    word->ratings = new MATRIX(num_blobs, wordrec_max_join_chunks);
  }
  // NOTE(review): get(0, 0) is called even when num_blobs is 0 -- confirm
  // that callers never pass an empty word.
  if (word->ratings->get(0, 0) == NULL) {
    // Run initial classification.
    for (int b = 0; b < num_blobs; ++b) {
      BLOB_CHOICE_LIST* choices = classify_piece(word->seam_array, b, b,
                                                 "Initial:", word->chopped_word,
                                                 word->blamer_bundle);
      word->ratings->put(b, b, choices);
    }
  } else {
    // Blobs have been pre-classified. Set matrix cell for all blob choices
    for (int col = 0; col < word->ratings->dimension(); ++col) {
      // Only cells inside the band [col, col + bandwidth) are visited.
      for (int row = col; row < word->ratings->dimension() &&
           row < col + word->ratings->bandwidth(); ++row) {
        BLOB_CHOICE_LIST* choices = word->ratings->get(col, row);
        if (choices != NULL) {
          BLOB_CHOICE_IT bc_it(choices);
          for (bc_it.mark_cycle_pt(); !bc_it.cycled_list(); bc_it.forward()) {
            bc_it.data()->set_matrix_cell(col, row);
          }
        }
      }
    }
  }

  // Run Segmentation Search.
  BestChoiceBundle best_choice_bundle(word->ratings->dimension());
  SegSearch(word, &best_choice_bundle, word->blamer_bundle);

  if (word->best_choice == NULL) {
    // SegSearch found no valid paths, so just use the leading diagonal.
    word->FakeWordFromRatings();
  }
  word->RebuildBestState();
  // If we finished without a hyphen at the end of the word, let the next word
  // be found in the dictionary.
  if (word->word->flag(W_EOL) &&
      !getDict().has_hyphen_end(*word->best_choice)) {
    getDict().reset_hyphen_vars(true);
  }

  // The lattice callback runs only when a blamer bundle exists and a
  // callback has been installed.
  if (word->blamer_bundle != NULL && this->fill_lattice_ != NULL) {
    CallFillLattice(*word->ratings, word->best_choices,
                    *word->uch_set, word->blamer_bundle);
  }
  if (wordrec_debug_level > 0) {
    tprintf("Final Ratings Matrix:\n");
    word->ratings->print(getDict().getUnicharset());
  }
  word->FilterWordChoices(getDict().stopper_debug_level);
}
BLOB_CHOICE_LIST * tesseract::Wordrec::classify_blob | ( | TBLOB * | blob, |
const char * | string, | ||
C_COL | color, | ||
BlamerBundle * | blamer_bundle | ||
) |
Definition at line 56 of file wordclass.cpp.
{
#ifndef GRAPHICS_DISABLED
  // Optional display of the blob being classified.
  if (wordrec_display_all_blobs) {
    display_blob(blob, color);
  }
#endif

  // TODO(rays) collapse with call_matcher and move all to wordrec.cpp.
  BLOB_CHOICE_LIST *ratings = call_matcher(blob);

  // If a blob with the same bounding box as one of the truth character
  // bounding boxes is not classified as the corresponding truth character
  // blame character classifier for incorrect answer.
  if (blamer_bundle != NULL) {
    blamer_bundle->BlameClassifier(getDict().getUnicharset(),
                                   blob->bounding_box(),
                                   *ratings,
                                   wordrec_debug_blamer);
  }

#ifndef GRAPHICS_DISABLED
  // Print the ratings when classifier debugging is on and a label was given.
  if (classify_debug_level && string) {
    print_ratings_list(string, ratings, getDict().getUnicharset());
  }

  if (wordrec_blob_pause) {
    window_wait(blob_window);
  }
#endif

  return ratings;
}
BLOB_CHOICE_LIST * tesseract::Wordrec::classify_piece | ( | const GenericVector< SEAM * > & | seams, |
inT16 | start, | ||
inT16 | end, | ||
const char * | description, | ||
TWERD * | word, | ||
BlamerBundle * | blamer_bundle | ||
) | [virtual] |
Definition at line 55 of file pieces.cpp.
{
  // Classifies the piece made of blobs start..end: the pieces are joined
  // first when the range spans more than one blob, the blob at `start` is
  // classified, and the join is undone afterwards.
  const bool spans_several = end > start;
  if (spans_several) {
    join_pieces(seams, start, end, word);
  }

  BLOB_CHOICE_LIST *ratings = classify_blob(word->blobs[start], description,
                                            White, blamer_bundle);

  // Set the matrix_cell_ entries in all the BLOB_CHOICES.
  BLOB_CHOICE_IT choice_it(ratings);
  choice_it.mark_cycle_pt();
  while (!choice_it.cycled_list()) {
    choice_it.data()->set_matrix_cell(start, end);
    choice_it.forward();
  }

  if (spans_several) {
    break_pieces(seams, start, end, word);
  }
  return ratings;
}
void tesseract::Wordrec::combine_seam | ( | const SeamPile & | seam_pile, |
const SEAM * | seam, | ||
SeamQueue * | seam_queue | ||
) |
tessedit_fix_sideways_chops ||
Definition at line 199 of file findseam.cpp.
{
  // Tries to pair `seam` with every seam in seam_pile. A pair is joined when
  // the two seams are horizontally close, their combined priority is below
  // chop_ok_split, and the pile seam's splits do not vertically straddle the
  // vertical extents of `seam`'s splits. Each successful join is pushed onto
  // seam_queue.
  register inT16 dist;
  inT16 bottom1, top1;   // Vertical extent of seam->split1.
  inT16 bottom2, top2;   // Vertical extent of seam->split2 (or of split1).

  SEAM *new_one;
  const SEAM *this_one;

  // Order the y coordinates of split1's end points so bottom1 <= top1.
  bottom1 = seam->split1->point1->pos.y;
  if (seam->split1->point2->pos.y >= bottom1)
    top1 = seam->split1->point2->pos.y;
  else {
    top1 = bottom1;
    bottom1 = seam->split1->point2->pos.y;
  }
  if (seam->split2 != NULL) {
    // Same ordering for split2.
    bottom2 = seam->split2->point1->pos.y;
    if (seam->split2->point2->pos.y >= bottom2)
      top2 = seam->split2->point2->pos.y;
    else {
      top2 = bottom2;
      bottom2 = seam->split2->point2->pos.y;
    }
  } else {
    // No second split: reuse split1's extent so the tests below still work.
    bottom2 = bottom1;
    top2 = top1;
  }
  for (int x = 0; x < seam_pile.size(); ++x) {
    this_one = seam_pile.get(x).data();
    dist = seam->location.x - this_one->location.x;
    // Consider only seams strictly within SPLIT_CLOSENESS horizontally whose
    // summed priority stays below the "ok" limit.
    if (-SPLIT_CLOSENESS < dist &&
        dist < SPLIT_CLOSENESS &&
        seam->priority + this_one->priority < chop_ok_split) {
      inT16 split1_point1_y = this_one->split1->point1->pos.y;
      inT16 split1_point2_y = this_one->split1->point2->pos.y;
      inT16 split2_point1_y = 0;
      inT16 split2_point2_y = 0;
      if (this_one->split2) {
        split2_point1_y = this_one->split2->point1->pos.y;
        split2_point2_y = this_one->split2->point2->pos.y;
      }
      // For each split of the pile seam, both end points must lie at or
      // above the top, or at or below the bottom, of BOTH extents computed
      // above. The split2 test is skipped when the pile seam has no split2.
      if (
        (
          /* this_one->split1 always exists */
          (
            ((split1_point1_y >= top1 && split1_point2_y >= top1) ||
             (split1_point1_y <= bottom1 && split1_point2_y <= bottom1))
            &&
            ((split1_point1_y >= top2 && split1_point2_y >= top2) ||
             (split1_point1_y <= bottom2 && split1_point2_y <= bottom2))
          )
        )
        &&
        (
          this_one->split2 == NULL ||
          (
            ((split2_point1_y >= top1 && split2_point2_y >= top1) ||
             (split2_point1_y <= bottom1 && split2_point2_y <= bottom1))
            &&
            ((split2_point1_y >= top2 && split2_point2_y >= top2) ||
             (split2_point1_y <= bottom2 && split2_point2_y <= bottom2))
          )
        )
      ) {
        // join_two_seams may return NULL; only a real result is queued.
        new_one = join_two_seams (seam, this_one);
        if (new_one != NULL) {
          if (chop_debug > 1)
            print_seam ("Combo priority ", new_one);
          add_seam_to_queue(new_one->priority, new_one, seam_queue);
        }
      }
    }
  }
}
inT16 tesseract::Wordrec::constrained_split | ( | SPLIT * | split, |
TBLOB * | blob | ||
) |
Definition at line 284 of file findseam.cpp.
{
  // Returns FALSE when the split would leave a too-small chunk or crosses
  // one of the blob's outlines; TRUE otherwise.
  if (is_little_chunk (split->point1, split->point2)) {
    return (FALSE);
  }

  for (TESSLINE *contour = blob->outlines; contour != NULL;
       contour = contour->next) {
    // The bounds test runs first; outlines whose bounds the split does not
    // overlap skip the crossing test.
    if (!split_bounds_overlap (split, contour)) {
      continue;
    }
    if (crosses_outline (split->point1, split->point2, contour->loop)) {
      return (FALSE);
    }
  }
  return (TRUE);
}
int tesseract::Wordrec::crosses_outline | ( | EDGEPT * | p0, |
EDGEPT * | p1, | ||
EDGEPT * | outline | ||
) |
Definition at line 48 of file outlines.cpp.
int tesseract::Wordrec::dict_word | ( | const WERD_CHOICE & | word | ) |
Definition at line 126 of file tface.cpp.
{
  // Delegate the lookup to the dictionary and hand its result straight back.
  const int permuter = getDict().valid_word(word);
  return permuter;
}
void tesseract::Wordrec::DoSegSearch | ( | WERD_RES * | word_res | ) |
Definition at line 31 of file segsearch.cpp.
int tesseract::Wordrec::end_recog | ( | ) |
Definition at line 63 of file tface.cpp.
{
  // Run the shutdown processing with an elapsed time of 0, then report 0.
  program_editdown(0);
  return 0;
}
void tesseract::Wordrec::fill_filtered_fragment_list | ( | BLOB_CHOICE_LIST * | choices, |
int | fragment_pos, | ||
int | num_frag_parts, | ||
BLOB_CHOICE_LIST * | filtered_choices | ||
) |
Definition at line 104 of file pieces.cpp.
{
  // Copies into filtered_choices every choice whose unichar is a character
  // fragment at position fragment_pos of a num_frag_parts-part character.
  // Each copy has its unichar_id replaced by the id of the full character,
  // and the resulting list is sorted by unichar id.
  BLOB_CHOICE_IT filtered_choices_it(filtered_choices);
  BLOB_CHOICE_IT choices_it(choices);

  for (choices_it.mark_cycle_pt(); !choices_it.cycled_list();
       choices_it.forward()) {
    UNICHAR_ID choice_unichar_id = choices_it.data()->unichar_id();
    const CHAR_FRAGMENT *frag = unicharset.get_fragment(choice_unichar_id);

    if (frag != NULL && frag->get_pos() == fragment_pos &&
        frag->get_total() == num_frag_parts) {
      // Recover the unichar_id of the unichar that this fragment is
      // a part of
      BLOB_CHOICE *b = new BLOB_CHOICE(*choices_it.data());
      int original_unichar = unicharset.unichar_to_id(frag->get_unichar());
      b->set_unichar_id(original_unichar);
      // The new copy is handed to the filtered list.
      filtered_choices_it.add_to_end(b);
    }
  }

  filtered_choices->sort(SortByUnicharID<BLOB_CHOICE>);
}
void tesseract::Wordrec::FillLattice | ( | const MATRIX & | ratings, |
const WERD_CHOICE_LIST & | best_choices, | ||
const UNICHARSET & | unicharset, | ||
BlamerBundle * | blamer_bundle | ||
) |
PRIORITY tesseract::Wordrec::full_split_priority | ( | SPLIT * | split, |
inT16 | xmin, | ||
inT16 | xmax | ||
) |
Definition at line 74 of file gradechop.cpp.
{
  /* rect[0..1] and rect[2..3] receive the x-extents of the two outline
     pieces on either side of the split. */
  BOUNDS_RECT rect;
  set_outline_bounds(split->point1, split->point2, rect);

  /* Both pieces lie strictly inside (xmin, xmax): return the fixed 999.0. */
  const bool both_inside_limits =
      xmin < MIN(rect[0], rect[2]) && xmax > MAX(rect[1], rect[3]);
  if (both_inside_limits) {
    return 999.0;
  }

  /* Otherwise the priority is the sum of the three rectangle grades. */
  return grade_overlap(rect) +
         grade_center_of_blob(rect) +
         grade_width_change(rect);
}
void tesseract::Wordrec::get_fragment_lists | ( | inT16 | current_frag, |
inT16 | current_row, | ||
inT16 | start, | ||
inT16 | num_frag_parts, | ||
inT16 | num_blobs, | ||
MATRIX * | ratings, | ||
BLOB_CHOICE_LIST * | choice_lists | ||
) |
Definition at line 266 of file pieces.cpp.
{
  // Recursively collects, for each fragment position, the filtered choices
  // of a run of matrix cells, and merges them once every position is filled.

  // Every fragment position has a non-empty list: merge and stop recursing.
  if (current_frag == num_frag_parts) {
    merge_and_put_fragment_lists(start, current_row - 1, num_frag_parts,
                                 choice_lists, ratings);
    return;
  }

  BLOB_CHOICE_LIST *slot = &choice_lists[current_frag];
  for (inT16 last = current_row; last < num_blobs; last++) {
    BLOB_CHOICE_LIST *cell = ratings->get(current_row, last);
    if (cell != NULL) {
      fill_filtered_fragment_list(cell, current_frag, num_frag_parts, slot);
      if (!slot->empty()) {
        // Look for the next fragment starting right after this cell.
        get_fragment_lists(current_frag + 1, last + 1, start, num_frag_parts,
                           num_blobs, ratings, choice_lists);
        slot->clear();
      }
    }
  }
}
PRIORITY tesseract::Wordrec::grade_center_of_blob | ( | register BOUNDS_RECT | rect | ) |
Definition at line 95 of file gradechop.cpp.
{
  /* Grades the difference in width between the two pieces of the split. */
  const int width1 = rect[1] - rect[0];
  const int width2 = rect[3] - rect[2];

  /* When both pieces are wider than chop_centered_maxwidth the grade is 0. */
  if (width1 > chop_centered_maxwidth && width2 > chop_centered_maxwidth) {
    return 0.0;
  }

  /* Absolute width difference, scaled by the knob and capped. */
  PRIORITY grade = width1 - width2;
  if (grade < 0) {
    grade = -grade;
  }
  grade *= chop_center_knob;
  grade = MIN(CENTER_GRADE_CAP, grade);

  return MAX(0.0, grade);
}
PRIORITY tesseract::Wordrec::grade_overlap | ( | register BOUNDS_RECT | rect | ) |
Definition at line 122 of file gradechop.cpp.
{
  /* Grades how much the x-extents of the two pieces overlap. */
  inT16 second_piece_width = rect[3] - rect[2];
  inT16 first_piece_width = rect[1] - rect[0];

  inT16 overlap = MIN(rect[1], rect[3]) - MAX(rect[0], rect[2]);
  inT16 narrower = MIN(second_piece_width, first_piece_width);

  if (overlap == narrower) {
    return 100.0;                /* Total overlap */
  }

  /* Extra penalty for too much overlap */
  inT16 excess = 2 * overlap - narrower;
  overlap += MAX(0, excess);

  PRIORITY grade = overlap * chop_overlap_knob;
  return MAX(0.0, grade);
}
PRIORITY tesseract::Wordrec::grade_sharpness | ( | register SPLIT * | split | ) |
Definition at line 175 of file gradechop.cpp.
{
  /* Sum of the point priorities of the two split end points. */
  PRIORITY grade =
      point_priority (split->point1) + point_priority (split->point2);

  /* Sums below -360 are mapped to 0; all other sums are shifted up by 360. */
  grade = (grade < -360.0) ? 0 : grade + 360.0;

  grade *= chop_sharpness_knob;  /* Values 0 to -360 */
  return grade;
}
PRIORITY tesseract::Wordrec::grade_split_length | ( | register SPLIT * | split | ) |
Definition at line 152 of file gradechop.cpp.
{
  /* Grades the split by the square root of its weighted length. */
  const float split_length =
      weighted_edgept_dist (split->point1, split->point2, chop_x_y_weight);

  PRIORITY grade = 0;
  if (!(split_length <= 0)) {
    grade = sqrt (split_length) * chop_split_dist_knob;
  }

  return MAX(0.0, grade);
}
PRIORITY tesseract::Wordrec::grade_width_change | ( | register BOUNDS_RECT | rect | ) |
Definition at line 198 of file gradechop.cpp.
{
  /* Grade = (20 - (combined x-span of both pieces - wider piece's width)),
     scaled by chop_width_change_knob and floored at 0. */
  inT32 width1 = rect[3] - rect[2];
  inT32 width2 = rect[1] - rect[0];

  PRIORITY grade = 20 - (MAX (rect[1], rect[3]) -
                         MIN (rect[0], rect[2]) -
                         MAX (width1, width2));
  grade *= chop_width_change_knob;

  return MAX(0.0, grade);
}
void tesseract::Wordrec::improve_by_chopping | ( | float | rating_cert_scale, |
WERD_RES * | word, | ||
BestChoiceBundle * | best_choice_bundle, | ||
BlamerBundle * | blamer_bundle, | ||
LMPainPoints * | pain_points, | ||
GenericVector< SegSearchPending > * | pending | ||
) |
Definition at line 503 of file chopper.cpp.
{
  // Repeatedly chops one blob of the word, updates every structure that
  // depends on the blob count, and re-runs the language model, until an
  // acceptable choice is found, no further chop is possible, or the ratings
  // matrix reaches kMaxNumChunks.
  int blob_number;
  do {  // improvement loop.
    // Make a simple vector of BLOB_CHOICEs to make it easy to pick which
    // one to chop.
    GenericVector<BLOB_CHOICE*> blob_choices;
    int num_blobs = word->ratings->dimension();
    for (int i = 0; i < num_blobs; ++i) {
      BLOB_CHOICE_LIST* choices = word->ratings->get(i, i);
      if (choices == NULL || choices->empty()) {
        blob_choices.push_back(NULL);
      } else {
        BLOB_CHOICE_IT bc_it(choices);
        blob_choices.push_back(bc_it.data());
      }
    }
    SEAM* seam = improve_one_blob(blob_choices, &best_choice_bundle->fixpt,
                                  false, false, word, &blob_number);
    if (seam == NULL) break;
    // A chop has been made. We have to correct all the data structures to
    // take into account the extra bottom-level blob.
    // Put the seam into the seam_array and correct everything else on the
    // word: ratings matrix (including matrix location in the BLOB_CHOICES),
    // states in WERD_CHOICEs, and blob widths.
    word->InsertSeam(blob_number, seam);
    // Insert a new entry in the beam array.
    best_choice_bundle->beam.insert(new LanguageModelState, blob_number);
    // Fixpts are outdated, but will get recalculated.
    best_choice_bundle->fixpt.clear();
    // Remap existing pain points.
    pain_points->RemapForSplit(blob_number);
    // Insert a new pending at the chop point.
    pending->insert(SegSearchPending(), blob_number);

    // Classify the two newly created blobs using ProcessSegSearchPainPoint,
    // as that updates the pending correctly and adds new pain points.
    MATRIX_COORD pain_point(blob_number, blob_number);
    ProcessSegSearchPainPoint(0.0f, pain_point, "Chop1", pending, word,
                              pain_points, blamer_bundle);
    pain_point.col = blob_number + 1;
    pain_point.row = blob_number + 1;
    ProcessSegSearchPainPoint(0.0f, pain_point, "Chop2", pending, word,
                              pain_points, blamer_bundle);
    if (language_model_->language_model_ngram_on) {
      // N-gram evaluation depends on the number of blobs in a chunk, so we
      // have to re-evaluate everything in the word.
      ResetNGramSearch(word, best_choice_bundle, pending);
      blob_number = 0;
    }
    // Run language model incrementally. (Except with the n-gram model on.)
    UpdateSegSearchNodes(rating_cert_scale, blob_number, pending, word,
                         pain_points, best_choice_bundle, blamer_bundle);
  } while (!language_model_->AcceptableChoiceFound() &&
           word->ratings->dimension() < kMaxNumChunks);

  // If after running only the chopper best_choice is incorrect and no blame
  // has been yet set, blame the classifier if best_choice is classifier's
  // top choice and is a dictionary word (i.e. language model could not have
  // helped). Otherwise blame the tradeoff between the classifier and
  // the old language model (permuters).
  if (word->blamer_bundle != NULL &&
      word->blamer_bundle->incorrect_result_reason() == IRR_CORRECT &&
      !word->blamer_bundle->ChoiceIsCorrect(word->best_choice)) {
    bool valid_permuter = word->best_choice != NULL &&
        Dict::valid_word_permuter(word->best_choice->permuter(), false);
    word->blamer_bundle->BlameClassifierOrLangModel(word,
                                                    getDict().getUnicharset(),
                                                    valid_permuter,
                                                    wordrec_debug_blamer);
  }
}
SEAM * tesseract::Wordrec::improve_one_blob | ( | const GenericVector< BLOB_CHOICE * > & | blob_choices, |
DANGERR * | fixpt, | ||
bool | split_next_to_fragment, | ||
bool | italic_blob, | ||
WERD_RES * | word, | ||
int * | blob_number | ||
) |
Definition at line 332 of file chopper.cpp.
{
  // Picks a blob to split (first from fixpt, otherwise the worst-rated one
  // below rating_ceiling) and tries to chop it. Returns the seam of the
  // first successful chop with *blob_number set to the chopped blob, or NULL
  // when no blob can be selected or the selected blob has no choice.
  float rating_ceiling = MAX_FLOAT32;
  SEAM *seam = NULL;
  do {
    *blob_number = select_blob_to_split_from_fixpt(fixpt);
    if (chop_debug) tprintf("blob_number from fixpt = %d\n", *blob_number);
    bool split_point_from_dict = (*blob_number != -1);
    if (split_point_from_dict) {
      fixpt->clear();
    } else {
      *blob_number = select_blob_to_split(blob_choices, rating_ceiling,
                                          split_next_to_fragment);
    }
    if (chop_debug) tprintf("blob_number = %d\n", *blob_number);
    if (*blob_number == -1)
      return NULL;

    // TODO(rays) it may eventually help to allow italic_blob to be true,
    seam = chop_numbered_blob(word->chopped_word, *blob_number, italic_blob,
                              word->seam_array);
    if (seam != NULL)
      return seam;  // Success!
    if (blob_choices[*blob_number] == NULL)
      return NULL;
    if (!split_point_from_dict) {
      // We chopped the worst rated blob, try something else next time.
      rating_ceiling = blob_choices[*blob_number]->rating();
    }
  } while (true);
  // Not reached: every exit from the loop above is a return.
  return seam;
}
void tesseract::Wordrec::InitBlamerForSegSearch | ( | WERD_RES * | word_res, |
LMPainPoints * | pain_points, | ||
BlamerBundle * | blamer_bundle, | ||
STRING * | blamer_debug | ||
) | [protected] |
Definition at line 310 of file segsearch.cpp.
{
  // Empties the pain point heap and initialises the blamer bundle for a
  // segmentation search, lending it a callback bound to
  // LMPainPoints::GenerateForBlamer for the duration of the call.
  pain_points->Clear();  // Clear pain points heap.
  TessResultCallback2<bool, int, int>* pp_cb = NewPermanentTessCallback(
      pain_points, &LMPainPoints::GenerateForBlamer,
      static_cast<double>(segsearch_max_char_wh_ratio), word_res);
  blamer_bundle->InitForSegSearch(word_res->best_choice, word_res->ratings,
                                  getDict().WildcardID(), wordrec_debug_blamer,
                                  blamer_debug, pp_cb);
  // The callback was created here and is released here.
  delete pp_cb;
}
int tesseract::Wordrec::is_crossed | ( | TPOINT | a0, |
TPOINT | a1, | ||
TPOINT | b0, | ||
TPOINT | b1 | ||
) |
Definition at line 70 of file outlines.cpp.
{
  /* Returns non-zero when both sign tests below hold, each comparing a pair
     of cross products of vectors built from the four end points. */
  TPOINT b0a1, b0a0, a1b1, b0b1, a1a0;

  /* Vectors anchored at b0. */
  b0a1.x = a1.x - b0.x;
  b0a1.y = a1.y - b0.y;
  b0a0.x = a0.x - b0.x;
  b0a0.y = a0.y - b0.y;
  b0b1.x = b1.x - b0.x;
  b0b1.y = b1.y - b0.y;

  /* Vectors anchored at a1. */
  a1b1.x = b1.x - a1.x;
  a1b1.y = b1.y - a1.y;
  a1a0.x = a0.x - a1.x;
  a1a0.y = a0.y - a1.y;

  const int b0a1xb0b1 = CROSS (b0a1, b0b1);
  const int b0b1xb0a0 = CROSS (b0b1, b0a0);
  const int a1b1xa1a0 = CROSS (a1b1, a1a0);
  /* For clarity, we want to do this operation: CROSS(a1a0, a1b0). */
  const int a1a0xa1b0 = -CROSS (a1a0, b0a1);

  /* Each pair must be strictly of the same sign (zero never qualifies). */
  const bool first_pair_agrees =
      (b0a1xb0b1 > 0 && b0b1xb0a0 > 0) || (b0a1xb0b1 < 0 && b0b1xb0a0 < 0);
  const bool second_pair_agrees =
      (a1b1xa1a0 > 0 && a1a0xa1b0 > 0) || (a1b1xa1a0 < 0 && a1a0xa1b0 < 0);

  return first_pair_agrees && second_pair_agrees;
}
int tesseract::Wordrec::is_little_chunk | ( | EDGEPT * | point1, |
EDGEPT * | point2 | ||
) |
Definition at line 120 of file chop.cpp.
{
  /* Walks the outline from each point towards the other. If the other point
     is found within the walk limit, the result is decided by
     is_small_area(); otherwise FALSE is returned. */
  EDGEPT *walker = point1;
  int steps = 0;

  /* Go from P1 to P2 */
  for (;;) {
    if (is_same_edgept (point2, walker)) {
      if (is_small_area (point1, point2))
        return TRUE;
      break;                     /* reached P2 but the area is not small */
    }
    walker = walker->next;
    if (walker == point1)
      break;                     /* went all the way round */
    if (!(steps++ < chop_min_outline_points))
      break;                     /* walked far enough */
  }

  /* Go from P2 to P1 */
  walker = point2;
  steps = 0;
  for (;;) {
    if (is_same_edgept (point1, walker))
      return is_small_area (point2, point1);
    walker = walker->next;
    if (walker == point2)
      break;
    if (!(steps++ < chop_min_outline_points))
      break;
  }

  return FALSE;
}
int tesseract::Wordrec::is_same_edgept | ( | EDGEPT * | p1, |
EDGEPT * | p2 | ||
) |
Definition at line 104 of file outlines.cpp.
{
  /* Two edge points are "the same" only when they are the same object:
     this compares the pointers, not the coordinates. */
  return (p1 == p2);
}
int tesseract::Wordrec::is_small_area | ( | EDGEPT * | point1, |
EDGEPT * | point2 | ||
) |
void tesseract::Wordrec::merge_and_put_fragment_lists | ( | inT16 | row, |
inT16 | column, | ||
inT16 | num_frag_parts, | ||
BLOB_CHOICE_LIST * | choice_lists, | ||
MATRIX * | ratings | ||
) |
Definition at line 137 of file pieces.cpp.
{
  // Walks num_frag_parts choice lists in parallel (the loops below rely on
  // each list being ordered by ascending unichar_id). Wherever all lists
  // currently point at the same unichar_id, one merged BLOB_CHOICE is
  // appended to the list stored at ratings(row, column). The merged rating
  // is the sum of the part ratings and the merged certainty is the lowest
  // part certainty.
  BLOB_CHOICE_IT *choice_lists_it = new BLOB_CHOICE_IT[num_frag_parts];

  for (int i = 0; i < num_frag_parts; i++) {
    choice_lists_it[i].set_to_list(&choice_lists[i]);
    choice_lists_it[i].mark_cycle_pt();
  }

  BLOB_CHOICE_LIST *merged_choice = ratings->get(row, column);
  // Remember whether the list was allocated here: a list obtained from the
  // matrix is still referenced by the matrix and must never be deleted here.
  const bool list_allocated_here = (merged_choice == NULL);
  if (list_allocated_here)
    merged_choice = new BLOB_CHOICE_LIST;

  bool end_of_list = false;
  BLOB_CHOICE_IT merged_choice_it(merged_choice);
  while (!end_of_list) {
    // Find the maximum unichar_id of the current entry the iterators
    // are pointing at
    UNICHAR_ID max_unichar_id = choice_lists_it[0].data()->unichar_id();
    for (int i = 0; i < num_frag_parts; i++) {
      UNICHAR_ID unichar_id = choice_lists_it[i].data()->unichar_id();
      if (max_unichar_id < unichar_id) {
        max_unichar_id = unichar_id;
      }
    }

    // Move the each iterators until it gets to an entry that has a
    // value greater than or equal to max_unichar_id
    for (int i = 0; i < num_frag_parts; i++) {
      UNICHAR_ID unichar_id = choice_lists_it[i].data()->unichar_id();
      while (!choice_lists_it[i].cycled_list() &&
             unichar_id < max_unichar_id) {
        choice_lists_it[i].forward();
        unichar_id = choice_lists_it[i].data()->unichar_id();
      }
      if (choice_lists_it[i].cycled_list()) {
        end_of_list = true;
        break;
      }
    }

    if (end_of_list)
      break;

    // Checks if the fragments are parts of the same character
    UNICHAR_ID first_unichar_id = choice_lists_it[0].data()->unichar_id();
    bool same_unichar = true;
    for (int i = 1; i < num_frag_parts; i++) {
      UNICHAR_ID unichar_id = choice_lists_it[i].data()->unichar_id();
      if (unichar_id != first_unichar_id) {
        same_unichar = false;
        break;
      }
    }

    if (same_unichar) {
      // Add the merged character to the result
      UNICHAR_ID merged_unichar_id = first_unichar_id;
      inT16 merged_fontinfo_id = choice_lists_it[0].data()->fontinfo_id();
      inT16 merged_fontinfo_id2 = choice_lists_it[0].data()->fontinfo_id2();
      float merged_min_xheight = choice_lists_it[0].data()->min_xheight();
      float merged_max_xheight = choice_lists_it[0].data()->max_xheight();
      float positive_yshift = 0, negative_yshift = 0;
      int merged_script_id = choice_lists_it[0].data()->script_id();
      BlobChoiceClassifier classifier = choice_lists_it[0].data()->classifier();

      float merged_rating = 0, merged_certainty = 0;
      for (int i = 0; i < num_frag_parts; i++) {
        // Take every attribute from the choice being merged BEFORE the
        // iterator is advanced; after forward() data() refers to the next
        // choice in the list, which is not part of this merge.
        BLOB_CHOICE *part = choice_lists_it[i].data();
        float rating = part->rating();
        float certainty = part->certainty();

        if (i == 0 || certainty < merged_certainty)
          merged_certainty = certainty;
        merged_rating += rating;

        IntersectRange(part->min_xheight(), part->max_xheight(),
                       &merged_min_xheight, &merged_max_xheight);
        float yshift = part->yshift();
        if (yshift > positive_yshift) positive_yshift = yshift;
        if (yshift < negative_yshift) negative_yshift = yshift;

        choice_lists_it[i].forward();
        if (choice_lists_it[i].cycled_list())
          end_of_list = true;
      }

      // Parts shifted in opposite directions cancel to no shift at all.
      float merged_yshift = positive_yshift != 0
          ? (negative_yshift != 0 ? 0 : positive_yshift)
          : negative_yshift;
      merged_choice_it.add_to_end(new BLOB_CHOICE(merged_unichar_id,
                                                  merged_rating,
                                                  merged_certainty,
                                                  merged_fontinfo_id,
                                                  merged_fontinfo_id2,
                                                  merged_script_id,
                                                  merged_min_xheight,
                                                  merged_max_xheight,
                                                  merged_yshift,
                                                  classifier));
    }
  }

  if (classify_debug_level)
    print_ratings_list("Merged Fragments", merged_choice,
                       unicharset);

  if (!merged_choice->empty()) {
    ratings->put(row, column, merged_choice);
  } else if (list_allocated_here) {
    // Nothing was merged and the matrix never saw this list: discard it.
    delete merged_choice;
  }

  delete [] choice_lists_it;
}
void tesseract::Wordrec::merge_fragments | ( | MATRIX * | ratings, |
inT16 | num_blobs | ||
) |
Definition at line 298 of file pieces.cpp.
{
  // First merges fragment choices into whole-character choices for every
  // start blob and every fragment count from 2 to kMaxChunks, then removes
  // all remaining fragment choices from the ratings matrix.
  BLOB_CHOICE_LIST choice_lists[CHAR_FRAGMENT::kMaxChunks];
  for (inT16 start = 0; start < num_blobs; start++) {
    for (int frag_parts = 2; frag_parts <= CHAR_FRAGMENT::kMaxChunks;
         frag_parts++) {
      get_fragment_lists(0, start, start, frag_parts, num_blobs,
                         ratings, choice_lists);
    }
  }

  // Delete fragments from the rating matrix
  for (inT16 x = 0; x < num_blobs; x++) {
    for (inT16 y = x; y < num_blobs; y++) {
      BLOB_CHOICE_LIST *choices = ratings->get(x, y);
      if (choices != NULL) {
        BLOB_CHOICE_IT choices_it(choices);
        for (choices_it.mark_cycle_pt(); !choices_it.cycled_list();
             choices_it.forward()) {
          UNICHAR_ID choice_unichar_id = choices_it.data()->unichar_id();
          const CHAR_FRAGMENT *frag =
              unicharset.get_fragment(choice_unichar_id);
          if (frag != NULL)
            delete choices_it.extract();
        }
      }
    }
  }
}
bool tesseract::Wordrec::near_point | ( | EDGEPT * | point, |
EDGEPT * | line_pt_0, | ||
EDGEPT * | line_pt_1, | ||
EDGEPT ** | near_pt | ||
) |
Definition at line 116 of file outlines.cpp.
{
  /* Computes a candidate point p on the line through line_pt_0 and
     line_pt_1. If p lies on the segment and is not one of its end points, a
     new edge point is created there and true is returned; otherwise
     *near_pt is set by closest() and false is returned. */
  TPOINT p;

  const float x0 = line_pt_0->pos.x;
  const float x1 = line_pt_1->pos.x;
  const float y0 = line_pt_0->pos.y;
  const float y1 = line_pt_1->pos.y;

  if (x0 == x1) {
    /* Handle vertical line */
    p.x = (inT16) x0;
    p.y = point->pos.y;
  } else {
    /* Slope and intercept */
    float slope = (y0 - y1) / (x0 - x1);
    float intercept = y1 - x1 * slope;

    /* Find perpendicular */
    p.x = (inT16) ((point->pos.x + (point->pos.y - intercept) * slope) /
                   (slope * slope + 1));
    p.y = (inT16) (slope * p.x + intercept);
  }

  const bool strictly_inside_segment =
      is_on_line (p, line_pt_0->pos, line_pt_1->pos) &&
      (!same_point (p, line_pt_0->pos)) &&
      (!same_point (p, line_pt_1->pos));

  if (!strictly_inside_segment) {
    /* Intersection not on line */
    *near_pt = closest(point, line_pt_0, line_pt_1);
    return false;
  }

  /* Intersection on line */
  *near_pt = make_edgept(p.x, p.y, line_pt_1, line_pt_0);
  return true;
}
void tesseract::Wordrec::new_max_point | ( | EDGEPT * | local_max, |
PointHeap * | points | ||
) |
Definition at line 300 of file chop.cpp.
{
  /* Adds local_max to the point heap when direction() is positive, or when
     it is zero and the point's priority is negative. point_priority() is
     only consulted in the zero-direction case. */
  const inT16 dir = direction (local_max);

  if (dir > 0 || (dir == 0 && point_priority (local_max) < 0)) {
    add_point_to_list(points, local_max);
  }
}
void tesseract::Wordrec::new_min_point | ( | EDGEPT * | local_min, |
PointHeap * | points | ||
) |
Definition at line 276 of file chop.cpp.
{
  /* Adds local_min to the point heap when direction() is negative, or when
     it is zero and the point's priority is negative. point_priority() is
     only consulted in the zero-direction case. */
  const inT16 dir = direction (local_min);

  if (dir < 0 || (dir == 0 && point_priority (local_min) < 0)) {
    add_point_to_list(points, local_min);
  }
}
EDGEPT * tesseract::Wordrec::pick_close_point | ( | EDGEPT * | critical_point, |
EDGEPT * | vertical_point, | ||
int * | best_dist | ||
) |
Definition at line 179 of file chop.cpp.
{
  /* Starting at vertical_point, looks for a point no further from
     critical_point than *best_dist. On acceptance *best_dist is lowered to
     the new distance. The walk continues to the next outline point only
     while chop_vertical_creep is set and the last point was accepted.
     Returns the accepted point, or NULL if none qualified. */
  EDGEPT *best_point = NULL;
  bool keep_creeping;

  do {
    keep_creeping = false;

    const int this_distance = edgept_dist (critical_point, vertical_point);
    if (this_distance <= *best_dist) {
      /* Candidates that coincide with the critical point (or whose successor
         does), that repeat the current best position, or that are exterior
         points are not usable. */
      const bool unusable =
          same_point (critical_point->pos, vertical_point->pos) ||
          same_point (critical_point->pos, vertical_point->next->pos) ||
          (best_point && same_point (best_point->pos, vertical_point->pos)) ||
          is_exterior_point (critical_point, vertical_point);

      if (!unusable) {
        *best_dist = this_distance;
        best_point = vertical_point;
        if (chop_vertical_creep)
          keep_creeping = true;
      }
    }

    vertical_point = vertical_point->next;
  } while (keep_creeping);

  return best_point;
}
SEAM * tesseract::Wordrec::pick_good_seam | ( | TBLOB * | blob | ) |
Definition at line 305 of file findseam.cpp.
{ SeamPile seam_pile(chop_seam_pile_size); EDGEPT *points[MAX_NUM_POINTS]; EDGEPT_CLIST new_points; SEAM *seam = NULL; TESSLINE *outline; inT16 num_points = 0; #ifndef GRAPHICS_DISABLED if (chop_debug > 2) wordrec_display_splits.set_value(true); draw_blob_edges(blob); #endif PointHeap point_heap(MAX_NUM_POINTS); for (outline = blob->outlines; outline; outline = outline->next) prioritize_points(outline, &point_heap); while (!point_heap.empty() && num_points < MAX_NUM_POINTS) { points[num_points++] = point_heap.PeekTop().data; point_heap.Pop(NULL); } /* Initialize queue */ SeamQueue seam_queue(MAX_NUM_SEAMS); try_point_pairs(points, num_points, &seam_queue, &seam_pile, &seam, blob); try_vertical_splits(points, num_points, &new_points, &seam_queue, &seam_pile, &seam, blob); if (seam == NULL) { choose_best_seam(&seam_queue, NULL, BAD_PRIORITY, &seam, blob, &seam_pile); } else if (seam->priority > chop_good_split) { choose_best_seam(&seam_queue, NULL, seam->priority, &seam, blob, &seam_pile); } EDGEPT_C_IT it(&new_points); for (it.mark_cycle_pt(); !it.cycled_list(); it.forward()) { EDGEPT *inserted_point = it.data(); if (!point_used_by_seam(seam, inserted_point)) { for (outline = blob->outlines; outline; outline = outline->next) { if (outline->loop == inserted_point) { outline->loop = outline->loop->next; } } remove_edgept(inserted_point); } } if (seam) { if (seam->priority > chop_ok_split) { delete seam; seam = NULL; } #ifndef GRAPHICS_DISABLED else if (wordrec_display_splits) { if (seam->split1) mark_split (seam->split1); if (seam->split2) mark_split (seam->split2); if (seam->split3) mark_split (seam->split3); if (chop_debug > 2) { update_edge_window(); edge_window_wait(); } } #endif } if (chop_debug) wordrec_display_splits.set_value(false); return (seam); }
PRIORITY tesseract::Wordrec::point_priority | ( | EDGEPT * | point | ) |
void tesseract::Wordrec::prioritize_points | ( | TESSLINE * | outline, |
PointHeap * | points | ||
) |
Definition at line 217 of file chop.cpp.
{
  /* Walks once round the outline, tracking a pending local minimum and a
     pending local maximum by the sign of each point's vec.y, and reports
     them through new_min_point() / new_max_point(). */
  EDGEPT *cursor = outline->loop;
  EDGEPT *pending_min = cursor;
  EDGEPT *pending_max = cursor;

  do {
    if (cursor->vec.y < 0) {
      /* Look for minima */
      if (pending_max != NULL) {
        new_max_point(pending_max, points);
      } else if (is_inside_angle (cursor)) {
        add_point_to_list(points, cursor);
      }
      pending_max = NULL;
      pending_min = cursor->next;
    } else if (cursor->vec.y > 0) {
      /* Look for maxima */
      if (pending_min != NULL) {
        new_min_point(pending_min, points);
      } else if (is_inside_angle (cursor)) {
        add_point_to_list(points, cursor);
      }
      pending_min = NULL;
      pending_max = cursor->next;
    } else if (pending_max != NULL) {
      /* Flat area while a maximum is pending */
      if (pending_max->prev->vec.y != 0) {
        new_max_point(pending_max, points);
      }
      pending_max = cursor->next;
      pending_min = NULL;
    } else {
      /* Flat area while a minimum is pending */
      if (pending_min->prev->vec.y != 0) {
        new_min_point(pending_min, points);
      }
      pending_min = cursor->next;
      pending_max = NULL;
    }

    /* Next point */
    cursor = cursor->next;
  } while (cursor != outline->loop);
}
void tesseract::Wordrec::ProcessSegSearchPainPoint | ( | float | pain_point_priority, |
const MATRIX_COORD & | pain_point, | ||
const char * | pain_point_type, | ||
GenericVector< SegSearchPending > * | pending, | ||
WERD_RES * | word_res, | ||
LMPainPoints * | pain_points, | ||
BlamerBundle * | blamer_bundle | ||
) | [protected] |
Definition at line 230 of file segsearch.cpp.
{
  // Classifies the blob range named by pain_point, stores the result in the
  // ratings matrix (widening the band first if needed), optionally adds
  // pain points joining it to its neighbours, and marks the cell classified
  // in pending.
  if (segsearch_debug_level > 0) {
    tprintf("Classifying pain point %s priority=%.4f, col=%d, row=%d\n",
            pain_point_type, pain_point_priority,
            pain_point.col, pain_point.row);
  }
  ASSERT_HOST(pain_points != NULL);
  MATRIX *ratings = word_res->ratings;
  // Classify blob [pain_point.col pain_point.row]
  if (!pain_point.Valid(*ratings)) {
    ratings->IncreaseBandSize(pain_point.row + 1 - pain_point.col);
  }
  ASSERT_HOST(pain_point.Valid(*ratings));
  BLOB_CHOICE_LIST *classified = classify_piece(word_res->seam_array,
                                                pain_point.col, pain_point.row,
                                                pain_point_type,
                                                word_res->chopped_word,
                                                blamer_bundle);
  BLOB_CHOICE_LIST *lst = ratings->get(pain_point.col, pain_point.row);
  if (lst == NULL) {
    ratings->put(pain_point.col, pain_point.row, classified);
  } else {
    // We can not delete old BLOB_CHOICEs, since they might contain
    // ViterbiStateEntries that are parents of other "active" entries.
    // Thus if the matrix cell already contains classifications we add
    // the new ones to the beginning of the list.
    BLOB_CHOICE_IT it(lst);
    it.add_list_before(classified);
    delete classified;  // safe to delete, since empty after add_list_before()
    classified = NULL;
  }

  if (segsearch_debug_level > 0) {
    print_ratings_list("Updated ratings matrix with a new entry:",
                       ratings->get(pain_point.col, pain_point.row),
                       getDict().getUnicharset());
    ratings->print(getDict().getUnicharset());
  }

  // Insert initial "pain points" to join the newly classified blob
  // with its left and right neighbors.
  // Note: classified is NULL here when the results were merged into an
  // existing cell above, so no neighbour pain points are added in that case.
  if (classified != NULL && !classified->empty()) {
    if (pain_point.col > 0) {
      pain_points->GeneratePainPoint(
          pain_point.col - 1, pain_point.row, LM_PPTYPE_SHAPE, 0.0,
          true, segsearch_max_char_wh_ratio, word_res);
    }
    if (pain_point.row + 1 < ratings->dimension()) {
      pain_points->GeneratePainPoint(
          pain_point.col, pain_point.row + 1, LM_PPTYPE_SHAPE, 0.0,
          true, segsearch_max_char_wh_ratio, word_res);
    }
  }
  (*pending)[pain_point.col].SetBlobClassified(pain_point.row);
}
void tesseract::Wordrec::program_editdown | ( | inT32 | elasped_time | ) |
Definition at line 76 of file tface.cpp.
{
  /* Ends the adaptive classifier, then ends the dictionary.
     The elasped_time argument is not read by this body. */
  EndAdaptiveClassifier();
  getDict().End();
}
void tesseract::Wordrec::program_editup | ( | const char * | textbase, |
bool | init_classifier, | ||
bool | init_permute | ||
) |
Definition at line 47 of file tface.cpp.
{
  /* Start-up sequence: records the image file name (when given), sets up
     feature definitions and extractors, initialises the adaptive classifier,
     optionally loads the dictionary, and saves chop_ok_split for pass 2. */
  if (textbase != NULL) imagefile = textbase;
  InitFeatureDefs(&feature_defs_);
  SetupExtractors(&feature_defs_);
  InitAdaptiveClassifier(init_classifier);
  /* NOTE(review): the signature shown for this function names its third
     parameter init_permute, while the body reads init_dict. Presumably the
     declaration and the definition use different parameter names; confirm
     against the definition in tface.cpp. */
  if (init_dict) getDict().Load(Dict::GlobalDawgCache());
  pass2_ok_split = chop_ok_split;
}
void tesseract::Wordrec::ResetNGramSearch | ( | WERD_RES * | word_res, |
BestChoiceBundle * | best_choice_bundle, | ||
GenericVector< SegSearchPending > * | pending | ||
) | [protected] |
Definition at line 293 of file segsearch.cpp.
{
  // Resets the search state so that the whole word is re-evaluated: clears
  // every beam column and the word choices, drops the best Viterbi state
  // entry, and leaves only column 0 marked as pending.
  // TODO(rays) More refactoring required here.
  // Delete existing viterbi states.
  for (int col = 0; col < best_choice_bundle->beam.size(); ++col) {
    best_choice_bundle->beam[col]->Clear();
  }
  // Reset best_choice_bundle.
  word_res->ClearWordChoices();
  best_choice_bundle->best_vse = NULL;
  // Clear out all existing pendings and add a new one for the first column.
  (*pending)[0].SetColumnClassified();
  for (int i = 1; i < pending->size(); ++i)
    (*pending)[i].Clear();
}
void tesseract::Wordrec::reverse_outline | ( | EDGEPT * | outline | ) |
Definition at line 164 of file outlines.cpp.
{
  /* Reverses the direction of a closed outline in place: every point has
     its next/prev links exchanged and its vec recomputed towards its new
     successor. */
  EDGEPT *walker = outline;

  do {
    /* Swap next and prev */
    EDGEPT *former_next = walker->next;
    walker->next = walker->prev;
    walker->prev = former_next;

    /* Set up vec field */
    walker->vec.x = walker->next->pos.x - walker->pos.x;
    walker->vec.y = walker->next->pos.y - walker->pos.y;

    /* Go to next point (the successor in the original direction) */
    walker = former_next;
  } while (walker != outline);
}
void tesseract::Wordrec::SaveAltChoices | ( | const LIST & | best_choices, |
WERD_RES * | word | ||
) |
PRIORITY tesseract::Wordrec::seam_priority | ( | SEAM * | seam, |
inT16 | xmin, | ||
inT16 | xmax | ||
) |
Definition at line 390 of file findseam.cpp.
{
  /* Priority of a seam: its stored priority plus the full split priority of
     split1, evaluated with split2 and split3 (when present) temporarily
     applied to the outline. A seam with no split1 has priority 0. */
  if (seam->split1 == NULL) {
    return 0;
  }

  /* split3 is only considered when split2 exists. */
  const bool has_second = (seam->split2 != NULL);
  const bool has_third = has_second && (seam->split3 != NULL);

  if (has_second) {
    split_outline (seam->split2->point1, seam->split2->point2);
  }
  if (has_third) {
    split_outline (seam->split3->point1, seam->split3->point2);
  }

  PRIORITY priority =
      (seam->priority + full_split_priority (seam->split1, xmin, xmax));

  /* Undo the temporary splits in reverse order. */
  if (has_third) {
    unsplit_outlines (seam->split3->point1, seam->split3->point2);
  }
  if (has_second) {
    unsplit_outlines (seam->split2->point1, seam->split2->point2);
  }

  return priority;
}
void tesseract::Wordrec::SegSearch | ( | WERD_RES * | word_res, |
BestChoiceBundle * | best_choice_bundle, | ||
BlamerBundle * | blamer_bundle | ||
) |
Definition at line 37 of file segsearch.cpp.
{
  // Segmentation search over the ratings matrix of word_res: runs the
  // language model on the initial matrix, optionally improves the result by
  // chopping, then keeps classifying "pain points" until SegSearchDone()
  // reports completion (and any blamer-guided search has finished) or the
  // pain point queue runs dry.
  if (segsearch_debug_level > 0) {
    tprintf("Starting SegSearch on ratings matrix%s:\n",
            wordrec_enable_assoc ? " (with assoc)" : "");
    word_res->ratings->print(getDict().getUnicharset());
  }
  LMPainPoints pain_points(segsearch_max_pain_points,
                           segsearch_max_char_wh_ratio,
                           assume_fixed_pitch_char_segment,
                           &getDict(), segsearch_debug_level);

  pain_points.GenerateInitial(word_res);

  // Compute scaling factor that will help us recover blob outline length
  // from classifier rating and certainty for the blob.
  float rating_cert_scale = -1.0 * getDict().certainty_scale / rating_scale;

  language_model_->InitForWord(prev_word_best_choice_,
                               assume_fixed_pitch_char_segment,
                               segsearch_max_char_wh_ratio, rating_cert_scale);

  // Initialize blamer-related information: map character boxes recorded in
  // blamer_bundle->norm_truth_word to the corresponding i,j indices in the
  // ratings matrix. We expect this step to succeed, since when running the
  // chopper we checked that the correct chops are present.
  if (blamer_bundle != NULL) {
    blamer_bundle->SetupCorrectSegmentation(word_res->chopped_word,
                                            wordrec_debug_blamer);
  }

  MATRIX_COORD pain_point;
  float pain_point_priority;

  // pending[col] tells whether there is update work to do to combine
  // best_choice_bundle->beam[col - 1] with some BLOB_CHOICEs in matrix[col, *].
  // As the language model state is updated, pending entries are modified to
  // minimize duplication of work. It is important that during the update the
  // children are considered in the non-decreasing order of their column, since
  // this guarantees that all the parents would be up to date before an update
  // of a child is done.
  GenericVector<SegSearchPending> pending;
  pending.init_to_size(word_res->ratings->dimension(), SegSearchPending());

  // Search the ratings matrix for the initial best path.
  pending[0].SetColumnClassified();
  UpdateSegSearchNodes(rating_cert_scale, 0, &pending, word_res,
                       &pain_points, best_choice_bundle, blamer_bundle);

  if (!SegSearchDone(0)) {  // find a better choice
    if (chop_enable && word_res->chopped_word != NULL) {
      improve_by_chopping(rating_cert_scale, word_res, best_choice_bundle,
                          blamer_bundle, &pain_points, &pending);
    }
    if (chop_debug)
      print_seams("Final seam list:", word_res->seam_array);

    if (blamer_bundle != NULL &&
        !blamer_bundle->ChoiceIsCorrect(word_res->best_choice)) {
      blamer_bundle->SetChopperBlame(word_res, wordrec_debug_blamer);
    }
  }
  // Keep trying to find a better path by fixing the "pain points".

  int num_futile_classifications = 0;
  STRING blamer_debug;
  while (wordrec_enable_assoc &&
         (!SegSearchDone(num_futile_classifications) ||
          (blamer_bundle != NULL &&
           blamer_bundle->GuidedSegsearchStillGoing()))) {
    // Get the next valid "pain point".
    bool found_nothing = true;
    LMPainPointsType pp_type;
    while ((pp_type = pain_points.Deque(&pain_point, &pain_point_priority)) !=
           LM_PPTYPE_NUM) {
      if (!pain_point.Valid(*word_res->ratings)) {
        word_res->ratings->IncreaseBandSize(
            pain_point.row - pain_point.col + 1);
      }
      if (pain_point.Valid(*word_res->ratings) &&
          !word_res->ratings->Classified(pain_point.col, pain_point.row,
                                         getDict().WildcardID())) {
        found_nothing = false;
        break;
      }
    }
    if (found_nothing) {
      if (segsearch_debug_level > 0) tprintf("Pain points queue is empty\n");
      break;
    }
    ProcessSegSearchPainPoint(pain_point_priority, pain_point,
                              LMPainPoints::PainPointDescription(pp_type),
                              &pending, word_res, &pain_points, blamer_bundle);

    UpdateSegSearchNodes(rating_cert_scale, pain_point.col, &pending,
                         word_res, &pain_points, best_choice_bundle,
                         blamer_bundle);
    if (!best_choice_bundle->updated) ++num_futile_classifications;

    if (segsearch_debug_level > 0) {
      tprintf("num_futile_classifications %d\n", num_futile_classifications);
    }

    best_choice_bundle->updated = false;  // reset updated

    // See if it's time to terminate SegSearch or time for starting a guided
    // search for the true path to find the blame for the incorrect best_choice.
    if (SegSearchDone(num_futile_classifications) &&
        blamer_bundle != NULL &&
        blamer_bundle->GuidedSegsearchNeeded(word_res->best_choice)) {
      InitBlamerForSegSearch(word_res, &pain_points, blamer_bundle,
                             &blamer_debug);
    }
  }  // end while loop exploring alternative paths
  if (blamer_bundle != NULL) {
    blamer_bundle->FinishSegSearch(word_res->best_choice,
                                   wordrec_debug_blamer, &blamer_debug);
  }

  if (segsearch_debug_level > 0) {
    tprintf("Done with SegSearch (AcceptableChoiceFound: %d)\n",
            language_model_->AcceptableChoiceFound());
  }
}
bool tesseract::Wordrec::SegSearchDone | ( | int | num_futile_classifications | ) | [inline, protected] |
Definition at line 430 of file wordrec.h.
{
  // The search is done once the language model has an acceptable choice...
  if (language_model_->AcceptableChoiceFound()) {
    return true;
  }
  // ...or once the count of futile classifications has reached its limit.
  return num_futile_classifications >= segsearch_max_futile_classifications;
}
int tesseract::Wordrec::select_blob_to_split | ( | const GenericVector< BLOB_CHOICE * > & | blob_choices, |
float | rating_ceiling, | ||
bool | split_next_to_fragment | ||
) |
Definition at line 587 of file chopper.cpp.
{
  // Returns the index of the blob to split next:
  //  - the first index whose choice is NULL, if there is one before the end;
  //  - otherwise the highest-rated blob (rating below rating_ceiling and
  //    certainty below tessedit_certainty_threshold) that sits next to a
  //    fragment which could be expanded, when split_next_to_fragment is set
  //    and such a blob exists;
  //  - otherwise the highest-rated blob meeting the same two limits;
  //  - -1 when no blob qualifies.
  BLOB_CHOICE *blob_choice;
  int x;
  float worst = -MAX_FLOAT32;
  int worst_index = -1;
  float worst_near_fragment = -MAX_FLOAT32;
  int worst_index_near_fragment = -1;
  const CHAR_FRAGMENT **fragments = NULL;

  if (chop_debug) {
    if (rating_ceiling < MAX_FLOAT32)
      tprintf("rating_ceiling = %8.4f\n", rating_ceiling);
    else
      tprintf("rating_ceiling = No Limit\n");
  }

  if (split_next_to_fragment && blob_choices.size() > 0) {
    fragments = new const CHAR_FRAGMENT *[blob_choices.length()];
    if (blob_choices[0] != NULL) {
      fragments[0] = getDict().getUnicharset().get_fragment(
          blob_choices[0]->unichar_id());
    } else {
      fragments[0] = NULL;
    }
  }

  for (x = 0; x < blob_choices.size(); ++x) {
    if (blob_choices[x] == NULL) {
      // A blob without any choice is returned immediately.
      if (fragments != NULL) {
        delete[] fragments;
      }
      return x;
    } else {
      blob_choice = blob_choices[x];
      // Populate fragments for the following position.
      if (split_next_to_fragment && x+1 < blob_choices.size()) {
        if (blob_choices[x + 1] != NULL) {
          fragments[x + 1] = getDict().getUnicharset().get_fragment(
              blob_choices[x + 1]->unichar_id());
        } else {
          fragments[x + 1] = NULL;
        }
      }
      if (blob_choice->rating() < rating_ceiling &&
          blob_choice->certainty() < tessedit_certainty_threshold) {
        // Update worst and worst_index.
        if (blob_choice->rating() > worst) {
          worst_index = x;
          worst = blob_choice->rating();
        }
        if (split_next_to_fragment) {
          // Update worst_near_fragment and worst_index_near_fragment.
          bool expand_following_fragment =
              (x + 1 < blob_choices.size() &&
               fragments[x+1] != NULL && !fragments[x+1]->is_beginning());
          bool expand_preceding_fragment =
              (x > 0 && fragments[x-1] != NULL && !fragments[x-1]->is_ending());
          if ((expand_following_fragment || expand_preceding_fragment) &&
              blob_choice->rating() > worst_near_fragment) {
            worst_index_near_fragment = x;
            worst_near_fragment = blob_choice->rating();
            if (chop_debug) {
              tprintf("worst_index_near_fragment=%d"
                      " expand_following_fragment=%d"
                      " expand_preceding_fragment=%d\n",
                      worst_index_near_fragment,
                      expand_following_fragment,
                      expand_preceding_fragment);
            }
          }
        }
      }
    }
  }
  if (fragments != NULL) {
    delete[] fragments;
  }
  // TODO(daria): maybe a threshold of badness for
  // worst_near_fragment would be useful.
  return worst_index_near_fragment != -1 ?
      worst_index_near_fragment : worst_index;
}
int tesseract::Wordrec::select_blob_to_split_from_fixpt | ( | DANGERR * | fixpt | ) |
Definition at line 679 of file chopper.cpp.
{
  // Returns the begin index of the first entry in fixpt that covers exactly
  // one position (begin + 1 == end) and is flagged both dangerous and
  // correct_is_ngram. Returns -1 when fixpt is NULL or no entry qualifies.
  if (fixpt == NULL) {
    return -1;
  }
  int index = 0;
  while (index < fixpt->size()) {
    bool covers_single_position =
        (*fixpt)[index].begin + 1 == (*fixpt)[index].end;
    if (covers_single_position &&
        (*fixpt)[index].dangerous &&
        (*fixpt)[index].correct_is_ngram) {
      return (*fixpt)[index].begin;
    }
    ++index;
  }
  return -1;
}
void tesseract::Wordrec::set_outline_bounds | ( | register EDGEPT * | point1, |
register EDGEPT * | point2, | ||
BOUNDS_RECT | rect | ||
) |
Definition at line 220 of file gradechop.cpp.
// Fills the four entries of rect from two calls to find_bounds_loop:
//   rect[0], rect[1] <- x_min, x_max after find_bounds_loop(point1, point2, ...)
//   rect[2], rect[3] <- x_min, x_max after find_bounds_loop(point2, point1, ...)
// x_min and x_max are reused as scratch outputs for both calls.
// NOTE(review): this_point is declared but never named in this body;
// presumably find_bounds_loop is a macro that uses it as its iterator, so do
// not remove or rename it without checking that definition.
{ register EDGEPT *this_point; register inT16 x_min; register inT16 x_max; find_bounds_loop(point1, point2, x_min, x_max); rect[0] = x_min; rect[1] = x_max; find_bounds_loop(point2, point1, x_min, x_max); rect[2] = x_min; rect[3] = x_max; }
void tesseract::Wordrec::set_pass1 | ( | ) |
Definition at line 87 of file tface.cpp.
// Configures the recognizer for pass 1, in this order:
//   1. sets chop_ok_split to the fixed value 70.0,
//   2. switches the language model's params model to ParamsModel::PTRAIN_PASS1,
//   3. calls SettupPass1().
// NOTE(review): 70.0 is hard-coded here, whereas set_pass2 reads its value
// from pass2_ok_split; confirm whether that asymmetry is intentional.
{ chop_ok_split.set_value(70.0); language_model_->getParamsModel().SetPass(ParamsModel::PTRAIN_PASS1); SettupPass1(); }
void tesseract::Wordrec::set_pass2 | ( | ) |
Definition at line 99 of file tface.cpp.
// Configures the recognizer for pass 2, in this order:
//   1. sets chop_ok_split to pass2_ok_split,
//   2. switches the language model's params model to ParamsModel::PTRAIN_PASS2,
//   3. calls SettupPass2().
{ chop_ok_split.set_value(pass2_ok_split); language_model_->getParamsModel().SetPass(ParamsModel::PTRAIN_PASS2); SettupPass2(); }
void tesseract::Wordrec::try_point_pairs | ( | EDGEPT * | points[MAX_NUM_POINTS], |
inT16 | num_points, | ||
SeamQueue * | seam_queue, | ||
SeamPile * | seam_pile, | ||
SEAM ** | seam, | ||
TBLOB * | blob | ||
) |
Definition at line 428 of file findseam.cpp.
{
  // Considers every unordered pair (first, second) of the given points. A
  // pair becomes a candidate split when the second point is non-NULL, the
  // weighted distance between the two is below chop_split_length, neither
  // point is the other's immediate successor, and neither is an exterior
  // point with respect to the other. Each candidate is handed to
  // choose_best_seam() together with its partial split priority.
  for (inT16 first = 0; first < num_points; first++) {
    for (inT16 second = first + 1; second < num_points; second++) {
      if (!points[second]) {
        continue;
      }
      if (weighted_edgept_dist(points[first], points[second],
                               chop_x_y_weight) < chop_split_length &&
          points[first] != points[second]->next &&
          points[second] != points[first]->next &&
          !is_exterior_point(points[first], points[second]) &&
          !is_exterior_point(points[second], points[first])) {
        SPLIT *candidate = new_split(points[first], points[second]);
        PRIORITY candidate_priority = partial_split_priority(candidate);
        choose_best_seam(seam_queue, candidate, candidate_priority, seam,
                         blob, seam_pile);
      }
    }
  }
}
void tesseract::Wordrec::try_vertical_splits | ( | EDGEPT * | points[MAX_NUM_POINTS], |
inT16 | num_points, | ||
EDGEPT_CLIST * | new_points, | ||
SeamQueue * | seam_queue, | ||
SeamPile * | seam_pile, | ||
SEAM ** | seam, | ||
TBLOB * | blob | ||
) |
Definition at line 469 of file findseam.cpp.
{
  // For each point, asks vertical_projection_point() to look through every
  // outline of the blob for a vertical partner point. If one is found, is not
  // adjacent to the point along the outline, and lies within
  // chop_split_length (weighted distance), the pair is offered to
  // choose_best_seam() as a candidate split.
  for (inT16 index = 0; index < num_points; index++) {
    EDGEPT *vertical_point = NULL;
    for (TESSLINE *outline = blob->outlines; outline != NULL;
         outline = outline->next) {
      vertical_projection_point(points[index], outline->loop,
                                &vertical_point, new_points);
    }

    if (vertical_point == NULL) {
      continue;
    }
    if (points[index] != vertical_point->next &&
        vertical_point != points[index]->next &&
        weighted_edgept_dist(points[index], vertical_point,
                             chop_x_y_weight) < chop_split_length) {
      SPLIT *candidate = new_split(points[index], vertical_point);
      PRIORITY candidate_priority = partial_split_priority(candidate);
      choose_best_seam(seam_queue, candidate, candidate_priority, seam,
                       blob, seam_pile);
    }
  }
}
void tesseract::Wordrec::UpdateSegSearchNodes | ( | float | rating_cert_scale, |
int | starting_col, | ||
GenericVector< SegSearchPending > * | pending, | ||
WERD_RES * | word_res, | ||
LMPainPoints * | pain_points, | ||
BestChoiceBundle * | best_choice_bundle, | ||
BlamerBundle * | blamer_bundle | ||
) | [protected] |
Definition at line 162 of file segsearch.cpp.
{
  // Walks the ratings matrix from starting_col onwards and, for every column
  // with pending work, updates the language model state of each pending
  // (col, row) cell. Whenever a cell's state changes, the child column
  // (row + 1) is queued for a full revisit. Afterwards pain points are
  // generated from the best path (if it was updated), and all pending entries
  // and per-entry "updated" flags are reset.
  MATRIX *ratings = word_res->ratings;
  ASSERT_HOST(ratings->dimension() == pending->size());
  ASSERT_HOST(ratings->dimension() == best_choice_bundle->beam.size());

  for (int col = starting_col; col < ratings->dimension(); ++col) {
    SegSearchPending &col_pending = (*pending)[col];
    if (!col_pending.WorkToDo()) continue;

    // By default the whole band of the column is processed; a single pending
    // row narrows the range to just that row.
    int first_row = col;
    int last_row = MIN(ratings->dimension() - 1,
                       col + ratings->bandwidth() - 1);
    if (col_pending.SingleRow() >= 0) {
      first_row = last_row = col_pending.SingleRow();
    }
    if (segsearch_debug_level > 0) {
      tprintf("\n\nUpdateSegSearchNodes: col=%d, rows=[%d,%d], alljust=%d\n",
              col, first_row, last_row,
              col_pending.IsRowJustClassified(MAX_INT32));
    }

    // Visit each pending row of this column.
    for (int row = first_row; row <= last_row; ++row) {
      // The cell is the child; the beam entry of the previous column (if
      // any) is the parent.
      BLOB_CHOICE_LIST *current_node = ratings->get(col, row);
      LanguageModelState *parent_node = NULL;
      if (col != 0) {
        parent_node = best_choice_bundle->beam[col - 1];
      }
      if (current_node == NULL) {
        continue;
      }
      if (language_model_->UpdateState(col_pending.IsRowJustClassified(row),
                                       col, row, current_node, parent_node,
                                       pain_points, word_res,
                                       best_choice_bundle, blamer_bundle) &&
          row + 1 < ratings->dimension()) {
        // The state of this cell changed, so everything in the child column
        // has to be looked at again.
        (*pending)[row + 1].RevisitWholeColumn();
        if (segsearch_debug_level > 0) {
          tprintf("Added child col=%d to pending\n", row + 1);
        }
      }
    }
  }

  if (best_choice_bundle->best_vse != NULL) {
    ASSERT_HOST(word_res->StatesAllValid());
    if (best_choice_bundle->best_vse->updated) {
      pain_points->GenerateFromPath(rating_cert_scale,
                                    best_choice_bundle->best_vse, word_res);
      if (!best_choice_bundle->fixpt.empty()) {
        pain_points->GenerateFromAmbigs(best_choice_bundle->fixpt,
                                        best_choice_bundle->best_vse,
                                        word_res);
      }
    }
  }

  // This round of the search is finished: clear every pending entry and the
  // updated flag of every Viterbi state entry in the beam.
  for (int col = 0; col < pending->size(); ++col) {
    (*pending)[col].Clear();
    ViterbiStateEntry_IT vse_it(
        &best_choice_bundle->beam[col]->viterbi_state_entries);
    vse_it.mark_cycle_pt();
    while (!vse_it.cycled_list()) {
      vse_it.data()->updated = false;
      vse_it.forward();
    }
  }
}
void tesseract::Wordrec::vertical_projection_point | ( | EDGEPT * | split_point, |
EDGEPT * | target_point, | ||
EDGEPT ** | best_point, | ||
EDGEPT_CLIST * | new_points | ||
) |
Definition at line 329 of file chop.cpp.
{
  // Walks once around the outline that starts at target_point, looking for
  // edges whose x-range contains the x coordinate of split_point. For each
  // such edge (that does not touch split_point, does not start at a chop
  // point, and does not start at the current *best_point) a candidate point
  // is obtained from near_point(); pick_close_point() then decides whether it
  // replaces *best_point. Points for which near_point() returns true are also
  // recorded in new_points.
  EDGEPT_C_IT new_point_it(new_points);
  const int split_x = split_point->pos.x;   /* X value of the vertical */
  EDGEPT *candidate;                        /* Point proposed for an edge */

  /* Distance to the best point known so far */
  int best_dist;
  if (*best_point == NULL) {
    best_dist = LARGE_DISTANCE;
  } else {
    best_dist = edgept_dist(split_point, *best_point);
  }

  EDGEPT *cursor = target_point;            /* Start of the current edge */
  do {
    /* Does the edge cursor -> cursor->next straddle the vertical? */
    bool spans_split_x =
        ((cursor->pos.x <= split_x && split_x <= cursor->next->pos.x) ||
         (cursor->next->pos.x <= split_x && split_x <= cursor->pos.x));
    if (spans_split_x &&
        !same_point(split_point->pos, cursor->pos) &&
        !same_point(split_point->pos, cursor->next->pos) &&
        !cursor->IsChopPt() &&
        (*best_point == NULL ||
         !same_point((*best_point)->pos, cursor->pos))) {
      if (near_point(split_point, cursor, cursor->next, &candidate)) {
        new_point_it.add_before_then_move(candidate);
      }

      if (*best_point == NULL) {
        best_dist = edgept_dist(split_point, candidate);
      }

      candidate = pick_close_point(split_point, candidate, &best_dist);
      if (candidate) {
        *best_point = candidate;
      }
    }
    /* Advance only after the edge has been processed. */
    cursor = cursor->next;
  } while (cursor != target_point);
}
bool tesseract::Wordrec::assume_fixed_pitch_char_segment = FALSE |
double tesseract::Wordrec::chop_center_knob = 0.15 |
int tesseract::Wordrec::chop_debug = 0 |
bool tesseract::Wordrec::chop_enable = 1 |
double tesseract::Wordrec::chop_good_split = 50.0 |
int tesseract::Wordrec::chop_inside_angle = -50 |
int tesseract::Wordrec::chop_min_outline_area = 2000 |
double tesseract::Wordrec::chop_ok_split = 100.0 |
double tesseract::Wordrec::chop_overlap_knob = 0.9 |
int tesseract::Wordrec::chop_seam_pile_size = 150 |
double tesseract::Wordrec::chop_sharpness_knob = 0.06 |
double tesseract::Wordrec::chop_split_dist_knob = 0.5 |
int tesseract::Wordrec::chop_split_length = 10000 |
double tesseract::Wordrec::chop_width_change_knob = 5.0 |
void(Wordrec::* tesseract::Wordrec::fill_lattice_)(const MATRIX &ratings, const WERD_CHOICE_LIST &best_choices, const UNICHARSET &unicharset, BlamerBundle *blamer_bundle) |
bool tesseract::Wordrec::force_word_assoc = FALSE |
bool tesseract::Wordrec::fragments_guide_chopper = FALSE |
bool tesseract::Wordrec::merge_fragments_in_matrix = TRUE |
bool tesseract::Wordrec::save_alt_choices = true |
double tesseract::Wordrec::segsearch_max_char_wh_ratio = 2.0 |
double tesseract::Wordrec::tessedit_certainty_threshold = -2.25 |
bool tesseract::Wordrec::wordrec_debug_blamer = false |
bool tesseract::Wordrec::wordrec_enable_assoc = TRUE |
bool tesseract::Wordrec::wordrec_no_block = FALSE |
bool tesseract::Wordrec::wordrec_run_blamer = false |
bool tesseract::Wordrec::wordrec_skip_no_truth_words = false |
double tesseract::Wordrec::wordrec_worst_state = 1 |