tesseract
3.03
|
#include <pageres.h>
WERD_RES::WERD_RES | ( | ) | [inline] |
Definition at line 318 of file pageres.h.
{
  // Default constructor body: reset the non-pointer members first, then the
  // pointer members, using the two class-level init helpers.
  InitNonPointers();
  InitPointers();
}
WERD_RES::WERD_RES | ( | WERD * | the_word | ) | [inline] |
Definition at line 322 of file pageres.h.
{
  // Start from the same state that default construction produces...
  InitNonPointers();
  InitPointers();
  // ...then remember the caller's WERD. Only the pointer is stored here; this
  // constructor makes no copy of the WERD.
  word = the_word;
}
WERD_RES::WERD_RES | ( | const WERD_RES & | source | ) | [inline] |
Definition at line 329 of file pageres.h.
{
  // Copy constructor body. The pointer members are set up first, then the
  // real copying is delegated to the assignment operator (see operator=).
  InitPointers();
  *this = source;
}
Definition at line 1030 of file pageres.cpp.
{
  // Release everything held by this object by delegating to Clear().
  // NOTE(review): the signature row for this body is missing from the page;
  // presumably this is the destructor -- confirm against pageres.cpp.
  Clear();
}
bool WERD_RES::AlternativeChoiceAdjustmentsWorseThan | ( | float | threshold | ) | const |
Definition at line 390 of file pageres.cpp.
{
  // Returns true when no visited entry of best_choices has an adjust_factor()
  // at or below threshold. The iterator is advanced once before the first
  // test, so the element it starts on is not examined.
  // The const_cast is only needed to build an iterator; the list is not
  // modified here.
  WERD_CHOICE_IT alt_it(const_cast<WERD_CHOICE_LIST*>(&best_choices));
  alt_it.forward();
  while (!alt_it.at_first()) {
    WERD_CHOICE* alternative = alt_it.data();
    if (alternative->adjust_factor() <= threshold)
      return false;
    alt_it.forward();
  }
  return true;
}
bool WERD_RES::AnyLtrCharsInWord | ( | ) | const [inline] |
Definition at line 388 of file pageres.h.
{
  // Returns true if any legal unichar of best_choice has direction
  // U_LEFT_TO_RIGHT. Returns false when there is no unicharset, no best
  // choice, or the best choice is empty.
  if (uch_set == NULL || best_choice == NULL || best_choice->length() < 1)
    return false;
  for (int pos = 0; pos < best_choice->length(); ++pos) {
    int unichar_id = best_choice->unichar_id(pos);
    // Ids outside the unicharset are ignored rather than looked up.
    bool legal_id = unichar_id >= 0 && unichar_id < uch_set->size();
    if (legal_id &&
        uch_set->get_direction(unichar_id) == UNICHARSET::U_LEFT_TO_RIGHT)
      return true;
  }
  return false;
}
bool WERD_RES::AnyRtlCharsInWord | ( | ) | const [inline] |
Definition at line 371 of file pageres.h.
{
  // Returns true if any legal unichar of best_choice has one of the
  // directions U_RIGHT_TO_LEFT, U_RIGHT_TO_LEFT_ARABIC or U_ARABIC_NUMBER.
  // Returns false when there is no unicharset, no best choice, or the best
  // choice is empty.
  if (uch_set == NULL || best_choice == NULL || best_choice->length() < 1)
    return false;
  for (int pos = 0; pos < best_choice->length(); ++pos) {
    int unichar_id = best_choice->unichar_id(pos);
    if (unichar_id < 0 || unichar_id >= uch_set->size())
      continue;  // Ignore illegal chars.
    switch (uch_set->get_direction(unichar_id)) {
      case UNICHARSET::U_RIGHT_TO_LEFT:
      case UNICHARSET::U_RIGHT_TO_LEFT_ARABIC:
      case UNICHARSET::U_ARABIC_NUMBER:
        return true;
      default:
        break;
    }
  }
  return false;
}
void WERD_RES::BestChoiceToCorrectText | ( | ) |
Definition at line 862 of file pageres.cpp.
{
  // Rebuilds correct_text with one STRING per unichar of best_choice, using
  // the unicharset's string for each id. Asserts that best_choice exists.
  correct_text.clear();
  ASSERT_HOST(best_choice != NULL);
  int pos = 0;
  while (pos < best_choice->length()) {
    const char* unichar_str =
        uch_set->id_to_unichar(best_choice->unichar_id(pos));
    correct_text.push_back(STRING(unichar_str));
    ++pos;
  }
}
const char* const WERD_RES::BestUTF8 | ( | int | blob_index, |
bool | in_rtl_context | ||
) | const [inline] |
Definition at line 341 of file pageres.h.
{
  // Returns the unicharset's extended string for the best_choice unichar at
  // blob_index, or NULL when the index or the id is out of range.
  if (blob_index < 0 || best_choice == NULL ||
      blob_index >= best_choice->length())
    return NULL;
  UNICHAR_ID id = best_choice->unichar_id(blob_index);
  if (id < 0 || id >= uch_set->size() || id == INVALID_UNICHAR_ID)
    return NULL;
  // In a right-to-left context the mirrored unichar is substituted, but only
  // when the mirror id is positive and not the invalid id.
  const UNICHAR_ID mirror_id = uch_set->get_mirror(id);
  const bool use_mirror =
      in_rtl_context && mirror_id > 0 && mirror_id != INVALID_UNICHAR_ID;
  return uch_set->id_to_unichar_ext(use_mirror ? mirror_id : id);
}
UNICHAR_ID WERD_RES::BothHyphens | ( | UNICHAR_ID | id1, |
UNICHAR_ID | id2 | ||
) |
Definition at line 969 of file pageres.cpp.
{
  // Returns the id of "-" when both ids map to a single-byte string that is
  // either '-' or '~'; otherwise returns INVALID_UNICHAR_ID.
  const char *first = uch_set->id_to_unichar(id1);
  const char *second = uch_set->id_to_unichar(id2);
  if (strlen(first) != 1 || strlen(second) != 1)
    return INVALID_UNICHAR_ID;
  const bool first_is_dash = *first == '-' || *first == '~';
  if (!first_is_dash)
    return INVALID_UNICHAR_ID;
  const bool second_is_dash = *second == '-' || *second == '~';
  if (!second_is_dash)
    return INVALID_UNICHAR_ID;
  return uch_set->unichar_to_id("-");
}
UNICHAR_ID WERD_RES::BothQuotes | ( | UNICHAR_ID | id1, |
UNICHAR_ID | id2 | ||
) |
Definition at line 947 of file pageres.cpp.
{
  // Returns the id of the double-quote unichar when is_simple_quote() accepts
  // the strings of both ids; otherwise returns INVALID_UNICHAR_ID.
  const char *first = uch_set->id_to_unichar(id1);
  const char *second = uch_set->id_to_unichar(id2);
  if (!is_simple_quote(first, strlen(first)))
    return INVALID_UNICHAR_ID;
  if (!is_simple_quote(second, strlen(second)))
    return INVALID_UNICHAR_ID;
  return uch_set->unichar_to_id("\"");
}
UNICHAR_ID WERD_RES::BothSpaces | ( | UNICHAR_ID | id1, |
UNICHAR_ID | id2 | ||
) |
Definition at line 998 of file pageres.cpp.
{
  // Returns id1 when both ids are equal and are the unicharset's id for " ";
  // otherwise returns INVALID_UNICHAR_ID. The unicharset lookup only happens
  // when the two ids already match.
  const bool both_space = id1 == id2 && id1 == uch_set->unichar_to_id(" ");
  return both_space ? id1 : INVALID_UNICHAR_ID;
}
void WERD_RES::Clear | ( | ) |
Definition at line 1074 of file pageres.cpp.
{
  // The WERD is deleted here only when this result is a combination; in every
  // case the pointer is then dropped. Deleting a null pointer is a no-op, so
  // no separate NULL test is required.
  if (combination)
    delete word;
  word = NULL;
  // The blamer bundle is always deleted.
  delete blamer_bundle;
  blamer_bundle = NULL;
  // Everything else is handled by ClearResults().
  ClearResults();
}
void WERD_RES::ClearRatings | ( | ) |
Definition at line 1129 of file pageres.cpp.
{
  // Nothing to do when there is no ratings matrix.
  if (ratings == NULL)
    return;
  // Free what the matrix points at, then the matrix itself.
  ratings->delete_matrix_pointers();
  delete ratings;
  ratings = NULL;
}
void WERD_RES::ClearResults | ( | ) |
Definition at line 1084 of file pageres.cpp.
{
  // Resets the recognition results. This body does not touch the word
  // pointer, and it clears the blamer bundle's results without deleting the
  // bundle.
  done = false;
  fontinfo = NULL;
  fontinfo2 = NULL;
  fontinfo_id_count = 0;
  fontinfo_id2_count = 0;

  // Delete the objects this body frees. Deleting a null pointer is a no-op,
  // so no NULL tests are needed.
  delete bln_boxes;
  bln_boxes = NULL;
  blob_row = NULL;  // Only the pointer is reset; nothing is deleted.
  delete chopped_word;
  chopped_word = NULL;
  delete rebuild_word;
  rebuild_word = NULL;
  delete box_word;
  box_word = NULL;

  // Empty the per-blob containers. The seams are deleted before the array is
  // cleared.
  best_state.clear();
  correct_text.clear();
  seam_array.delete_data_pointers();
  seam_array.clear();
  blob_widths.clear();
  blob_gaps.clear();

  ClearRatings();
  ClearWordChoices();
  if (blamer_bundle != NULL)
    blamer_bundle->ClearResults();
}
void WERD_RES::ClearWordChoices | ( | ) |
Definition at line 1117 of file pageres.cpp.
{
  // best_choice is only reset, never deleted, in this body.
  best_choice = NULL;
  // raw_choice and ep_choice are deleted here. Deleting a null pointer is a
  // no-op, so no NULL tests are needed.
  delete raw_choice;
  raw_choice = NULL;
  best_choices.clear();
  delete ep_choice;
  ep_choice = NULL;
}
void WERD_RES::CloneChoppedToRebuild | ( | ) |
Definition at line 774 of file pageres.cpp.
{
  // Replace rebuild_word with a copy of chopped_word and rebuild box_word.
  delete rebuild_word;  // Deleting a null pointer is a no-op.
  rebuild_word = new TWERD(*chopped_word);
  SetupBoxWord();

  // Append one state of 1 and one empty correct-text string per box.
  const int num_boxes = box_word->length();
  best_state.reserve(num_boxes);
  correct_text.reserve(num_boxes);
  int remaining = num_boxes;
  while (remaining-- > 0) {
    best_state.push_back(1);
    correct_text.push_back(STRING(""));
  }
}
void WERD_RES::ComputeAdaptionThresholds | ( | float | certainty_scale, |
float | min_rating, | ||
float | max_rating, | ||
float | rating_margin, | ||
float * | thresholds | ||
) |
Definition at line 503 of file pageres.cpp.
{
  // Writes one threshold per unichar of best_choice into thresholds[].
  // For each best-choice blob, the chunks it spans are compared with the raw
  // choice's classification of the same chunks. If any differ, the threshold
  // is derived from the mean raw certainty of the differing chunks; otherwise
  // it is max_rating. The result is clamped to [min_rating, max_rating].
  //
  // thresholds must have room for best_choice->length() floats.
  //
  // Fix: end_chunk used to be set once to state(0) and never advanced. After
  // the first blob, chunk == end_chunk, so the comparison loop was skipped
  // and every later blob got max_rating. end_chunk is now moved to the end of
  // blob i at the top of each iteration.
  int chunk = 0;
  int end_chunk = 0;
  int end_raw_chunk = raw_choice->state(0);
  int raw_blob = 0;
  for (int i = 0; i < best_choice->length(); i++, thresholds++) {
    float avg_rating = 0.0f;
    int num_error_chunks = 0;
    // Blob i covers chunks [chunk, end_chunk).
    end_chunk += best_choice->state(i);

    // For each chunk in best choice blob i, count non-matching raw results.
    while (chunk < end_chunk) {
      // Move to the next raw blob once the current one is used up.
      if (chunk >= end_raw_chunk) {
        ++raw_blob;
        end_raw_chunk += raw_choice->state(raw_blob);
      }
      if (best_choice->unichar_id(i) != raw_choice->unichar_id(raw_blob)) {
        avg_rating += raw_choice->certainty(raw_blob);
        ++num_error_chunks;
      }
      ++chunk;
    }

    if (num_error_chunks > 0) {
      avg_rating /= num_error_chunks;
      *thresholds = (avg_rating / -certainty_scale) * (1.0 - rating_margin);
    } else {
      *thresholds = max_rating;
    }

    if (*thresholds > max_rating)
      *thresholds = max_rating;
    if (*thresholds < min_rating)
      *thresholds = min_rating;
  }
}
bool WERD_RES::ConditionalBlobMerge | ( | TessResultCallback2< UNICHAR_ID, UNICHAR_ID, UNICHAR_ID > * | class_cb, |
TessResultCallback2< bool, const TBOX &, const TBOX & > * | box_cb | ||
) |
Definition at line 877 of file pageres.cpp.
{
  // Walks adjacent pairs of best_choice unichars. A pair is merged when
  // class_cb returns a valid replacement id for it and box_cb (if given)
  // accepts the two boxes. Both callbacks are deleted before returning.
  // Returns true if at least one merge was made.
  ASSERT_HOST(best_choice->length() == 0 || ratings != NULL);
  bool modified = false;
  for (int pos = 0; pos + 1 < best_choice->length(); ++pos) {
    UNICHAR_ID merged_id = class_cb->Run(best_choice->unichar_id(pos),
                                         best_choice->unichar_id(pos + 1));
    if (merged_id == INVALID_UNICHAR_ID)
      continue;
    if (box_cb != NULL &&
        !box_cb->Run(box_word->BlobBox(pos), box_word->BlobBox(pos + 1)))
      continue;

    // Raw choice should not be fixed.
    best_choice->set_unichar_id(merged_id, pos);
    modified = true;
    MergeAdjacentBlobs(pos);

    // Widen the ratings band if the merged blob's coordinate is not valid
    // for the matrix.
    const MATRIX_COORD& coord = best_choice->MatrixCoord(pos);
    if (!coord.Valid(*ratings))
      ratings->IncreaseBandSize(coord.row + 1 - coord.col);

    BLOB_CHOICE_LIST* merged_choices = GetBlobChoices(pos);
    if (FindMatchingChoice(merged_id, merged_choices) == NULL) {
      // Insert a fake result.
      BLOB_CHOICE* fake_choice = new BLOB_CHOICE;
      fake_choice->set_unichar_id(merged_id);
      BLOB_CHOICE_IT choice_it(merged_choices);
      choice_it.add_before_then_move(fake_choice);
    }
  }
  delete class_cb;
  delete box_cb;
  return modified;
}
void WERD_RES::ConsumeWordResults | ( | WERD_RES * | word | ) |
Definition at line 707 of file pageres.cpp.
{
  // Takes the recognition results out of *word and installs them in this
  // object. Note that the parameter `word` hides the member of the same name
  // inside this body.
  denorm = word->denorm;
  blob_row = word->blob_row;

  // NOTE(review): MovePointerData is defined elsewhere; presumably it
  // transfers the pointee and leaves the source empty -- confirm.
  MovePointerData(&chopped_word, &word->chopped_word);
  MovePointerData(&rebuild_word, &word->rebuild_word);
  MovePointerData(&box_word, &word->box_word);

  // Drop our own seams, copy the source's array, then empty the source's
  // array.
  seam_array.delete_data_pointers();
  seam_array = word->seam_array;
  word->seam_array.clear();

  best_state.move(&word->best_state);
  correct_text.move(&word->correct_text);
  blob_widths.move(&word->blob_widths);
  blob_gaps.move(&word->blob_gaps);

  if (ratings != NULL)
    ratings->delete_matrix_pointers();
  MovePointerData(&ratings, &word->ratings);

  // best_choice is taken by plain pointer copy; the list it lives in is
  // moved over just below.
  best_choice = word->best_choice;
  MovePointerData(&raw_choice, &word->raw_choice);
  best_choices.clear();
  WERD_CHOICE_IT dest_it(&best_choices);
  dest_it.add_list_after(&word->best_choices);

  reject_map = word->reject_map;
  if (word->blamer_bundle != NULL) {
    // A source bundle requires that we already have one to copy into.
    assert(blamer_bundle != NULL);
    blamer_bundle->CopyResults(*(word->blamer_bundle));
  }
  CopySimpleFields(*word);
}
void WERD_RES::copy_on | ( | WERD_RES * | word_res | ) | [inline] |
void WERD_RES::CopySimpleFields | ( | const WERD_RES & | source | ) |
Definition at line 204 of file pageres.cpp.
{
  // Member-by-member copy of the fields that need no allocation. Pointer
  // fields copied here (fontinfo, fontinfo2, uch_set, tesseract) are copied
  // as pointers only.

  // Recognition status.
  tess_failed = source.tess_failed;
  tess_accepted = source.tess_accepted;
  tess_would_adapt = source.tess_would_adapt;
  done = source.done;
  unlv_crunch_mode = source.unlv_crunch_mode;
  reject_spaces = source.reject_spaces;

  // Font attributes.
  small_caps = source.small_caps;
  italic = source.italic;
  bold = source.bold;
  fontinfo = source.fontinfo;
  fontinfo2 = source.fontinfo2;
  fontinfo_id_count = source.fontinfo_id_count;
  fontinfo_id2_count = source.fontinfo_id2_count;

  // Height measurements and whether they were guessed.
  x_height = source.x_height;
  caps_height = source.caps_height;
  guessed_x_ht = source.guessed_x_ht;
  guessed_caps_ht = source.guessed_caps_ht;

  // Shared context.
  uch_set = source.uch_set;
  tesseract = source.tesseract;
}
void WERD_RES::DebugWordChoices | ( | bool | debug, |
const char * | word_to_debug | ||
) |
Definition at line 431 of file pageres.cpp.
{
  // Prints the raw choice and every cooked choice when debug is set, or when
  // word_to_debug is non-empty and equals the best choice's unichar string.
  bool wanted = debug;
  if (!wanted) {
    wanted = word_to_debug != NULL && *word_to_debug != '\0' &&
             best_choice != NULL &&
             best_choice->unichar_string() == STRING(word_to_debug);
  }
  if (!wanted)
    return;

  if (raw_choice != NULL)
    raw_choice->print("\nBest Raw Choice");

  WERD_CHOICE_IT cooked_it(&best_choices);
  int cooked_index = 0;
  for (cooked_it.mark_cycle_pt(); !cooked_it.cycled_list();
       cooked_it.forward(), ++cooked_index) {
    STRING label;
    label.add_str_int("\nCooked Choice #", cooked_index);
    cooked_it.data()->print(label.string());
  }
}
static WERD_RES* WERD_RES::deep_copy | ( | const WERD_RES * | src | ) | [inline, static] |
void WERD_RES::FakeClassifyWord | ( | int | blob_count, |
BLOB_CHOICE ** | choices | ||
) |
Definition at line 818 of file pageres.cpp.
{
  // Installs the supplied choices as the classification result, one per blob
  // on the diagonal of a fresh ratings matrix. blob_count must equal the
  // number of boxes in box_word.
  ASSERT_HOST(box_word != NULL);
  ASSERT_HOST(blob_count == box_word->length());
  ClearWordChoices();
  ClearRatings();
  ratings = new MATRIX(blob_count, 1);
  int blob = 0;
  while (blob < blob_count) {
    // Each matrix cell gets a new list holding just choices[blob].
    BLOB_CHOICE_LIST* cell = new BLOB_CHOICE_LIST;
    BLOB_CHOICE_IT cell_it(cell);
    cell_it.add_after_then_move(choices[blob]);
    ratings->put(blob, blob, cell);
    ++blob;
  }
  FakeWordFromRatings();
  reject_map.initialise(blob_count);
}
void WERD_RES::FakeWordFromRatings | ( | ) |
Definition at line 837 of file pageres.cpp.
{
  // Builds a WERD_CHOICE from the first entry of each diagonal cell of the
  // ratings matrix and logs it as both the raw and the cooked choice.
  const int dim = ratings->dimension();
  WERD_CHOICE* fake_word = new WERD_CHOICE(uch_set, dim);
  fake_word->set_permuter(TOP_CHOICE_PERM);
  for (int diag = 0; diag < dim; ++diag) {
    // Values used when the cell is missing or empty.
    UNICHAR_ID unichar_id = UNICHAR_SPACE;
    float rating = MAX_INT32;
    float certainty = -MAX_INT32;
    BLOB_CHOICE_LIST* cell = ratings->get(diag, diag);
    if (cell != NULL && !cell->empty()) {
      BLOB_CHOICE_IT cell_it(cell);
      BLOB_CHOICE* top = cell_it.data();
      unichar_id = top->unichar_id();
      rating = top->rating();
      certainty = top->certainty();
    }
    fake_word->append_unichar_id_space_allocated(unichar_id, 1, rating,
                                                 certainty);
  }
  LogNewRawChoice(fake_word);
  // Ownership of fake_word taken by word here.
  LogNewCookedChoice(1, false, fake_word);
}
void WERD_RES::FilterWordChoices | ( | int | debug_level | ) |
Definition at line 454 of file pageres.cpp.
{
  // Removes from best_choices every alternative that has, at some chunk, a
  // different unichar from best_choice and a certainty that falls short of
  // best_choice's by more than the stopper threshold. The iterator is
  // advanced once before the loop, so the element it starts on is not
  // examined.
  if (best_choice == NULL || best_choices.singleton())
    return;

  if (debug_level >= 2)
    best_choice->print("\nFiltering against best choice");
  WERD_CHOICE_IT alt_it(&best_choices);
  int alt_number = 0;
  for (alt_it.forward(); !alt_it.at_first(); alt_it.forward(), ++alt_number) {
    WERD_CHOICE* alt = alt_it.data();
    float threshold = StopperAmbigThreshold(best_choice->adjust_factor(),
                                            alt->adjust_factor());
    // alt_blob and best_blob index the blob choice in alt and best_choice.
    // chunk is an index into the chopped_word blobs (AKA chunks).
    // Since the two words may use different segmentations of the chunks, we
    // iterate over the chunks to find out whether a comparable blob
    // classification is much worse than the best result.
    int alt_blob = 0;
    int best_blob = 0;
    int chunk = 0;
    // Each pass of the while deals with one chunk. On entry alt_end and
    // best_end are the indices of the first chunk in the NEXT blob, so the
    // blob indices only move once chunk has passed them.
    int alt_end = alt->state(0);
    int best_end = best_choice->state(0);
    while (alt_blob < alt->length() && best_blob < best_choice->length()) {
      if (alt->unichar_id(alt_blob) != best_choice->unichar_id(best_blob) &&
          alt->certainty(alt_blob) - best_choice->certainty(best_blob) <
              threshold) {
        if (debug_level >= 2) {
          STRING label;
          label.add_str_int("\nDiscarding bad choice #", alt_number);
          alt->print(label.string());
          tprintf("i %d j %d Chunk %d Choice->Blob[i].Certainty %.4g"
                  " BestChoice->ChunkCertainty[Chunk] %g Threshold %g\n",
                  alt_blob, best_blob, chunk, alt->certainty(alt_blob),
                  best_choice->certainty(best_blob), threshold);
        }
        delete alt_it.extract();
        break;
      }
      ++chunk;
      // If needed, advance alt_end to keep up with chunk.
      while (alt_end < chunk && ++alt_blob < alt->length())
        alt_end += alt->state(alt_blob);
      // If needed, advance best_end to keep up with chunk.
      while (best_end < chunk && ++best_blob < best_choice->length())
        best_end += best_choice->state(best_blob);
    }
  }
}
void WERD_RES::fix_hyphens | ( | ) |
Definition at line 986 of file pageres.cpp.
{
  // Don't create "-" if the unicharset lacks it or has it disabled.
  if (!uch_set->contains_unichar("-"))
    return;
  if (!uch_set->get_enabled(uch_set->unichar_to_id("-")))
    return;
  // Merge adjacent blobs selected by BothHyphens, subject to the
  // HyphenBoxesOverlap test on their boxes.
  ConditionalBlobMerge(
      NewPermanentTessCallback(this, &WERD_RES::BothHyphens),
      NewPermanentTessCallback(this, &WERD_RES::HyphenBoxesOverlap));
}
void WERD_RES::fix_quotes | ( | ) |
Definition at line 957 of file pageres.cpp.
{
  // Don't create the double quote if the unicharset lacks it or has it
  // disabled.
  if (!uch_set->contains_unichar("\""))
    return;
  if (!uch_set->get_enabled(uch_set->unichar_to_id("\"")))
    return;
  // Merge adjacent blobs selected by BothQuotes; no box test is applied.
  ConditionalBlobMerge(
      NewPermanentTessCallback(this, &WERD_RES::BothQuotes), NULL);
}
BLOB_CHOICE * WERD_RES::GetBlobChoice | ( | int | index | ) | const |
Definition at line 692 of file pageres.cpp.
{
  // Returns the entry of the blob's choice list that matches the best_choice
  // unichar at index, or NULL when index is out of range.
  if (index < 0)
    return NULL;
  if (index >= best_choice->length())
    return NULL;
  BLOB_CHOICE_LIST* candidates = GetBlobChoices(index);
  return FindMatchingChoice(best_choice->unichar_id(index), candidates);
}
BLOB_CHOICE_LIST * WERD_RES::GetBlobChoices | ( | int | index | ) | const |
Definition at line 701 of file pageres.cpp.
{
  // Asks best_choice for the choice list of blob `index` in the ratings
  // matrix. Neither best_choice nor the index is validated here.
  BLOB_CHOICE_LIST* blob_list = best_choice->blob_choices(index, ratings);
  return blob_list;
}
int WERD_RES::GetBlobsGap | ( | int | blob_index | ) |
Definition at line 682 of file pageres.cpp.
int WERD_RES::GetBlobsWidth | ( | int | start_blob, |
int | last_blob | ||
) |
Definition at line 672 of file pageres.cpp.
{
  // Sums the widths of blobs start_blob..last_blob (inclusive) plus the gaps
  // between them. Returns 0 when start_blob > last_blob. Indices are not
  // range-checked here.
  int total = 0;
  // Every blob before the last contributes its width and the gap after it.
  for (int b = start_blob; b < last_blob; ++b)
    total += blob_widths[b] + blob_gaps[b];
  // The last blob contributes only its width.
  if (start_blob <= last_blob)
    total += blob_widths[last_blob];
  return total;
}
bool WERD_RES::HyphenBoxesOverlap | ( | const TBOX & | box1, |
const TBOX & | box2 | ||
) |
Definition at line 980 of file pageres.cpp.
void WERD_RES::InitForRetryRecognition | ( | const WERD_RES & | source | ) |
Definition at line 230 of file pageres.cpp.
{
  // Points at the same WERD as source (pointer copy) and copies the simple
  // fields.
  word = source.word;
  CopySimpleFields(source);
  // When source has a blamer bundle, a new bundle is created here and filled
  // from it through CopyTruth.
  if (source.blamer_bundle == NULL)
    return;
  blamer_bundle = new BlamerBundle();
  blamer_bundle->CopyTruth(*source.blamer_bundle);
}
void WERD_RES::InitNonPointers | ( | ) |
Definition at line 1034 of file pageres.cpp.
{
  // Recognition status flags.
  tess_failed = FALSE;
  tess_accepted = FALSE;
  tess_would_adapt = FALSE;
  done = FALSE;
  unlv_crunch_mode = CR_NONE;
  reject_spaces = FALSE;

  // Combination bookkeeping.
  combination = FALSE;
  part_of_combo = FALSE;

  // Font attributes.
  small_caps = false;
  italic = FALSE;
  bold = FALSE;
  fontinfo_id_count = 0;
  fontinfo_id2_count = 0;

  // The fontinfos and tesseract count as non-pointers as they point to
  // data owned elsewhere.
  fontinfo = NULL;
  fontinfo2 = NULL;
  tesseract = NULL;

  // Heights start at zero and are flagged as guessed.
  x_height = 0.0;
  caps_height = 0.0;
  guessed_x_ht = TRUE;
  guessed_caps_ht = TRUE;
}
void WERD_RES::InitPointers | ( | ) |
Definition at line 1059 of file pageres.cpp.
{
  // Sets every pointer member to NULL. Nothing is freed here.

  // Source word, row and unicharset.
  word = NULL;
  blob_row = NULL;
  uch_set = NULL;

  // Word representations and boxes.
  bln_boxes = NULL;
  chopped_word = NULL;
  rebuild_word = NULL;
  box_word = NULL;

  // Classifier output.
  ratings = NULL;
  best_choice = NULL;
  raw_choice = NULL;
  ep_choice = NULL;

  blamer_bundle = NULL;
}
void WERD_RES::InsertSeam | ( | int | blob_number, |
SEAM * | seam | ||
) |
Definition at line 370 of file pageres.cpp.
{
  // Insert the seam into the SEAMS array.
  insert_seam(chopped_word, blob_number, seam, &seam_array);
  // Without a ratings matrix there is nothing else to update.
  if (ratings == NULL)
    return;

  // Expand the ratings matrix.
  ratings = ratings->ConsumeAndMakeBigger(blob_number);

  // Fix all the segmentation states: the raw choice first, then each cooked
  // choice.
  if (raw_choice != NULL)
    raw_choice->UpdateStateForSplit(blob_number);
  WERD_CHOICE_IT cooked_it(&best_choices);
  for (cooked_it.mark_cycle_pt(); !cooked_it.cycled_list();
       cooked_it.forward()) {
    cooked_it.data()->UpdateStateForSplit(blob_number);
  }
  SetupBlobWidthsAndGaps();
}
bool WERD_RES::IsAmbiguous | ( | ) |
Definition at line 403 of file pageres.cpp.
{
  // Ambiguous when best_choices is not a singleton list...
  if (!best_choices.singleton())
    return true;
  // ...or when the best choice reports a dangerous ambiguity.
  return best_choice->dangerous_ambig_found();
}
bool WERD_RES::LogNewCookedChoice | ( | int | max_num_choices, |
bool | debug, | ||
WERD_CHOICE * | word_choice | ||
) |
Definition at line 562 of file pageres.cpp.
{
  // Offers word_choice to the best_choices list. Returns true if it was
  // inserted; otherwise word_choice is deleted and false is returned.
  if (best_choice != NULL) {
    // Throw out obviously bad choices to save some work.
    // TODO(rays) Get rid of this! This piece of code produces different
    // results according to the order in which words are found, which is an
    // undesirable behavior. It would be better to keep all the choices and
    // prune them later when more information is available.
    float max_certainty_delta =
        StopperAmbigThreshold(best_choice->adjust_factor(),
                              word_choice->adjust_factor());
    if (max_certainty_delta > -kStopperAmbiguityThresholdOffset)
      max_certainty_delta = -kStopperAmbiguityThresholdOffset;
    if (word_choice->certainty() - best_choice->certainty() <
        max_certainty_delta) {
      if (debug) {
        STRING rejected_text;
        word_choice->string_and_lengths(&rejected_text, NULL);
        tprintf("Discarding choice \"%s\" with an overly low certainty"
                " %.3f vs best choice certainty %.3f (Threshold: %.3f)\n",
                rejected_text.string(), word_choice->certainty(),
                best_choice->certainty(),
                max_certainty_delta + best_choice->certainty());
      }
      delete word_choice;
      return false;
    }
  }

  // Insert in the list in order of increasing rating, but knock out worse
  // string duplicates.
  WERD_CHOICE_IT list_it(&best_choices);
  const STRING& candidate_text = word_choice->unichar_string();
  bool inserted = false;
  int kept = 0;  // Number of entries kept so far, including the new one.
  if (!list_it.empty()) {
    do {
      WERD_CHOICE* existing = list_it.data();
      if (existing->rating() > word_choice->rating() && !inserted) {
        // Time to insert.
        list_it.add_before_stay_put(word_choice);
        inserted = true;
        if (kept == 0)
          best_choice = word_choice;  // This is the new best.
        ++kept;
      }
      if (existing->unichar_string() == candidate_text) {
        if (inserted) {
          // New is better.
          delete list_it.extract();
        } else {
          // Old is better.
          if (debug) {
            tprintf("Discarding duplicate choice \"%s\", rating %g vs %g\n",
                    candidate_text.string(), word_choice->rating(),
                    existing->rating());
          }
          delete word_choice;
          return false;
        }
      } else {
        ++kept;
        // Entries beyond the allowed count are removed.
        if (kept > max_num_choices)
          delete list_it.extract();
      }
      list_it.forward();
    } while (!list_it.at_first());
  }

  // Not yet inserted but still room: append at the end.
  if (!inserted && kept < max_num_choices) {
    list_it.add_to_end(word_choice);
    inserted = true;
    if (kept == 0)
      best_choice = word_choice;  // This is the new best.
  }
  if (debug) {
    if (inserted)
      tprintf("New %s", best_choice == word_choice ? "Best" : "Secondary");
    else
      tprintf("Poor");
    word_choice->print(" Word Choice");
  }
  if (inserted)
    return true;
  delete word_choice;
  return false;
}
bool WERD_RES::LogNewRawChoice | ( | WERD_CHOICE * | word_choice | ) |
Definition at line 546 of file pageres.cpp.
{
  // Keep the existing raw choice unless the new word has a strictly lower
  // rating. word_choice itself is never stored; a copy is made.
  if (raw_choice != NULL && !(word_choice->rating() < raw_choice->rating()))
    return false;
  delete raw_choice;
  raw_choice = new WERD_CHOICE(*word_choice);
  raw_choice->set_permuter(TOP_CHOICE_PERM);
  return true;
}
void WERD_RES::merge_tess_fails | ( | ) |
Definition at line 1006 of file pageres.cpp.
{
  // Merge adjacent blobs selected by BothSpaces; no box test is applied.
  const bool merged = ConditionalBlobMerge(
      NewPermanentTessCallback(this, &WERD_RES::BothSpaces), NULL);
  if (!merged)
    return;
  // After a merge, the reject map and box word must match the best choice in
  // length.
  int len = best_choice->length();
  ASSERT_HOST(reject_map.length() == len);
  ASSERT_HOST(box_word->length() == len);
}
void WERD_RES::MergeAdjacentBlobs | ( | int | index | ) |
Definition at line 913 of file pageres.cpp.
{
  // Merges blob index + 1 into blob index across the per-blob structures.

  // Shrink the reject map only while it still matches best_choice in length.
  // This test must run before best_choice is shortened below.
  if (reject_map.length() == best_choice->length())
    reject_map.remove_pos(index);
  best_choice->remove_unichar_id(index + 1);

  // Both calls take the same (index, index + 2) argument pair.
  rebuild_word->MergeBlobs(index, index + 2);
  box_word->MergeBoxes(index, index + 2);

  // Fold the second blob's state into the first when that entry exists.
  const int next = index + 1;
  if (next < best_state.length()) {
    best_state[index] += best_state[next];
    best_state.remove(next);
  }
}
Definition at line 137 of file pageres.cpp.
{
  // Copy assignment. Existing contents are released with Clear() and then
  // rebuilt from source. The ratings matrix and seam_array are not copied.
  //
  // Fix: self-assignment is now a no-op. Without this guard, Clear() would
  // delete the very objects that the code below then tries to copy from.
  if (this == &source)
    return *this;

  this->ELIST_LINK::operator=(source);
  Clear();
  if (source.combination) {
    word = new WERD;
    *word = *(source.word);  // deep copy
  } else {
    word = source.word;  // pt to same word
  }
  if (source.bln_boxes != NULL)
    bln_boxes = new tesseract::BoxWord(*source.bln_boxes);
  if (source.chopped_word != NULL)
    chopped_word = new TWERD(*source.chopped_word);
  if (source.rebuild_word != NULL)
    rebuild_word = new TWERD(*source.rebuild_word);
  // TODO(rays) Do we ever need to copy the seam_array?
  blob_row = source.blob_row;
  denorm = source.denorm;
  if (source.box_word != NULL)
    box_word = new tesseract::BoxWord(*source.box_word);
  best_state = source.best_state;
  correct_text = source.correct_text;
  blob_widths = source.blob_widths;
  blob_gaps = source.blob_gaps;
  // None of the uses of operator= require the ratings matrix to be copied,
  // so don't as it would be really slow.

  // Copy the cooked choices. The const_cast is only needed to build an
  // iterator; source's list is not modified.
  WERD_CHOICE_IT wc_it(const_cast<WERD_CHOICE_LIST*>(&source.best_choices));
  WERD_CHOICE_IT wc_dest_it(&best_choices);
  for (wc_it.mark_cycle_pt(); !wc_it.cycled_list(); wc_it.forward()) {
    const WERD_CHOICE *choice = wc_it.data();
    wc_dest_it.add_after_then_move(new WERD_CHOICE(*choice));
  }
  if (!wc_dest_it.empty()) {
    // best_choice points at the first entry of the copied list.
    wc_dest_it.move_to_first();
    best_choice = wc_dest_it.data();
    best_choice_fontinfo_ids = source.best_choice_fontinfo_ids;
  } else {
    best_choice = NULL;
    if (!best_choice_fontinfo_ids.empty()) {
      best_choice_fontinfo_ids.clear();
    }
  }

  if (source.raw_choice != NULL) {
    raw_choice = new WERD_CHOICE(*source.raw_choice);
  } else {
    raw_choice = NULL;
  }
  if (source.ep_choice != NULL) {
    ep_choice = new WERD_CHOICE(*source.ep_choice);
  } else {
    ep_choice = NULL;
  }
  reject_map = source.reject_map;
  combination = source.combination;
  part_of_combo = source.part_of_combo;
  CopySimpleFields(source);
  if (source.blamer_bundle != NULL) {
    blamer_bundle = new BlamerBundle(*(source.blamer_bundle));
  }
  return *this;
}
bool WERD_RES::PiecesAllNatural | ( | int | start, |
int | count | ||
) | const |
Definition at line 1017 of file pageres.cpp.
{
  // Returns true if none of the seams at indices start .. start + count - 2
  // has a split1. Indices outside seam_array and NULL seams are skipped.
  const int end = start + count - 1;
  for (int seam_index = start; seam_index < end; ++seam_index) {
    if (seam_index < 0 || seam_index >= seam_array.size())
      continue;
    SEAM* seam = seam_array[seam_index];
    if (seam == NULL)
      continue;
    if (seam->split1 != NULL)
      return false;
  }
  return true;
}
void WERD_RES::PrintBestChoices | ( | ) | const |
Definition at line 659 of file pageres.cpp.
{
  // Prints best_choice's string followed by the strings of every entry in
  // best_choices, joined with `", "`. The const_cast is only needed to build
  // an iterator; the list is not modified.
  STRING joined;
  WERD_CHOICE_IT choice_it(const_cast<WERD_CHOICE_LIST*>(&best_choices));
  for (choice_it.mark_cycle_pt(); !choice_it.cycled_list();
       choice_it.forward()) {
    // A separator goes before every entry except the first.
    if (!choice_it.at_first())
      joined += "\", \"";
    joined += choice_it.data()->unichar_string();
  }
  tprintf("Alternates for \"%s\": {\"%s\"}\n",
          best_choice->unichar_string().string(), joined.string());
}
const char* const WERD_RES::RawUTF8 | ( | int | blob_index | ) | const [inline] |
Definition at line 354 of file pageres.h.
{
  // Returns the unicharset's string for the raw_choice unichar at
  // blob_index, or NULL when there is no raw choice, the index is out of
  // range, or the id is not a legal unicharset id.
  //
  // Fix: raw_choice is now checked for NULL before use, as BestUTF8 already
  // does for best_choice. InitPointers() leaves raw_choice NULL, so the old
  // code could dereference a null pointer.
  if (blob_index < 0 || raw_choice == NULL ||
      blob_index >= raw_choice->length())
    return NULL;
  UNICHAR_ID id = raw_choice->unichar_id(blob_index);
  if (id < 0 || id >= uch_set->size() || id == INVALID_UNICHAR_ID)
    return NULL;
  return uch_set->id_to_unichar(id);
}
void WERD_RES::RebuildBestState | ( | ) |
Definition at line 750 of file pageres.cpp.
{
  // Rebuilds rebuild_word and best_state from best_choice's states. Each
  // best-choice blob covers `pieces` consecutive chopped_word blobs starting
  // at first_piece.
  ASSERT_HOST(best_choice != NULL);
  delete rebuild_word;  // Deleting a null pointer is a no-op.
  rebuild_word = new TWERD;
  if (seam_array.empty())
    start_seam_list(chopped_word, &seam_array);
  best_state.truncate(0);
  int first_piece = 0;
  for (int blob_index = 0; blob_index < best_choice->length(); ++blob_index) {
    const int pieces = best_choice->state(blob_index);
    const bool multi_piece = pieces > 1;
    best_state.push_back(pieces);
    // Join the pieces, copy the blob at first_piece, then break the pieces
    // apart again.
    if (multi_piece) {
      join_pieces(seam_array, first_piece, first_piece + pieces - 1,
                  chopped_word);
    }
    TBLOB* joined_blob = chopped_word->blobs[first_piece];
    rebuild_word->blobs.push_back(new TBLOB(*joined_blob));
    if (multi_piece) {
      break_pieces(seam_array, first_piece, first_piece + pieces - 1,
                   chopped_word);
    }
    first_piece += pieces;
  }
}
void WERD_RES::ReplaceBestChoice | ( | WERD_CHOICE * | choice | ) |
Definition at line 737 of file pageres.cpp.
{
  // Makes `choice` the best choice and rebuilds the state that depends on it.
  best_choice = choice;
  RebuildBestState();
  SetupBoxWord();
  // Make up a fake reject map of the right length to keep the
  // rejection pass happy.
  reject_map.initialise(best_state.length());
  // The word is marked done, accepted and adaptable.
  tess_would_adapt = true;
  tess_accepted = true;
  done = true;
  SetScriptPositions();
}
void WERD_RES::SetAllScriptPositions | ( | tesseract::ScriptPos | position | ) |
Definition at line 806 of file pageres.cpp.
{
  // Applies `position` to the raw choice and then to every cooked choice.
  // raw_choice is not checked for NULL here.
  raw_choice->SetAllScriptPositions(position);
  WERD_CHOICE_IT cooked_it(&best_choices);
  for (cooked_it.mark_cycle_pt(); !cooked_it.cycled_list();
       cooked_it.forward()) {
    WERD_CHOICE* cooked = cooked_it.data();
    cooked->SetAllScriptPositions(position);
  }
}
void WERD_RES::SetScriptPositions | ( | ) |
Definition at line 799 of file pageres.cpp.
void WERD_RES::SetupBasicsFromChoppedWord | ( | const UNICHARSET & | unicharset_in | ) |
Definition at line 295 of file pageres.cpp.
void WERD_RES::SetupBlamerBundle | ( | ) |
Definition at line 345 of file pageres.cpp.
{
  // Nothing to do when there is no blamer bundle.
  if (blamer_bundle == NULL)
    return;
  // Hand the current denorm to the bundle.
  blamer_bundle->SetupNormTruthWord(denorm);
}
void WERD_RES::SetupBlobWidthsAndGaps | ( | ) |
Definition at line 352 of file pageres.cpp.
{
  // Recomputes blob_widths (one per chopped_word blob) and blob_gaps (one per
  // adjacent pair) from the blobs' bounding boxes.
  blob_widths.truncate(0);
  blob_gaps.truncate(0);
  const int blob_total = chopped_word->NumBlobs();
  for (int cur = 0; cur < blob_total; ++cur) {
    TBOX cur_box = chopped_word->blobs[cur]->bounding_box();
    blob_widths.push_back(cur_box.width());
    const int next = cur + 1;
    if (next >= blob_total)
      continue;  // The last blob has no gap after it.
    // Gap = left edge of the next blob minus right edge of this one.
    blob_gaps.push_back(
        chopped_word->blobs[next]->bounding_box().left() - cur_box.right());
  }
}
void WERD_RES::SetupBoxWord | ( | ) |
Definition at line 789 of file pageres.cpp.
{
  // Rebuilds box_word from rebuild_word and then clips it using the denorm's
  // block and the source word.
  delete box_word;  // Deleting a null pointer is a no-op.
  rebuild_word->ComputeBoundingBoxes();
  box_word = tesseract::BoxWord::CopyFromNormalized(rebuild_word);
  box_word->ClipToOriginalWord(denorm.block(), word);
}
void WERD_RES::SetupFake | ( | const UNICHARSET & | unicharset_in | ) |
Definition at line 304 of file pageres.cpp.
{
  // Sets this result up with a fake classification and marks it as failed.
  ClearResults();
  SetupWordScript(unicharset_in);
  chopped_word = new TWERD;
  rebuild_word = new TWERD;
  bln_boxes = new tesseract::BoxWord;
  box_word = new tesseract::BoxWord;

  const int blob_count = word->cblob_list()->length();
  if (blob_count <= 0) {
    // No blobs: log a single bad choice as both raw and cooked.
    WERD_CHOICE* bad_choice = new WERD_CHOICE(&unicharset_in);
    bad_choice->make_bad();
    LogNewRawChoice(bad_choice);
    // Ownership of bad_choice is taken by *this WERD_RES in
    // LogNewCookedChoice.
    LogNewCookedChoice(1, false, bad_choice);
  } else {
    BLOB_CHOICE** fake_choices = new BLOB_CHOICE*[blob_count];
    // For non-text blocks, just pass any blobs through to the box_word
    // and call the word failed with a fake classification.
    C_BLOB_IT blob_it(word->cblob_list());
    int next_slot = 0;
    for (blob_it.mark_cycle_pt(); !blob_it.cycled_list(); blob_it.forward()) {
      TBOX blob_box = blob_it.data()->bounding_box();
      box_word->InsertBox(box_word->length(), blob_box);
      fake_choices[next_slot] =
          new BLOB_CHOICE(0, 10.0f, -1.0f, -1, -1, -1, 0, 0, 0, BCC_FAKE);
      ++next_slot;
    }
    FakeClassifyWord(blob_count, fake_choices);
    // Only the pointer array is freed here, not the choices it points to.
    delete [] fake_choices;
  }
  tess_failed = true;
}
bool WERD_RES::SetupForRecognition | ( | const UNICHARSET & | unicharset_in, |
tesseract::Tesseract * | tess,
Pix * | pix, | ||
int | norm_mode, | ||
const TBOX * | norm_box, | ||
bool | numeric_mode, | ||
bool | use_body_size, | ||
bool | allow_detailed_fx, | ||
ROW * | row, | ||
const BLOCK * | block | ||
) |
Definition at line 255 of file pageres.cpp.
{
  // Prepares this result for recognition. Returns false after setting up a
  // fake result when the word has no blobs (outside cube-only mode) or the
  // block's poly block is not text. Otherwise builds the normalized chopped
  // word and an empty ratings matrix, and returns true.
  tesseract::OcrEngineMode norm_mode_hint =
      static_cast<tesseract::OcrEngineMode>(norm_mode);
  tesseract = tess;
  POLY_BLOCK* pb = NULL;
  if (block != NULL)
    pb = block->poly_block();

  bool use_fake = norm_mode_hint != tesseract::OEM_CUBE_ONLY &&
                  word->cblob_list()->empty();
  if (!use_fake)
    use_fake = pb != NULL && !pb->IsText();
  if (use_fake) {
    // Empty words occur when all the blobs have been moved to the rej_blobs
    // list, which seems to occur frequently in junk.
    SetupFake(unicharset_in);
    word->set_flag(W_REP_CHAR, false);
    return false;
  }

  ClearResults();
  SetupWordScript(unicharset_in);
  chopped_word = TWERD::PolygonalCopy(allow_detailed_fx, word);
  // Use the row's body size as the x-height only when requested and when the
  // row supplies a positive value; otherwise use this word's x_height.
  float word_xheight = x_height;
  if (use_body_size && row != NULL && row->body_size() > 0.0f)
    word_xheight = row->body_size();
  chopped_word->BLNormalize(block, row, pix, word->flag(W_INVERSE),
                            word_xheight, numeric_mode, norm_mode_hint,
                            norm_box, &denorm);
  blob_row = row;
  SetupBasicsFromChoppedWord(unicharset_in);
  SetupBlamerBundle();
  const int chopped_blob_count = chopped_word->NumBlobs();
  ratings = new MATRIX(chopped_blob_count, kWordrecMaxNumJoinChunks);
  tess_failed = false;
  return true;
}
void WERD_RES::SetupWordScript | ( | const UNICHARSET & | uch | ) |
Definition at line 336 of file pageres.cpp.
{
  // Remembers the unicharset and stamps the word with its default script id
  // and two script-derived flags.
  uch_set = &uch;
  const int default_script = uch.default_sid();
  word->set_script_id(default_script);
  const bool has_xheight = uch.script_has_xheight();
  word->set_flag(W_SCRIPT_HAS_XHEIGHT, has_xheight);
  const bool is_latin = default_script == uch.latin_sid();
  word->set_flag(W_SCRIPT_IS_LATIN, is_latin);
}
bool WERD_RES::StatesAllValid | ( | ) |
Definition at line 409 of file pageres.cpp.
{
  // Returns true when the raw choice and every cooked choice have a total of
  // states equal to the dimension of the ratings matrix. On the first
  // mismatch a diagnostic is printed and false is returned.
  int ratings_dim = ratings->dimension();
  if (raw_choice->TotalOfStates() != ratings_dim) {
    tprintf("raw_choice has total of states = %d vs ratings dim of %d\n",
            raw_choice->TotalOfStates(), ratings_dim);
    return false;
  }
  WERD_CHOICE_IT it(&best_choices);
  int index = 0;
  for (it.mark_cycle_pt(); !it.cycled_list(); it.forward(), ++index) {
    WERD_CHOICE* choice = it.data();
    if (choice->TotalOfStates() != ratings_dim) {
      // Fix: the format has three %d fields but `index` was not passed, so
      // the arguments were shifted and the last field read a missing one.
      tprintf("Cooked #%d has total of states = %d vs ratings dim of %d\n",
              index, choice->TotalOfStates(), ratings_dim);
      return false;
    }
  }
  return true;
}
UNICHARSET::Direction WERD_RES::SymbolDirection | ( | int | blob_index | ) | const [inline] |
Definition at line 363 of file pageres.h.
{
  // Returns the unicharset direction of the best_choice unichar at
  // blob_index. U_OTHER_NEUTRAL is returned when there is no best choice or
  // the index is out of range.
  if (best_choice == NULL)
    return UNICHARSET::U_OTHER_NEUTRAL;
  if (blob_index >= best_choice->length() || blob_index < 0)
    return UNICHARSET::U_OTHER_NEUTRAL;
  const UNICHAR_ID symbol_id = best_choice->unichar_id(blob_index);
  return uch_set->get_direction(symbol_id);
}
bool WERD_RES::UnicharsInReadingOrder | ( | ) | const [inline] |
Definition at line 405 of file pageres.h.
{
  // Forwards to best_choice's unichars_in_script_order().
  // best_choice is not checked for NULL here.
  const bool in_script_order = best_choice->unichars_in_script_order();
  return in_script_order;
}
WERD_CHOICE_LIST WERD_RES::best_choices |
float WERD_RES::caps_height |
const FontInfo* WERD_RES::fontinfo |
const FontInfo* WERD_RES::fontinfo2 |
bool WERD_RES::small_caps |
const UNICHARSET* WERD_RES::uch_set |
float WERD_RES::x_height |