tesseract
3.03
|
#include <ratngs.h>
Public Member Functions | |
WERD_CHOICE (const UNICHARSET *unicharset) | |
WERD_CHOICE (const UNICHARSET *unicharset, int reserved) | |
WERD_CHOICE (const char *src_string, const char *src_lengths, float src_rating, float src_certainty, uinT8 src_permuter, const UNICHARSET &unicharset) | |
WERD_CHOICE (const char *src_string, const UNICHARSET &unicharset) | |
WERD_CHOICE (const WERD_CHOICE &word) | |
~WERD_CHOICE () | |
const UNICHARSET * | unicharset () const |
int | length () const |
float | adjust_factor () const |
void | set_adjust_factor (float factor) |
const UNICHAR_ID * | unichar_ids () const |
const UNICHAR_ID | unichar_id (int index) const |
int | state (int index) const |
tesseract::ScriptPos | BlobPosition (int index) const |
float | rating () const |
float | certainty () const |
float | certainty (int index) const |
float | min_x_height () const |
float | max_x_height () const |
void | set_x_heights (float min_height, float max_height) |
uinT8 | permuter () const |
const char * | permuter_name () const |
BLOB_CHOICE_LIST * | blob_choices (int index, MATRIX *ratings) const |
MATRIX_COORD | MatrixCoord (int index) const |
void | set_unichar_id (UNICHAR_ID unichar_id, int index) |
bool | dangerous_ambig_found () const |
void | set_dangerous_ambig_found_ (bool value) |
void | set_rating (float new_val) |
void | set_certainty (float new_val) |
void | set_permuter (uinT8 perm) |
void | set_length (int len) |
void | double_the_size () |
Make more space in the unichar_ids_, script_pos_, state_ and certainties_ arrays. | |
void | init (int reserved) |
void | init (const char *src_string, const char *src_lengths, float src_rating, float src_certainty, uinT8 src_permuter) |
void | make_bad () |
Set the fields in this choice to be default (bad) values. | |
void | append_unichar_id_space_allocated (UNICHAR_ID unichar_id, int blob_count, float rating, float certainty) |
void | append_unichar_id (UNICHAR_ID unichar_id, int blob_count, float rating, float certainty) |
void | set_unichar_id (UNICHAR_ID unichar_id, int blob_count, float rating, float certainty, int index) |
void | set_blob_choice (int index, int blob_count, const BLOB_CHOICE *blob_choice) |
bool | contains_unichar_id (UNICHAR_ID unichar_id) const |
void | remove_unichar_ids (int index, int num) |
void | remove_last_unichar_id () |
void | remove_unichar_id (int index) |
bool | has_rtl_unichar_id () const |
void | reverse_and_mirror_unichar_ids () |
void | punct_stripped (int *start_core, int *end_core) const |
void | GetNonSuperscriptSpan (int *start, int *end) const |
WERD_CHOICE | shallow_copy (int start, int end) const |
void | string_and_lengths (STRING *word_str, STRING *word_lengths_str) const |
const STRING | debug_string () const |
bool | set_unichars_in_script_order (bool in_script_order) |
bool | unichars_in_script_order () const |
const STRING & | unichar_string () const |
const STRING & | unichar_lengths () const |
void | SetScriptPositions (bool small_caps, TWERD *word) |
void | SetScriptPositions (const tesseract::ScriptPos *positions, int length) |
void | SetAllScriptPositions (tesseract::ScriptPos position) |
int | GetTopScriptID () const |
void | UpdateStateForSplit (int blob_position) |
int | TotalOfStates () const |
void | print () const |
void | print (const char *msg) const |
void | print_state (const char *msg) const |
void | DisplaySegmentation (TWERD *word) |
WERD_CHOICE & | operator+= (const WERD_CHOICE &second) |
WERD_CHOICE & | operator= (const WERD_CHOICE &source) |
Static Public Member Functions | |
static const char * | permuter_name (uinT8 permuter) |
static tesseract::ScriptPos | ScriptPositionOf (bool print_debug, const UNICHARSET &unicharset, const TBOX &blob_box, UNICHAR_ID unichar_id) |
Static Public Attributes | |
static const float | kBadRating = 100000.0 |
WERD_CHOICE::WERD_CHOICE | ( | const UNICHARSET * | unicharset | ) | [inline] |
WERD_CHOICE::WERD_CHOICE | ( | const UNICHARSET * | unicharset, |
int | reserved | ||
) | [inline] |
WERD_CHOICE::WERD_CHOICE | ( | const char * | src_string, |
const char * | src_lengths, | ||
float | src_rating, | ||
float | src_certainty, | ||
uinT8 | src_permuter, | ||
const UNICHARSET & | unicharset | ||
) | [inline] |
WERD_CHOICE::WERD_CHOICE | ( | const char * | src_string, |
const UNICHARSET & | unicharset | ||
) |
Constructor to build a WERD_CHOICE from the given string. The function assumes that src_string is not NULL.
Definition at line 199 of file ratngs.cpp.
: unicharset_(&unicharset){ GenericVector<UNICHAR_ID> encoding; GenericVector<char> lengths; if (unicharset.encode_string(src_string, true, &encoding, &lengths, NULL)) { lengths.push_back('\0'); STRING src_lengths = &lengths[0]; this->init(src_string, src_lengths.string(), 0.0, 0.0, NO_PERM); } else { // There must have been an invalid unichar in the string. this->init(8); this->make_bad(); } }
WERD_CHOICE::WERD_CHOICE | ( | const WERD_CHOICE & | word | ) | [inline] |
WERD_CHOICE::~WERD_CHOICE | ( | ) |
Definition at line 255 of file ratngs.cpp.
{ delete[] unichar_ids_; delete[] script_pos_; delete[] state_; delete[] certainties_; }
float WERD_CHOICE::adjust_factor | ( | ) | const [inline] |
void WERD_CHOICE::append_unichar_id | ( | UNICHAR_ID | unichar_id, |
int | blob_count, | ||
float | rating, | ||
float | certainty | ||
) |
append_unichar_id
Make sure there is enough space in the word for the new unichar id and call append_unichar_id_space_allocated().
Definition at line 447 of file ratngs.cpp.
{ if (length_ == reserved_) { this->double_the_size(); } this->append_unichar_id_space_allocated(unichar_id, blob_count, rating, certainty); }
void WERD_CHOICE::append_unichar_id_space_allocated | ( | UNICHAR_ID | unichar_id, |
int | blob_count, | ||
float | rating, | ||
float | certainty | ||
) | [inline] |
This function assumes that there is enough space reserved in the WERD_CHOICE for adding another unichar. This is an efficient alternative to append_unichar_id().
Definition at line 434 of file ratngs.h.
{ assert(reserved_ > length_); length_++; this->set_unichar_id(unichar_id, blob_count, rating, certainty, length_-1); }
BLOB_CHOICE_LIST * WERD_CHOICE::blob_choices | ( | int | index, |
MATRIX * | ratings | ||
) | const |
Definition at line 269 of file ratngs.cpp.
{ MATRIX_COORD coord = MatrixCoord(index); BLOB_CHOICE_LIST* result = ratings->get(coord.col, coord.row); if (result == NULL) { result = new BLOB_CHOICE_LIST; ratings->put(coord.col, coord.row, result); } return result; }
tesseract::ScriptPos WERD_CHOICE::BlobPosition | ( | int | index | ) | const [inline] |
Definition at line 304 of file ratngs.h.
{ if (index < 0 || index >= length_) return tesseract::SP_NORMAL; return script_pos_[index]; }
float WERD_CHOICE::certainty | ( | ) | const [inline] |
float WERD_CHOICE::certainty | ( | int | index | ) | const [inline] |
bool WERD_CHOICE::contains_unichar_id | ( | UNICHAR_ID | unichar_id | ) | const |
contains_unichar_id
Returns true if unichar_ids_ contains the given unichar_id, false otherwise.
Definition at line 305 of file ratngs.cpp.
{ for (int i = 0; i < length_; ++i) { if (unichar_ids_[i] == unichar_id) { return true; } } return false; }
bool WERD_CHOICE::dangerous_ambig_found | ( | ) | const [inline] |
const STRING WERD_CHOICE::debug_string | ( | ) | const [inline] |
void WERD_CHOICE::DisplaySegmentation | ( | TWERD * | word | ) |
Definition at line 748 of file ratngs.cpp.
{ #ifndef GRAPHICS_DISABLED // Number of different colors to draw with. const int kNumColors = 6; static ScrollView *segm_window = NULL; // Check the state against the static prev_drawn_state. static GenericVector<int> prev_drawn_state; bool already_done = prev_drawn_state.size() == length_; if (!already_done) prev_drawn_state.init_to_size(length_, 0); for (int i = 0; i < length_; ++i) { if (prev_drawn_state[i] != state_[i]) { already_done = false; } prev_drawn_state[i] = state_[i]; } if (already_done || word->blobs.empty()) return; // Create the window if needed. if (segm_window == NULL) { segm_window = new ScrollView("Segmentation", 5, 10, 500, 256, 2000.0, 256.0, true); } else { segm_window->Clear(); } TBOX bbox; int blob_index = 0; for (int c = 0; c < length_; ++c) { ScrollView::Color color = static_cast<ScrollView::Color>(c % kNumColors + 3); for (int i = 0; i < state_[c]; ++i, ++blob_index) { TBLOB* blob = word->blobs[blob_index]; bbox += blob->bounding_box(); blob->plot(segm_window, color, color); } } segm_window->ZoomToRectangle(bbox.left(), bbox.top(), bbox.right(), bbox.bottom()); segm_window->Update(); window_wait(segm_window); #endif }
void WERD_CHOICE::double_the_size | ( | ) | [inline] |
Make more space in the unichar_ids_, script_pos_, state_ and certainties_ arrays. The capacity is doubled, or set to 1 if nothing was reserved yet.
Definition at line 369 of file ratngs.h.
{ if (reserved_ > 0) { unichar_ids_ = GenericVector<UNICHAR_ID>::double_the_size_memcpy( reserved_, unichar_ids_); script_pos_ = GenericVector<tesseract::ScriptPos>::double_the_size_memcpy( reserved_, script_pos_); state_ = GenericVector<int>::double_the_size_memcpy( reserved_, state_); certainties_ = GenericVector<float>::double_the_size_memcpy( reserved_, certainties_); reserved_ *= 2; } else { unichar_ids_ = new UNICHAR_ID[1]; script_pos_ = new tesseract::ScriptPos[1]; state_ = new int[1]; certainties_ = new float[1]; reserved_ = 1; } }
void WERD_CHOICE::GetNonSuperscriptSpan | ( | int * | start, |
int * | end | ||
) | const |
Definition at line 376 of file ratngs.cpp.
{ int end = length(); while (end > 0 && unicharset_->get_isdigit(unichar_ids_[end - 1]) && BlobPosition(end - 1) == tesseract::SP_SUPERSCRIPT) { end--; } int start = 0; while (start < end && unicharset_->get_isdigit(unichar_ids_[start]) && BlobPosition(start) == tesseract::SP_SUPERSCRIPT) { start++; } *pstart = start; *pend = end; }
int WERD_CHOICE::GetTopScriptID | ( | ) | const |
Definition at line 654 of file ratngs.cpp.
{ int max_script = unicharset_->get_script_table_size(); int *sid = new int[max_script]; int x; for (x = 0; x < max_script; x++) sid[x] = 0; for (x = 0; x < length_; ++x) { int script_id = unicharset_->get_script(unichar_id(x)); sid[script_id]++; } if (unicharset_->han_sid() != unicharset_->null_sid()) { // Add the Hiragana & Katakana counts to Han and zero them out. if (unicharset_->hiragana_sid() != unicharset_->null_sid()) { sid[unicharset_->han_sid()] += sid[unicharset_->hiragana_sid()]; sid[unicharset_->hiragana_sid()] = 0; } if (unicharset_->katakana_sid() != unicharset_->null_sid()) { sid[unicharset_->han_sid()] += sid[unicharset_->katakana_sid()]; sid[unicharset_->katakana_sid()] = 0; } } // Note that high script ID overrides lower one on a tie, thus biasing // towards non-Common script (if sorted that way in unicharset file). int max_sid = 0; for (x = 1; x < max_script; x++) if (sid[x] >= sid[max_sid]) max_sid = x; if (sid[max_sid] < length_ / 2) max_sid = unicharset_->null_sid(); delete[] sid; return max_sid; }
bool WERD_CHOICE::has_rtl_unichar_id | ( | ) | const |
has_rtl_unichar_id
Returns true if unichar_ids contain at least one "strongly" RTL unichar.
Definition at line 410 of file ratngs.cpp.
{ int i; for (i = 0; i < length_; ++i) { UNICHARSET::Direction dir = unicharset_->get_direction(unichar_ids_[i]); if (dir == UNICHARSET::U_RIGHT_TO_LEFT || dir == UNICHARSET::U_RIGHT_TO_LEFT_ARABIC) { return true; } } return false; }
void WERD_CHOICE::init | ( | int | reserved | ) | [inline] |
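Initializes WERD_CHOICE: reserves the requested number of slots (reserved) in the unichar_ids_, script_pos_, state_ and certainties_ arrays, or sets them to NULL when reserved is not positive. Sets length_ to 0 and all other fields to their default (blank) values.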
Definition at line 391 of file ratngs.h.
{ reserved_ = reserved; if (reserved > 0) { unichar_ids_ = new UNICHAR_ID[reserved]; script_pos_ = new tesseract::ScriptPos[reserved]; state_ = new int[reserved]; certainties_ = new float[reserved]; } else { unichar_ids_ = NULL; script_pos_ = NULL; state_ = NULL; certainties_ = NULL; } length_ = 0; adjust_factor_ = 1.0f; rating_ = 0.0; certainty_ = MAX_FLOAT32; min_x_height_ = 0.0f; max_x_height_ = MAX_FLOAT32; permuter_ = NO_PERM; unichars_in_script_order_ = false; // Tesseract is strict left-to-right. dangerous_ambig_found_ = false; }
void WERD_CHOICE::init | ( | const char * | src_string, |
const char * | src_lengths, | ||
float | src_rating, | ||
float | src_certainty, | ||
uinT8 | src_permuter | ||
) |
Helper function to build a WERD_CHOICE from the given string, fragment lengths, rating, certainty and permuter. The function assumes that src_string is not NULL. src_lengths argument could be NULL, in which case the unichars in src_string are assumed to all be of length 1.
Helper function to build a WERD_CHOICE from the given string, fragment lengths, rating, certainty and permuter.
The function assumes that src_string is not NULL. src_lengths argument could be NULL, in which case the unichars in src_string are assumed to all be of length 1.
Definition at line 224 of file ratngs.cpp.
{ int src_string_len = strlen(src_string); if (src_string_len == 0) { this->init(8); } else { this->init(src_lengths ? strlen(src_lengths): src_string_len); length_ = reserved_; int offset = 0; for (int i = 0; i < length_; ++i) { int unichar_length = src_lengths ? src_lengths[i] : 1; unichar_ids_[i] = unicharset_->unichar_to_id(src_string+offset, unichar_length); state_[i] = 1; certainties_[i] = src_certainty; offset += unichar_length; } } adjust_factor_ = 1.0f; rating_ = src_rating; certainty_ = src_certainty; permuter_ = src_permuter; dangerous_ambig_found_ = false; }
int WERD_CHOICE::length | ( | ) | const [inline] |
void WERD_CHOICE::make_bad | ( | ) | [inline] |
Set the fields in this choice to be default (bad) values.
Definition at line 425 of file ratngs.h.
{ length_ = 0; rating_ = kBadRating; certainty_ = -MAX_FLOAT32; }
MATRIX_COORD WERD_CHOICE::MatrixCoord | ( | int | index | ) | const |
Definition at line 281 of file ratngs.cpp.
{ int col = 0; for (int i = 0; i < index; ++i) col += state_[i]; int row = col + state_[index] - 1; return MATRIX_COORD(col, row); }
float WERD_CHOICE::max_x_height | ( | ) | const [inline] |
float WERD_CHOICE::min_x_height | ( | ) | const [inline] |
WERD_CHOICE & WERD_CHOICE::operator+= | ( | const WERD_CHOICE & | second | ) |
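Concatenates a second word choice onto the end of this one. The ratings are added and the certainty is the minimum of the two. If this word's permuter is NO_PERM it takes the second word's permuter; otherwise, if the second word's permuter is not NO_PERM and differs from this one's, the permuter is set to COMPOUND_PERM.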
Definition at line 464 of file ratngs.cpp.
{ ASSERT_HOST(unicharset_ == second.unicharset_); while (reserved_ < length_ + second.length()) { this->double_the_size(); } const UNICHAR_ID *other_unichar_ids = second.unichar_ids(); for (int i = 0; i < second.length(); ++i) { unichar_ids_[length_ + i] = other_unichar_ids[i]; state_[length_ + i] = second.state_[i]; certainties_[length_ + i] = second.certainties_[i]; script_pos_[length_ + i] = second.BlobPosition(i); } length_ += second.length(); if (second.adjust_factor_ > adjust_factor_) adjust_factor_ = second.adjust_factor_; rating_ += second.rating(); // add ratings if (second.certainty() < certainty_) // take min certainty_ = second.certainty(); if (second.dangerous_ambig_found_) dangerous_ambig_found_ = true; if (permuter_ == NO_PERM) { permuter_ = second.permuter(); } else if (second.permuter() != NO_PERM && second.permuter() != permuter_) { permuter_ = COMPOUND_PERM; } return *this; }
WERD_CHOICE & WERD_CHOICE::operator= | ( | const WERD_CHOICE & | source | ) |
Allocate enough memory to hold a copy of source and copy over all the information from source to this WERD_CHOICE.
Definition at line 500 of file ratngs.cpp.
{ while (reserved_ < source.length()) { this->double_the_size(); } unicharset_ = source.unicharset_; const UNICHAR_ID *other_unichar_ids = source.unichar_ids(); for (int i = 0; i < source.length(); ++i) { unichar_ids_[i] = other_unichar_ids[i]; state_[i] = source.state_[i]; certainties_[i] = source.certainties_[i]; script_pos_[i] = source.BlobPosition(i); } length_ = source.length(); adjust_factor_ = source.adjust_factor_; rating_ = source.rating(); certainty_ = source.certainty(); min_x_height_ = source.min_x_height(); max_x_height_ = source.max_x_height(); permuter_ = source.permuter(); dangerous_ambig_found_ = source.dangerous_ambig_found_; return *this; }
uinT8 WERD_CHOICE::permuter | ( | ) | const [inline] |
const char * WERD_CHOICE::permuter_name | ( | uinT8 | permuter | ) | [static] |
Definition at line 175 of file ratngs.cpp.
{ return kPermuterTypeNames[permuter]; }
const char * WERD_CHOICE::permuter_name | ( | ) | const |
Definition at line 262 of file ratngs.cpp.
{
return kPermuterTypeNames[permuter_];
}
void WERD_CHOICE::print | ( | ) | const [inline] |
void WERD_CHOICE::print | ( | const char * | msg | ) | const |
Print WERD_CHOICE to stdout.
Definition at line 711 of file ratngs.cpp.
{ tprintf("%s : ", msg); for (int i = 0; i < length_; ++i) { tprintf("%s", unicharset_->id_to_unichar(unichar_ids_[i])); } tprintf(" : R=%g, C=%g, F=%g, Perm=%d, xht=[%g,%g], ambig=%d\n", rating_, certainty_, adjust_factor_, permuter_, min_x_height_, max_x_height_, dangerous_ambig_found_); tprintf("pos"); for (int i = 0; i < length_; ++i) { tprintf("\t%s", ScriptPosToString(script_pos_[i])); } tprintf("\nstr"); for (int i = 0; i < length_; ++i) { tprintf("\t%s", unicharset_->id_to_unichar(unichar_ids_[i])); } tprintf("\nstate:"); for (int i = 0; i < length_; ++i) { tprintf("\t%d ", state_[i]); } tprintf("\nC"); for (int i = 0; i < length_; ++i) { tprintf("\t%.3f", certainties_[i]); } tprintf("\n"); }
void WERD_CHOICE::print_state | ( | const char * | msg | ) | const |
Definition at line 739 of file ratngs.cpp.
void WERD_CHOICE::punct_stripped | ( | int * | start, |
int * | end | ||
) | const |
punct_stripped
Returns the half-open interval of unichar_id indices [start, end) that encloses the core portion of this word -- the part that remains after stripping punctuation from the left and right.
Definition at line 362 of file ratngs.cpp.
{ *start = 0; *end = length() - 1; while (*start < length() && unicharset()->get_ispunctuation(unichar_id(*start))) { (*start)++; } while (*end > -1 && unicharset()->get_ispunctuation(unichar_id(*end))) { (*end)--; } (*end)++; }
float WERD_CHOICE::rating | ( | ) | const [inline] |
void WERD_CHOICE::remove_last_unichar_id | ( | ) | [inline] |
void WERD_CHOICE::remove_unichar_id | ( | int | index | ) | [inline] |
Definition at line 466 of file ratngs.h.
{ this->remove_unichar_ids(index, 1); }
void WERD_CHOICE::remove_unichar_ids | ( | int | start, |
int | num | ||
) |
remove_unichar_ids
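Removes num unichar ids starting at index start from unichar_ids_, together with the matching script_pos_, state_ and certainties_ entries, and updates length_ to reflect this change. The state_ (blob count) values of the removed entries are added to state_[start - 1] when start > 0, otherwise to state_[start + num] when that entry exists. Note: this function does not modify rating_ or certainty_.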
Definition at line 321 of file ratngs.cpp.
{ ASSERT_HOST(start >= 0 && start + num <= length_); // Accumulate the states to account for the merged blobs. for (int i = 0; i < num; ++i) { if (start > 0) state_[start - 1] += state_[start + i]; else if (start + num < length_) state_[start + num] += state_[start + i]; } for (int i = start; i + num < length_; ++i) { unichar_ids_[i] = unichar_ids_[i + num]; script_pos_[i] = script_pos_[i + num]; state_[i] = state_[i + num]; certainties_[i] = certainties_[i + num]; } length_ -= num; }
void WERD_CHOICE::reverse_and_mirror_unichar_ids | ( | ) |
reverse_and_mirror_unichar_ids
Reverses and mirrors unichars in unichar_ids.
Definition at line 344 of file ratngs.cpp.
{ for (int i = 0; i < length_ / 2; ++i) { UNICHAR_ID tmp_id = unichar_ids_[i]; unichar_ids_[i] = unicharset_->get_mirror(unichar_ids_[length_-1-i]); unichar_ids_[length_-1-i] = unicharset_->get_mirror(tmp_id); } if (length_ % 2 != 0) { unichar_ids_[length_/2] = unicharset_->get_mirror(unichar_ids_[length_/2]); } }
ScriptPos WERD_CHOICE::ScriptPositionOf | ( | bool | print_debug, |
const UNICHARSET & | unicharset, | ||
const TBOX & | blob_box, | ||
UNICHAR_ID | unichar_id | ||
) | [static] |
Definition at line 616 of file ratngs.cpp.
{ ScriptPos retval = tesseract::SP_NORMAL; int top = blob_box.top(); int bottom = blob_box.bottom(); int min_bottom, max_bottom, min_top, max_top; unicharset.get_top_bottom(unichar_id, &min_bottom, &max_bottom, &min_top, &max_top); int sub_thresh_top = min_top - kMinSubscriptOffset; int sub_thresh_bot = kBlnBaselineOffset - kMinSubscriptOffset; int sup_thresh_bot = max_bottom + kMinSuperscriptOffset; if (bottom <= kMaxDropCapBottom) { retval = tesseract::SP_DROPCAP; } else if (top < sub_thresh_top && bottom < sub_thresh_bot) { retval = tesseract::SP_SUBSCRIPT; } else if (bottom > sup_thresh_bot) { retval = tesseract::SP_SUPERSCRIPT; } if (print_debug) { const char *pos = ScriptPosToString(retval); tprintf("%s Character %s[bot:%d top: %d] " "bot_range[%d,%d] top_range[%d, %d] " "sub_thresh[bot:%d top:%d] sup_thresh_bot %d\n", pos, unicharset.id_to_unichar(unichar_id), bottom, top, min_bottom, max_bottom, min_top, max_top, sub_thresh_bot, sub_thresh_top, sup_thresh_bot); } return retval; }
void WERD_CHOICE::set_adjust_factor | ( | float | factor | ) | [inline] |
void WERD_CHOICE::set_blob_choice | ( | int | index, |
int | blob_count, | ||
const BLOB_CHOICE * | blob_choice | ||
) |
Definition at line 291 of file ratngs.cpp.
{ unichar_ids_[index] = blob_choice->unichar_id(); script_pos_[index] = tesseract::SP_NORMAL; state_[index] = blob_count; certainties_[index] = blob_choice->certainty(); }
void WERD_CHOICE::set_certainty | ( | float | new_val | ) | [inline] |
void WERD_CHOICE::set_dangerous_ambig_found_ | ( | bool | value | ) | [inline] |
void WERD_CHOICE::set_length | ( | int | len | ) | [inline] |
Definition at line 363 of file ratngs.h.
{ ASSERT_HOST(reserved_ >= len); length_ = len; }
void WERD_CHOICE::set_permuter | ( | uinT8 | perm | ) | [inline] |
void WERD_CHOICE::set_rating | ( | float | new_val | ) | [inline] |
void WERD_CHOICE::set_unichar_id | ( | UNICHAR_ID | unichar_id, |
int | index | ||
) | [inline] |
Definition at line 341 of file ratngs.h.
{ assert(index < length_); unichar_ids_[index] = unichar_id; }
void WERD_CHOICE::set_unichar_id | ( | UNICHAR_ID | unichar_id, |
int | blob_count, | ||
float | rating, | ||
float | certainty, | ||
int | index | ||
) | [inline] |
Definition at line 446 of file ratngs.h.
{ assert(index < length_); unichar_ids_[index] = unichar_id; state_[index] = blob_count; certainties_[index] = certainty; script_pos_[index] = tesseract::SP_NORMAL; rating_ += rating; if (certainty < certainty_) { certainty_ = certainty; } }
bool WERD_CHOICE::set_unichars_in_script_order | ( | bool | in_script_order | ) | [inline] |
void WERD_CHOICE::set_x_heights | ( | float | min_height, |
float | max_height | ||
) | [inline] |
void WERD_CHOICE::SetAllScriptPositions | ( | tesseract::ScriptPos | position | ) |
Definition at line 610 of file ratngs.cpp.
{ for (int i = 0; i < length_; ++i) script_pos_[i] = position; }
void WERD_CHOICE::SetScriptPositions | ( | bool | small_caps, |
TWERD * | word | ||
) |
Definition at line 529 of file ratngs.cpp.
{ // Since WERD_CHOICE isn't supposed to depend on a Tesseract, // we don't have easy access to the flags Tesseract stores. Therefore, debug // for this module is hard compiled in. int debug = 0; // Initialize to normal. for (int i = 0; i < length_; ++i) script_pos_[i] = tesseract::SP_NORMAL; if (word->blobs.empty() || word->NumBlobs() != TotalOfStates()) { return; } int position_counts[4]; for (int i = 0; i < 4; i++) { position_counts[i] = 0; } int chunk_index = 0; for (int blob_index = 0; blob_index < length_; ++blob_index, ++chunk_index) { TBLOB* tblob = word->blobs[chunk_index]; int uni_id = unichar_id(blob_index); TBOX blob_box = tblob->bounding_box(); if (state_ != NULL) { for (int i = 1; i < state_[blob_index]; ++i) { ++chunk_index; tblob = word->blobs[chunk_index]; blob_box += tblob->bounding_box(); } } script_pos_[blob_index] = ScriptPositionOf(false, *unicharset_, blob_box, uni_id); if (small_caps && script_pos_[blob_index] != tesseract::SP_DROPCAP) { script_pos_[blob_index] = tesseract::SP_NORMAL; } position_counts[script_pos_[blob_index]]++; } // If almost everything looks like a superscript or subscript, // we most likely just got the baseline wrong. 
if (position_counts[tesseract::SP_SUBSCRIPT] > 0.75 * length_ || position_counts[tesseract::SP_SUPERSCRIPT] > 0.75 * length_) { if (debug >= 2) { tprintf("Most characters of %s are subscript or superscript.\n" "That seems wrong, so I'll assume we got the baseline wrong\n", unichar_string().string()); } for (int i = 0; i < length_; i++) { ScriptPos sp = script_pos_[i]; if (sp == tesseract::SP_SUBSCRIPT || sp == tesseract::SP_SUPERSCRIPT) { position_counts[sp]--; position_counts[tesseract::SP_NORMAL]++; script_pos_[i] = tesseract::SP_NORMAL; } } } if ((debug >= 1 && position_counts[tesseract::SP_NORMAL] < length_) || debug >= 2) { tprintf("SetScriptPosition on %s\n", unichar_string().string()); int chunk_index = 0; for (int blob_index = 0; blob_index < length_; ++blob_index) { if (debug >= 2 || script_pos_[blob_index] != tesseract::SP_NORMAL) { TBLOB* tblob = word->blobs[chunk_index]; ScriptPositionOf(true, *unicharset_, tblob->bounding_box(), unichar_id(blob_index)); } chunk_index += state_ != NULL ? state_[blob_index] : 1; } } }
void WERD_CHOICE::SetScriptPositions | ( | const tesseract::ScriptPos * | positions, |
int | length | ||
) |
Definition at line 600 of file ratngs.cpp.
{ ASSERT_HOST(length == length_); if (positions != script_pos_) { delete [] script_pos_; script_pos_ = new ScriptPos[length]; memcpy(script_pos_, positions, sizeof(positions[0]) * length); } }
WERD_CHOICE WERD_CHOICE::shallow_copy | ( | int | start, |
int | end | ||
) | const |
Definition at line 393 of file ratngs.cpp.
{ ASSERT_HOST(start >= 0 && start <= length_); ASSERT_HOST(end >= 0 && end <= length_); if (end < start) { end = start; } WERD_CHOICE retval(unicharset_, end - start); for (int i = start; i < end; i++) { retval.append_unichar_id_space_allocated( unichar_ids_[i], state_[i], 0.0f, certainties_[i]); } return retval; }
int WERD_CHOICE::state | ( | int | index | ) | const [inline] |
void WERD_CHOICE::string_and_lengths | ( | STRING * | word_str, |
STRING * | word_lengths_str | ||
) | const |
string_and_lengths
Populates the given word_str with unichars from unichar_ids and, if word_lengths_str is not NULL, populates word_lengths_str with the corresponding unichar lengths.
Definition at line 428 of file ratngs.cpp.
{ *word_str = ""; if (word_lengths_str != NULL) *word_lengths_str = ""; for (int i = 0; i < length_; ++i) { const char *ch = unicharset_->id_to_unichar_ext(unichar_ids_[i]); *word_str += ch; if (word_lengths_str != NULL) { *word_lengths_str += strlen(ch); } } }
int WERD_CHOICE::TotalOfStates | ( | ) | const |
Definition at line 698 of file ratngs.cpp.
{ int total_chunks = 0; for (int i = 0; i < length_; ++i) { total_chunks += state_[i]; } return total_chunks; }
const UNICHAR_ID WERD_CHOICE::unichar_id | ( | int | index | ) | const [inline] |
const UNICHAR_ID* WERD_CHOICE::unichar_ids | ( | ) | const [inline] |
const STRING& WERD_CHOICE::unichar_lengths | ( | ) | const [inline] |
Definition at line 516 of file ratngs.h.
{ this->string_and_lengths(&unichar_string_, &unichar_lengths_); return unichar_lengths_; }
const STRING& WERD_CHOICE::unichar_string | ( | ) | const [inline] |
Definition at line 509 of file ratngs.h.
{ this->string_and_lengths(&unichar_string_, &unichar_lengths_); return unichar_string_; }
bool WERD_CHOICE::unichars_in_script_order | ( | ) | const [inline] |
const UNICHARSET* WERD_CHOICE::unicharset | ( | ) | const [inline] |
void WERD_CHOICE::UpdateStateForSplit | ( | int | blob_position | ) |
Definition at line 686 of file ratngs.cpp.
{ int total_chunks = 0; for (int i = 0; i < length_; ++i) { total_chunks += state_[i]; if (total_chunks > blob_position) { ++state_[i]; return; } } }
const float WERD_CHOICE::kBadRating = 100000.0 [static] |