#include <werd.h>
List of all members.
Detailed Description
Definition at line 60 of file werd.h.
Constructor & Destructor Documentation
WERD::WERD |
( |
C_BLOB_LIST * |
blob_list, |
|
|
uinT8 |
blank_count, |
|
|
const char * |
text |
|
) |
| |
WERD::WERD
Constructor to build a WERD from a list of C_BLOBs. blob_list The C_BLOBs (in word order) are not copied; we take its elements and put them in our lists. blank_count blanks in front of the word text correct text, outlives this WERD
Definition at line 48 of file werd.cpp.
: blanks(blank_count),
flags(0),
script_id_(0),
correct(text) {
C_BLOB_IT start_it = blob_list;
C_BLOB_IT end_it = blob_list;
C_BLOB_IT rej_cblob_it = &rej_cblobs;
C_OUTLINE_IT c_outline_it;
inT16 inverted_vote = 0;
inT16 non_inverted_vote = 0;
while (!end_it.at_last())
end_it.forward();
cblobs.assign_to_sublist(&start_it, &end_it);
start_it.set_to_list(&cblobs);
if (start_it.empty())
return;
for (start_it.mark_cycle_pt(); !start_it.cycled_list(); start_it.forward()) {
BOOL8 reject_blob = FALSE;
BOOL8 blob_inverted;
c_outline_it.set_to_list(start_it.data()->out_list());
blob_inverted = c_outline_it.data()->flag(COUT_INVERSE);
for (c_outline_it.mark_cycle_pt();
!c_outline_it.cycled_list() && !reject_blob;
c_outline_it.forward()) {
reject_blob = c_outline_it.data()->flag(COUT_INVERSE) != blob_inverted;
}
if (reject_blob) {
rej_cblob_it.add_after_then_move(start_it.extract());
} else {
if (blob_inverted)
inverted_vote++;
else
non_inverted_vote++;
}
}
flags.set_bit(W_INVERSE, (inverted_vote > non_inverted_vote));
start_it.set_to_list(&cblobs);
if (start_it.empty())
return;
for (start_it.mark_cycle_pt(); !start_it.cycled_list(); start_it.forward()) {
c_outline_it.set_to_list(start_it.data()->out_list());
if (c_outline_it.data()->flag(COUT_INVERSE) != flags.bit(W_INVERSE))
rej_cblob_it.add_after_then_move(start_it.extract());
}
}
WERD::WERD
Constructor to build a WERD from a list of C_BLOBs. The C_BLOBs are not copied so the source list is emptied.
Definition at line 122 of file werd.cpp.
: flags(clone->flags),
script_id_(clone->script_id_),
correct(clone->correct) {
C_BLOB_IT start_it = blob_list;
C_BLOB_IT end_it = blob_list;
while (!end_it.at_last ())
end_it.forward ();
((C_BLOB_LIST *) (&cblobs))->assign_to_sublist (&start_it, &end_it);
blanks = clone->blanks;
}
Member Function Documentation
WERD::bounding_box
Return the bounding box of the WERD. This is quite a mess to compute! ORIGINALLY, REJECT CBLOBS WERE EXCLUDED, however, this led to bugs when the words on the row were re-sorted. The original words were built with reject blobs included. The FUZZY SPACE flags were set accordingly. If ALL the blobs in a word are rejected the BB for the word is NULL, causing the sort to screw up, leading to the erroneous possibility of the first word in a row being marked as FUZZY space.
Definition at line 163 of file werd.cpp.
{
TBOX box;
C_BLOB_IT rej_cblob_it = &rej_cblobs;
for (rej_cblob_it.mark_cycle_pt(); !rej_cblob_it.cycled_list();
rej_cblob_it.forward()) {
box += rej_cblob_it.data()->bounding_box();
}
C_BLOB_IT it = &cblobs;
for (it.mark_cycle_pt(); !it.cycled_list(); it.forward()) {
box += it.data()->bounding_box();
}
return box;
}
Definition at line 140 of file werd.cpp.
{
C_BLOB_LIST temp_blobs;
C_BLOB_IT temp_it(&temp_blobs);
temp_it.add_after_then_move(blob);
WERD* blob_word = new WERD(&temp_blobs, this);
blob_word->set_flag(W_BOL, bol);
blob_word->set_flag(W_EOL, eol);
return blob_word;
}
WERD::ConstructWerdWithNewBlobs()
This method returns a new werd constructed using the blobs in the input all_blobs list, which correspond to the blobs in this werd object. The blobs used to construct the new word are consumed and removed from the input all_blobs list. Returns NULL if the word couldn't be constructed. Returns original blobs for which no matches were found in the output list orphan_blobs (appends).
Definition at line 401 of file werd.cpp.
{
C_BLOB_LIST current_blob_list;
C_BLOB_IT werd_blobs_it(¤t_blob_list);
werd_blobs_it.add_list_after(cblob_list());
C_BLOB_LIST new_werd_blobs;
C_BLOB_IT new_blobs_it(&new_werd_blobs);
C_BLOB_LIST not_found_blobs;
C_BLOB_IT not_found_it(¬_found_blobs);
not_found_it.move_to_last();
werd_blobs_it.move_to_first();
for (werd_blobs_it.mark_cycle_pt(); !werd_blobs_it.cycled_list();
werd_blobs_it.forward()) {
C_BLOB* werd_blob = werd_blobs_it.extract();
TBOX werd_blob_box = werd_blob->bounding_box();
bool found = false;
C_BLOB_IT all_blobs_it(all_blobs);
for (all_blobs_it.mark_cycle_pt(); !all_blobs_it.cycled_list();
all_blobs_it.forward()) {
C_BLOB* a_blob = all_blobs_it.data();
TBOX a_blob_box = a_blob->bounding_box();
if (a_blob_box.null_box()) {
tprintf("Bounding box couldn't be ascertained\n");
}
if (werd_blob_box.contains(a_blob_box) ||
werd_blob_box.major_overlap(a_blob_box)) {
all_blobs_it.extract();
new_blobs_it.add_after_then_move(a_blob);
found = true;
}
}
if (!found) {
not_found_it.add_after_then_move(werd_blob);
} else {
delete werd_blob;
}
}
not_found_it.move_to_first();
for (not_found_it.mark_cycle_pt(); !not_found_it.cycled_list();
not_found_it.forward()) {
C_BLOB* not_found = not_found_it.data();
TBOX not_found_box = not_found->bounding_box();
C_BLOB_IT existing_blobs_it(new_blobs_it);
for (existing_blobs_it.mark_cycle_pt(); !existing_blobs_it.cycled_list();
existing_blobs_it.forward()) {
C_BLOB* a_blob = existing_blobs_it.data();
TBOX a_blob_box = a_blob->bounding_box();
if ((not_found_box.major_overlap(a_blob_box) ||
a_blob_box.major_overlap(not_found_box)) &&
not_found_box.y_overlap_fraction(a_blob_box) > 0.8) {
delete not_found_it.extract();
break;
}
}
}
if (orphan_blobs) {
C_BLOB_IT orphan_blobs_it(orphan_blobs);
orphan_blobs_it.move_to_last();
orphan_blobs_it.add_list_after(¬_found_blobs);
}
WERD* new_werd = NULL;
if (!new_werd_blobs.empty()) {
new_werd = new WERD(&new_werd_blobs, this);
} else {
C_BLOB_IT this_list_it(cblob_list());
this_list_it.add_list_after(¬_found_blobs);
}
return new_werd;
}
WERD::copy_on
Copy blobs from other word onto this one.
Definition at line 223 of file werd.cpp.
{
bool reversed = other->bounding_box().left() < bounding_box().left();
C_BLOB_IT c_blob_it(&cblobs);
C_BLOB_LIST c_blobs;
c_blobs.deep_copy(&other->cblobs, &C_BLOB::deep_copy);
if (reversed) {
c_blob_it.add_list_before(&c_blobs);
} else {
c_blob_it.move_to_last();
c_blob_it.add_list_after(&c_blobs);
}
if (!other->rej_cblobs.empty()) {
C_BLOB_IT rej_c_blob_it(&rej_cblobs);
C_BLOB_LIST new_rej_c_blobs;
new_rej_c_blobs.deep_copy(&other->rej_cblobs, &C_BLOB::deep_copy);
if (reversed) {
rej_c_blob_it.add_list_before(&new_rej_c_blobs);
} else {
rej_c_blob_it.move_to_last();
rej_c_blob_it.add_list_after(&new_rej_c_blobs);
}
}
}
Definition at line 122 of file werd.h.
{ return flags.bit(mask); }
WERD::join_on
Join other word onto this one. Delete the old word.
Definition at line 200 of file werd.cpp.
{
C_BLOB_IT blob_it(&cblobs);
C_BLOB_IT src_it(&other->cblobs);
C_BLOB_IT rej_cblob_it(&rej_cblobs);
C_BLOB_IT src_rej_it(&other->rej_cblobs);
while (!src_it.empty()) {
blob_it.add_to_end(src_it.extract());
src_it.forward();
}
while (!src_rej_it.empty()) {
rej_cblob_it.add_to_end(src_rej_it.extract());
src_rej_it.forward();
}
}
WERD::move
Reposition WERD by vector NOTE!! REJECT CBLOBS ARE NOT MOVED
Definition at line 187 of file werd.cpp.
{
C_BLOB_IT cblob_it(&cblobs);
for (cblob_it.mark_cycle_pt(); !cblob_it.cycled_list(); cblob_it.forward())
cblob_it.data()->move(vec);
}
WERD & WERD::operator= |
( |
const WERD & |
source | ) |
|
WERD::operator=
Assign a word, DEEP copying the blob list
Definition at line 358 of file werd.cpp.
{
this->ELIST2_LINK::operator= (source);
blanks = source.blanks;
flags = source.flags;
script_id_ = source.script_id_;
dummy = source.dummy;
correct = source.correct;
if (!cblobs.empty())
cblobs.clear();
cblobs.deep_copy(&source.cblobs, &C_BLOB::deep_copy);
if (!rej_cblobs.empty())
rej_cblobs.clear();
rej_cblobs.deep_copy(&source.rej_cblobs, &C_BLOB::deep_copy);
return *this;
}
WERD::plot
Draw the WERD in the given colour.
Definition at line 286 of file werd.cpp.
{
C_BLOB_IT it = &cblobs;
for (it.mark_cycle_pt(); !it.cycled_list(); it.forward()) {
it.data()->plot(window, colour, colour);
}
plot_rej_blobs(window);
}
WERD::print
Display members
Definition at line 255 of file werd.cpp.
{
tprintf("Blanks= %d\n", blanks);
bounding_box().print();
tprintf("Flags = %d = 0%o\n", flags.val, flags.val);
tprintf(" W_SEGMENTED = %s\n", flags.bit(W_SEGMENTED) ? "TRUE" : "FALSE ");
tprintf(" W_ITALIC = %s\n", flags.bit(W_ITALIC) ? "TRUE" : "FALSE ");
tprintf(" W_BOL = %s\n", flags.bit(W_BOL) ? "TRUE" : "FALSE ");
tprintf(" W_EOL = %s\n", flags.bit(W_EOL) ? "TRUE" : "FALSE ");
tprintf(" W_NORMALIZED = %s\n",
flags.bit(W_NORMALIZED) ? "TRUE" : "FALSE ");
tprintf(" W_SCRIPT_HAS_XHEIGHT = %s\n",
flags.bit(W_SCRIPT_HAS_XHEIGHT) ? "TRUE" : "FALSE ");
tprintf(" W_SCRIPT_IS_LATIN = %s\n",
flags.bit(W_SCRIPT_IS_LATIN) ? "TRUE" : "FALSE ");
tprintf(" W_DONT_CHOP = %s\n", flags.bit(W_DONT_CHOP) ? "TRUE" : "FALSE ");
tprintf(" W_REP_CHAR = %s\n", flags.bit(W_REP_CHAR) ? "TRUE" : "FALSE ");
tprintf(" W_FUZZY_SP = %s\n", flags.bit(W_FUZZY_SP) ? "TRUE" : "FALSE ");
tprintf(" W_FUZZY_NON = %s\n", flags.bit(W_FUZZY_NON) ? "TRUE" : "FALSE ");
tprintf("Correct= %s\n", correct.string());
tprintf("Rejected cblob count = %d\n", rej_cblobs.length());
tprintf("Script = %d\n", script_id_);
}
WERD::shallow_copy()
Make a shallow copy of a word
Definition at line 341 of file werd.cpp.
{
WERD *new_word = new WERD;
new_word->blanks = blanks;
new_word->flags = flags;
new_word->dummy = dummy;
new_word->correct = correct;
return new_word;
}
The documentation for this class was generated from the following files:
- /usr/local/google/home/jbreiden/tesseract-ocr-read-only/ccstruct/werd.h
- /usr/local/google/home/jbreiden/tesseract-ocr-read-only/ccstruct/werd.cpp