tesseract  3.03
/usr/local/google/home/jbreiden/tesseract-ocr-read-only/wordrec/chopper.cpp
Go to the documentation of this file.
00001 /* -*-C-*-
00002  ********************************************************************************
00003  *
00004  * File:        chopper.c  (Formerly chopper.c)
00005  * Description:
00006  * Author:       Mark Seaman, OCR Technology
00007  * Created:      Fri Oct 16 14:37:00 1987
00008  * Modified:     Tue Jul 30 16:18:52 1991 (Mark Seaman) marks@hpgrlt
00009  * Language:     C
00010  * Package:      N/A
00011  * Status:       Reusable Software Component
00012  *
00013  * (c) Copyright 1987, Hewlett-Packard Company.
00014  ** Licensed under the Apache License, Version 2.0 (the "License");
00015  ** you may not use this file except in compliance with the License.
00016  ** You may obtain a copy of the License at
00017  ** http://www.apache.org/licenses/LICENSE-2.0
00018  ** Unless required by applicable law or agreed to in writing, software
00019  ** distributed under the License is distributed on an "AS IS" BASIS,
00020  ** WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
00021  ** See the License for the specific language governing permissions and
00022  ** limitations under the License.
00023  *
00024  **************************************************************************/
00025 
00026 /*----------------------------------------------------------------------
00027           I n c l u d e s
00028 ----------------------------------------------------------------------*/
00029 
00030 #include <math.h>
00031 
00032 #include "chopper.h"
00033 
00034 #include "assert.h"
00035 #include "associate.h"
00036 #include "blobs.h"
00037 #include "callcpp.h"
00038 #include "const.h"
00039 #include "findseam.h"
00040 #include "freelist.h"
00041 #include "globals.h"
00042 #include "makechop.h"
00043 #include "render.h"
00044 #include "pageres.h"
00045 #include "seam.h"
00046 #include "stopper.h"
00047 #include "structures.h"
00048 #include "unicharset.h"
00049 #include "wordrec.h"
00050 
00051 // Include automatically generated configuration file if running autoconf.
00052 #ifdef HAVE_CONFIG_H
00053 #include "config_auto.h"
00054 #endif
00055 
00056 // Even though the limit on the number of chunks may now be removed, keep
00057 // the same limit for repeatable behavior, and it may be a speed advantage.
00058 static const int kMaxNumChunks = 64;
00059 
00060 /*----------------------------------------------------------------------
00061           F u n c t i o n s
00062 ----------------------------------------------------------------------*/
00068 void preserve_outline(EDGEPT *start) {
00069   EDGEPT *srcpt;
00070 
00071   if (start == NULL)
00072     return;
00073   srcpt = start;
00074   do {
00075     srcpt->flags[1] = 1;
00076     srcpt = srcpt->next;
00077   }
00078   while (srcpt != start);
00079   srcpt->flags[1] = 2;
00080 }
00081 
00082 
00083 /**************************************************************************/
00084 void preserve_outline_tree(TESSLINE *srcline) {
00085   TESSLINE *outline;
00086 
00087   for (outline = srcline; outline != NULL; outline = outline->next) {
00088     preserve_outline (outline->loop);
00089   }
00090 }
00091 
00092 
/**********************************************************************
 * restore_outline
 *
 * Undo the effect of a chop on this outline: walk the circular edge
 * point list to find the point tagged by preserve_outline as the
 * original start (flags[1] == 2), then delete every point that was
 * added since (flags[1] == 0, i.e. never marked by preserve_outline).
 * Returns the original start point, or NULL if start is NULL.
 **********************************************************************/
EDGEPT *restore_outline(EDGEPT *start) {
  EDGEPT *srcpt;
  EDGEPT *real_start;

  if (start == NULL)
    return NULL;
  srcpt = start;
  // Locate the point tagged as the original start of the loop.
  do {
    if (srcpt->flags[1] == 2)
      break;
    srcpt = srcpt->next;
  }
  while (srcpt != start);
  real_start = srcpt;
  // Delete every unmarked point; step to next before removing prev so the
  // iterator is never left pointing at a removed node.
  do {
    srcpt = srcpt->next;
    if (srcpt->prev->flags[1] == 0) {
      remove_edgept(srcpt->prev);
    }
  }
  while (srcpt != real_start);
  return real_start;
}
00121 
00122 
00123 /******************************************************************************/
00124 void restore_outline_tree(TESSLINE *srcline) {
00125   TESSLINE *outline;
00126 
00127   for (outline = srcline; outline != NULL; outline = outline->next) {
00128     outline->loop = restore_outline (outline->loop);
00129     outline->start = outline->loop->pos;
00130   }
00131 }
00132 
00133 // Helper runs all the checks on a seam to make sure it is valid.
00134 // Returns the seam if OK, otherwise deletes the seam and returns NULL.
00135 static SEAM* CheckSeam(int debug_level, inT32 blob_number, TWERD* word,
00136                        TBLOB* blob, TBLOB* other_blob,
00137                        const GenericVector<SEAM*>& seams, SEAM* seam) {
00138   if (seam == NULL ||
00139       blob->outlines == NULL ||
00140       other_blob->outlines == NULL ||
00141       total_containment(blob, other_blob) ||
00142       check_blob(other_blob) ||
00143       !(check_seam_order(blob, seam) &&
00144           check_seam_order(other_blob, seam)) ||
00145       any_shared_split_points(seams, seam) ||
00146       !test_insert_seam(seams, word, blob_number)) {
00147     word->blobs.remove(blob_number + 1);
00148     if (seam) {
00149       undo_seam(blob, other_blob, seam);
00150       delete seam;
00151       seam = NULL;
00152 #ifndef GRAPHICS_DISABLED
00153       if (debug_level) {
00154         if (debug_level >2)
00155           display_blob(blob, Red);
00156         tprintf("\n** seam being removed ** \n");
00157       }
00158 #endif
00159     } else {
00160       delete other_blob;
00161     }
00162     return NULL;
00163   }
00164   return seam;
00165 }
00166 
00167 
00174 namespace tesseract {
/**
 * attempt_blob_chop
 *
 * Try to split the given blob (at index blob_number in the word) into
 * two pieces.  On success the new right-hand blob has been inserted
 * into the word at blob_number + 1 and the seam used is returned;
 * returns NULL if no acceptable chop could be found.
 */
SEAM *Wordrec::attempt_blob_chop(TWERD *word, TBLOB *blob, inT32 blob_number,
                                 bool italic_blob,
                                 const GenericVector<SEAM*>& seams) {
  // Remember the outline state so a failed chop can be undone below.
  if (repair_unchopped_blobs)
    preserve_outline_tree (blob->outlines);
  TBLOB *other_blob = TBLOB::ShallowCopy(*blob);       /* Make new blob */
  // Insert it into the word.
  word->blobs.insert(other_blob, blob_number + 1);

  SEAM *seam = NULL;
  if (prioritize_division) {
    // Prefer a trivial division between existing outlines when one exists.
    TPOINT location;
    if (divisible_blob(blob, italic_blob, &location)) {
      seam = new SEAM(0.0f, location, NULL, NULL, NULL);
    }
  }
  if (seam == NULL)
    seam = pick_good_seam(blob);
  if (chop_debug) {
    if (seam != NULL)
      print_seam("Good seam picked=", seam);
    else
      tprintf("\n** no seam picked *** \n");
  }
  if (seam) {
    apply_seam(blob, other_blob, italic_blob, seam);
  }

  // Validate; CheckSeam cleans up the word and seam and returns NULL
  // on rejection.
  seam = CheckSeam(chop_debug, blob_number, word, blob, other_blob,
                   seams, seam);
  if (seam == NULL) {
    if (repair_unchopped_blobs)
      restore_outline_tree(blob->outlines);
    if (word->latin_script) {
      // If the blob can simply be divided into outlines, then do that.
      TPOINT location;
      if (divisible_blob(blob, italic_blob, &location)) {
        other_blob = TBLOB::ShallowCopy(*blob);       /* Make new blob */
        word->blobs.insert(other_blob, blob_number + 1);
        seam = new SEAM(0.0f, location, NULL, NULL, NULL);
        apply_seam(blob, other_blob, italic_blob, seam);
        seam = CheckSeam(chop_debug, blob_number, word, blob, other_blob,
                         seams, seam);
      }
    }
  }
  return seam;
}
00223 
00224 
00225 SEAM *Wordrec::chop_numbered_blob(TWERD *word, inT32 blob_number,
00226                                   bool italic_blob,
00227                                   const GenericVector<SEAM*>& seams) {
00228   return attempt_blob_chop(word, word->blobs[blob_number], blob_number,
00229                            italic_blob, seams);
00230 }
00231 
00232 
00233 SEAM *Wordrec::chop_overlapping_blob(const GenericVector<TBOX>& boxes,
00234                                      bool italic_blob, WERD_RES *word_res,
00235                                      int *blob_number) {
00236   TWERD *word = word_res->chopped_word;
00237   for (*blob_number = 0; *blob_number < word->NumBlobs(); ++*blob_number) {
00238     TBLOB *blob = word->blobs[*blob_number];
00239     TPOINT topleft, botright;
00240     topleft.x = blob->bounding_box().left();
00241     topleft.y = blob->bounding_box().top();
00242     botright.x = blob->bounding_box().right();
00243     botright.y = blob->bounding_box().bottom();
00244 
00245     TPOINT original_topleft, original_botright;
00246     word_res->denorm.DenormTransform(NULL, topleft, &original_topleft);
00247     word_res->denorm.DenormTransform(NULL, botright, &original_botright);
00248 
00249     TBOX original_box = TBOX(original_topleft.x, original_botright.y,
00250                              original_botright.x, original_topleft.y);
00251 
00252     bool almost_equal_box = false;
00253     int num_overlap = 0;
00254     for (int i = 0; i < boxes.size(); i++) {
00255       if (original_box.overlap_fraction(boxes[i]) > 0.125)
00256         num_overlap++;
00257       if (original_box.almost_equal(boxes[i], 3))
00258         almost_equal_box = true;
00259     }
00260 
00261     TPOINT location;
00262     if (divisible_blob(blob, italic_blob, &location) ||
00263         (!almost_equal_box && num_overlap > 1)) {
00264       SEAM *seam = attempt_blob_chop(word, blob, *blob_number,
00265                                      italic_blob, word_res->seam_array);
00266       if (seam != NULL)
00267         return seam;
00268     }
00269   }
00270 
00271   *blob_number = -1;
00272   return NULL;
00273 }
00274 
00275 }  // namespace tesseract
00276 
00277 
00283 int any_shared_split_points(const GenericVector<SEAM*>& seams, SEAM *seam) {
00284   int length;
00285   int index;
00286 
00287   length = seams.size();
00288   for (index = 0; index < length; index++)
00289     if (shared_split_points(seams[index], seam))
00290       return TRUE;
00291   return FALSE;
00292 }
00293 
00294 
00300 int check_blob(TBLOB *blob) {
00301   TESSLINE *outline;
00302   EDGEPT *edgept;
00303 
00304   for (outline = blob->outlines; outline != NULL; outline = outline->next) {
00305     edgept = outline->loop;
00306     do {
00307       if (edgept == NULL)
00308         break;
00309       edgept = edgept->next;
00310     }
00311     while (edgept != outline->loop);
00312     if (edgept == NULL)
00313       return 1;
00314   }
00315   return 0;
00316 }
00317 
00318 
00319 namespace tesseract {
/**
 * improve_one_blob
 *
 * Pick a blob to chop — either the one indicated by the dictionary fix
 * point, or the worst-rated classifier result — and chop it.  Loops,
 * lowering the rating ceiling after each failed attempt, until a chop
 * succeeds or no candidate remains.  On success returns the new seam
 * with *blob_number set to the chopped blob; returns NULL on failure.
 */
SEAM* Wordrec::improve_one_blob(const GenericVector<BLOB_CHOICE*>& blob_choices,
                                DANGERR *fixpt,
                                bool split_next_to_fragment,
                                bool italic_blob,
                                WERD_RES* word,
                                int* blob_number) {
  float rating_ceiling = MAX_FLOAT32;
  SEAM *seam = NULL;
  do {
    // Prefer a split point suggested by the dictionary's danger points.
    *blob_number = select_blob_to_split_from_fixpt(fixpt);
    if (chop_debug) tprintf("blob_number from fixpt = %d\n", *blob_number);
    bool split_point_from_dict = (*blob_number != -1);
    if (split_point_from_dict) {
      fixpt->clear();
    } else {
      // Otherwise fall back to the worst-rated blob under the ceiling.
      *blob_number = select_blob_to_split(blob_choices, rating_ceiling,
                                          split_next_to_fragment);
    }
    if (chop_debug) tprintf("blob_number = %d\n", *blob_number);
    if (*blob_number == -1)
      return NULL;

    // TODO(rays) it may eventually help to allow italic_blob to be true,
    seam = chop_numbered_blob(word->chopped_word, *blob_number, italic_blob,
                              word->seam_array);
    if (seam != NULL)
      return seam;  // Success!
    if (blob_choices[*blob_number] == NULL)
      return NULL;
    if (!split_point_from_dict) {
      // We chopped the worst rated blob, try something else next time.
      rating_ceiling = blob_choices[*blob_number]->rating();
    }
  } while (true);
  return seam;
}
00368 
00376 SEAM* Wordrec::chop_one_blob(const GenericVector<TBOX>& boxes,
00377                              const GenericVector<BLOB_CHOICE*>& blob_choices,
00378                              WERD_RES* word_res,
00379                              int* blob_number) {
00380   if (prioritize_division) {
00381     return chop_overlapping_blob(boxes, true, word_res, blob_number);
00382   } else {
00383     return improve_one_blob(blob_choices, NULL, false, true, word_res,
00384                             blob_number);
00385   }
00386 }
00387 }  // namespace tesseract
00388 
00397 inT16 check_seam_order(TBLOB *blob, SEAM *seam) {
00398   TESSLINE *outline;
00399   inT8 found_em[3];
00400 
00401   if (seam->split1 == NULL || blob == NULL)
00402     return (TRUE);
00403 
00404   found_em[0] = found_em[1] = found_em[2] = FALSE;
00405 
00406   for (outline = blob->outlines; outline; outline = outline->next) {
00407     if (!found_em[0] &&
00408       ((seam->split1 == NULL) ||
00409     is_split_outline (outline, seam->split1))) {
00410       found_em[0] = TRUE;
00411     }
00412     if (!found_em[1] &&
00413       ((seam->split2 == NULL) ||
00414     is_split_outline (outline, seam->split2))) {
00415       found_em[1] = TRUE;
00416     }
00417     if (!found_em[2] &&
00418       ((seam->split3 == NULL) ||
00419     is_split_outline (outline, seam->split3))) {
00420       found_em[2] = TRUE;
00421     }
00422   }
00423 
00424   if (!found_em[0] || !found_em[1] || !found_em[2])
00425     return (FALSE);
00426   else
00427     return (TRUE);
00428 }
00429 
00430 namespace tesseract {
00431 
/**
 * chop_word_main
 *
 * Entry point for chopping and segmentation search on one word.
 * Ensures the ratings matrix exists and its diagonal is classified
 * (or re-tags pre-classified choices with their matrix positions),
 * runs the segmentation search, and finalizes the best choice.
 */
void Wordrec::chop_word_main(WERD_RES *word) {
  int num_blobs = word->chopped_word->NumBlobs();
  if (word->ratings == NULL) {
    word->ratings = new MATRIX(num_blobs, wordrec_max_join_chunks);
  }
  if (word->ratings->get(0, 0) == NULL) {
    // Run initial classification.
    for (int b = 0; b < num_blobs; ++b) {
      BLOB_CHOICE_LIST* choices = classify_piece(word->seam_array, b, b,
                                                 "Initial:", word->chopped_word,
                                                 word->blamer_bundle);
      word->ratings->put(b, b, choices);
    }
  } else {
    // Blobs have been pre-classified. Set matrix cell for all blob choices
    for (int col = 0; col < word->ratings->dimension(); ++col) {
      for (int row = col; row < word->ratings->dimension() &&
           row < col + word->ratings->bandwidth(); ++row) {
        BLOB_CHOICE_LIST* choices = word->ratings->get(col, row);
        if (choices != NULL) {
          BLOB_CHOICE_IT bc_it(choices);
          for (bc_it.mark_cycle_pt(); !bc_it.cycled_list(); bc_it.forward()) {
            bc_it.data()->set_matrix_cell(col, row);
          }
        }
      }
    }
  }

  // Run Segmentation Search.
  BestChoiceBundle best_choice_bundle(word->ratings->dimension());
  SegSearch(word, &best_choice_bundle, word->blamer_bundle);

  if (word->best_choice == NULL) {
    // SegSearch found no valid paths, so just use the leading diagonal.
    word->FakeWordFromRatings();
  }
  word->RebuildBestState();
  // If we finished without a hyphen at the end of the word, let the next word
  // be found in the dictionary.
  if (word->word->flag(W_EOL) &&
      !getDict().has_hyphen_end(*word->best_choice)) {
    getDict().reset_hyphen_vars(true);
  }

  // Report the lattice to the blamer callback when one is registered.
  if (word->blamer_bundle != NULL && this->fill_lattice_ != NULL) {
    CallFillLattice(*word->ratings, word->best_choices,
                    *word->uch_set, word->blamer_bundle);
  }
  if (wordrec_debug_level > 0) {
    tprintf("Final Ratings Matrix:\n");
    word->ratings->print(getDict().getUnicharset());
  }
  word->FilterWordChoices(getDict().stopper_debug_level);
}
00495 
/**
 * improve_by_chopping
 *
 * Improvement loop for the segmentation search: repeatedly pick a blob
 * to chop (via improve_one_blob), splice the resulting seam into all
 * the word's data structures, re-classify the two new pieces, and
 * incrementally re-run the language model.  Stops when an acceptable
 * choice is found, no further chop succeeds, or the chunk limit
 * (kMaxNumChunks) is reached.  Finally assigns blame if the result is
 * known to be incorrect and no reason has been recorded yet.
 */
void Wordrec::improve_by_chopping(float rating_cert_scale,
                                  WERD_RES* word,
                                  BestChoiceBundle* best_choice_bundle,
                                  BlamerBundle* blamer_bundle,
                                  LMPainPoints* pain_points,
                                  GenericVector<SegSearchPending>* pending) {
  int blob_number;
  do {  // improvement loop.
    // Make a simple vector of BLOB_CHOICEs to make it easy to pick which
    // one to chop.
    GenericVector<BLOB_CHOICE*> blob_choices;
    int num_blobs = word->ratings->dimension();
    for (int i = 0; i < num_blobs; ++i) {
      BLOB_CHOICE_LIST* choices = word->ratings->get(i, i);
      if (choices == NULL || choices->empty()) {
        blob_choices.push_back(NULL);
      } else {
        BLOB_CHOICE_IT bc_it(choices);
        blob_choices.push_back(bc_it.data());
      }
    }
    SEAM* seam = improve_one_blob(blob_choices, &best_choice_bundle->fixpt,
                                  false, false, word, &blob_number);
    if (seam == NULL) break;
    // A chop has been made. We have to correct all the data structures to
    // take into account the extra bottom-level blob.
    // Put the seam into the seam_array and correct everything else on the
    // word: ratings matrix (including matrix location in the BLOB_CHOICES),
    // states in WERD_CHOICEs, and blob widths.
    word->InsertSeam(blob_number, seam);
    // Insert a new entry in the beam array.
    best_choice_bundle->beam.insert(new LanguageModelState, blob_number);
    // Fixpts are outdated, but will get recalculated.
    best_choice_bundle->fixpt.clear();
    // Remap existing pain points.
    pain_points->RemapForSplit(blob_number);
    // Insert a new pending at the chop point.
    pending->insert(SegSearchPending(), blob_number);

    // Classify the two newly created blobs using ProcessSegSearchPainPoint,
    // as that updates the pending correctly and adds new pain points.
    MATRIX_COORD pain_point(blob_number, blob_number);
    ProcessSegSearchPainPoint(0.0f, pain_point, "Chop1", pending, word,
                              pain_points, blamer_bundle);
    pain_point.col = blob_number + 1;
    pain_point.row = blob_number + 1;
    ProcessSegSearchPainPoint(0.0f, pain_point, "Chop2", pending, word,
                              pain_points, blamer_bundle);
    if (language_model_->language_model_ngram_on) {
      // N-gram evaluation depends on the number of blobs in a chunk, so we
      // have to re-evaluate everything in the word.
      ResetNGramSearch(word, best_choice_bundle, pending);
      blob_number = 0;
    }
    // Run language model incrementally. (Except with the n-gram model on.)
    UpdateSegSearchNodes(rating_cert_scale, blob_number, pending,
                         word, pain_points, best_choice_bundle, blamer_bundle);
  } while (!language_model_->AcceptableChoiceFound() &&
           word->ratings->dimension() < kMaxNumChunks);

  // If after running only the chopper best_choice is incorrect and no blame
  // has been yet set, blame the classifier if best_choice is classifier's
  // top choice and is a dictionary word (i.e. language model could not have
  // helped). Otherwise blame the tradeoff between the classifier and
  // the old language model (permuters).
  if (word->blamer_bundle != NULL &&
      word->blamer_bundle->incorrect_result_reason() == IRR_CORRECT &&
      !word->blamer_bundle->ChoiceIsCorrect(word->best_choice)) {
    bool valid_permuter = word->best_choice != NULL &&
        Dict::valid_word_permuter(word->best_choice->permuter(), false);
    word->blamer_bundle->BlameClassifierOrLangModel(word,
                                                    getDict().getUnicharset(),
                                                    valid_permuter,
                                                    wordrec_debug_blamer);
  }
}
00579 
00580 
00581 /**********************************************************************
00582  * select_blob_to_split
00583  *
00584  * These are the results of the last classification.  Find a likely
00585  * place to apply splits.  If none, return -1.
00586  **********************************************************************/
/**********************************************************************
 * select_blob_to_split
 *
 * These are the results of the last classification.  Find a likely
 * place to apply splits.  If none, return -1.
 *
 * A blob with no choices at all is returned immediately.  Otherwise
 * the worst-rated blob whose rating is under rating_ceiling and whose
 * certainty is below tessedit_certainty_threshold is chosen.  When
 * split_next_to_fragment is set, a blob adjacent to an incomplete
 * character fragment is preferred over the plain worst blob.
 **********************************************************************/
int Wordrec::select_blob_to_split(
    const GenericVector<BLOB_CHOICE*>& blob_choices,
    float rating_ceiling, bool split_next_to_fragment) {
  BLOB_CHOICE *blob_choice;
  int x;
  float worst = -MAX_FLOAT32;
  int worst_index = -1;
  float worst_near_fragment = -MAX_FLOAT32;
  int worst_index_near_fragment = -1;
  const CHAR_FRAGMENT **fragments = NULL;

  if (chop_debug) {
    if (rating_ceiling < MAX_FLOAT32)
      tprintf("rating_ceiling = %8.4f\n", rating_ceiling);
    else
      tprintf("rating_ceiling = No Limit\n");
  }

  // Lazily build a parallel array of fragment info; entry x+1 is filled
  // in just before position x is examined, so fragments[x-1..x+1] are
  // always valid when consulted below.
  if (split_next_to_fragment && blob_choices.size() > 0) {
    fragments = new const CHAR_FRAGMENT *[blob_choices.length()];
    if (blob_choices[0] != NULL) {
      fragments[0] = getDict().getUnicharset().get_fragment(
          blob_choices[0]->unichar_id());
    } else {
      fragments[0] = NULL;
    }
  }

  for (x = 0; x < blob_choices.size(); ++x) {
    if (blob_choices[x] == NULL) {
      // A blob with no classification results is the top candidate.
      if (fragments != NULL) {
        delete[] fragments;
      }
      return x;
    } else {
      blob_choice = blob_choices[x];
      // Populate fragments for the following position.
      if (split_next_to_fragment && x+1 < blob_choices.size()) {
        if (blob_choices[x + 1] != NULL) {
          fragments[x + 1] = getDict().getUnicharset().get_fragment(
              blob_choices[x + 1]->unichar_id());
        } else {
          fragments[x + 1] = NULL;
        }
      }
      if (blob_choice->rating() < rating_ceiling &&
          blob_choice->certainty() < tessedit_certainty_threshold) {
        // Update worst and worst_index.
        if (blob_choice->rating() > worst) {
          worst_index = x;
          worst = blob_choice->rating();
        }
        if (split_next_to_fragment) {
          // Update worst_near_fragment and worst_index_near_fragment.
          bool expand_following_fragment =
            (x + 1 < blob_choices.size() &&
             fragments[x+1] != NULL && !fragments[x+1]->is_beginning());
          bool expand_preceding_fragment =
            (x > 0 && fragments[x-1] != NULL && !fragments[x-1]->is_ending());
          if ((expand_following_fragment || expand_preceding_fragment) &&
              blob_choice->rating() > worst_near_fragment) {
            worst_index_near_fragment = x;
            worst_near_fragment = blob_choice->rating();
            if (chop_debug) {
              tprintf("worst_index_near_fragment=%d"
                      " expand_following_fragment=%d"
                      " expand_preceding_fragment=%d\n",
                      worst_index_near_fragment,
                      expand_following_fragment,
                      expand_preceding_fragment);
            }
          }
        }
      }
    }
  }
  if (fragments != NULL) {
    delete[] fragments;
  }
  // TODO(daria): maybe a threshold of badness for
  // worst_near_fragment would be useful.
  return worst_index_near_fragment != -1 ?
    worst_index_near_fragment : worst_index;
}
00671 
00672 /**********************************************************************
00673  * select_blob_to_split_from_fixpt
00674  *
00675  * Given the fix point from a dictionary search, if there is a single
00676  * dangerous blob that maps to multiple characters, return that blob
00677  * index as a place we need to split.  If none, return -1.
00678  **********************************************************************/
00679 int Wordrec::select_blob_to_split_from_fixpt(DANGERR *fixpt) {
00680   if (!fixpt)
00681     return -1;
00682   for (int i = 0; i < fixpt->size(); i++) {
00683     if ((*fixpt)[i].begin + 1 == (*fixpt)[i].end &&
00684         (*fixpt)[i].dangerous &&
00685         (*fixpt)[i].correct_is_ngram) {
00686       return (*fixpt)[i].begin;
00687     }
00688   }
00689   return -1;
00690 }
00691 
00692 
00693 }  // namespace tesseract
00694 
00695 
00696 /**********************************************************************
00697  * total_containment
00698  *
00699  * Check to see if one of these outlines is totally contained within
00700  * the bounding box of the other.
00701  **********************************************************************/
00702 inT16 total_containment(TBLOB *blob1, TBLOB *blob2) {
00703   TBOX box1 = blob1->bounding_box();
00704   TBOX box2 = blob2->bounding_box();
00705   return box1.contains(box2) || box2.contains(box1);
00706 }
 All Classes Namespaces Files Functions Variables Typedefs Enumerations Enumerator Friends Defines