tesseract  3.03
/usr/local/google/home/jbreiden/tesseract-ocr-read-only/wordrec/findseam.cpp
Go to the documentation of this file.
00001 /* -*-C-*-
00002  ********************************************************************************
00003  *
00004  * File:        findseam.c  (Formerly findseam.c)
00005  * Description:
00006  * Author:       Mark Seaman, OCR Technology
00007  * Created:      Fri Oct 16 14:37:00 1987
00008  * Modified:     Tue Jul 30 15:44:59 1991 (Mark Seaman) marks@hpgrlt
00009  * Language:     C
00010  * Package:      N/A
00011  * Status:       Reusable Software Component
00012  *
00013  * (c) Copyright 1987, Hewlett-Packard Company.
00014  ** Licensed under the Apache License, Version 2.0 (the "License");
00015  ** you may not use this file except in compliance with the License.
00016  ** You may obtain a copy of the License at
00017  ** http://www.apache.org/licenses/LICENSE-2.0
00018  ** Unless required by applicable law or agreed to in writing, software
00019  ** distributed under the License is distributed on an "AS IS" BASIS,
00020  ** WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
00021  ** See the License for the specific language governing permissions and
00022  ** limitations under the License.
00023  *
00024  *********************************************************************************/
00025 /*----------------------------------------------------------------------
00026               I n c l u d e s
00027 ----------------------------------------------------------------------*/
00028 #include "findseam.h"
00029 #include "gradechop.h"
00030 #include "olutil.h"
00031 #include "plotedges.h"
00032 #include "outlines.h"
00033 #include "freelist.h"
00034 #include "seam.h"
00035 #include "wordrec.h"
00036 
00037 // Include automatically generated configuration file if running autoconf.
00038 #ifdef HAVE_CONFIG_H
00039 #include "config_auto.h"
00040 #endif
00041 
00042 /*----------------------------------------------------------------------
00043               T y p e s
00044 ----------------------------------------------------------------------*/
/* Seams whose x locations differ by less than this may be combined. */
#define SPLIT_CLOSENESS        20
/* How many seams to keep in the seam queue. */
#define MAX_NUM_SEAMS          150
/* How many to keep (not referenced in the visible part of this file). */
#define MAX_OLD_SEAMS          150
/* Special marker for priority, used when the seam queue is empty.
   Parenthesized so the expansion stays a single operand in any
   expression. */
#define NO_FULL_PRIORITY       (-1)
/* Evaluate right away: priority given when no acceptable seam is held. */
#define BAD_PRIORITY           9999.0
00053 
00054 /*----------------------------------------------------------------------
00055               F u n c t i o n s
00056 ----------------------------------------------------------------------*/
00057 namespace tesseract {
00058 
00059 /**********************************************************************
00060  * add_seam_to_queue
00061  *
00062  * Adds the given new_seam to the seams priority queue, unless it is full
00063  * and the new seam is worse than the worst.
00064  **********************************************************************/
00065 void Wordrec::add_seam_to_queue(float new_priority, SEAM *new_seam,
00066                                 SeamQueue* seams) {
00067   if (new_seam == NULL) return;
00068   if (chop_debug) {
00069     tprintf("Pushing new seam with priority %g :", new_priority);
00070     print_seam("seam: ", new_seam);
00071   }
00072   if (seams->size() >= MAX_NUM_SEAMS) {
00073     SeamPair old_pair(0, NULL);
00074     if (seams->PopWorst(&old_pair) && old_pair.key() <= new_priority) {
00075       if (chop_debug) {
00076         tprintf("Old seam staying with priority %g\n", old_pair.key());
00077       }
00078       delete new_seam;
00079       seams->Push(&old_pair);
00080       return;
00081     } else if (chop_debug) {
00082       tprintf("New seam with priority %g beats old worst seam with %g\n",
00083               new_priority, old_pair.key());
00084     }
00085   }
00086   SeamPair new_pair(new_priority, new_seam);
00087   seams->Push(&new_pair);
00088 }
00089 
00090 
00091 /**********************************************************************
00092  * choose_best_seam
00093  *
00094  * Choose the best seam that can be created by assembling this a
00095  * collection of splits.  A queue of all the possible seams is
00096  * maintained.  Each new split received is placed in that queue with
00097  * its partial priority value.  These values in the seam queue are
00098  * evaluated and combined until a good enough seam is found.  If no
00099  * further good seams are being found then this function returns to the
00100  * caller, who will send more splits.  If this function is called with
00101  * a split of NULL, then no further splits can be supplied by the
00102  * caller.
00103  **********************************************************************/
00104 void Wordrec::choose_best_seam(SeamQueue* seam_queue,
00105                                SPLIT *split,
00106                                PRIORITY priority,
00107                                SEAM **seam_result,
00108                                TBLOB *blob,
00109                                SeamPile* seam_pile) {
00110   SEAM *seam;
00111   char str[80];
00112   float my_priority;
00113   /* Add seam of split */
00114   my_priority = priority;
00115   if (split != NULL) {
00116     TPOINT split_point = split->point1->pos;
00117     split_point += split->point2->pos;
00118     split_point /= 2;
00119     seam = new SEAM(my_priority, split_point, split, NULL, NULL);
00120     if (chop_debug > 1)
00121       print_seam ("Partial priority    ", seam);
00122     add_seam_to_queue(my_priority, seam, seam_queue);
00123 
00124     if (my_priority > chop_good_split)
00125       return;
00126   }
00127 
00128   TBOX bbox = blob->bounding_box();
00129   /* Queue loop */
00130   while (!seam_queue->empty()) {
00131     SeamPair seam_pair;
00132     seam_queue->Pop(&seam_pair);
00133     seam = seam_pair.extract_data();
00134     /* Set full priority */
00135     my_priority = seam_priority(seam, bbox.left(), bbox.right());
00136     if (chop_debug) {
00137       sprintf (str, "Full my_priority %0.0f,  ", my_priority);
00138       print_seam(str, seam);
00139     }
00140 
00141     if ((*seam_result == NULL || (*seam_result)->priority > my_priority) &&
00142         my_priority < chop_ok_split) {
00143       /* No crossing */
00144       if (constrained_split(seam->split1, blob)) {
00145         delete *seam_result;
00146         *seam_result = new SEAM(*seam);
00147         (*seam_result)->priority = my_priority;
00148       } else {
00149         delete seam;
00150         seam = NULL;
00151         my_priority = BAD_PRIORITY;
00152       }
00153     }
00154 
00155     if (my_priority < chop_good_split) {
00156       if (seam)
00157         delete seam;
00158       return;                    /* Made good answer */
00159     }
00160 
00161     if (seam) {
00162       /* Combine with others */
00163       if (seam_pile->size() < chop_seam_pile_size) {
00164         combine_seam(*seam_pile, seam, seam_queue);
00165         SeamDecPair pair(seam_pair.key(), seam);
00166         seam_pile->Push(&pair);
00167       } else if (chop_new_seam_pile &&
00168                  seam_pile->size() == chop_seam_pile_size &&
00169                  seam_pile->PeekTop().key() > seam_pair.key()) {
00170         combine_seam(*seam_pile, seam, seam_queue);
00171         SeamDecPair pair;
00172         seam_pile->Pop(&pair);  // pop the worst.
00173         // Replace the seam in pair (deleting the old one) with
00174         // the new seam and score, then push back into the heap.
00175         pair.set_key(seam_pair.key());
00176         pair.set_data(seam);
00177         seam_pile->Push(&pair);
00178       } else {
00179         delete seam;
00180       }
00181     }
00182 
00183     my_priority = seam_queue->empty() ? NO_FULL_PRIORITY
00184                                       : seam_queue->PeekTop().key();
00185     if ((my_priority > chop_ok_split) ||
00186       (my_priority > chop_good_split && split))
00187       return;
00188   }
00189 }
00190 
00191 
00192 /**********************************************************************
00193  * combine_seam
00194  *
00195  * Find other seams to combine with this one.  The new seams that result
00196  * from this union should be added to the seam queue.  The return value
00197  * tells whether or not any additional seams were added to the queue.
00198  **********************************************************************/
00199 void Wordrec::combine_seam(const SeamPile& seam_pile,
00200                            const SEAM* seam, SeamQueue* seam_queue) {
00201   register inT16 dist;
00202   inT16 bottom1, top1;
00203   inT16 bottom2, top2;
00204 
00205   SEAM *new_one;
00206   const SEAM *this_one;
00207 
00208   bottom1 = seam->split1->point1->pos.y;
00209   if (seam->split1->point2->pos.y >= bottom1)
00210     top1 = seam->split1->point2->pos.y;
00211   else {
00212     top1 = bottom1;
00213     bottom1 = seam->split1->point2->pos.y;
00214   }
00215   if (seam->split2 != NULL) {
00216     bottom2 = seam->split2->point1->pos.y;
00217     if (seam->split2->point2->pos.y >= bottom2)
00218       top2 = seam->split2->point2->pos.y;
00219     else {
00220       top2 = bottom2;
00221       bottom2 = seam->split2->point2->pos.y;
00222     }
00223   }
00224   else {
00225     bottom2 = bottom1;
00226     top2 = top1;
00227   }
00228   for (int x = 0; x < seam_pile.size(); ++x) {
00229     this_one = seam_pile.get(x).data();
00230     dist = seam->location.x - this_one->location.x;
00231     if (-SPLIT_CLOSENESS < dist &&
00232       dist < SPLIT_CLOSENESS &&
00233     seam->priority + this_one->priority < chop_ok_split) {
00234       inT16 split1_point1_y = this_one->split1->point1->pos.y;
00235       inT16 split1_point2_y = this_one->split1->point2->pos.y;
00236       inT16 split2_point1_y = 0;
00237       inT16 split2_point2_y = 0;
00238       if (this_one->split2) {
00239         split2_point1_y = this_one->split2->point1->pos.y;
00240         split2_point2_y = this_one->split2->point2->pos.y;
00241       }
00242       if (
00244         (
00245           /* this_one->split1 always exists */
00246           (
00247             ((split1_point1_y >= top1 && split1_point2_y >= top1) ||
00248              (split1_point1_y <= bottom1 && split1_point2_y <= bottom1))
00249             &&
00250             ((split1_point1_y >= top2 && split1_point2_y >= top2) ||
00251              (split1_point1_y <= bottom2 && split1_point2_y <= bottom2))
00252           )
00253         )
00254         &&
00255         (
00256           this_one->split2 == NULL ||
00257           (
00258             ((split2_point1_y >= top1 && split2_point2_y >= top1) ||
00259              (split2_point1_y <= bottom1 && split2_point2_y <= bottom1))
00260             &&
00261             ((split2_point1_y >= top2 && split2_point2_y >= top2) ||
00262              (split2_point1_y <= bottom2 && split2_point2_y <= bottom2))
00263           )
00264         )
00265       ) {
00266         new_one = join_two_seams (seam, this_one);
00267         if (new_one != NULL) {
00268           if (chop_debug > 1)
00269             print_seam ("Combo priority       ", new_one);
00270           add_seam_to_queue(new_one->priority, new_one, seam_queue);
00271         }
00272       }
00273     }
00274   }
00275 }
00276 
00277 
00278 /**********************************************************************
00279  * constrained_split
00280  *
00281  * Constrain this split to obey certain rules.  It must not cross any
00282  * inner outline.  It must not cut off a small chunk of the outline.
00283  **********************************************************************/
00284 inT16 Wordrec::constrained_split(SPLIT *split, TBLOB *blob) {
00285   TESSLINE *outline;
00286 
00287   if (is_little_chunk (split->point1, split->point2))
00288     return (FALSE);
00289 
00290   for (outline = blob->outlines; outline; outline = outline->next) {
00291     if (split_bounds_overlap (split, outline) &&
00292     crosses_outline (split->point1, split->point2, outline->loop)) {
00293       return (FALSE);
00294     }
00295   }
00296   return (TRUE);
00297 }
00298 
00299 /**********************************************************************
00300  * pick_good_seam
00301  *
00302  * Find and return a good seam that will split this blob into two pieces.
00303  * Work from the outlines provided.
00304  **********************************************************************/
00305 SEAM *Wordrec::pick_good_seam(TBLOB *blob) {
00306   SeamPile seam_pile(chop_seam_pile_size);
00307   EDGEPT *points[MAX_NUM_POINTS];
00308   EDGEPT_CLIST new_points;
00309   SEAM *seam = NULL;
00310   TESSLINE *outline;
00311   inT16 num_points = 0;
00312 
00313 #ifndef GRAPHICS_DISABLED
00314   if (chop_debug > 2)
00315     wordrec_display_splits.set_value(true);
00316 
00317   draw_blob_edges(blob);
00318 #endif
00319 
00320   PointHeap point_heap(MAX_NUM_POINTS);
00321   for (outline = blob->outlines; outline; outline = outline->next)
00322     prioritize_points(outline, &point_heap);
00323 
00324   while (!point_heap.empty() && num_points < MAX_NUM_POINTS) {
00325     points[num_points++] = point_heap.PeekTop().data;
00326     point_heap.Pop(NULL);
00327   }
00328 
00329   /* Initialize queue */
00330   SeamQueue seam_queue(MAX_NUM_SEAMS);
00331 
00332   try_point_pairs(points, num_points, &seam_queue, &seam_pile, &seam, blob);
00333   try_vertical_splits(points, num_points, &new_points,
00334                       &seam_queue, &seam_pile, &seam, blob);
00335 
00336   if (seam == NULL) {
00337     choose_best_seam(&seam_queue, NULL, BAD_PRIORITY, &seam, blob, &seam_pile);
00338   }
00339   else if (seam->priority > chop_good_split) {
00340     choose_best_seam(&seam_queue, NULL, seam->priority,
00341                      &seam, blob, &seam_pile);
00342   }
00343 
00344   EDGEPT_C_IT it(&new_points);
00345   for (it.mark_cycle_pt(); !it.cycled_list(); it.forward()) {
00346     EDGEPT *inserted_point = it.data();
00347     if (!point_used_by_seam(seam, inserted_point)) {
00348       for (outline = blob->outlines; outline; outline = outline->next) {
00349         if (outline->loop == inserted_point) {
00350           outline->loop = outline->loop->next;
00351         }
00352       }
00353       remove_edgept(inserted_point);
00354     }
00355   }
00356 
00357   if (seam) {
00358     if (seam->priority > chop_ok_split) {
00359       delete seam;
00360       seam = NULL;
00361     }
00362 #ifndef GRAPHICS_DISABLED
00363     else if (wordrec_display_splits) {
00364       if (seam->split1)
00365         mark_split (seam->split1);
00366       if (seam->split2)
00367         mark_split (seam->split2);
00368       if (seam->split3)
00369         mark_split (seam->split3);
00370       if (chop_debug > 2) {
00371         update_edge_window();
00372         edge_window_wait();
00373       }
00374     }
00375 #endif
00376   }
00377 
00378   if (chop_debug)
00379     wordrec_display_splits.set_value(false);
00380 
00381   return (seam);
00382 }
00383 
00384 
00385 /**********************************************************************
00386  * seam_priority
00387  *
00388  * Assign a full priority value to the seam.
00389  **********************************************************************/
00390 PRIORITY Wordrec::seam_priority(SEAM *seam, inT16 xmin, inT16 xmax) {
00391   PRIORITY priority;
00392 
00393   if (seam->split1 == NULL)
00394     priority = 0;
00395 
00396   else if (seam->split2 == NULL) {
00397     priority = (seam->priority +
00398       full_split_priority (seam->split1, xmin, xmax));
00399   }
00400 
00401   else if (seam->split3 == NULL) {
00402     split_outline (seam->split2->point1, seam->split2->point2);
00403     priority = (seam->priority +
00404       full_split_priority (seam->split1, xmin, xmax));
00405     unsplit_outlines (seam->split2->point1, seam->split2->point2);
00406   }
00407 
00408   else {
00409     split_outline (seam->split2->point1, seam->split2->point2);
00410     split_outline (seam->split3->point1, seam->split3->point2);
00411     priority = (seam->priority +
00412       full_split_priority (seam->split1, xmin, xmax));
00413     unsplit_outlines (seam->split3->point1, seam->split3->point2);
00414     unsplit_outlines (seam->split2->point1, seam->split2->point2);
00415   }
00416 
00417   return (priority);
00418 }
00419 
00420 
00421 /**********************************************************************
00422  * try_point_pairs
00423  *
00424  * Try all the splits that are produced by pairing critical points
00425  * together.  See if any of them are suitable for use.  Use a seam
00426  * queue and seam pile that have already been initialized and used.
00427  **********************************************************************/
00428 void Wordrec::try_point_pairs(EDGEPT * points[MAX_NUM_POINTS],
00429                               inT16 num_points,
00430                               SeamQueue* seam_queue,
00431                               SeamPile* seam_pile,
00432                               SEAM ** seam,
00433                               TBLOB * blob) {
00434   inT16 x;
00435   inT16 y;
00436   SPLIT *split;
00437   PRIORITY priority;
00438 
00439   for (x = 0; x < num_points; x++) {
00440     for (y = x + 1; y < num_points; y++) {
00441 
00442       if (points[y] &&
00443           weighted_edgept_dist(points[x], points[y],
00444                                chop_x_y_weight) < chop_split_length &&
00445           points[x] != points[y]->next &&
00446           points[y] != points[x]->next &&
00447           !is_exterior_point(points[x], points[y]) &&
00448           !is_exterior_point(points[y], points[x])) {
00449         split = new_split (points[x], points[y]);
00450         priority = partial_split_priority (split);
00451 
00452         choose_best_seam(seam_queue, split, priority, seam, blob, seam_pile);
00453       }
00454     }
00455   }
00456 }
00457 
00458 
00459 /**********************************************************************
00460  * try_vertical_splits
00461  *
00462  * Try all the splits that are produced by vertical projection to see
00463  * if any of them are suitable for use.  Use a seam queue and seam pile
00464  * that have already been initialized and used.
00465  * Return in new_points a collection of points that were inserted into
00466  * the blob while examining vertical splits and which may safely be
00467  * removed once a seam is chosen if they are not part of the seam.
00468  **********************************************************************/
00469 void Wordrec::try_vertical_splits(EDGEPT * points[MAX_NUM_POINTS],
00470                                   inT16 num_points,
00471                                   EDGEPT_CLIST *new_points,
00472                                   SeamQueue* seam_queue,
00473                                   SeamPile* seam_pile,
00474                                   SEAM ** seam,
00475                                   TBLOB * blob) {
00476   EDGEPT *vertical_point = NULL;
00477   SPLIT *split;
00478   inT16 x;
00479   PRIORITY priority;
00480   TESSLINE *outline;
00481 
00482   for (x = 0; x < num_points; x++) {
00483     vertical_point = NULL;
00484     for (outline = blob->outlines; outline; outline = outline->next) {
00485       vertical_projection_point(points[x], outline->loop,
00486                                 &vertical_point, new_points);
00487     }
00488 
00489     if (vertical_point &&
00490       points[x] != vertical_point->next &&
00491       vertical_point != points[x]->next &&
00492       weighted_edgept_dist(points[x], vertical_point,
00493                            chop_x_y_weight) < chop_split_length) {
00494 
00495       split = new_split (points[x], vertical_point);
00496       priority = partial_split_priority (split);
00497 
00498       choose_best_seam(seam_queue, split, priority, seam, blob, seam_pile);
00499     }
00500   }
00501 }
00502 
00503 }
 All Classes Namespaces Files Functions Variables Typedefs Enumerations Enumerator Friends Defines