tesseract  3.03
/usr/local/google/home/jbreiden/tesseract-ocr-read-only/ccstruct/seam.cpp
Go to the documentation of this file.
00001 /* -*-C-*-
00002  ********************************************************************************
00003  *
00004  * File:        seam.c  (Formerly seam.c)
00005  * Description:
00006  * Author:       Mark Seaman, OCR Technology
00007  * Created:      Fri Oct 16 14:37:00 1987
00008  * Modified:     Fri May 17 16:30:13 1991 (Mark Seaman) marks@hpgrlt
00009  * Language:     C
00010  * Package:      N/A
00011  * Status:       Reusable Software Component
00012  *
00013  * (c) Copyright 1987, Hewlett-Packard Company.
00014  ** Licensed under the Apache License, Version 2.0 (the "License");
00015  ** you may not use this file except in compliance with the License.
00016  ** You may obtain a copy of the License at
00017  ** http://www.apache.org/licenses/LICENSE-2.0
00018  ** Unless required by applicable law or agreed to in writing, software
00019  ** distributed under the License is distributed on an "AS IS" BASIS,
00020  ** WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
00021  ** See the License for the specific language governing permissions and
00022  ** limitations under the License.
00023  *
00024  *********************************************************************************/
00025 /*----------------------------------------------------------------------
00026               I n c l u d e s
00027 ----------------------------------------------------------------------*/
00028 #include "seam.h"
00029 #include "blobs.h"
00030 #include "freelist.h"
00031 #include "tprintf.h"
00032 
00033 #ifdef __UNIX__
00034 #include <assert.h>
00035 #endif
00036 
00037 /*----------------------------------------------------------------------
00038               V a r i a b l e s
00039 ----------------------------------------------------------------------*/
00040 #define NUM_STARTING_SEAMS  20
00041 
00042 /*----------------------------------------------------------------------
00043         Public Function Code
00044 ----------------------------------------------------------------------*/
00052 bool point_in_split(SPLIT *split, EDGEPT *point1, EDGEPT *point2) {
00053   return ((split) ? ((exact_point (split->point1, point1) ||
00054                       exact_point (split->point1, point2) ||
00055                       exact_point (split->point2, point1) ||
00056                       exact_point (split->point2, point2)) ? TRUE : FALSE)
00057                   : FALSE);
00058 }
00059 
00060 
00068 bool point_in_seam(const SEAM *seam, SPLIT *split) {
00069   return (point_in_split(seam->split1, split->point1, split->point2) ||
00070           point_in_split(seam->split2, split->point1, split->point2) ||
00071           point_in_split(seam->split3, split->point1, split->point2));
00072 }
00073 
00080 bool point_used_by_split(SPLIT *split, EDGEPT *point) {
00081   if (split == NULL) return false;
00082   return point == split->point1 || point == split->point2;
00083 }
00084 
00091 bool point_used_by_seam(SEAM *seam, EDGEPT *point) {
00092   if (seam == NULL) return false;
00093   return point_used_by_split(seam->split1, point) ||
00094       point_used_by_split(seam->split2, point) ||
00095       point_used_by_split(seam->split3, point);
00096 }
00097 
00105 void combine_seams(SEAM *dest_seam, SEAM *source_seam) {
00106   dest_seam->priority += source_seam->priority;
00107   dest_seam->location += source_seam->location;
00108   dest_seam->location /= 2;
00109 
00110   if (source_seam->split1) {
00111     if (!dest_seam->split1)
00112       dest_seam->split1 = source_seam->split1;
00113     else if (!dest_seam->split2)
00114       dest_seam->split2 = source_seam->split1;
00115     else if (!dest_seam->split3)
00116       dest_seam->split3 = source_seam->split1;
00117     else
00118       delete source_seam->split1;  // Wouldn't have fitted.
00119     source_seam->split1 = NULL;
00120   }
00121   if (source_seam->split2) {
00122     if (!dest_seam->split2)
00123       dest_seam->split2 = source_seam->split2;
00124     else if (!dest_seam->split3)
00125       dest_seam->split3 = source_seam->split2;
00126     else
00127       delete source_seam->split2;  // Wouldn't have fitted.
00128     source_seam->split2 = NULL;
00129   }
00130   if (source_seam->split3) {
00131     if (!dest_seam->split3)
00132       dest_seam->split3 = source_seam->split3;
00133     else
00134       delete source_seam->split3;  // Wouldn't have fitted.
00135     source_seam->split3 = NULL;
00136   }
00137   delete source_seam;
00138 }
00139 
00147 void start_seam_list(TWERD *word, GenericVector<SEAM*>* seam_array) {
00148   seam_array->truncate(0);
00149   TPOINT location;
00150 
00151   for (int b = 1; b < word->NumBlobs(); ++b) {
00152     TBOX bbox = word->blobs[b - 1]->bounding_box();
00153     TBOX nbox = word->blobs[b]->bounding_box();
00154     location.x = (bbox.right() + nbox.left()) / 2;
00155     location.y = (bbox.bottom() + bbox.top() + nbox.bottom() + nbox.top()) / 4;
00156     seam_array->push_back(new SEAM(0.0f, location, NULL, NULL, NULL));
00157   }
00158 }
00159 
00160 
00166 bool test_insert_seam(const GenericVector<SEAM*>& seam_array,
00167                       TWERD *word, int index) {
00168   SEAM *test_seam;
00169   int list_length = seam_array.size();
00170   for (int test_index = 0; test_index < index; ++test_index) {
00171     test_seam = seam_array[test_index];
00172     if (test_index + test_seam->widthp < index &&
00173         test_seam->widthp + test_index == index - 1 &&
00174         account_splits(test_seam, word, test_index + 1, 1) < 0)
00175       return false;
00176   }
00177   for (int test_index = index; test_index < list_length; test_index++) {
00178     test_seam = seam_array[test_index];
00179     if (test_index - test_seam->widthn >= index &&
00180         test_index - test_seam->widthn == index &&
00181         account_splits(test_seam, word, test_index + 1, -1) < 0)
00182       return false;
00183   }
00184   return true;
00185 }
00186 
00193 void insert_seam(const TWERD* word, int index, SEAM *seam,
00194                  GenericVector<SEAM*>* seam_array) {
00195   SEAM *test_seam;
00196   int list_length = seam_array->size();
00197   for (int test_index = 0; test_index < index; ++test_index) {
00198     test_seam = seam_array->get(test_index);
00199     if (test_index + test_seam->widthp >= index) {
00200       test_seam->widthp++;       /*got in the way */
00201     } else if (test_seam->widthp + test_index == index - 1) {
00202       test_seam->widthp = account_splits(test_seam, word, test_index + 1, 1);
00203       if (test_seam->widthp < 0) {
00204         tprintf("Failed to find any right blob for a split!\n");
00205         print_seam("New dud seam", seam);
00206         print_seam("Failed seam", test_seam);
00207       }
00208     }
00209   }
00210   for (int test_index = index; test_index < list_length; test_index++) {
00211     test_seam = seam_array->get(test_index);
00212     if (test_index - test_seam->widthn < index) {
00213       test_seam->widthn++;       /*got in the way */
00214     } else if (test_index - test_seam->widthn == index) {
00215       test_seam->widthn = account_splits(test_seam, word, test_index + 1, -1);
00216       if (test_seam->widthn < 0) {
00217         tprintf("Failed to find any left blob for a split!\n");
00218         print_seam("New dud seam", seam);
00219         print_seam("Failed seam", test_seam);
00220       }
00221     }
00222   }
00223   seam_array->insert(seam, index);
00224 }
00225 
00226 
00233 int account_splits(const SEAM *seam, const TWERD *word, int blob_index,
00234                    int blob_direction) {
00235   inT8 found_em[3];
00236   inT8 width;
00237 
00238   found_em[0] = seam->split1 == NULL;
00239   found_em[1] = seam->split2 == NULL;
00240   found_em[2] = seam->split3 == NULL;
00241   if (found_em[0] && found_em[1] && found_em[2])
00242     return 0;
00243   width = 0;
00244   do {
00245     TBLOB* blob = word->blobs[blob_index];
00246     if (!found_em[0])
00247       found_em[0] = find_split_in_blob(seam->split1, blob);
00248     if (!found_em[1])
00249       found_em[1] = find_split_in_blob(seam->split2, blob);
00250     if (!found_em[2])
00251       found_em[2] = find_split_in_blob(seam->split3, blob);
00252     if (found_em[0] && found_em[1] && found_em[2]) {
00253       return width;
00254     }
00255     width++;
00256     blob_index += blob_direction;
00257   } while (0 <= blob_index && blob_index < word->NumBlobs());
00258   return -1;
00259 }
00260 
00261 
00267 bool find_split_in_blob(SPLIT *split, TBLOB *blob) {
00268   TESSLINE *outline;
00269 
00270   for (outline = blob->outlines; outline != NULL; outline = outline->next)
00271     if (outline->Contains(split->point1->pos))
00272       break;
00273   if (outline == NULL)
00274     return FALSE;
00275   for (outline = blob->outlines; outline != NULL; outline = outline->next)
00276     if (outline->Contains(split->point2->pos))
00277       return TRUE;
00278   return FALSE;
00279 }
00280 
00281 
00288 SEAM *join_two_seams(const SEAM *seam1, const SEAM *seam2) {
00289   SEAM *result = NULL;
00290   SEAM *temp;
00291 
00292   assert(seam1 &&seam2);
00293 
00294   if (((seam1->split3 == NULL && seam2->split2 == NULL) ||
00295        (seam1->split2 == NULL && seam2->split3 == NULL) ||
00296         seam1->split1 == NULL || seam2->split1 == NULL) &&
00297       (!shared_split_points(seam1, seam2))) {
00298     result = new SEAM(*seam1);
00299     temp = new SEAM(*seam2);
00300     combine_seams(result, temp);
00301   }
00302   return (result);
00303 }
00304 
00311 void print_seam(const char *label, SEAM *seam) {
00312   if (seam) {
00313     tprintf(label);
00314     tprintf(" %6.2f @ (%d,%d), p=%d, n=%d ",
00315             seam->priority, seam->location.x, seam->location.y,
00316             seam->widthp, seam->widthn);
00317     print_split(seam->split1);
00318 
00319     if (seam->split2) {
00320       tprintf(",   ");
00321       print_split (seam->split2);
00322       if (seam->split3) {
00323         tprintf(",   ");
00324         print_split (seam->split3);
00325       }
00326     }
00327     tprintf("\n");
00328   }
00329 }
00330 
00331 
00338 void print_seams(const char *label, const GenericVector<SEAM*>& seams) {
00339   char number[CHARS_PER_LINE];
00340 
00341   if (!seams.empty()) {
00342     tprintf("%s\n", label);
00343     for (int x = 0; x < seams.size(); ++x) {
00344       sprintf(number, "%2d:   ", x);
00345       print_seam(number, seams[x]);
00346     }
00347     tprintf("\n");
00348   }
00349 }
00350 
00351 
00359 int shared_split_points(const SEAM *seam1, const SEAM *seam2) {
00360   if (seam1 == NULL || seam2 == NULL)
00361     return (FALSE);
00362 
00363   if (seam2->split1 == NULL)
00364     return (FALSE);
00365   if (point_in_seam(seam1, seam2->split1))
00366     return (TRUE);
00367 
00368   if (seam2->split2 == NULL)
00369     return (FALSE);
00370   if (point_in_seam(seam1, seam2->split2))
00371     return (TRUE);
00372 
00373   if (seam2->split3 == NULL)
00374     return (FALSE);
00375   if (point_in_seam(seam1, seam2->split3))
00376     return (TRUE);
00377 
00378   return (FALSE);
00379 }
00380 
00381 /**********************************************************************
00382  * break_pieces
00383  *
00384  * Break up the blobs in this chain so that they are all independent.
00385  * This operation should undo the affect of join_pieces.
00386  **********************************************************************/
00387 void break_pieces(const GenericVector<SEAM*>& seams, int first, int last,
00388                   TWERD *word) {
00389   for (int x = first; x < last; ++x)
00390     reveal_seam(seams[x]);
00391 
00392   TESSLINE *outline = word->blobs[first]->outlines;
00393   int next_blob = first + 1;
00394 
00395   while (outline != NULL && next_blob <= last) {
00396     if (outline->next == word->blobs[next_blob]->outlines) {
00397       outline->next = NULL;
00398       outline = word->blobs[next_blob]->outlines;
00399       ++next_blob;
00400     } else {
00401       outline = outline->next;
00402     }
00403   }
00404 }
00405 
00406 
00407 /**********************************************************************
00408  * join_pieces
00409  *
00410  * Join a group of base level pieces into a single blob that can then
00411  * be classified.
00412  **********************************************************************/
00413 void join_pieces(const GenericVector<SEAM*>& seams, int first, int last,
00414                  TWERD *word) {
00415   TESSLINE *outline = word->blobs[first]->outlines;
00416   if (!outline)
00417     return;
00418 
00419   for (int x = first; x < last; ++x) {
00420     SEAM *seam = seams[x];
00421     if (x - seam->widthn >= first && x + seam->widthp < last)
00422       hide_seam(seam);
00423     while (outline->next)
00424       outline = outline->next;
00425     outline->next = word->blobs[x + 1]->outlines;
00426   }
00427 }
00428 
00429 
00430 /**********************************************************************
00431  * hide_seam
00432  *
00433  * Change the edge points that are referenced by this seam to make
00434  * them hidden edges.
00435  **********************************************************************/
00436 void hide_seam(SEAM *seam) {
00437   if (seam == NULL || seam->split1 == NULL)
00438     return;
00439   hide_edge_pair (seam->split1->point1, seam->split1->point2);
00440 
00441   if (seam->split2 == NULL)
00442     return;
00443   hide_edge_pair (seam->split2->point1, seam->split2->point2);
00444 
00445   if (seam->split3 == NULL)
00446     return;
00447   hide_edge_pair (seam->split3->point1, seam->split3->point2);
00448 }
00449 
00450 
00451 /**********************************************************************
00452  * hide_edge_pair
00453  *
00454  * Change the edge points that are referenced by this seam to make
00455  * them hidden edges.
00456  **********************************************************************/
00457 void hide_edge_pair(EDGEPT *pt1, EDGEPT *pt2) {
00458   EDGEPT *edgept;
00459 
00460   edgept = pt1;
00461   do {
00462     edgept->Hide();
00463     edgept = edgept->next;
00464   }
00465   while (!exact_point (edgept, pt2) && edgept != pt1);
00466   if (edgept == pt1) {
00467     /*              tprintf("Hid entire outline at (%d,%d)!!\n",
00468        edgept->pos.x,edgept->pos.y);                                */
00469   }
00470   edgept = pt2;
00471   do {
00472     edgept->Hide();
00473     edgept = edgept->next;
00474   }
00475   while (!exact_point (edgept, pt1) && edgept != pt2);
00476   if (edgept == pt2) {
00477     /*              tprintf("Hid entire outline at (%d,%d)!!\n",
00478        edgept->pos.x,edgept->pos.y);                                */
00479   }
00480 }
00481 
00482 
00483 /**********************************************************************
00484  * reveal_seam
00485  *
00486  * Change the edge points that are referenced by this seam to make
00487  * them hidden edges.
00488  **********************************************************************/
00489 void reveal_seam(SEAM *seam) {
00490   if (seam == NULL || seam->split1 == NULL)
00491     return;
00492   reveal_edge_pair (seam->split1->point1, seam->split1->point2);
00493 
00494   if (seam->split2 == NULL)
00495     return;
00496   reveal_edge_pair (seam->split2->point1, seam->split2->point2);
00497 
00498   if (seam->split3 == NULL)
00499     return;
00500   reveal_edge_pair (seam->split3->point1, seam->split3->point2);
00501 }
00502 
00503 
00504 /**********************************************************************
00505  * reveal_edge_pair
00506  *
00507  * Change the edge points that are referenced by this seam to make
00508  * them hidden edges.
00509  **********************************************************************/
00510 void reveal_edge_pair(EDGEPT *pt1, EDGEPT *pt2) {
00511   EDGEPT *edgept;
00512 
00513   edgept = pt1;
00514   do {
00515     edgept->Reveal();
00516     edgept = edgept->next;
00517   }
00518   while (!exact_point (edgept, pt2) && edgept != pt1);
00519   if (edgept == pt1) {
00520     /*              tprintf("Hid entire outline at (%d,%d)!!\n",
00521        edgept->pos.x,edgept->pos.y);                                */
00522   }
00523   edgept = pt2;
00524   do {
00525     edgept->Reveal();
00526     edgept = edgept->next;
00527   }
00528   while (!exact_point (edgept, pt1) && edgept != pt2);
00529   if (edgept == pt2) {
00530     /*              tprintf("Hid entire outline at (%d,%d)!!\n",
00531        edgept->pos.x,edgept->pos.y);                                */
00532   }
00533 }
 All Classes Namespaces Files Functions Variables Typedefs Enumerations Enumerator Friends Defines