tesseract
3.03
|
00001 /* -*-C-*- 00002 ******************************************************************************** 00003 * 00004 * File: findseam.c (Formerly findseam.c) 00005 * Description: 00006 * Author: Mark Seaman, OCR Technology 00007 * Created: Fri Oct 16 14:37:00 1987 00008 * Modified: Tue Jul 30 15:44:59 1991 (Mark Seaman) marks@hpgrlt 00009 * Language: C 00010 * Package: N/A 00011 * Status: Reusable Software Component 00012 * 00013 * (c) Copyright 1987, Hewlett-Packard Company. 00014 ** Licensed under the Apache License, Version 2.0 (the "License"); 00015 ** you may not use this file except in compliance with the License. 00016 ** You may obtain a copy of the License at 00017 ** http://www.apache.org/licenses/LICENSE-2.0 00018 ** Unless required by applicable law or agreed to in writing, software 00019 ** distributed under the License is distributed on an "AS IS" BASIS, 00020 ** WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 00021 ** See the License for the specific language governing permissions and 00022 ** limitations under the License. 00023 * 00024 *********************************************************************************/ 00025 /*---------------------------------------------------------------------- 00026 I n c l u d e s 00027 ----------------------------------------------------------------------*/ 00028 #include "findseam.h" 00029 #include "gradechop.h" 00030 #include "olutil.h" 00031 #include "plotedges.h" 00032 #include "outlines.h" 00033 #include "freelist.h" 00034 #include "seam.h" 00035 #include "wordrec.h" 00036 00037 // Include automatically generated configuration file if running autoconf. 
00038 #ifdef HAVE_CONFIG_H 00039 #include "config_auto.h" 00040 #endif 00041 00042 /*---------------------------------------------------------------------- 00043 T y p e s 00044 ----------------------------------------------------------------------*/ 00045 #define SPLIT_CLOSENESS 20/* Exclusive limit on the x distance between two seams that combine_seam may join */ 00046 /* Maximum number of seams kept in the seam queue */ 00047 #define MAX_NUM_SEAMS 150 00048 /* How many to keep */ 00049 #define MAX_OLD_SEAMS 150 00050 #define NO_FULL_PRIORITY -1/* Special marker priority, used by choose_best_seam when the seam queue is empty */ 00051 /* Evaluate right away; also assigned to a seam that fails constrained_split */ 00052 #define BAD_PRIORITY 9999.0 00053 00054 /*---------------------------------------------------------------------- 00055 F u n c t i o n s 00056 ----------------------------------------------------------------------*/ 00057 namespace tesseract { 00058 00059 /********************************************************************** 00060 * add_seam_to_queue 00061 * 00062 * Adds the given new_seam to the seams priority queue, unless it is full 00063 * and the new seam is worse than the worst. 
00064 **********************************************************************/ 00065 void Wordrec::add_seam_to_queue(float new_priority, SEAM *new_seam, 00066 SeamQueue* seams) { 00067 if (new_seam == NULL) return; 00068 if (chop_debug) { 00069 tprintf("Pushing new seam with priority %g :", new_priority); 00070 print_seam("seam: ", new_seam); 00071 } 00072 if (seams->size() >= MAX_NUM_SEAMS) { 00073 SeamPair old_pair(0, NULL); 00074 if (seams->PopWorst(&old_pair) && old_pair.key() <= new_priority) { 00075 if (chop_debug) { 00076 tprintf("Old seam staying with priority %g\n", old_pair.key()); 00077 } 00078 delete new_seam; 00079 seams->Push(&old_pair); 00080 return; 00081 } else if (chop_debug) { 00082 tprintf("New seam with priority %g beats old worst seam with %g\n", 00083 new_priority, old_pair.key()); 00084 } 00085 } 00086 SeamPair new_pair(new_priority, new_seam); 00087 seams->Push(&new_pair); 00088 } 00089 00090 00091 /********************************************************************** 00092 * choose_best_seam 00093 * 00094 * Choose the best seam that can be created by assembling this a 00095 * collection of splits. A queue of all the possible seams is 00096 * maintained. Each new split received is placed in that queue with 00097 * its partial priority value. These values in the seam queue are 00098 * evaluated and combined until a good enough seam is found. If no 00099 * further good seams are being found then this function returns to the 00100 * caller, who will send more splits. If this function is called with 00101 * a split of NULL, then no further splits can be supplied by the 00102 * caller. 
00103 **********************************************************************/ 00104 void Wordrec::choose_best_seam(SeamQueue* seam_queue, 00105 SPLIT *split, 00106 PRIORITY priority, 00107 SEAM **seam_result, 00108 TBLOB *blob, 00109 SeamPile* seam_pile) { 00110 SEAM *seam; 00111 char str[80]; 00112 float my_priority; 00113 /* Add seam of split */ 00114 my_priority = priority; 00115 if (split != NULL) { 00116 TPOINT split_point = split->point1->pos; 00117 split_point += split->point2->pos; 00118 split_point /= 2; 00119 seam = new SEAM(my_priority, split_point, split, NULL, NULL); 00120 if (chop_debug > 1) 00121 print_seam ("Partial priority ", seam); 00122 add_seam_to_queue(my_priority, seam, seam_queue); 00123 00124 if (my_priority > chop_good_split) 00125 return; 00126 } 00127 00128 TBOX bbox = blob->bounding_box(); 00129 /* Queue loop */ 00130 while (!seam_queue->empty()) { 00131 SeamPair seam_pair; 00132 seam_queue->Pop(&seam_pair); 00133 seam = seam_pair.extract_data(); 00134 /* Set full priority */ 00135 my_priority = seam_priority(seam, bbox.left(), bbox.right()); 00136 if (chop_debug) { 00137 sprintf (str, "Full my_priority %0.0f, ", my_priority); 00138 print_seam(str, seam); 00139 } 00140 00141 if ((*seam_result == NULL || (*seam_result)->priority > my_priority) && 00142 my_priority < chop_ok_split) { 00143 /* No crossing */ 00144 if (constrained_split(seam->split1, blob)) { 00145 delete *seam_result; 00146 *seam_result = new SEAM(*seam); 00147 (*seam_result)->priority = my_priority; 00148 } else { 00149 delete seam; 00150 seam = NULL; 00151 my_priority = BAD_PRIORITY; 00152 } 00153 } 00154 00155 if (my_priority < chop_good_split) { 00156 if (seam) 00157 delete seam; 00158 return; /* Made good answer */ 00159 } 00160 00161 if (seam) { 00162 /* Combine with others */ 00163 if (seam_pile->size() < chop_seam_pile_size) { 00164 combine_seam(*seam_pile, seam, seam_queue); 00165 SeamDecPair pair(seam_pair.key(), seam); 00166 seam_pile->Push(&pair); 00167 } else 
if (chop_new_seam_pile && 00168 seam_pile->size() == chop_seam_pile_size && 00169 seam_pile->PeekTop().key() > seam_pair.key()) { 00170 combine_seam(*seam_pile, seam, seam_queue); 00171 SeamDecPair pair; 00172 seam_pile->Pop(&pair); // pop the worst. 00173 // Replace the seam in pair (deleting the old one) with 00174 // the new seam and score, then push back into the heap. 00175 pair.set_key(seam_pair.key()); 00176 pair.set_data(seam); 00177 seam_pile->Push(&pair); 00178 } else { 00179 delete seam; 00180 } 00181 } 00182 00183 my_priority = seam_queue->empty() ? NO_FULL_PRIORITY 00184 : seam_queue->PeekTop().key(); 00185 if ((my_priority > chop_ok_split) || 00186 (my_priority > chop_good_split && split)) 00187 return; 00188 } 00189 } 00190 00191 00192 /********************************************************************** 00193 * combine_seam 00194 * 00195 * Find other seams to combine with this one. The new seams that result 00196 * from this union should be added to the seam queue. The return value 00197 * tells whether or not any additional seams were added to the queue. 
00198 **********************************************************************/ 00199 void Wordrec::combine_seam(const SeamPile& seam_pile, 00200 const SEAM* seam, SeamQueue* seam_queue) { 00201 register inT16 dist; 00202 inT16 bottom1, top1; 00203 inT16 bottom2, top2; 00204 00205 SEAM *new_one; 00206 const SEAM *this_one; 00207 00208 bottom1 = seam->split1->point1->pos.y; 00209 if (seam->split1->point2->pos.y >= bottom1) 00210 top1 = seam->split1->point2->pos.y; 00211 else { 00212 top1 = bottom1; 00213 bottom1 = seam->split1->point2->pos.y; 00214 } 00215 if (seam->split2 != NULL) { 00216 bottom2 = seam->split2->point1->pos.y; 00217 if (seam->split2->point2->pos.y >= bottom2) 00218 top2 = seam->split2->point2->pos.y; 00219 else { 00220 top2 = bottom2; 00221 bottom2 = seam->split2->point2->pos.y; 00222 } 00223 } 00224 else { 00225 bottom2 = bottom1; 00226 top2 = top1; 00227 } 00228 for (int x = 0; x < seam_pile.size(); ++x) { 00229 this_one = seam_pile.get(x).data(); 00230 dist = seam->location.x - this_one->location.x; 00231 if (-SPLIT_CLOSENESS < dist && 00232 dist < SPLIT_CLOSENESS && 00233 seam->priority + this_one->priority < chop_ok_split) { 00234 inT16 split1_point1_y = this_one->split1->point1->pos.y; 00235 inT16 split1_point2_y = this_one->split1->point2->pos.y; 00236 inT16 split2_point1_y = 0; 00237 inT16 split2_point2_y = 0; 00238 if (this_one->split2) { 00239 split2_point1_y = this_one->split2->point1->pos.y; 00240 split2_point2_y = this_one->split2->point2->pos.y; 00241 } 00242 if ( 00244 ( 00245 /* this_one->split1 always exists */ 00246 ( 00247 ((split1_point1_y >= top1 && split1_point2_y >= top1) || 00248 (split1_point1_y <= bottom1 && split1_point2_y <= bottom1)) 00249 && 00250 ((split1_point1_y >= top2 && split1_point2_y >= top2) || 00251 (split1_point1_y <= bottom2 && split1_point2_y <= bottom2)) 00252 ) 00253 ) 00254 && 00255 ( 00256 this_one->split2 == NULL || 00257 ( 00258 ((split2_point1_y >= top1 && split2_point2_y >= top1) || 00259 
(split2_point1_y <= bottom1 && split2_point2_y <= bottom1)) 00260 && 00261 ((split2_point1_y >= top2 && split2_point2_y >= top2) || 00262 (split2_point1_y <= bottom2 && split2_point2_y <= bottom2)) 00263 ) 00264 ) 00265 ) { 00266 new_one = join_two_seams (seam, this_one); 00267 if (new_one != NULL) { 00268 if (chop_debug > 1) 00269 print_seam ("Combo priority ", new_one); 00270 add_seam_to_queue(new_one->priority, new_one, seam_queue); 00271 } 00272 } 00273 } 00274 } 00275 } 00276 00277 00278 /********************************************************************** 00279 * constrained_split 00280 * 00281 * Constrain this split to obey certain rules. It must not cross any 00282 * inner outline. It must not cut off a small chunk of the outline. 00283 **********************************************************************/ 00284 inT16 Wordrec::constrained_split(SPLIT *split, TBLOB *blob) { 00285 TESSLINE *outline; 00286 00287 if (is_little_chunk (split->point1, split->point2)) 00288 return (FALSE); 00289 00290 for (outline = blob->outlines; outline; outline = outline->next) { 00291 if (split_bounds_overlap (split, outline) && 00292 crosses_outline (split->point1, split->point2, outline->loop)) { 00293 return (FALSE); 00294 } 00295 } 00296 return (TRUE); 00297 } 00298 00299 /********************************************************************** 00300 * pick_good_seam 00301 * 00302 * Find and return a good seam that will split this blob into two pieces. 00303 * Work from the outlines provided. 
00304 **********************************************************************/ 00305 SEAM *Wordrec::pick_good_seam(TBLOB *blob) { 00306 SeamPile seam_pile(chop_seam_pile_size); 00307 EDGEPT *points[MAX_NUM_POINTS]; 00308 EDGEPT_CLIST new_points; 00309 SEAM *seam = NULL; 00310 TESSLINE *outline; 00311 inT16 num_points = 0; 00312 00313 #ifndef GRAPHICS_DISABLED 00314 if (chop_debug > 2) 00315 wordrec_display_splits.set_value(true); 00316 00317 draw_blob_edges(blob); 00318 #endif 00319 00320 PointHeap point_heap(MAX_NUM_POINTS); 00321 for (outline = blob->outlines; outline; outline = outline->next) 00322 prioritize_points(outline, &point_heap); 00323 00324 while (!point_heap.empty() && num_points < MAX_NUM_POINTS) { 00325 points[num_points++] = point_heap.PeekTop().data; 00326 point_heap.Pop(NULL); 00327 } 00328 00329 /* Initialize queue */ 00330 SeamQueue seam_queue(MAX_NUM_SEAMS); 00331 00332 try_point_pairs(points, num_points, &seam_queue, &seam_pile, &seam, blob); 00333 try_vertical_splits(points, num_points, &new_points, 00334 &seam_queue, &seam_pile, &seam, blob); 00335 00336 if (seam == NULL) { 00337 choose_best_seam(&seam_queue, NULL, BAD_PRIORITY, &seam, blob, &seam_pile); 00338 } 00339 else if (seam->priority > chop_good_split) { 00340 choose_best_seam(&seam_queue, NULL, seam->priority, 00341 &seam, blob, &seam_pile); 00342 } 00343 00344 EDGEPT_C_IT it(&new_points); 00345 for (it.mark_cycle_pt(); !it.cycled_list(); it.forward()) { 00346 EDGEPT *inserted_point = it.data(); 00347 if (!point_used_by_seam(seam, inserted_point)) { 00348 for (outline = blob->outlines; outline; outline = outline->next) { 00349 if (outline->loop == inserted_point) { 00350 outline->loop = outline->loop->next; 00351 } 00352 } 00353 remove_edgept(inserted_point); 00354 } 00355 } 00356 00357 if (seam) { 00358 if (seam->priority > chop_ok_split) { 00359 delete seam; 00360 seam = NULL; 00361 } 00362 #ifndef GRAPHICS_DISABLED 00363 else if (wordrec_display_splits) { 00364 if (seam->split1) 
00365 mark_split (seam->split1); 00366 if (seam->split2) 00367 mark_split (seam->split2); 00368 if (seam->split3) 00369 mark_split (seam->split3); 00370 if (chop_debug > 2) { 00371 update_edge_window(); 00372 edge_window_wait(); 00373 } 00374 } 00375 #endif 00376 } 00377 00378 if (chop_debug) 00379 wordrec_display_splits.set_value(false); 00380 00381 return (seam); 00382 } 00383 00384 00385 /********************************************************************** 00386 * seam_priority 00387 * 00388 * Assign a full priority value to the seam. 00389 **********************************************************************/ 00390 PRIORITY Wordrec::seam_priority(SEAM *seam, inT16 xmin, inT16 xmax) { 00391 PRIORITY priority; 00392 00393 if (seam->split1 == NULL) 00394 priority = 0; 00395 00396 else if (seam->split2 == NULL) { 00397 priority = (seam->priority + 00398 full_split_priority (seam->split1, xmin, xmax)); 00399 } 00400 00401 else if (seam->split3 == NULL) { 00402 split_outline (seam->split2->point1, seam->split2->point2); 00403 priority = (seam->priority + 00404 full_split_priority (seam->split1, xmin, xmax)); 00405 unsplit_outlines (seam->split2->point1, seam->split2->point2); 00406 } 00407 00408 else { 00409 split_outline (seam->split2->point1, seam->split2->point2); 00410 split_outline (seam->split3->point1, seam->split3->point2); 00411 priority = (seam->priority + 00412 full_split_priority (seam->split1, xmin, xmax)); 00413 unsplit_outlines (seam->split3->point1, seam->split3->point2); 00414 unsplit_outlines (seam->split2->point1, seam->split2->point2); 00415 } 00416 00417 return (priority); 00418 } 00419 00420 00421 /********************************************************************** 00422 * try_point_pairs 00423 * 00424 * Try all the splits that are produced by pairing critical points 00425 * together. See if any of them are suitable for use. Use a seam 00426 * queue and seam pile that have already been initialized and used. 
00427 **********************************************************************/ 00428 void Wordrec::try_point_pairs(EDGEPT * points[MAX_NUM_POINTS], 00429 inT16 num_points, 00430 SeamQueue* seam_queue, 00431 SeamPile* seam_pile, 00432 SEAM ** seam, 00433 TBLOB * blob) { 00434 inT16 x; 00435 inT16 y; 00436 SPLIT *split; 00437 PRIORITY priority; 00438 00439 for (x = 0; x < num_points; x++) { 00440 for (y = x + 1; y < num_points; y++) { 00441 00442 if (points[y] && 00443 weighted_edgept_dist(points[x], points[y], 00444 chop_x_y_weight) < chop_split_length && 00445 points[x] != points[y]->next && 00446 points[y] != points[x]->next && 00447 !is_exterior_point(points[x], points[y]) && 00448 !is_exterior_point(points[y], points[x])) { 00449 split = new_split (points[x], points[y]); 00450 priority = partial_split_priority (split); 00451 00452 choose_best_seam(seam_queue, split, priority, seam, blob, seam_pile); 00453 } 00454 } 00455 } 00456 } 00457 00458 00459 /********************************************************************** 00460 * try_vertical_splits 00461 * 00462 * Try all the splits that are produced by vertical projection to see 00463 * if any of them are suitable for use. Use a seam queue and seam pile 00464 * that have already been initialized and used. 00465 * Return in new_points a collection of points that were inserted into 00466 * the blob while examining vertical splits and which may safely be 00467 * removed once a seam is chosen if they are not part of the seam. 
00468 **********************************************************************/ 00469 void Wordrec::try_vertical_splits(EDGEPT * points[MAX_NUM_POINTS], 00470 inT16 num_points, 00471 EDGEPT_CLIST *new_points, 00472 SeamQueue* seam_queue, 00473 SeamPile* seam_pile, 00474 SEAM ** seam, 00475 TBLOB * blob) { 00476 EDGEPT *vertical_point = NULL; 00477 SPLIT *split; 00478 inT16 x; 00479 PRIORITY priority; 00480 TESSLINE *outline; 00481 00482 for (x = 0; x < num_points; x++) { 00483 vertical_point = NULL; 00484 for (outline = blob->outlines; outline; outline = outline->next) { 00485 vertical_projection_point(points[x], outline->loop, 00486 &vertical_point, new_points); 00487 } 00488 00489 if (vertical_point && 00490 points[x] != vertical_point->next && 00491 vertical_point != points[x]->next && 00492 weighted_edgept_dist(points[x], vertical_point, 00493 chop_x_y_weight) < chop_split_length) { 00494 00495 split = new_split (points[x], vertical_point); 00496 priority = partial_split_priority (split); 00497 00498 choose_best_seam(seam_queue, split, priority, seam, blob, seam_pile); 00499 } 00500 } 00501 } 00502 00503 }