tesseract
3.03
|
00001 /********************************************************************** 00002 * File: polyaprx.cpp (Formerly polygon.c) 00003 * Description: Code for polygonal approximation from old edgeprog. 00004 * Author: Ray Smith 00005 * Created: Thu Nov 25 11:42:04 GMT 1993 00006 * 00007 * (C) Copyright 1993, Hewlett-Packard Ltd. 00008 ** Licensed under the Apache License, Version 2.0 (the "License"); 00009 ** you may not use this file except in compliance with the License. 00010 ** You may obtain a copy of the License at 00011 ** http://www.apache.org/licenses/LICENSE-2.0 00012 ** Unless required by applicable law or agreed to in writing, software 00013 ** distributed under the License is distributed on an "AS IS" BASIS, 00014 ** WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 00015 ** See the License for the specific language governing permissions and 00016 ** limitations under the License. 00017 * 00018 **********************************************************************/ 00019 00020 #include <stdio.h> 00021 #ifdef __UNIX__ 00022 #include <assert.h> 00023 #endif 00024 #define FASTEDGELENGTH 256 00025 #include "polyaprx.h" 00026 #include "params.h" 00027 #include "tprintf.h" 00028 00029 #define EXTERN 00030 00031 EXTERN BOOL_VAR(poly_debug, FALSE, "Debug old poly"); 00032 EXTERN BOOL_VAR(poly_wide_objects_better, TRUE, 00033 "More accurate approx on wide things"); 00034 00035 #define FIXED 4 /*OUTLINE point is fixed */ 00036 00037 #define RUNLENGTH 1 /*length of run */ 00038 00039 #define DIR 2 /*direction of run */ 00040 00041 #define FLAGS 0 00042 00043 #define fixed_dist 20 //really an int_variable 00044 #define approx_dist 15 //really an int_variable 00045 00046 const int par1 = 4500 / (approx_dist * approx_dist); 00047 const int par2 = 6750 / (approx_dist * approx_dist); 00048 00049 00050 /********************************************************************** 00051 * tesspoly_outline 00052 * 00053 * Approximate an outline from chain codes form using the old tess algorithm. 00054 * If allow_detailed_fx is true, the EDGEPTs in the returned TBLOB 00055 * contain pointers to the input C_OUTLINEs that enable higher-resolution 00056 * feature extraction that does not use the polygonal approximation. 00057 **********************************************************************/ 00058 00059 00060 TESSLINE* ApproximateOutline(bool allow_detailed_fx, C_OUTLINE* c_outline) { 00061 EDGEPT *edgept; // converted steps 00062 TBOX loop_box; // bounding box 00063 inT32 area; // loop area 00064 EDGEPT stack_edgepts[FASTEDGELENGTH]; // converted path 00065 EDGEPT* edgepts = stack_edgepts; 00066 00067 // Use heap memory if the stack buffer is not big enough. 00068 if (c_outline->pathlength() > FASTEDGELENGTH) 00069 edgepts = new EDGEPT[c_outline->pathlength()]; 00070 00071 loop_box = c_outline->bounding_box(); 00072 area = loop_box.height(); 00073 if (!poly_wide_objects_better && loop_box.width() > area) 00074 area = loop_box.width(); 00075 area *= area; 00076 edgept = edgesteps_to_edgepts(c_outline, edgepts); 00077 fix2(edgepts, area); 00078 edgept = poly2 (edgepts, area); // 2nd approximation. 00079 EDGEPT* startpt = edgept; 00080 EDGEPT* result = NULL; 00081 EDGEPT* prev_result = NULL; 00082 do { 00083 EDGEPT* new_pt = new EDGEPT; 00084 new_pt->pos = edgept->pos; 00085 new_pt->prev = prev_result; 00086 if (prev_result == NULL) { 00087 result = new_pt; 00088 } else { 00089 prev_result->next = new_pt; 00090 new_pt->prev = prev_result; 00091 } 00092 if (allow_detailed_fx) { 00093 new_pt->src_outline = edgept->src_outline; 00094 new_pt->start_step = edgept->start_step; 00095 new_pt->step_count = edgept->step_count; 00096 } 00097 prev_result = new_pt; 00098 edgept = edgept->next; 00099 } 00100 while (edgept != startpt); 00101 prev_result->next = result; 00102 result->prev = prev_result; 00103 if (edgepts != stack_edgepts) 00104 delete [] edgepts; 00105 return TESSLINE::BuildFromOutlineList(result); 00106 } 00107 00108 00109 /********************************************************************** 00110 * edgesteps_to_edgepts 00111 * 00112 * Convert a C_OUTLINE to EDGEPTs. 00113 **********************************************************************/ 00114 00115 EDGEPT * 00116 edgesteps_to_edgepts ( //convert outline 00117 C_OUTLINE * c_outline, //input 00118 EDGEPT edgepts[] //output is array 00119 ) { 00120 inT32 length; //steps in path 00121 ICOORD pos; //current coords 00122 inT32 stepindex; //current step 00123 inT32 stepinc; //increment 00124 inT32 epindex; //current EDGEPT 00125 inT32 count; //repeated steps 00126 ICOORD vec; //for this 8 step 00127 ICOORD prev_vec; 00128 inT8 epdir; //of this step 00129 DIR128 prevdir; //prvious dir 00130 DIR128 dir; //of this step 00131 00132 pos = c_outline->start_pos (); //start of loop 00133 length = c_outline->pathlength (); 00134 stepindex = 0; 00135 epindex = 0; 00136 prevdir = -1; 00137 count = 0; 00138 int prev_stepindex = 0; 00139 do { 00140 dir = c_outline->step_dir (stepindex); 00141 vec = c_outline->step (stepindex); 00142 if (stepindex < length - 1 00143 && c_outline->step_dir (stepindex + 1) - dir == -32) { 00144 dir += 128 - 16; 00145 vec += c_outline->step (stepindex + 1); 00146 stepinc = 2; 00147 } 00148 else 00149 stepinc = 1; 00150 if (count == 0) { 00151 prevdir = dir; 00152 prev_vec = vec; 00153 } 00154 if (prevdir.get_dir () != dir.get_dir ()) { 00155 edgepts[epindex].pos.x = pos.x (); 00156 edgepts[epindex].pos.y = pos.y (); 00157 prev_vec *= count; 00158 edgepts[epindex].vec.x = prev_vec.x (); 00159 edgepts[epindex].vec.y = prev_vec.y (); 00160 pos += prev_vec; 00161 edgepts[epindex].flags[RUNLENGTH] = count; 00162 edgepts[epindex].prev = &edgepts[epindex - 1]; 00163 edgepts[epindex].flags[FLAGS] = 0; 00164 edgepts[epindex].next = &edgepts[epindex + 1]; 00165 prevdir += 64; 00166 epdir = (DIR128) 0 - prevdir; 00167 epdir >>= 4; 00168 epdir &= 7; 00169 edgepts[epindex].flags[DIR] = epdir; 00170 edgepts[epindex].src_outline = c_outline; 00171 edgepts[epindex].start_step = prev_stepindex; 00172 edgepts[epindex].step_count = stepindex - prev_stepindex; 00173 epindex++; 00174 prevdir = dir; 00175 prev_vec = vec; 00176 count = 1; 00177 prev_stepindex = stepindex; 00178 } 00179 else 00180 count++; 00181 stepindex += stepinc; 00182 } 00183 while (stepindex < length); 00184 edgepts[epindex].pos.x = pos.x (); 00185 edgepts[epindex].pos.y = pos.y (); 00186 prev_vec *= count; 00187 edgepts[epindex].vec.x = prev_vec.x (); 00188 edgepts[epindex].vec.y = prev_vec.y (); 00189 pos += prev_vec; 00190 edgepts[epindex].flags[RUNLENGTH] = count; 00191 edgepts[epindex].flags[FLAGS] = 0; 00192 edgepts[epindex].src_outline = c_outline; 00193 edgepts[epindex].start_step = prev_stepindex; 00194 edgepts[epindex].step_count = stepindex - prev_stepindex; 00195 edgepts[epindex].prev = &edgepts[epindex - 1]; 00196 edgepts[epindex].next = &edgepts[0]; 00197 prevdir += 64; 00198 epdir = (DIR128) 0 - prevdir; 00199 epdir >>= 4; 00200 epdir &= 7; 00201 edgepts[epindex].flags[DIR] = epdir; 00202 edgepts[0].prev = &edgepts[epindex]; 00203 ASSERT_HOST (pos.x () == c_outline->start_pos ().x () 00204 && pos.y () == c_outline->start_pos ().y ()); 00205 return &edgepts[0]; 00206 } 00207 00208 00209 /********************************************************************** 00210 *fix2(start,area) fixes points on the outline according to a trial method* 00211 **********************************************************************/ 00212 00213 //#pragma OPT_LEVEL 1 /*stop compiler bugs*/ 00214 00215 void fix2( //polygonal approx 00216 EDGEPT *start, /*loop to approimate */ 00217 int area) { 00218 register EDGEPT *edgept; /*current point */ 00219 register EDGEPT *edgept1; 00220 register EDGEPT *loopstart; /*modified start of loop */ 00221 register EDGEPT *linestart; /*start of line segment */ 00222 register int dir1, dir2; /*directions of line */ 00223 register int sum1, sum2; /*lengths in dir1,dir2 */ 00224 int stopped; /*completed flag */ 00225 int fixed_count; //no of fixed points 00226 int d01, d12, d23, gapmin; 00227 TPOINT d01vec, d12vec, d23vec; 00228 register EDGEPT *edgefix, *startfix; 00229 register EDGEPT *edgefix0, *edgefix1, *edgefix2, *edgefix3; 00230 00231 edgept = start; /*start of loop */ 00232 while (((edgept->flags[DIR] - edgept->prev->flags[DIR] + 1) & 7) < 3 00233 && (dir1 = 00234 (edgept->prev->flags[DIR] - edgept->next->flags[DIR]) & 7) != 2 00235 && dir1 != 6) 00236 edgept = edgept->next; /*find suitable start */ 00237 loopstart = edgept; /*remember start */ 00238 00239 stopped = 0; /*not finished yet */ 00240 edgept->flags[FLAGS] |= FIXED; /*fix it */ 00241 do { 00242 linestart = edgept; /*possible start of line */ 00243 dir1 = edgept->flags[DIR]; /*first direction */ 00244 /*length of dir1 */ 00245 sum1 = edgept->flags[RUNLENGTH]; 00246 edgept = edgept->next; 00247 dir2 = edgept->flags[DIR]; /*2nd direction */ 00248 /*length in dir2 */ 00249 sum2 = edgept->flags[RUNLENGTH]; 00250 if (((dir1 - dir2 + 1) & 7) < 3) { 00251 while (edgept->prev->flags[DIR] == edgept->next->flags[DIR]) { 00252 edgept = edgept->next; /*look at next */ 00253 if (edgept->flags[DIR] == dir1) 00254 /*sum lengths */ 00255 sum1 += edgept->flags[RUNLENGTH]; 00256 else 00257 sum2 += edgept->flags[RUNLENGTH]; 00258 } 00259 00260 if (edgept == loopstart) 00261 stopped = 1; /*finished */ 00262 if (sum2 + sum1 > 2 00263 && linestart->prev->flags[DIR] == dir2 00264 && (linestart->prev->flags[RUNLENGTH] > 00265 linestart->flags[RUNLENGTH] || sum2 > sum1)) { 00266 /*start is back one */ 00267 linestart = linestart->prev; 00268 linestart->flags[FLAGS] |= FIXED; 00269 } 00270 00271 if (((edgept->next->flags[DIR] - edgept->flags[DIR] + 1) & 7) >= 3 00272 || (edgept->flags[DIR] == dir1 && sum1 >= sum2) 00273 || ((edgept->prev->flags[RUNLENGTH] < edgept->flags[RUNLENGTH] 00274 || (edgept->flags[DIR] == dir2 && sum2 >= sum1)) 00275 && linestart->next != edgept)) 00276 edgept = edgept->next; 00277 } 00278 /*sharp bend */ 00279 edgept->flags[FLAGS] |= FIXED; 00280 } 00281 /*do whole loop */ 00282 while (edgept != loopstart && !stopped); 00283 00284 edgept = start; 00285 do { 00286 if (((edgept->flags[RUNLENGTH] >= 8) && 00287 (edgept->flags[DIR] != 2) && (edgept->flags[DIR] != 6)) || 00288 ((edgept->flags[RUNLENGTH] >= 8) && 00289 ((edgept->flags[DIR] == 2) || (edgept->flags[DIR] == 6)))) { 00290 edgept->flags[FLAGS] |= FIXED; 00291 edgept1 = edgept->next; 00292 edgept1->flags[FLAGS] |= FIXED; 00293 } 00294 edgept = edgept->next; 00295 } 00296 while (edgept != start); 00297 00298 edgept = start; 00299 do { 00300 /*single fixed step */ 00301 if (edgept->flags[FLAGS] & FIXED && edgept->flags[RUNLENGTH] == 1 00302 /*and neighours free */ 00303 && edgept->next->flags[FLAGS] & FIXED && (edgept->prev->flags[FLAGS] & FIXED) == 0 00304 /*same pair of dirs */ 00305 && (edgept->next->next->flags[FLAGS] & FIXED) == 0 && edgept->prev->flags[DIR] == edgept->next->flags[DIR] && edgept->prev->prev->flags[DIR] == edgept->next->next->flags[DIR] 00306 && ((edgept->prev->flags[DIR] - edgept->flags[DIR] + 1) & 7) < 3) { 00307 /*unfix it */ 00308 edgept->flags[FLAGS] &= ~FIXED; 00309 edgept->next->flags[FLAGS] &= ~FIXED; 00310 } 00311 edgept = edgept->next; /*do all points */ 00312 } 00313 while (edgept != start); /*until finished */ 00314 00315 stopped = 0; 00316 if (area < 450) 00317 area = 450; 00318 00319 gapmin = area * fixed_dist * fixed_dist / 44000; 00320 00321 edgept = start; 00322 fixed_count = 0; 00323 do { 00324 if (edgept->flags[FLAGS] & FIXED) 00325 fixed_count++; 00326 edgept = edgept->next; 00327 } 00328 while (edgept != start); 00329 while ((edgept->flags[FLAGS] & FIXED) == 0) 00330 edgept = edgept->next; 00331 edgefix0 = edgept; 00332 00333 edgept = edgept->next; 00334 while ((edgept->flags[FLAGS] & FIXED) == 0) 00335 edgept = edgept->next; 00336 edgefix1 = edgept; 00337 00338 edgept = edgept->next; 00339 while ((edgept->flags[FLAGS] & FIXED) == 0) 00340 edgept = edgept->next; 00341 edgefix2 = edgept; 00342 00343 edgept = edgept->next; 00344 while ((edgept->flags[FLAGS] & FIXED) == 0) 00345 edgept = edgept->next; 00346 edgefix3 = edgept; 00347 00348 startfix = edgefix2; 00349 00350 do { 00351 if (fixed_count <= 3) 00352 break; //already too few 00353 point_diff (d12vec, edgefix1->pos, edgefix2->pos); 00354 d12 = LENGTH (d12vec); 00355 // TODO(rays) investigate this change: 00356 // Only unfix a point if it is part of a low-curvature section 00357 // of outline and the total angle change of the outlines is 00358 // less than 90 degrees, ie the scalar product is positive. 00359 // if (d12 <= gapmin && SCALAR(edgefix0->vec, edgefix2->vec) > 0) { 00360 if (d12 <= gapmin) { 00361 point_diff (d01vec, edgefix0->pos, edgefix1->pos); 00362 d01 = LENGTH (d01vec); 00363 point_diff (d23vec, edgefix2->pos, edgefix3->pos); 00364 d23 = LENGTH (d23vec); 00365 if (d01 > d23) { 00366 edgefix2->flags[FLAGS] &= ~FIXED; 00367 fixed_count--; 00368 } 00369 else { 00370 edgefix1->flags[FLAGS] &= ~FIXED; 00371 fixed_count--; 00372 edgefix1 = edgefix2; 00373 } 00374 } 00375 else { 00376 edgefix0 = edgefix1; 00377 edgefix1 = edgefix2; 00378 } 00379 edgefix2 = edgefix3; 00380 edgept = edgept->next; 00381 while ((edgept->flags[FLAGS] & FIXED) == 0) { 00382 if (edgept == startfix) 00383 stopped = 1; 00384 edgept = edgept->next; 00385 } 00386 edgefix3 = edgept; 00387 edgefix = edgefix2; 00388 } 00389 while ((edgefix != startfix) && (!stopped)); 00390 } 00391 00392 00393 //#pragma OPT_LEVEL 2 /*stop compiler bugs*/ 00394 00395 /********************************************************************** 00396 *poly2(startpt,area,path) applies a second approximation to the outline 00397 *using the points which have been fixed by the first approximation* 00398 **********************************************************************/ 00399 00400 EDGEPT *poly2( //second poly 00401 EDGEPT *startpt, /*start of loop */ 00402 int area /*area of blob box */ 00403 ) { 00404 register EDGEPT *edgept; /*current outline point */ 00405 EDGEPT *loopstart; /*starting point */ 00406 register EDGEPT *linestart; /*start of line */ 00407 register int edgesum; /*correction count */ 00408 00409 if (area < 1200) 00410 area = 1200; /*minimum value */ 00411 00412 loopstart = NULL; /*not found it yet */ 00413 edgept = startpt; /*start of loop */ 00414 00415 do { 00416 /*current point fixed */ 00417 if (edgept->flags[FLAGS] & FIXED 00418 /*and next not */ 00419 && (edgept->next->flags[FLAGS] & FIXED) == 0) { 00420 loopstart = edgept; /*start of repoly */ 00421 break; 00422 } 00423 edgept = edgept->next; /*next point */ 00424 } 00425 while (edgept != startpt); /*until found or finished */ 00426 00427 if (loopstart == NULL && (startpt->flags[FLAGS] & FIXED) == 0) { 00428 /*fixed start of loop */ 00429 startpt->flags[FLAGS] |= FIXED; 00430 loopstart = startpt; /*or start of loop */ 00431 } 00432 if (loopstart) { 00433 do { 00434 edgept = loopstart; /*first to do */ 00435 do { 00436 linestart = edgept; 00437 edgesum = 0; /*sum of lengths */ 00438 do { 00439 /*sum lengths */ 00440 edgesum += edgept->flags[RUNLENGTH]; 00441 edgept = edgept->next; /*move on */ 00442 } 00443 while ((edgept->flags[FLAGS] & FIXED) == 0 00444 && edgept != loopstart && edgesum < 126); 00445 if (poly_debug) 00446 tprintf 00447 ("Poly2:starting at (%d,%d)+%d=(%d,%d),%d to (%d,%d)\n", 00448 linestart->pos.x, linestart->pos.y, linestart->flags[DIR], 00449 linestart->vec.x, linestart->vec.y, edgesum, edgept->pos.x, 00450 edgept->pos.y); 00451 /*reapproximate */ 00452 cutline(linestart, edgept, area); 00453 00454 while ((edgept->next->flags[FLAGS] & FIXED) 00455 && edgept != loopstart) 00456 edgept = edgept->next; /*look for next non-fixed */ 00457 } 00458 /*do all the loop */ 00459 while (edgept != loopstart); 00460 edgesum = 0; 00461 do { 00462 if (edgept->flags[FLAGS] & FIXED) 00463 edgesum++; 00464 edgept = edgept->next; 00465 } 00466 //count fixed pts 00467 while (edgept != loopstart); 00468 if (edgesum < 3) 00469 area /= 2; //must have 3 pts 00470 } 00471 while (edgesum < 3); 00472 do { 00473 linestart = edgept; 00474 do { 00475 edgept = edgept->next; 00476 } 00477 while ((edgept->flags[FLAGS] & FIXED) == 0); 00478 linestart->next = edgept; 00479 edgept->prev = linestart; 00480 linestart->vec.x = edgept->pos.x - linestart->pos.x; 00481 linestart->vec.y = edgept->pos.y - linestart->pos.y; 00482 } 00483 while (edgept != loopstart); 00484 } 00485 else 00486 edgept = startpt; /*start of loop */ 00487 00488 loopstart = edgept; /*new start */ 00489 return loopstart; /*correct exit */ 00490 } 00491 00492 00493 /********************************************************************** 00494 *cutline(first,last,area) straightens out a line by partitioning 00495 *and joining the ends by a straight line* 00496 **********************************************************************/ 00497 00498 void cutline( //recursive refine 00499 EDGEPT *first, /*ends of line */ 00500 EDGEPT *last, 00501 int area /*area of object */ 00502 ) { 00503 register EDGEPT *edge; /*current edge */ 00504 TPOINT vecsum; /*vector sum */ 00505 int vlen; /*approx length of vecsum */ 00506 TPOINT vec; /*accumulated vector */ 00507 EDGEPT *maxpoint; /*worst point */ 00508 int maxperp; /*max deviation */ 00509 register int perp; /*perp distance */ 00510 int ptcount; /*no of points */ 00511 int squaresum; /*sum of perps */ 00512 00513 edge = first; /*start of line */ 00514 if (edge->next == last) 00515 return; /*simple line */ 00516 00517 /*vector sum */ 00518 vecsum.x = last->pos.x - edge->pos.x; 00519 vecsum.y = last->pos.y - edge->pos.y; 00520 if (vecsum.x == 0 && vecsum.y == 0) { 00521 /*special case */ 00522 vecsum.x = -edge->prev->vec.x; 00523 vecsum.y = -edge->prev->vec.y; 00524 } 00525 /*absolute value */ 00526 vlen = vecsum.x > 0 ? vecsum.x : -vecsum.x; 00527 if (vecsum.y > vlen) 00528 vlen = vecsum.y; /*maximum */ 00529 else if (-vecsum.y > vlen) 00530 vlen = -vecsum.y; /*absolute value */ 00531 00532 vec.x = edge->vec.x; /*accumulated vector */ 00533 vec.y = edge->vec.y; 00534 maxperp = 0; /*none yet */ 00535 squaresum = ptcount = 0; 00536 edge = edge->next; /*move to actual point */ 00537 maxpoint = edge; /*in case there isn't one */ 00538 do { 00539 perp = CROSS (vec, vecsum); /*get perp distance */ 00540 if (perp != 0) { 00541 perp *= perp; /*squared deviation */ 00542 } 00543 squaresum += perp; /*sum squares */ 00544 ptcount++; /*count points */ 00545 if (poly_debug) 00546 tprintf ("Cutline:Final perp=%d\n", perp); 00547 if (perp > maxperp) { 00548 maxperp = perp; 00549 maxpoint = edge; /*find greatest deviation */ 00550 } 00551 vec.x += edge->vec.x; /*accumulate vectors */ 00552 vec.y += edge->vec.y; 00553 edge = edge->next; 00554 } 00555 while (edge != last); /*test all line */ 00556 00557 perp = LENGTH (vecsum); 00558 ASSERT_HOST (perp != 0); 00559 00560 if (maxperp < 256 * MAX_INT16) { 00561 maxperp <<= 8; 00562 maxperp /= perp; /*true max perp */ 00563 } 00564 else { 00565 maxperp /= perp; 00566 maxperp <<= 8; /*avoid overflow */ 00567 } 00568 if (squaresum < 256 * MAX_INT16) 00569 /*mean squared perp */ 00570 perp = (squaresum << 8) / (perp * ptcount); 00571 else 00572 /*avoid overflow */ 00573 perp = (squaresum / perp << 8) / ptcount; 00574 00575 if (poly_debug) 00576 tprintf ("Cutline:A=%d, max=%.2f(%.2f%%), msd=%.2f(%.2f%%)\n", 00577 area, maxperp / 256.0, maxperp * 200.0 / area, 00578 perp / 256.0, perp * 300.0 / area); 00579 if (maxperp * par1 >= 10 * area || perp * par2 >= 10 * area || vlen >= 126) { 00580 maxpoint->flags[FLAGS] |= FIXED; 00581 /*partitions */ 00582 cutline(first, maxpoint, area); 00583 cutline(maxpoint, last, area); 00584 } 00585 }