tesseract  3.03
/usr/local/google/home/jbreiden/tesseract-ocr-read-only/ccstruct/polyaprx.cpp
Go to the documentation of this file.
00001 /**********************************************************************
00002  * File:        polyaprx.cpp  (Formerly polygon.c)
00003  * Description: Code for polygonal approximation from old edgeprog.
00004  * Author:      Ray Smith
00005  * Created:     Thu Nov 25 11:42:04 GMT 1993
00006  *
00007  * (C) Copyright 1993, Hewlett-Packard Ltd.
00008  ** Licensed under the Apache License, Version 2.0 (the "License");
00009  ** you may not use this file except in compliance with the License.
00010  ** You may obtain a copy of the License at
00011  ** http://www.apache.org/licenses/LICENSE-2.0
00012  ** Unless required by applicable law or agreed to in writing, software
00013  ** distributed under the License is distributed on an "AS IS" BASIS,
00014  ** WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
00015  ** See the License for the specific language governing permissions and
00016  ** limitations under the License.
00017  *
00018  **********************************************************************/
00019 
00020 #include          <stdio.h>
00021 #ifdef __UNIX__
00022 #include          <assert.h>
00023 #endif
00024 #define FASTEDGELENGTH    256  // EDGEPTs kept on the stack by ApproximateOutline before it falls back to the heap
00025 #include          "polyaprx.h"
00026 #include          "params.h"
00027 #include          "tprintf.h"
00028 
00029 #define EXTERN  // defined empty, so the EXTERN prefix below contributes no tokens
00030 
00031 EXTERN BOOL_VAR(poly_debug, FALSE, "Debug old poly");  // gates the tprintf tracing in poly2 and cutline
00032 EXTERN BOOL_VAR(poly_wide_objects_better, TRUE,
00033                 "More accurate approx on wide things");  // when false, ApproximateOutline takes the larger box side (not just the height) for "area"
00034 
00035 #define FIXED       4            /* bit set in flags[FLAGS] once an outline point is fixed */
00036 
00037 #define RUNLENGTH     1          /* index into EDGEPT flags[]: length of the run */
00038 
00039 #define DIR         2            /* index into EDGEPT flags[]: direction of the run, stored as 0-7 */
00040 
00041 #define FLAGS       0            /* index into EDGEPT flags[]: status bits such as FIXED */
00042 
00043 #define fixed_dist      20       // really an int_variable; scales gapmin in fix2
00044 #define approx_dist     15       // really an int_variable; scales par1 and par2 below
00045 
00046 const int par1 = 4500 / (approx_dist * approx_dist);  // = 20; weight on the maximum deviation test in cutline
00047 const int par2 = 6750 / (approx_dist * approx_dist);  // = 30; weight on the mean squared deviation test in cutline
00048 
00049 
00050 /**********************************************************************
00051  * tesspoly_outline
00052  *
00053  * Approximate an outline from chain codes form using the old tess algorithm.
00054  * If allow_detailed_fx is true, the EDGEPTs in the returned TBLOB
00055  * contain pointers to the input C_OUTLINEs that enable higher-resolution
00056  * feature extraction that does not use the polygonal approximation.
00057  **********************************************************************/
00058 
00059 
00060 TESSLINE* ApproximateOutline(bool allow_detailed_fx, C_OUTLINE* c_outline) {
00061   EDGEPT *edgept;                // converted steps
00062   TBOX loop_box;                  // bounding box
00063   inT32 area;                    // loop area
00064   EDGEPT stack_edgepts[FASTEDGELENGTH];  // converted path
00065   EDGEPT* edgepts = stack_edgepts;
00066 
00067   // Use heap memory if the stack buffer is not big enough.
00068   if (c_outline->pathlength() > FASTEDGELENGTH)
00069     edgepts = new EDGEPT[c_outline->pathlength()];
00070 
00071   loop_box = c_outline->bounding_box();
00072   area = loop_box.height();
00073   if (!poly_wide_objects_better && loop_box.width() > area)
00074     area = loop_box.width();
00075   area *= area;
00076   edgept = edgesteps_to_edgepts(c_outline, edgepts);
00077   fix2(edgepts, area);
00078   edgept = poly2 (edgepts, area);  // 2nd approximation.
00079   EDGEPT* startpt = edgept;
00080   EDGEPT* result = NULL;
00081   EDGEPT* prev_result = NULL;
00082   do {
00083     EDGEPT* new_pt = new EDGEPT;
00084     new_pt->pos = edgept->pos;
00085     new_pt->prev = prev_result;
00086     if (prev_result == NULL) {
00087       result = new_pt;
00088     } else {
00089       prev_result->next = new_pt;
00090       new_pt->prev = prev_result;
00091     }
00092     if (allow_detailed_fx) {
00093       new_pt->src_outline = edgept->src_outline;
00094       new_pt->start_step = edgept->start_step;
00095       new_pt->step_count = edgept->step_count;
00096     }
00097     prev_result = new_pt;
00098     edgept = edgept->next;
00099   }
00100   while (edgept != startpt);
00101   prev_result->next = result;
00102   result->prev = prev_result;
00103   if (edgepts != stack_edgepts)
00104     delete [] edgepts;
00105   return TESSLINE::BuildFromOutlineList(result);
00106 }
00107 
00108 
00109 /**********************************************************************
00110  * edgesteps_to_edgepts
00111  *
00112  * Convert a C_OUTLINE to EDGEPTs.
00113  **********************************************************************/
00114 
00115 EDGEPT *
00116 edgesteps_to_edgepts (           //convert outline
00117 C_OUTLINE * c_outline,           //input
00118 EDGEPT edgepts[]                 //output is array
00119 ) {
00120   inT32 length;                  //steps in path
00121   ICOORD pos;                    //current coords
00122   inT32 stepindex;               //current step
00123   inT32 stepinc;                 //increment
00124   inT32 epindex;                 //current EDGEPT
00125   inT32 count;                   //repeated steps
00126   ICOORD vec;                    //for this 8 step
00127   ICOORD prev_vec;
00128   inT8 epdir;                    //of this step
00129   DIR128 prevdir;                //prvious dir
00130   DIR128 dir;                    //of this step
00131 
00132   pos = c_outline->start_pos (); //start of loop
00133   length = c_outline->pathlength ();
00134   stepindex = 0;
00135   epindex = 0;
00136   prevdir = -1;
00137   count = 0;
00138   int prev_stepindex = 0;
00139   do {
00140     dir = c_outline->step_dir (stepindex);
00141     vec = c_outline->step (stepindex);
00142     if (stepindex < length - 1
00143     && c_outline->step_dir (stepindex + 1) - dir == -32) {
00144       dir += 128 - 16;
00145       vec += c_outline->step (stepindex + 1);
00146       stepinc = 2;
00147     }
00148     else
00149       stepinc = 1;
00150     if (count == 0) {
00151       prevdir = dir;
00152       prev_vec = vec;
00153     }
00154     if (prevdir.get_dir () != dir.get_dir ()) {
00155       edgepts[epindex].pos.x = pos.x ();
00156       edgepts[epindex].pos.y = pos.y ();
00157       prev_vec *= count;
00158       edgepts[epindex].vec.x = prev_vec.x ();
00159       edgepts[epindex].vec.y = prev_vec.y ();
00160       pos += prev_vec;
00161       edgepts[epindex].flags[RUNLENGTH] = count;
00162       edgepts[epindex].prev = &edgepts[epindex - 1];
00163       edgepts[epindex].flags[FLAGS] = 0;
00164       edgepts[epindex].next = &edgepts[epindex + 1];
00165       prevdir += 64;
00166       epdir = (DIR128) 0 - prevdir;
00167       epdir >>= 4;
00168       epdir &= 7;
00169       edgepts[epindex].flags[DIR] = epdir;
00170       edgepts[epindex].src_outline = c_outline;
00171       edgepts[epindex].start_step = prev_stepindex;
00172       edgepts[epindex].step_count = stepindex - prev_stepindex;
00173       epindex++;
00174       prevdir = dir;
00175       prev_vec = vec;
00176       count = 1;
00177       prev_stepindex = stepindex;
00178     }
00179     else
00180       count++;
00181     stepindex += stepinc;
00182   }
00183   while (stepindex < length);
00184   edgepts[epindex].pos.x = pos.x ();
00185   edgepts[epindex].pos.y = pos.y ();
00186   prev_vec *= count;
00187   edgepts[epindex].vec.x = prev_vec.x ();
00188   edgepts[epindex].vec.y = prev_vec.y ();
00189   pos += prev_vec;
00190   edgepts[epindex].flags[RUNLENGTH] = count;
00191   edgepts[epindex].flags[FLAGS] = 0;
00192   edgepts[epindex].src_outline = c_outline;
00193   edgepts[epindex].start_step = prev_stepindex;
00194   edgepts[epindex].step_count = stepindex - prev_stepindex;
00195   edgepts[epindex].prev = &edgepts[epindex - 1];
00196   edgepts[epindex].next = &edgepts[0];
00197   prevdir += 64;
00198   epdir = (DIR128) 0 - prevdir;
00199   epdir >>= 4;
00200   epdir &= 7;
00201   edgepts[epindex].flags[DIR] = epdir;
00202   edgepts[0].prev = &edgepts[epindex];
00203   ASSERT_HOST (pos.x () == c_outline->start_pos ().x ()
00204     && pos.y () == c_outline->start_pos ().y ());
00205   return &edgepts[0];
00206 }
00207 
00208 
00209 /**********************************************************************
00210  *fix2(start,area) fixes points on the outline according to a trial method*
00211  **********************************************************************/
00212 
00213 //#pragma OPT_LEVEL 1                                                                           /*stop compiler bugs*/
00214 
00215 void fix2(                //polygonal approx
00216           EDGEPT *start,  /*loop to approimate */
00217           int area) {
00218   register EDGEPT *edgept;       /*current point */
00219   register EDGEPT *edgept1;
00220   register EDGEPT *loopstart;    /*modified start of loop */
00221   register EDGEPT *linestart;    /*start of line segment */
00222   register int dir1, dir2;       /*directions of line */
00223   register int sum1, sum2;       /*lengths in dir1,dir2 */
00224   int stopped;                   /*completed flag */
00225   int fixed_count;               //no of fixed points
00226   int d01, d12, d23, gapmin;
00227   TPOINT d01vec, d12vec, d23vec;
00228   register EDGEPT *edgefix, *startfix;
00229   register EDGEPT *edgefix0, *edgefix1, *edgefix2, *edgefix3;
00230 
00231   edgept = start;                /*start of loop */
00232   while (((edgept->flags[DIR] - edgept->prev->flags[DIR] + 1) & 7) < 3
00233     && (dir1 =
00234     (edgept->prev->flags[DIR] - edgept->next->flags[DIR]) & 7) != 2
00235     && dir1 != 6)
00236     edgept = edgept->next;       /*find suitable start */
00237   loopstart = edgept;            /*remember start */
00238 
00239   stopped = 0;                   /*not finished yet */
00240   edgept->flags[FLAGS] |= FIXED; /*fix it */
00241   do {
00242     linestart = edgept;          /*possible start of line */
00243     dir1 = edgept->flags[DIR];   /*first direction */
00244                                  /*length of dir1 */
00245     sum1 = edgept->flags[RUNLENGTH];
00246     edgept = edgept->next;
00247     dir2 = edgept->flags[DIR];   /*2nd direction */
00248                                  /*length in dir2 */
00249     sum2 = edgept->flags[RUNLENGTH];
00250     if (((dir1 - dir2 + 1) & 7) < 3) {
00251       while (edgept->prev->flags[DIR] == edgept->next->flags[DIR]) {
00252         edgept = edgept->next;   /*look at next */
00253         if (edgept->flags[DIR] == dir1)
00254                                  /*sum lengths */
00255           sum1 += edgept->flags[RUNLENGTH];
00256         else
00257           sum2 += edgept->flags[RUNLENGTH];
00258       }
00259 
00260       if (edgept == loopstart)
00261         stopped = 1;             /*finished */
00262       if (sum2 + sum1 > 2
00263         && linestart->prev->flags[DIR] == dir2
00264         && (linestart->prev->flags[RUNLENGTH] >
00265       linestart->flags[RUNLENGTH] || sum2 > sum1)) {
00266                                  /*start is back one */
00267         linestart = linestart->prev;
00268         linestart->flags[FLAGS] |= FIXED;
00269       }
00270 
00271       if (((edgept->next->flags[DIR] - edgept->flags[DIR] + 1) & 7) >= 3
00272         || (edgept->flags[DIR] == dir1 && sum1 >= sum2)
00273         || ((edgept->prev->flags[RUNLENGTH] < edgept->flags[RUNLENGTH]
00274         || (edgept->flags[DIR] == dir2 && sum2 >= sum1))
00275           && linestart->next != edgept))
00276         edgept = edgept->next;
00277     }
00278                                  /*sharp bend */
00279     edgept->flags[FLAGS] |= FIXED;
00280   }
00281                                  /*do whole loop */
00282   while (edgept != loopstart && !stopped);
00283 
00284   edgept = start;
00285   do {
00286     if (((edgept->flags[RUNLENGTH] >= 8) &&
00287       (edgept->flags[DIR] != 2) && (edgept->flags[DIR] != 6)) ||
00288       ((edgept->flags[RUNLENGTH] >= 8) &&
00289     ((edgept->flags[DIR] == 2) || (edgept->flags[DIR] == 6)))) {
00290       edgept->flags[FLAGS] |= FIXED;
00291       edgept1 = edgept->next;
00292       edgept1->flags[FLAGS] |= FIXED;
00293     }
00294     edgept = edgept->next;
00295   }
00296   while (edgept != start);
00297 
00298   edgept = start;
00299   do {
00300                                  /*single fixed step */
00301     if (edgept->flags[FLAGS] & FIXED && edgept->flags[RUNLENGTH] == 1
00302                                  /*and neighours free */
00303       && edgept->next->flags[FLAGS] & FIXED && (edgept->prev->flags[FLAGS] & FIXED) == 0
00304                                  /*same pair of dirs */
00305       && (edgept->next->next->flags[FLAGS] & FIXED) == 0 && edgept->prev->flags[DIR] == edgept->next->flags[DIR] && edgept->prev->prev->flags[DIR] == edgept->next->next->flags[DIR]
00306     && ((edgept->prev->flags[DIR] - edgept->flags[DIR] + 1) & 7) < 3) {
00307                                  /*unfix it */
00308       edgept->flags[FLAGS] &= ~FIXED;
00309       edgept->next->flags[FLAGS] &= ~FIXED;
00310     }
00311     edgept = edgept->next;       /*do all points */
00312   }
00313   while (edgept != start);       /*until finished */
00314 
00315   stopped = 0;
00316   if (area < 450)
00317     area = 450;
00318 
00319   gapmin = area * fixed_dist * fixed_dist / 44000;
00320 
00321   edgept = start;
00322   fixed_count = 0;
00323   do {
00324     if (edgept->flags[FLAGS] & FIXED)
00325       fixed_count++;
00326     edgept = edgept->next;
00327   }
00328   while (edgept != start);
00329   while ((edgept->flags[FLAGS] & FIXED) == 0)
00330     edgept = edgept->next;
00331   edgefix0 = edgept;
00332 
00333   edgept = edgept->next;
00334   while ((edgept->flags[FLAGS] & FIXED) == 0)
00335     edgept = edgept->next;
00336   edgefix1 = edgept;
00337 
00338   edgept = edgept->next;
00339   while ((edgept->flags[FLAGS] & FIXED) == 0)
00340     edgept = edgept->next;
00341   edgefix2 = edgept;
00342 
00343   edgept = edgept->next;
00344   while ((edgept->flags[FLAGS] & FIXED) == 0)
00345     edgept = edgept->next;
00346   edgefix3 = edgept;
00347 
00348   startfix = edgefix2;
00349 
00350   do {
00351     if (fixed_count <= 3)
00352       break;                     //already too few
00353     point_diff (d12vec, edgefix1->pos, edgefix2->pos);
00354     d12 = LENGTH (d12vec);
00355     // TODO(rays) investigate this change:
00356     // Only unfix a point if it is part of a low-curvature section
00357     // of outline and the total angle change of the outlines is
00358     // less than 90 degrees, ie the scalar product is positive.
00359     // if (d12 <= gapmin && SCALAR(edgefix0->vec, edgefix2->vec) > 0) {
00360     if (d12 <= gapmin) {
00361       point_diff (d01vec, edgefix0->pos, edgefix1->pos);
00362       d01 = LENGTH (d01vec);
00363       point_diff (d23vec, edgefix2->pos, edgefix3->pos);
00364       d23 = LENGTH (d23vec);
00365       if (d01 > d23) {
00366         edgefix2->flags[FLAGS] &= ~FIXED;
00367         fixed_count--;
00368       }
00369       else {
00370         edgefix1->flags[FLAGS] &= ~FIXED;
00371         fixed_count--;
00372         edgefix1 = edgefix2;
00373       }
00374     }
00375     else {
00376       edgefix0 = edgefix1;
00377       edgefix1 = edgefix2;
00378     }
00379     edgefix2 = edgefix3;
00380     edgept = edgept->next;
00381     while ((edgept->flags[FLAGS] & FIXED) == 0) {
00382       if (edgept == startfix)
00383         stopped = 1;
00384       edgept = edgept->next;
00385     }
00386     edgefix3 = edgept;
00387     edgefix = edgefix2;
00388   }
00389   while ((edgefix != startfix) && (!stopped));
00390 }
00391 
00392 
00393 //#pragma OPT_LEVEL 2                                                                           /*stop compiler bugs*/
00394 
00395 /**********************************************************************
00396  *poly2(startpt,area,path) applies a second approximation to the outline
00397  *using the points which have been fixed by the first approximation*
00398  **********************************************************************/
00399 
00400 EDGEPT *poly2(                  //second poly
00401               EDGEPT *startpt,  /*start of loop */
00402               int area          /*area of blob box */
00403              ) {
00404   register EDGEPT *edgept;       /*current outline point */
00405   EDGEPT *loopstart;             /*starting point */
00406   register EDGEPT *linestart;    /*start of line */
00407   register int edgesum;          /*correction count */
00408 
00409   if (area < 1200)
00410     area = 1200;                 /*minimum value */
00411 
00412   loopstart = NULL;              /*not found it yet */
00413   edgept = startpt;              /*start of loop */
00414 
00415   do {
00416                                  /*current point fixed */
00417     if (edgept->flags[FLAGS] & FIXED
00418                                  /*and next not */
00419     && (edgept->next->flags[FLAGS] & FIXED) == 0) {
00420       loopstart = edgept;        /*start of repoly */
00421       break;
00422     }
00423     edgept = edgept->next;       /*next point */
00424   }
00425   while (edgept != startpt);     /*until found or finished */
00426 
00427   if (loopstart == NULL && (startpt->flags[FLAGS] & FIXED) == 0) {
00428                                  /*fixed start of loop */
00429     startpt->flags[FLAGS] |= FIXED;
00430     loopstart = startpt;         /*or start of loop */
00431   }
00432   if (loopstart) {
00433     do {
00434       edgept = loopstart;        /*first to do */
00435       do {
00436         linestart = edgept;
00437         edgesum = 0;             /*sum of lengths */
00438         do {
00439                                  /*sum lengths */
00440           edgesum += edgept->flags[RUNLENGTH];
00441           edgept = edgept->next; /*move on */
00442         }
00443         while ((edgept->flags[FLAGS] & FIXED) == 0
00444           && edgept != loopstart && edgesum < 126);
00445         if (poly_debug)
00446           tprintf
00447             ("Poly2:starting at (%d,%d)+%d=(%d,%d),%d to (%d,%d)\n",
00448             linestart->pos.x, linestart->pos.y, linestart->flags[DIR],
00449             linestart->vec.x, linestart->vec.y, edgesum, edgept->pos.x,
00450             edgept->pos.y);
00451                                  /*reapproximate */
00452         cutline(linestart, edgept, area);
00453 
00454         while ((edgept->next->flags[FLAGS] & FIXED)
00455           && edgept != loopstart)
00456           edgept = edgept->next; /*look for next non-fixed */
00457       }
00458                                  /*do all the loop */
00459       while (edgept != loopstart);
00460       edgesum = 0;
00461       do {
00462         if (edgept->flags[FLAGS] & FIXED)
00463           edgesum++;
00464         edgept = edgept->next;
00465       }
00466                                  //count fixed pts
00467       while (edgept != loopstart);
00468       if (edgesum < 3)
00469         area /= 2;               //must have 3 pts
00470     }
00471     while (edgesum < 3);
00472     do {
00473       linestart = edgept;
00474       do {
00475         edgept = edgept->next;
00476       }
00477       while ((edgept->flags[FLAGS] & FIXED) == 0);
00478       linestart->next = edgept;
00479       edgept->prev = linestart;
00480       linestart->vec.x = edgept->pos.x - linestart->pos.x;
00481       linestart->vec.y = edgept->pos.y - linestart->pos.y;
00482     }
00483     while (edgept != loopstart);
00484   }
00485   else
00486     edgept = startpt;            /*start of loop */
00487 
00488   loopstart = edgept;            /*new start */
00489   return loopstart;              /*correct exit */
00490 }
00491 
00492 
00493 /**********************************************************************
00494  *cutline(first,last,area) straightens out a line by partitioning
00495  *and joining the ends by a straight line*
00496  **********************************************************************/
00497 
00498 void cutline(                //recursive refine
00499              EDGEPT *first,  /*ends of line */
00500              EDGEPT *last,
00501              int area        /*area of object */
00502             ) {
00503   register EDGEPT *edge;         /*current edge */
00504   TPOINT vecsum;                 /*vector sum */
00505   int vlen;                      /*approx length of vecsum */
00506   TPOINT vec;                    /*accumulated vector */
00507   EDGEPT *maxpoint;              /*worst point */
00508   int maxperp;                   /*max deviation */
00509   register int perp;             /*perp distance */
00510   int ptcount;                   /*no of points */
00511   int squaresum;                 /*sum of perps */
00512 
00513   edge = first;                  /*start of line */
00514   if (edge->next == last)
00515     return;                      /*simple line */
00516 
00517                                  /*vector sum */
00518   vecsum.x = last->pos.x - edge->pos.x;
00519   vecsum.y = last->pos.y - edge->pos.y;
00520   if (vecsum.x == 0 && vecsum.y == 0) {
00521                                  /*special case */
00522     vecsum.x = -edge->prev->vec.x;
00523     vecsum.y = -edge->prev->vec.y;
00524   }
00525                                  /*absolute value */
00526   vlen = vecsum.x > 0 ? vecsum.x : -vecsum.x;
00527   if (vecsum.y > vlen)
00528     vlen = vecsum.y;             /*maximum */
00529   else if (-vecsum.y > vlen)
00530     vlen = -vecsum.y;            /*absolute value */
00531 
00532   vec.x = edge->vec.x;           /*accumulated vector */
00533   vec.y = edge->vec.y;
00534   maxperp = 0;                   /*none yet */
00535   squaresum = ptcount = 0;
00536   edge = edge->next;             /*move to actual point */
00537   maxpoint = edge;               /*in case there isn't one */
00538   do {
00539     perp = CROSS (vec, vecsum);  /*get perp distance */
00540     if (perp != 0) {
00541       perp *= perp;              /*squared deviation */
00542     }
00543     squaresum += perp;           /*sum squares */
00544     ptcount++;                   /*count points */
00545     if (poly_debug)
00546       tprintf ("Cutline:Final perp=%d\n", perp);
00547     if (perp > maxperp) {
00548       maxperp = perp;
00549       maxpoint = edge;           /*find greatest deviation */
00550     }
00551     vec.x += edge->vec.x;        /*accumulate vectors */
00552     vec.y += edge->vec.y;
00553     edge = edge->next;
00554   }
00555   while (edge != last);          /*test all line */
00556 
00557   perp = LENGTH (vecsum);
00558   ASSERT_HOST (perp != 0);
00559 
00560   if (maxperp < 256 * MAX_INT16) {
00561     maxperp <<= 8;
00562     maxperp /= perp;             /*true max perp */
00563   }
00564   else {
00565     maxperp /= perp;
00566     maxperp <<= 8;               /*avoid overflow */
00567   }
00568   if (squaresum < 256 * MAX_INT16)
00569                                  /*mean squared perp */
00570     perp = (squaresum << 8) / (perp * ptcount);
00571   else
00572                                  /*avoid overflow */
00573     perp = (squaresum / perp << 8) / ptcount;
00574 
00575   if (poly_debug)
00576     tprintf ("Cutline:A=%d, max=%.2f(%.2f%%), msd=%.2f(%.2f%%)\n",
00577       area, maxperp / 256.0, maxperp * 200.0 / area,
00578       perp / 256.0, perp * 300.0 / area);
00579   if (maxperp * par1 >= 10 * area || perp * par2 >= 10 * area || vlen >= 126) {
00580     maxpoint->flags[FLAGS] |= FIXED;
00581                                  /*partitions */
00582     cutline(first, maxpoint, area);
00583     cutline(maxpoint, last, area);
00584   }
00585 }
 All Classes Namespaces Files Functions Variables Typedefs Enumerations Enumerator Friends Defines