tesseract  3.03
/usr/local/google/home/jbreiden/tesseract-ocr-read-only/wordrec/chop.cpp
Go to the documentation of this file.
00001 /* -*-C-*-
00002  ********************************************************************************
00003  *
00004  * File:        chop.cpp  (Formerly chop.c)
00005  * Description:
00006  * Author:       Mark Seaman, OCR Technology
00007  * Created:      Fri Oct 16 14:37:00 1987
00008  * Modified:     Tue Jul 30 16:41:11 1991 (Mark Seaman) marks@hpgrlt
00009  * Language:     C
00010  * Package:      N/A
00011  * Status:       Reusable Software Component
00012  *
00013  * (c) Copyright 1987, Hewlett-Packard Company.
00014  ** Licensed under the Apache License, Version 2.0 (the "License");
00015  ** you may not use this file except in compliance with the License.
00016  ** You may obtain a copy of the License at
00017  ** http://www.apache.org/licenses/LICENSE-2.0
00018  ** Unless required by applicable law or agreed to in writing, software
00019  ** distributed under the License is distributed on an "AS IS" BASIS,
00020  ** WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
00021  ** See the License for the specific language governing permissions and
00022  ** limitations under the License.
00023  *
00024  *********************************************************************************/
00025 
00026 /*----------------------------------------------------------------------
00027               I n c l u d e s
00028 ----------------------------------------------------------------------*/
00029 
00030 #include "chop.h"
00031 #include "outlines.h"
00032 #include "olutil.h"
00033 #include "callcpp.h"
00034 #include "plotedges.h"
00035 #include "const.h"
00036 #include "wordrec.h"
00037 
00038 #include <math.h>
00039 
00040 // Include automatically generated configuration file if running autoconf.
00041 #ifdef HAVE_CONFIG_H
00042 #include "config_auto.h"
00043 #endif
00044 
00045 namespace tesseract {
00046 /*----------------------------------------------------------------------
00047               F u n c t i o n s
00048 ----------------------------------------------------------------------*/
00055 PRIORITY Wordrec::point_priority(EDGEPT *point) {
00056   return (PRIORITY)angle_change(point->prev, point, point->next);
00057 }
00058 
00059 
00065 void Wordrec::add_point_to_list(PointHeap* point_heap, EDGEPT *point) {
00066   if (point_heap->size() < MAX_NUM_POINTS - 2) {
00067     PointPair pair(point_priority(point), point);
00068     point_heap->Push(&pair);
00069   }
00070 
00071 #ifndef GRAPHICS_DISABLED
00072   if (chop_debug > 2)
00073     mark_outline(point);
00074 #endif
00075 }
00076 
00077 
00084 int Wordrec::angle_change(EDGEPT *point1, EDGEPT *point2, EDGEPT *point3) {
00085   VECTOR vector1;
00086   VECTOR vector2;
00087 
00088   int angle;
00089   float length;
00090 
00091   /* Compute angle */
00092   vector1.x = point2->pos.x - point1->pos.x;
00093   vector1.y = point2->pos.y - point1->pos.y;
00094   vector2.x = point3->pos.x - point2->pos.x;
00095   vector2.y = point3->pos.y - point2->pos.y;
00096   /* Use cross product */
00097   length = (float)sqrt((float)LENGTH(vector1) * LENGTH(vector2));
00098   if ((int) length == 0)
00099     return (0);
00100   angle = static_cast<int>(floor(asin(CROSS (vector1, vector2) /
00101                                       length) / PI * 180.0 + 0.5));
00102 
00103   /* Use dot product */
00104   if (SCALAR (vector1, vector2) < 0)
00105     angle = 180 - angle;
00106   /* Adjust angle */
00107   if (angle > 180)
00108     angle -= 360;
00109   if (angle <= -180)
00110     angle += 360;
00111   return (angle);
00112 }
00113 
00120 int Wordrec::is_little_chunk(EDGEPT *point1, EDGEPT *point2) {
00121   EDGEPT *p = point1;            /* Iterator */
00122   int counter = 0;
00123 
00124   do {
00125                                  /* Go from P1 to P2 */
00126     if (is_same_edgept (point2, p)) {
00127       if (is_small_area (point1, point2))
00128         return (TRUE);
00129       else
00130         break;
00131     }
00132     p = p->next;
00133   }
00134   while ((p != point1) && (counter++ < chop_min_outline_points));
00135   /* Go from P2 to P1 */
00136   p = point2;
00137   counter = 0;
00138   do {
00139     if (is_same_edgept (point1, p)) {
00140       return (is_small_area (point2, point1));
00141     }
00142     p = p->next;
00143   }
00144   while ((p != point2) && (counter++ < chop_min_outline_points));
00145 
00146   return (FALSE);
00147 }
00148 
00149 
00155 int Wordrec::is_small_area(EDGEPT *point1, EDGEPT *point2) {
00156   EDGEPT *p = point1->next;      /* Iterator */
00157   int area = 0;
00158   TPOINT origin;
00159 
00160   do {
00161                                  /* Go from P1 to P2 */
00162     origin.x = p->pos.x - point1->pos.x;
00163     origin.y = p->pos.y - point1->pos.y;
00164     area += CROSS (origin, p->vec);
00165     p = p->next;
00166   }
00167   while (!is_same_edgept (point2, p));
00168 
00169   return (area < chop_min_outline_area);
00170 }
00171 
00172 
00179 EDGEPT *Wordrec::pick_close_point(EDGEPT *critical_point,
00180                                   EDGEPT *vertical_point,
00181                                   int *best_dist) {
00182   EDGEPT *best_point = NULL;
00183   int this_distance;
00184   int found_better;
00185 
00186   do {
00187     found_better = FALSE;
00188 
00189     this_distance = edgept_dist (critical_point, vertical_point);
00190     if (this_distance <= *best_dist) {
00191 
00192       if (!(same_point (critical_point->pos, vertical_point->pos) ||
00193         same_point (critical_point->pos, vertical_point->next->pos) ||
00194         (best_point && same_point (best_point->pos, vertical_point->pos)) ||
00195       is_exterior_point (critical_point, vertical_point))) {
00196         *best_dist = this_distance;
00197         best_point = vertical_point;
00198         if (chop_vertical_creep)
00199           found_better = TRUE;
00200       }
00201     }
00202     vertical_point = vertical_point->next;
00203   }
00204   while (found_better == TRUE);
00205 
00206   return (best_point);
00207 }
00208 
00209 
00217 void Wordrec::prioritize_points(TESSLINE *outline, PointHeap* points) {
00218   EDGEPT *this_point;
00219   EDGEPT *local_min = NULL;
00220   EDGEPT *local_max = NULL;
00221 
00222   this_point = outline->loop;
00223   local_min = this_point;
00224   local_max = this_point;
00225   do {
00226     if (this_point->vec.y < 0) {
00227                                  /* Look for minima */
00228       if (local_max != NULL)
00229         new_max_point(local_max, points);
00230       else if (is_inside_angle (this_point))
00231         add_point_to_list(points, this_point);
00232       local_max = NULL;
00233       local_min = this_point->next;
00234     }
00235     else if (this_point->vec.y > 0) {
00236                                  /* Look for maxima */
00237       if (local_min != NULL)
00238         new_min_point(local_min, points);
00239       else if (is_inside_angle (this_point))
00240         add_point_to_list(points, this_point);
00241       local_min = NULL;
00242       local_max = this_point->next;
00243     }
00244     else {
00245       /* Flat area */
00246       if (local_max != NULL) {
00247         if (local_max->prev->vec.y != 0) {
00248           new_max_point(local_max, points);
00249         }
00250         local_max = this_point->next;
00251         local_min = NULL;
00252       }
00253       else {
00254         if (local_min->prev->vec.y != 0) {
00255           new_min_point(local_min, points);
00256         }
00257         local_min = this_point->next;
00258         local_max = NULL;
00259       }
00260     }
00261 
00262                                  /* Next point */
00263     this_point = this_point->next;
00264   }
00265   while (this_point != outline->loop);
00266 }
00267 
00268 
00276 void Wordrec::new_min_point(EDGEPT *local_min, PointHeap* points) {
00277   inT16 dir;
00278 
00279   dir = direction (local_min);
00280 
00281   if (dir < 0) {
00282     add_point_to_list(points, local_min);
00283     return;
00284   }
00285 
00286   if (dir == 0 && point_priority (local_min) < 0) {
00287     add_point_to_list(points, local_min);
00288     return;
00289   }
00290 }
00291 
00292 
00300 void Wordrec::new_max_point(EDGEPT *local_max, PointHeap* points) {
00301   inT16 dir;
00302 
00303   dir = direction (local_max);
00304 
00305   if (dir > 0) {
00306     add_point_to_list(points, local_max);
00307     return;
00308   }
00309 
00310   if (dir == 0 && point_priority (local_max) < 0) {
00311     add_point_to_list(points, local_max);
00312     return;
00313   }
00314 }
00315 
00316 
00329 void Wordrec::vertical_projection_point(EDGEPT *split_point, EDGEPT *target_point,
00330                                         EDGEPT** best_point,
00331                                         EDGEPT_CLIST *new_points) {
00332   EDGEPT *p;                     /* Iterator */
00333   EDGEPT *this_edgept;           /* Iterator */
00334   EDGEPT_C_IT new_point_it(new_points);
00335   int x = split_point->pos.x;    /* X value of vertical */
00336   int best_dist = LARGE_DISTANCE;/* Best point found */
00337 
00338   if (*best_point != NULL)
00339     best_dist = edgept_dist(split_point, *best_point);
00340 
00341   p = target_point;
00342   /* Look at each edge point */
00343   do {
00344     if (((p->pos.x <= x && x <= p->next->pos.x) ||
00345          (p->next->pos.x <= x && x <= p->pos.x)) &&
00346         !same_point(split_point->pos, p->pos) &&
00347         !same_point(split_point->pos, p->next->pos) &&
00348         !p->IsChopPt() &&
00349         (*best_point == NULL || !same_point((*best_point)->pos, p->pos))) {
00350 
00351       if (near_point(split_point, p, p->next, &this_edgept)) {
00352         new_point_it.add_before_then_move(this_edgept);
00353       }
00354 
00355       if (*best_point == NULL)
00356         best_dist = edgept_dist (split_point, this_edgept);
00357 
00358       this_edgept =
00359         pick_close_point(split_point, this_edgept, &best_dist);
00360       if (this_edgept)
00361         *best_point = this_edgept;
00362     }
00363 
00364     p = p->next;
00365   }
00366   while (p != target_point);
00367 }
00368 
00369 }  // namespace tesseract
 All Classes Namespaces Files Functions Variables Typedefs Enumerations Enumerator Friends Defines