tesseract  3.03
/usr/local/google/home/jbreiden/tesseract-ocr-read-only/wordrec/gradechop.cpp
Go to the documentation of this file.
00001 /* -*-C-*-
00002  ********************************************************************************
00003  *
00004  * File:        gradechop.c  (Formerly gradechop.c)
00005  * Description:
00006  * Author:       Mark Seaman, OCR Technology
00007  * Created:      Fri Oct 16 14:37:00 1987
00008  * Modified:     Tue Jul 30 16:06:27 1991 (Mark Seaman) marks@hpgrlt
00009  * Language:     C
00010  * Package:      N/A
00011  * Status:       Reusable Software Component
00012  *
00013  * (c) Copyright 1987, Hewlett-Packard Company.
00014  ** Licensed under the Apache License, Version 2.0 (the "License");
00015  ** you may not use this file except in compliance with the License.
00016  ** You may obtain a copy of the License at
00017  ** http://www.apache.org/licenses/LICENSE-2.0
00018  ** Unless required by applicable law or agreed to in writing, software
00019  ** distributed under the License is distributed on an "AS IS" BASIS,
00020  ** WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
00021  ** See the License for the specific language governing permissions and
00022  ** limitations under the License.
00023  *
00024  *********************************************************************************/
00025 /*----------------------------------------------------------------------
00026               I n c l u d e s
00027 ----------------------------------------------------------------------*/
00028 #include "gradechop.h"
00029 #include "wordrec.h"
00030 #include "olutil.h"
00031 #include "chop.h"
00032 #include "ndminx.h"
00033 #include <math.h>
00034 
/*----------------------------------------------------------------------
              C o n s t a n t s
----------------------------------------------------------------------*/
/* Upper limit applied to the grade computed by grade_center_of_blob. */
#define CENTER_GRADE_CAP 25.0
00039 
00040 /*----------------------------------------------------------------------
00041               M a c r o s
00042 ----------------------------------------------------------------------*/
/**********************************************************************
 * find_bounds_loop
 *
 * Macro used by set_outline_bounds.  Starting at point1, follow the
 * ->next links and leave the smallest and largest pos.x seen in x_min
 * and x_max.  The walk stops on reaching point2 or on coming back to
 * point1.  point2 itself is not visited, but its x always takes part
 * in the result because it seeds both x_min and x_max.
 *
 * The caller must have a pointer variable named this_point in scope.
 * It is used as the cursor and is left on the point where the walk
 * stopped.  point1 and point2 are evaluated more than once, so pass
 * expressions without side effects.
 *
 * The expansion is a single do { } while (0) statement: write the
 * macro call followed by a semicolon.  This keeps it safe as the body
 * of an unbraced if / else.
 **********************************************************************/

#define find_bounds_loop(point1,point2,x_min,x_max)                 \
  do {                                                              \
    (x_min) = (point2)->pos.x;                                      \
    (x_max) = (point2)->pos.x;                                      \
                                                                    \
    this_point = (point1);                                          \
    do {                                                            \
      if (this_point->pos.x < (x_min))                              \
        (x_min) = this_point->pos.x;                                \
      if (this_point->pos.x > (x_max))                              \
        (x_max) = this_point->pos.x;                                \
      this_point = this_point->next;                                \
    } while (this_point != (point2) && this_point != (point1));     \
  } while (0)
00060 
00061 
00062 namespace tesseract {
00063 
00064 /*----------------------------------------------------------------------
00065               F u n c t i o n s
00066 ----------------------------------------------------------------------*/
00067 /**********************************************************************
00068  * full_split_priority
00069  *
00070  * Assign a priority to this split based on the features that it has.
00071  * Part of the priority has already been calculated so just return the
00072  * additional amount for the bounding box type information.
00073  **********************************************************************/
00074 PRIORITY Wordrec::full_split_priority(SPLIT *split, inT16 xmin, inT16 xmax) {
00075   BOUNDS_RECT rect;
00076 
00077   set_outline_bounds (split->point1, split->point2, rect);
00078 
00079   if (xmin < MIN (rect[0], rect[2]) && xmax > MAX (rect[1], rect[3]))
00080     return (999.0);
00081 
00082   return (grade_overlap (rect) +
00083     grade_center_of_blob (rect) + grade_width_change (rect));
00084 }
00085 
00086 
00087 /**********************************************************************
00088  * grade_center_of_blob
00089  *
00090  * Return a grade for the a split.  Rank it on closeness to the center
00091  * of the original blob
00092  *   0    =  "perfect"
00093  *   100  =  "no way jay"
00094  **********************************************************************/
00095 PRIORITY Wordrec::grade_center_of_blob(register BOUNDS_RECT rect) {
00096   register PRIORITY grade;
00097   int width1 = rect[1] - rect[0];
00098   int width2 = rect[3] - rect[2];
00099 
00100   if (width1 > chop_centered_maxwidth &&
00101       width2 > chop_centered_maxwidth) {
00102     return 0.0;
00103   }
00104 
00105   grade = width1 - width2;
00106   if (grade < 0)
00107     grade = -grade;
00108 
00109   grade *= chop_center_knob;
00110   grade = MIN (CENTER_GRADE_CAP, grade);
00111   return (MAX (0.0, grade));
00112 }
00113 
00114 
00115 /**********************************************************************
00116  * grade_overlap
00117  *
00118  * Return a grade for this split for the overlap of the resultant blobs.
00119  *   0    =  "perfect"
00120  *   100  =  "no way jay"
00121  **********************************************************************/
00122 PRIORITY Wordrec::grade_overlap(register BOUNDS_RECT rect) {
00123   register PRIORITY grade;
00124   register inT16 width1;
00125   register inT16 width2;
00126   register inT16 overlap;
00127 
00128   width1 = rect[3] - rect[2];
00129   width2 = rect[1] - rect[0];
00130 
00131   overlap = MIN (rect[1], rect[3]) - MAX (rect[0], rect[2]);
00132   width1 = MIN (width1, width2);
00133   if (overlap == width1)
00134     return (100.0);              /* Total overlap */
00135 
00136   width1 = 2 * overlap - width1; /* Extra penalty for too */
00137   overlap += MAX (0, width1);    /* much overlap */
00138 
00139   grade = overlap * chop_overlap_knob;
00140 
00141   return (MAX (0.0, grade));
00142 }
00143 
00144 
00145 /**********************************************************************
00146  * grade_split_length
00147  *
00148  * Return a grade for the length of this split.
00149  *   0    =  "perfect"
00150  *   100  =  "no way jay"
00151  **********************************************************************/
00152 PRIORITY Wordrec::grade_split_length(register SPLIT *split) {
00153   register PRIORITY grade;
00154   register float split_length;
00155 
00156   split_length = weighted_edgept_dist (split->point1, split->point2,
00157     chop_x_y_weight);
00158 
00159   if (split_length <= 0)
00160     grade = 0;
00161   else
00162     grade = sqrt (split_length) * chop_split_dist_knob;
00163 
00164   return (MAX (0.0, grade));
00165 }
00166 
00167 
00168 /**********************************************************************
00169  * grade_sharpness
00170  *
00171  * Return a grade for the sharpness of this split.
00172  *   0    =  "perfect"
00173  *   100  =  "no way jay"
00174  **********************************************************************/
00175 PRIORITY Wordrec::grade_sharpness(register SPLIT *split) {
00176   register PRIORITY grade;
00177 
00178   grade = point_priority (split->point1) + point_priority (split->point2);
00179 
00180   if (grade < -360.0)
00181     grade = 0;
00182   else
00183     grade += 360.0;
00184 
00185   grade *= chop_sharpness_knob;       /* Values 0 to -360 */
00186 
00187   return (grade);
00188 }
00189 
00190 
00191 /**********************************************************************
00192  * grade_width_change
00193  *
00194  * Return a grade for the change in width of the resultant blobs.
00195  *   0    =  "perfect"
00196  *   100  =  "no way jay"
00197  **********************************************************************/
00198 PRIORITY Wordrec::grade_width_change(register BOUNDS_RECT rect) {
00199   register PRIORITY grade;
00200   register inT32 width1;
00201   register inT32 width2;
00202 
00203   width1 = rect[3] - rect[2];
00204   width2 = rect[1] - rect[0];
00205 
00206   grade = 20 - (MAX (rect[1], rect[3])
00207     - MIN (rect[0], rect[2]) - MAX (width1, width2));
00208 
00209   grade *= chop_width_change_knob;
00210 
00211   return (MAX (0.0, grade));
00212 }
00213 
00214 
00215 /**********************************************************************
00216  * set_outline_bounds
00217  *
00218  * Set up the limits for the x coordinate of the outline.
00219  **********************************************************************/
00220 void Wordrec::set_outline_bounds(register EDGEPT *point1,
00221                                  register EDGEPT *point2,
00222                                  BOUNDS_RECT rect) {
00223   register EDGEPT *this_point;
00224   register inT16 x_min;
00225   register inT16 x_max;
00226 
00227   find_bounds_loop(point1, point2, x_min, x_max);
00228 
00229   rect[0] = x_min;
00230   rect[1] = x_max;
00231 
00232   find_bounds_loop(point2, point1, x_min, x_max);
00233 
00234   rect[2] = x_min;
00235   rect[3] = x_max;
00236 }
00237 
00238 }  // namespace tesseract
 All Classes Namespaces Files Functions Variables Typedefs Enumerations Enumerator Friends Defines