tesseract
3.03
|
00001 /****************************************************************************** 00002 ** Filename: picofeat.c 00003 ** Purpose: Definition of pico-features. 00004 ** Author: Dan Johnson 00005 ** History: 9/4/90, DSJ, Created. 00006 ** 00007 ** (c) Copyright Hewlett-Packard Company, 1988. 00008 ** Licensed under the Apache License, Version 2.0 (the "License"); 00009 ** you may not use this file except in compliance with the License. 00010 ** You may obtain a copy of the License at 00011 ** http://www.apache.org/licenses/LICENSE-2.0 00012 ** Unless required by applicable law or agreed to in writing, software 00013 ** distributed under the License is distributed on an "AS IS" BASIS, 00014 ** WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 00015 ** See the License for the specific language governing permissions and 00016 ** limitations under the License. 00017 ******************************************************************************/ 00021 #include "picofeat.h" 00022 00023 #include "classify.h" 00024 #include "efio.h" 00025 #include "featdefs.h" 00026 #include "fpoint.h" 00027 #include "mfoutline.h" 00028 #include "ocrfeatures.h" 00029 #include "params.h" 00030 #include "trainingsample.h" 00031 00032 #include <math.h> 00033 #include <stdio.h> 00034 00035 /*--------------------------------------------------------------------------- 00036 Variables 00037 ----------------------------------------------------------------------------*/ 00038 00039 double_VAR(classify_pico_feature_length, 0.05, "Pico Feature Length"); 00040 00041 /*--------------------------------------------------------------------------- 00042 Private Function Prototypes 00043 ----------------------------------------------------------------------------*/ 00044 void ConvertSegmentToPicoFeat(FPOINT *Start, 00045 FPOINT *End, 00046 FEATURE_SET FeatureSet); 00047 00048 void ConvertToPicoFeatures2(MFOUTLINE Outline, FEATURE_SET FeatureSet); 00049 00050 void NormalizePicoX(FEATURE_SET FeatureSet); 00051 00055 /*---------------------------------------------------------------------------*/ 00056 namespace tesseract { 00057 FEATURE_SET Classify::ExtractPicoFeatures(TBLOB *Blob) { 00058 /* 00059 ** Parameters: 00060 ** Blob blob to extract pico-features from 00061 ** LineStats statistics on text row blob is in 00062 ** Globals: 00063 ** classify_norm_method normalization method currently specified 00064 ** Operation: Dummy for now. 00065 ** Return: Pico-features for Blob. 00066 ** Exceptions: none 00067 ** History: 9/4/90, DSJ, Created. 00068 */ 00069 LIST Outlines; 00070 LIST RemainingOutlines; 00071 MFOUTLINE Outline; 00072 FEATURE_SET FeatureSet; 00073 FLOAT32 XScale, YScale; 00074 00075 FeatureSet = NewFeatureSet(MAX_PICO_FEATURES); 00076 Outlines = ConvertBlob(Blob); 00077 NormalizeOutlines(Outlines, &XScale, &YScale); 00078 RemainingOutlines = Outlines; 00079 iterate(RemainingOutlines) { 00080 Outline = (MFOUTLINE) first_node (RemainingOutlines); 00081 ConvertToPicoFeatures2(Outline, FeatureSet); 00082 } 00083 if (classify_norm_method == baseline) 00084 NormalizePicoX(FeatureSet); 00085 FreeOutlines(Outlines); 00086 return (FeatureSet); 00087 00088 } /* ExtractPicoFeatures */ 00089 } // namespace tesseract 00090 00094 /*---------------------------------------------------------------------------*/ 00095 void ConvertSegmentToPicoFeat(FPOINT *Start, 00096 FPOINT *End, 00097 FEATURE_SET FeatureSet) { 00098 /* 00099 ** Parameters: 00100 ** Start starting point of pico-feature 00101 ** End ending point of pico-feature 00102 ** FeatureSet set to add pico-feature to 00103 ** Globals: 00104 ** classify_pico_feature_length length of a single pico-feature 00105 ** Operation: This routine converts an entire segment of an outline 00106 ** into a set of pico features which are added to 00107 ** FeatureSet. The length of the segment is rounded to the 00108 ** nearest whole number of pico-features. The pico-features 00109 ** are spaced evenly over the entire segment. 00110 ** Return: none (results are placed in FeatureSet) 00111 ** Exceptions: none 00112 ** History: Tue Apr 30 15:44:34 1991, DSJ, Created. 00113 */ 00114 FEATURE Feature; 00115 FLOAT32 Angle; 00116 FLOAT32 Length; 00117 int NumFeatures; 00118 FPOINT Center; 00119 FPOINT Delta; 00120 int i; 00121 00122 Angle = NormalizedAngleFrom (Start, End, 1.0); 00123 Length = DistanceBetween (*Start, *End); 00124 NumFeatures = (int) floor (Length / classify_pico_feature_length + 0.5); 00125 if (NumFeatures < 1) 00126 NumFeatures = 1; 00127 00128 /* compute vector for one pico feature */ 00129 Delta.x = XDelta (*Start, *End) / NumFeatures; 00130 Delta.y = YDelta (*Start, *End) / NumFeatures; 00131 00132 /* compute position of first pico feature */ 00133 Center.x = Start->x + Delta.x / 2.0; 00134 Center.y = Start->y + Delta.y / 2.0; 00135 00136 /* compute each pico feature in segment and add to feature set */ 00137 for (i = 0; i < NumFeatures; i++) { 00138 Feature = NewFeature (&PicoFeatDesc); 00139 Feature->Params[PicoFeatDir] = Angle; 00140 Feature->Params[PicoFeatX] = Center.x; 00141 Feature->Params[PicoFeatY] = Center.y; 00142 AddFeature(FeatureSet, Feature); 00143 00144 Center.x += Delta.x; 00145 Center.y += Delta.y; 00146 } 00147 } /* ConvertSegmentToPicoFeat */ 00148 00149 00150 /*---------------------------------------------------------------------------*/ 00151 void ConvertToPicoFeatures2(MFOUTLINE Outline, FEATURE_SET FeatureSet) { 00152 /* 00153 ** Parameters: 00154 ** Outline outline to extract micro-features from 00155 ** FeatureSet set of features to add pico-features to 00156 ** Globals: 00157 ** classify_pico_feature_length 00158 ** length of features to be extracted 00159 ** Operation: 00160 ** This routine steps thru the specified outline and cuts it 00161 ** up into pieces of equal length. These pieces become the 00162 ** desired pico-features. Each segment in the outline 00163 ** is converted into an integral number of pico-features. 00164 ** Return: none (results are returned in FeatureSet) 00165 ** Exceptions: none 00166 ** History: 4/30/91, DSJ, Adapted from ConvertToPicoFeatures(). 00167 */ 00168 MFOUTLINE Next; 00169 MFOUTLINE First; 00170 MFOUTLINE Current; 00171 00172 if (DegenerateOutline(Outline)) 00173 return; 00174 00175 First = Outline; 00176 Current = First; 00177 Next = NextPointAfter(Current); 00178 do { 00179 /* note that an edge is hidden if the ending point of the edge is 00180 marked as hidden. This situation happens because the order of 00181 the outlines is reversed when they are converted from the old 00182 format. In the old format, a hidden edge is marked by the 00183 starting point for that edge. */ 00184 if (!(PointAt(Next)->Hidden)) 00185 ConvertSegmentToPicoFeat (&(PointAt(Current)->Point), 00186 &(PointAt(Next)->Point), FeatureSet); 00187 00188 Current = Next; 00189 Next = NextPointAfter(Current); 00190 } 00191 while (Current != First); 00192 00193 } /* ConvertToPicoFeatures2 */ 00194 00195 00196 /*---------------------------------------------------------------------------*/ 00197 void NormalizePicoX(FEATURE_SET FeatureSet) { 00198 /* 00199 ** Parameters: 00200 ** FeatureSet pico-features to be normalized 00201 ** Globals: none 00202 ** Operation: This routine computes the average x position over all 00203 ** of the pico-features in FeatureSet and then renormalizes 00204 ** the pico-features to force this average to be the x origin 00205 ** (i.e. x=0). 00206 ** Return: none (FeatureSet is changed) 00207 ** Exceptions: none 00208 ** History: Tue Sep 4 16:50:08 1990, DSJ, Created. 00209 */ 00210 int i; 00211 FEATURE Feature; 00212 FLOAT32 Origin = 0.0; 00213 00214 for (i = 0; i < FeatureSet->NumFeatures; i++) { 00215 Feature = FeatureSet->Features[i]; 00216 Origin += Feature->Params[PicoFeatX]; 00217 } 00218 Origin /= FeatureSet->NumFeatures; 00219 00220 for (i = 0; i < FeatureSet->NumFeatures; i++) { 00221 Feature = FeatureSet->Features[i]; 00222 Feature->Params[PicoFeatX] -= Origin; 00223 } 00224 } /* NormalizePicoX */ 00225 00226 /*---------------------------------------------------------------------------*/ 00227 FEATURE_SET ExtractIntCNFeatures(TBLOB *blob, const DENORM& bl_denorm, 00228 const DENORM& cn_denorm, 00229 const INT_FX_RESULT_STRUCT& fx_info) { 00230 /* 00231 ** Parameters: 00232 ** blob blob to extract features from 00233 ** denorm normalization/denormalization parameters. 00234 ** Return: Integer character-normalized features for blob. 00235 ** Exceptions: none 00236 ** History: 8/8/2011, rays, Created. 00237 */ 00238 INT_FX_RESULT_STRUCT local_fx_info(fx_info); 00239 GenericVector<INT_FEATURE_STRUCT> bl_features; 00240 tesseract::TrainingSample* sample = 00241 tesseract::BlobToTrainingSample(*blob, false, &local_fx_info, 00242 &bl_features); 00243 if (sample == NULL) return NULL; 00244 00245 int num_features = sample->num_features(); 00246 const INT_FEATURE_STRUCT* features = sample->features(); 00247 FEATURE_SET feature_set = NewFeatureSet(num_features); 00248 for (int f = 0; f < num_features; ++f) { 00249 FEATURE feature = NewFeature(&IntFeatDesc); 00250 00251 feature->Params[IntX] = features[f].X; 00252 feature->Params[IntY] = features[f].Y; 00253 feature->Params[IntDir] = features[f].Theta; 00254 AddFeature(feature_set, feature); 00255 } 00256 delete sample; 00257 00258 return feature_set; 00259 } /* ExtractIntCNFeatures */ 00260 00261 /*---------------------------------------------------------------------------*/ 00262 FEATURE_SET ExtractIntGeoFeatures(TBLOB *blob, const DENORM& bl_denorm, 00263 const DENORM& cn_denorm, 00264 const INT_FX_RESULT_STRUCT& fx_info) { 00265 /* 00266 ** Parameters: 00267 ** blob blob to extract features from 00268 ** denorm normalization/denormalization parameters. 00269 ** Return: Geometric (top/bottom/width) features for blob. 00270 ** Exceptions: none 00271 ** History: 8/8/2011, rays, Created. 00272 */ 00273 INT_FX_RESULT_STRUCT local_fx_info(fx_info); 00274 GenericVector<INT_FEATURE_STRUCT> bl_features; 00275 tesseract::TrainingSample* sample = 00276 tesseract::BlobToTrainingSample(*blob, false, &local_fx_info, 00277 &bl_features); 00278 if (sample == NULL) return NULL; 00279 00280 FEATURE_SET feature_set = NewFeatureSet(1); 00281 FEATURE feature = NewFeature(&IntFeatDesc); 00282 00283 feature->Params[GeoBottom] = sample->geo_feature(GeoBottom); 00284 feature->Params[GeoTop] = sample->geo_feature(GeoTop); 00285 feature->Params[GeoWidth] = sample->geo_feature(GeoWidth); 00286 AddFeature(feature_set, feature); 00287 delete sample; 00288 00289 return feature_set; 00290 } /* ExtractIntGeoFeatures */