tesseract 3.03
/******************************************************************************
 ** Filename:    intfx.c
 ** Purpose:     Integer character normalization & feature extraction
 ** Author:      Robert Moss, rays@google.com (Ray Smith)
 ** History:     Tue May 21 15:51:57 MDT 1991, RWM, Created.
 **              Tue Feb 28 10:42:00 PST 2012, vastly rewritten to allow
 **                                             greyscale fx and non-linear
 **                                             normalization.
 **
 **  (c) Copyright Hewlett-Packard Company, 1988.
 ** Licensed under the Apache License, Version 2.0 (the "License");
 ** you may not use this file except in compliance with the License.
 ** You may obtain a copy of the License at
 ** http://www.apache.org/licenses/LICENSE-2.0
 ** Unless required by applicable law or agreed to in writing, software
 ** distributed under the License is distributed on an "AS IS" BASIS,
 ** WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 ** See the License for the specific language governing permissions and
 ** limitations under the License.
 ******************************************************************************/

#include "intfx.h"
#include "allheaders.h"
#include "ccutil.h"
#include "classify.h"
#include "const.h"
#include "helpers.h"
#include "intmatcher.h"
#include "linlsq.h"
#include "ndminx.h"
#include "normalis.h"
#include "statistc.h"
#include "trainingsample.h"

using tesseract::TrainingSample;

// Look up table for cos and sin to turn the intfx feature angle to a vector.
// Protected by atan_table_mutex.
// The entries are in binary degrees where a full circle is 256 binary degrees.
static float cos_table[INT_CHAR_NORM_RANGE];
static float sin_table[INT_CHAR_NORM_RANGE];
// Guards write access to AtanTable so we don't create it more than once.
tesseract::CCUtilMutex atan_table_mutex;


/*---------------------------------------------------------------------------*/
void InitIntegerFX() {
  static bool atan_table_init = false;
  atan_table_mutex.Lock();
  if (!atan_table_init) {
    for (int i = 0; i < INT_CHAR_NORM_RANGE; ++i) {
      cos_table[i] = cos(i * 2 * PI / INT_CHAR_NORM_RANGE + PI);
      sin_table[i] = sin(i * 2 * PI / INT_CHAR_NORM_RANGE + PI);
    }
    atan_table_init = true;
  }
  atan_table_mutex.Unlock();
}

// Returns a vector representing the direction of a feature with the given
// theta direction in an INT_FEATURE_STRUCT.
FCOORD FeatureDirection(uinT8 theta) {
  return FCOORD(cos_table[theta], sin_table[theta]);
}

namespace tesseract {

// Generates a TrainingSample from a TBLOB. Extracts features and sets
// the bounding box, so classifiers that operate on the image can work.
// TODO(rays) BlobToTrainingSample must remain a global function until
// the FlexFx and FeatureDescription code can be removed and LearnBlob
// made a member of Classify.
TrainingSample* BlobToTrainingSample(
    const TBLOB& blob, bool nonlinear_norm, INT_FX_RESULT_STRUCT* fx_info,
    GenericVector<INT_FEATURE_STRUCT>* bl_features) {
  GenericVector<INT_FEATURE_STRUCT> cn_features;
  Classify::ExtractFeatures(blob, nonlinear_norm, bl_features,
                            &cn_features, fx_info, NULL);
  // TODO(rays) Use blob->PreciseBoundingBox() instead.
  TBOX box = blob.bounding_box();
  TrainingSample* sample = NULL;
  int num_features = fx_info->NumCN;
  if (num_features > 0) {
    sample = TrainingSample::CopyFromFeatures(*fx_info, box, &cn_features[0],
                                              num_features);
  }
  if (sample != NULL) {
    // Set the bounding box (in original image coordinates) in the sample.
    TPOINT topleft, botright;
    topleft.x = box.left();
    topleft.y = box.top();
    botright.x = box.right();
    botright.y = box.bottom();
    TPOINT original_topleft, original_botright;
    blob.denorm().DenormTransform(NULL, topleft, &original_topleft);
    blob.denorm().DenormTransform(NULL, botright, &original_botright);
    sample->set_bounding_box(TBOX(original_topleft.x, original_botright.y,
                                  original_botright.x, original_topleft.y));
  }
  return sample;
}

// Computes the DENORMS for bl (baseline) and cn (character) normalization
// during feature extraction. The input denorm describes the current state
// of the blob, which is usually a baseline-normalized word.
// The transforms set up are as follows:
// Baseline Normalized (bl) Output:
//   We center the grapheme by aligning the x-coordinate of its centroid with
//   x=128 and leaving the already-baseline-normalized y as-is.
//
// Character Normalized (cn) Output:
//   We align the grapheme's centroid at the origin and scale it
//   asymmetrically in x and y so that the 2nd moments are a standard value
//   (51.2), i.e. the result is vaguely square.
// If classify_nonlinear_norm is true:
//   A non-linear normalization is set up that attempts to evenly distribute
//   edges across x and y.
//
// Some of the fields of fx_info are also set up:
// Length: Total length of outline.
// Rx: Rounded y second moment. (Reversed by convention.)
// Ry: Rounded x second moment.
// Xmean: Rounded x center of mass of the blob.
// Ymean: Rounded y center of mass of the blob.
void Classify::SetupBLCNDenorms(const TBLOB& blob, bool nonlinear_norm,
                                DENORM* bl_denorm, DENORM* cn_denorm,
                                INT_FX_RESULT_STRUCT* fx_info) {
  // Compute 1st and 2nd moments of the original outline.
  FCOORD center, second_moments;
  int length = blob.ComputeMoments(&center, &second_moments);
  if (fx_info != NULL) {
    fx_info->Length = length;
    fx_info->Rx = IntCastRounded(second_moments.y());
    fx_info->Ry = IntCastRounded(second_moments.x());

    fx_info->Xmean = IntCastRounded(center.x());
    fx_info->Ymean = IntCastRounded(center.y());
  }
  // Setup the denorm for Baseline normalization.
  bl_denorm->SetupNormalization(NULL, NULL, &blob.denorm(), center.x(), 128.0f,
                                1.0f, 1.0f, 128.0f, 128.0f);
  // Setup the denorm for character normalization.
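  // For example (illustrative numbers, not from the original source): in the
  // linear branch below, a blob whose x second moment is 25.6 and whose y
  // second moment is 102.4 gets x scaled by 51.2/25.6 = 2.0 and y scaled by
  // 51.2/102.4 = 0.5, bringing both second moments to the standard 51.2.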
  if (nonlinear_norm) {
    GenericVector<GenericVector<int> > x_coords;
    GenericVector<GenericVector<int> > y_coords;
    TBOX box;
    blob.GetPreciseBoundingBox(&box);
    box.pad(1, 1);
    blob.GetEdgeCoords(box, &x_coords, &y_coords);
    cn_denorm->SetupNonLinear(&blob.denorm(), box, MAX_UINT8, MAX_UINT8,
                              0.0f, 0.0f, x_coords, y_coords);
  } else {
    cn_denorm->SetupNormalization(NULL, NULL, &blob.denorm(),
                                  center.x(), center.y(),
                                  51.2f / second_moments.x(),
                                  51.2f / second_moments.y(),
                                  128.0f, 128.0f);
  }
}

// Helper normalizes the direction, assuming that it is at the given
// unnormed_pos, using the given denorm, starting at the root_denorm.
uinT8 NormalizeDirection(uinT8 dir, const FCOORD& unnormed_pos,
                         const DENORM& denorm, const DENORM* root_denorm) {
  // Convert direction to a vector.
  FCOORD unnormed_end;
  unnormed_end.from_direction(dir);
  unnormed_end += unnormed_pos;
  FCOORD normed_pos, normed_end;
  denorm.NormTransform(root_denorm, unnormed_pos, &normed_pos);
  denorm.NormTransform(root_denorm, unnormed_end, &normed_end);
  normed_end -= normed_pos;
  return normed_end.to_direction();
}

// Helper returns the mean direction vector from the given stats. Use the
// mean direction from dirs if there is information available, otherwise, use
// the fit_vector from point_diffs.
static FCOORD MeanDirectionVector(const LLSQ& point_diffs, const LLSQ& dirs,
                                  const FCOORD& start_pt,
                                  const FCOORD& end_pt) {
  FCOORD fit_vector;
  if (dirs.count() > 0) {
    // There were directions, so use them. To avoid wrap-around problems, we
    // have 2 accumulators in dirs: x for normal directions and y for
    // directions offset by 128. We will use the one with the least variance.
    FCOORD mean_pt = dirs.mean_point();
    double mean_dir = 0.0;
    if (dirs.x_variance() <= dirs.y_variance()) {
      mean_dir = mean_pt.x();
    } else {
      mean_dir = mean_pt.y() + 128;
    }
    fit_vector.from_direction(Modulo(IntCastRounded(mean_dir), 256));
  } else {
    // There were no directions, so we rely on the vector_fit to the points.
    // Since the vector_fit is 180 degrees ambiguous, we align with the
    // supplied feature_dir by making the scalar product non-negative.
    FCOORD feature_dir(end_pt - start_pt);
    fit_vector = point_diffs.vector_fit();
    if (fit_vector.x() == 0.0f && fit_vector.y() == 0.0f) {
      // There was only a single point. Use feature_dir directly.
      fit_vector = feature_dir;
    } else {
      // Sometimes the least mean squares fit is wrong, due to the small
      // sample of points and scaling. Use a 90 degree rotated vector if
      // that matches feature_dir better.
      FCOORD fit_vector2 = !fit_vector;
      // The fit_vector is 180 degrees ambiguous, so resolve the ambiguity by
      // insisting that the scalar product with the feature_dir should be +ve.
      if (fit_vector % feature_dir < 0.0)
        fit_vector = -fit_vector;
      if (fit_vector2 % feature_dir < 0.0)
        fit_vector2 = -fit_vector2;
      // Even though fit_vector2 has a higher mean squared error, it might be
      // a better fit, so use it if the dot product with feature_dir is bigger.
      if (fit_vector2 % feature_dir > fit_vector % feature_dir)
        fit_vector = fit_vector2;
    }
  }
  return fit_vector;
}

// Helper computes one or more features corresponding to the given points.
// Emitted features are on the line defined by:
// start_pt + lambda * (end_pt - start_pt) for scalar lambda.
// Features are spaced at feature_length intervals.
static int ComputeFeatures(const FCOORD& start_pt, const FCOORD& end_pt,
                           double feature_length,
                           GenericVector<INT_FEATURE_STRUCT>* features) {
  FCOORD feature_vector(end_pt - start_pt);
  if (feature_vector.x() == 0.0f && feature_vector.y() == 0.0f) return 0;
  // Compute theta for the feature based on its direction.
  uinT8 theta = feature_vector.to_direction();
  // Compute the number of features and lambda_step.
  double target_length = feature_vector.length();
  int num_features = IntCastRounded(target_length / feature_length);
  if (num_features == 0) return 0;
  // Divide the length evenly into num_features pieces.
  double lambda_step = 1.0 / num_features;
  double lambda = lambda_step / 2.0;
  for (int f = 0; f < num_features; ++f, lambda += lambda_step) {
    FCOORD feature_pt(start_pt);
    feature_pt += feature_vector * lambda;
    INT_FEATURE_STRUCT feature(feature_pt, theta);
    features->push_back(feature);
  }
  return num_features;
}

// Gathers outline points and their directions from start_index into dirs by
// stepping along the outline and normalizing the coordinates until the
// required feature_length has been collected or end_index is reached.
// On input pos must point to the position corresponding to start_index and on
// return pos is updated to the current raw position, and pos_normed is set to
// the normed version of pos.
// Since directions wrap around, they need special treatment to get the mean.
// Provided the cluster of directions doesn't straddle the wrap-around point,
// the simple mean works. If they do, then, unless the directions are wildly
// varying, the cluster rotated by 180 degrees will not straddle the wrap-
// around point, so mean(dir + 180 degrees) - 180 degrees will work. Since
// LLSQ conveniently stores the mean of 2 variables, we use it to store
// dir and dir+128 (128 is 180 degrees) and then use the resulting mean
// with the least variance.
static int GatherPoints(const C_OUTLINE* outline, double feature_length,
                        const DENORM& denorm, const DENORM* root_denorm,
                        int start_index, int end_index,
                        ICOORD* pos, FCOORD* pos_normed,
                        LLSQ* points, LLSQ* dirs) {
  int step_length = outline->pathlength();
  ICOORD step = outline->step(start_index % step_length);
  // Prev_normed is the start point of this collection and will be set on the
  // first iteration, and on later iterations used to determine the length
  // that has been collected.
  FCOORD prev_normed;
  points->clear();
  dirs->clear();
  int num_points = 0;
  int index;
  for (index = start_index; index <= end_index; ++index, *pos += step) {
    step = outline->step(index % step_length);
    int edge_weight = outline->edge_strength_at_index(index % step_length);
    if (edge_weight == 0) {
      // This point has conflicting gradient and step direction, so ignore it.
      continue;
    }
    // Get the sub-pixel precise location and normalize.
    FCOORD f_pos = outline->sub_pixel_pos_at_index(*pos, index % step_length);
    denorm.NormTransform(root_denorm, f_pos, pos_normed);
    if (num_points == 0) {
      // The start of this segment.
      prev_normed = *pos_normed;
    } else {
      FCOORD offset = *pos_normed - prev_normed;
      float length = offset.length();
      if (length > feature_length) {
        // We have gone far enough from the start. We will use this point in
        // the next set so return what we have so far.
        return index;
      }
    }
    points->add(pos_normed->x(), pos_normed->y(), edge_weight);
    int direction = outline->direction_at_index(index % step_length);
    if (direction >= 0) {
      direction = NormalizeDirection(direction, f_pos, denorm, root_denorm);
      // Use both the direction and direction + 128 so we are not trying to
      // take the mean of something straddling the wrap-around point.
      dirs->add(direction, Modulo(direction + 128, 256));
    }
    ++num_points;
  }
  return index;
}

// Extracts Tesseract features and appends them to the features vector.
// Startpt to lastpt, inclusive, MUST have the same src_outline member,
// which may be NULL. The vector from lastpt to its next is included in
// the feature extraction. Hidden edges should be excluded by the caller.
// If force_poly is true, the features will be extracted from the polygonal
// approximation even if more accurate data is available.
static void ExtractFeaturesFromRun(
    const EDGEPT* startpt, const EDGEPT* lastpt,
    const DENORM& denorm, double feature_length, bool force_poly,
    GenericVector<INT_FEATURE_STRUCT>* features) {
  const EDGEPT* endpt = lastpt->next;
  const C_OUTLINE* outline = startpt->src_outline;
  if (outline != NULL && !force_poly) {
    // Detailed information is available. We have to normalize only from
    // the root_denorm to denorm.
    const DENORM* root_denorm = denorm.RootDenorm();
    int total_features = 0;
    // Get the features from the outline.
    int step_length = outline->pathlength();
    int start_index = startpt->start_step;
    // pos is the integer coordinates of the binary image steps.
    ICOORD pos = outline->position_at_index(start_index);
    // We use an end_index that allows us to use a positive increment, but
    // that may be beyond the bounds of the outline steps due to wrap-around,
    // so we use % step_length everywhere, except for start_index.
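    // For example (illustrative numbers, not from the original source): with
    // step_length = 100, a run starting at step 90 and ending 20 steps later
    // gets end_index = 110, so index can keep incrementing past the end of
    // the step array while index % step_length wraps back to steps 0..10.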
    int end_index = lastpt->start_step + lastpt->step_count;
    if (end_index <= start_index)
      end_index += step_length;
    LLSQ prev_points;
    LLSQ prev_dirs;
    FCOORD prev_normed_pos = outline->sub_pixel_pos_at_index(pos, start_index);
    denorm.NormTransform(root_denorm, prev_normed_pos, &prev_normed_pos);
    LLSQ points;
    LLSQ dirs;
    FCOORD normed_pos;
    int index = GatherPoints(outline, feature_length, denorm, root_denorm,
                             start_index, end_index, &pos, &normed_pos,
                             &points, &dirs);
    while (index <= end_index) {
      // At each iteration we nominally have 3 accumulated sets of points and
      // dirs: prev_points/dirs, points/dirs, next_points/dirs and sum them
      // into sum_points/dirs, but we don't necessarily get any features out,
      // so if that is the case, we keep accumulating instead of rotating the
      // accumulators.
      LLSQ next_points;
      LLSQ next_dirs;
      FCOORD next_normed_pos;
      index = GatherPoints(outline, feature_length, denorm, root_denorm,
                           index, end_index, &pos, &next_normed_pos,
                           &next_points, &next_dirs);
      LLSQ sum_points(prev_points);
      // TODO(rays) find out why it is better to use just dirs and next_dirs
      // in sum_dirs, instead of using prev_dirs as well.
      LLSQ sum_dirs(dirs);
      sum_points.add(points);
      sum_points.add(next_points);
      sum_dirs.add(next_dirs);
      bool made_features = false;
      // If we have some points, we can try making some features.
      if (sum_points.count() > 0) {
        // We have gone far enough from the start. Make a feature and restart.
        FCOORD fit_pt = sum_points.mean_point();
        FCOORD fit_vector = MeanDirectionVector(sum_points, sum_dirs,
                                                prev_normed_pos, normed_pos);
        // The segment to which we fit features is the line passing through
        // fit_pt in direction of fit_vector that starts nearest to
        // prev_normed_pos and ends nearest to normed_pos.
        FCOORD start_pos = prev_normed_pos.nearest_pt_on_line(fit_pt,
                                                              fit_vector);
        FCOORD end_pos = normed_pos.nearest_pt_on_line(fit_pt, fit_vector);
        // Possible correction to match the adjacent polygon segment.
        if (total_features == 0 && startpt != endpt) {
          FCOORD poly_pos(startpt->pos.x, startpt->pos.y);
          denorm.LocalNormTransform(poly_pos, &start_pos);
        }
        if (index > end_index && startpt != endpt) {
          FCOORD poly_pos(endpt->pos.x, endpt->pos.y);
          denorm.LocalNormTransform(poly_pos, &end_pos);
        }
        int num_features = ComputeFeatures(start_pos, end_pos, feature_length,
                                           features);
        if (num_features > 0) {
          // We made some features so shuffle the accumulators.
          prev_points = points;
          prev_dirs = dirs;
          prev_normed_pos = normed_pos;
          points = next_points;
          dirs = next_dirs;
          made_features = true;
          total_features += num_features;
        }
        // The end of the next set becomes the end next time around.
        normed_pos = next_normed_pos;
      }
      if (!made_features) {
        // We didn't make any features, so keep the prev accumulators and
        // add the next ones into the current.
        points.add(next_points);
        dirs.add(next_dirs);
      }
    }
  } else {
    // There is no outline, so we are forced to use the polygonal
    // approximation.
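    // In this fallback each polygon edge, from pt to pt->next, is normalized
    // with LocalNormTransform and handed to ComputeFeatures; the loop below
    // covers startpt..lastpt inclusive because endpt is lastpt->next.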
    const EDGEPT* pt = startpt;
    do {
      FCOORD start_pos(pt->pos.x, pt->pos.y);
      FCOORD end_pos(pt->next->pos.x, pt->next->pos.y);
      denorm.LocalNormTransform(start_pos, &start_pos);
      denorm.LocalNormTransform(end_pos, &end_pos);
      ComputeFeatures(start_pos, end_pos, feature_length, features);
    } while ((pt = pt->next) != endpt);
  }
}

// Extracts sets of 3-D features of length kStandardFeatureLength (=12.8), as
// (x,y) position and angle as measured counterclockwise from the vector
// <-1, 0>, from blob using two normalizations defined by bl_denorm and
// cn_denorm. See SetupBLCNDenorms for definitions.
// If outline_cn_counts is not NULL, on return it contains the cumulative
// number of cn features generated for each outline in the blob (in order).
// Thus after the first outline, there were (*outline_cn_counts)[0] features,
// after the second outline, there were (*outline_cn_counts)[1] features, etc.
void Classify::ExtractFeatures(const TBLOB& blob,
                               bool nonlinear_norm,
                               GenericVector<INT_FEATURE_STRUCT>* bl_features,
                               GenericVector<INT_FEATURE_STRUCT>* cn_features,
                               INT_FX_RESULT_STRUCT* results,
                               GenericVector<int>* outline_cn_counts) {
  DENORM bl_denorm, cn_denorm;
  tesseract::Classify::SetupBLCNDenorms(blob, nonlinear_norm,
                                        &bl_denorm, &cn_denorm, results);
  if (outline_cn_counts != NULL)
    outline_cn_counts->truncate(0);
  // Iterate the outlines.
  for (TESSLINE* ol = blob.outlines; ol != NULL; ol = ol->next) {
    // Iterate the polygon.
    EDGEPT* loop_pt = ol->FindBestStartPt();
    EDGEPT* pt = loop_pt;
    if (pt == NULL) continue;
    do {
      if (pt->IsHidden()) continue;
      // Find a run of equal src_outline.
      EDGEPT* last_pt = pt;
      do {
        last_pt = last_pt->next;
      } while (last_pt != loop_pt && !last_pt->IsHidden() &&
               last_pt->src_outline == pt->src_outline);
      last_pt = last_pt->prev;
      // Until the adaptive classifier can be weaned off polygon segments,
      // we have to force extraction from the polygon for the bl_features.
      ExtractFeaturesFromRun(pt, last_pt, bl_denorm, kStandardFeatureLength,
                             true, bl_features);
      ExtractFeaturesFromRun(pt, last_pt, cn_denorm, kStandardFeatureLength,
                             false, cn_features);
      pt = last_pt;
    } while ((pt = pt->next) != loop_pt);
    if (outline_cn_counts != NULL)
      outline_cn_counts->push_back(cn_features->size());
  }
  results->NumBL = bl_features->size();
  results->NumCN = cn_features->size();
  results->YBottom = blob.bounding_box().bottom();
  results->YTop = blob.bounding_box().top();
  results->Width = blob.bounding_box().width();
}

}  // namespace tesseract


/*--------------------------------------------------------------------------*/
// Extract a set of standard-sized features from Blobs and write them out in
// two formats: baseline normalized and character normalized.
//
// We presume the Blobs are already scaled so that x-height = 128 units.
//
// Standard Features:
//   We take all outline segments longer than 7 units and chop them into
//   standard-sized segments of approximately 13 = (64 / 5) units.
//   When writing these features out, we output their center and angle as
//   measured counterclockwise from the vector <-1, 0>.
//
// Baseline Normalized Output:
//   We center the grapheme by aligning the x-coordinate of its centroid with
//   x=0 and subtracting 128 from the y-coordinate.
//
// Character Normalized Output:
//   We align the grapheme's centroid at the origin and scale it asymmetrically
//   in x and y so that the result is vaguely square.
//
// Deprecated! Prefer tesseract::Classify::ExtractFeatures instead.
bool ExtractIntFeat(const TBLOB& blob,
                    bool nonlinear_norm,
                    INT_FEATURE_ARRAY baseline_features,
                    INT_FEATURE_ARRAY charnorm_features,
                    INT_FX_RESULT_STRUCT* results) {
  GenericVector<INT_FEATURE_STRUCT> bl_features;
  GenericVector<INT_FEATURE_STRUCT> cn_features;
  tesseract::Classify::ExtractFeatures(blob, nonlinear_norm,
                                       &bl_features, &cn_features, results,
                                       NULL);
  if (bl_features.size() == 0 || cn_features.size() == 0 ||
      bl_features.size() > MAX_NUM_INT_FEATURES ||
      cn_features.size() > MAX_NUM_INT_FEATURES) {
    return false;  // Feature extraction failed.
  }
  memcpy(baseline_features, &bl_features[0],
         bl_features.size() * sizeof(bl_features[0]));
  memcpy(charnorm_features, &cn_features[0],
         cn_features.size() * sizeof(cn_features[0]));
  return true;
}
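
/*
 * Usage sketch (illustrative only, not part of the original file): a caller
 * that already has a baseline-normalized TBLOB might extract both feature
 * sets like this. "blob" is assumed to be a valid TBLOB from segmentation
 * and "nonlinear_norm" a flag chosen by the caller.
 *
 *   InitIntegerFX();  // Build the cos/sin lookup tables once.
 *   INT_FX_RESULT_STRUCT fx_info;
 *   GenericVector<INT_FEATURE_STRUCT> bl_features, cn_features;
 *   tesseract::Classify::ExtractFeatures(blob, nonlinear_norm, &bl_features,
 *                                        &cn_features, &fx_info, NULL);
 *   // Or, to get a TrainingSample (with its bounding box set) for training:
 *   tesseract::TrainingSample* sample = tesseract::BlobToTrainingSample(
 *       blob, nonlinear_norm, &fx_info, &bl_features);
 */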