tesseract  3.03
/usr/local/google/home/jbreiden/tesseract-ocr-read-only/classify/normfeat.cpp
Go to the documentation of this file.
00001 /******************************************************************************
00002  **     Filename:    normfeat.cpp
00003  **     Purpose:     Definition of char normalization features.
00004  **     Author:      Dan Johnson
00005  **     History:     12/14/90, DSJ, Created.
00006  **
00007  **     (c) Copyright Hewlett-Packard Company, 1988.
00008  ** Licensed under the Apache License, Version 2.0 (the "License");
00009  ** you may not use this file except in compliance with the License.
00010  ** You may obtain a copy of the License at
00011  ** http://www.apache.org/licenses/LICENSE-2.0
00012  ** Unless required by applicable law or agreed to in writing, software
00013  ** distributed under the License is distributed on an "AS IS" BASIS,
00014  ** WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
00015  ** See the License for the specific language governing permissions and
00016  ** limitations under the License.
00017  ******************************************************************************/
00021 #include "normfeat.h"
00022 
00023 #include "intfx.h"
00024 #include "featdefs.h"
00025 #include "mfoutline.h"
00026 
00031 // Return the length of the outline in baseline normalized form.
00032 FLOAT32 ActualOutlineLength(FEATURE Feature) {
00033   return (Feature->Params[CharNormLength] * LENGTH_COMPRESSION);
00034 }
00035 
00036 
00037 /*---------------------------------------------------------------------------*/
00038 // Return the character normalization feature for a blob.
00039 //
00040 // The features returned are in a scale where the x-height has been
00041 // normalized to live in the region y = [-0.25 .. 0.25].  Example ranges
00042 // for English below are based on the Linux font collection on 2009-12-04:
00043 //
00044 //   Params[CharNormY]
00045 //     The y coordinate of the grapheme's centroid.
00046 //     English: [-0.27, 0.71]
00047 //
00048 //   Params[CharNormLength]
00049 //     The length of the grapheme's outline (tiny segments discarded),
00050 //     divided by 10.0=LENGTH_COMPRESSION.
00051 //     English: [0.16, 0.85]
00052 //
00053 //   Params[CharNormRx]
00054 //     The radius of gyration about the x axis, as measured from CharNormY.
00055 //     English: [0.011, 0.34]
00056 //
00057 //   Params[CharNormRy]
00058 //     The radius of gyration about the y axis, as measured from
00059 //     the x center of the grapheme's bounding box.
00060 //     English: [0.011, 0.31]
00061 //
00062 FEATURE_SET ExtractCharNormFeatures(TBLOB *blob, const DENORM& bl_denorm,
00063                                     const DENORM& cn_denorm,
00064                                     const INT_FX_RESULT_STRUCT& fx_info) {
00065   FEATURE_SET feature_set = NewFeatureSet(1);
00066   FEATURE feature = NewFeature(&CharNormDesc);
00067 
00068   feature->Params[CharNormY] =
00069       MF_SCALE_FACTOR * (fx_info.Ymean - kBlnBaselineOffset);
00070   feature->Params[CharNormLength] =
00071       MF_SCALE_FACTOR * fx_info.Length / LENGTH_COMPRESSION;
00072   feature->Params[CharNormRx] = MF_SCALE_FACTOR * fx_info.Rx;
00073   feature->Params[CharNormRy] = MF_SCALE_FACTOR * fx_info.Ry;
00074 
00075   AddFeature(feature_set, feature);
00076 
00077   return feature_set;
00078 }                                /* ExtractCharNormFeatures */
 All Classes Namespaces Files Functions Variables Typedefs Enumerations Enumerator Friends Defines