tesseract  3.03
/usr/local/google/home/jbreiden/tesseract-ocr-read-only/ccstruct/params_training_featdef.h
Go to the documentation of this file.
00001 
00002 // File:        params_training_featdef.h
00003 // Description: Feature definitions for params training.
00004 // Author:      Rika Antonova
00005 // Created:     Mon Nov 28 11:26:42 PDT 2011
00006 //
00007 // (C) Copyright 2011, Google Inc.
00008 // Licensed under the Apache License, Version 2.0 (the "License");
00009 // you may not use this file except in compliance with the License.
00010 // You may obtain a copy of the License at
00011 // http://www.apache.org/licenses/LICENSE-2.0
00012 // Unless required by applicable law or agreed to in writing, software
00013 // distributed under the License is distributed on an "AS IS" BASIS,
00014 // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
00015 // See the License for the specific language governing permissions and
00016 // limitations under the License.
00017 //
00019 
00020 #ifndef TESSERACT_WORDREC_PARAMS_TRAINING_FEATDEF_H_
00021 #define TESSERACT_WORDREC_PARAMS_TRAINING_FEATDEF_H_
00022 
00023 #include "genericvector.h"
00024 #include "strngs.h"
00025 
00026 namespace tesseract {
00027 
// Word-length thresholds, measured in unichars:
// the maximum number of unichars in a small word ...
static const int kMaxSmallWordUnichars = 3;
// ... and the maximum number of unichars in a medium sized word.
static const int kMaxMediumWordUnichars = 6;
00031 
// Raw features extracted from a single OCR hypothesis.
//
// Each feature is a real-valued quantity, normalized by outline length or by
// number of unichars as appropriate, with unbounded range and unknown
// distribution. Any further normalization / binarization happens at a later
// stage.
//
// Note: when adding new fields to this enum make sure to modify
// kParamsTrainingFeatureTypeName as well.
enum kParamsTrainingFeatureType {
  // Digits
  PTRAIN_DIGITS_SHORT = 0,
  PTRAIN_DIGITS_MED = 1,
  PTRAIN_DIGITS_LONG = 2,

  // Number or pattern (NUMBER_PERM, USER_PATTERN_PERM)
  PTRAIN_NUM_SHORT = 3,
  PTRAIN_NUM_MED = 4,
  PTRAIN_NUM_LONG = 5,

  // Document word (DOC_DAWG_PERM)
  PTRAIN_DOC_SHORT = 6,
  PTRAIN_DOC_MED = 7,
  PTRAIN_DOC_LONG = 8,

  // Word (SYSTEM_DAWG_PERM, USER_DAWG_PERM, COMPOUND_PERM)
  PTRAIN_DICT_SHORT = 9,
  PTRAIN_DICT_MED = 10,
  PTRAIN_DICT_LONG = 11,

  // Frequent word (FREQ_DAWG_PERM)
  PTRAIN_FREQ_SHORT = 12,
  PTRAIN_FREQ_MED = 13,
  PTRAIN_FREQ_LONG = 14,

  // Remaining per-hypothesis features
  PTRAIN_SHAPE_COST_PER_CHAR = 15,
  PTRAIN_NGRAM_COST_PER_CHAR = 16,
  PTRAIN_NUM_BAD_PUNC = 17,
  PTRAIN_NUM_BAD_CASE = 18,
  PTRAIN_XHEIGHT_CONSISTENCY = 19,
  PTRAIN_NUM_BAD_CHAR_TYPE = 20,
  PTRAIN_NUM_BAD_SPACING = 21,
  PTRAIN_NUM_BAD_FONT = 22,
  PTRAIN_RATING_PER_CHAR = 23,

  // Number of feature types; deliberately left implicit so that it always
  // follows the last feature above.
  PTRAIN_NUM_FEATURE_TYPES
};
00072 
// Printable names of the training features. The entry at index i is the
// spelling of the enumerator whose value is i, so the order here must stay
// in step with the feature enum.
static const char * const kParamsTrainingFeatureTypeName[] = {
    // Digits
    /*  0 */ "PTRAIN_DIGITS_SHORT",
    /*  1 */ "PTRAIN_DIGITS_MED",
    /*  2 */ "PTRAIN_DIGITS_LONG",
    // Number or pattern
    /*  3 */ "PTRAIN_NUM_SHORT",
    /*  4 */ "PTRAIN_NUM_MED",
    /*  5 */ "PTRAIN_NUM_LONG",
    // Document word
    /*  6 */ "PTRAIN_DOC_SHORT",
    /*  7 */ "PTRAIN_DOC_MED",
    /*  8 */ "PTRAIN_DOC_LONG",
    // Dictionary word
    /*  9 */ "PTRAIN_DICT_SHORT",
    /* 10 */ "PTRAIN_DICT_MED",
    /* 11 */ "PTRAIN_DICT_LONG",
    // Frequent word
    /* 12 */ "PTRAIN_FREQ_SHORT",
    /* 13 */ "PTRAIN_FREQ_MED",
    /* 14 */ "PTRAIN_FREQ_LONG",
    // Remaining per-hypothesis features
    /* 15 */ "PTRAIN_SHAPE_COST_PER_CHAR",
    /* 16 */ "PTRAIN_NGRAM_COST_PER_CHAR",
    /* 17 */ "PTRAIN_NUM_BAD_PUNC",
    /* 18 */ "PTRAIN_NUM_BAD_CASE",
    /* 19 */ "PTRAIN_XHEIGHT_CONSISTENCY",
    /* 20 */ "PTRAIN_NUM_BAD_CHAR_TYPE",
    /* 21 */ "PTRAIN_NUM_BAD_SPACING",
    /* 22 */ "PTRAIN_NUM_BAD_FONT",
    /* 23 */ "PTRAIN_RATING_PER_CHAR",
};
00099 
// Looks up a feature by its name.
// Returns the index of the feature called `name`, or -1 when no feature
// with that name is known.
int ParamsTrainingFeatureByName(const char *name);
00103 
00104 
00105 // Entry with features extracted from a single OCR hypothesis for a word.
00106 struct ParamsTrainingHypothesis {
00107   ParamsTrainingHypothesis() : cost(0.0) {
00108     memset(features, 0, sizeof(float) * PTRAIN_NUM_FEATURE_TYPES);
00109   }
00110   ParamsTrainingHypothesis(const ParamsTrainingHypothesis &other) {
00111     memcpy(features, other.features,
00112            sizeof(float) * PTRAIN_NUM_FEATURE_TYPES);
00113     str = other.str;
00114     cost = other.cost;
00115   }
00116   float features[PTRAIN_NUM_FEATURE_TYPES];
00117   STRING str;  // string corresponding to word hypothesis (for debugging)
00118   float cost;  // path cost computed by segsearch
00119 };
00120 
00121 // A list of hypotheses explored during one run of segmentation search.
00122 typedef GenericVector<ParamsTrainingHypothesis> ParamsTrainingHypothesisList;
00123 
00124 // A bundle that accumulates all of the hypothesis lists explored during all
00125 // of the runs of segmentation search on a word (e.g. a list of hypotheses
00126 // explored on PASS1, PASS2, fix xheight pass, etc).
00127 class ParamsTrainingBundle {
00128  public:
00129   ParamsTrainingBundle() {};
00130   // Starts a new hypothesis list.
00131   // Should be called at the beginning of a new run of the segmentation search.
00132   void StartHypothesisList() {
00133     hyp_list_vec.push_back(ParamsTrainingHypothesisList());
00134   }
00135   // Adds a new ParamsTrainingHypothesis to the current hypothesis list
00136   // and returns the reference to the newly added entry.
00137   ParamsTrainingHypothesis &AddHypothesis(
00138       const ParamsTrainingHypothesis &other) {
00139     if (hyp_list_vec.empty()) StartHypothesisList();
00140     hyp_list_vec.back().push_back(ParamsTrainingHypothesis(other));
00141     return hyp_list_vec.back().back();
00142   }
00143 
00144   GenericVector<ParamsTrainingHypothesisList> hyp_list_vec;
00145 };
00146 
00147 }  // namespace tesseract
00148 
00149 #endif  // TESSERACT_WORDREC_PARAMS_TRAINING_FEATDEF_H_
 All Classes Namespaces Files Functions Variables Typedefs Enumerations Enumerator Friends Defines