tesseract
3.03
|
00001 00002 // File: params_training_featdef.h 00003 // Description: Feature definitions for params training. 00004 // Author: Rika Antonova 00005 // Created: Mon Nov 28 11:26:42 PDT 2011 00006 // 00007 // (C) Copyright 2011, Google Inc. 00008 // Licensed under the Apache License, Version 2.0 (the "License"); 00009 // you may not use this file except in compliance with the License. 00010 // You may obtain a copy of the License at 00011 // http://www.apache.org/licenses/LICENSE-2.0 00012 // Unless required by applicable law or agreed to in writing, software 00013 // distributed under the License is distributed on an "AS IS" BASIS, 00014 // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 00015 // See the License for the specific language governing permissions and 00016 // limitations under the License. 00017 // 00019 00020 #ifndef TESSERACT_WORDREC_PARAMS_TRAINING_FEATDEF_H_ 00021 #define TESSERACT_WORDREC_PARAMS_TRAINING_FEATDEF_H_ 00022 00023 #include "genericvector.h" 00024 #include "strngs.h" 00025 00026 namespace tesseract { 00027 00028 // Maximum number of unichars in the small and medium sized words 00029 static const int kMaxSmallWordUnichars = 3; 00030 static const int kMaxMediumWordUnichars = 6; 00031 00032 // Raw features extracted from a single OCR hypothesis. 00033 // The features are normalized (by outline length or number of unichars as 00034 // appropriate) real-valued quantities with unbounded range and 00035 // unknown distribution. 00036 // Normalization / binarization of these features is done at a later stage. 00037 // Note: when adding new fields to this enum make sure to modify 00038 // kParamsTrainingFeatureTypeName 00039 enum kParamsTrainingFeatureType { 00040 // Digits 00041 PTRAIN_DIGITS_SHORT, // 0 00042 PTRAIN_DIGITS_MED, // 1 00043 PTRAIN_DIGITS_LONG, // 2 00044 // Number or pattern (NUMBER_PERM, USER_PATTERN_PERM) 00045 PTRAIN_NUM_SHORT, // 3 00046 PTRAIN_NUM_MED, // 4 00047 PTRAIN_NUM_LONG, // 5 00048 // Document word (DOC_DAWG_PERM) 00049 PTRAIN_DOC_SHORT, // 6 00050 PTRAIN_DOC_MED, // 7 00051 PTRAIN_DOC_LONG, // 8 00052 // Word (SYSTEM_DAWG_PERM, USER_DAWG_PERM, COMPOUND_PERM) 00053 PTRAIN_DICT_SHORT, // 9 00054 PTRAIN_DICT_MED, // 10 00055 PTRAIN_DICT_LONG, // 11 00056 // Frequent word (FREQ_DAWG_PERM) 00057 PTRAIN_FREQ_SHORT, // 12 00058 PTRAIN_FREQ_MED, // 13 00059 PTRAIN_FREQ_LONG, // 14 00060 PTRAIN_SHAPE_COST_PER_CHAR, // 15 00061 PTRAIN_NGRAM_COST_PER_CHAR, // 16 00062 PTRAIN_NUM_BAD_PUNC, // 17 00063 PTRAIN_NUM_BAD_CASE, // 18 00064 PTRAIN_XHEIGHT_CONSISTENCY, // 19 00065 PTRAIN_NUM_BAD_CHAR_TYPE, // 20 00066 PTRAIN_NUM_BAD_SPACING, // 21 00067 PTRAIN_NUM_BAD_FONT, // 22 00068 PTRAIN_RATING_PER_CHAR, // 23 00069 00070 PTRAIN_NUM_FEATURE_TYPES 00071 }; 00072 00073 static const char * const kParamsTrainingFeatureTypeName[] = { 00074 "PTRAIN_DIGITS_SHORT", // 0 00075 "PTRAIN_DIGITS_MED", // 1 00076 "PTRAIN_DIGITS_LONG", // 2 00077 "PTRAIN_NUM_SHORT", // 3 00078 "PTRAIN_NUM_MED", // 4 00079 "PTRAIN_NUM_LONG", // 5 00080 "PTRAIN_DOC_SHORT", // 6 00081 "PTRAIN_DOC_MED", // 7 00082 "PTRAIN_DOC_LONG", // 8 00083 "PTRAIN_DICT_SHORT", // 9 00084 "PTRAIN_DICT_MED", // 10 00085 "PTRAIN_DICT_LONG", // 11 00086 "PTRAIN_FREQ_SHORT", // 12 00087 "PTRAIN_FREQ_MED", // 13 00088 "PTRAIN_FREQ_LONG", // 14 00089 "PTRAIN_SHAPE_COST_PER_CHAR", // 15 00090 "PTRAIN_NGRAM_COST_PER_CHAR", // 16 00091 "PTRAIN_NUM_BAD_PUNC", // 17 00092 "PTRAIN_NUM_BAD_CASE", // 18 00093 "PTRAIN_XHEIGHT_CONSISTENCY", // 19 00094 "PTRAIN_NUM_BAD_CHAR_TYPE", // 20 00095 "PTRAIN_NUM_BAD_SPACING", // 21 00096 "PTRAIN_NUM_BAD_FONT", // 22 00097 "PTRAIN_RATING_PER_CHAR", // 23 00098 }; 00099 00100 // Returns the index of the given feature (by name), 00101 // or -1 meaning the feature is unknown. 00102 int ParamsTrainingFeatureByName(const char *name); 00103 00104 00105 // Entry with features extracted from a single OCR hypothesis for a word. 00106 struct ParamsTrainingHypothesis { 00107 ParamsTrainingHypothesis() : cost(0.0) { 00108 memset(features, 0, sizeof(float) * PTRAIN_NUM_FEATURE_TYPES); 00109 } 00110 ParamsTrainingHypothesis(const ParamsTrainingHypothesis &other) { 00111 memcpy(features, other.features, 00112 sizeof(float) * PTRAIN_NUM_FEATURE_TYPES); 00113 str = other.str; 00114 cost = other.cost; 00115 } 00116 float features[PTRAIN_NUM_FEATURE_TYPES]; 00117 STRING str; // string corresponding to word hypothesis (for debugging) 00118 float cost; // path cost computed by segsearch 00119 }; 00120 00121 // A list of hypotheses explored during one run of segmentation search. 00122 typedef GenericVector<ParamsTrainingHypothesis> ParamsTrainingHypothesisList; 00123 00124 // A bundle that accumulates all of the hypothesis lists explored during all 00125 // of the runs of segmentation search on a word (e.g. a list of hypotheses 00126 // explored on PASS1, PASS2, fix xheight pass, etc). 00127 class ParamsTrainingBundle { 00128 public: 00129 ParamsTrainingBundle() {}; 00130 // Starts a new hypothesis list. 00131 // Should be called at the beginning of a new run of the segmentation search. 00132 void StartHypothesisList() { 00133 hyp_list_vec.push_back(ParamsTrainingHypothesisList()); 00134 } 00135 // Adds a new ParamsTrainingHypothesis to the current hypothesis list 00136 // and returns the reference to the newly added entry. 00137 ParamsTrainingHypothesis &AddHypothesis( 00138 const ParamsTrainingHypothesis &other) { 00139 if (hyp_list_vec.empty()) StartHypothesisList(); 00140 hyp_list_vec.back().push_back(ParamsTrainingHypothesis(other)); 00141 return hyp_list_vec.back().back(); 00142 } 00143 00144 GenericVector<ParamsTrainingHypothesisList> hyp_list_vec; 00145 }; 00146 00147 } // namespace tesseract 00148 00149 #endif // TESSERACT_WORDREC_PARAMS_TRAINING_FEATDEF_H_