tesseract
3.03
|
00001 // Copyright 2010 Google Inc. All Rights Reserved. 00002 // Author: rays@google.com (Ray Smith) 00003 // 00004 // Licensed under the Apache License, Version 2.0 (the "License"); 00005 // you may not use this file except in compliance with the License. 00006 // You may obtain a copy of the License at 00007 // http://www.apache.org/licenses/LICENSE-2.0 00008 // Unless required by applicable law or agreed to in writing, software 00009 // distributed under the License is distributed on an "AS IS" BASIS, 00010 // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 00011 // See the License for the specific language governing permissions and 00012 // limitations under the License. 00013 // 00015 00016 #ifndef TESSERACT_TRAINING_TRAININGSAMPLE_H__ 00017 #define TESSERACT_TRAINING_TRAININGSAMPLE_H__ 00018 00019 #include "elst.h" 00020 #include "featdefs.h" 00021 #include "intfx.h" 00022 #include "intmatcher.h" 00023 #include "matrix.h" 00024 #include "mf.h" 00025 #include "picofeat.h" 00026 #include "shapetable.h" 00027 #include "unicharset.h" 00028 00029 struct Pix; 00030 00031 namespace tesseract { 00032 00033 class IntFeatureMap; 00034 class IntFeatureSpace; 00035 class ShapeTable; 00036 00037 // Number of elements of cn_feature_. 00038 static const int kNumCNParams = 4; 00039 // Number of ways to shift the features when randomizing. 00040 static const int kSampleYShiftSize = 5; 00041 // Number of ways to scale the features when randomizing. 00042 static const int kSampleScaleSize = 3; 00043 // Total number of different ways to manipulate the features when randomizing. 00044 // The first and last combinations are removed to avoid an excessive 00045 // top movement (first) and an identity transformation (last). 00046 // WARNING: To avoid patterned duplication of samples, be sure to keep 00047 // kSampleRandomSize prime! 00048 // Eg with current values (kSampleYShiftSize = 5 and TkSampleScaleSize = 3) 00049 // kSampleRandomSize is 13, which is prime. 00050 static const int kSampleRandomSize = kSampleYShiftSize * kSampleScaleSize - 2; 00051 // ASSERT_IS_PRIME(kSampleRandomSize) !! 00052 00053 class TrainingSample : public ELIST_LINK { 00054 public: 00055 TrainingSample() 00056 : class_id_(INVALID_UNICHAR_ID), font_id_(0), page_num_(0), 00057 num_features_(0), num_micro_features_(0), outline_length_(0), 00058 features_(NULL), micro_features_(NULL), weight_(1.0), 00059 max_dist_(0.0), sample_index_(0), 00060 features_are_indexed_(false), features_are_mapped_(false), 00061 is_error_(false) { 00062 } 00063 ~TrainingSample(); 00064 00065 // Saves the given features into a TrainingSample. The features are copied, 00066 // so may be deleted afterwards. Delete the return value after use. 00067 static TrainingSample* CopyFromFeatures(const INT_FX_RESULT_STRUCT& fx_info, 00068 const TBOX& bounding_box, 00069 const INT_FEATURE_STRUCT* features, 00070 int num_features); 00071 // Returns the cn_feature as a FEATURE_STRUCT* needed by cntraining. 00072 FEATURE_STRUCT* GetCNFeature() const; 00073 // Constructs and returns a copy "randomized" by the method given by 00074 // the randomizer index. If index is out of [0, kSampleRandomSize) then 00075 // an exact copy is returned. 00076 TrainingSample* RandomizedCopy(int index) const; 00077 // Constructs and returns an exact copy. 00078 TrainingSample* Copy() const; 00079 00080 // WARNING! Serialize/DeSerialize do not save/restore the "cache" data 00081 // members, which is mostly the mapped features, and the weight. 00082 // It is assumed these can all be reconstructed from what is saved. 00083 // Writes to the given file. Returns false in case of error. 00084 bool Serialize(FILE* fp) const; 00085 // Creates from the given file. Returns NULL in case of error. 00086 // If swap is true, assumes a big/little-endian swap is needed. 00087 static TrainingSample* DeSerializeCreate(bool swap, FILE* fp); 00088 // Reads from the given file. Returns false in case of error. 00089 // If swap is true, assumes a big/little-endian swap is needed. 00090 bool DeSerialize(bool swap, FILE* fp); 00091 00092 // Extracts the needed information from the CHAR_DESC_STRUCT. 00093 void ExtractCharDesc(int feature_type, int micro_type, 00094 int cn_type, int geo_type, 00095 CHAR_DESC_STRUCT* char_desc); 00096 00097 // Sets the mapped_features_ from the features_ using the provided 00098 // feature_space to the indexed versions of the features. 00099 void IndexFeatures(const IntFeatureSpace& feature_space); 00100 // Sets the mapped_features_ from the features_ using the provided 00101 // feature_map. 00102 void MapFeatures(const IntFeatureMap& feature_map); 00103 00104 // Returns a pix representing the sample. (Int features only.) 00105 Pix* RenderToPix(const UNICHARSET* unicharset) const; 00106 // Displays the features in the given window with the given color. 00107 void DisplayFeatures(ScrollView::Color color, ScrollView* window) const; 00108 00109 // Returns a pix of the original sample image. The pix is padded all round 00110 // by padding wherever possible. 00111 // The returned Pix must be pixDestroyed after use. 00112 // If the input page_pix is NULL, NULL is returned. 00113 Pix* GetSamplePix(int padding, Pix* page_pix) const; 00114 00115 // Accessors. 00116 UNICHAR_ID class_id() const { 00117 return class_id_; 00118 } 00119 void set_class_id(int id) { 00120 class_id_ = id; 00121 } 00122 int font_id() const { 00123 return font_id_; 00124 } 00125 void set_font_id(int id) { 00126 font_id_ = id; 00127 } 00128 int page_num() const { 00129 return page_num_; 00130 } 00131 void set_page_num(int page) { 00132 page_num_ = page; 00133 } 00134 const TBOX& bounding_box() const { 00135 return bounding_box_; 00136 } 00137 void set_bounding_box(const TBOX& box) { 00138 bounding_box_ = box; 00139 } 00140 int num_features() const { 00141 return num_features_; 00142 } 00143 const INT_FEATURE_STRUCT* features() const { 00144 return features_; 00145 } 00146 int num_micro_features() const { 00147 return num_micro_features_; 00148 } 00149 const MicroFeature* micro_features() const { 00150 return micro_features_; 00151 } 00152 int outline_length() const { 00153 return outline_length_; 00154 } 00155 float cn_feature(int index) const { 00156 return cn_feature_[index]; 00157 } 00158 int geo_feature(int index) const { 00159 return geo_feature_[index]; 00160 } 00161 double weight() const { 00162 return weight_; 00163 } 00164 void set_weight(double value) { 00165 weight_ = value; 00166 } 00167 double max_dist() const { 00168 return max_dist_; 00169 } 00170 void set_max_dist(double value) { 00171 max_dist_ = value; 00172 } 00173 int sample_index() const { 00174 return sample_index_; 00175 } 00176 void set_sample_index(int value) { 00177 sample_index_ = value; 00178 } 00179 bool features_are_mapped() const { 00180 return features_are_mapped_; 00181 } 00182 const GenericVector<int>& mapped_features() const { 00183 ASSERT_HOST(features_are_mapped_); 00184 return mapped_features_; 00185 } 00186 const GenericVector<int>& indexed_features() const { 00187 ASSERT_HOST(features_are_indexed_); 00188 return mapped_features_; 00189 } 00190 bool is_error() const { 00191 return is_error_; 00192 } 00193 void set_is_error(bool value) { 00194 is_error_ = value; 00195 } 00196 00197 private: 00198 // Unichar id that this sample represents. There obviously must be a 00199 // reference UNICHARSET somewhere. Usually in TrainingSampleSet. 00200 UNICHAR_ID class_id_; 00201 // Font id in which this sample was printed. Refers to a fontinfo_table_ in 00202 // MasterTrainer. 00203 int font_id_; 00204 // Number of page that the sample came from. 00205 int page_num_; 00206 // Bounding box of sample in original image. 00207 TBOX bounding_box_; 00208 // Number of INT_FEATURE_STRUCT in features_ array. 00209 int num_features_; 00210 // Number of MicroFeature in micro_features_ array. 00211 int num_micro_features_; 00212 // Total length of outline in the baseline normalized coordinate space. 00213 // See comment in WERD_RES class definition for a discussion of coordinate 00214 // spaces. 00215 int outline_length_; 00216 // Array of features. 00217 INT_FEATURE_STRUCT* features_; 00218 // Array of features. 00219 MicroFeature* micro_features_; 00220 // The one and only CN feature. Indexed by NORM_PARAM_NAME enum. 00221 float cn_feature_[kNumCNParams]; 00222 // The one and only geometric feature. (Aims at replacing cn_feature_). 00223 // Indexed by GeoParams enum in picofeat.h 00224 int geo_feature_[GeoCount]; 00225 00226 // Non-serialized cache data. 00227 // Weight used for boosting training. 00228 double weight_; 00229 // Maximum distance to other samples of same class/font used in computing 00230 // the canonical sample. 00231 double max_dist_; 00232 // Global index of this sample. 00233 int sample_index_; 00234 // Indexed/mapped features, as indicated by the bools below. 00235 GenericVector<int> mapped_features_; 00236 bool features_are_indexed_; 00237 bool features_are_mapped_; 00238 // True if the last classification was an error by the current definition. 00239 bool is_error_; 00240 00241 // Randomizing factors. 00242 static const int kYShiftValues[kSampleYShiftSize]; 00243 static const double kScaleValues[kSampleScaleSize]; 00244 }; 00245 00246 ELISTIZEH(TrainingSample) 00247 00248 } // namespace tesseract 00249 00250 #endif // TESSERACT_TRAINING_TRAININGSAMPLE_H__