tesseract
3.03
|
// Copyright 2010 Google Inc. All Rights Reserved.
// Author: rays@google.com (Ray Smith)
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
// http://www.apache.org/licenses/LICENSE-2.0
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
//

// Include automatically generated configuration file if running autoconf.
#ifdef HAVE_CONFIG_H
#include "config_auto.h"
#endif

#include "trainingsample.h"

#include <math.h>
#include "allheaders.h"
#include "helpers.h"
#include "intfeaturemap.h"
#include "normfeat.h"
#include "shapetable.h"

namespace tesseract {

// Expands to the list-support boilerplate that lets TrainingSample be
// stored in an ELIST (tesseract's intrusive list container).
ELISTIZE(TrainingSample)

// Center of randomizing operations.
// RandomizedCopy scales/shifts feature coordinates about this point so the
// perturbation is symmetric around the middle of the coordinate space.
const int kRandomizingCenter = 128;

// Randomizing factors.
// Y-shift offsets (in feature coordinate units) applied by RandomizedCopy.
const int TrainingSample::kYShiftValues[kSampleYShiftSize] = {
    6, 3, -3, -6, 0
};
// Scale factors applied by RandomizedCopy about kRandomizingCenter.
const double TrainingSample::kScaleValues[kSampleScaleSize] = {
    1.0625, 0.9375, 1.0
};

// Destructor releases the heap-allocated feature arrays owned by this
// sample. (delete [] on NULL is a safe no-op.)
TrainingSample::~TrainingSample() {
  delete [] features_;
  delete [] micro_features_;
}

// WARNING! Serialize/DeSerialize do not save/restore the "cache" data
// members, which is mostly the mapped features, and the weight.
// It is assumed these can all be reconstructed from what is saved.
// Writes to the given file. Returns false in case of error.
bool TrainingSample::Serialize(FILE* fp) const {
  // The write order below defines the on-disk format and must stay in
  // lock-step with the read order in DeSerialize.
  if (fwrite(&class_id_, sizeof(class_id_), 1, fp) != 1) return false;
  if (fwrite(&font_id_, sizeof(font_id_), 1, fp) != 1) return false;
  if (fwrite(&page_num_, sizeof(page_num_), 1, fp) != 1) return false;
  if (!bounding_box_.Serialize(fp)) return false;
  if (fwrite(&num_features_, sizeof(num_features_), 1, fp) != 1) return false;
  if (fwrite(&num_micro_features_, sizeof(num_micro_features_), 1, fp) != 1)
    return false;
  if (fwrite(&outline_length_, sizeof(outline_length_), 1, fp) != 1)
    return false;
  // Variable-length arrays: the element counts were written above, so the
  // reader knows how much to allocate and read back.
  if (static_cast<int>(fwrite(features_, sizeof(*features_), num_features_, fp))
      != num_features_)
    return false;
  if (static_cast<int>(fwrite(micro_features_, sizeof(*micro_features_),
                              num_micro_features_,
                              fp)) != num_micro_features_)
    return false;
  // Fixed-size feature vectors: sizes are compile-time constants shared
  // with DeSerialize.
  if (fwrite(cn_feature_, sizeof(*cn_feature_), kNumCNParams, fp) !=
      kNumCNParams) return false;
  if (fwrite(geo_feature_, sizeof(*geo_feature_), GeoCount, fp) != GeoCount)
    return false;
  return true;
}

// Creates from the given file. Returns NULL in case of error.
// If swap is true, assumes a big/little-endian swap is needed.
// Caller takes ownership of the returned sample.
TrainingSample* TrainingSample::DeSerializeCreate(bool swap, FILE* fp) {
  TrainingSample* sample = new TrainingSample;
  if (sample->DeSerialize(swap, fp)) return sample;
  // Failed part-way: discard the partially-initialized sample.
  delete sample;
  return NULL;
}

// Reads from the given file. Returns false in case of error.
// If swap is true, assumes a big/little-endian swap is needed.
bool TrainingSample::DeSerialize(bool swap, FILE* fp) {
  // Read order must mirror the write order in Serialize exactly.
  if (fread(&class_id_, sizeof(class_id_), 1, fp) != 1) return false;
  if (fread(&font_id_, sizeof(font_id_), 1, fp) != 1) return false;
  if (fread(&page_num_, sizeof(page_num_), 1, fp) != 1) return false;
  if (!bounding_box_.DeSerialize(swap, fp)) return false;
  if (fread(&num_features_, sizeof(num_features_), 1, fp) != 1) return false;
  if (fread(&num_micro_features_, sizeof(num_micro_features_), 1, fp) != 1)
    return false;
  if (fread(&outline_length_, sizeof(outline_length_), 1, fp) != 1)
    return false;
  if (swap) {
    // NOTE(review): only class_id_ and the three counts are byte-swapped
    // here; font_id_ and page_num_ (read above) and the cn_feature_ /
    // geo_feature_ arrays (read below) are not. If any of those are
    // multi-byte types this would corrupt them on endian-swapped files —
    // confirm against the member declarations before relying on swap=true.
    ReverseN(&class_id_, sizeof(class_id_));
    ReverseN(&num_features_, sizeof(num_features_));
    ReverseN(&num_micro_features_, sizeof(num_micro_features_));
    ReverseN(&outline_length_, sizeof(outline_length_));
  }
  // Replace any existing feature arrays with freshly-read ones.
  // NOTE(review): num_features_ / num_micro_features_ come straight from
  // the file with no sanity check, so a corrupt file drives the size of
  // these allocations directly.
  delete [] features_;
  features_ = new INT_FEATURE_STRUCT[num_features_];
  if (static_cast<int>(fread(features_, sizeof(*features_), num_features_, fp))
      != num_features_)
    return false;
  delete [] micro_features_;
  micro_features_ = new MicroFeature[num_micro_features_];
  if (static_cast<int>(fread(micro_features_, sizeof(*micro_features_),
                             num_micro_features_,
                             fp)) != num_micro_features_)
    return false;
  if (fread(cn_feature_, sizeof(*cn_feature_), kNumCNParams, fp) !=
      kNumCNParams) return false;
  if (fread(geo_feature_, sizeof(*geo_feature_), GeoCount, fp) != GeoCount)
    return false;
  return true;
}

// Saves the given features into a TrainingSample.
// Builds a new TrainingSample (caller owns it) from raw int features plus
// the feature-extractor summary in fx_info.
TrainingSample* TrainingSample::CopyFromFeatures(
    const INT_FX_RESULT_STRUCT& fx_info,
    const TBOX& bounding_box,
    const INT_FEATURE_STRUCT* features,
    int num_features) {
  TrainingSample* sample = new TrainingSample;
  sample->num_features_ = num_features;
  sample->features_ = new INT_FEATURE_STRUCT[num_features];
  sample->outline_length_ = fx_info.Length;
  memcpy(sample->features_, features, num_features * sizeof(features[0]));
  // Geometric features come from the bounding box.
  // NOTE(review): the bounding_box_ member itself is not set here, only the
  // geo_feature_ entries — confirm callers don't rely on bounding_box() of
  // a sample built this way.
  sample->geo_feature_[GeoBottom] = bounding_box.bottom();
  sample->geo_feature_[GeoTop] = bounding_box.top();
  sample->geo_feature_[GeoWidth] = bounding_box.width();

  // Generate the cn_feature_ from the fx_info.
  sample->cn_feature_[CharNormY] =
      MF_SCALE_FACTOR * (fx_info.Ymean - kBlnBaselineOffset);
  sample->cn_feature_[CharNormLength] =
      MF_SCALE_FACTOR * fx_info.Length / LENGTH_COMPRESSION;
  sample->cn_feature_[CharNormRx] = MF_SCALE_FACTOR * fx_info.Rx;
  sample->cn_feature_[CharNormRy] = MF_SCALE_FACTOR * fx_info.Ry;

  // Cache flags start clear; Index/MapFeatures set them later.
  sample->features_are_indexed_ = false;
  sample->features_are_mapped_ = false;
  return sample;
}

// Returns the cn_feature as a FEATURE_STRUCT* needed by cntraining.
// The returned FEATURE is newly allocated by NewFeature; presumably the
// caller owns and must free it — confirm against the cntraining call site.
FEATURE_STRUCT* TrainingSample::GetCNFeature() const {
  FEATURE feature = NewFeature(&CharNormDesc);
  for (int i = 0; i < kNumCNParams; ++i)
    feature->Params[i] = cn_feature_[i];
  return feature;
}

// Constructs and returns a copy randomized by the method given by
// the randomizer index. If index is out of [0, kSampleRandomSize) then
// an exact copy is returned.
TrainingSample* TrainingSample::RandomizedCopy(int index) const {
  TrainingSample* sample = Copy();
  if (index >= 0 && index < kSampleRandomSize) {
    ++index;  // Remove the first combination.
    // index now selects a (yshift, scale) pair from the two factor tables;
    // the increment skips the identity (no-shift, no-scale) combination.
    int yshift = kYShiftValues[index / kSampleScaleSize];
    double scaling = kScaleValues[index % kSampleScaleSize];
    for (int i = 0; i < num_features_; ++i) {
      // Scale X about kRandomizingCenter, round to nearest, clip to uinT8.
      double result = (features_[i].X - kRandomizingCenter) * scaling;
      result += kRandomizingCenter;
      sample->features_[i].X = ClipToRange(static_cast<int>(result + 0.5), 0,
                                           MAX_UINT8);
      // Same for Y, plus the vertical shift.
      result = (features_[i].Y - kRandomizingCenter) * scaling;
      result += kRandomizingCenter + yshift;
      sample->features_[i].Y = ClipToRange(static_cast<int>(result + 0.5), 0,
                                           MAX_UINT8);
    }
  }
  return sample;
}

// Constructs and returns an exact copy.
// Deep-copies the owned feature arrays; the cached mapped_features_ and the
// features_are_* flags are deliberately not copied (they are rebuildable
// cache — see the WARNING above Serialize).
TrainingSample* TrainingSample::Copy() const {
  TrainingSample* sample = new TrainingSample;
  sample->class_id_ = class_id_;
  sample->font_id_ = font_id_;
  sample->weight_ = weight_;
  sample->sample_index_ = sample_index_;
  sample->num_features_ = num_features_;
  if (num_features_ > 0) {
    sample->features_ = new INT_FEATURE_STRUCT[num_features_];
    memcpy(sample->features_, features_, num_features_ * sizeof(features_[0]));
  }
  sample->num_micro_features_ = num_micro_features_;
  if (num_micro_features_ > 0) {
    sample->micro_features_ = new MicroFeature[num_micro_features_];
    memcpy(sample->micro_features_, micro_features_,
           num_micro_features_ * sizeof(micro_features_[0]));
  }
  memcpy(sample->cn_feature_, cn_feature_, sizeof(*cn_feature_) * kNumCNParams);
  memcpy(sample->geo_feature_, geo_feature_, sizeof(*geo_feature_) * GeoCount);
  return sample;
}

// Extracts the needed information from the CHAR_DESC_STRUCT.
// Pulls the INT, Micro, CN and Geo feature sets (selected by the four type
// indices) out of char_desc into this sample's members. Missing sets are
// reported via tprintf and left empty/unchanged.
void TrainingSample::ExtractCharDesc(int int_feature_type,
                                     int micro_type,
                                     int cn_type,
                                     int geo_type,
                                     CHAR_DESC_STRUCT* char_desc) {
  // Extract the INT features.
  if (features_ != NULL) delete [] features_;
  FEATURE_SET_STRUCT* char_features = char_desc->FeatureSets[int_feature_type];
  if (char_features == NULL) {
    tprintf("Error: no features to train on of type %s\n",
            kIntFeatureType);
    num_features_ = 0;
    features_ = NULL;
  } else {
    num_features_ = char_features->NumFeatures;
    features_ = new INT_FEATURE_STRUCT[num_features_];
    for (int f = 0; f < num_features_; ++f) {
      // Narrow each float param to the uinT8 fields of INT_FEATURE_STRUCT.
      features_[f].X =
          static_cast<uinT8>(char_features->Features[f]->Params[IntX]);
      features_[f].Y =
          static_cast<uinT8>(char_features->Features[f]->Params[IntY]);
      features_[f].Theta =
          static_cast<uinT8>(char_features->Features[f]->Params[IntDir]);
      features_[f].CP_misses = 0;
    }
  }
  // Extract the Micro features.
  if (micro_features_ != NULL) delete [] micro_features_;
  char_features = char_desc->FeatureSets[micro_type];
  if (char_features == NULL) {
    tprintf("Error: no features to train on of type %s\n",
            kMicroFeatureType);
    num_micro_features_ = 0;
    micro_features_ = NULL;
  } else {
    num_micro_features_ = char_features->NumFeatures;
    micro_features_ = new MicroFeature[num_micro_features_];
    for (int f = 0; f < num_micro_features_; ++f) {
      for (int d = 0; d < MFCount; ++d) {
        micro_features_[f][d] = char_features->Features[f]->Params[d];
      }
    }
  }
  // Extract the CN feature.
  char_features = char_desc->FeatureSets[cn_type];
  if (char_features == NULL) {
    tprintf("Error: no CN feature to train on.\n");
  } else {
    // CN is a single feature with kNumCNParams params.
    ASSERT_HOST(char_features->NumFeatures == 1);
    cn_feature_[CharNormY] = char_features->Features[0]->Params[CharNormY];
    cn_feature_[CharNormLength] =
        char_features->Features[0]->Params[CharNormLength];
    cn_feature_[CharNormRx] = char_features->Features[0]->Params[CharNormRx];
    cn_feature_[CharNormRy] = char_features->Features[0]->Params[CharNormRy];
  }
  // Extract the Geo feature.
  char_features = char_desc->FeatureSets[geo_type];
  if (char_features == NULL) {
    tprintf("Error: no Geo feature to train on.\n");
  } else {
    // Geo is likewise a single feature with GeoCount params.
    ASSERT_HOST(char_features->NumFeatures == 1);
    geo_feature_[GeoBottom] = char_features->Features[0]->Params[GeoBottom];
    geo_feature_[GeoTop] = char_features->Features[0]->Params[GeoTop];
    geo_feature_[GeoWidth] = char_features->Features[0]->Params[GeoWidth];
  }
  // Freshly-extracted features invalidate any cached index/map state.
  features_are_indexed_ = false;
  features_are_mapped_ = false;
}

// Sets the mapped_features_ from the features_ using the provided
// feature_space to the indexed versions of the features.
void TrainingSample::IndexFeatures(const IntFeatureSpace& feature_space) {
  // NOTE(review): indexed_features is never used — the sorted indices are
  // written straight into mapped_features_ on the next line.
  GenericVector<int> indexed_features;
  feature_space.IndexAndSortFeatures(features_, num_features_,
                                     &mapped_features_);
  features_are_indexed_ = true;
  features_are_mapped_ = false;
}

// Sets the mapped_features_ from the features using the provided
// feature_map.
00287 void TrainingSample::MapFeatures(const IntFeatureMap& feature_map) { 00288 GenericVector<int> indexed_features; 00289 feature_map.feature_space().IndexAndSortFeatures(features_, num_features_, 00290 &indexed_features); 00291 feature_map.MapIndexedFeatures(indexed_features, &mapped_features_); 00292 features_are_indexed_ = false; 00293 features_are_mapped_ = true; 00294 } 00295 00296 // Returns a pix representing the sample. (Int features only.) 00297 Pix* TrainingSample::RenderToPix(const UNICHARSET* unicharset) const { 00298 Pix* pix = pixCreate(kIntFeatureExtent, kIntFeatureExtent, 1); 00299 for (int f = 0; f < num_features_; ++f) { 00300 int start_x = features_[f].X; 00301 int start_y = kIntFeatureExtent - features_[f].Y; 00302 double dx = cos((features_[f].Theta / 256.0) * 2.0 * PI - PI); 00303 double dy = -sin((features_[f].Theta / 256.0) * 2.0 * PI - PI); 00304 for (int i = 0; i <= 5; ++i) { 00305 int x = static_cast<int>(start_x + dx * i); 00306 int y = static_cast<int>(start_y + dy * i); 00307 if (x >= 0 && x < 256 && y >= 0 && y < 256) 00308 pixSetPixel(pix, x, y, 1); 00309 } 00310 } 00311 if (unicharset != NULL) 00312 pixSetText(pix, unicharset->id_to_unichar(class_id_)); 00313 return pix; 00314 } 00315 00316 // Displays the features in the given window with the given color. 00317 void TrainingSample::DisplayFeatures(ScrollView::Color color, 00318 ScrollView* window) const { 00319 #ifndef GRAPHICS_DISABLED 00320 for (int f = 0; f < num_features_; ++f) { 00321 RenderIntFeature(window, &features_[f], color); 00322 } 00323 #endif // GRAPHICS_DISABLED 00324 } 00325 00326 // Returns a pix of the original sample image. The pix is padded all round 00327 // by padding wherever possible. 00328 // The returned Pix must be pixDestroyed after use. 00329 // If the input page_pix is NULL, NULL is returned. 
00330 Pix* TrainingSample::GetSamplePix(int padding, Pix* page_pix) const { 00331 if (page_pix == NULL) 00332 return NULL; 00333 int page_width = pixGetWidth(page_pix); 00334 int page_height = pixGetHeight(page_pix); 00335 TBOX padded_box = bounding_box(); 00336 padded_box.pad(padding, padding); 00337 // Clip the padded_box to the limits of the page 00338 TBOX page_box(0, 0, page_width, page_height); 00339 padded_box &= page_box; 00340 Box* box = boxCreate(page_box.left(), page_height - page_box.top(), 00341 page_box.width(), page_box.height()); 00342 Pix* sample_pix = pixClipRectangle(page_pix, box, NULL); 00343 boxDestroy(&box); 00344 return sample_pix; 00345 } 00346 00347 } // namespace tesseract