tesseract  3.03
/usr/local/google/home/jbreiden/tesseract-ocr-read-only/classify/trainingsample.cpp
Go to the documentation of this file.
00001 // Copyright 2010 Google Inc. All Rights Reserved.
00002 // Author: rays@google.com (Ray Smith)
00003 //
00004 // Licensed under the Apache License, Version 2.0 (the "License");
00005 // you may not use this file except in compliance with the License.
00006 // You may obtain a copy of the License at
00007 // http://www.apache.org/licenses/LICENSE-2.0
00008 // Unless required by applicable law or agreed to in writing, software
00009 // distributed under the License is distributed on an "AS IS" BASIS,
00010 // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
00011 // See the License for the specific language governing permissions and
00012 // limitations under the License.
00013 //
00015 
00016 // Include automatically generated configuration file if running autoconf.
00017 #ifdef HAVE_CONFIG_H
00018 #include "config_auto.h"
00019 #endif
00020 
00021 #include "trainingsample.h"
00022 
00023 #include <math.h>
00024 #include "allheaders.h"
00025 #include "helpers.h"
00026 #include "intfeaturemap.h"
00027 #include "normfeat.h"
00028 #include "shapetable.h"
00029 
00030 namespace tesseract {
00031 
00032 ELISTIZE(TrainingSample)
00033 
00034 // Center of randomizing operations.
00035 const int kRandomizingCenter = 128;
00036 
00037 // Randomizing factors.
00038 const int TrainingSample::kYShiftValues[kSampleYShiftSize] = {
00039     6, 3, -3, -6, 0
00040 };
00041 const double TrainingSample::kScaleValues[kSampleScaleSize] = {
00042     1.0625, 0.9375, 1.0
00043 };
00044 
00045 TrainingSample::~TrainingSample() {
00046   delete [] features_;
00047   delete [] micro_features_;
00048 }
00049 
00050 // WARNING! Serialize/DeSerialize do not save/restore the "cache" data
00051 // members, which is mostly the mapped features, and the weight.
00052 // It is assumed these can all be reconstructed from what is saved.
00053 // Writes to the given file. Returns false in case of error.
00054 bool TrainingSample::Serialize(FILE* fp) const {
00055   if (fwrite(&class_id_, sizeof(class_id_), 1, fp) != 1) return false;
00056   if (fwrite(&font_id_, sizeof(font_id_), 1, fp) != 1) return false;
00057   if (fwrite(&page_num_, sizeof(page_num_), 1, fp) != 1) return false;
00058   if (!bounding_box_.Serialize(fp)) return false;
00059   if (fwrite(&num_features_, sizeof(num_features_), 1, fp) != 1) return false;
00060   if (fwrite(&num_micro_features_, sizeof(num_micro_features_), 1, fp) != 1)
00061     return false;
00062   if (fwrite(&outline_length_, sizeof(outline_length_), 1, fp) != 1)
00063     return false;
00064   if (static_cast<int>(fwrite(features_, sizeof(*features_), num_features_, fp))
00065       != num_features_)
00066     return false;
00067   if (static_cast<int>(fwrite(micro_features_, sizeof(*micro_features_),
00068                               num_micro_features_,
00069                               fp)) != num_micro_features_)
00070     return false;
00071   if (fwrite(cn_feature_, sizeof(*cn_feature_), kNumCNParams, fp) !=
00072       kNumCNParams) return false;
00073   if (fwrite(geo_feature_, sizeof(*geo_feature_), GeoCount, fp) != GeoCount)
00074     return false;
00075   return true;
00076 }
00077 
00078 // Creates from the given file. Returns NULL in case of error.
00079 // If swap is true, assumes a big/little-endian swap is needed.
00080 TrainingSample* TrainingSample::DeSerializeCreate(bool swap, FILE* fp) {
00081   TrainingSample* sample = new TrainingSample;
00082   if (sample->DeSerialize(swap, fp)) return sample;
00083   delete sample;
00084   return NULL;
00085 }
00086 
00087 // Reads from the given file. Returns false in case of error.
00088 // If swap is true, assumes a big/little-endian swap is needed.
00089 bool TrainingSample::DeSerialize(bool swap, FILE* fp) {
00090   if (fread(&class_id_, sizeof(class_id_), 1, fp) != 1) return false;
00091   if (fread(&font_id_, sizeof(font_id_), 1, fp) != 1) return false;
00092   if (fread(&page_num_, sizeof(page_num_), 1, fp) != 1) return false;
00093   if (!bounding_box_.DeSerialize(swap, fp)) return false;
00094   if (fread(&num_features_, sizeof(num_features_), 1, fp) != 1) return false;
00095   if (fread(&num_micro_features_, sizeof(num_micro_features_), 1, fp) != 1)
00096     return false;
00097   if (fread(&outline_length_, sizeof(outline_length_), 1, fp) != 1)
00098     return false;
00099   if (swap) {
00100     ReverseN(&class_id_, sizeof(class_id_));
00101     ReverseN(&num_features_, sizeof(num_features_));
00102     ReverseN(&num_micro_features_, sizeof(num_micro_features_));
00103     ReverseN(&outline_length_, sizeof(outline_length_));
00104   }
00105   delete [] features_;
00106   features_ = new INT_FEATURE_STRUCT[num_features_];
00107   if (static_cast<int>(fread(features_, sizeof(*features_), num_features_, fp))
00108       != num_features_)
00109     return false;
00110   delete [] micro_features_;
00111   micro_features_ = new MicroFeature[num_micro_features_];
00112   if (static_cast<int>(fread(micro_features_, sizeof(*micro_features_),
00113                              num_micro_features_,
00114                              fp)) != num_micro_features_)
00115     return false;
00116   if (fread(cn_feature_, sizeof(*cn_feature_), kNumCNParams, fp) !=
00117             kNumCNParams) return false;
00118   if (fread(geo_feature_, sizeof(*geo_feature_), GeoCount, fp) != GeoCount)
00119     return false;
00120   return true;
00121 }
00122 
00123 // Saves the given features into a TrainingSample.
00124 TrainingSample* TrainingSample::CopyFromFeatures(
00125     const INT_FX_RESULT_STRUCT& fx_info,
00126     const TBOX& bounding_box,
00127     const INT_FEATURE_STRUCT* features,
00128     int num_features) {
00129   TrainingSample* sample = new TrainingSample;
00130   sample->num_features_ = num_features;
00131   sample->features_ = new INT_FEATURE_STRUCT[num_features];
00132   sample->outline_length_ = fx_info.Length;
00133   memcpy(sample->features_, features, num_features * sizeof(features[0]));
00134   sample->geo_feature_[GeoBottom] = bounding_box.bottom();
00135   sample->geo_feature_[GeoTop] = bounding_box.top();
00136   sample->geo_feature_[GeoWidth] = bounding_box.width();
00137 
00138   // Generate the cn_feature_ from the fx_info.
00139   sample->cn_feature_[CharNormY] =
00140       MF_SCALE_FACTOR * (fx_info.Ymean - kBlnBaselineOffset);
00141   sample->cn_feature_[CharNormLength] =
00142       MF_SCALE_FACTOR * fx_info.Length / LENGTH_COMPRESSION;
00143   sample->cn_feature_[CharNormRx] = MF_SCALE_FACTOR * fx_info.Rx;
00144   sample->cn_feature_[CharNormRy] = MF_SCALE_FACTOR * fx_info.Ry;
00145 
00146   sample->features_are_indexed_ = false;
00147   sample->features_are_mapped_ = false;
00148   return sample;
00149 }
00150 
00151 // Returns the cn_feature as a FEATURE_STRUCT* needed by cntraining.
00152 FEATURE_STRUCT* TrainingSample::GetCNFeature() const {
00153   FEATURE feature = NewFeature(&CharNormDesc);
00154   for (int i = 0; i < kNumCNParams; ++i)
00155     feature->Params[i] = cn_feature_[i];
00156   return feature;
00157 }
00158 
00159 // Constructs and returns a copy randomized by the method given by
00160 // the randomizer index. If index is out of [0, kSampleRandomSize) then
00161 // an exact copy is returned.
00162 TrainingSample* TrainingSample::RandomizedCopy(int index) const {
00163   TrainingSample* sample = Copy();
00164   if (index >= 0 && index < kSampleRandomSize) {
00165     ++index;  // Remove the first combination.
00166     int yshift = kYShiftValues[index / kSampleScaleSize];
00167     double scaling = kScaleValues[index % kSampleScaleSize];
00168     for (int i = 0; i < num_features_; ++i) {
00169       double result = (features_[i].X - kRandomizingCenter) * scaling;
00170       result += kRandomizingCenter;
00171       sample->features_[i].X = ClipToRange(static_cast<int>(result + 0.5), 0,
00172                                            MAX_UINT8);
00173       result = (features_[i].Y - kRandomizingCenter) * scaling;
00174       result += kRandomizingCenter + yshift;
00175       sample->features_[i].Y = ClipToRange(static_cast<int>(result + 0.5), 0,
00176                                            MAX_UINT8);
00177     }
00178   }
00179   return sample;
00180 }
00181 
00182 // Constructs and returns an exact copy.
00183 TrainingSample* TrainingSample::Copy() const {
00184   TrainingSample* sample = new TrainingSample;
00185   sample->class_id_ = class_id_;
00186   sample->font_id_ = font_id_;
00187   sample->weight_ = weight_;
00188   sample->sample_index_ = sample_index_;
00189   sample->num_features_ = num_features_;
00190   if (num_features_ > 0) {
00191     sample->features_ = new INT_FEATURE_STRUCT[num_features_];
00192     memcpy(sample->features_, features_, num_features_ * sizeof(features_[0]));
00193   }
00194   sample->num_micro_features_ = num_micro_features_;
00195   if (num_micro_features_ > 0) {
00196     sample->micro_features_ = new MicroFeature[num_micro_features_];
00197     memcpy(sample->micro_features_, micro_features_,
00198            num_micro_features_ * sizeof(micro_features_[0]));
00199   }
00200   memcpy(sample->cn_feature_, cn_feature_, sizeof(*cn_feature_) * kNumCNParams);
00201   memcpy(sample->geo_feature_, geo_feature_, sizeof(*geo_feature_) * GeoCount);
00202   return sample;
00203 }
00204 
00205 // Extracts the needed information from the CHAR_DESC_STRUCT.
00206 void TrainingSample::ExtractCharDesc(int int_feature_type,
00207                                      int micro_type,
00208                                      int cn_type,
00209                                      int geo_type,
00210                                      CHAR_DESC_STRUCT* char_desc) {
00211   // Extract the INT features.
00212   if (features_ != NULL) delete [] features_;
00213   FEATURE_SET_STRUCT* char_features = char_desc->FeatureSets[int_feature_type];
00214   if (char_features == NULL) {
00215     tprintf("Error: no features to train on of type %s\n",
00216             kIntFeatureType);
00217     num_features_ = 0;
00218     features_ = NULL;
00219   } else {
00220     num_features_ = char_features->NumFeatures;
00221     features_ = new INT_FEATURE_STRUCT[num_features_];
00222     for (int f = 0; f < num_features_; ++f) {
00223       features_[f].X =
00224           static_cast<uinT8>(char_features->Features[f]->Params[IntX]);
00225       features_[f].Y =
00226           static_cast<uinT8>(char_features->Features[f]->Params[IntY]);
00227       features_[f].Theta =
00228           static_cast<uinT8>(char_features->Features[f]->Params[IntDir]);
00229       features_[f].CP_misses = 0;
00230     }
00231   }
00232   // Extract the Micro features.
00233   if (micro_features_ != NULL) delete [] micro_features_;
00234   char_features = char_desc->FeatureSets[micro_type];
00235   if (char_features == NULL) {
00236     tprintf("Error: no features to train on of type %s\n",
00237             kMicroFeatureType);
00238     num_micro_features_ = 0;
00239     micro_features_ = NULL;
00240   } else {
00241     num_micro_features_ = char_features->NumFeatures;
00242     micro_features_ = new MicroFeature[num_micro_features_];
00243     for (int f = 0; f < num_micro_features_; ++f) {
00244       for (int d = 0; d < MFCount; ++d) {
00245         micro_features_[f][d] = char_features->Features[f]->Params[d];
00246       }
00247     }
00248   }
00249   // Extract the CN feature.
00250   char_features = char_desc->FeatureSets[cn_type];
00251   if (char_features == NULL) {
00252     tprintf("Error: no CN feature to train on.\n");
00253   } else {
00254     ASSERT_HOST(char_features->NumFeatures == 1);
00255     cn_feature_[CharNormY] = char_features->Features[0]->Params[CharNormY];
00256     cn_feature_[CharNormLength] =
00257         char_features->Features[0]->Params[CharNormLength];
00258     cn_feature_[CharNormRx] = char_features->Features[0]->Params[CharNormRx];
00259     cn_feature_[CharNormRy] = char_features->Features[0]->Params[CharNormRy];
00260   }
00261   // Extract the Geo feature.
00262   char_features = char_desc->FeatureSets[geo_type];
00263   if (char_features == NULL) {
00264     tprintf("Error: no Geo feature to train on.\n");
00265   } else {
00266     ASSERT_HOST(char_features->NumFeatures == 1);
00267     geo_feature_[GeoBottom] = char_features->Features[0]->Params[GeoBottom];
00268     geo_feature_[GeoTop] = char_features->Features[0]->Params[GeoTop];
00269     geo_feature_[GeoWidth] = char_features->Features[0]->Params[GeoWidth];
00270   }
00271   features_are_indexed_ = false;
00272   features_are_mapped_ = false;
00273 }
00274 
00275 // Sets the mapped_features_ from the features_ using the provided
00276 // feature_space to the indexed versions of the features.
00277 void TrainingSample::IndexFeatures(const IntFeatureSpace& feature_space) {
00278   GenericVector<int> indexed_features;
00279   feature_space.IndexAndSortFeatures(features_, num_features_,
00280                                      &mapped_features_);
00281   features_are_indexed_ = true;
00282   features_are_mapped_ = false;
00283 }
00284 
00285 // Sets the mapped_features_ from the features using the provided
00286 // feature_map.
00287 void TrainingSample::MapFeatures(const IntFeatureMap& feature_map) {
00288   GenericVector<int> indexed_features;
00289   feature_map.feature_space().IndexAndSortFeatures(features_, num_features_,
00290                                                    &indexed_features);
00291   feature_map.MapIndexedFeatures(indexed_features, &mapped_features_);
00292   features_are_indexed_ = false;
00293   features_are_mapped_ = true;
00294 }
00295 
00296 // Returns a pix representing the sample. (Int features only.)
00297 Pix* TrainingSample::RenderToPix(const UNICHARSET* unicharset) const {
00298   Pix* pix = pixCreate(kIntFeatureExtent, kIntFeatureExtent, 1);
00299   for (int f = 0; f < num_features_; ++f) {
00300     int start_x = features_[f].X;
00301     int start_y = kIntFeatureExtent - features_[f].Y;
00302     double dx = cos((features_[f].Theta / 256.0) * 2.0 * PI - PI);
00303     double dy = -sin((features_[f].Theta / 256.0) * 2.0 * PI - PI);
00304     for (int i = 0; i <= 5; ++i) {
00305       int x = static_cast<int>(start_x + dx * i);
00306       int y = static_cast<int>(start_y + dy * i);
00307       if (x >= 0 && x < 256 && y >= 0 && y < 256)
00308         pixSetPixel(pix, x, y, 1);
00309     }
00310   }
00311   if (unicharset != NULL)
00312     pixSetText(pix, unicharset->id_to_unichar(class_id_));
00313   return pix;
00314 }
00315 
00316 // Displays the features in the given window with the given color.
00317 void TrainingSample::DisplayFeatures(ScrollView::Color color,
00318                                      ScrollView* window) const {
00319   #ifndef GRAPHICS_DISABLED
00320   for (int f = 0; f < num_features_; ++f) {
00321     RenderIntFeature(window, &features_[f], color);
00322   }
00323   #endif  // GRAPHICS_DISABLED
00324 }
00325 
00326 // Returns a pix of the original sample image. The pix is padded all round
00327 // by padding wherever possible.
00328 // The returned Pix must be pixDestroyed after use.
00329 // If the input page_pix is NULL, NULL is returned.
00330 Pix* TrainingSample::GetSamplePix(int padding, Pix* page_pix) const {
00331   if (page_pix == NULL)
00332     return NULL;
00333   int page_width = pixGetWidth(page_pix);
00334   int page_height = pixGetHeight(page_pix);
00335   TBOX padded_box = bounding_box();
00336   padded_box.pad(padding, padding);
00337   // Clip the padded_box to the limits of the page
00338   TBOX page_box(0, 0, page_width, page_height);
00339   padded_box &= page_box;
00340   Box* box = boxCreate(page_box.left(), page_height - page_box.top(),
00341                        page_box.width(), page_box.height());
00342   Pix* sample_pix = pixClipRectangle(page_pix, box, NULL);
00343   boxDestroy(&box);
00344   return sample_pix;
00345 }
00346 
00347 }  // namespace tesseract
 All Classes Namespaces Files Functions Variables Typedefs Enumerations Enumerator Friends Defines