tesseract  3.03
/usr/local/google/home/jbreiden/tesseract-ocr-read-only/classify/trainingsample.h
Go to the documentation of this file.
00001 // Copyright 2010 Google Inc. All Rights Reserved.
00002 // Author: rays@google.com (Ray Smith)
00003 //
00004 // Licensed under the Apache License, Version 2.0 (the "License");
00005 // you may not use this file except in compliance with the License.
00006 // You may obtain a copy of the License at
00007 // http://www.apache.org/licenses/LICENSE-2.0
00008 // Unless required by applicable law or agreed to in writing, software
00009 // distributed under the License is distributed on an "AS IS" BASIS,
00010 // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
00011 // See the License for the specific language governing permissions and
00012 // limitations under the License.
00013 //
00015 
00016 #ifndef TESSERACT_TRAINING_TRAININGSAMPLE_H__
00017 #define TESSERACT_TRAINING_TRAININGSAMPLE_H__
00018 
00019 #include "elst.h"
00020 #include "featdefs.h"
00021 #include "intfx.h"
00022 #include "intmatcher.h"
00023 #include "matrix.h"
00024 #include "mf.h"
00025 #include "picofeat.h"
00026 #include "shapetable.h"
00027 #include "unicharset.h"
00028 
00029 struct Pix;
00030 
00031 namespace tesseract {
00032 
00033 class IntFeatureMap;
00034 class IntFeatureSpace;
00035 class ShapeTable;
00036 
00037 // Number of elements of cn_feature_.
00038 static const int kNumCNParams = 4;
00039 // Number of ways to shift the features when randomizing.
00040 static const int kSampleYShiftSize = 5;
00041 // Number of ways to scale the features when randomizing.
00042 static const int kSampleScaleSize = 3;
00043 // Total number of different ways to manipulate the features when randomizing.
00044 // The first and last combinations are removed to avoid an excessive
00045 // top movement (first) and an identity transformation (last).
00046 // WARNING: To avoid patterned duplication of samples, be sure to keep
00047 // kSampleRandomSize prime!
00048 // E.g. with current values (kSampleYShiftSize = 5 and kSampleScaleSize = 3)
00049 // kSampleRandomSize is 5 * 3 - 2 = 13, which is prime.
00050 static const int kSampleRandomSize = kSampleYShiftSize * kSampleScaleSize - 2;
00051 // ASSERT_IS_PRIME(kSampleRandomSize) !!
00052 
00053 class TrainingSample : public ELIST_LINK {  // One sample: features plus class/font/page data.
00054  public:
00055   TrainingSample()
00056     : class_id_(INVALID_UNICHAR_ID), font_id_(0), page_num_(0),
00057       num_features_(0), num_micro_features_(0), outline_length_(0),
00058       features_(NULL), micro_features_(NULL), weight_(1.0),
00059       max_dist_(0.0), sample_index_(0),
00060       features_are_indexed_(false), features_are_mapped_(false),
00061       is_error_(false) {  // NOTE: cn_feature_ and geo_feature_ are not initialized here.
00062   }
00063   ~TrainingSample();
00064 
00065   // Saves the given features into a TrainingSample. The features are copied,
00066   // so may be deleted afterwards. Delete the return value after use.
00067   static TrainingSample* CopyFromFeatures(const INT_FX_RESULT_STRUCT& fx_info,
00068                                           const TBOX& bounding_box,
00069                                           const INT_FEATURE_STRUCT* features,
00070                                           int num_features);
00071   // Returns the cn_feature as a FEATURE_STRUCT* needed by cntraining.
00072   FEATURE_STRUCT* GetCNFeature() const;
00073   // Constructs and returns a copy "randomized" by the method given by
00074   // the randomizer index. If index is outside [0, kSampleRandomSize) then
00075   // an exact copy is returned.
00076   TrainingSample* RandomizedCopy(int index) const;
00077   // Constructs and returns an exact copy.
00078   TrainingSample* Copy() const;
00079 
00080   // WARNING! Serialize/DeSerialize do not save/restore the "cache" data
00081   // members, which are mostly the mapped features and the weight.
00082   // It is assumed these can all be reconstructed from what is saved.
00083   // Writes to the given file. Returns false in case of error.
00084   bool Serialize(FILE* fp) const;
00085   // Creates from the given file. Returns NULL in case of error.
00086   // If swap is true, assumes a big/little-endian swap is needed.
00087   static TrainingSample* DeSerializeCreate(bool swap, FILE* fp);
00088   // Reads from the given file. Returns false in case of error.
00089   // If swap is true, assumes a big/little-endian swap is needed.
00090   bool DeSerialize(bool swap, FILE* fp);
00091 
00092   // Extracts the needed information from the CHAR_DESC_STRUCT.
00093   void ExtractCharDesc(int feature_type, int micro_type,
00094                        int cn_type, int geo_type,
00095                        CHAR_DESC_STRUCT* char_desc);
00096 
00097   // Sets the mapped_features_ from the features_, using the provided
00098   // feature_space to produce the indexed versions of the features.
00099   void IndexFeatures(const IntFeatureSpace& feature_space);
00100   // Sets the mapped_features_ from the features_ using the provided
00101   // feature_map.
00102   void MapFeatures(const IntFeatureMap& feature_map);
00103 
00104   // Returns a pix representing the sample. (Int features only.)
00105   Pix* RenderToPix(const UNICHARSET* unicharset) const;
00106   // Displays the features in the given window with the given color.
00107   void DisplayFeatures(ScrollView::Color color, ScrollView* window) const;
00108 
00109   // Returns a pix of the original sample image. The pix is padded all round
00110   // by padding wherever possible.
00111   // The returned Pix must be pixDestroyed after use.
00112   // If the input page_pix is NULL, NULL is returned.
00113   Pix* GetSamplePix(int padding, Pix* page_pix) const;
00114 
00115   // Accessors. The array accessors do no bounds checking on index.
00116   UNICHAR_ID class_id() const {
00117     return class_id_;
00118   }
00119   void set_class_id(int id) {
00120     class_id_ = id;
00121   }
00122   int font_id() const {
00123     return font_id_;
00124   }
00125   void set_font_id(int id) {
00126     font_id_ = id;
00127   }
00128   int page_num() const {
00129     return page_num_;
00130   }
00131   void set_page_num(int page) {
00132     page_num_ = page;
00133   }
00134   const TBOX& bounding_box() const {
00135     return bounding_box_;
00136   }
00137   void set_bounding_box(const TBOX& box) {
00138     bounding_box_ = box;
00139   }
00140   int num_features() const {
00141     return num_features_;
00142   }
00143   const INT_FEATURE_STRUCT* features() const {
00144     return features_;
00145   }
00146   int num_micro_features() const {
00147     return num_micro_features_;
00148   }
00149   const MicroFeature* micro_features() const {
00150     return micro_features_;
00151   }
00152   int outline_length() const {
00153     return outline_length_;
00154   }
00155   float cn_feature(int index) const {  // cn_feature_ has kNumCNParams elements.
00156     return cn_feature_[index];
00157   }
00158   int geo_feature(int index) const {  // geo_feature_ has GeoCount elements.
00159     return geo_feature_[index];
00160   }
00161   double weight() const {
00162     return weight_;
00163   }
00164   void set_weight(double value) {
00165     weight_ = value;
00166   }
00167   double max_dist() const {
00168     return max_dist_;
00169   }
00170   void set_max_dist(double value) {
00171     max_dist_ = value;
00172   }
00173   int sample_index() const {
00174     return sample_index_;
00175   }
00176   void set_sample_index(int value) {
00177     sample_index_ = value;
00178   }
00179   bool features_are_mapped() const {
00180     return features_are_mapped_;
00181   }
00182   const GenericVector<int>& mapped_features() const {
00183     ASSERT_HOST(features_are_mapped_);  // Asserts that the mapped flag is set.
00184     return mapped_features_;
00185   }
00186   const GenericVector<int>& indexed_features() const {
00187     ASSERT_HOST(features_are_indexed_);  // Asserts that the indexed flag is set.
00188     return mapped_features_;  // Same vector that mapped_features() returns.
00189   }
00190   bool is_error() const {
00191     return is_error_;
00192   }
00193   void set_is_error(bool value) {
00194     is_error_ = value;
00195   }
00196 
00197  private:
00198   // Unichar id that this sample represents. There obviously must be a
00199   // reference UNICHARSET somewhere. Usually in TrainingSampleSet.
00200   UNICHAR_ID class_id_;
00201   // Font id in which this sample was printed. Refers to a fontinfo_table_ in
00202   // MasterTrainer.
00203   int font_id_;
00204   // Number of page that the sample came from.
00205   int page_num_;
00206   // Bounding box of sample in original image.
00207   TBOX bounding_box_;
00208   // Number of INT_FEATURE_STRUCT in features_ array.
00209   int num_features_;
00210   // Number of MicroFeature in micro_features_ array.
00211   int num_micro_features_;
00212   // Total length of outline in the baseline normalized coordinate space.
00213   // See comment in WERD_RES class definition for a discussion of coordinate
00214   // spaces.
00215   int outline_length_;
00216   // Array of num_features_ integer features.
00217   INT_FEATURE_STRUCT* features_;
00218   // Array of num_micro_features_ micro features.
00219   MicroFeature* micro_features_;
00220   // The one and only CN feature. Indexed by NORM_PARAM_NAME enum.
00221   float cn_feature_[kNumCNParams];
00222   // The one and only geometric feature. (Aims at replacing cn_feature_).
00223   // Indexed by GeoParams enum in picofeat.h
00224   int geo_feature_[GeoCount];
00225 
00226   // Non-serialized cache data.
00227   // Weight used for boosting training.
00228   double weight_;
00229   // Maximum distance to other samples of same class/font used in computing
00230   // the canonical sample.
00231   double max_dist_;
00232   // Global index of this sample.
00233   int sample_index_;
00234   // Indexed/mapped features, as indicated by the bools below.
00235   GenericVector<int> mapped_features_;
00236   bool features_are_indexed_;
00237   bool features_are_mapped_;
00238   // True if the last classification was an error by the current definition.
00239   bool is_error_;
00240 
00241   // Randomizing factors: y-shift and scale value tables, defined out of line.
00242   static const int kYShiftValues[kSampleYShiftSize];
00243   static const double kScaleValues[kSampleScaleSize];
00244 };
00245 
00246 ELISTIZEH(TrainingSample)
00247 
00248 }  // namespace tesseract
00249 
00250 #endif  // TESSERACT_TRAINING_TRAININGSAMPLE_H__
 All Classes Namespaces Files Functions Variables Typedefs Enumerations Enumerator Friends Defines