tesseract  3.03
/usr/local/google/home/jbreiden/tesseract-ocr-read-only/wordrec/params_model.h
Go to the documentation of this file.
00001 
00002 // File:        params_model.h
00003 // Description: Trained feature serialization for language parameter training.
00004 // Author:      David Eger
00005 // Created:     Mon Jun 11 11:26:42 PDT 2012
00006 //
00007 // (C) Copyright 2011, Google Inc.
00008 // Licensed under the Apache License, Version 2.0 (the "License");
00009 // you may not use this file except in compliance with the License.
00010 // You may obtain a copy of the License at
00011 // http://www.apache.org/licenses/LICENSE-2.0
00012 // Unless required by applicable law or agreed to in writing, software
00013 // distributed under the License is distributed on an "AS IS" BASIS,
00014 // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
00015 // See the License for the specific language governing permissions and
00016 // limitations under the License.
00017 //
00019 
00020 #ifndef TESSERACT_WORDREC_PARAMS_MODEL_H_
00021 #define TESSERACT_WORDREC_PARAMS_MODEL_H_
00022 
00023 #include "params_training_featdef.h"
00024 #include "ratngs.h"
00025 #include "strngs.h"
00026 
00027 namespace tesseract {
00028 
00029 // Represents the learned weights for a given language.
00030 class ParamsModel {
00031  public:
00032   // Enum for expressing OCR pass.
00033   enum PassEnum {
00034     PTRAIN_PASS1,
00035     PTRAIN_PASS2,
00036 
00037     PTRAIN_NUM_PASSES
00038   };
00039 
00040   ParamsModel() : pass_(PTRAIN_PASS1) {}
00041   ParamsModel(const char *lang, const GenericVector<float> &weights) :
00042     lang_(lang), pass_(PTRAIN_PASS1) { weights_vec_[pass_] = weights; }
00043   inline bool Initialized() {
00044     return weights_vec_[pass_].size() == PTRAIN_NUM_FEATURE_TYPES;
00045   }
00046   // Prints out feature weights.
00047   void Print();
00048   // Clears weights for all passes.
00049   void Clear() {
00050     for (int p = 0; p < PTRAIN_NUM_PASSES; ++p) weights_vec_[p].clear();
00051   }
00052   // Copies the weights of the given params model.
00053   void Copy(const ParamsModel &other_model);
00054   // Applies params model weights to the given features.
00055   // Assumes that features is an array of size PTRAIN_NUM_FEATURE_TYPES.
00056   float ComputeCost(const float features[]) const;
00057   bool Equivalent(const ParamsModel &that) const;
00058 
00059   // Returns true on success.
00060   bool SaveToFile(const char *full_path) const;
00061 
00062   // Returns true on success.
00063   bool LoadFromFile(const char *lang, const char *full_path);
00064   bool LoadFromFp(const char *lang, FILE *fp, inT64 end_offset);
00065 
00066   const GenericVector<float>& weights() const {
00067     return weights_vec_[pass_];
00068   }
00069   const GenericVector<float>& weights_for_pass(PassEnum pass) const {
00070     return weights_vec_[pass];
00071   }
00072   void SetPass(PassEnum pass) { pass_ = pass; }
00073 
00074  private:
00075   bool ParseLine(char *line, char **key, float *val);
00076 
00077   STRING lang_;
00078   // Set to the current pass type and used to determine which set of weights
00079   // should be used for ComputeCost() and other functions.
00080   PassEnum pass_;
00081   // Several sets of weights for various OCR passes (e.g. pass1 with adaption,
00082   // pass2 without adaption, etc).
00083   GenericVector<float> weights_vec_[PTRAIN_NUM_PASSES];
00084 };
00085 
00086 }  // namespace tesseract
00087 
00088 #endif  // TESSERACT_WORDREC_PARAMS_MODEL_H_
00089 
 All Classes Namespaces Files Functions Variables Typedefs Enumerations Enumerator Friends Defines