tesseract
3.03
|
/**********************************************************************
 * File: cube_object.h
 * Description: Declaration of the Cube Object Class
 * Author: Ahmad Abdulkader
 * Created: 2007
 *
 * (C) Copyright 2008, Google Inc.
 ** Licensed under the Apache License, Version 2.0 (the "License");
 ** you may not use this file except in compliance with the License.
 ** You may obtain a copy of the License at
 ** http://www.apache.org/licenses/LICENSE-2.0
 ** Unless required by applicable law or agreed to in writing, software
 ** distributed under the License is distributed on an "AS IS" BASIS,
 ** WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 ** See the License for the specific language governing permissions and
 ** limitations under the License.
 *
 **********************************************************************/

// The CubeObject class is the main class used to perform recognition of
// a specific char_samp as a single word.
// To recognize a word, a CubeObject is constructed for this word.
// A Call to RecognizeWord is then issued specifying the language model that
// will be used during recognition. If none is specified, the default language
// model in the CubeRecoContext is used. The CubeRecoContext is passed at
// construction time
//
// The typical usage pattern for Cube is shown below:
//
//  // Create and initialize Tesseract object and get its
//  // CubeRecoContext object (note that Tesseract object owns it,
//  // so it will be freed when the Tesseract object is freed).
//  tesseract::Tesseract *tess_obj = new tesseract::Tesseract();
//  tess_obj->init_tesseract(data_path, lang, tesseract::OEM_CUBE_ONLY);
//  CubeRecoContext *cntxt = tess_obj->GetCubeRecoContext();
//  CHECK(cntxt != NULL) << "Unable to create a Cube reco context";
//  .
//  .
//  .
//  // Do this to recognize a word in pix whose co-ordinates are
//  // (left,top,width,height)
//  tesseract::CubeObject *cube_obj;
//  cube_obj = new tesseract::CubeObject(cntxt, pix,
//                                       left, top, width, height);
//
//  // Get back Cube's list of answers
//  tesseract::WordAltList *alt_list = cube_obj->RecognizeWord();
//  CHECK(alt_list != NULL && alt_list->AltCount() > 0);
//
//  // Get the string and cost of every alternate
//  for (int alt = 0; alt < alt_list->AltCount(); alt++) {
//    // Return the result as a UTF-32 string
//    string_32 res_str32 = alt_list->Alt(alt);
//    // Convert to UTF8 if need-be
//    string res_str;
//    CubeUtils::UTF32ToUTF8(res_str32.c_str(), &res_str);
//    // Get the string cost. This should get bigger as you go deeper
//    // in the list
//    int cost = alt_list->AltCost(alt);
//  }
//
//  // Call this once you are done recognizing this word
//  delete cube_obj;
//
//  // Call this once you are done recognizing all words
//  // for the current language
//  delete tess_obj;
//
// Note that if the language supports "Italics" (see the CubeRecoContext), the
// RecognizeWord function attempts to de-slant the word.
00071 00072 #ifndef CUBE_OBJECT_H 00073 #define CUBE_OBJECT_H 00074 00075 #include "char_samp.h" 00076 #include "word_altlist.h" 00077 #include "beam_search.h" 00078 #include "cube_search_object.h" 00079 #include "tess_lang_model.h" 00080 #include "cube_reco_context.h" 00081 00082 namespace tesseract { 00083 00084 // minimum aspect ratio needed to normalize a char_samp before recognition 00085 static const float kMinNormalizationAspectRatio = 3.5; 00086 // minimum probability a top alt choice must meet before having 00087 // deslanted processing applied to it 00088 static const float kMinProbSkipDeslanted = 0.25; 00089 00090 class CubeObject { 00091 public: 00092 // Different flavors of constructor. They just differ in the way the 00093 // word image is specified 00094 CubeObject(CubeRecoContext *cntxt, CharSamp *char_samp); 00095 CubeObject(CubeRecoContext *cntxt, Pix *pix, 00096 int left, int top, int wid, int hgt); 00097 ~CubeObject(); 00098 00099 // Perform the word recognition using the specified language mode. If none 00100 // is specified, the default language model in the CubeRecoContext is used. 00101 // Returns the sorted list of alternate word answers 00102 WordAltList *RecognizeWord(LangModel *lang_mod = NULL); 00103 // Same as RecognizeWord but recognizes as a phrase 00104 WordAltList *RecognizePhrase(LangModel *lang_mod = NULL); 00105 // Computes the cost of a specific string. This is done by performing 00106 // recognition of a language model that allows only the specified word. 00107 // The alternate list(s) will be permanently modified. 00108 int WordCost(const char *str); 00109 // Recognizes a single character and returns the list of results. 00110 CharAltList *RecognizeChar(); 00111 00112 // Returns the BeamSearch object that resulted from the last call to 00113 // RecognizeWord 00114 inline BeamSearch *BeamObj() const { 00115 return (deslanted_ == true ? 
deslanted_beam_obj_ : beam_obj_); 00116 } 00117 // Returns the WordAltList object that resulted from the last call to 00118 // RecognizeWord 00119 inline WordAltList *AlternateList() const { 00120 return (deslanted_ == true ? deslanted_alt_list_ : alt_list_); 00121 } 00122 // Returns the CubeSearchObject object that resulted from the last call to 00123 // RecognizeWord 00124 inline CubeSearchObject *SrchObj() const { 00125 return (deslanted_ == true ? deslanted_srch_obj_ : srch_obj_); 00126 } 00127 // Returns the CharSamp object that resulted from the last call to 00128 // RecognizeWord. Note that this object is not necessarily identical to the 00129 // one passed at construction time as normalization might have occurred 00130 inline CharSamp *CharSample() const { 00131 return (deslanted_ == true ? deslanted_char_samp_ : char_samp_); 00132 } 00133 00134 // Set the ownership of the CharSamp 00135 inline void SetCharSampOwnership(bool own_char_samp) { 00136 own_char_samp_ = own_char_samp; 00137 } 00138 00139 protected: 00140 // Normalize the CharSamp if its aspect ratio exceeds the below constant. 00141 bool Normalize(); 00142 00143 private: 00144 // minimum segment count needed to normalize a char_samp before recognition 00145 static const int kMinNormalizationSegmentCnt = 4; 00146 00147 // Data member initialization function 00148 void Init(); 00149 // Free alternate lists. 00150 void Cleanup(); 00151 // Perform the actual recognition using the specified language mode. If none 00152 // is specified, the default language model in the CubeRecoContext is used. 00153 // Returns the sorted list of alternate answers. 
Called by both 00154 // RecognizerWord (word_mode is true) or RecognizePhrase (word mode is false) 00155 WordAltList *Recognize(LangModel *lang_mod, bool word_mode); 00156 00157 CubeRecoContext *cntxt_; 00158 BeamSearch *beam_obj_; 00159 BeamSearch *deslanted_beam_obj_; 00160 bool own_char_samp_; 00161 bool deslanted_; 00162 CharSamp *char_samp_; 00163 CharSamp *deslanted_char_samp_; 00164 CubeSearchObject *srch_obj_; 00165 CubeSearchObject *deslanted_srch_obj_; 00166 WordAltList *alt_list_; 00167 WordAltList *deslanted_alt_list_; 00168 }; 00169 } 00170 00171 #endif // CUBE_OBJECT_H