tesseract
3.03
|
00001 /********************************************************************** 00002 * File: cube_object.cpp 00003 * Description: Implementation of the Cube Object Class 00004 * Author: Ahmad Abdulkader 00005 * Created: 2007 00006 * 00007 * (C) Copyright 2008, Google Inc. 00008 ** Licensed under the Apache License, Version 2.0 (the "License"); 00009 ** you may not use this file except in compliance with the License. 00010 ** You may obtain a copy of the License at 00011 ** http://www.apache.org/licenses/LICENSE-2.0 00012 ** Unless required by applicable law or agreed to in writing, software 00013 ** distributed under the License is distributed on an "AS IS" BASIS, 00014 ** WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 00015 ** See the License for the specific language governing permissions and 00016 ** limitations under the License. 00017 * 00018 **********************************************************************/ 00019 00020 #include <math.h> 00021 #include "cube_object.h" 00022 #include "cube_utils.h" 00023 #include "word_list_lang_model.h" 00024 00025 namespace tesseract { 00026 CubeObject::CubeObject(CubeRecoContext *cntxt, CharSamp *char_samp) { 00027 Init(); 00028 char_samp_ = char_samp; 00029 cntxt_ = cntxt; 00030 } 00031 00032 CubeObject::CubeObject(CubeRecoContext *cntxt, Pix *pix, 00033 int left, int top, int wid, int hgt) { 00034 Init(); 00035 char_samp_ = CubeUtils::CharSampleFromPix(pix, left, top, wid, hgt); 00036 own_char_samp_ = true; 00037 cntxt_ = cntxt; 00038 } 00039 00040 // Data member initialization function 00041 void CubeObject::Init() { 00042 char_samp_ = NULL; 00043 own_char_samp_ = false; 00044 alt_list_ = NULL; 00045 srch_obj_ = NULL; 00046 deslanted_alt_list_ = NULL; 00047 deslanted_srch_obj_ = NULL; 00048 deslanted_ = false; 00049 deslanted_char_samp_ = NULL; 00050 beam_obj_ = NULL; 00051 deslanted_beam_obj_ = NULL; 00052 cntxt_ = NULL; 00053 } 00054 00055 // Cleanup function 00056 void CubeObject::Cleanup() { 00057 if (alt_list_ != NULL) { 00058 delete alt_list_; 00059 alt_list_ = NULL; 00060 } 00061 00062 if (deslanted_alt_list_ != NULL) { 00063 delete deslanted_alt_list_; 00064 deslanted_alt_list_ = NULL; 00065 } 00066 } 00067 00068 CubeObject::~CubeObject() { 00069 if (char_samp_ != NULL && own_char_samp_ == true) { 00070 delete char_samp_; 00071 char_samp_ = NULL; 00072 } 00073 00074 if (srch_obj_ != NULL) { 00075 delete srch_obj_; 00076 srch_obj_ = NULL; 00077 } 00078 00079 if (deslanted_srch_obj_ != NULL) { 00080 delete deslanted_srch_obj_; 00081 deslanted_srch_obj_ = NULL; 00082 } 00083 00084 if (beam_obj_ != NULL) { 00085 delete beam_obj_; 00086 beam_obj_ = NULL; 00087 } 00088 00089 if (deslanted_beam_obj_ != NULL) { 00090 delete deslanted_beam_obj_; 00091 deslanted_beam_obj_ = NULL; 00092 } 00093 00094 if (deslanted_char_samp_ != NULL) { 00095 delete deslanted_char_samp_; 00096 deslanted_char_samp_ = NULL; 00097 } 00098 00099 Cleanup(); 00100 } 00101 00102 // Actually do the recognition using the specified language mode. If none 00103 // is specified, the default language model in the CubeRecoContext is used. 00104 // Returns the sorted list of alternate answers 00105 // The Word mode determines whether recognition is done as a word or a phrase 00106 WordAltList *CubeObject::Recognize(LangModel *lang_mod, bool word_mode) { 00107 if (char_samp_ == NULL) { 00108 return NULL; 00109 } 00110 00111 // clear alt lists 00112 Cleanup(); 00113 00114 // no specified language model, use the one in the reco context 00115 if (lang_mod == NULL) { 00116 lang_mod = cntxt_->LangMod(); 00117 } 00118 00119 // normalize if necessary 00120 if (cntxt_->SizeNormalization()) { 00121 Normalize(); 00122 } 00123 00124 // assume not de-slanted by default 00125 deslanted_ = false; 00126 00127 // create a beam search object 00128 if (beam_obj_ == NULL) { 00129 beam_obj_ = new BeamSearch(cntxt_, word_mode); 00130 if (beam_obj_ == NULL) { 00131 fprintf(stderr, "Cube ERROR (CubeObject::Recognize): could not construct " 00132 "BeamSearch\n"); 00133 return NULL; 00134 } 00135 } 00136 00137 // create a cube search object 00138 if (srch_obj_ == NULL) { 00139 srch_obj_ = new CubeSearchObject(cntxt_, char_samp_); 00140 if (srch_obj_ == NULL) { 00141 fprintf(stderr, "Cube ERROR (CubeObject::Recognize): could not construct " 00142 "CubeSearchObject\n"); 00143 return NULL; 00144 } 00145 } 00146 00147 // run a beam search against the tesslang model 00148 alt_list_ = beam_obj_->Search(srch_obj_, lang_mod); 00149 00150 // deslant (if supported by language) and re-reco if probability is low enough 00151 if (cntxt_->HasItalics() == true && 00152 (alt_list_ == NULL || alt_list_->AltCount() < 1 || 00153 alt_list_->AltCost(0) > CubeUtils::Prob2Cost(kMinProbSkipDeslanted))) { 00154 00155 if (deslanted_beam_obj_ == NULL) { 00156 deslanted_beam_obj_ = new BeamSearch(cntxt_); 00157 if (deslanted_beam_obj_ == NULL) { 00158 fprintf(stderr, "Cube ERROR (CubeObject::Recognize): could not " 00159 "construct deslanted BeamSearch\n"); 00160 return NULL; 00161 } 00162 } 00163 00164 if (deslanted_srch_obj_ == NULL) { 00165 deslanted_char_samp_ = char_samp_->Clone(); 00166 if (deslanted_char_samp_ == NULL) { 00167 fprintf(stderr, "Cube ERROR (CubeObject::Recognize): could not " 00168 "construct deslanted CharSamp\n"); 00169 return NULL; 00170 } 00171 00172 if (deslanted_char_samp_->Deslant() == false) { 00173 return NULL; 00174 } 00175 00176 deslanted_srch_obj_ = new CubeSearchObject(cntxt_, deslanted_char_samp_); 00177 if (deslanted_srch_obj_ == NULL) { 00178 fprintf(stderr, "Cube ERROR (CubeObject::Recognize): could not " 00179 "construct deslanted CubeSearchObject\n"); 00180 return NULL; 00181 } 00182 } 00183 00184 // run a beam search against the tesslang model 00185 deslanted_alt_list_ = deslanted_beam_obj_->Search(deslanted_srch_obj_, 00186 lang_mod); 00187 // should we use de-slanted altlist? 00188 if (deslanted_alt_list_ != NULL && deslanted_alt_list_->AltCount() > 0) { 00189 if (alt_list_ == NULL || alt_list_->AltCount() < 1 || 00190 deslanted_alt_list_->AltCost(0) < alt_list_->AltCost(0)) { 00191 deslanted_ = true; 00192 return deslanted_alt_list_; 00193 } 00194 } 00195 } 00196 00197 return alt_list_; 00198 } 00199 00200 // Recognize the member char sample as a word 00201 WordAltList *CubeObject::RecognizeWord(LangModel *lang_mod) { 00202 return Recognize(lang_mod, true); 00203 } 00204 00205 // Recognize the member char sample as a word 00206 WordAltList *CubeObject::RecognizePhrase(LangModel *lang_mod) { 00207 return Recognize(lang_mod, false); 00208 } 00209 00210 // Computes the cost of a specific string. This is done by performing 00211 // recognition of a language model that allows only the specified word 00212 int CubeObject::WordCost(const char *str) { 00213 WordListLangModel *lang_mod = new WordListLangModel(cntxt_); 00214 if (lang_mod == NULL) { 00215 return WORST_COST; 00216 } 00217 00218 if (lang_mod->AddString(str) == false) { 00219 delete lang_mod; 00220 return WORST_COST; 00221 } 00222 00223 // run a beam search against the single string wordlist model 00224 WordAltList *alt_list = RecognizeWord(lang_mod); 00225 delete lang_mod; 00226 00227 int cost = WORST_COST; 00228 if (alt_list != NULL) { 00229 if (alt_list->AltCount() > 0) { 00230 cost = alt_list->AltCost(0); 00231 } 00232 } 00233 00234 return cost; 00235 } 00236 00237 // Recognizes a single character and returns the list of results. 00238 CharAltList *CubeObject::RecognizeChar() { 00239 if (char_samp_ == NULL) return NULL; 00240 CharAltList* alt_list = NULL; 00241 CharClassifier *char_classifier = cntxt_->Classifier(); 00242 ASSERT_HOST(char_classifier != NULL); 00243 alt_list = char_classifier->Classify(char_samp_); 00244 return alt_list; 00245 } 00246 00247 // Normalize the input word bitmap to have a minimum aspect ratio 00248 bool CubeObject::Normalize() { 00249 // create a cube search object 00250 CubeSearchObject *srch_obj = new CubeSearchObject(cntxt_, char_samp_); 00251 if (srch_obj == NULL) { 00252 return false; 00253 } 00254 // Perform over-segmentation 00255 int seg_cnt = srch_obj->SegPtCnt(); 00256 // Only perform normalization if segment count is large enough 00257 if (seg_cnt < kMinNormalizationSegmentCnt) { 00258 delete srch_obj; 00259 return true; 00260 } 00261 // compute the mean AR of the segments 00262 double ar_mean = 0.0; 00263 for (int seg_idx = 0; seg_idx <= seg_cnt; seg_idx++) { 00264 CharSamp *seg_samp = srch_obj->CharSample(seg_idx - 1, seg_idx); 00265 if (seg_samp != NULL && seg_samp->Width() > 0) { 00266 ar_mean += (1.0 * seg_samp->Height() / seg_samp->Width()); 00267 } 00268 } 00269 ar_mean /= (seg_cnt + 1); 00270 // perform normalization if segment AR is too high 00271 if (ar_mean > kMinNormalizationAspectRatio) { 00272 // scale down the image in the y-direction to attain AR 00273 CharSamp *new_samp = char_samp_->Scale(char_samp_->Width(), 00274 2.0 * char_samp_->Height() / ar_mean, 00275 false); 00276 if (new_samp != NULL) { 00277 // free existing char samp if owned 00278 if (own_char_samp_) { 00279 delete char_samp_; 00280 } 00281 // update with new scaled charsamp and set ownership flag 00282 char_samp_ = new_samp; 00283 own_char_samp_ = true; 00284 } 00285 } 00286 delete srch_obj; 00287 return true; 00288 } 00289 }