tesseract  3.03
/usr/local/google/home/jbreiden/tesseract-ocr-read-only/wordrec/lm_state.cpp
Go to the documentation of this file.
00001 
00002 // File:        lm_state.cpp
00003 // Description: Structures and functionality for capturing the state of
00004 //              segmentation search guided by the language model.
00005 // Author:      Rika Antonova
00006 // Created:     Mon Jun 20 11:26:43 PST 2012
00007 //
00008 // (C) Copyright 2012, Google Inc.
00009 // Licensed under the Apache License, Version 2.0 (the "License");
00010 // you may not use this file except in compliance with the License.
00011 // You may obtain a copy of the License at
00012 // http://www.apache.org/licenses/LICENSE-2.0
00013 // Unless required by applicable law or agreed to in writing, software
00014 // distributed under the License is distributed on an "AS IS" BASIS,
00015 // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
00016 // See the License for the specific language governing permissions and
00017 // limitations under the License.
00018 //
00020 
00021 #include "lm_state.h"
00022 
00023 namespace tesseract {
00024 
00025 ELISTIZE(ViterbiStateEntry);  // NOTE(review): presumably emits the list-support definitions for ViterbiStateEntry (macro not visible here) -- confirm against elst.h.
00026 
00027 void ViterbiStateEntry::Print(const char *msg) const {
00028   tprintf("%s ViterbiStateEntry", msg);
00029   if (updated) tprintf("(NEW)");
00030   if (this->debug_str != NULL) {
00031     tprintf(" str=%s", this->debug_str->string());
00032   }
00033   tprintf(" with ratings_sum=%.4f length=%d cost=%.6f",
00034           this->ratings_sum, this->length, this->cost);
00035   if (this->top_choice_flags) {
00036     tprintf(" top_choice_flags=0x%x", this->top_choice_flags);
00037   }
00038   if (!this->Consistent()) {
00039     tprintf(" inconsistent=(punc %d case %d chartype %d script %d font %d)",
00040             this->consistency_info.NumInconsistentPunc(),
00041             this->consistency_info.NumInconsistentCase(),
00042             this->consistency_info.NumInconsistentChartype(),
00043             this->consistency_info.inconsistent_script,
00044             this->consistency_info.inconsistent_font);
00045   }
00046   if (this->dawg_info) tprintf(" permuter=%d", this->dawg_info->permuter);
00047   if (this->ngram_info) {
00048     tprintf(" ngram_cl_cost=%g context=%s ngram pruned=%d",
00049             this->ngram_info->ngram_and_classifier_cost,
00050             this->ngram_info->context.string(),
00051             this->ngram_info->pruned);
00052   }
00053   if (this->associate_stats.shape_cost > 0.0f) {
00054     tprintf(" shape_cost=%g", this->associate_stats.shape_cost);
00055   }
00056   tprintf(" %s",
00057           XHeightConsistencyEnumName[this->consistency_info.xht_decision]);
00058 
00059   tprintf("\n");
00060 }
00061 
00062 // Clears the viterbi search state back to its initial conditions.
00063 void LanguageModelState::Clear() {
00064   viterbi_state_entries.clear();
00065   viterbi_state_entries_prunable_length = 0;
00066   viterbi_state_entries_prunable_max_cost = MAX_FLOAT32;
00067   viterbi_state_entries_length = 0;
00068 }
00069 
00070 void LanguageModelState::Print(const char *msg) {
00071   tprintf("%s VSEs (max_cost=%g prn_len=%d tot_len=%d):\n",
00072           msg, viterbi_state_entries_prunable_max_cost,
00073           viterbi_state_entries_prunable_length, viterbi_state_entries_length);
00074   ViterbiStateEntry_IT vit(&viterbi_state_entries);
00075   for (vit.mark_cycle_pt(); !vit.cycled_list(); vit.forward()) {
00076     vit.data()->Print("");
00077   }
00078 }
00079 
00080 
00081 }  // namespace tesseract
 All Classes Namespaces Files Functions Variables Typedefs Enumerations Enumerator Friends Defines