tesseract
3.03
|
00001 00002 // File: lm_state.cpp 00003 // Description: Structures and functionality for capturing the state of 00004 // segmentation search guided by the language model. 00005 // Author: Rika Antonova 00006 // Created: Mon Jun 20 11:26:43 PST 2012 00007 // 00008 // (C) Copyright 2012, Google Inc. 00009 // Licensed under the Apache License, Version 2.0 (the "License"); 00010 // you may not use this file except in compliance with the License. 00011 // You may obtain a copy of the License at 00012 // http://www.apache.org/licenses/LICENSE-2.0 00013 // Unless required by applicable law or agreed to in writing, software 00014 // distributed under the License is distributed on an "AS IS" BASIS, 00015 // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 00016 // See the License for the specific language governing permissions and 00017 // limitations under the License. 00018 // 00020 00021 #include "lm_state.h" 00022 00023 namespace tesseract { 00024 00025 ELISTIZE(ViterbiStateEntry); 00026 00027 void ViterbiStateEntry::Print(const char *msg) const { 00028 tprintf("%s ViterbiStateEntry", msg); 00029 if (updated) tprintf("(NEW)"); 00030 if (this->debug_str != NULL) { 00031 tprintf(" str=%s", this->debug_str->string()); 00032 } 00033 tprintf(" with ratings_sum=%.4f length=%d cost=%.6f", 00034 this->ratings_sum, this->length, this->cost); 00035 if (this->top_choice_flags) { 00036 tprintf(" top_choice_flags=0x%x", this->top_choice_flags); 00037 } 00038 if (!this->Consistent()) { 00039 tprintf(" inconsistent=(punc %d case %d chartype %d script %d font %d)", 00040 this->consistency_info.NumInconsistentPunc(), 00041 this->consistency_info.NumInconsistentCase(), 00042 this->consistency_info.NumInconsistentChartype(), 00043 this->consistency_info.inconsistent_script, 00044 this->consistency_info.inconsistent_font); 00045 } 00046 if (this->dawg_info) tprintf(" permuter=%d", this->dawg_info->permuter); 00047 if (this->ngram_info) { 00048 tprintf(" ngram_cl_cost=%g context=%s ngram pruned=%d", 00049 this->ngram_info->ngram_and_classifier_cost, 00050 this->ngram_info->context.string(), 00051 this->ngram_info->pruned); 00052 } 00053 if (this->associate_stats.shape_cost > 0.0f) { 00054 tprintf(" shape_cost=%g", this->associate_stats.shape_cost); 00055 } 00056 tprintf(" %s", 00057 XHeightConsistencyEnumName[this->consistency_info.xht_decision]); 00058 00059 tprintf("\n"); 00060 } 00061 00062 // Clears the viterbi search state back to its initial conditions. 00063 void LanguageModelState::Clear() { 00064 viterbi_state_entries.clear(); 00065 viterbi_state_entries_prunable_length = 0; 00066 viterbi_state_entries_prunable_max_cost = MAX_FLOAT32; 00067 viterbi_state_entries_length = 0; 00068 } 00069 00070 void LanguageModelState::Print(const char *msg) { 00071 tprintf("%s VSEs (max_cost=%g prn_len=%d tot_len=%d):\n", 00072 msg, viterbi_state_entries_prunable_max_cost, 00073 viterbi_state_entries_prunable_length, viterbi_state_entries_length); 00074 ViterbiStateEntry_IT vit(&viterbi_state_entries); 00075 for (vit.mark_cycle_pt(); !vit.cycled_list(); vit.forward()) { 00076 vit.data()->Print(""); 00077 } 00078 } 00079 00080 00081 } // namespace tesseract