tesseract
3.03
|
00001 00002 // File: wordrec.cpp 00003 // Description: wordrec class. 00004 // Author: Samuel Charron 00005 // 00006 // (C) Copyright 2006, Google Inc. 00007 // Licensed under the Apache License, Version 2.0 (the "License"); 00008 // you may not use this file except in compliance with the License. 00009 // You may obtain a copy of the License at 00010 // http://www.apache.org/licenses/LICENSE-2.0 00011 // Unless required by applicable law or agreed to in writing, software 00012 // distributed under the License is distributed on an "AS IS" BASIS, 00013 // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 00014 // See the License for the specific language governing permissions and 00015 // limitations under the License. 00016 // 00018 00019 #include "wordrec.h" 00020 00021 #include "language_model.h" 00022 #include "params.h" 00023 00024 00025 namespace tesseract { 00026 Wordrec::Wordrec() : 00027 // control parameters 00028 BOOL_MEMBER(merge_fragments_in_matrix, TRUE, 00029 "Merge the fragments in the ratings matrix and delete them" 00030 " after merging", params()), 00031 BOOL_MEMBER(wordrec_no_block, FALSE, "Don't output block information", 00032 params()), 00033 BOOL_MEMBER(wordrec_enable_assoc, TRUE, "Associator Enable", 00034 params()), 00035 BOOL_MEMBER(force_word_assoc, FALSE, 00036 "force associator to run regardless of what enable_assoc is." 00037 "This is used for CJK where component grouping is necessary.", 00038 CCUtil::params()), 00039 double_MEMBER(wordrec_worst_state, 1.0, "Worst segmentation state", 00040 params()), 00041 BOOL_MEMBER(fragments_guide_chopper, FALSE, 00042 "Use information from fragments to guide chopping process", 00043 params()), 00044 INT_MEMBER(repair_unchopped_blobs, 1, "Fix blobs that aren't chopped", 00045 params()), 00046 double_MEMBER(tessedit_certainty_threshold, -2.25, "Good blob limit", 00047 params()), 00048 INT_MEMBER(chop_debug, 0, "Chop debug", 00049 params()), 00050 BOOL_MEMBER(chop_enable, 1, "Chop enable", 00051 params()), 00052 BOOL_MEMBER(chop_vertical_creep, 0, "Vertical creep", 00053 params()), 00054 INT_MEMBER(chop_split_length, 10000, "Split Length", 00055 params()), 00056 INT_MEMBER(chop_same_distance, 2, "Same distance", 00057 params()), 00058 INT_MEMBER(chop_min_outline_points, 6, "Min Number of Points on Outline", 00059 params()), 00060 INT_MEMBER(chop_seam_pile_size, 150, "Max number of seams in seam_pile", 00061 params()), 00062 BOOL_MEMBER(chop_new_seam_pile, 1, "Use new seam_pile", params()), 00063 INT_MEMBER(chop_inside_angle, -50, "Min Inside Angle Bend", 00064 params()), 00065 INT_MEMBER(chop_min_outline_area, 2000, "Min Outline Area", 00066 params()), 00067 double_MEMBER(chop_split_dist_knob, 0.5, "Split length adjustment", 00068 params()), 00069 double_MEMBER(chop_overlap_knob, 0.9, "Split overlap adjustment", 00070 params()), 00071 double_MEMBER(chop_center_knob, 0.15, "Split center adjustment", 00072 params()), 00073 INT_MEMBER(chop_centered_maxwidth, 90, "Width of (smaller) chopped blobs " 00074 "above which we don't care that a chop is not near the center.", 00075 params()), 00076 double_MEMBER(chop_sharpness_knob, 0.06, "Split sharpness adjustment", 00077 params()), 00078 double_MEMBER(chop_width_change_knob, 5.0, "Width change adjustment", 00079 params()), 00080 double_MEMBER(chop_ok_split, 100.0, "OK split limit", 00081 params()), 00082 double_MEMBER(chop_good_split, 50.0, "Good split limit", 00083 params()), 00084 INT_MEMBER(chop_x_y_weight, 3, "X / Y length weight", 00085 params()), 00086 INT_MEMBER(segment_adjust_debug, 0, "Segmentation adjustment debug", 00087 params()), 00088 BOOL_MEMBER(assume_fixed_pitch_char_segment, FALSE, 00089 "include fixed-pitch heuristics in char segmentation", 00090 params()), 00091 INT_MEMBER(wordrec_debug_level, 0, 00092 "Debug level for wordrec", params()), 00093 INT_MEMBER(wordrec_max_join_chunks, 4, 00094 "Max number of broken pieces to associate", params()), 00095 BOOL_MEMBER(wordrec_skip_no_truth_words, false, 00096 "Only run OCR for words that had truth recorded in BlamerBundle", 00097 params()), 00098 BOOL_MEMBER(wordrec_debug_blamer, false, 00099 "Print blamer debug messages", params()), 00100 BOOL_MEMBER(wordrec_run_blamer, false, 00101 "Try to set the blame for errors", params()), 00102 INT_MEMBER(segsearch_debug_level, 0, 00103 "SegSearch debug level", params()), 00104 INT_MEMBER(segsearch_max_pain_points, 2000, 00105 "Maximum number of pain points stored in the queue", 00106 params()), 00107 INT_MEMBER(segsearch_max_futile_classifications, 20, 00108 "Maximum number of pain point classifications per chunk that" 00109 "did not result in finding a better word choice.", 00110 params()), 00111 double_MEMBER(segsearch_max_char_wh_ratio, 2.0, 00112 "Maximum character width-to-height ratio", params()), 00113 BOOL_MEMBER(save_alt_choices, true, 00114 "Save alternative paths found during chopping" 00115 " and segmentation search", 00116 params()) { 00117 prev_word_best_choice_ = NULL; 00118 language_model_ = new LanguageModel(&get_fontinfo_table(), 00119 &(getDict())); 00120 fill_lattice_ = NULL; 00121 } 00122 00123 Wordrec::~Wordrec() { 00124 delete language_model_; 00125 } 00126 00127 } // namespace tesseract