tesseract  3.03
/usr/local/google/home/jbreiden/tesseract-ocr-read-only/wordrec/wordrec.cpp
Go to the documentation of this file.
00001 
00002 // File:        wordrec.cpp
00003 // Description: wordrec class.
00004 // Author:      Samuel Charron
00005 //
00006 // (C) Copyright 2006, Google Inc.
00007 // Licensed under the Apache License, Version 2.0 (the "License");
00008 // you may not use this file except in compliance with the License.
00009 // You may obtain a copy of the License at
00010 // http://www.apache.org/licenses/LICENSE-2.0
00011 // Unless required by applicable law or agreed to in writing, software
00012 // distributed under the License is distributed on an "AS IS" BASIS,
00013 // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
00014 // See the License for the specific language governing permissions and
00015 // limitations under the License.
00016 //
00018 
00019 #include "wordrec.h"
00020 
00021 #include "language_model.h"
00022 #include "params.h"
00023 
00024 
00025 namespace tesseract {
00026 Wordrec::Wordrec() :
00027   // control parameters
00028   BOOL_MEMBER(merge_fragments_in_matrix, TRUE,
00029               "Merge the fragments in the ratings matrix and delete them"
00030               " after merging", params()),
00031   BOOL_MEMBER(wordrec_no_block, FALSE, "Don't output block information",
00032               params()),
00033   BOOL_MEMBER(wordrec_enable_assoc, TRUE, "Associator Enable",
00034               params()),
00035   BOOL_MEMBER(force_word_assoc, FALSE,
00036               "force associator to run regardless of what enable_assoc is."
00037               "This is used for CJK where component grouping is necessary.",
00038               CCUtil::params()),
00039   double_MEMBER(wordrec_worst_state, 1.0, "Worst segmentation state",
00040                 params()),
00041   BOOL_MEMBER(fragments_guide_chopper, FALSE,
00042               "Use information from fragments to guide chopping process",
00043               params()),
00044   INT_MEMBER(repair_unchopped_blobs, 1, "Fix blobs that aren't chopped",
00045              params()),
00046   double_MEMBER(tessedit_certainty_threshold, -2.25, "Good blob limit",
00047                 params()),
00048   INT_MEMBER(chop_debug, 0, "Chop debug",
00049              params()),
00050   BOOL_MEMBER(chop_enable, 1, "Chop enable",
00051               params()),
00052   BOOL_MEMBER(chop_vertical_creep, 0, "Vertical creep",
00053             params()),
00054   INT_MEMBER(chop_split_length, 10000, "Split Length",
00055              params()),
00056   INT_MEMBER(chop_same_distance, 2, "Same distance",
00057              params()),
00058   INT_MEMBER(chop_min_outline_points, 6, "Min Number of Points on Outline",
00059              params()),
00060   INT_MEMBER(chop_seam_pile_size, 150, "Max number of seams in seam_pile",
00061              params()),
00062   BOOL_MEMBER(chop_new_seam_pile, 1, "Use new seam_pile", params()),
00063   INT_MEMBER(chop_inside_angle, -50, "Min Inside Angle Bend",
00064              params()),
00065   INT_MEMBER(chop_min_outline_area, 2000, "Min Outline Area",
00066              params()),
00067   double_MEMBER(chop_split_dist_knob, 0.5, "Split length adjustment",
00068                 params()),
00069   double_MEMBER(chop_overlap_knob, 0.9, "Split overlap adjustment",
00070                 params()),
00071   double_MEMBER(chop_center_knob, 0.15, "Split center adjustment",
00072                 params()),
00073   INT_MEMBER(chop_centered_maxwidth, 90, "Width of (smaller) chopped blobs "
00074              "above which we don't care that a chop is not near the center.",
00075              params()),
00076   double_MEMBER(chop_sharpness_knob, 0.06, "Split sharpness adjustment",
00077                 params()),
00078   double_MEMBER(chop_width_change_knob, 5.0, "Width change adjustment",
00079                 params()),
00080   double_MEMBER(chop_ok_split, 100.0, "OK split limit",
00081                 params()),
00082   double_MEMBER(chop_good_split, 50.0, "Good split limit",
00083                 params()),
00084   INT_MEMBER(chop_x_y_weight, 3, "X / Y  length weight",
00085              params()),
00086   INT_MEMBER(segment_adjust_debug, 0, "Segmentation adjustment debug",
00087              params()),
00088   BOOL_MEMBER(assume_fixed_pitch_char_segment, FALSE,
00089               "include fixed-pitch heuristics in char segmentation",
00090               params()),
00091   INT_MEMBER(wordrec_debug_level, 0,
00092              "Debug level for wordrec", params()),
00093   INT_MEMBER(wordrec_max_join_chunks, 4,
00094              "Max number of broken pieces to associate", params()),
00095   BOOL_MEMBER(wordrec_skip_no_truth_words, false,
00096               "Only run OCR for words that had truth recorded in BlamerBundle",
00097               params()),
00098   BOOL_MEMBER(wordrec_debug_blamer, false,
00099               "Print blamer debug messages", params()),
00100   BOOL_MEMBER(wordrec_run_blamer, false,
00101               "Try to set the blame for errors", params()),
00102   INT_MEMBER(segsearch_debug_level, 0,
00103              "SegSearch debug level", params()),
00104   INT_MEMBER(segsearch_max_pain_points, 2000,
00105              "Maximum number of pain points stored in the queue",
00106              params()),
00107   INT_MEMBER(segsearch_max_futile_classifications, 20,
00108              "Maximum number of pain point classifications per chunk that"
00109              "did not result in finding a better word choice.",
00110              params()),
00111   double_MEMBER(segsearch_max_char_wh_ratio, 2.0,
00112                 "Maximum character width-to-height ratio", params()),
00113   BOOL_MEMBER(save_alt_choices, true,
00114               "Save alternative paths found during chopping"
00115               " and segmentation search",
00116               params()) {
00117   prev_word_best_choice_ = NULL;
00118   language_model_ = new LanguageModel(&get_fontinfo_table(),
00119                                       &(getDict()));
00120   fill_lattice_ = NULL;
00121 }
00122 
00123 Wordrec::~Wordrec() {
00124   delete language_model_;
00125 }
00126 
00127 }  // namespace tesseract
 All Classes Namespaces Files Functions Variables Typedefs Enumerations Enumerator Friends Defines