// tesseract 3.03
00001 00002 // File: textord.cpp 00003 // Description: The top-level text line and word finding functionality. 00004 // Author: Ray Smith 00005 // Created: Fri Mar 13 14:43:01 PDT 2009 00006 // 00007 // (C) Copyright 2009, Google Inc. 00008 // Licensed under the Apache License, Version 2.0 (the "License"); 00009 // you may not use this file except in compliance with the License. 00010 // You may obtain a copy of the License at 00011 // http://www.apache.org/licenses/LICENSE-2.0 00012 // Unless required by applicable law or agreed to in writing, software 00013 // distributed under the License is distributed on an "AS IS" BASIS, 00014 // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 00015 // See the License for the specific language governing permissions and 00016 // limitations under the License. 00017 // 00019 00020 // Include automatically generated configuration file if running autoconf. 00021 #ifdef HAVE_CONFIG_H 00022 #include "config_auto.h" 00023 #endif 00024 00025 #include "baselinedetect.h" 00026 #include "drawtord.h" 00027 #include "textord.h" 00028 #include "makerow.h" 00029 #include "pageres.h" 00030 #include "tordmain.h" 00031 #include "wordseg.h" 00032 00033 namespace tesseract { 00034 00035 Textord::Textord(CCStruct* ccstruct) 00036 : ccstruct_(ccstruct), use_cjk_fp_model_(false), 00037 // makerow.cpp /////////////////////////////////////////// 00038 BOOL_MEMBER(textord_single_height_mode, false, 00039 "Script has no xheight, so use a single mode", 00040 ccstruct_->params()), 00041 // tospace.cpp /////////////////////////////////////////// 00042 BOOL_MEMBER(tosp_old_to_method, false, "Space stats use prechopping?", 00043 ccstruct_->params()), 00044 BOOL_MEMBER(tosp_old_to_constrain_sp_kn, false, 00045 "Constrain relative values of inter and intra-word gaps for " 00046 "old_to_method.", 00047 ccstruct_->params()), 00048 BOOL_MEMBER(tosp_only_use_prop_rows, true, 00049 "Block stats to use fixed pitch rows?", 00050 
ccstruct_->params()), 00051 BOOL_MEMBER(tosp_force_wordbreak_on_punct, false, 00052 "Force word breaks on punct to break long lines in non-space " 00053 "delimited langs", 00054 ccstruct_->params()), 00055 BOOL_MEMBER(tosp_use_pre_chopping, false, 00056 "Space stats use prechopping?", 00057 ccstruct_->params()), 00058 BOOL_MEMBER(tosp_old_to_bug_fix, false, "Fix suspected bug in old code", 00059 ccstruct_->params()), 00060 BOOL_MEMBER(tosp_block_use_cert_spaces, true, 00061 "Only stat OBVIOUS spaces", 00062 ccstruct_->params()), 00063 BOOL_MEMBER(tosp_row_use_cert_spaces, true, "Only stat OBVIOUS spaces", 00064 ccstruct_->params()), 00065 BOOL_MEMBER(tosp_narrow_blobs_not_cert, true, 00066 "Only stat OBVIOUS spaces", 00067 ccstruct_->params()), 00068 BOOL_MEMBER(tosp_row_use_cert_spaces1, true, "Only stat OBVIOUS spaces", 00069 ccstruct_->params()), 00070 BOOL_MEMBER(tosp_recovery_isolated_row_stats, true, 00071 "Use row alone when inadequate cert spaces", 00072 ccstruct_->params()), 00073 BOOL_MEMBER(tosp_only_small_gaps_for_kern, false, "Better guess", 00074 ccstruct_->params()), 00075 BOOL_MEMBER(tosp_all_flips_fuzzy, false, "Pass ANY flip to context?", 00076 ccstruct_->params()), 00077 BOOL_MEMBER(tosp_fuzzy_limit_all, true, 00078 "Dont restrict kn->sp fuzzy limit to tables", 00079 ccstruct_->params()), 00080 BOOL_MEMBER(tosp_stats_use_xht_gaps, true, 00081 "Use within xht gap for wd breaks", 00082 ccstruct_->params()), 00083 BOOL_MEMBER(tosp_use_xht_gaps, true, "Use within xht gap for wd breaks", 00084 ccstruct_->params()), 00085 BOOL_MEMBER(tosp_only_use_xht_gaps, false, 00086 "Only use within xht gap for wd breaks", 00087 ccstruct_->params()), 00088 BOOL_MEMBER(tosp_rule_9_test_punct, false, 00089 "Dont chng kn to space next to punct", 00090 ccstruct_->params()), 00091 BOOL_MEMBER(tosp_flip_fuzz_kn_to_sp, true, "Default flip", 00092 ccstruct_->params()), 00093 BOOL_MEMBER(tosp_flip_fuzz_sp_to_kn, true, "Default flip", 00094 ccstruct_->params()), 00095 
BOOL_MEMBER(tosp_improve_thresh, false, "Enable improvement heuristic", 00096 ccstruct_->params()), 00097 INT_MEMBER(tosp_debug_level, 0, "Debug data", 00098 ccstruct_->params()), 00099 INT_MEMBER(tosp_enough_space_samples_for_median, 3, 00100 "or should we use mean", 00101 ccstruct_->params()), 00102 INT_MEMBER(tosp_redo_kern_limit, 10, 00103 "No.samples reqd to reestimate for row", 00104 ccstruct_->params()), 00105 INT_MEMBER(tosp_few_samples, 40, 00106 "No.gaps reqd with 1 large gap to treat as a table", 00107 ccstruct_->params()), 00108 INT_MEMBER(tosp_short_row, 20, 00109 "No.gaps reqd with few cert spaces to use certs", 00110 ccstruct_->params()), 00111 INT_MEMBER(tosp_sanity_method, 1, "How to avoid being silly", 00112 ccstruct_->params()), 00113 double_MEMBER(tosp_old_sp_kn_th_factor, 2.0, 00114 "Factor for defining space threshold in terms of space and " 00115 "kern sizes", 00116 ccstruct_->params()), 00117 double_MEMBER(tosp_threshold_bias1, 0, 00118 "how far between kern and space?", 00119 ccstruct_->params()), 00120 double_MEMBER(tosp_threshold_bias2, 0, 00121 "how far between kern and space?", 00122 ccstruct_->params()), 00123 double_MEMBER(tosp_narrow_fraction, 0.3, "Fract of xheight for narrow", 00124 ccstruct_->params()), 00125 double_MEMBER(tosp_narrow_aspect_ratio, 0.48, 00126 "narrow if w/h less than this", 00127 ccstruct_->params()), 00128 double_MEMBER(tosp_wide_fraction, 0.52, "Fract of xheight for wide", 00129 ccstruct_->params()), 00130 double_MEMBER(tosp_wide_aspect_ratio, 0.0, "wide if w/h less than this", 00131 ccstruct_->params()), 00132 double_MEMBER(tosp_fuzzy_space_factor, 0.6, 00133 "Fract of xheight for fuzz sp", 00134 ccstruct_->params()), 00135 double_MEMBER(tosp_fuzzy_space_factor1, 0.5, 00136 "Fract of xheight for fuzz sp", 00137 ccstruct_->params()), 00138 double_MEMBER(tosp_fuzzy_space_factor2, 0.72, 00139 "Fract of xheight for fuzz sp", 00140 ccstruct_->params()), 00141 double_MEMBER(tosp_gap_factor, 0.83, "gap ratio to flip 
sp->kern", 00142 ccstruct_->params()), 00143 double_MEMBER(tosp_kern_gap_factor1, 2.0, "gap ratio to flip kern->sp", 00144 ccstruct_->params()), 00145 double_MEMBER(tosp_kern_gap_factor2, 1.3, "gap ratio to flip kern->sp", 00146 ccstruct_->params()), 00147 double_MEMBER(tosp_kern_gap_factor3, 2.5, "gap ratio to flip kern->sp", 00148 ccstruct_->params()), 00149 double_MEMBER(tosp_ignore_big_gaps, -1, "xht multiplier", 00150 ccstruct_->params()), 00151 double_MEMBER(tosp_ignore_very_big_gaps, 3.5, "xht multiplier", 00152 ccstruct_->params()), 00153 double_MEMBER(tosp_rep_space, 1.6, "rep gap multiplier for space", 00154 ccstruct_->params()), 00155 double_MEMBER(tosp_enough_small_gaps, 0.65, 00156 "Fract of kerns reqd for isolated row stats", 00157 ccstruct_->params()), 00158 double_MEMBER(tosp_table_kn_sp_ratio, 2.25, 00159 "Min difference of kn & sp in table", 00160 ccstruct_->params()), 00161 double_MEMBER(tosp_table_xht_sp_ratio, 0.33, 00162 "Expect spaces bigger than this", 00163 ccstruct_->params()), 00164 double_MEMBER(tosp_table_fuzzy_kn_sp_ratio, 3.0, 00165 "Fuzzy if less than this", 00166 ccstruct_->params()), 00167 double_MEMBER(tosp_fuzzy_kn_fraction, 0.5, "New fuzzy kn alg", 00168 ccstruct_->params()), 00169 double_MEMBER(tosp_fuzzy_sp_fraction, 0.5, "New fuzzy sp alg", 00170 ccstruct_->params()), 00171 double_MEMBER(tosp_min_sane_kn_sp, 1.5, 00172 "Dont trust spaces less than this time kn", 00173 ccstruct_->params()), 00174 double_MEMBER(tosp_init_guess_kn_mult, 2.2, 00175 "Thresh guess - mult kn by this", 00176 ccstruct_->params()), 00177 double_MEMBER(tosp_init_guess_xht_mult, 0.28, 00178 "Thresh guess - mult xht by this", 00179 ccstruct_->params()), 00180 double_MEMBER(tosp_max_sane_kn_thresh, 5.0, 00181 "Multiplier on kn to limit thresh", 00182 ccstruct_->params()), 00183 double_MEMBER(tosp_flip_caution, 0.0, 00184 "Dont autoflip kn to sp when large separation", 00185 ccstruct_->params()), 00186 double_MEMBER(tosp_large_kerning, 0.19, 00187 "Limit 
use of xht gap with large kns", 00188 ccstruct_->params()), 00189 double_MEMBER(tosp_dont_fool_with_small_kerns, -1, 00190 "Limit use of xht gap with odd small kns", 00191 ccstruct_->params()), 00192 double_MEMBER(tosp_near_lh_edge, 0, 00193 "Dont reduce box if the top left is non blank", 00194 ccstruct_->params()), 00195 double_MEMBER(tosp_silly_kn_sp_gap, 0.2, 00196 "Dont let sp minus kn get too small", 00197 ccstruct_->params()), 00198 double_MEMBER(tosp_pass_wide_fuzz_sp_to_context, 0.75, 00199 "How wide fuzzies need context", 00200 ccstruct_->params()), 00201 // tordmain.cpp /////////////////////////////////////////// 00202 BOOL_MEMBER(textord_no_rejects, false, "Don't remove noise blobs", 00203 ccstruct_->params()), 00204 BOOL_MEMBER(textord_show_blobs, false, "Display unsorted blobs", 00205 ccstruct_->params()), 00206 BOOL_MEMBER(textord_show_boxes, false, "Display unsorted blobs", 00207 ccstruct_->params()), 00208 INT_MEMBER(textord_max_noise_size, 7, "Pixel size of noise", 00209 ccstruct_->params()), 00210 INT_MEMBER(textord_baseline_debug, 0, "Baseline debug level", 00211 ccstruct_->params()), 00212 double_MEMBER(textord_blob_size_bigile, 95, "Percentile for large blobs", 00213 ccstruct_->params()), 00214 double_MEMBER(textord_noise_area_ratio, 0.7, 00215 "Fraction of bounding box for noise", 00216 ccstruct_->params()), 00217 double_MEMBER(textord_blob_size_smallile, 20, 00218 "Percentile for small blobs", 00219 ccstruct_->params()), 00220 double_MEMBER(textord_initialx_ile, 0.75, 00221 "Ile of sizes for xheight guess", 00222 ccstruct_->params()), 00223 double_MEMBER(textord_initialasc_ile, 0.90, 00224 "Ile of sizes for xheight guess", 00225 ccstruct_->params()), 00226 INT_MEMBER(textord_noise_sizefraction, 10, 00227 "Fraction of size for maxima", 00228 ccstruct_->params()), 00229 double_MEMBER(textord_noise_sizelimit, 0.5, 00230 "Fraction of x for big t count", 00231 ccstruct_->params()), 00232 INT_MEMBER(textord_noise_translimit, 16, "Transitions for 
normal blob", 00233 ccstruct_->params()), 00234 double_MEMBER(textord_noise_normratio, 2.0, 00235 "Dot to norm ratio for deletion", 00236 ccstruct_->params()), 00237 BOOL_MEMBER(textord_noise_rejwords, true, "Reject noise-like words", 00238 ccstruct_->params()), 00239 BOOL_MEMBER(textord_noise_rejrows, true, "Reject noise-like rows", 00240 ccstruct_->params()), 00241 double_MEMBER(textord_noise_syfract, 0.2, 00242 "xh fract height error for norm blobs", 00243 ccstruct_->params()), 00244 double_MEMBER(textord_noise_sxfract, 0.4, 00245 "xh fract width error for norm blobs", 00246 ccstruct_->params()), 00247 double_MEMBER(textord_noise_hfract, 1.0/64, 00248 "Height fraction to discard outlines as speckle noise", 00249 ccstruct_->params()), 00250 INT_MEMBER(textord_noise_sncount, 1, "super norm blobs to save row", 00251 ccstruct_->params()), 00252 double_MEMBER(textord_noise_rowratio, 6.0, 00253 "Dot to norm ratio for deletion", 00254 ccstruct_->params()), 00255 BOOL_MEMBER(textord_noise_debug, false, "Debug row garbage detector", 00256 ccstruct_->params()), 00257 double_MEMBER(textord_blshift_maxshift, 0.00, "Max baseline shift", 00258 ccstruct_->params()), 00259 double_MEMBER(textord_blshift_xfraction, 9.99, 00260 "Min size of baseline shift", 00261 ccstruct_->params()) { 00262 } 00263 00264 Textord::~Textord() { 00265 } 00266 00267 // Make the textlines and words inside each block. 00268 void Textord::TextordPage(PageSegMode pageseg_mode, const FCOORD& reskew, 00269 int width, int height, Pix* binary_pix, 00270 Pix* thresholds_pix, Pix* grey_pix, 00271 bool use_box_bottoms, 00272 BLOCK_LIST* blocks, TO_BLOCK_LIST* to_blocks) { 00273 page_tr_.set_x(width); 00274 page_tr_.set_y(height); 00275 if (to_blocks->empty()) { 00276 // AutoPageSeg was not used, so we need to find_components first. 
00277 find_components(binary_pix, blocks, to_blocks); 00278 TO_BLOCK_IT it(to_blocks); 00279 for (it.mark_cycle_pt(); !it.cycled_list(); it.forward()) { 00280 TO_BLOCK* to_block = it.data(); 00281 // Compute the edge offsets whether or not there is a grey_pix. 00282 // We have by-passed auto page seg, so we have to run it here. 00283 // By page segmentation mode there is no non-text to avoid running on. 00284 to_block->ComputeEdgeOffsets(thresholds_pix, grey_pix); 00285 } 00286 } else if (!PSM_SPARSE(pageseg_mode)) { 00287 // AutoPageSeg does not need to find_components as it did that already. 00288 // Filter_blobs sets up the TO_BLOCKs the same as find_components does. 00289 filter_blobs(page_tr_, to_blocks, true); 00290 } 00291 00292 ASSERT_HOST(!to_blocks->empty()); 00293 if (pageseg_mode == PSM_SINGLE_BLOCK_VERT_TEXT) { 00294 const FCOORD anticlockwise90(0.0f, 1.0f); 00295 const FCOORD clockwise90(0.0f, -1.0f); 00296 TO_BLOCK_IT it(to_blocks); 00297 for (it.mark_cycle_pt(); !it.cycled_list(); it.forward()) { 00298 TO_BLOCK* to_block = it.data(); 00299 BLOCK* block = to_block->block; 00300 // Create a fake poly_block in block from its bounding box. 00301 block->set_poly_block(new POLY_BLOCK(block->bounding_box(), 00302 PT_VERTICAL_TEXT)); 00303 // Rotate the to_block along with its contained block and blobnbox lists. 00304 to_block->rotate(anticlockwise90); 00305 // Set the block's rotation values to obey the convention followed in 00306 // layout analysis for vertical text. 00307 block->set_re_rotation(clockwise90); 00308 block->set_classify_rotation(clockwise90); 00309 } 00310 } 00311 00312 TO_BLOCK_IT to_block_it(to_blocks); 00313 TO_BLOCK* to_block = to_block_it.data(); 00314 // Make the rows in the block. 00315 float gradient; 00316 // Do it the old fashioned way. 
00317 if (PSM_LINE_FIND_ENABLED(pageseg_mode)) { 00318 gradient = make_rows(page_tr_, to_blocks); 00319 } else if (!PSM_SPARSE(pageseg_mode)) { 00320 // SINGLE_LINE, SINGLE_WORD and SINGLE_CHAR all need a single row. 00321 gradient = make_single_row(page_tr_, to_block, to_blocks); 00322 } 00323 BaselineDetect baseline_detector(textord_baseline_debug, 00324 reskew, to_blocks); 00325 baseline_detector.ComputeStraightBaselines(use_box_bottoms); 00326 baseline_detector.ComputeBaselineSplinesAndXheights(page_tr_, true, 00327 textord_heavy_nr, 00328 textord_show_final_rows, 00329 this); 00330 // Now make the words in the lines. 00331 if (PSM_WORD_FIND_ENABLED(pageseg_mode)) { 00332 // SINGLE_LINE uses the old word maker on the single line. 00333 make_words(this, page_tr_, gradient, blocks, to_blocks); 00334 } else { 00335 // SINGLE_WORD and SINGLE_CHAR cram all the blobs into a 00336 // single word, and in SINGLE_CHAR mode, all the outlines 00337 // go in a single blob. 00338 TO_BLOCK* to_block = to_block_it.data(); 00339 make_single_word(pageseg_mode == PSM_SINGLE_CHAR, 00340 to_block->get_rows(), to_block->block->row_list()); 00341 } 00342 cleanup_blocks(blocks); // Remove empties. 00343 00344 // Compute the margins for each row in the block, to be used later for 00345 // paragraph detection. 00346 BLOCK_IT b_it(blocks); 00347 for (b_it.mark_cycle_pt(); !b_it.cycled_list(); b_it.forward()) { 00348 b_it.data()->compute_row_margins(); 00349 } 00350 #ifndef GRAPHICS_DISABLED 00351 close_to_win(); 00352 #endif 00353 } 00354 00355 // If we were supposed to return only a single textline, and there is more 00356 // than one, clean up and leave only the best. 00357 void Textord::CleanupSingleRowResult(PageSegMode pageseg_mode, 00358 PAGE_RES* page_res) { 00359 if (PSM_LINE_FIND_ENABLED(pageseg_mode) || PSM_SPARSE(pageseg_mode)) 00360 return; // No cleanup required. 00361 PAGE_RES_IT it(page_res); 00362 // Find the best row, being the greatest mean word conf. 
00363 float row_total_conf = 0.0f; 00364 int row_word_count = 0; 00365 ROW_RES* best_row = NULL; 00366 float best_conf = 0.0f; 00367 for (it.restart_page(); it.word() != NULL; it.forward()) { 00368 WERD_RES* word = it.word(); 00369 row_total_conf += word->best_choice->certainty(); 00370 ++row_word_count; 00371 if (it.next_row() != it.row()) { 00372 row_total_conf /= row_word_count; 00373 if (best_row == NULL || best_conf < row_total_conf) { 00374 best_row = it.row(); 00375 best_conf = row_total_conf; 00376 } 00377 row_total_conf = 0.0f; 00378 row_word_count = 0; 00379 } 00380 } 00381 // Now eliminate any word not in the best row. 00382 for (it.restart_page(); it.word() != NULL; it.forward()) { 00383 if (it.row() != best_row) 00384 it.DeleteCurrentWord(); 00385 } 00386 } 00387 00388 } // namespace tesseract.