tesseract  3.03
/usr/local/google/home/jbreiden/tesseract-ocr-read-only/textord/topitch.cpp
Go to the documentation of this file.
00001 /**********************************************************************
00002  * File:        topitch.cpp  (Formerly to_pitch.c)
00003  * Description: Code to determine fixed pitchness and the pitch if fixed.
00004  * Author:              Ray Smith
00005  * Created:             Tue Aug 24 16:57:29 BST 1993
00006  *
00007  * (C) Copyright 1993, Hewlett-Packard Ltd.
00008  ** Licensed under the Apache License, Version 2.0 (the "License");
00009  ** you may not use this file except in compliance with the License.
00010  ** You may obtain a copy of the License at
00011  ** http://www.apache.org/licenses/LICENSE-2.0
00012  ** Unless required by applicable law or agreed to in writing, software
00013  ** distributed under the License is distributed on an "AS IS" BASIS,
00014  ** WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
00015  ** See the License for the specific language governing permissions and
00016  ** limitations under the License.
00017  *
00018  **********************************************************************/
00019 
00020 #ifdef __UNIX__
00021 #include          <assert.h>
00022 #endif
00023 #include          "stderr.h"
00024 #include          "blobbox.h"
00025 #include          "statistc.h"
00026 #include          "drawtord.h"
00027 #include          "makerow.h"
00028 #include          "pitsync1.h"
00029 #include          "pithsync.h"
00030 #include          "tovars.h"
00031 #include          "wordseg.h"
00032 #include          "topitch.h"
00033 #include          "secname.h"
00034 #include          "helpers.h"
00035 
00036 // Include automatically generated configuration file if running autoconf.
00037 #ifdef HAVE_CONFIG_H
00038 #include "config_auto.h"
00039 #endif
00040 
00041 #define EXTERN
00042 
      // EXTERN is defined empty just above, so it adds nothing to the
      // declarations below. Each BOOL_VAR/double_VAR takes a name, a default
      // value and a description string.
      // NOTE(review): the macros themselves are declared elsewhere; confirm
      // their exact expansion against the params header.
00043 EXTERN BOOL_VAR (textord_all_prop, FALSE, "All doc is proportial text");
00044 EXTERN BOOL_VAR (textord_debug_pitch_test, FALSE,
00045 "Debug on fixed pitch test");
00046 EXTERN BOOL_VAR (textord_disable_pitch_test, FALSE,
00047 "Turn off dp fixed pitch algorithm");
00048 EXTERN BOOL_VAR (textord_fast_pitch_test, FALSE,
00049 "Do even faster pitch algorithm");
00050 EXTERN BOOL_VAR (textord_debug_pitch_metric, FALSE,
00051 "Write full metric stuff");
00052 EXTERN BOOL_VAR (textord_show_row_cuts, FALSE, "Draw row-level cuts");
00053 EXTERN BOOL_VAR (textord_show_page_cuts, FALSE, "Draw page-level cuts");
00054 EXTERN BOOL_VAR (textord_pitch_cheat, FALSE,
00055 "Use correct answer for fixed/prop");
00056 EXTERN BOOL_VAR (textord_blockndoc_fixed, FALSE,
00057 "Attempt whole doc/block fixed pitch");
00058 EXTERN double_VAR (textord_projection_scale, 0.200, "Ding rate for mid-cuts");
00059 EXTERN double_VAR (textord_balance_factor, 1.0,
00060 "Ding rate for unbalanced char cells");
00061 
      // BLOCK_STATS_CLUSTERS sizes the gap/cluster arrays in row_pitch_stats.
      // MAX_ALLOWED_PITCH is the upper range of the pitch histogram built in
      // try_doc_fixed.
      // NOTE(review): FIXED_WIDTH_MULTIPLE is not referenced in the code
      // visible here; check for remaining users before changing it.
00062 #define FIXED_WIDTH_MULTIPLE  5
00063 #define BLOCK_STATS_CLUSTERS  10
00064 #define MAX_ALLOWED_PITCH 100    //max pitch in pixels.
00065 
00066 /**********************************************************************
00067  * compute_fixed_pitch
00068  *
00069  * Decide whether each row is fixed pitch individually.
00070  * Correlate definite and uncertain results to obtain an individual
00071  * result for each row in the TO_ROW class.
00072  **********************************************************************/
00073 
00074 void compute_fixed_pitch(ICOORD page_tr,              // top right
00075                          TO_BLOCK_LIST *port_blocks,  // input list
00076                          float gradient,              // page skew
00077                          FCOORD rotation,             // for drawing
00078                          BOOL8 testing_on) {          // correct orientation
00079   TO_BLOCK_IT block_it;          //iterator
00080   TO_BLOCK *block;               //current block;
00081   TO_ROW_IT row_it;              //row iterator
00082   TO_ROW *row;                   //current row
00083   int block_index;               //block number
00084   int row_index;                 //row number
00085 
00086 #ifndef GRAPHICS_DISABLED
00087   if (textord_show_initial_words && testing_on) {
00088     if (to_win == NULL)
00089       create_to_win(page_tr);
00090   }
00091 #endif
00092 
00093   block_it.set_to_list (port_blocks);
00094   block_index = 1;
00095   for (block_it.mark_cycle_pt (); !block_it.cycled_list ();
00096   block_it.forward ()) {
00097     block = block_it.data ();
00098     compute_block_pitch(block, rotation, block_index, testing_on);
00099     block_index++;
00100   }
00101 
00102   if (!try_doc_fixed (page_tr, port_blocks, gradient)) {
00103     block_index = 1;
00104     for (block_it.mark_cycle_pt (); !block_it.cycled_list ();
00105     block_it.forward ()) {
00106       block = block_it.data ();
00107       if (!try_block_fixed (block, block_index))
00108         try_rows_fixed(block, block_index, testing_on);
00109       block_index++;
00110     }
00111   }
00112 
00113   block_index = 1;
00114   for (block_it.mark_cycle_pt(); !block_it.cycled_list();
00115        block_it.forward()) {
00116     block = block_it.data ();
00117     POLY_BLOCK* pb = block->block->poly_block();
00118     if (pb != NULL && !pb->IsText()) continue;  // Non-text doesn't exist!
00119     row_it.set_to_list (block->get_rows ());
00120     row_index = 1;
00121     for (row_it.mark_cycle_pt (); !row_it.cycled_list (); row_it.forward ()) {
00122       row = row_it.data ();
00123       fix_row_pitch(row, block, port_blocks, row_index, block_index);
00124       row_index++;
00125     }
00126     block_index++;
00127   }
00128 #ifndef GRAPHICS_DISABLED
00129   if (textord_show_initial_words && testing_on) {
00130     ScrollView::Update();
00131   }
00132 #endif
00133 }
00134 
00135 
00136 /**********************************************************************
00137  * fix_row_pitch
00138  *
00139  * Get a pitch_decision for this row by voting among similar rows in the
00140  * block, then similar rows over all the page, or any other rows at all.
00141  **********************************************************************/
00142 
00143 void fix_row_pitch(TO_ROW *bad_row,        // row to fix
00144                    TO_BLOCK *bad_block,    // block of bad_row
00145                    TO_BLOCK_LIST *blocks,  // blocks to scan
00146                    inT32 row_target,       // number of row
00147                    inT32 block_target) {   // number of block
00148   inT16 mid_cuts;
00149   int block_votes;               //votes in block
00150   int like_votes;                //votes over page
00151   int other_votes;               //votes of unlike blocks
00152   int block_index;               //number of block
00153   int row_index;                 //number of row
00154   int maxwidth;                  //max pitch
00155   TO_BLOCK_IT block_it = blocks; //block iterator
00156   TO_ROW_IT row_it;
00157   TO_BLOCK *block;               //current block
00158   TO_ROW *row;                   //current row
00159   float sp_sd;                   //space deviation
00160   STATS block_stats;             //pitches in block
00161   STATS like_stats;              //pitches in page
00162 
00163   block_votes = like_votes = other_votes = 0;
00164   maxwidth = (inT32) ceil (bad_row->xheight * textord_words_maxspace);
00165   if (bad_row->pitch_decision != PITCH_DEF_FIXED
00166   && bad_row->pitch_decision != PITCH_DEF_PROP) {
00167     block_stats.set_range (0, maxwidth);
00168     like_stats.set_range (0, maxwidth);
00169     block_index = 1;
00170     for (block_it.mark_cycle_pt(); !block_it.cycled_list();
00171          block_it.forward()) {
00172       block = block_it.data();
00173       POLY_BLOCK* pb = block->block->poly_block();
00174       if (pb != NULL && !pb->IsText()) continue;  // Non text doesn't exist!
00175       row_index = 1;
00176       row_it.set_to_list (block->get_rows ());
00177       for (row_it.mark_cycle_pt (); !row_it.cycled_list ();
00178       row_it.forward ()) {
00179         row = row_it.data ();
00180         if ((bad_row->all_caps
00181           && row->xheight + row->ascrise
00182           <
00183           (bad_row->xheight + bad_row->ascrise) * (1 +
00184           textord_pitch_rowsimilarity)
00185           && row->xheight + row->ascrise >
00186           (bad_row->xheight + bad_row->ascrise) * (1 -
00187           textord_pitch_rowsimilarity))
00188           || (!bad_row->all_caps
00189           && row->xheight <
00190           bad_row->xheight * (1 + textord_pitch_rowsimilarity)
00191           && row->xheight >
00192         bad_row->xheight * (1 - textord_pitch_rowsimilarity))) {
00193           if (block_index == block_target) {
00194             if (row->pitch_decision == PITCH_DEF_FIXED) {
00195               block_votes += textord_words_veto_power;
00196               block_stats.add ((inT32) row->fixed_pitch,
00197                 textord_words_veto_power);
00198             }
00199             else if (row->pitch_decision == PITCH_MAYBE_FIXED
00200             || row->pitch_decision == PITCH_CORR_FIXED) {
00201               block_votes++;
00202               block_stats.add ((inT32) row->fixed_pitch, 1);
00203             }
00204             else if (row->pitch_decision == PITCH_DEF_PROP)
00205               block_votes -= textord_words_veto_power;
00206             else if (row->pitch_decision == PITCH_MAYBE_PROP
00207               || row->pitch_decision == PITCH_CORR_PROP)
00208               block_votes--;
00209           }
00210           else {
00211             if (row->pitch_decision == PITCH_DEF_FIXED) {
00212               like_votes += textord_words_veto_power;
00213               like_stats.add ((inT32) row->fixed_pitch,
00214                 textord_words_veto_power);
00215             }
00216             else if (row->pitch_decision == PITCH_MAYBE_FIXED
00217             || row->pitch_decision == PITCH_CORR_FIXED) {
00218               like_votes++;
00219               like_stats.add ((inT32) row->fixed_pitch, 1);
00220             }
00221             else if (row->pitch_decision == PITCH_DEF_PROP)
00222               like_votes -= textord_words_veto_power;
00223             else if (row->pitch_decision == PITCH_MAYBE_PROP
00224               || row->pitch_decision == PITCH_CORR_PROP)
00225               like_votes--;
00226           }
00227         }
00228         else {
00229           if (row->pitch_decision == PITCH_DEF_FIXED)
00230             other_votes += textord_words_veto_power;
00231           else if (row->pitch_decision == PITCH_MAYBE_FIXED
00232             || row->pitch_decision == PITCH_CORR_FIXED)
00233             other_votes++;
00234           else if (row->pitch_decision == PITCH_DEF_PROP)
00235             other_votes -= textord_words_veto_power;
00236           else if (row->pitch_decision == PITCH_MAYBE_PROP
00237             || row->pitch_decision == PITCH_CORR_PROP)
00238             other_votes--;
00239         }
00240         row_index++;
00241       }
00242       block_index++;
00243     }
00244     if (block_votes > textord_words_veto_power) {
00245       bad_row->fixed_pitch = block_stats.ile (0.5);
00246       bad_row->pitch_decision = PITCH_CORR_FIXED;
00247     }
00248     else if (block_votes <= textord_words_veto_power && like_votes > 0) {
00249       bad_row->fixed_pitch = like_stats.ile (0.5);
00250       bad_row->pitch_decision = PITCH_CORR_FIXED;
00251     }
00252     else {
00253       bad_row->pitch_decision = PITCH_CORR_PROP;
00254       #ifndef SECURE_NAMES
00255       if (block_votes == 0 && like_votes == 0 && other_votes > 0
00256         && (textord_debug_pitch_test || textord_debug_pitch_metric))
00257         tprintf
00258           ("Warning:row %d of block %d set prop with no like rows against trend\n",
00259           row_target, block_target);
00260       #endif
00261     }
00262   }
00263   if (textord_debug_pitch_metric) {
00264     tprintf(":b_votes=%d:l_votes=%d:o_votes=%d",
00265             block_votes, like_votes, other_votes);
00266     tprintf("x=%g:asc=%g\n", bad_row->xheight, bad_row->ascrise);
00267   }
00268   if (bad_row->pitch_decision == PITCH_CORR_FIXED) {
00269     if (bad_row->fixed_pitch < textord_min_xheight) {
00270       if (block_votes > 0)
00271         bad_row->fixed_pitch = block_stats.ile (0.5);
00272       else if (block_votes == 0 && like_votes > 0)
00273         bad_row->fixed_pitch = like_stats.ile (0.5);
00274       else {
00275         tprintf
00276           ("Warning:guessing pitch as xheight on row %d, block %d\n",
00277           row_target, block_target);
00278         bad_row->fixed_pitch = bad_row->xheight;
00279       }
00280     }
00281     if (bad_row->fixed_pitch < textord_min_xheight)
00282       bad_row->fixed_pitch = (float) textord_min_xheight;
00283     bad_row->kern_size = bad_row->fixed_pitch / 4;
00284     bad_row->min_space = (inT32) (bad_row->fixed_pitch * 0.6);
00285     bad_row->max_nonspace = (inT32) (bad_row->fixed_pitch * 0.4);
00286     bad_row->space_threshold =
00287       (bad_row->min_space + bad_row->max_nonspace) / 2;
00288     bad_row->space_size = bad_row->fixed_pitch;
00289     if (bad_row->char_cells.empty ())
00290       tune_row_pitch (bad_row, &bad_row->projection,
00291         bad_row->projection_left, bad_row->projection_right,
00292         (bad_row->fixed_pitch +
00293         bad_row->max_nonspace * 3) / 4, bad_row->fixed_pitch,
00294         sp_sd, mid_cuts, &bad_row->char_cells, FALSE);
00295   }
00296   else if (bad_row->pitch_decision == PITCH_CORR_PROP
00297   || bad_row->pitch_decision == PITCH_DEF_PROP) {
00298     bad_row->fixed_pitch = 0.0f;
00299     bad_row->char_cells.clear ();
00300   }
00301 }
00302 
00303 
00304 /**********************************************************************
00305  * compute_block_pitch
00306  *
00307  * Decide whether each block is fixed pitch individually.
00308  **********************************************************************/
00309 
00310 void compute_block_pitch(TO_BLOCK *block,     // input list
00311                          FCOORD rotation,     // for drawing
00312                          inT32 block_index,   // block number
00313                          BOOL8 testing_on) {  // correct orientation
00314    TBOX block_box;                 //bounding box
00315 
00316   block_box = block->block->bounding_box ();
00317   if (testing_on && textord_debug_pitch_test) {
00318     tprintf ("Block %d at (%d,%d)->(%d,%d)\n",
00319       block_index,
00320       block_box.left (), block_box.bottom (),
00321       block_box.right (), block_box.top ());
00322   }
00323   block->min_space = (inT32) floor (block->xheight
00324     * textord_words_default_minspace);
00325   block->max_nonspace = (inT32) ceil (block->xheight
00326     * textord_words_default_nonspace);
00327   block->fixed_pitch = 0.0f;
00328   block->space_size = (float) block->min_space;
00329   block->kern_size = (float) block->max_nonspace;
00330   block->pr_nonsp = block->xheight * words_default_prop_nonspace;
00331   block->pr_space = block->pr_nonsp * textord_spacesize_ratioprop;
00332   if (!block->get_rows ()->empty ()) {
00333     ASSERT_HOST (block->xheight > 0);
00334     find_repeated_chars(block, textord_show_initial_words && testing_on);
00335 #ifndef GRAPHICS_DISABLED
00336     if (textord_show_initial_words && testing_on)
00337       //overlap_picture_ops(TRUE);
00338       ScrollView::Update();
00339 #endif
00340     compute_rows_pitch(block,
00341                        block_index,
00342                        textord_debug_pitch_test &&testing_on);
00343   }
00344 }
00345 
00346 
00347 /**********************************************************************
00348  * compute_rows_pitch
00349  *
00350  * Decide whether each row is fixed pitch individually.
00351  **********************************************************************/
00352 
00353 BOOL8 compute_rows_pitch(                    //find line stats
00354                          TO_BLOCK *block,    //block to do
00355                          inT32 block_index,  //block number
00356                          BOOL8 testing_on    //correct orientation
00357                         ) {
00358   inT32 maxwidth;                //of spaces
00359   TO_ROW *row;                   //current row
00360   inT32 row_index;               //row number.
00361   float lower, upper;            //cluster thresholds
00362   TO_ROW_IT row_it = block->get_rows ();
00363 
00364   row_index = 1;
00365   for (row_it.mark_cycle_pt (); !row_it.cycled_list (); row_it.forward ()) {
00366     row = row_it.data ();
00367     ASSERT_HOST (row->xheight > 0);
00368     row->compute_vertical_projection ();
00369     maxwidth = (inT32) ceil (row->xheight * textord_words_maxspace);
00370     if (row_pitch_stats (row, maxwidth, testing_on)
00371       && find_row_pitch (row, maxwidth,
00372       textord_dotmatrix_gap + 1, block, block_index,
00373     row_index, testing_on)) {
00374       if (row->fixed_pitch == 0) {
00375         lower = row->pr_nonsp;
00376         upper = row->pr_space;
00377         row->space_size = upper;
00378         row->kern_size = lower;
00379       }
00380     }
00381     else {
00382       row->fixed_pitch = 0.0f;   //insufficient data
00383       row->pitch_decision = PITCH_DUNNO;
00384     }
00385     row_index++;
00386   }
00387   return FALSE;
00388 }
00389 
00390 
00391 /**********************************************************************
00392  * try_doc_fixed
00393  *
00394  * Attempt to call the entire document fixed pitch.
00395  **********************************************************************/
00396 
00397 BOOL8 try_doc_fixed(                             //determine pitch
00398                     ICOORD page_tr,              //top right
00399                     TO_BLOCK_LIST *port_blocks,  //input list
00400                     float gradient               //page skew
00401                    ) {
00402   inT16 master_x;                //uniform shifts
00403   inT16 pitch;                   //median pitch.
00404   int x;                         //profile coord
00405   int prop_blocks;               //correct counts
00406   int fixed_blocks;
00407   int total_row_count;           //total in page
00408                                  //iterator
00409   TO_BLOCK_IT block_it = port_blocks;
00410   TO_BLOCK *block;               //current block;
00411   TO_ROW_IT row_it;              //row iterator
00412   TO_ROW *row;                   //current row
00413   inT16 projection_left;         //edges
00414   inT16 projection_right;
00415   inT16 row_left;                //edges of row
00416   inT16 row_right;
00417   ICOORDELT_LIST *master_cells;  //cells for page
00418   float master_y;                //uniform shifts
00419   float shift_factor;            //page skew correction
00420   float row_shift;               //shift for row
00421   float final_pitch;             //output pitch
00422   float row_y;                   //baseline
00423   STATS projection;              //entire page
00424   STATS pitches (0, MAX_ALLOWED_PITCH);
00425   //for median
00426   float sp_sd;                   //space sd
00427   inT16 mid_cuts;                //no of cheap cuts
00428   float pitch_sd;                //sync rating
00429 
00430   if (block_it.empty ()
00431     //      || block_it.data()==block_it.data_relative(1)
00432     || !textord_blockndoc_fixed)
00433     return FALSE;
00434   shift_factor = gradient / (gradient * gradient + 1);
00435   row_it.set_to_list (block_it.data ()->get_rows ());
00436   master_x = row_it.data ()->projection_left;
00437   master_y = row_it.data ()->baseline.y (master_x);
00438   projection_left = MAX_INT16;
00439   projection_right = -MAX_INT16;
00440   prop_blocks = 0;
00441   fixed_blocks = 0;
00442   total_row_count = 0;
00443 
00444   for (block_it.mark_cycle_pt (); !block_it.cycled_list ();
00445   block_it.forward ()) {
00446     block = block_it.data ();
00447     row_it.set_to_list (block->get_rows ());
00448     for (row_it.mark_cycle_pt (); !row_it.cycled_list (); row_it.forward ()) {
00449       row = row_it.data ();
00450       total_row_count++;
00451       if (row->fixed_pitch > 0)
00452         pitches.add ((inT32) (row->fixed_pitch), 1);
00453       //find median
00454       row_y = row->baseline.y (master_x);
00455       row_left =
00456         (inT16) (row->projection_left -
00457         shift_factor * (master_y - row_y));
00458       row_right =
00459         (inT16) (row->projection_right -
00460         shift_factor * (master_y - row_y));
00461       if (row_left < projection_left)
00462         projection_left = row_left;
00463       if (row_right > projection_right)
00464         projection_right = row_right;
00465     }
00466   }
00467   if (pitches.get_total () == 0)
00468     return FALSE;
00469   projection.set_range (projection_left, projection_right);
00470 
00471   for (block_it.mark_cycle_pt (); !block_it.cycled_list ();
00472   block_it.forward ()) {
00473     block = block_it.data ();
00474     row_it.set_to_list (block->get_rows ());
00475     for (row_it.mark_cycle_pt (); !row_it.cycled_list (); row_it.forward ()) {
00476       row = row_it.data ();
00477       row_y = row->baseline.y (master_x);
00478       row_left =
00479         (inT16) (row->projection_left -
00480         shift_factor * (master_y - row_y));
00481       for (x = row->projection_left; x < row->projection_right;
00482       x++, row_left++) {
00483         projection.add (row_left, row->projection.pile_count (x));
00484       }
00485     }
00486   }
00487 
00488   row_it.set_to_list (block_it.data ()->get_rows ());
00489   row = row_it.data ();
00490 #ifndef GRAPHICS_DISABLED
00491   if (textord_show_page_cuts && to_win != NULL)
00492     projection.plot (to_win, projection_left,
00493       row->intercept (), 1.0f, -1.0f, ScrollView::CORAL);
00494 #endif
00495   final_pitch = pitches.ile (0.5);
00496   pitch = (inT16) final_pitch;
00497   pitch_sd =
00498     tune_row_pitch (row, &projection, projection_left, projection_right,
00499     pitch * 0.75, final_pitch, sp_sd, mid_cuts,
00500     &row->char_cells, FALSE);
00501 
00502   if (textord_debug_pitch_metric)
00503     tprintf
00504       ("try_doc:props=%d:fixed=%d:pitch=%d:final_pitch=%g:pitch_sd=%g:sp_sd=%g:sd/trc=%g:sd/p=%g:sd/trc/p=%g\n",
00505       prop_blocks, fixed_blocks, pitch, final_pitch, pitch_sd, sp_sd,
00506       pitch_sd / total_row_count, pitch_sd / pitch,
00507       pitch_sd / total_row_count / pitch);
00508 
00509 #ifndef GRAPHICS_DISABLED
00510   if (textord_show_page_cuts && to_win != NULL) {
00511     master_cells = &row->char_cells;
00512     for (block_it.mark_cycle_pt (); !block_it.cycled_list ();
00513     block_it.forward ()) {
00514       block = block_it.data ();
00515       row_it.set_to_list (block->get_rows ());
00516       for (row_it.mark_cycle_pt (); !row_it.cycled_list ();
00517       row_it.forward ()) {
00518         row = row_it.data ();
00519         row_y = row->baseline.y (master_x);
00520         row_shift = shift_factor * (master_y - row_y);
00521         plot_row_cells(to_win, ScrollView::GOLDENROD, row, row_shift, master_cells);
00522       }
00523     }
00524   }
00525 #endif
00526   row->char_cells.clear ();
00527   return FALSE;
00528 }
00529 
00530 
00531 /**********************************************************************
00532  * try_block_fixed
00533  *
00534  * Try to call the entire block fixed.
00535  **********************************************************************/
00536 
00537 BOOL8 try_block_fixed(                   //find line stats
00538                       TO_BLOCK *block,   //block to do
00539                       inT32 block_index  //block number
00540                      ) {
00541   return FALSE;
00542 }
00543 
00544 
00545 /**********************************************************************
00546  * try_rows_fixed
00547  *
00548  * Decide whether each row is fixed pitch individually.
00549  **********************************************************************/
00550 
00551 BOOL8 try_rows_fixed(                    //find line stats
00552                      TO_BLOCK *block,    //block to do
00553                      inT32 block_index,  //block number
00554                      BOOL8 testing_on    //correct orientation
00555                     ) {
00556   TO_ROW *row;                   //current row
00557   inT32 row_index;               //row number.
00558   inT32 def_fixed = 0;           //counters
00559   inT32 def_prop = 0;
00560   inT32 maybe_fixed = 0;
00561   inT32 maybe_prop = 0;
00562   inT32 dunno = 0;
00563   inT32 corr_fixed = 0;
00564   inT32 corr_prop = 0;
00565   float lower, upper;            //cluster thresholds
00566   TO_ROW_IT row_it = block->get_rows ();
00567 
00568   row_index = 1;
00569   for (row_it.mark_cycle_pt (); !row_it.cycled_list (); row_it.forward ()) {
00570     row = row_it.data ();
00571     ASSERT_HOST (row->xheight > 0);
00572     if (row->fixed_pitch > 0 &&
00573         fixed_pitch_row(row, block->block, block_index)) {
00574       if (row->fixed_pitch == 0) {
00575         lower = row->pr_nonsp;
00576         upper = row->pr_space;
00577         row->space_size = upper;
00578         row->kern_size = lower;
00579       }
00580     }
00581     row_index++;
00582   }
00583   count_block_votes(block,
00584                     def_fixed,
00585                     def_prop,
00586                     maybe_fixed,
00587                     maybe_prop,
00588                     corr_fixed,
00589                     corr_prop,
00590                     dunno);
00591   if (testing_on
00592     && (textord_debug_pitch_test
00593   || textord_blocksall_prop || textord_blocksall_fixed)) {
00594     tprintf ("Initially:");
00595     print_block_counts(block, block_index);
00596   }
00597   if (def_fixed > def_prop * textord_words_veto_power)
00598     block->pitch_decision = PITCH_DEF_FIXED;
00599   else if (def_prop > def_fixed * textord_words_veto_power)
00600     block->pitch_decision = PITCH_DEF_PROP;
00601   else if (def_fixed > 0 || def_prop > 0)
00602     block->pitch_decision = PITCH_DUNNO;
00603   else if (maybe_fixed > maybe_prop * textord_words_veto_power)
00604     block->pitch_decision = PITCH_MAYBE_FIXED;
00605   else if (maybe_prop > maybe_fixed * textord_words_veto_power)
00606     block->pitch_decision = PITCH_MAYBE_PROP;
00607   else
00608     block->pitch_decision = PITCH_DUNNO;
00609   return FALSE;
00610 }
00611 
00612 
00613 /**********************************************************************
00614  * print_block_counts
00615  *
00616  * Count up how many rows have what decision and print the results.
00617  **********************************************************************/
00618 
00619 void print_block_counts(                   //find line stats
00620                         TO_BLOCK *block,   //block to do
00621                         inT32 block_index  //block number
00622                        ) {
00623   inT32 def_fixed = 0;           //counters
00624   inT32 def_prop = 0;
00625   inT32 maybe_fixed = 0;
00626   inT32 maybe_prop = 0;
00627   inT32 dunno = 0;
00628   inT32 corr_fixed = 0;
00629   inT32 corr_prop = 0;
00630 
00631   count_block_votes(block,
00632                     def_fixed,
00633                     def_prop,
00634                     maybe_fixed,
00635                     maybe_prop,
00636                     corr_fixed,
00637                     corr_prop,
00638                     dunno);
00639   tprintf ("Block %d has (%d,%d,%d)",
00640     block_index, def_fixed, maybe_fixed, corr_fixed);
00641   if (textord_blocksall_prop && (def_fixed || maybe_fixed || corr_fixed))
00642     tprintf (" (Wrongly)");
00643   tprintf (" fixed, (%d,%d,%d)", def_prop, maybe_prop, corr_prop);
00644   if (textord_blocksall_fixed && (def_prop || maybe_prop || corr_prop))
00645     tprintf (" (Wrongly)");
00646   tprintf (" prop, %d dunno\n", dunno);
00647 }
00648 
00649 
00650 /**********************************************************************
00651  * count_block_votes
00652  *
00653  * Count the number of rows in the block with each kind of pitch_decision.
00654  **********************************************************************/
00655 
00656 void count_block_votes(                   //find line stats
00657                        TO_BLOCK *block,   //block to do
00658                        inT32 &def_fixed,  //add to counts
00659                        inT32 &def_prop,
00660                        inT32 &maybe_fixed,
00661                        inT32 &maybe_prop,
00662                        inT32 &corr_fixed,
00663                        inT32 &corr_prop,
00664                        inT32 &dunno) {
00665   TO_ROW *row;                   //current row
00666   TO_ROW_IT row_it = block->get_rows ();
00667 
00668   for (row_it.mark_cycle_pt (); !row_it.cycled_list (); row_it.forward ()) {
00669     row = row_it.data ();
00670     switch (row->pitch_decision) {
00671       case PITCH_DUNNO:
00672         dunno++;
00673         break;
00674       case PITCH_DEF_PROP:
00675         def_prop++;
00676         break;
00677       case PITCH_MAYBE_PROP:
00678         maybe_prop++;
00679         break;
00680       case PITCH_DEF_FIXED:
00681         def_fixed++;
00682         break;
00683       case PITCH_MAYBE_FIXED:
00684         maybe_fixed++;
00685         break;
00686       case PITCH_CORR_PROP:
00687         corr_prop++;
00688         break;
00689       case PITCH_CORR_FIXED:
00690         corr_fixed++;
00691         break;
00692     }
00693   }
00694 }
00695 
00696 
00697 /**********************************************************************
00698  * row_pitch_stats
00699  *
00700  * Decide whether each row is fixed pitch individually.
00701  **********************************************************************/
00702 
00703 BOOL8 row_pitch_stats(                  //find line stats
00704                       TO_ROW *row,      //current row
00705                       inT32 maxwidth,   //of spaces
00706                       BOOL8 testing_on  //correct orientation
00707                      ) {
00708   BLOBNBOX *blob;                //current blob
00709   int gap_index;                 //current gap
00710   inT32 prev_x;                  //end of prev blob
00711   inT32 cluster_count;           //no of clusters
00712   inT32 prev_count;              //of clusters
00713   inT32 smooth_factor;           //for smoothing stats
00714   TBOX blob_box;                  //bounding box
00715   float lower, upper;            //cluster thresholds
00716                                  //gap sizes
00717   float gaps[BLOCK_STATS_CLUSTERS];
00718                                  //blobs
00719   BLOBNBOX_IT blob_it = row->blob_list ();
00720   STATS gap_stats (0, maxwidth);
00721   STATS cluster_stats[BLOCK_STATS_CLUSTERS + 1];
00722   //clusters
00723 
00724   smooth_factor =
00725     (inT32) (row->xheight * textord_wordstats_smooth_factor + 1.5);
00726   if (!blob_it.empty ()) {
00727     prev_x = blob_it.data ()->bounding_box ().right ();
00728     blob_it.forward ();
00729     while (!blob_it.at_first ()) {
00730       blob = blob_it.data ();
00731       if (!blob->joined_to_prev ()) {
00732         blob_box = blob->bounding_box ();
00733         if (blob_box.left () - prev_x < maxwidth)
00734           gap_stats.add (blob_box.left () - prev_x, 1);
00735         prev_x = blob_box.right ();
00736       }
00737       blob_it.forward ();
00738     }
00739   }
00740   if (gap_stats.get_total () == 0) {
00741     return FALSE;
00742   }
00743   cluster_count = 0;
00744   lower = row->xheight * words_initial_lower;
00745   upper = row->xheight * words_initial_upper;
00746   gap_stats.smooth (smooth_factor);
00747   do {
00748     prev_count = cluster_count;
00749     cluster_count = gap_stats.cluster (lower, upper,
00750       textord_spacesize_ratioprop,
00751       BLOCK_STATS_CLUSTERS, cluster_stats);
00752   }
00753   while (cluster_count > prev_count && cluster_count < BLOCK_STATS_CLUSTERS);
00754   if (cluster_count < 1) {
00755     return FALSE;
00756   }
00757   for (gap_index = 0; gap_index < cluster_count; gap_index++)
00758     gaps[gap_index] = cluster_stats[gap_index + 1].ile (0.5);
00759   //get medians
00760   if (testing_on) {
00761     tprintf ("cluster_count=%d:", cluster_count);
00762     for (gap_index = 0; gap_index < cluster_count; gap_index++)
00763       tprintf (" %g(%d)", gaps[gap_index],
00764         cluster_stats[gap_index + 1].get_total ());
00765     tprintf ("\n");
00766   }
00767   qsort (gaps, cluster_count, sizeof (float), sort_floats);
00768 
00769   //Try to find proportional non-space and space for row.
00770   lower = row->xheight * words_default_prop_nonspace;
00771   upper = row->xheight * textord_words_min_minspace;
00772   for (gap_index = 0; gap_index < cluster_count
00773     && gaps[gap_index] < lower; gap_index++);
00774   if (gap_index == 0) {
00775     if (testing_on)
00776       tprintf ("No clusters below nonspace threshold!!\n");
00777     if (cluster_count > 1) {
00778       row->pr_nonsp = gaps[0];
00779       row->pr_space = gaps[1];
00780     }
00781     else {
00782       row->pr_nonsp = lower;
00783       row->pr_space = gaps[0];
00784     }
00785   }
00786   else {
00787     row->pr_nonsp = gaps[gap_index - 1];
00788     while (gap_index < cluster_count && gaps[gap_index] < upper)
00789       gap_index++;
00790     if (gap_index == cluster_count) {
00791       if (testing_on)
00792         tprintf ("No clusters above nonspace threshold!!\n");
00793       row->pr_space = lower * textord_spacesize_ratioprop;
00794     }
00795     else
00796       row->pr_space = gaps[gap_index];
00797   }
00798 
00799   //Now try to find the fixed pitch space and non-space.
00800   upper = row->xheight * words_default_fixed_space;
00801   for (gap_index = 0; gap_index < cluster_count
00802     && gaps[gap_index] < upper; gap_index++);
00803   if (gap_index == 0) {
00804     if (testing_on)
00805       tprintf ("No clusters below space threshold!!\n");
00806     row->fp_nonsp = upper;
00807     row->fp_space = gaps[0];
00808   }
00809   else {
00810     row->fp_nonsp = gaps[gap_index - 1];
00811     if (gap_index == cluster_count) {
00812       if (testing_on)
00813         tprintf ("No clusters above space threshold!!\n");
00814       row->fp_space = row->xheight;
00815     }
00816     else
00817       row->fp_space = gaps[gap_index];
00818   }
00819   if (testing_on) {
00820     tprintf
00821       ("Initial estimates:pr_nonsp=%g, pr_space=%g, fp_nonsp=%g, fp_space=%g\n",
00822       row->pr_nonsp, row->pr_space, row->fp_nonsp, row->fp_space);
00823   }
00824   return TRUE;                   //computed some stats
00825 }
00826 
00827 
00828 /**********************************************************************
00829  * find_row_pitch
00830  *
00831  * Check to see if this row could be fixed pitch using the given spacings.
00832  * Blobs with gaps smaller than the lower threshold are assumed to be one.
00833  * The larger threshold is the word gap threshold.
00834  **********************************************************************/
00835 
00836 BOOL8 find_row_pitch(                    //find lines
00837                      TO_ROW *row,        //row to do
00838                      inT32 maxwidth,     //max permitted space
00839                      inT32 dm_gap,       //ignorable gaps
00840                      TO_BLOCK *block,    //block of row
00841                      inT32 block_index,  //block_number
00842                      inT32 row_index,    //number of row
00843                      BOOL8 testing_on    //correct orientation
00844                     ) {
00845   BOOL8 used_dm_model;           //looks lik dot matrix
00846   float min_space;               //estimate threshold
00847   float non_space;               //gap size
00848   float gap_iqr;                 //interquartile range
00849   float pitch_iqr;
00850   float dm_gap_iqr;              //interquartile range
00851   float dm_pitch_iqr;
00852   float dm_pitch;                //pitch with dm on
00853   float pitch;                   //revised estimate
00854   float initial_pitch;           //guess at pitch
00855   STATS gap_stats (0, maxwidth);
00856                                  //centre-centre
00857   STATS pitch_stats (0, maxwidth);
00858 
00859   row->fixed_pitch = 0.0f;
00860   initial_pitch = row->fp_space;
00861   if (initial_pitch > row->xheight * (1 + words_default_fixed_limit))
00862     initial_pitch = row->xheight;//keep pitch decent
00863   non_space = row->fp_nonsp;
00864   if (non_space > initial_pitch)
00865     non_space = initial_pitch;
00866   min_space = (initial_pitch + non_space) / 2;
00867 
00868   if (!count_pitch_stats (row, &gap_stats, &pitch_stats,
00869   initial_pitch, min_space, TRUE, FALSE, dm_gap)) {
00870     dm_gap_iqr = 0.0001;
00871     dm_pitch_iqr = maxwidth * 2.0f;
00872     dm_pitch = initial_pitch;
00873   }
00874   else {
00875     dm_gap_iqr = gap_stats.ile (0.75) - gap_stats.ile (0.25);
00876     dm_pitch_iqr = pitch_stats.ile (0.75) - pitch_stats.ile (0.25);
00877     dm_pitch = pitch_stats.ile (0.5);
00878   }
00879   gap_stats.clear ();
00880   pitch_stats.clear ();
00881   if (!count_pitch_stats (row, &gap_stats, &pitch_stats,
00882   initial_pitch, min_space, TRUE, FALSE, 0)) {
00883     gap_iqr = 0.0001;
00884     pitch_iqr = maxwidth * 3.0f;
00885   }
00886   else {
00887     gap_iqr = gap_stats.ile (0.75) - gap_stats.ile (0.25);
00888     pitch_iqr = pitch_stats.ile (0.75) - pitch_stats.ile (0.25);
00889     if (testing_on)
00890       tprintf
00891         ("First fp iteration:initial_pitch=%g, gap_iqr=%g, pitch_iqr=%g, pitch=%g\n",
00892         initial_pitch, gap_iqr, pitch_iqr, pitch_stats.ile (0.5));
00893     initial_pitch = pitch_stats.ile (0.5);
00894     if (min_space > initial_pitch
00895       && count_pitch_stats (row, &gap_stats, &pitch_stats,
00896     initial_pitch, initial_pitch, TRUE, FALSE, 0)) {
00897       min_space = initial_pitch;
00898       gap_iqr = gap_stats.ile (0.75) - gap_stats.ile (0.25);
00899       pitch_iqr = pitch_stats.ile (0.75) - pitch_stats.ile (0.25);
00900       if (testing_on)
00901         tprintf
00902           ("Revised fp iteration:initial_pitch=%g, gap_iqr=%g, pitch_iqr=%g, pitch=%g\n",
00903           initial_pitch, gap_iqr, pitch_iqr, pitch_stats.ile (0.5));
00904       initial_pitch = pitch_stats.ile (0.5);
00905     }
00906   }
00907   if (textord_debug_pitch_metric)
00908     tprintf("Blk=%d:Row=%d:%c:p_iqr=%g:g_iqr=%g:dm_p_iqr=%g:dm_g_iqr=%g:%c:",
00909             block_index, row_index, 'X',
00910             pitch_iqr, gap_iqr, dm_pitch_iqr, dm_gap_iqr,
00911             pitch_iqr > maxwidth && dm_pitch_iqr > maxwidth ? 'D' :
00912               (pitch_iqr * dm_gap_iqr <= dm_pitch_iqr * gap_iqr ? 'S' : 'M'));
00913   if (pitch_iqr > maxwidth && dm_pitch_iqr > maxwidth) {
00914     row->pitch_decision = PITCH_DUNNO;
00915     if (textord_debug_pitch_metric)
00916       tprintf ("\n");
00917     return FALSE;                //insufficient data
00918   }
00919   if (pitch_iqr * dm_gap_iqr <= dm_pitch_iqr * gap_iqr) {
00920     if (testing_on)
00921       tprintf
00922         ("Choosing non dm version:pitch_iqr=%g, gap_iqr=%g, dm_pitch_iqr=%g, dm_gap_iqr=%g\n",
00923         pitch_iqr, gap_iqr, dm_pitch_iqr, dm_gap_iqr);
00924     gap_iqr = gap_stats.ile (0.75) - gap_stats.ile (0.25);
00925     pitch_iqr = pitch_stats.ile (0.75) - pitch_stats.ile (0.25);
00926     pitch = pitch_stats.ile (0.5);
00927     used_dm_model = FALSE;
00928   }
00929   else {
00930     if (testing_on)
00931       tprintf
00932         ("Choosing dm version:pitch_iqr=%g, gap_iqr=%g, dm_pitch_iqr=%g, dm_gap_iqr=%g\n",
00933         pitch_iqr, gap_iqr, dm_pitch_iqr, dm_gap_iqr);
00934     gap_iqr = dm_gap_iqr;
00935     pitch_iqr = dm_pitch_iqr;
00936     pitch = dm_pitch;
00937     used_dm_model = TRUE;
00938   }
00939   if (textord_debug_pitch_metric) {
00940     tprintf ("rev_p_iqr=%g:rev_g_iqr=%g:pitch=%g:",
00941       pitch_iqr, gap_iqr, pitch);
00942     tprintf ("p_iqr/g=%g:p_iqr/x=%g:iqr_res=%c:",
00943       pitch_iqr / gap_iqr, pitch_iqr / block->xheight,
00944       pitch_iqr < gap_iqr * textord_fpiqr_ratio
00945       && pitch_iqr < block->xheight * textord_max_pitch_iqr
00946       && pitch < block->xheight * textord_words_default_maxspace
00947       ? 'F' : 'P');
00948   }
00949   if (pitch_iqr < gap_iqr * textord_fpiqr_ratio
00950     && pitch_iqr < block->xheight * textord_max_pitch_iqr
00951     && pitch < block->xheight * textord_words_default_maxspace)
00952     row->pitch_decision = PITCH_MAYBE_FIXED;
00953   else
00954     row->pitch_decision = PITCH_MAYBE_PROP;
00955   row->fixed_pitch = pitch;
00956   row->kern_size = gap_stats.ile (0.5);
00957   row->min_space = (inT32) (row->fixed_pitch + non_space) / 2;
00958   if (row->min_space > row->fixed_pitch)
00959     row->min_space = (inT32) row->fixed_pitch;
00960   row->max_nonspace = row->min_space;
00961   row->space_size = row->fixed_pitch;
00962   row->space_threshold = (row->max_nonspace + row->min_space) / 2;
00963   row->used_dm_model = used_dm_model;
00964   return TRUE;
00965 }
00966 
00967 
00968 /**********************************************************************
00969  * fixed_pitch_row
00970  *
00971  * Check to see if this row could be fixed pitch using the given spacings.
00972  * Blobs with gaps smaller than the lower threshold are assumed to be one.
00973  * The larger threshold is the word gap threshold.
00974  **********************************************************************/
00975 
00976 BOOL8 fixed_pitch_row(TO_ROW *row,       // row to do
00977                       BLOCK* block,
00978                       inT32 block_index  // block_number
00979                      ) {
00980   const char *res_string;        // pitch result
00981   inT16 mid_cuts;                // no of cheap cuts
00982   float non_space;               // gap size
00983   float pitch_sd;                // error on pitch
00984   float sp_sd = 0.0f;            // space sd
00985 
00986   non_space = row->fp_nonsp;
00987   if (non_space > row->fixed_pitch)
00988     non_space = row->fixed_pitch;
00989   POLY_BLOCK* pb = block != NULL ? block->poly_block() : NULL;
00990   if (textord_all_prop || (pb != NULL && !pb->IsText())) {
00991     // Set the decision to definitely proportional.
00992     pitch_sd = textord_words_def_prop * row->fixed_pitch;
00993     row->pitch_decision = PITCH_DEF_PROP;
00994   } else {
00995     pitch_sd = tune_row_pitch (row, &row->projection, row->projection_left,
00996                                row->projection_right,
00997                                (row->fixed_pitch + non_space * 3) / 4,
00998                                row->fixed_pitch, sp_sd, mid_cuts,
00999                                &row->char_cells,
01000                                block_index == textord_debug_block);
01001     if (pitch_sd < textord_words_pitchsd_threshold * row->fixed_pitch
01002       && ((pitsync_linear_version & 3) < 3
01003       || ((pitsync_linear_version & 3) >= 3 && (row->used_dm_model
01004       || sp_sd > 20
01005     || (pitch_sd == 0 && sp_sd > 10))))) {
01006       if (pitch_sd < textord_words_def_fixed * row->fixed_pitch
01007         && !row->all_caps
01008         && ((pitsync_linear_version & 3) < 3 || sp_sd > 20))
01009         row->pitch_decision = PITCH_DEF_FIXED;
01010       else
01011         row->pitch_decision = PITCH_MAYBE_FIXED;
01012     }
01013     else if ((pitsync_linear_version & 3) < 3
01014       || sp_sd > 20
01015       || mid_cuts > 0
01016       || pitch_sd >= textord_words_pitchsd_threshold * row->fixed_pitch) {
01017       if (pitch_sd < textord_words_def_prop * row->fixed_pitch)
01018         row->pitch_decision = PITCH_MAYBE_PROP;
01019       else
01020         row->pitch_decision = PITCH_DEF_PROP;
01021     }
01022     else
01023       row->pitch_decision = PITCH_DUNNO;
01024   }
01025 
01026   if (textord_debug_pitch_metric) {
01027     res_string = "??";
01028     switch (row->pitch_decision) {
01029       case PITCH_DEF_PROP:
01030         res_string = "DP";
01031         break;
01032       case PITCH_MAYBE_PROP:
01033         res_string = "MP";
01034         break;
01035       case PITCH_DEF_FIXED:
01036         res_string = "DF";
01037         break;
01038       case PITCH_MAYBE_FIXED:
01039         res_string = "MF";
01040       default:
01041         res_string = "??";
01042     }
01043     tprintf (":sd/p=%g:occ=%g:init_res=%s\n",
01044       pitch_sd / row->fixed_pitch, sp_sd, res_string);
01045   }
01046   return TRUE;
01047 }
01048 
01049 
01050 /**********************************************************************
01051  * count_pitch_stats
01052  *
01053  * Count up the gap and pitch stats on the block to see if it is fixed pitch.
01054  * Blobs with gaps smaller than the lower threshold are assumed to be one.
01055  * The larger threshold is the word gap threshold.
01056  * The return value indicates whether there were any decent values to use.
01057  **********************************************************************/
01058 
01059 BOOL8 count_pitch_stats(                       //find lines
01060                         TO_ROW *row,           //row to do
01061                         STATS *gap_stats,      //blob gaps
01062                         STATS *pitch_stats,    //centre-centre stats
01063                         float initial_pitch,   //guess at pitch
01064                         float min_space,       //estimate space size
01065                         BOOL8 ignore_outsize,  //discard big objects
01066                         BOOL8 split_outsize,   //split big objects
01067                         inT32 dm_gap           //ignorable gaps
01068                        ) {
01069   BOOL8 prev_valid;              //not word broken
01070   BLOBNBOX *blob;                //current blob
01071                                  //blobs
01072   BLOBNBOX_IT blob_it = row->blob_list ();
01073   inT32 prev_right;              //end of prev blob
01074   inT32 prev_centre;             //centre of previous blob
01075   inT32 x_centre;                //centre of this blob
01076   inT32 blob_width;              //width of blob
01077   inT32 width_units;             //no of widths in blob
01078   float width;                   //blob width
01079   TBOX blob_box;                  //bounding box
01080   TBOX joined_box;                //of super blob
01081 
01082   gap_stats->clear ();
01083   pitch_stats->clear ();
01084   if (blob_it.empty ())
01085     return FALSE;
01086   prev_valid = FALSE;
01087   prev_centre = 0;
01088   prev_right = 0;                //stop complier warning
01089   joined_box = blob_it.data ()->bounding_box ();
01090   do {
01091     blob_it.forward ();
01092     blob = blob_it.data ();
01093     if (!blob->joined_to_prev ()) {
01094       blob_box = blob->bounding_box ();
01095       if ((blob_box.left () - joined_box.right () < dm_gap
01096         && !blob_it.at_first ())
01097         || blob->cblob() == NULL)
01098         joined_box += blob_box;  //merge blobs
01099       else {
01100         blob_width = joined_box.width ();
01101         if (split_outsize) {
01102           width_units =
01103             (inT32) floor ((float) blob_width / initial_pitch + 0.5);
01104           if (width_units < 1)
01105             width_units = 1;
01106           width_units--;
01107         }
01108         else if (ignore_outsize) {
01109           width = (float) blob_width / initial_pitch;
01110           width_units = width < 1 + words_default_fixed_limit
01111             && width > 1 - words_default_fixed_limit ? 0 : -1;
01112         }
01113         else
01114           width_units = 0;       //everything in
01115         x_centre = (inT32) (joined_box.left ()
01116           + (blob_width -
01117           width_units * initial_pitch) / 2);
01118         if (prev_valid && width_units >= 0) {
01119           //                                              if (width_units>0)
01120           //                                              {
01121           //                                                      tprintf("wu=%d, width=%d, xc=%d, adding %d\n",
01122           //                                                              width_units,blob_width,x_centre,x_centre-prev_centre);
01123           //                                              }
01124           gap_stats->add (joined_box.left () - prev_right, 1);
01125           pitch_stats->add (x_centre - prev_centre, 1);
01126         }
01127         prev_centre = (inT32) (x_centre + width_units * initial_pitch);
01128         prev_right = joined_box.right ();
01129         prev_valid = blob_box.left () - joined_box.right () < min_space;
01130         prev_valid = prev_valid && width_units >= 0;
01131         joined_box = blob_box;
01132       }
01133     }
01134   }
01135   while (!blob_it.at_first ());
01136   return gap_stats->get_total () >= 3;
01137 }
01138 
01139 
01140 /**********************************************************************
01141  * tune_row_pitch
01142  *
01143  * Use a dp algorithm to fit the character cells and return the sd of
01144  * the cell size over the row.
01145  **********************************************************************/
01146 
01147 float tune_row_pitch(                             //find fp cells
01148                      TO_ROW *row,                 //row to do
01149                      STATS *projection,           //vertical projection
01150                      inT16 projection_left,       //edge of projection
01151                      inT16 projection_right,      //edge of projection
01152                      float space_size,            //size of blank
01153                      float &initial_pitch,        //guess at pitch
01154                      float &best_sp_sd,           //space sd
01155                      inT16 &best_mid_cuts,        //no of cheap cuts
01156                      ICOORDELT_LIST *best_cells,  //row cells
01157                      BOOL8 testing_on             //inidividual words
01158                     ) {
01159   int pitch_delta;               //offset pitch
01160   inT16 mid_cuts;                //cheap cuts
01161   float pitch_sd;                //current sd
01162   float best_sd;                 //best result
01163   float best_pitch;              //pitch for best result
01164   float initial_sd;              //starting error
01165   float sp_sd;                   //space sd
01166   ICOORDELT_LIST test_cells;     //row cells
01167   ICOORDELT_IT best_it;          //start of best list
01168 
01169   if (textord_fast_pitch_test)
01170     return tune_row_pitch2 (row, projection, projection_left,
01171       projection_right, space_size, initial_pitch,
01172       best_sp_sd,
01173     //space sd
01174       best_mid_cuts, best_cells, testing_on);
01175   if (textord_disable_pitch_test) {
01176     best_sp_sd = initial_pitch;
01177     return initial_pitch;
01178   }
01179   initial_sd =
01180     compute_pitch_sd(row,
01181                      projection,
01182                      projection_left,
01183                      projection_right,
01184                      space_size,
01185                      initial_pitch,
01186                      best_sp_sd,
01187                      best_mid_cuts,
01188                      best_cells,
01189                      testing_on);
01190   best_sd = initial_sd;
01191   best_pitch = initial_pitch;
01192   if (testing_on)
01193     tprintf ("tune_row_pitch:start pitch=%g, sd=%g\n", best_pitch, best_sd);
01194   for (pitch_delta = 1; pitch_delta <= textord_pitch_range; pitch_delta++) {
01195     pitch_sd =
01196       compute_pitch_sd (row, projection, projection_left, projection_right,
01197       space_size, initial_pitch + pitch_delta, sp_sd,
01198       mid_cuts, &test_cells, testing_on);
01199     if (testing_on)
01200       tprintf ("testing pitch at %g, sd=%g\n", initial_pitch + pitch_delta,
01201         pitch_sd);
01202     if (pitch_sd < best_sd) {
01203       best_sd = pitch_sd;
01204       best_mid_cuts = mid_cuts;
01205       best_sp_sd = sp_sd;
01206       best_pitch = initial_pitch + pitch_delta;
01207       best_cells->clear ();
01208       best_it.set_to_list (best_cells);
01209       best_it.add_list_after (&test_cells);
01210     }
01211     else
01212       test_cells.clear ();
01213     if (pitch_sd > initial_sd)
01214       break;                     //getting worse
01215   }
01216   for (pitch_delta = 1; pitch_delta <= textord_pitch_range; pitch_delta++) {
01217     pitch_sd =
01218       compute_pitch_sd (row, projection, projection_left, projection_right,
01219       space_size, initial_pitch - pitch_delta, sp_sd,
01220       mid_cuts, &test_cells, testing_on);
01221     if (testing_on)
01222       tprintf ("testing pitch at %g, sd=%g\n", initial_pitch - pitch_delta,
01223         pitch_sd);
01224     if (pitch_sd < best_sd) {
01225       best_sd = pitch_sd;
01226       best_mid_cuts = mid_cuts;
01227       best_sp_sd = sp_sd;
01228       best_pitch = initial_pitch - pitch_delta;
01229       best_cells->clear ();
01230       best_it.set_to_list (best_cells);
01231       best_it.add_list_after (&test_cells);
01232     }
01233     else
01234       test_cells.clear ();
01235     if (pitch_sd > initial_sd)
01236       break;
01237   }
01238   initial_pitch = best_pitch;
01239 
01240   if (textord_debug_pitch_metric)
01241     print_pitch_sd(row,
01242                    projection,
01243                    projection_left,
01244                    projection_right,
01245                    space_size,
01246                    best_pitch);
01247 
01248   return best_sd;
01249 }
01250 
01251 
01252 /**********************************************************************
01253  * tune_row_pitch2
01254  *
01255  * Use a dp algorithm to fit the character cells and return the sd of
01256  * the cell size over the row.
01257  **********************************************************************/
01258 
01259 float tune_row_pitch2(                             //find fp cells
01260                       TO_ROW *row,                 //row to do
01261                       STATS *projection,           //vertical projection
01262                       inT16 projection_left,       //edge of projection
01263                       inT16 projection_right,      //edge of projection
01264                       float space_size,            //size of blank
01265                       float &initial_pitch,        //guess at pitch
01266                       float &best_sp_sd,           //space sd
01267                       inT16 &best_mid_cuts,        //no of cheap cuts
01268                       ICOORDELT_LIST *best_cells,  //row cells
01269                       BOOL8 testing_on             //inidividual words
01270                      ) {
01271   int pitch_delta;               //offset pitch
01272   inT16 pixel;                   //pixel coord
01273   inT16 best_pixel;              //pixel coord
01274   inT16 best_delta;              //best pitch
01275   inT16 best_pitch;              //best pitch
01276   inT16 start;                   //of good range
01277   inT16 end;                     //of good range
01278   inT32 best_count;              //lowest sum
01279   float best_sd;                 //best result
01280   STATS *sum_proj;               //summed projection
01281 
01282   best_sp_sd = initial_pitch;
01283 
01284   if (textord_disable_pitch_test) {
01285     return initial_pitch;
01286   }
01287   sum_proj = new STATS[textord_pitch_range * 2 + 1];
01288   if (sum_proj == NULL)
01289     return initial_pitch;
01290   best_pitch = (inT32) initial_pitch;
01291 
01292   for (pitch_delta = -textord_pitch_range; pitch_delta <= textord_pitch_range;
01293     pitch_delta++)
01294   sum_proj[textord_pitch_range + pitch_delta].set_range (0,
01295       best_pitch +
01296       pitch_delta + 1);
01297   for (pixel = projection_left; pixel <= projection_right; pixel++) {
01298     for (pitch_delta = -textord_pitch_range;
01299       pitch_delta <= textord_pitch_range; pitch_delta++)
01300     sum_proj[textord_pitch_range +
01301         pitch_delta].add ((pixel - projection_left) % (best_pitch +
01302         pitch_delta),
01303         projection->pile_count (pixel));
01304   }
01305   best_count = sum_proj[textord_pitch_range].pile_count (0);
01306   best_delta = 0;
01307   best_pixel = 0;
01308   for (pitch_delta = -textord_pitch_range; pitch_delta <= textord_pitch_range;
01309   pitch_delta++) {
01310     for (pixel = 0; pixel < best_pitch + pitch_delta; pixel++) {
01311       if (sum_proj[textord_pitch_range + pitch_delta].pile_count (pixel)
01312       < best_count) {
01313         best_count =
01314           sum_proj[textord_pitch_range +
01315           pitch_delta].pile_count (pixel);
01316         best_delta = pitch_delta;
01317         best_pixel = pixel;
01318       }
01319     }
01320   }
01321   if (testing_on)
01322     tprintf ("tune_row_pitch:start pitch=%g, best_delta=%d, count=%d\n",
01323       initial_pitch, best_delta, best_count);
01324   best_pitch += best_delta;
01325   initial_pitch = best_pitch;
01326   best_count++;
01327   best_count += best_count;
01328   for (start = best_pixel - 2; start > best_pixel - best_pitch
01329     && sum_proj[textord_pitch_range +
01330     best_delta].pile_count (start % best_pitch) <= best_count;
01331     start--);
01332   for (end = best_pixel + 2;
01333     end < best_pixel + best_pitch
01334     && sum_proj[textord_pitch_range +
01335     best_delta].pile_count (end % best_pitch) <= best_count;
01336     end++);
01337 
01338   best_sd =
01339     compute_pitch_sd(row,
01340                      projection,
01341                      projection_left,
01342                      projection_right,
01343                      space_size,
01344                      initial_pitch,
01345                      best_sp_sd,
01346                      best_mid_cuts,
01347                      best_cells,
01348                      testing_on,
01349                      start,
01350                      end);
01351   if (testing_on)
01352     tprintf ("tune_row_pitch:output pitch=%g, sd=%g\n", initial_pitch,
01353       best_sd);
01354 
01355   if (textord_debug_pitch_metric)
01356     print_pitch_sd(row,
01357                    projection,
01358                    projection_left,
01359                    projection_right,
01360                    space_size,
01361                    initial_pitch);
01362 
01363   delete[]sum_proj;
01364 
01365   return best_sd;
01366 }
01367 
01368 
01369 /**********************************************************************
01370  * compute_pitch_sd
01371  *
01372  * Use a dp algorithm to fit the character cells and return the sd of
01373  * the cell size over the row.
01374  **********************************************************************/
01375 
01376 float compute_pitch_sd(                            //find fp cells
01377                        TO_ROW *row,                //row to do
01378                        STATS *projection,          //vertical projection
01379                        inT16 projection_left,      //edge
01380                        inT16 projection_right,     //edge
01381                        float space_size,           //size of blank
01382                        float initial_pitch,        //guess at pitch
01383                        float &sp_sd,               //space sd
01384                        inT16 &mid_cuts,            //no of free cuts
01385                        ICOORDELT_LIST *row_cells,  //list of chop pts
01386                        BOOL8 testing_on,           //inidividual words
01387                        inT16 start,                //start of good range
01388                        inT16 end                   //end of good range
01389                       ) {
01390   inT16 occupation;              //no of cells in word.
01391                                  //blobs
01392   BLOBNBOX_IT blob_it = row->blob_list ();
01393   BLOBNBOX_IT start_it;          //start of word
01394   BLOBNBOX_IT plot_it;           //for plotting
01395   inT16 blob_count;              //no of blobs
01396   TBOX blob_box;                  //bounding box
01397   TBOX prev_box;                  //of super blob
01398   inT32 prev_right;              //of word sync
01399   int scale_factor;              //on scores for big words
01400   inT32 sp_count;                //spaces
01401   FPSEGPT_LIST seg_list;         //char cells
01402   FPSEGPT_IT seg_it;             //iterator
01403   inT16 segpos;                  //position of segment
01404   inT16 cellpos;                 //previous cell boundary
01405                                  //iterator
01406   ICOORDELT_IT cell_it = row_cells;
01407   ICOORDELT *cell;               //new cell
01408   double sqsum;                  //sum of squares
01409   double spsum;                  //of spaces
01410   double sp_var;                 //space error
01411   double word_sync;              //result for word
01412   inT32 total_count;             //total blobs
01413 
01414   if ((pitsync_linear_version & 3) > 1) {
01415     word_sync = compute_pitch_sd2 (row, projection, projection_left,
01416       projection_right, initial_pitch,
01417       occupation, mid_cuts, row_cells,
01418       testing_on, start, end);
01419     sp_sd = occupation;
01420     return word_sync;
01421   }
01422   mid_cuts = 0;
01423   cellpos = 0;
01424   total_count = 0;
01425   sqsum = 0;
01426   sp_count = 0;
01427   spsum = 0;
01428   prev_right = -1;
01429   if (blob_it.empty ())
01430     return space_size * 10;
01431 #ifndef GRAPHICS_DISABLED
01432   if (testing_on && to_win > 0) {
01433     blob_box = blob_it.data ()->bounding_box ();
01434     projection->plot (to_win, projection_left,
01435       row->intercept (), 1.0f, -1.0f, ScrollView::CORAL);
01436   }
01437 #endif
01438   start_it = blob_it;
01439   blob_count = 0;
01440   blob_box = box_next (&blob_it);//first blob
01441   blob_it.mark_cycle_pt ();
01442   do {
01443     for (; blob_count > 0; blob_count--)
01444       box_next(&start_it);
01445     do {
01446       prev_box = blob_box;
01447       blob_count++;
01448       blob_box = box_next (&blob_it);
01449     }
01450     while (!blob_it.cycled_list ()
01451       && blob_box.left () - prev_box.right () < space_size);
01452     plot_it = start_it;
01453     if (pitsync_linear_version & 3)
01454       word_sync =
01455         check_pitch_sync2 (&start_it, blob_count, (inT16) initial_pitch, 2,
01456         projection, projection_left, projection_right,
01457         row->xheight * textord_projection_scale,
01458         occupation, &seg_list, start, end);
01459     else
01460       word_sync =
01461         check_pitch_sync (&start_it, blob_count, (inT16) initial_pitch, 2,
01462         projection, &seg_list);
01463     if (testing_on) {
01464       tprintf ("Word ending at (%d,%d), len=%d, sync rating=%g, ",
01465         prev_box.right (), prev_box.top (),
01466         seg_list.length () - 1, word_sync);
01467       seg_it.set_to_list (&seg_list);
01468       for (seg_it.mark_cycle_pt (); !seg_it.cycled_list ();
01469       seg_it.forward ()) {
01470         if (seg_it.data ()->faked)
01471           tprintf ("(F)");
01472         tprintf ("%d, ", seg_it.data ()->position ());
01473         //                              tprintf("C=%g, s=%g, sq=%g\n",
01474         //                                      seg_it.data()->cost_function(),
01475         //                                      seg_it.data()->sum(),
01476         //                                      seg_it.data()->squares());
01477       }
01478       tprintf ("\n");
01479     }
01480 #ifndef GRAPHICS_DISABLED
01481     if (textord_show_fixed_cuts && blob_count > 0 && to_win > 0)
01482       plot_fp_cells2(to_win, ScrollView::GOLDENROD, row, &seg_list);
01483 #endif
01484     seg_it.set_to_list (&seg_list);
01485     if (prev_right >= 0) {
01486       sp_var = seg_it.data ()->position () - prev_right;
01487       sp_var -= floor (sp_var / initial_pitch + 0.5) * initial_pitch;
01488       sp_var *= sp_var;
01489       spsum += sp_var;
01490       sp_count++;
01491     }
01492     for (seg_it.mark_cycle_pt (); !seg_it.cycled_list (); seg_it.forward ()) {
01493       segpos = seg_it.data ()->position ();
01494       if (cell_it.empty () || segpos > cellpos + initial_pitch / 2) {
01495                                  //big gap
01496         while (!cell_it.empty () && segpos > cellpos + initial_pitch * 3 / 2) {
01497           cell = new ICOORDELT (cellpos + (inT16) initial_pitch, 0);
01498           cell_it.add_after_then_move (cell);
01499           cellpos += (inT16) initial_pitch;
01500         }
01501                                  //make new one
01502         cell = new ICOORDELT (segpos, 0);
01503         cell_it.add_after_then_move (cell);
01504         cellpos = segpos;
01505       }
01506       else if (segpos > cellpos - initial_pitch / 2) {
01507         cell = cell_it.data ();
01508                                  //average positions
01509         cell->set_x ((cellpos + segpos) / 2);
01510         cellpos = cell->x ();
01511       }
01512     }
01513     seg_it.move_to_last ();
01514     prev_right = seg_it.data ()->position ();
01515     if (textord_pitch_scalebigwords) {
01516       scale_factor = (seg_list.length () - 2) / 2;
01517       if (scale_factor < 1)
01518         scale_factor = 1;
01519     }
01520     else
01521       scale_factor = 1;
01522     sqsum += word_sync * scale_factor;
01523     total_count += (seg_list.length () - 1) * scale_factor;
01524     seg_list.clear ();
01525   }
01526   while (!blob_it.cycled_list ());
01527   sp_sd = sp_count > 0 ? sqrt (spsum / sp_count) : 0;
01528   return total_count > 0 ? sqrt (sqsum / total_count) : space_size * 10;
01529 }
01530 
01531 
01532 /**********************************************************************
01533  * compute_pitch_sd2
01534  *
01535  * Use a dp algorithm to fit the character cells and return the sd of
01536  * the cell size over the row.
01537  **********************************************************************/
01538 
01539 float compute_pitch_sd2(                            //find fp cells
01540                         TO_ROW *row,                //row to do
01541                         STATS *projection,          //vertical projection
01542                         inT16 projection_left,      //edge
01543                         inT16 projection_right,     //edge
01544                         float initial_pitch,        //guess at pitch
01545                         inT16 &occupation,          //no of occupied cells
01546                         inT16 &mid_cuts,            //no of free cuts
01547                         ICOORDELT_LIST *row_cells,  //list of chop pts
01548                         BOOL8 testing_on,           //inidividual words
01549                         inT16 start,                //start of good range
01550                         inT16 end                   //end of good range
01551                        ) {
01552                                  //blobs
01553   BLOBNBOX_IT blob_it = row->blob_list ();
01554   BLOBNBOX_IT plot_it;
01555   inT16 blob_count;              //no of blobs
01556   TBOX blob_box;                  //bounding box
01557   FPSEGPT_LIST seg_list;         //char cells
01558   FPSEGPT_IT seg_it;             //iterator
01559   inT16 segpos;                  //position of segment
01560                                  //iterator
01561   ICOORDELT_IT cell_it = row_cells;
01562   ICOORDELT *cell;               //new cell
01563   double word_sync;              //result for word
01564 
01565   mid_cuts = 0;
01566   if (blob_it.empty ()) {
01567     occupation = 0;
01568     return initial_pitch * 10;
01569   }
01570 #ifndef GRAPHICS_DISABLED
01571   if (testing_on && to_win > 0) {
01572     projection->plot (to_win, projection_left,
01573       row->intercept (), 1.0f, -1.0f, ScrollView::CORAL);
01574   }
01575 #endif
01576   blob_count = 0;
01577   blob_it.mark_cycle_pt ();
01578   do {
01579                                  //first blob
01580     blob_box = box_next (&blob_it);
01581     blob_count++;
01582   }
01583   while (!blob_it.cycled_list ());
01584   plot_it = blob_it;
01585   word_sync = check_pitch_sync2 (&blob_it, blob_count, (inT16) initial_pitch,
01586     2, projection, projection_left,
01587     projection_right,
01588     row->xheight * textord_projection_scale,
01589     occupation, &seg_list, start, end);
01590   if (testing_on) {
01591     tprintf ("Row ending at (%d,%d), len=%d, sync rating=%g, ",
01592       blob_box.right (), blob_box.top (),
01593       seg_list.length () - 1, word_sync);
01594     seg_it.set_to_list (&seg_list);
01595     for (seg_it.mark_cycle_pt (); !seg_it.cycled_list (); seg_it.forward ()) {
01596       if (seg_it.data ()->faked)
01597         tprintf ("(F)");
01598       tprintf ("%d, ", seg_it.data ()->position ());
01599       //                              tprintf("C=%g, s=%g, sq=%g\n",
01600       //                                      seg_it.data()->cost_function(),
01601       //                                      seg_it.data()->sum(),
01602       //                                      seg_it.data()->squares());
01603     }
01604     tprintf ("\n");
01605   }
01606 #ifndef GRAPHICS_DISABLED
01607   if (textord_show_fixed_cuts && blob_count > 0 && to_win > 0)
01608     plot_fp_cells2(to_win, ScrollView::GOLDENROD, row, &seg_list);
01609 #endif
01610   seg_it.set_to_list (&seg_list);
01611   for (seg_it.mark_cycle_pt (); !seg_it.cycled_list (); seg_it.forward ()) {
01612     segpos = seg_it.data ()->position ();
01613                                  //make new one
01614     cell = new ICOORDELT (segpos, 0);
01615     cell_it.add_after_then_move (cell);
01616     if (seg_it.at_last ())
01617       mid_cuts = seg_it.data ()->cheap_cuts ();
01618   }
01619   seg_list.clear ();
01620   return occupation > 0 ? sqrt (word_sync / occupation) : initial_pitch * 10;
01621 }
01622 
01623 
01624 /**********************************************************************
01625  * print_pitch_sd
01626  *
01627  * Use a dp algorithm to fit the character cells and return the sd of
01628  * the cell size over the row.
01629  **********************************************************************/
01630 
01631 void print_pitch_sd(                        //find fp cells
01632                     TO_ROW *row,            //row to do
01633                     STATS *projection,      //vertical projection
01634                     inT16 projection_left,  //edges //size of blank
01635                     inT16 projection_right,
01636                     float space_size,
01637                     float initial_pitch     //guess at pitch
01638                    ) {
01639   const char *res2;              //pitch result
01640   inT16 occupation;              //used cells
01641   float sp_sd;                   //space sd
01642                                  //blobs
01643   BLOBNBOX_IT blob_it = row->blob_list ();
01644   BLOBNBOX_IT start_it;          //start of word
01645   BLOBNBOX_IT row_start;         //start of row
01646   inT16 blob_count;              //no of blobs
01647   inT16 total_blob_count;        //total blobs in line
01648   TBOX blob_box;                  //bounding box
01649   TBOX prev_box;                  //of super blob
01650   inT32 prev_right;              //of word sync
01651   int scale_factor;              //on scores for big words
01652   inT32 sp_count;                //spaces
01653   FPSEGPT_LIST seg_list;         //char cells
01654   FPSEGPT_IT seg_it;             //iterator
01655   double sqsum;                  //sum of squares
01656   double spsum;                  //of spaces
01657   double sp_var;                 //space error
01658   double word_sync;              //result for word
01659   double total_count;            //total cuts
01660 
01661   if (blob_it.empty ())
01662     return;
01663   row_start = blob_it;
01664   total_blob_count = 0;
01665 
01666   total_count = 0;
01667   sqsum = 0;
01668   sp_count = 0;
01669   spsum = 0;
01670   prev_right = -1;
01671   blob_it = row_start;
01672   start_it = blob_it;
01673   blob_count = 0;
01674   blob_box = box_next (&blob_it);//first blob
01675   blob_it.mark_cycle_pt ();
01676   do {
01677     for (; blob_count > 0; blob_count--)
01678       box_next(&start_it);
01679     do {
01680       prev_box = blob_box;
01681       blob_count++;
01682       blob_box = box_next (&blob_it);
01683     }
01684     while (!blob_it.cycled_list ()
01685       && blob_box.left () - prev_box.right () < space_size);
01686     word_sync =
01687       check_pitch_sync2 (&start_it, blob_count, (inT16) initial_pitch, 2,
01688       projection, projection_left, projection_right,
01689       row->xheight * textord_projection_scale,
01690       occupation, &seg_list, 0, 0);
01691     total_blob_count += blob_count;
01692     seg_it.set_to_list (&seg_list);
01693     if (prev_right >= 0) {
01694       sp_var = seg_it.data ()->position () - prev_right;
01695       sp_var -= floor (sp_var / initial_pitch + 0.5) * initial_pitch;
01696       sp_var *= sp_var;
01697       spsum += sp_var;
01698       sp_count++;
01699     }
01700     seg_it.move_to_last ();
01701     prev_right = seg_it.data ()->position ();
01702     if (textord_pitch_scalebigwords) {
01703       scale_factor = (seg_list.length () - 2) / 2;
01704       if (scale_factor < 1)
01705         scale_factor = 1;
01706     }
01707     else
01708       scale_factor = 1;
01709     sqsum += word_sync * scale_factor;
01710     total_count += (seg_list.length () - 1) * scale_factor;
01711     seg_list.clear ();
01712   }
01713   while (!blob_it.cycled_list ());
01714   sp_sd = sp_count > 0 ? sqrt (spsum / sp_count) : 0;
01715   word_sync = total_count > 0 ? sqrt (sqsum / total_count) : space_size * 10;
01716   tprintf ("new_sd=%g:sd/p=%g:new_sp_sd=%g:res=%c:",
01717     word_sync, word_sync / initial_pitch, sp_sd,
01718     word_sync < textord_words_pitchsd_threshold * initial_pitch
01719     ? 'F' : 'P');
01720 
01721   start_it = row_start;
01722   blob_it = row_start;
01723   word_sync =
01724     check_pitch_sync2 (&blob_it, total_blob_count, (inT16) initial_pitch, 2,
01725     projection, projection_left, projection_right,
01726     row->xheight * textord_projection_scale, occupation,
01727     &seg_list, 0, 0);
01728   if (occupation > 1)
01729     word_sync /= occupation;
01730   word_sync = sqrt (word_sync);
01731 
01732 #ifndef GRAPHICS_DISABLED
01733   if (textord_show_row_cuts && to_win != NULL)
01734     plot_fp_cells2(to_win, ScrollView::CORAL, row, &seg_list);
01735 #endif
01736   seg_list.clear ();
01737   if (word_sync < textord_words_pitchsd_threshold * initial_pitch) {
01738     if (word_sync < textord_words_def_fixed * initial_pitch
01739       && !row->all_caps)
01740       res2 = "DF";
01741     else
01742       res2 = "MF";
01743   }
01744   else
01745     res2 = word_sync < textord_words_def_prop * initial_pitch ? "MP" : "DP";
01746   tprintf
01747     ("row_sd=%g:sd/p=%g:res=%c:N=%d:res2=%s,init pitch=%g, row_pitch=%g, all_caps=%d\n",
01748     word_sync, word_sync / initial_pitch,
01749     word_sync < textord_words_pitchsd_threshold * initial_pitch ? 'F' : 'P',
01750     occupation, res2, initial_pitch, row->fixed_pitch, row->all_caps);
01751 }
01752 
01753 /**********************************************************************
01754  * find_repeated_chars
01755  *
01756  * Extract marked leader blobs and put them
01757  * into words in advance of fixed pitch checking and word generation.
01758  **********************************************************************/
01759 void find_repeated_chars(TO_BLOCK *block,       // Block to search.
01760                          BOOL8 testing_on) {    // Debug mode.
01761   POLY_BLOCK* pb = block->block->poly_block();
01762   if (pb != NULL && !pb->IsText())
01763     return;  // Don't find repeated chars in non-text blocks.
01764 
01765   TO_ROW *row;
01766   BLOBNBOX_IT box_it;
01767   BLOBNBOX_IT search_it;         // forward search
01768   WERD_IT word_it;               // new words
01769   WERD *word;                    // new word
01770   TBOX word_box;                 // for plotting
01771   int blobcount, repeated_set;
01772 
01773   TO_ROW_IT row_it = block->get_rows();
01774   if (row_it.empty()) return;  // empty block
01775   for (row_it.mark_cycle_pt(); !row_it.cycled_list(); row_it.forward()) {
01776     row = row_it.data();
01777     box_it.set_to_list(row->blob_list());
01778     if (box_it.empty())  continue; // no blobs in this row
01779     if (!row->rep_chars_marked()) {
01780       mark_repeated_chars(row);
01781     }
01782     if (row->num_repeated_sets() == 0) continue;  // nothing to do for this row
01783     word_it.set_to_list(&row->rep_words);
01784     do {
01785       if (box_it.data()->repeated_set() != 0 &&
01786           !box_it.data()->joined_to_prev()) {
01787         blobcount = 1;
01788         repeated_set = box_it.data()->repeated_set();
01789         search_it = box_it;
01790         search_it.forward();
01791         while (!search_it.at_first() &&
01792                search_it.data()->repeated_set() == repeated_set) {
01793           blobcount++;
01794           search_it.forward();
01795         }
01796         // After the call to make_real_word() all the blobs from this
01797         // repeated set will be removed from the blob list. box_it will be
01798         // set to point to the blob after the end of the extracted sequence.
01799         word = make_real_word(&box_it, blobcount, box_it.at_first(), 1);
01800         if (!box_it.empty() && box_it.data()->joined_to_prev()) {
01801           tprintf("Bad box joined to prev at");
01802           box_it.data()->bounding_box().print();
01803           tprintf("After repeated word:");
01804           word->bounding_box().print();
01805         }
01806         ASSERT_HOST(box_it.empty() || !box_it.data()->joined_to_prev());
01807         word->set_flag(W_REP_CHAR, true);
01808         word->set_flag(W_DONT_CHOP, true);
01809         word_it.add_after_then_move(word);
01810       } else {
01811         box_it.forward();
01812       }
01813     } while (!box_it.at_first());
01814   }
01815 }
01816 
01817 
01818 /**********************************************************************
01819  * plot_fp_word
01820  *
01821  * Plot a block of words as if fixed pitch.
01822  **********************************************************************/
01823 
01824 #ifndef GRAPHICS_DISABLED
01825 void plot_fp_word(                  //draw block of words
01826                   TO_BLOCK *block,  //block to draw
01827                   float pitch,      //pitch to draw with
01828                   float nonspace    //for space threshold
01829                  ) {
01830   TO_ROW *row;                   //current row
01831   TO_ROW_IT row_it = block->get_rows ();
01832 
01833   for (row_it.mark_cycle_pt (); !row_it.cycled_list (); row_it.forward ()) {
01834     row = row_it.data ();
01835     row->min_space = (inT32) ((pitch + nonspace) / 2);
01836     row->max_nonspace = row->min_space;
01837     row->space_threshold = row->min_space;
01838     plot_word_decisions (to_win, (inT16) pitch, row);
01839   }
01840 }
01841 #endif
 All Classes Namespaces Files Functions Variables Typedefs Enumerations Enumerator Friends Defines