tesseract  3.03
/usr/local/google/home/jbreiden/tesseract-ocr-read-only/textord/wordseg.cpp
Go to the documentation of this file.
00001 /**********************************************************************
00002  * File:        wordseg.cpp  (Formerly wspace.c)
00003  * Description: Code to segment the blobs into words.
00004  * Author:              Ray Smith
00005  * Created:             Fri Oct 16 11:32:28 BST 1992
00006  *
00007  * (C) Copyright 1992, Hewlett-Packard Ltd.
00008  ** Licensed under the Apache License, Version 2.0 (the "License");
00009  ** you may not use this file except in compliance with the License.
00010  ** You may obtain a copy of the License at
00011  ** http://www.apache.org/licenses/LICENSE-2.0
00012  ** Unless required by applicable law or agreed to in writing, software
00013  ** distributed under the License is distributed on an "AS IS" BASIS,
00014  ** WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
00015  ** See the License for the specific language governing permissions and
00016  ** limitations under the License.
00017  *
00018  **********************************************************************/
00019 
00020 #ifdef __UNIX__
00021 #include          <assert.h>
00022 #endif
00023 #include          "stderr.h"
00024 #include          "blobbox.h"
00025 #include          "statistc.h"
00026 #include          "drawtord.h"
00027 #include          "makerow.h"
00028 #include          "pitsync1.h"
00029 #include          "tovars.h"
00030 #include          "topitch.h"
00031 #include          "cjkpitch.h"
00032 #include          "textord.h"
00033 #include          "fpchop.h"
00034 #include          "wordseg.h"
00035 
00036 // Include automatically generated configuration file if running autoconf.
00037 #ifdef HAVE_CONFIG_H
00038 #include "config_auto.h"
00039 #endif
00040 
// EXTERN is defined to expand to nothing, so the EXTERN-prefixed BOOL_VAR
// lines below carry no storage-class prefix in this file.
00041 #define EXTERN
00042 
// Boolean parameters declared by this file; each BOOL_VAR gives the
// parameter name, its default value and its description string.
00043 EXTERN BOOL_VAR(textord_fp_chopping, TRUE, "Do fixed pitch chopping");
// Read by make_real_words(): when set, rows go through make_prop_words
// regardless of their pitch decision.
00044 EXTERN BOOL_VAR(textord_force_make_prop_words, FALSE,
00045                 "Force proportional word segmentation on all rows");
// Read by make_real_words(): when set, rows go through make_blob_words.
00046 EXTERN BOOL_VAR(textord_chopper_test, FALSE,
00047                 "Chopper is being tested.");
00048 
// NOTE(review): FIXED_WIDTH_MULTIPLE is not referenced in the visible part of
// this file - confirm whether it is still needed.
00049 #define FIXED_WIDTH_MULTIPLE  5
// Maximum number of gap clusters requested by row_words2(), which also sizes
// its gaps[] and cluster_stats[] arrays from this value.
00050 #define BLOCK_STATS_CLUSTERS  10
00051 
00052 
00060 void make_single_word(bool one_blob, TO_ROW_LIST *rows, ROW_LIST* real_rows) {
00061   TO_ROW_IT to_row_it(rows);
00062   ROW_IT row_it(real_rows);
00063   for (to_row_it.mark_cycle_pt(); !to_row_it.cycled_list();
00064        to_row_it.forward()) {
00065     TO_ROW* row = to_row_it.data();
00066     // The blobs have to come out of the BLOBNBOX into the C_BLOB_LIST ready
00067     // to create the word.
00068     C_BLOB_LIST cblobs;
00069     C_BLOB_IT cblob_it(&cblobs);
00070     BLOBNBOX_IT box_it(row->blob_list());
00071     for (;!box_it.empty(); box_it.forward()) {
00072       BLOBNBOX* bblob= box_it.extract();
00073       if (bblob->joined_to_prev() || (one_blob && !cblob_it.empty())) {
00074         if (bblob->cblob() != NULL) {
00075           C_OUTLINE_IT cout_it(cblob_it.data()->out_list());
00076           cout_it.move_to_last();
00077           cout_it.add_list_after(bblob->cblob()->out_list());
00078           delete bblob->cblob();
00079         }
00080       } else {
00081         if (bblob->cblob() != NULL)
00082           cblob_it.add_after_then_move(bblob->cblob());
00083       }
00084       delete bblob;
00085     }
00086     // Convert the TO_ROW to a ROW.
00087     ROW* real_row = new ROW(row, static_cast<inT16>(row->kern_size),
00088                             static_cast<inT16>(row->space_size));
00089     WERD_IT word_it(real_row->word_list());
00090     WERD* word = new WERD(&cblobs, 0, NULL);
00091     word->set_flag(W_BOL, TRUE);
00092     word->set_flag(W_EOL, TRUE);
00093     word->set_flag(W_DONT_CHOP, one_blob);
00094     word_it.add_after_then_move(word);
00095     row_it.add_after_then_move(real_row);
00096   }
00097 }
00098 
00104 void make_words(tesseract::Textord *textord,
00105                 ICOORD page_tr,                // top right
00106                 float gradient,                // page skew
00107                 BLOCK_LIST *blocks,            // block list
00108                 TO_BLOCK_LIST *port_blocks) {  // output list
00109   TO_BLOCK_IT block_it;          // iterator
00110   TO_BLOCK *block;               // current block
00111 
00112   if (textord->use_cjk_fp_model()) {
00113     compute_fixed_pitch_cjk(page_tr, port_blocks);
00114   } else {
00115     compute_fixed_pitch(page_tr, port_blocks, gradient, FCOORD(0.0f, -1.0f),
00116                         !(BOOL8) textord_test_landscape);
00117   }
00118   textord->to_spacing(page_tr, port_blocks);
00119   block_it.set_to_list(port_blocks);
00120   for (block_it.mark_cycle_pt(); !block_it.cycled_list(); block_it.forward()) {
00121     block = block_it.data();
00122     make_real_words(textord, block, FCOORD(1.0f, 0.0f));
00123   }
00124 }
00125 
00126 
00134 void set_row_spaces(                  //find space sizes
00135                     TO_BLOCK *block,  //block to do
00136                     FCOORD rotation,  //for drawing
00137                     BOOL8 testing_on  //correct orientation
00138                    ) {
00139   TO_ROW *row;                   //current row
00140   TO_ROW_IT row_it = block->get_rows ();
00141 
00142   if (row_it.empty ())
00143     return;                      //empty block
00144   for (row_it.mark_cycle_pt (); !row_it.cycled_list (); row_it.forward ()) {
00145     row = row_it.data ();
00146     if (row->fixed_pitch == 0) {
00147       row->min_space =
00148         (inT32) ceil (row->pr_space -
00149         (row->pr_space -
00150         row->pr_nonsp) * textord_words_definite_spread);
00151       row->max_nonspace =
00152         (inT32) floor (row->pr_nonsp +
00153         (row->pr_space -
00154         row->pr_nonsp) * textord_words_definite_spread);
00155       if (testing_on && textord_show_initial_words) {
00156         tprintf ("Assigning defaults %d non, %d space to row at %g\n",
00157           row->max_nonspace, row->min_space, row->intercept ());
00158       }
00159       row->space_threshold = (row->max_nonspace + row->min_space) / 2;
00160       row->space_size = row->pr_space;
00161       row->kern_size = row->pr_nonsp;
00162     }
00163 #ifndef GRAPHICS_DISABLED
00164     if (textord_show_initial_words && testing_on) {
00165       plot_word_decisions (to_win, (inT16) row->fixed_pitch, row);
00166     }
00167 #endif
00168   }
00169 }
00170 
00171 
00178 inT32 row_words(                  //compute space size
00179                 TO_BLOCK *block,  //block it came from
00180                 TO_ROW *row,      //row to operate on
00181                 inT32 maxwidth,   //max expected space size
00182                 FCOORD rotation,  //for drawing
00183                 BOOL8 testing_on  //for debug
00184                ) {
00185   BOOL8 testing_row;             //contains testpt
00186   BOOL8 prev_valid;              //if decent size
00187   BOOL8 this_valid;              //current blob big enough
00188   inT32 prev_x;                  //end of prev blob
00189   inT32 min_gap;                 //min interesting gap
00190   inT32 cluster_count;           //no of clusters
00191   inT32 gap_index;               //which cluster
00192   inT32 smooth_factor;           //for smoothing stats
00193   BLOBNBOX *blob;                //current blob
00194   float lower, upper;            //clustering parameters
00195   float gaps[3];                 //gap clusers
00196   ICOORD testpt;
00197   TBOX blob_box;                  //bounding box
00198                                  //iterator
00199   BLOBNBOX_IT blob_it = row->blob_list ();
00200   STATS gap_stats (0, maxwidth);
00201   STATS cluster_stats[4];        //clusters
00202 
00203   testpt = ICOORD (textord_test_x, textord_test_y);
00204   smooth_factor =
00205     (inT32) (block->xheight * textord_wordstats_smooth_factor + 1.5);
00206   //      if (testing_on)
00207   //              tprintf("Row smooth factor=%d\n",smooth_factor);
00208   prev_valid = FALSE;
00209   prev_x = -MAX_INT32;
00210   testing_row = FALSE;
00211   for (blob_it.mark_cycle_pt (); !blob_it.cycled_list (); blob_it.forward ()) {
00212     blob = blob_it.data ();
00213     blob_box = blob->bounding_box ();
00214     if (blob_box.contains (testpt))
00215       testing_row = TRUE;
00216     gap_stats.add (blob_box.width (), 1);
00217   }
00218   min_gap = (inT32) floor (gap_stats.ile (textord_words_width_ile));
00219   gap_stats.clear ();
00220   for (blob_it.mark_cycle_pt (); !blob_it.cycled_list (); blob_it.forward ()) {
00221     blob = blob_it.data ();
00222     if (!blob->joined_to_prev ()) {
00223       blob_box = blob->bounding_box ();
00224       //                      this_valid=blob_box.width()>=min_gap;
00225       this_valid = TRUE;
00226       if (this_valid && prev_valid
00227       && blob_box.left () - prev_x < maxwidth) {
00228         gap_stats.add (blob_box.left () - prev_x, 1);
00229       }
00230       prev_x = blob_box.right ();
00231       prev_valid = this_valid;
00232     }
00233   }
00234   if (gap_stats.get_total () == 0) {
00235     row->min_space = 0;          //no evidence
00236     row->max_nonspace = 0;
00237     return 0;
00238   }
00239   gap_stats.smooth (smooth_factor);
00240   lower = row->xheight * textord_words_initial_lower;
00241   upper = row->xheight * textord_words_initial_upper;
00242   cluster_count = gap_stats.cluster (lower, upper,
00243     textord_spacesize_ratioprop, 3,
00244     cluster_stats);
00245   while (cluster_count < 2 && ceil (lower) < floor (upper)) {
00246                                  //shrink gap
00247     upper = (upper * 3 + lower) / 4;
00248     lower = (lower * 3 + upper) / 4;
00249     cluster_count = gap_stats.cluster (lower, upper,
00250       textord_spacesize_ratioprop, 3,
00251       cluster_stats);
00252   }
00253   if (cluster_count < 2) {
00254     row->min_space = 0;          //no evidence
00255     row->max_nonspace = 0;
00256     return 0;
00257   }
00258   for (gap_index = 0; gap_index < cluster_count; gap_index++)
00259     gaps[gap_index] = cluster_stats[gap_index + 1].ile (0.5);
00260   //get medians
00261   if (cluster_count > 2) {
00262     if (testing_on && textord_show_initial_words) {
00263       tprintf ("Row at %g has 3 sizes of gap:%g,%g,%g\n",
00264         row->intercept (),
00265         cluster_stats[1].ile (0.5),
00266         cluster_stats[2].ile (0.5), cluster_stats[3].ile (0.5));
00267     }
00268     lower = gaps[0];
00269     if (gaps[1] > lower) {
00270       upper = gaps[1];           //prefer most frequent
00271       if (upper < block->xheight * textord_words_min_minspace
00272       && gaps[2] > gaps[1]) {
00273         upper = gaps[2];
00274       }
00275     }
00276     else if (gaps[2] > lower
00277       && gaps[2] >= block->xheight * textord_words_min_minspace)
00278       upper = gaps[2];
00279     else if (lower >= block->xheight * textord_words_min_minspace) {
00280       upper = lower;             //not nice
00281       lower = gaps[1];
00282       if (testing_on && textord_show_initial_words) {
00283         tprintf ("Had to switch most common from lower to upper!!\n");
00284         gap_stats.print();
00285       }
00286     }
00287     else {
00288       row->min_space = 0;        //no evidence
00289       row->max_nonspace = 0;
00290       return 0;
00291     }
00292   }
00293   else {
00294     if (gaps[1] < gaps[0]) {
00295       if (testing_on && textord_show_initial_words) {
00296         tprintf ("Had to switch most common from lower to upper!!\n");
00297         gap_stats.print();
00298       }
00299       lower = gaps[1];
00300       upper = gaps[0];
00301     }
00302     else {
00303       upper = gaps[1];
00304       lower = gaps[0];
00305     }
00306   }
00307   if (upper < block->xheight * textord_words_min_minspace) {
00308     row->min_space = 0;          //no evidence
00309     row->max_nonspace = 0;
00310     return 0;
00311   }
00312   if (upper * 3 < block->min_space * 2 + block->max_nonspace
00313   || lower * 3 > block->min_space * 2 + block->max_nonspace) {
00314     if (testing_on && textord_show_initial_words) {
00315       tprintf ("Disagreement between block and row at %g!!\n",
00316         row->intercept ());
00317       tprintf ("Lower=%g, upper=%g, Stats:\n", lower, upper);
00318       gap_stats.print();
00319     }
00320   }
00321   row->min_space =
00322     (inT32) ceil (upper - (upper - lower) * textord_words_definite_spread);
00323   row->max_nonspace =
00324     (inT32) floor (lower + (upper - lower) * textord_words_definite_spread);
00325   row->space_threshold = (row->max_nonspace + row->min_space) / 2;
00326   row->space_size = upper;
00327   row->kern_size = lower;
00328   if (testing_on && textord_show_initial_words) {
00329     if (testing_row) {
00330       tprintf ("GAP STATS\n");
00331       gap_stats.print();
00332       tprintf ("SPACE stats\n");
00333       cluster_stats[2].print_summary();
00334       tprintf ("NONSPACE stats\n");
00335       cluster_stats[1].print_summary();
00336     }
00337     tprintf ("Row at %g has minspace=%d(%g), max_non=%d(%g)\n",
00338       row->intercept (), row->min_space, upper,
00339       row->max_nonspace, lower);
00340   }
00341   return cluster_stats[2].get_total ();
00342 }
00343 
00344 
00351 inT32 row_words2(                  //compute space size
00352                  TO_BLOCK *block,  //block it came from
00353                  TO_ROW *row,      //row to operate on
00354                  inT32 maxwidth,   //max expected space size
00355                  FCOORD rotation,  //for drawing
00356                  BOOL8 testing_on  //for debug
00357                 ) {
00358   BOOL8 testing_row;             //contains testpt
00359   BOOL8 prev_valid;              //if decent size
00360   BOOL8 this_valid;              //current blob big enough
00361   inT32 prev_x;                  //end of prev blob
00362   inT32 min_width;               //min interesting width
00363   inT32 valid_count;             //good gaps
00364   inT32 total_count;             //total gaps
00365   inT32 cluster_count;           //no of clusters
00366   inT32 prev_count;              //previous cluster_count
00367   inT32 gap_index;               //which cluster
00368   inT32 smooth_factor;           //for smoothing stats
00369   BLOBNBOX *blob;                //current blob
00370   float lower, upper;            //clustering parameters
00371   ICOORD testpt;
00372   TBOX blob_box;                  //bounding box
00373                                  //iterator
00374   BLOBNBOX_IT blob_it = row->blob_list ();
00375   STATS gap_stats (0, maxwidth);
00376                                  //gap sizes
00377   float gaps[BLOCK_STATS_CLUSTERS];
00378   STATS cluster_stats[BLOCK_STATS_CLUSTERS + 1];
00379   //clusters
00380 
00381   testpt = ICOORD (textord_test_x, textord_test_y);
00382   smooth_factor =
00383     (inT32) (block->xheight * textord_wordstats_smooth_factor + 1.5);
00384   //      if (testing_on)
00385   //              tprintf("Row smooth factor=%d\n",smooth_factor);
00386   prev_valid = FALSE;
00387   prev_x = -MAX_INT16;
00388   testing_row = FALSE;
00389                                  //min blob size
00390   min_width = (inT32) block->pr_space;
00391   total_count = 0;
00392   for (blob_it.mark_cycle_pt (); !blob_it.cycled_list (); blob_it.forward ()) {
00393     blob = blob_it.data ();
00394     if (!blob->joined_to_prev ()) {
00395       blob_box = blob->bounding_box ();
00396       this_valid = blob_box.width () >= min_width;
00397       this_valid = TRUE;
00398       if (this_valid && prev_valid
00399       && blob_box.left () - prev_x < maxwidth) {
00400         gap_stats.add (blob_box.left () - prev_x, 1);
00401       }
00402       total_count++;             //count possibles
00403       prev_x = blob_box.right ();
00404       prev_valid = this_valid;
00405     }
00406   }
00407   valid_count = gap_stats.get_total ();
00408   if (valid_count < total_count * textord_words_minlarge) {
00409     gap_stats.clear ();
00410     prev_x = -MAX_INT16;
00411     for (blob_it.mark_cycle_pt (); !blob_it.cycled_list ();
00412     blob_it.forward ()) {
00413       blob = blob_it.data ();
00414       if (!blob->joined_to_prev ()) {
00415         blob_box = blob->bounding_box ();
00416         if (blob_box.left () - prev_x < maxwidth) {
00417           gap_stats.add (blob_box.left () - prev_x, 1);
00418         }
00419         prev_x = blob_box.right ();
00420       }
00421     }
00422   }
00423   if (gap_stats.get_total () == 0) {
00424     row->min_space = 0;          //no evidence
00425     row->max_nonspace = 0;
00426     return 0;
00427   }
00428 
00429   cluster_count = 0;
00430   lower = block->xheight * words_initial_lower;
00431   upper = block->xheight * words_initial_upper;
00432   gap_stats.smooth (smooth_factor);
00433   do {
00434     prev_count = cluster_count;
00435     cluster_count = gap_stats.cluster (lower, upper,
00436       textord_spacesize_ratioprop,
00437       BLOCK_STATS_CLUSTERS, cluster_stats);
00438   }
00439   while (cluster_count > prev_count && cluster_count < BLOCK_STATS_CLUSTERS);
00440   if (cluster_count < 1) {
00441     row->min_space = 0;
00442     row->max_nonspace = 0;
00443     return 0;
00444   }
00445   for (gap_index = 0; gap_index < cluster_count; gap_index++)
00446     gaps[gap_index] = cluster_stats[gap_index + 1].ile (0.5);
00447   //get medians
00448   if (testing_on) {
00449     tprintf ("cluster_count=%d:", cluster_count);
00450     for (gap_index = 0; gap_index < cluster_count; gap_index++)
00451       tprintf (" %g(%d)", gaps[gap_index],
00452         cluster_stats[gap_index + 1].get_total ());
00453     tprintf ("\n");
00454   }
00455 
00456   //Try to find proportional non-space and space for row.
00457   for (gap_index = 0; gap_index < cluster_count
00458     && gaps[gap_index] > block->max_nonspace; gap_index++);
00459   if (gap_index < cluster_count)
00460     lower = gaps[gap_index];     //most frequent below
00461   else {
00462     if (testing_on)
00463       tprintf ("No cluster below block threshold!, using default=%g\n",
00464         block->pr_nonsp);
00465     lower = block->pr_nonsp;
00466   }
00467   for (gap_index = 0; gap_index < cluster_count
00468     && gaps[gap_index] <= block->max_nonspace; gap_index++);
00469   if (gap_index < cluster_count)
00470     upper = gaps[gap_index];     //most frequent above
00471   else {
00472     if (testing_on)
00473       tprintf ("No cluster above block threshold!, using default=%g\n",
00474         block->pr_space);
00475     upper = block->pr_space;
00476   }
00477   row->min_space =
00478     (inT32) ceil (upper - (upper - lower) * textord_words_definite_spread);
00479   row->max_nonspace =
00480     (inT32) floor (lower + (upper - lower) * textord_words_definite_spread);
00481   row->space_threshold = (row->max_nonspace + row->min_space) / 2;
00482   row->space_size = upper;
00483   row->kern_size = lower;
00484   if (testing_on) {
00485     if (testing_row) {
00486       tprintf ("GAP STATS\n");
00487       gap_stats.print();
00488       tprintf ("SPACE stats\n");
00489       cluster_stats[2].print_summary();
00490       tprintf ("NONSPACE stats\n");
00491       cluster_stats[1].print_summary();
00492     }
00493     tprintf ("Row at %g has minspace=%d(%g), max_non=%d(%g)\n",
00494       row->intercept (), row->min_space, upper,
00495       row->max_nonspace, lower);
00496   }
00497   return 1;
00498 }
00499 
00500 
00507 void make_real_words(
00508                      tesseract::Textord *textord,
00509                      TO_BLOCK *block,  //block to do
00510                      FCOORD rotation   //for drawing
00511                     ) {
00512   TO_ROW *row;                   //current row
00513   TO_ROW_IT row_it = block->get_rows ();
00514   ROW *real_row = NULL;          //output row
00515   ROW_IT real_row_it = block->block->row_list ();
00516 
00517   if (row_it.empty ())
00518     return;                      //empty block
00519   for (row_it.mark_cycle_pt (); !row_it.cycled_list (); row_it.forward ()) {
00520     row = row_it.data ();
00521     if (row->blob_list ()->empty () && !row->rep_words.empty ()) {
00522       real_row = make_rep_words (row, block);
00523     } else if (!row->blob_list()->empty()) {
00524       // In a fixed pitch document, some lines may be detected as fixed pitch
00525       // while others don't, and will go through different path.
00526       // For non-space delimited language like CJK, fixed pitch chop always
00527       // leave the entire line as one word.  We can force consistent chopping
00528       // with force_make_prop_words flag.
00529       POLY_BLOCK* pb = block->block->poly_block();
00530       if (textord_chopper_test) {
00531         real_row = textord->make_blob_words (row, rotation);
00532       } else if (textord_force_make_prop_words ||
00533                  (pb != NULL && !pb->IsText()) ||
00534                  row->pitch_decision == PITCH_DEF_PROP ||
00535                  row->pitch_decision == PITCH_CORR_PROP) {
00536         real_row = textord->make_prop_words (row, rotation);
00537       } else if (row->pitch_decision == PITCH_DEF_FIXED ||
00538                  row->pitch_decision == PITCH_CORR_FIXED) {
00539         real_row = fixed_pitch_words (row, rotation);
00540       } else {
00541         ASSERT_HOST(FALSE);
00542       }
00543     }
00544     if (real_row != NULL) {
00545                                  //put row in block
00546       real_row_it.add_after_then_move (real_row);
00547     }
00548   }
00549   block->block->set_stats (block->fixed_pitch == 0, (inT16) block->kern_size,
00550     (inT16) block->space_size,
00551     (inT16) block->fixed_pitch);
00552   block->block->check_pitch ();
00553 }
00554 
00555 
00563 ROW *make_rep_words(                 //make a row
00564                     TO_ROW *row,     //row to convert
00565                     TO_BLOCK *block  //block it lives in
00566                    ) {
00567   ROW *real_row;                 //output row
00568   TBOX word_box;                  //bounding box
00569                                  //iterator
00570   WERD_IT word_it = &row->rep_words;
00571 
00572   if (word_it.empty ())
00573     return NULL;
00574   word_box = word_it.data ()->bounding_box ();
00575   for (word_it.mark_cycle_pt (); !word_it.cycled_list (); word_it.forward ())
00576     word_box += word_it.data ()->bounding_box ();
00577   row->xheight = block->xheight;
00578   real_row = new ROW(row,
00579     (inT16) block->kern_size, (inT16) block->space_size);
00580   word_it.set_to_list (real_row->word_list ());
00581                                  //put words in row
00582   word_it.add_list_after (&row->rep_words);
00583   real_row->recalc_bounding_box ();
00584   return real_row;
00585 }
00586 
00587 
00595 WERD *make_real_word(BLOBNBOX_IT *box_it,  //iterator
00596                      inT32 blobcount,      //no of blobs to use
00597                      BOOL8 bol,            //start of line
00598                      uinT8 blanks          //no of blanks
00599                     ) {
00600   C_OUTLINE_IT cout_it;
00601   C_BLOB_LIST cblobs;
00602   C_BLOB_IT cblob_it = &cblobs;
00603   WERD *word;                    // new word
00604   BLOBNBOX *bblob;               // current blob
00605   inT32 blobindex;               // in row
00606 
00607   for (blobindex = 0; blobindex < blobcount; blobindex++) {
00608     bblob = box_it->extract();
00609     if (bblob->joined_to_prev()) {
00610       if (bblob->cblob() != NULL) {
00611         cout_it.set_to_list(cblob_it.data()->out_list());
00612         cout_it.move_to_last();
00613         cout_it.add_list_after(bblob->cblob()->out_list());
00614         delete bblob->cblob();
00615       }
00616     }
00617     else {
00618       if (bblob->cblob() != NULL)
00619         cblob_it.add_after_then_move(bblob->cblob());
00620     }
00621     delete bblob;
00622     box_it->forward();          // next one
00623   }
00624 
00625   if (blanks < 1)
00626     blanks = 1;
00627 
00628   word = new WERD(&cblobs, blanks, NULL);
00629 
00630   if (bol)
00631     word->set_flag(W_BOL, TRUE);
00632   if (box_it->at_first())
00633     word->set_flag(W_EOL, TRUE);  // at end of line
00634 
00635   return word;
00636 }
 All Classes Namespaces Files Functions Variables Typedefs Enumerations Enumerator Friends Defines